diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 9ce48658b1..505fe77f89 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,3 +1,10 @@
+
+
 ##### System information (version)
 - [ ] I checked the problem with documentation, FAQ, open issues,
-  answers.opencv.org, Stack Overflow, etc and have not found solution
+  forum.opencv.org, Stack Overflow, etc and have not found solution
diff --git a/.github/workflows/arm64-build-checks.yml b/.github/workflows/arm64-build-checks.yml
index f5988c7895..d3cf532d59 100644
--- a/.github/workflows/arm64-build-checks.yml
+++ b/.github/workflows/arm64-build-checks.yml
@@ -1,6 +1,6 @@
 name: arm64 build checks
 
-on: [pull_request]
+on: workflow_dispatch
 
 jobs:
   build:
diff --git a/3rdparty/carotene/src/resize.cpp b/3rdparty/carotene/src/resize.cpp
index 49205573cd..aa5b756c75 100644
--- a/3rdparty/carotene/src/resize.cpp
+++ b/3rdparty/carotene/src/resize.cpp
@@ -758,7 +758,7 @@ inline void resizeAreaRounding(const Size2D &ssize, const Size2D &dsize,
     }
     else if (channels == 3)
     {
-        if ((wr == 2.0f) && (wr == 2.0f))
+        if ((wr == 2.0f) && (hr == 2.0f))
         {
 #ifndef __ANDROID__
             size_t roiw16 = dsize.width >= 15 ? (dsize.width - 15) * 3 : 0;
diff --git a/3rdparty/ffmpeg/ffmpeg.cmake b/3rdparty/ffmpeg/ffmpeg.cmake
index 8cf0f24f5e..3cd5e1be94 100644
--- a/3rdparty/ffmpeg/ffmpeg.cmake
+++ b/3rdparty/ffmpeg/ffmpeg.cmake
@@ -1,8 +1,8 @@
-# Binaries branch name: ffmpeg/master_20200908
-# Binaries were created for OpenCV: f445b826d084188077a5e9d204c4c33d1589f380
-ocv_update(FFMPEG_BINARIES_COMMIT "6152e132572dfdaa32887eabeb7199bef49b14dc")
-ocv_update(FFMPEG_FILE_HASH_BIN32 "37e2dadf776631acc8856e281f29cf42")
-ocv_update(FFMPEG_FILE_HASH_BIN64 "cf5dba83edf8619f57ccff4edb989c62")
+# Binaries branch name: ffmpeg/master_20210303
+# Binaries were created for OpenCV: 7ac6abe02a33bef445a5b77214ad31964e2c5cc1
+ocv_update(FFMPEG_BINARIES_COMMIT "629590c3ba09fb0c8eaa9ab858ff13d3a84ca1aa")
+ocv_update(FFMPEG_FILE_HASH_BIN32 "638065d5a0dab8a828879942375dcac4")
+ocv_update(FFMPEG_FILE_HASH_BIN64 "7f10ae2e6a080ba3714f7a38ee03ae15")
 ocv_update(FFMPEG_FILE_HASH_CMAKE "f8e65dbe4a3b4eedc0d2997e07c3f3fd")
 
 function(download_win_ffmpeg script_var)
diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt
index 61e40b2885..2074888a52 100644
--- a/3rdparty/libtiff/CMakeLists.txt
+++ b/3rdparty/libtiff/CMakeLists.txt
@@ -239,6 +239,9 @@ if(HOST_BIG_ENDIAN)
 else()
   set(HOST_BIG_ENDIAN 0)
 endif()
+if(HOST_BIG_ENDIAN)
+  add_definitions(-DWORDS_BIGENDIAN)
+endif()
 
 # IEEE floating point
 set(HAVE_IEEEFP 1 CACHE STRING "IEEE floating point is available")
diff --git a/3rdparty/libtiff/ChangeLog b/3rdparty/libtiff/ChangeLog
index 1f50e20135..452dcb3a18 100644
--- a/3rdparty/libtiff/ChangeLog
+++ b/3rdparty/libtiff/ChangeLog
@@ -1,3 +1,2329 @@
+2020-12-19 Bob Friesenhahn
+
+ * libtiff 4.2.0 released.
+
+ * configure.ac: Pass tar-ustar option to AM_INIT_AUTOMAKE rather
+ than tar-pax since ustar POSIX 1003.1-1988 format is more portable
+ than PAX POSIX 1003.1-2001 format.
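For clarity, the carotene change above fixes a duplicated-operand test: the 2x area-downscale fast path is meant to require that both the width ratio (wr) and the height ratio (hr) equal 2, but the original condition tested wr twice, so hr was never checked. A minimal before/after sketch (the helper functions are illustrative, not part of the patch; only the condition comes from the diff):

```c
#include <stdbool.h>

/* Illustrative only: the fast path should be taken only when BOTH ratios are 2. */
bool fast_path_before(float wr, float hr)
{
    (void)hr;                            /* bug: hr was silently ignored */
    return (wr == 2.0f) && (wr == 2.0f); /* second test is a tautology */
}

bool fast_path_after(float wr, float hr)
{
    return (wr == 2.0f) && (hr == 2.0f); /* both ratios now checked */
}
```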
+
+2020-12-12 Even Rouault
+
+ Merge branch 'w_adjust-deflate_names' into 'master'
+ Set 'deflate' to DEFLATE_NAMES, instead of 'libdeflate'
+
+ See merge request libtiff/libtiff!174
+
+2020-12-12 Lemures Lemniscati
+
+ Set 'deflate' to DEFLATE_NAMES, instead of 'libdeflate'
+ 'lib' will be automatically added as a prefix while doing find_library()
+
+2020-12-12 Even Rouault
+
+ DoubleToRational(): avoid casting NaN to uint32 (fixes #227)
+
+2020-12-12 Even Rouault
+
+ Merge branch 'fix_221' into 'master'
+ tiffio.h: do not define __attribute__ but define TIFF_ATTRIBUTE instead (fixes #221)
+
+ Closes #221
+
+ See merge request libtiff/libtiff!173
+
+2020-12-12 Even Rouault
+
+ tiffio.h: do not define __attribute__ but define TIFF_ATTRIBUTE instead (fixes #221)
+
+2020-12-08 Even Rouault
+
+ TIFFReadDirEntryArrayWithLimit(): properly read from offline tag value when we clamp the number of strips to 1.
+ Fixes regression of commit 7057734d986001b7fd6d2afde9667da7754ff2cc on reading
+ a file with StripByteCounts with 1 element (broken) and StripOffsets with
+ 896 elements, and where StripOffsets[0] is correct
+
+ $ tiffdump foo.tif
+ Magic: 0x4949 Version: 0x2a
+ Directory 0: offset 25725448 (0x1888a08) next 0 (0)
+ SubFileType (254) LONG (4) 1<0>
+ ImageWidth (256) LONG (4) 1<640>
+ ImageLength (257) LONG (4) 1<20098>
+ BitsPerSample (258) SHORT (3) 1<16>
+ Photometric (262) SHORT (3) 1<1>
+ SamplesPerPixel (277) SHORT (3) 1<1>
+ ResolutionUnit (296) SHORT (3) 1<2>
+ StripByteCounts (279) LONG (4) 1<1806>
+ StripOffsets (273) LONG (4) 896<8 648 1288 1928 2568 3208 3848 4488 5128 5768 6408 7048 7688 8328 8968 9608 10248 10888 11528 12168 12808 13448 14088 14728 ...>
+
+2020-12-02 Even Rouault
+
+ tif_jpeg.c: avoid potential harmless unsigned integer overflow on data->fileoffset in JPEGFixupTagsSubsamplingSkip() by validating earlier. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28200
+
+2020-11-27 Even Rouault
+
+ Merge branch 'Jamaika1-master-patch-47839' into 'master'
+ Change ULARGE_INTEGER to LARGE_INTEGER
+
+ See merge request libtiff/libtiff!170
+
+2020-11-27 Even Rouault
+
+ Merge branch 'Jamaika1-master-patch-46397' into 'master'
+ Added stdint.h
+
+ See merge request libtiff/libtiff!171
+
+2020-11-27 Jamaika
+
+ Added stdint.h.
+
+ ```
+ tif_win32.c: In function '_tiffSizeProc':
+ tif_win32.c:159:23: warning: passing argument 2 of 'GetFileSizeEx' from incompatible pointer type [-Wincompatible-pointer-types]
+   159 |  if (GetFileSizeEx(fd,&m))
+       |                       ^~
+       |                       |
+       |                       ULARGE_INTEGER *
+ In file included from c:\msys1021\x86_64-w64-mingw32\include\winbase.h:18,
+                  from c:\msys1021\x86_64-w64-mingw32\include\windows.h:70,
+                  from tif_win32.c:32:
+ c:\msys1021\x86_64-w64-mingw32\include\fileapi.h:78:73: note: expected 'PLARGE_INTEGER' {aka 'LARGE_INTEGER *'} but argument is of type 'ULARGE_INTEGER *'
+    78 |  WINBASEAPI WINBOOL WINAPI GetFileSizeEx (HANDLE hFile, PLARGE_INTEGER lpFileSize);
+       |                                                         ~~~~~~~~~~~~~~~^~~~~~~~~~
+ ```
+
+2020-11-21 Even Rouault
+
+ Merge branch 'issue-113' into 'master'
+ tiffcrop: fix buffer overrun in extractContigSamples24bits()
+
+ Closes #113
+
+ See merge request libtiff/libtiff!169
+
+2020-11-21 Even Rouault
+
+ Merge branch 'issue-156' into 'master'
+ tiff2pdf: Check output size before writing
+
+ Closes #156
+
+ See merge request libtiff/libtiff!168
+
+2020-11-21 Even Rouault
+
+ Merge branch 'issue-201' into 'master'
+ tiff2pdf: enforce memory limit for tiled pictures too
+
+ Closes #201
+
+ See merge request libtiff/libtiff!167
+
+2020-11-20 Even Rouault
+
+ Merge branch 'issue-207' into 'master'
+ enforce (configurable) memory limit in tiff2rgba
+
+ Closes #209 and #207
+
+ See merge request libtiff/libtiff!165
+
+2020-11-20 Even Rouault
+
+ tif_lzw.c: avoid false positive -Wnull-dereference of mingw32 gcc 7.3.
+
+2020-11-17 Thomas Bernard
+
+ tiffcrop: fix buffer overrun in extractContigSamples24bits()
+ fixes #113
+
+ tiff2pdf: Check output size before writing.
+ fixes #156
+
+ tiff2pdf: enforce memory limit for tiled pictures too.
+ fixes #201
+
+2020-11-15 Thomas Bernard
+
+ tiff2rgba.1: -M option.
+
+ enforce (configurable) memory limit in tiff2rgba.
+ fixes #207
+ fixes #209
+
+2020-11-14 Even Rouault
+
+ Merge branch 'issue-220' into 'master'
+ tiff2pdf.c: properly calculate datasize when saving to JPEG YCbCr
+
+ Closes #220
+
+ See merge request libtiff/libtiff!159
+
+2020-11-14 Thomas Bernard
+
+ tiff2pdf.c: properly calculate datasize when saving to JPEG YCbCr.
+ fixes #220
+
+2020-11-14 Even Rouault
+
+ Merge branch 'issue-204' into 'master'
+ avoid buffer overflow while writing jpeg end of file marker
+
+ Closes #204
+
+ See merge request libtiff/libtiff!161
+
+2020-11-14 Even Rouault
+
+ Merge branch 'issue-193' into 'master'
+ fix buffer overflow in tiff2ps.c
+
+ Closes #193
+
+ See merge request libtiff/libtiff!162
+
+2020-11-14 Even Rouault
+
+ Merge branch 'skal65535-master-patch-91082' into 'master'
+ More overflow fixes for large widths
+
+ See merge request libtiff/libtiff!164
+
+2020-11-14 skal
+
+ More overflow fixes for large width.
+ Also: use INT_MAX instead of hard-coded constants.
+
+2020-11-12 Even Rouault
+
+ Merge branch 'skal65535-master-patch-56655' into 'master'
+ Fix potential overflow in gtStripContig()
+
+ See merge request libtiff/libtiff!163
+
+2020-11-12 Even Rouault
+
+ Merge branch 'issue-211' into 'master'
+ check for tile width overflow
+
+ Closes #211
+
+ See merge request libtiff/libtiff!160
+
+2020-11-12 skal
+
+ Fix potential overflow in gtStripContig()
+ (w + w) might not fit in int32 if too large.
+
+2020-11-09 Thomas Bernard
+
+ tiff2ps.c: fix buffer overread.
+ fixes #193
+
+ fix undefined behaviour (int shifted too much to the left)
+
+ avoid buffer overflow while writing jpeg end of file marker.
+ fixes #204
+
+ gtTileContig(): check Tile width for overflow.
+ fixes #211
+
+ fix warning messages (v32 is unsigned)
+
+2020-10-26 Even Rouault
+
+ TIFFStartStrip(): avoid potential crash in WebP codec when using scanline access on corrupted files. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26650
+
+2020-10-20 Even Rouault
+
+ tif_webp.c: validate tile/strip dimension to avoid unsigned integer overflow in RGBA.size computation
+
+2020-10-19 Even Rouault
+
+ tif_zip.c: fix typo in comment.
+
+2020-10-16 Even Rouault
+
+ tiff.h: remove irrelevant warning about webp related pseudo-tags not being registered: they are purely internal libtiff concepts
+
+2020-10-16 Even Rouault
+
+ Merge branch 'libdeflate' into 'master'
+ Add support for building against libdeflate for faster Zip/Deflate compression/decompression
+
+ See merge request libtiff/libtiff!158
+
+2020-10-16 Even Rouault
+
+ test: add testdeflatelaststripextradata.sh.
+
+2020-10-16 Even Rouault
+
+ Add support for optional building against libdeflate for faster Zip/Deflate compression/decompression.
+ So we can have 2 kinds of builds with the Zip/Deflate codec:
+ - zlib only
+ - zlib + libdeflate
+
+ Speed improvements in the 35%-50% range can be expected when libdeflate is used.
+ Compression level up to 12 is now supported (capped to 9 when zlib is used).
+ Still requires zlib for situations where libdeflate cannot be used (that
+ is for scanline access, since libdeflate has no streaming mode)
+
+ Pseudo-tag TIFFTAG_DEFLATE_SUBCODEC=DEFLATE_SUBCODEC_ZLIB/DEFLATE_SUBCODEC_LIBDEFLATE
+ is added to control which subcodec (zlib or libdeflate) should be used (it defaults
+ of course to libdeflate, when it is available).
+ This is mostly aimed at being used on the writing side, to be able to reproduce
+ output of previous libtiff versions at a binary level, in situations where this would
+ be really needed. Or as a safety belt in case there would be unforeseen issues
+ with using libdeflate.
+ It can be used to know when libdeflate is available at runtime (DEFLATE_SUBCODEC_LIBDEFLATE
+ will be the default value in that situation).
+
+ Of course, deflate codestreams produced by libdeflate can be read by zlib, and vice-versa.
+
+2020-10-14 Even Rouault
+
+ tif_webp.c: fix compiler warnings with MSVC.
+
+2020-10-12 Even Rouault
+
+ Merge branch 'various_fixes' into 'master'
+ Fix compiler warnings about unused variables when assert() expands to nothing
+
+ See merge request libtiff/libtiff!157
+
+2020-10-12 Even Rouault
+
+ .gitignore: add entries for new files in test/
+
+ Fix compiler warnings about unused variables when assert() expands to nothing
+
+2020-10-09 Roger Leigh
+
+ Merge branch '215-cygwin-appveyor-fail' into 'master'
+ Update Appveyor CI build to build with VS2019 image
+
+ Closes #215
+
+ See merge request libtiff/libtiff!154
+
+2020-10-09 Roger Leigh
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+2020-10-09 Roger Leigh
+
+ Merge branch 'TIFF-217_m_lib_path' into 'master'
+ cmake: Do not use absolute libm path
+
+ Closes #217
+
+ See merge request libtiff/libtiff!156
+
+2020-10-09 Roger Leigh
+
+ cmake: Do not use absolute libm path.
+
+2020-10-08 Even Rouault
+
+ tif_fax3.h: restore systematic calls to CLEANUP_RUNS()
+ now that SETVALUE() no longer causes overflows.
+ Those were removed per b351db8be1b4d3f712bdb9424a79d3174cc03202 and
+ 3440ac216463fcad170bbb391491e69730a59ffa.
+
+ As SETVALUE() now returns an error, this allows the decoder to exit.
+
+ Otherwise, the assert(x == lastx) in _TIFFFax3fillruns() can trigger.
+
+ Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26201
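The 2020-10-16 libdeflate entry above introduces the TIFFTAG_DEFLATE_SUBCODEC pseudo-tag. A minimal sketch of how a writer could pin the zlib subcodec to reproduce pre-libdeflate output byte-for-byte (error handling omitted; assumes a libtiff ≥ 4.2 built with libdeflate support):

```c
#include "tiffio.h"

void open_deflate_writer_zlib_only(const char *path)
{
    TIFF *tif = TIFFOpen(path, "w");
    if (!tif)
        return;
    TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_ADOBE_DEFLATE);
    /* Force the zlib subcodec even when libdeflate is compiled in, to
     * reproduce the output of previous libtiff versions at a binary
     * level, as the entry above describes. */
    TIFFSetField(tif, TIFFTAG_DEFLATE_SUBCODEC, DEFLATE_SUBCODEC_ZLIB);
    /* ... set the remaining fields and write strips/tiles ... */
    TIFFClose(tif);
}
```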
+
+2020-10-06 Even Rouault
+
+ Merge branch 'check_TIFFFlushData1' into 'master'
+ FAX/JPEG/LZMA/PixarLog/ZIP/ZSTD codecs: make sure to check TIFFFlushData1() return value
+
+ See merge request libtiff/libtiff!155
+
+2020-10-04 Even Rouault
+
+ Merge branch 'shared-memory' into 'master'
+ Set the --shared-memory linker flag for Emscripten builds
+
+ See merge request libtiff/libtiff!153
+
+2020-10-03 Even Rouault
+
+ tiff2rgba.c: fix -Wold-style-declaration warning.
+
+ FAX/JPEG/LZMA/PixarLog/ZIP/ZSTD codecs: make sure to check TIFFFlushData1() return value
+
+2020-09-26 Even Rouault
+
+ tif_fax3.h: extra buffer overflow checks. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=25934
+
+2020-09-25 Roger Leigh
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+ wip.
+
+ Update AppVeyor image.
+
+ test-appveyor.
+
+2020-09-24 Attila Oláh
+
+ Also pass --shared-memory to raw_decode.
+ This is needed when building for Emscripten with *both* WEBP and JPEG
+ support.
+
+ Set the --shared-memory linker flag for Emscripten builds.
+ This is only needed when building with WEBP support, which uses atomics,
+ therefore the linker needs the --shared-memory flag. The flag cannot be
+ added globally because not all executables link against libwebp.
+
+2020-09-22 Even Rouault
+
+ tif_fax3.h: return error when a buffer overflow occurs. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=25552 and https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=25849
+
+2020-09-11 Even Rouault
+
+ Merge branch 'fix-float-compare' into 'master'
+ Fix comparison for max negative float value.
+
+ See merge request libtiff/libtiff!152
+
+2020-09-11 Dirk Lemstra
+
+ Fix comparison for max negative float value.
+
+2020-09-07 Even Rouault
+
+ Fax3PreDecode(): reset curruns and refruns state variables.
+ to avoid out-of-bounds write triggered by GDAL when repeatedly
+ reading a corrupt strip.
+
+ Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=25493
+
+2020-06-06 Thomas Bernard
+
+ Merge branch 'issue-17' into 'master'
+ normalize tools behaviour regarding -h
+
+ Closes #17
+
+ See merge request libtiff/libtiff!115
+
+2020-05-31 Even Rouault
+
+ TWebPSetupEncode(): fix logic problem (and instead of or) in test that checks input is 8bit unsigned data
+
+2020-05-12 Even Rouault
+
+ TIFFGetConfiguredCODECs(): fix to avoid wrong structure to be returned for registered (ie non built-in) codecs
+
+2020-05-09 Even Rouault
+
+ Merge branch 'zstd-webp-update' into 'master'
+ gitlab-ci: use latest zstd and webp versions
+
+ See merge request libtiff/libtiff!148
+
+2020-05-09 Even Rouault
+
+ Merge branch 'deprecated' into 'master'
+ ojpeg: s/Depreciated/Deprecated/
+
+ See merge request libtiff/libtiff!149
+
+2020-05-09 Aaron Boxer
+
+ ojpeg: s/Depreciated/Deprecated/
+
+2020-04-27 Even Rouault
+
+ Fix typos.
+
+ tif_jpeg.c: avoid potential division in previous fix (master only)
+
+2020-04-26 Thomas Bernard
+
+ gitlab-ci: use latest zstd and webp versions.
+
+2020-04-26 Even Rouault
+
+ tiff.h: fixes to use ASCII only characters (master only)
+
+2020-04-26 Thomas Bernard
+
+ tiffsplit: use EXIT_SUCCESS / EXIT_FAILURE.
+
+ tiffset: print usage on stdout when -h is used.
+ also use EXIT_FAILURE / EXIT_SUCCESS
+ see #17
+
+ tiffmedian: show usage on stdout when -h is used.
+ also use EXIT_SUCCESS/EXIT_FAILURE
+ see #17
+
+ tiffinfo: print usage on stdout when -h is used.
+ also use EXIT_FAILURE / EXIT_SUCCESS
+ see #17
+
+ raw2tiff: print usage to stdout when -h is used.
+ see #17
+
+ tiff2pdf: print usage on stdout when -h is used.
+ see #17
+
+ tiffgt: output usage on stdout with -h.
+ also use EXIT_SUCCESS / EXIT_FAILURE
+
+ tiffdump: use EXIT_FAILURE / EXIT_SUCCESS.
+ see #17
+
+ tiffdither: print usage on stdout when -h is used.
+ see #17
+
+2020-04-26 Thomas Bernard
+
+ tiffcrop: -h / -v prints usage/version to stdout.
+ also uses the standard C EXIT_SUCCESS / EXIT_FAILURE
+ macros
+
+ see #17
+
+2020-04-26 Thomas Bernard
+
+ tiffcp: output usage to stdout when using -h.
+ also use EXIT_FAILURE / EXIT_SUCCESS
+ see #17
+
+ tiffcmp: match exit status for posix cmp and diff tools.
+
+ tiff2rgba: output usage to stdout when using -h.
+ also uses std C EXIT_FAILURE / EXIT_SUCCESS
+ see #17
+
+ tiff2ps: use EXIT_FAILURE / EXIT_SUCCESS.
+ see #17
+
+ tiff2bw: output usage on stdout when using -h.
+ also uses EXIT_SUCCESS / EXIT_FAILURE
+ see #17
+
+ thumbnail: use EXIT_FAILURE / EXIT_SUCCESS.
+ the -h option was already used so it cannot be used for help/usage
+ see #17
+
+ rgb2ycbcr: use EXIT_FAILURE / EXIT_SUCCESS.
+ the -h option was already used so it cannot be used for help/usage
+ see #17
+
+ ppm2tiff: output usage to stdout when using -h option.
+ also uses std C EXIT_SUCCESS / EXIT_FAILURE
+ see #17
+
+ pal2rgb: output usage to stdout when -h is used.
+ see #17
+
+ fax2tiff.c: print usage on stdout when using -h option.
+ see #17
+
+ fax2ps: output usage to stdout when using -h option.
+ also use EXIT_SUCCESS, EXIT_FAILURE from C standard
+
+2020-04-25 Even Rouault
+
+ Merge branch 'jpeg_multiscan_dos_logic' into 'master'
+ tif_jpeg.c: revise logic to detect potential excessive memory usage when...
+
+ See merge request libtiff/libtiff!147
+
+2020-04-24 Even Rouault
+
+ Merge branch 'issue-176' into 'master'
+ tiff2pdf: get rid of uninitialized memory content
+
+ Closes #176
+
+ See merge request libtiff/libtiff!143
+
+2020-04-24 Even Rouault
+
+ tif_jpeg.c: revise logic to detect potential excessive memory usage when decoding multiscan JPEG compressed images
+
+2020-04-19 Thomas Bernard
+
+ tiff2pdf: test the return code of TIFFReadRawStrip() and TIFFReadRawTile()
+
+ tiff2pdf.c: fix some whitespace problems in source.
+
+ tiff2pdf: get rid of uninitialized memory content.
+ fixes #176
+
+2020-04-19 Even Rouault
+
+ Merge branch 'issue-18' into 'master'
+ tiffset: pass size for TIFFTAG_INKNAMES
+
+ Closes #18
+
+ See merge request libtiff/libtiff!146
+
+2020-04-18 Olivier Paquet
+
+ Merge branch 'issue-80' into 'master'
+ tiffinfo: fix dump of Tiled images
+
+ Closes #80
+
+ See merge request libtiff/libtiff!144
+
+2020-04-15 Even Rouault
+
+ Fix wrong file size checks for memory-mapped BigTIFF files that could lead to image rejection
+
+2020-04-05 Thomas Bernard
+
+ tiffset: pass size for TIFFTAG_INKNAMES.
+ Uses TIFFFieldPassCount() to know which arguments need to be
+ passed to TIFFSetField()
+
+ fixes #18
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2202
+
+2020-04-04 Thomas Bernard
+
+ tiffinfo: showdata for tiled images.
+
+ tiffinfo: fix dump of Tiled images.
+ fixes #80
+
+2020-04-03 Even Rouault
+
+ Merge branch 'issue-117' into 'master'
+ tiffcrop: enforce memory allocation limit
+
+ Closes #117
+
+ See merge request libtiff/libtiff!140
+
+2020-04-03 Thomas Bernard
+
+ tiffcrop: enforce memory allocation limit.
+ uses -k option to change limit (default to 256MiB)
+ fixes #117 / http://bugzilla.maptools.org/show_bug.cgi?id=2757
+
+2020-04-02 Even Rouault
+
+ Merge branch 'issue-45' into 'master'
+ tiffcp: disable strip chopping when trying to convert to JBIG compression
+
+ Closes #45
+
+ See merge request libtiff/libtiff!138
+
+2020-04-02 Even Rouault
+
+ Merge branch 'issue-124' into 'master'
+ TIFFGetFields(3tiff): TIFFTAG_*BYTECOUNTS TIFFTAG_*OFFSETS are uint64
+
+ Closes #124
+
+ See merge request libtiff/libtiff!137
+
+2020-04-02 Even Rouault
+
+ Merge branch 'aix_itrunc' into 'master'
+ Rename itrunc to fix name clash with a different itrunc in math.h on AIX. Fixes issue #189
+
+ Closes #189
+
+ See merge request libtiff/libtiff!139
+
+2020-04-01 Rob Boehne
+
+ Rename itrunc to fix name clash with a different itrunc in math.h on AIX. Fixes issue #189
+
+2020-04-01 Thomas Bernard
+
+ tiffcp: disable strip chopping when trying to convert to JBIG compression
+ fixes #45
+
+2020-03-29 Thomas Bernard
+
+ TIFFGetFields(3tiff): TIFFTAG_*BYTECOUNTS TIFFTAG_*OFFSETS are uint64.
+ fixes #124 / http://bugzilla.maptools.org/show_bug.cgi?id=2774
+
+2020-03-29 Even Rouault
+
+ Merge branch 'issue-48' into 'master'
+ tiff2pdf: fix "raw" copy of Deflate streams
+
+ Closes #48
+
+ See merge request libtiff/libtiff!136
+
+2020-03-27 Thomas Bernard
+
+ tiff2pdf: fix "raw" copy of Deflate streams.
+ The Predictor parameter was not copied from the source tiff to the PDF.
+ fixes #48 / http://bugzilla.maptools.org/show_bug.cgi?id=2442
+
+2020-03-26 Thomas Bernard
+
+ tif_fax3: quit Fax3Decode2D() when a buffer overflow occurs.
+ fixes #186
+
+2020-03-24 Even Rouault
+
+ Merge branch 'issue-143-144' into 'master'
+ tiffdump: avoid unaligned memory access
+
+ Closes #144 and #143
+
+ See merge request libtiff/libtiff!133
+
+2020-03-24 Even Rouault
+
+ Merge branch 'issue-133' into 'master'
+ tiff2pdf: avoid divide by 0
+
+ Closes #133
+
+ See merge request libtiff/libtiff!126
+
+2020-03-24 Thomas Bernard
+
+ tiff2pdf: normalizePoint() macro to normalize the white point.
+
+2020-03-23 Thomas Bernard
+
+ tiffdump: avoid unaligned memory access.
+ fixes #143
+ fixes #144
+
+2020-03-23 Even Rouault
+
+ Merge branch 'out-of-memory' into 'master'
+ tiffcp/tiff2pdf/tiff2ps: enforce maximum malloc size
+
+ Closes #153, #84, #116 and #115
+
+ See merge request libtiff/libtiff!130
+
+2020-03-23 Even Rouault
+
+ Merge branch 'issue-157' into 'master'
+ tiffset: check memory allocation
+
+ Closes #157
+
+ See merge request libtiff/libtiff!132
+
+2020-03-23 Even Rouault
+
+ Merge branch 'issue-185' into 'master'
+ tif_fax3: more buffer overflow checks in Fax3Decode2D()
+
+ Closes #185
+
+ See merge request libtiff/libtiff!131
+
+2020-03-23 Thomas Bernard
+
+ tiffset: check memory allocation.
+ fixes #157 / http://bugzilla.maptools.org/show_bug.cgi?id=2850
+
+ tif_fax3: more buffer overflow checks in Fax3Decode2D()
+ fixes #185
+
+2020-03-21 Thomas Bernard
+
+ tiff2ps: enforce memory allocation limit.
+ fixes #153 / http://bugzilla.maptools.org/show_bug.cgi?id=2845
+
+ tiff2pdf: enforce maximum data size.
+ fixes #116 / http://bugzilla.maptools.org/show_bug.cgi?id=2756
+ fixes #84 / http://bugzilla.maptools.org/show_bug.cgi?id=2683
+
+ update man page for tiffcp regarding the -m option.
+
+ tiffcp.c: _TIFFmalloc() => limitMalloc()
+
+2020-03-21 Thomas Bernard
+
+ tiffcp: enforce maximum malloc size.
+ default is 256MB.
+ use -m option to change
+
+ fixes #115 / http://bugzilla.maptools.org/show_bug.cgi?id=2755
+
+2020-03-21 Even Rouault
+
+ Merge branch 'issue-184' into 'master'
+ CMakeLists.txt: define WORDS_BIGENDIAN when the CPU is big endian
+
+ Closes #184
+
+ See merge request libtiff/libtiff!127
+
+2020-03-21 Even Rouault
+
+ Merge branch 'issue-44' into 'master'
+ tiff2pdf: "" causes the relevant argument not to be written
+
+ Closes #44
+
+ See merge request libtiff/libtiff!128
+
+2020-03-21 Even Rouault
+
+ Merge branch 'issue-56' into 'master'
+ fix man for TIFFReadEncodedStrip(), TIFFStripSize, TIFFVStripSize, TIFFRawStripSize
+
+ Closes #56
+
+ See merge request libtiff/libtiff!129
+
+2020-03-20 Thomas Bernard
+
+ fix man for TIFFReadEncodedStrip(), TIFFStripSize, TIFFVStripSize, TIFFRawStripSize
+ fixes #56
+ http://bugzilla.maptools.org/show_bug.cgi?id=2507
+
+ tiff2pdf: "" causes the relevant argument not to be written.
+ fixes #44
+
+ CMakeLists.txt: define WORDS_BIGENDIAN when the CPU is big endian.
+ fixes #184
+
+2020-03-17 Thomas Bernard
+
+ tiff2pdf: avoid divide by 0.
+ fixes #133 http://bugzilla.maptools.org/show_bug.cgi?id=2796
+
+2020-03-17 Even Rouault
+
+ Merge branch 'issue-22' into 'master'
+ do not _tiffMapProc 0 size files
+
+ Closes #22
+
+ See merge request libtiff/libtiff!125
+
+2020-03-13 Thomas Bernard
+
+ tif_win32.c: do not _tiffMapProc() 0 sized files.
+ see #22
+
+ tif_unix.c: do not _tiffMapProc 0 size files.
+ fixes #22
+ http://bugzilla.maptools.org/show_bug.cgi?id=2249
+
+2020-03-12 Even Rouault
+
+ tif_fax3.c: fix warning C4018: '<': signed/unsigned mismatch introduced in past commits
+
+2020-03-11 Even Rouault
+
+ tiff.h: mention TIFFTAG_RPCCOEFFICIENT, TIFFTAG_TIFF_RSID, TIFFTAG_GEO_METADATA
+
+2020-03-11 Even Rouault
+
+ Merge branch 'issue-60' into 'master'
+ added support for more private tags
+
+ Closes #60
+
+ See merge request libtiff/libtiff!124
+
+2020-03-11 Even Rouault
+
+ Merge branch 'issue-160' into 'master'
+ Fax3SetupState(): check consistency of rowbytes and rowpixels
+
+ Closes #160
+
+ See merge request libtiff/libtiff!123
+
+2020-03-11 Thomas Bernard
+
+ added support for more private tags.
+ see https://gitlab.com/libtiff/libtiff/-/issues/60
+ bugzilla.maptools.org/show_bug.cgi?id=2525
+
+ closes #60
+
+ original author : art1@andreas-romeyke.de
+
+2020-03-11 Thomas Bernard
+
+ Fax3SetupState(): check consistency of rowbytes and rowpixels.
+ also add some parameter documentation to Fax3Decode1D()
+
+ fixes #160
+ http://bugzilla.maptools.org/show_bug.cgi?id=2854
+
+2020-03-10 Even Rouault
+
+ Merge branch 'issue-11-const-pointers' into 'master'
+ Make pointers returned via TIFFGetField const
+
+ Closes #11
+
+ See merge request libtiff/libtiff!118
+
+2020-03-10 Even Rouault
+
+ tif_ojpeg.c: relax again too strict sanity checks to allow reading of valid images such as https://gitlab.com/libtiff/libtiff/-/issues/181#note_302535232. Fixes #181
+
+2020-03-09 Even Rouault
+
+ Merge branch 'issue-52' into 'master'
+ contrib/win_dib/tiff2dib: fix Uninitialized variable: lpBits
+
+ Closes #52
+
+ See merge request libtiff/libtiff!121
+
+2020-03-09 Thomas Bernard
+
+ contrib/win_dib/tiff2dib: fix Uninitialized variable: lpBits.
+ fixes #52
+ http://bugzilla.maptools.org/show_bug.cgi?id=2469
+
+2020-03-08 Even Rouault
+
+ Merge branch 'issue-58' into 'master'
+ Make TIFFTAG_CFAPATTERN variable count
+
+ Closes #58
+
+ See merge request libtiff/libtiff!120
+
+2020-03-08 Even Rouault
+
+ Merge branch 'issue-158-no-predictor-in-webp' into 'master'
+ TIFFTAG_PREDICTOR is not supported for WebP
+
+ Closes #158
+
+ See merge request libtiff/libtiff!119
+
+2020-03-08 Sam Hasinoff
+
+ Make TIFFTAG_CFAPATTERN variable count.
+ The TIFFTAG_CFAPATTERN tag (33422) from TIFF/EP, recently introduced in libtiff
+ 3363eda09d082e3e1dfffa6281f53085cac51ad3 / http://bugzilla.maptools.org/show_bug.cgi?id=2457
+ is described as having a fixed count of 4.
+ But the TIFF/EP spec says this should support a variable count (= CFARepeatRows * CFARepeatCols):
+
+ TIFF/EP, ISO 12234-2:2001
+ http://www.barrypearson.co.uk/top2009/downloads/TAG2000-22_DIS12234-2.pdf
+ page 18 and 26
+
+2020-03-08 Thomas Bernard
+
+ TIFFTAG_PREDICTOR is not supported for WebP.
+ fixes #158
+ https://gitlab.com/libtiff/libtiff/-/issues/158
+
+ this bug was introduced by 9eacd59fecc4ef593ac17689bc530ab451c8ec14
+ merge request !32
+
+2020-03-07 Adam Goode
+
+ Make the default whitepoint and ycbcrcoeffs arrays const.
+ Now that we are returning const pointers in TIFFGetFieldDefaulted,
+ we can now make these static default arrays const.
+
+ see #11
+
+2020-03-07 Adam Goode
+
+ Make pointers returned via TIFFGetField const.
+ According to http://bugzilla.maptools.org/show_bug.cgi?id=2125#c6
+ callers are not allowed to modify pointer or array values returned from
+ TIFFGetField or the like. So, make this explicit in the documentation
+ by specifying these things as const. Note that this is not an ABI
+ change, since C does not encode const in libraries. Also, this is
+ not really an API change, since the varargs call strips away all
+ the types anyway. So it really is more of a documentation change.
+
+ fixes #11
+
+2020-03-07 Even Rouault
+
+ CMake: Skip custom_dir_EXIF_231 test on shared builds to avoid issues on Windows
+
+2020-03-07 Even Rouault
+
+ Merge branch 'EXIF231_GPS_upgrade' into 'master'
+ EXIF 2.32 and GPS TIFF-tags and functionality upgraded.
+
+ See merge request libtiff/libtiff!91
+
+2020-03-07 Su_Laus
+
+ EXIF 2.32 and GPS tags and functionality upgraded.
+ - Existing EXIF field definition of tags is upgraded to EXIF version 2.3.2
+ - EXIF-GPS structure, tags and access functions are added as special CustomDirectory (like it was done for EXIF).
+ - Test program custom_dir_EXIF_231.c added to test writing/reading of EXIF IFD and GPS IFD tags
+ and to highlight some quirks of IFD-handling and peculiarities of reading/writing the different data types.
+ - Reading error for FileSource and SceneType tags corrected.
+
+ - EXIF_GPS_upgrade rebased onto c8c5309b765ef4ff097d2aaffbdb8f403db8967d (Merge branch 'Rational2DoublePrecision_correction' into 'master')
+ and adapted:
+ - tif_dirinfo.c: All rational tags set to TIFF_SETGET_FLOAT but only the GPSTAG_ tags set to TIFF_SETGET_DOUBLE.
+ - custom_dir_EXIF_231.c: Editorials amended and gcc warnings fixed.
+ - CMakeLists.txt: add_test(NAME "custom_dir_EXIF_231" COMMAND "custom_dir_EXIF_231") added.
+
+2020-03-07 Even Rouault
+
+ Merge branch 'issue-55' into 'master'
+ ppm2tiff: support any bps value from 1 to 16
+
+ Closes #55
+
+ See merge request libtiff/libtiff!106
+
+2020-03-07 Thomas Bernard
+
+ ppm2tiff: Add test for 16bpc PPM.
+
+ ppm2tiff: remove unused argument warning.
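The "Make pointers returned via TIFFGetField const" entry above amounts to a usage rule: arrays handed back by TIFFGetField()/TIFFGetFieldDefaulted() are owned by libtiff and must be treated as read-only. A small sketch of the intended calling pattern (illustrative; assumes a libtiff with the const change described above):

```c
#include <stdio.h>
#include "tiffio.h"

void print_whitepoint(TIFF *tif)
{
    /* Pointer is owned by libtiff: do not modify or free it. */
    const float *whitepoint = NULL;
    if (TIFFGetFieldDefaulted(tif, TIFFTAG_WHITEPOINT, &whitepoint))
        printf("white point: %g %g\n", whitepoint[0], whitepoint[1]);
}
```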
+
+2020-03-07 Ludolf Holzheid
+
+ ppm2tiff: support any bps value from 1 to 16.
+ fix #55
+ http://bugzilla.maptools.org/show_bug.cgi?id=2505
+
+ Patch originally submitted by Ludolf Holzheid
+
+2020-03-06 Even Rouault
+
+ Merge branch 'fax-test' into 'master'
+ add test for fax4 decoding
+
+ See merge request libtiff/libtiff!114
+
+2020-03-05 Thomas Bernard
+
+ add test for fax4 decoding.
+ This will check for regression on #46
+ https://gitlab.com/libtiff/libtiff/issues/46
+ http://bugzilla.maptools.org/show_bug.cgi?id=2434
+
+2020-03-05 Even Rouault
+
+ Merge branch 'freebsd-tests' into 'master'
+ make tests pass under FreeBSD.
+
+ See merge request libtiff/libtiff!113
+
+2020-03-05 Thomas Bernard
+
+ make tests pass under FreeBSD.
+ the -I option for the GNU diff and the FreeBSD diff
+ behave differently regarding escaping the ( ) and |
+
+ By using two -I options, we avoid using such characters.
+
+2020-03-05 Even Rouault
+
+ Merge branch 'issue-31' into 'master'
+ HTML
+
+ Closes #31
+
+ See merge request libtiff/libtiff!111
+
+2020-03-05 Even Rouault
+
+ Merge branch 'issue-179' into 'master'
+ tif_fax3.h: check for buffer overflow in EXPAND2D before "calling" CLEANUP_RUNS()
+
+ Closes #179
+
+ See merge request libtiff/libtiff!112
+
+2020-03-05 Thomas Bernard
+
+ v4.1.0.html: fix for validation.
+ long comments were replaced
+ because they confused some parsers
+
+ add DOCTYPE on v*.html.
+
+ fix HTML files so they are valid according to https://validator.w3.org.
+
+2020-03-05 Thomas Bernard
+
+ tif_fax3.h: check for buffer overflow in EXPAND2D before "calling" CLEANUP_RUNS()
+ fixes #179
+
+ this fixes the regression introduced in 02bb0175 / 72c4acef
+ ( merge request !110 )
+
+ It may be a better fix to do the overflow check in SETVALUE() but the
+ macro do { } while(0) construct makes it difficult to quit the loop
+ properly.
+
+2020-03-01 Thomas Bernard
+
+ index.html: fix unclosed tag.
+
+2020-03-01 Thomas Bernard
+
+ html: do not force colors (which are default anyway)
+ If needed, style should be set using CSS.
+
+ fixes #31
+ https://gitlab.com/libtiff/libtiff/issues/31
+ http://bugzilla.maptools.org/show_bug.cgi?id=2326
+
+2020-03-01 Even Rouault
+
+ TIFFReadCustomDirectory(): fix potential heap buffer overflow when reading a custom directory, after a regular directory where a codec was active. Fixes https://gitlab.com/libtiff/libtiff/issues/178
+
+2020-03-01 Even Rouault
+
+ Merge branch 'issue-46' into 'master'
+ fix decoding of fax4 images
+
+ Closes #46
+
+ See merge request libtiff/libtiff!110
+
+2020-02-29 Thomas Bernard
+
+ tif_fax3: better fix for CVE-2011-0192.
+ There are some legitimate cases which were forbidden by the previous fix
+
+ tif_fax3.h: allow 0 length run in DECODE2D.
+ fixes #46
+ https://gitlab.com/libtiff/libtiff/issues/46
+ http://bugzilla.maptools.org/show_bug.cgi?id=2434
+
+2020-02-29 Even Rouault
+
+ Merge branch 'mingwlibm' into 'master'
+ Don't use libm with libtiff due to conflict with libmsvcrt
+
+ See merge request libtiff/libtiff!73
+
+2020-02-29 Even Rouault
+
+ Merge branch 'Rational2DoublePrecision_correction' into 'master'
+ tif_dirwrite.c: bugfix DoubleToSrational() for plain signed integers
+
+ See merge request libtiff/libtiff!109
+
+2020-02-29 Su_Laus
+
+ tif_dirwrite.c: bugfix DoubleToSrational(), which returns plain signed integer values always as unsigned rationals. Add a test into rational_precision2double.c for "-1.0" and some editorials in tif_dirwrite.c.
+ (code is related to 6df997c786928757caea0dd68d26ea5f098f49df changes).
+
+2020-02-29 Even Rouault
+
+ Merge branch 'issue-174' into 'master'
+ tif_fax3.c: check buffer overflow in Fax4Decode()
+
+ Closes #174
+
+ See merge request libtiff/libtiff!108
+
+2020-02-29 Thomas Bernard
+
+ Fax4Decode(): log error message in case of buffer overrun.
+
+ tif_fax3.c: check buffer overflow in Fax4Decode()
+ fixes #174
+
+2020-02-28 Even Rouault
+
+ typo fixes in code comments.
+
+ ToRationalEuclideanGCD: remove useless test that confuses Coverity Scan about a potential later modulo by zero
+
+2020-02-27 Even Rouault
+
+ tif_dirwrite.c: fix other warnings related to 6df997c786928757caea0dd68d26ea5f098f49df changes
+
+ rational_precision2double.c: fix many warnings, and do not build it on CMake on shared lib builds
+
+ tif_dirwrite.c: fix various warnings found when building GDAL with internal libtiff after 6df997c786928757caea0dd68d26ea5f098f49df changes
+
+ tif_dirwrite.c: qualify ToRationalEuclideanGCD() with static.
+
+2020-02-27 Even Rouault
+
+ Merge branch 'Rational2DoublePrecision' into 'master'
+ Rational with Double Precision Upgrade
+
+ See merge request libtiff/libtiff!100
+
+2020-02-27 Su_Laus
+
+ Rational with Double Precision Upgrade.
+ Unfortunately, custom rational tags (TIFF_RATIONAL with field_bit=FIELD_CUSTOM) are defined as TIFF_SETGET_DOUBLE
+ but for the reading interface and LibTiff internally they are stored ALWAYS as floating point SINGLE precision.
+ Double precision custom rational tags are not supported by LibTiff.
+
+ For the GPS tags in WGS84 a higher accuracy / precision is needed.
+ Therefore, this upgrade is made, keeping the old interface for the already defined tags and allowing a double precision definition,
+ as well as calculating rationals with higher accuracy / precision.
+ This higher accuracy can be used for newly defined tags like that in EXIF/GPS.
+
+ Refer also to the very old Bugzilla issue 2542 (#69)
+
+ A test file rational_precision2double.c is added, which shows prevention of the old interface to the already defined custom rational tags
+ with the standard library as well as with the upgraded library.
+
+ Also TIFFTAG_XRESOLUTION, TIFFTAG_YRESOLUTION, TIFFTAG_XPOSITION, TIFFTAG_YPOSITION amended from TIFF_SETGET_DOUBLE to TIFF_SETGET_FLOAT and testcase inserted in rational_precision2double.c
+
+2020-02-26 Chris Degawa
+
+ mingw-w64 cmake: Don't find libm.
+ mingw-w64 will provide libm symbols by default without -lm and mingw-w64's
+ libm is just a stub.
+
+ This is just to make sure that on systems with msys2 and also cygwin, cmake
+ doesn't find a libm that actually contains math functions.
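To make the DoubleToSrational() bugfix above concrete: a signed-rational (SRATIONAL) conversion must preserve the sign, so -1.0 has to come back as -1/1 rather than 1/1. A toy illustration of the property the "-1.0" regression test checks (this is not libtiff's internal algorithm):

```c
#include <assert.h>
#include <math.h>
#include <stdint.h>

/* Toy conversion: split sign from magnitude so negative inputs keep
 * their sign; real code would also approximate fractional values. */
static void to_srational(double value, int32_t *num, int32_t *den)
{
    int sign = (value < 0) ? -1 : 1;
    *den = 1;
    *num = sign * (int32_t)llround(fabs(value));
}

int main(void)
{
    int32_t n, d;
    to_srational(-1.0, &n, &d);
    assert(n == -1 && d == 1); /* was wrongly returned as +1/1 before the fix */
    return 0;
}
```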
+
+2020-02-26 Even Rouault
+
+ Merge branch 'division-by-zero' into 'master'
+ tools/tiffcp.c: fix potential division by zero
+
+ See merge request libtiff/libtiff!83
+
+2020-02-26 Even Rouault
+
+ Merge branch 'fix-unused-warning' into 'master'
+ warnings: mark conditionally used parameters
+
+ See merge request libtiff/libtiff!49
+
+2020-02-26 Even Rouault
+
+ Merge branch 'master' into 'master'
+ fix issue #78 warnings regarding RichTIFFIPTC data type
+
+ Closes #78
+
+ See merge request libtiff/libtiff!99
+
+2020-02-26 Even Rouault
+
+ Merge branch 'win64-handle-casts-warn-fix' into 'master'
+ Avoid warnings about casts between HANDLE and int in Win64 builds
+
+ Closes #2
+
+ See merge request libtiff/libtiff!93
+
+2020-02-26 Even Rouault
+
+ Merge branch 'bug2839' into 'master'
+ raw2tiff: avoid divide by 0
+
+ Closes #151
+
+ See merge request libtiff/libtiff!103
+
+2020-02-26 Even Rouault
+
+ Merge branch 'bug2669' into 'master'
+ tiff2pdf: palette bound check in t2p_sample_realize_palette()
+
+ Closes #82
+
+ See merge request libtiff/libtiff!104
+
+2020-02-26 Even Rouault
+
+ Merge branch 'int-shift' into 'master'
+ tiffcrop: fix asan runtime error caused by integer promotion
+
+ See merge request libtiff/libtiff!105
+
+2020-02-26 Even Rouault
+
+ Merge branch 'bug-2538' into 'master'
+ libtiff.html: fix function casing
+
+ Closes #68
+
+ See merge request libtiff/libtiff!107
+
+2020-02-16 Thomas Bernard
+
+ raw2tiff: avoid divide by 0.
+ fixes #151 / http://bugzilla.maptools.org/show_bug.cgi?id=2839
+
+ first memcmp() lines before computing correlation
+ and always avoid divide by 0 anyway
+
+2020-02-09 Even Rouault
+
+ Merge branch 'bug2855' into 'master'
+ tiff2ps: fix heap buffer read overflow in PSDataColorContig()
+
+ Closes #161
+
+ See merge request libtiff/libtiff!102
+
+2020-02-08 Thomas Bernard
+
+ libtiff.html: fix function casing.
+
+ libtiff.html: fix function casing.
+ fixes #68 / http://bugzilla.maptools.org/show_bug.cgi?id=2538
+
+2020-02-08 Thomas Bernard
+
+ tiffcrop: fix asan runtime error caused by integer promotion.
+ tiffcrop.c:4027:20: runtime error: left shift of 190 by 24 places cannot be represented in type 'int'
+
+ C treats (byte << 24) as an int expression.
+ casting explicitly to unsigned type uint32 avoids the problem.
+
+ the same issue has been fixed elsewhere with a24213691616e7cd35aa3e2805493de80c7e4fcf
+
+ I detected the bug with the test file of #86
+
+2020-02-08 Thomas Bernard
+
+ tiff2pdf: palette bound check in t2p_sample_realize_palette()
+ fixes #82
+
+2020-02-08 Thomas Bernard
+
+ tiff2ps: fix heap buffer read overflow in PSDataColorContig()
+ fixes #161 / http://bugzilla.maptools.org/show_bug.cgi?id=2855
+
+ in 05029fb7f1ecf771abaf90b5705b6cab9eb522a7 I missed that 1 extra byte is read
+ in this loop.
+
+2020-02-05 Even Rouault
+
+ tif_dirread.c: suppress CLang static Analyzer 9.0 false positive.
+
+2020-02-01 Even Rouault
+
+ TIFFSetupStrips: enforce 2GB limitation of Strip/Tile Offsets/ByteCounts arrays
+ TIFFWriteDirectoryTagData() has an assertion that checks that the
+ arrays are not larger than 2GB. So error out earlier if in that situation.
+
+2020-01-29 Bob Friesenhahn
+
+ Simplify nmake configuration for building port directory. Now there is only one boolean setting to enable building strtoll() and strtoull() port functions.
+ The boolean setting enables the necessary port files to be built, but the remainder of the logic is via pre-processor code in the common tif_config.h, which was prepared before entering the port directory to do a build.
+
+2020-01-28 Bob Friesenhahn
+
+ Make sure that tif_config.h is produced prior to entering the port directory and add an include path so that the port files can include tif_config.h. Do not actually include tif_config.h at this time since CMake and Autotools builds are not prepared for that. This issue could be handled by updating the CMake and Autotools builds or by adding a define which directs libport.h to include tif_config.h.
+
+2020-01-26 Bob Friesenhahn
+
+ Fix nmake build mistakes in my last commit:
+ tif_config.vc.h:
+
+ Always define HAVE_STRTOL/HAVE_STRTOUL.
+ Define HAVE_STRTOLL/HAVE_STRTOULL if _MSC_VER >= 1900.
+
+ nmake.opt:
+
+ Provide defaults suitable for MSVC prior to 14.0.
+
+ libport.h:
+
+ The sense of the pre-processor logic was inverted from what it
+ should be. The intention is to only provide the prototype if the
+ function is missing.
+
+2020-01-25 Bob Friesenhahn
+
+ Add nmake build support for manually configuring the 'port' files to be built based on MSVC features. Include tif_config.h in tools/tiffset.c.
+
+2020-01-23 Even Rouault
+
+ Adjust previous fix to avoid undue warning in some situations triggered by GDAL
+
+2020-01-12 Even Rouault
+
+ _TIFFPartialReadStripArray: bring back support for non-conformant SLONG8 data type
+ Such as in https://github.com/OSGeo/gdal/issues/2165
+
+2020-01-07 Even Rouault
+
+ test: add test for single-strip OJPEG file without RowsPerStrip tag (like in CR2 files)
+
+ OJPEGReadHeaderInfo: if rowsperstrip not defined, then assume one-single-strip. Complementary fix to 0356ea76bac908c61160d735f078437ace953bd3
+
+2019-12-16 Angel Sánchez
+
+ fix issue #78 warnings regarding RichTIFFIPTC data type.
+
+2019-12-14 Even Rouault
+
+ contrib/oss-fuzz/build.sh: fix broken if construct.
+
+2019-11-28 Even Rouault
+
+ contrib/oss-fuzz/build.sh: other attempt at fixing build failure.
+
+2019-11-20 Even Rouault
+
+ contrib/oss-fuzz/build.sh: install liblzma-dev for x86_64 builds.
+
+2019-11-17 Even Rouault
+
+ contrib/oss-fuzz/build.sh: install liblzma-dev:i386 on i386 builds.
+
+2019-11-15 Even Rouault
+
+ Merge branch 'cmake-parse' into 'master'
+ CMake: simplify parsing variables from configure
+
+ See merge request libtiff/libtiff!98
+
+2019-11-15 Rolf Eike Beer
+
+ CMake: simplify parsing variables from configure.
+
+2019-11-14 Even Rouault
+
+ contrib/oss-fuzz/build.sh: fix ossfuzz build by statically linking to lzma
+
+2019-11-12 Even Rouault
+
+ Merge branch 'fix_ojpeg_172' into 'master'
+ OJPEG: fix broken sanity check added in 4.1.0 (#fixes 172)
+
+ See merge request libtiff/libtiff!97
+
+2019-11-11 Even Rouault
+
+ OJPEG: fix broken sanity check added in 4.1.0, and add two OJPEG test files
+
+ test/: add missing generated .sh files.
+
+2019-11-04 Even Rouault
+
+ Merge branch 'fix-missing-checks-TIFFGetField-tiffcrop' into 'master'
+ adds missing checks on TIFFGetField in tiffcrop tool
+
+ Closes #170
+
+ See merge request libtiff/libtiff!96
+
+2019-11-04 Bug Checkers
+
+ adds missing checks on TIFFGetField in tiffcrop tool (fixes #170)
+
+2019-11-04 Even Rouault
+
+ Merge branch 'adds-missing-TIFFClose-rgb2ycbcr' into 'master'
+ adds a missing TIFFClose in rgb2ycbcr tool
+
+ See merge request libtiff/libtiff!95
+
+2019-11-04 Mansour Ahmadi
+
+ adds a missing TIFFClose in rgb2ycbcr tool.
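Two of the fixes above ("adds missing checks on TIFFGetField in tiffcrop" and "adds a missing TIFFClose in rgb2ycbcr") reflect the same client-side hygiene rules: check TIFFGetField()'s return value before trusting its outputs, and close every opened TIFF. A condensed sketch of the pattern (illustrative, not the tools' actual code):

```c
#include "tiffio.h"

int read_dimensions(const char *path, uint32 *w, uint32 *h)
{
    TIFF *tif = TIFFOpen(path, "r");
    if (!tif)
        return 0;
    /* A failed TIFFGetField() leaves the output variables untouched,
     * so the return value must be checked before using them. */
    int ok = TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, w) &&
             TIFFGetField(tif, TIFFTAG_IMAGELENGTH, h);
    TIFFClose(tif); /* the call the rgb2ycbcr fix added */
    return ok;
}
```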
+
+2019-11-03 Bob Friesenhahn
+
+ libtiff 4.1.0 released.
+
+ Added a step for updating the legacy ChangeLog file.
+
+ Ignore emacs temporary files (ending with tilde character).
+
+ Added release summary page for the 4.1.0 release.
+
+ Fix CMake HAVE_GETOPT for systems which declare getopt in stdio.h. Fix utility baked-in getopt prototype which appears when HAVE_GETOPT is not defined.
+
+ Fax2tiff.sh needs to remove its output file in advance. Syntax changes so that bash is not required.
+
+2019-10-26 Even Rouault
+
+ tif_jpeg.c: extra cast to silence Coverity warning. GDAL CID 1406475.
+
+2019-10-23 Even Rouault
+
+ tif_jpeg.c: fix warning added by previous commit (on 32bit builds)
+
+2019-10-23 Even Rouault
+
+ Merge branch 'coverity-fixes' into 'master'
+ Coverity fixes
+
+ See merge request libtiff/libtiff!94
+
+2019-10-22 Timothy Lyanguzov
+
+ Use 64-bit calculations correctly.
+
+ Fix size calculation to use 64-bit tmsize_t correctly.
+
+ Make bytesperclumpline calculations using tmsize_t type.
+
+2019-10-03 Even Rouault
+
+ tif_read: align code of TIFFReadRawStrip() and TIFFReadRawTile() that differed for no good reason. Non-functional change normally. (fixes GitLab #162)
+
+2019-10-01 Even Rouault
+
+ HTML: update for GitLab issues.
+
+2019-09-29 Even Rouault
+
+ html/v3.5.6-beta.html: redact URL of defunct web site.
+
+ Website: update links to mailing list.
+
+2019-09-17 Even Rouault
+
+ TIFFReadAndRealloc(): avoid too large memory allocation attempts. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=17244
+
+2019-09-03 Even Rouault
+
+ ByteCountLooksBad and EstimateStripByteCounts: avoid unsigned integer overflows. Fixes https://oss-fuzz.com/testcase-detail/5686156066291712 and https://oss-fuzz.com/testcase-detail/6332499206078464
+
+2019-09-02 Even Rouault
+
+ tif_ojpeg.c: avoid relying on isTiled macro being wrapped in ()
+
+ tif_ojpeg.c: avoid use of uninitialized memory on edge/broken file. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16844
+
+ tiff_read_rgba_fuzzer.cc: add a -DSTANDALONE mode for easier reproduction of oss-fuzz reports
+
+2019-09-01 Even Rouault
+
+ tif_dirread.c: allocChoppedUpStripArrays(). avoid unsigned integer overflow. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16846
+
+2019-08-27 Even Rouault
+
+ tif_ojpeg.c: avoid unsigned integer overflow. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16793
+
+2019-08-26 Even Rouault
+
+ TIFFReadDirEntryData(): rewrite to avoid unsigned integer overflow (not a bug). Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16792
+
+ TIFFFetchDirectory(): fix invalid cast from uint64 to tmsize_t. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16784
+
+2019-08-25 Even Rouault
+
+ JPEG: avoid use of uninitialized memory on corrupted files.
+ Follow-up of cf3ce6fab894414a336546f62adc57f02590a22c
+ Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16602
+ Credit to OSS Fuzz
+
+2019-08-23 Even Rouault
+
+ _TIFFPartialReadStripArray(): avoid unsigned integer overflow. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16685
+
+ OJPEGWriteHeaderInfo(): avoid unsigned integer overflow on strile dimensions close to UINT32_MAX. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16683
+
+ TIFFFillStrip(): avoid harmless unsigned integer overflow. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16653
+
+ EstimateStripByteCounts(): avoid unsigned integer overflow.
+ Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16643
+
+ tif_ojpeg: avoid unsigned integer overflow (probably not a bug). Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16635
+
+ tif_thunder: avoid unsigned integer overflow (not a bug). Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16632
+
+2019-08-22 Even Rouault
+
+ _TIFFMultiply32() / _TIFFMultiply64(): avoid relying on unsigned integer overflow (not a bug)
+
+ EstimateStripByteCounts(): avoid unsigned integer overflow.
+
+2019-08-21 Even Rouault
+
+ EstimateStripByteCounts(): avoid unsigned integer overflow.
+
+2019-08-20 Even Rouault
+
+ EstimateStripByteCounts(): avoid harmless unsigned integer overflow.
+
+ _TIFFPartialReadStripArray(): avoid triggering unsigned integer overflow with -fsanitize=unsigned-integer-overflow (not a bug, this is well defined by itself)
+
+2019-08-18 Even Rouault
+
+ tiff2ps: fix use of wrong data type that caused issues (/Height being written as 0) on 64-bit big endian platforms
+
+2019-08-16 Even Rouault
+
+ setByteArray(): fix previous commit.
+
+ setByteArray(): avoid potential signed integer overflow. Pointed by Hendra Gunadi. No actual problem known (which does not mean there wouldn't be any. Particularly on 32bit builds)
+
+2019-08-15 Even Rouault
+
+ RGBA interface: fix integer overflow potentially causing write heap buffer overflow, especially on 32 bit builds. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16443. Credit to OSS Fuzz
+
+2019-08-14 Even Rouault
+
+ Merge branch 'fix_integer_overflow' into 'master'
+ Fix integer overflow in _TIFFCheckMalloc() and other implementation-defined behaviour (CVE-2019-14973)
+
+ See merge request libtiff/libtiff!90
+
+2019-08-13 Even Rouault
+
+ Fix integer overflow in _TIFFCheckMalloc() and other implementation-defined behaviour (CVE-2019-14973)
+ _TIFFCheckMalloc()/_TIFFCheckRealloc() used an unsafe way to detect overflow
+ in the multiplication of nmemb and elem_size (which are of type tmsize_t, thus
+ signed), which was especially easily triggered on 32-bit builds (with recent
+ enough compilers that assume that signed multiplication cannot overflow, since
+ this is undefined behaviour by the C standard). The original issue which led to
+ this fix was triggered from tif_fax3.c
+
+ There were also unsafe (implementation-defined), and broken in practice on 64bit
+ builds, ways of checking that a uint64 fits in a (signed) tmsize_t by doing
+ (uint64)(tmsize_t)uint64_var != uint64_var comparisons. Those had no known
+ exploits at that time, but are better to fix in a more bullet-proof way.
+ Or similarly use of (int64)uint64_var <= 0.
+
+2019-08-12 Even Rouault
+
+ TIFFClientOpen(): fix memory leak if one of the required callbacks is not provided. Fixed Coverity GDAL CID 1404110
+
+ OJPEGReadBufferFill(): avoid very long processing time on corrupted files. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16400. master only
+
+2019-08-10 Even Rouault
+
+ oss-fuzz/tiff_read_rgba_fuzzer.cc: fix wrong env variable value in previous commit
+
+ oss-fuzz/tiff_read_rgba_fuzzer.cc: avoid issue with libjpeg-turbo and MSAN
+
+ OJPEG: fix integer division by zero on corrupted subsampling factors. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=15824.
+ Credit to OSS Fuzz
+
+ Merge branch 'ossfuzz_i386'
+
+ contrib/oss-fuzz/build.sh: fix for i386 build of jbigkit, and use $LIB_FUZZING_ENGINE
+
+2019-08-10 Even Rouault
+
+ Merge branch 'patch-1' into 'master'
+ fix two tiny typos
+
+ See merge request libtiff/libtiff!89
+
+2019-08-10 Reto Kromer
+
+ fix two tiny typos.
+
+2019-08-09 Even Rouault
+
+ Merge branch 'patch-1' into 'master'
+ fix a typo in man page
+
+ See merge request libtiff/libtiff!88
+
+2019-08-09 Reto Kromer
+
+ fix typo.
+
+2019-08-04 Even Rouault
+
+ Merge branch 'TIFFTAGID_Zero_reading_IGNORE' into 'master'
+ Suppressed Reading of Tiff tags with ID = 0 (like GPSVERSIONID) corrected.
+
+ See merge request libtiff/libtiff!77
+
+2019-08-04 Su Laus
+
+ Reading of Tiff tags with ID = 0 (like GPSVERSIONID) corrected.
+ IGNORE placeholder in tif_dirread.c is now replaced by a field dir_ignore in the TIFFDirEntry structure
+
+ Currently, in tif_dirread.c a special IGNORE value for the tif tags is defined
+ in order to flag status preventing already processed tags from further processing.
+ This irrational behaviour prevents reading of custom tags with id code 0 - like tag GPSVERSIONID from EXIF 2.31 definition.
+
+ An additional field 'tdir_ignore' is now added to the TIFFDirEntry structure and code is changed
+ to allow tags with id code 0 to be read correctly.
+
+ This change was already proposed as pending improvement in tif_dirread.c around line 32.
+
+ Reference is also made to:
+ - Discussion in https://gitlab.com/libtiff/libtiff/merge_requests/39
+ - http://bugzilla.maptools.org/show_bug.cgi?id=2540
+
+ Comments and indentation adapted.
+
+ Preparation to rebase onto master
+
+2019-07-16 Even Rouault
+
+ Merge branch 'cmake_amd64' into 'master'
+ CMakeLists.txt: properly set value of HOST_FILLORDER to LSB2MSB for Windows CMake builds
+
+ See merge request libtiff/libtiff!87
+
+2019-07-15 Even Rouault
+
+ CMakeLists.txt: properly set value of HOST_FILLORDER to LSB2MSB for Windows CMake builds
+ As can be seen in https://ci.appveyor.com/project/rleigh-codelibre/libtiff-didfs/builds/25846668/job/ory5w098j8wcij9x
+ log, the HOST_FILLORDER is not properly set:
+
+ [00:02:58] -- CMAKE_HOST_SYSTEM_PROCESSOR set to AMD64
+ [00:02:58] -- HOST_FILLORDER set to FILLORDER_MSB2LSB
+
+ The reason is that we match the "amd64.*" lowercase string whereas
+ CMAKE_HOST_SYSTEM_PROCESSOR is set to AMD64 uppercase.
+
+2019-07-09 Even Rouault
+
+ TIFFWriteCheck(): call TIFFForceStrileArrayWriting() when needed (should have gone with eaeca6274ae71cdfaeb9f673b6fb0f3cfc0e6ce5) (master only)
+
+2019-07-09 Even Rouault
+
+ Merge branch 'fix_chromium_925269' into 'master'
+ OJPEG: avoid use of uninitialized memory on corrupted files
+
+ See merge request libtiff/libtiff!86
+
+2019-07-05 Even Rouault
+
+ OJPEG: avoid use of uninitialized memory on corrupted files.
+ Fixes https://bugs.chromium.org/p/chromium/issues/detail?id=925269
+ Patch from Lei Zhang with little adaptations.
+
+2019-06-29 Even Rouault
+
+ Merge branch 'fix-division-by-zero' into 'master'
+ Return infinite distance when denominator is zero.
+
+ See merge request libtiff/libtiff!85
+
+2019-06-29 Dirk Lemstra
+
+ Return infinite distance when denominator is zero.
+
+2019-06-29 Even Rouault
+
+ Merge branch 'typetests' into 'master'
+ Add test to check that libtiff types have the correct size
+
+ See merge request libtiff/libtiff!57
+
+2019-05-31 Thomas Bernard
+
+ make TIFF_SSIZE_T the same bitwidth as TIFF_SIZE_T.
+ it was previously the same bitwidth as unsigned char *
+ Pointers can be larger than size_t.
+
+2019-05-31 Thomas Bernard
+
+ Add test to check that libtiff types have the correct size.
+ in configure/CMakeLists.txt:
+
+ - TIFF_INT8_T/TIFF_UINT8_T is signed/unsigned char
+ sizeof(char)==1 in C standard
+ - TIFF_INT16_T/TIFF_UINT16_T is signed/unsigned short
+ sizeof(short)>=2 in C standard
+ - TIFF_INT32_T/TIFF_UINT32_T is defined so its sizeof() is 4
+
+ - TIFF_INT64_T/TIFF_UINT64_T is defined so its sizeof() is 8
+
+ - TIFF_SIZE_T is defined so it has the same sizeof() as size_t
+
+ - TIFF_SSIZE_T is defined so it has the same sizeof() as unsigned char *
+
+2019-05-29 Even Rouault
+
+ Merge branch 'defer_strile_writing' into 'master'
+ Add TIFFDeferStrileArrayWriting() and TIFFForceStrileArrayWriting()
+
+ See merge request libtiff/libtiff!82
+
+2019-05-29 Even Rouault
+
+ Merge branch 'TIFFReadFromUserBuffer' into 'master'
+ Add TIFFReadFromUserBuffer()
+
+ See merge request libtiff/libtiff!81
+
+2019-05-26 Even Rouault
+
+ Fix vulnerability in 'D' (DeferStrileLoad) mode (master only) (fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=14908)
+
+2019-05-25 Even Rouault
+
+ Replace 'stripped' by 'striped' in error messages.
+
+2019-05-25 Even Rouault
+
+ Add TIFFDeferStrileArrayWriting() and TIFFForceStrileArrayWriting()
+ Those advanced writing functions must be used in a particular sequence
+ to have their intended effect. Their aim is to control when/where
+ the [Strip/Tile][Offsets/ByteCounts] arrays are written into the file.
+
+ The purpose of this is to generate 'cloud-optimized geotiff' files where
+ the first KB of the file only contains the IFD entries without the potentially
+ large strile arrays. Those are written afterwards.
+
+ The typical sequence of calls is:
+ TIFFOpen()
+ [ TIFFCreateDirectory(tif) ]
+ Set fields with calls to TIFFSetField(tif, ...)
+ TIFFDeferStrileArrayWriting(tif)
+ TIFFWriteCheck(tif, ...)
+ TIFFWriteDirectory(tif)
+ ... potentially create other directories and come back to the above directory
+ TIFFForceStrileArrayWriting(tif): emit the arrays at the end of file
+
+ See test/defer_strile_writing.c for a practical example.
+
+2019-05-24 Even Rouault
+
+ Fix vulnerability introduced by defer strile loading (master only)
+ Found on GDAL with https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=14894
+ Disabling the TIFF_DEFERSTRILELOAD bit in ChopupStripArray() was a
+ bad idea since using TIFFReadDirectory() to reload the directory again
+ would lead to a different value of td_rowsperstrip, which could confuse
+ readers if they relied on the value found initially.
+
+ Fix typo in error message (master only)
+
+2019-05-22 Even Rouault
+
+ Add TIFFReadFromUserBuffer()
+ This function replaces the use of TIFFReadEncodedStrip()/TIFFReadEncodedTile()
+ when the user can provide the buffer for the input data, for example when
+ they want to avoid having libtiff read the strile offset/count values from the
+ [Strip|Tile][Offsets/ByteCounts] array.
+
+ libtiff.def: add missing new symbols.
+
+ test/defer_strile_loading.c: fix warning with Visual C++
+
+ _TIFFRewriteField(): fix for bigtiff case (master only)
+ 116cf67f4c59196605abdb244657c3070c4310af made StripByteCount/TileByteCount to
+ always be rewritten as TIFF_LONG8.
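The 2019-05-25 TIFFDeferStrileArrayWriting() entry above spells out a required call sequence; rendered as code for a single directory (a sketch of that sequence only, error checking and the actual strip writes omitted):

```c
#include "tiffio.h"

void write_cloud_optimized(const char *path)
{
    TIFF *tif = TIFFOpen(path, "w");
    if (!tif)
        return;
    /* ... TIFFSetField(tif, ...) calls for this directory ... */
    TIFFDeferStrileArrayWriting(tif);
    TIFFWriteCheck(tif, 0, "write_cloud_optimized"); /* 0 = strip-based */
    TIFFWriteDirectory(tif);
    /* ... potentially create other directories and come back
     *     to the above directory ... */
    TIFFForceStrileArrayWriting(tif); /* emits the arrays at end of file */
    TIFFClose(tif);
}
```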
+ +2019-05-21 Even Rouault + + Merge branch 'ondemand_strile_offbytecount_loading' into 'master' + Make defer strile offset/bytecount loading available at runtime + + See merge request libtiff/libtiff!79 + +2019-05-21 Even Rouault + + Merge branch 'bigtiff_write_bytecount_on_long_when_possible' into 'master' + Create TileByteCounts/StripByteCounts tag with SHORT (ClassicTIFF/BigTIFF) or LONG (BigTIFF) type when possible + + See merge request libtiff/libtiff!78 + +2019-05-21 Even Rouault + + Merge branch 'html_link' into 'master' + libtiff.html, bigtiffpr.html: absolute => relative link + + See merge request libtiff/libtiff!80 + +2019-05-14 Thomas Bernard + + libtiff.html, bigtiffpr.html: absolute => relative link. + +2019-05-10 Even Rouault + + Make defer strile offset/bytecount loading available at runtime. + ... and add per-strile offset/bytecount loading capabilities. + + Part of this commit makes the behaviour that was previously met when + libtiff was compiled with -DDEFER_STRILE_LOAD available for default builds + when specifying the new 'D' (Deferred) TIFFOpen() flag. In that mode, the [Tile/Strip][ByteCounts/Offsets] + arrays are only loaded when first accessed. This can speed-up the opening + of files stored on the network when just metadata retrieval is needed. + This mode has been used for years by the GDAL library when compiled with + its embeded libtiff copy. + + To avoid potential out-of-tree code (typically codecs) that would use + the td_stripbytecount and td_stripoffset array inconditionnaly assuming they + have been loaded, those have been suffixed with _p (for protected). The + use of the new functions mentionned below is then recommended. + + Another addition of this commit is the capability of loading only the + values of the offset/bytecount of the strile of interest instead of the + whole array. This is enabled with the new 'O' (Ondemand) flag of TIFFOpen() + (which implies 'D'). That behaviour has also been used by GDAL, which hacked + into the td_stripoffset/td_stripbytecount arrays directly. The new code + added in the _TIFFFetchStrileValue() and _TIFFPartialReadStripArray() internal + functions is mostly a port of what was in GDAL GTiff driver previously. + + Related to that, the public TIFFGetStrileOffset[WithErr]() and TIFFGetStrileByteCount[WithErr]() + functions have been added to API. They are of particular interest when + using sparse files (with offset == bytecount == 0) and you want to detect + if a strile is present or not without decompressing the data, or updating + an existing sparse file. + They will also be used to enable a future enhancement where client code can entirely + skip bytecount loading in some situtations + + A new test/defer_strile_loading.c test has been added to test the above + capabilities. + +2019-05-10 Even Rouault + + Creation: use SHORT type when possible for StripByteCounts/TileByteCounts + This follows the same logic as previous commit. + +2019-05-09 Even Rouault + + BigTIFF creation: write TileByteCounts/StripByteCounts tag with LONG when possible + In most situations of BigTIFF file, the tile/strip sizes are of reasonable size, + that is they fit on a 4-byte LONG. So in that case, use LONG instead of LONG8 + to save some space. For uncompressed file, it is easy to detect such situations + by checking at the TIFFTileSize64()/TIFFStripSize64() return. For compressed file, + we must take into account the fact that compression may sometimes result in + larger compressed data. 
So we allow this optimization only for a few select
+ compression types, and take a huge security margin (10x factor). We also only
+ apply this optimization to multi-strip files, so as to allow easy on-the-fly
+ growing of single-strip files whose strip size could grow above the 4GB threshold.
+
+ This change is compatible with the BigTIFF specification. According to
+ https://www.awaresystems.be/imaging/tiff/bigtiff.html:
+ "The StripOffsets, StripByteCounts, TileOffsets, and TileByteCounts tags are
+ allowed to have the datatype TIFF_LONG8 in BigTIFF. Old datatypes TIFF_LONG,
+ and TIFF_SHORT where allowed in the TIFF 6.0 specification, are still valid in BigTIFF, too."
+ From a practical point of view, this is also compatible with reading/writing by
+ older libtiff 4.X versions.
+
+ The only glitch I found, which is rather minor, is when using such a BigTIFF
+ file with TileByteCounts/StripByteCounts written with TIFF_LONG, and updating
+ it with an older libtiff 4.X version with a change in the
+ [Tile/Strip][ByteCounts/Offsets] array. In that case the _TIFFRewriteField()
+ function will rewrite the directory and array with TIFF_LONG8, instead of updating
+ the existing array (this is an issue fixed by this commit). The file will
+ still be valid, however, hence the minor severity of this.
+
+2019-05-08 Even Rouault
+
+ Merge branch 'bug2799' into 'master'
+ fix fax2tiff
+
+ See merge request libtiff/libtiff!55
+
+2019-05-08 Even Rouault
+
+ Merge branch 'bug_2829' into 'master'
+ WIN32: use tif_win32.c when building with CMake
+
+ See merge request libtiff/libtiff!75
+
+2019-05-06 Even Rouault
+
+ Merge branch 'FILESOURCE_SCENETYPE_reading' into 'master'
+ Reading error for FileSource and SceneType tags corrected.
+
+ See merge request libtiff/libtiff!76
+
+2019-05-06 Su Laus
+
+ Reading error for FileSource and SceneType tags corrected.
+ EXIF tags FILESOURCE and SCENETYPE are defined as TIFF_UNDEFINED with field_readcount==1!
+ There is a bug in TIFFReadDirEntryByte() that prevents TIFF_UNDEFINED fields with field_readcount==1 from being read correctly.
+ Upgrading TIFFReadDirEntryByte() with an added TIFF_UNDEFINED switch entry allows libtiff to read those tags correctly.
+
+2019-04-25 Thomas Bernard
+
+ WIN32: use tif_win32.c when building with CMake.
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2829
+
+ the top CMakeLists.txt defines
+ win32_io and USE_WIN32_FILEIO
+
+ WIN32_IO is defined nowhere in CMake (only in the automake build)
+
+2019-04-25 Even Rouault
+
+ Merge branch 'gitlab_pages' into 'master'
+ Advertise https://libtiff.gitlab.io/libtiff/ as mirror
+
+ See merge request libtiff/libtiff!70
+
+2019-04-25 Even Rouault
+
+ Merge branch 'bug_2844' into 'master'
+ tiff2ps.c: PSDataColorContig(): avoid heap buffer overrun
+
+ See merge request libtiff/libtiff!69
+
+2019-04-25 Even Rouault
+
+ Merge branch 'issue_2785' into 'master'
+ tiff2pdf.c: don't call t2p_tile_collapse_left() for Ycbcr
+
+ See merge request libtiff/libtiff!64
+
+2019-04-11 Even Rouault
+
+ Merge branch 'fix_gdal_1439' into 'master'
+ TIFFWriteEncodedStrip/TIFFWriteEncodedTile: fix rewriting of LZW-compressed data
+
+ See merge request libtiff/libtiff!74
+
+2019-04-11 Even Rouault
+
+ TIFFWriteEncodedStrip/TIFFWriteEncodedTile: fix rewriting of LZW-compressed data
+ Fixes https://github.com/OSGeo/gdal/issues/1439
+
+ When rewriting an LZW-compressed tile/strip whose existing size is very close to a multiple of
+ 1024 bytes (and larger than 8192 bytes) with compressed data that is larger,
+ the new data was not placed at the end of the file, causing corruption.
+
+2019-04-08 Even Rouault
+
+ Merge branch 'bug2848' into 'master'
+ tif_luv.c: LogLuvSetupEncode() error must return 0
+
+ See merge request libtiff/libtiff!72
+
+2019-04-03 Thomas Bernard
+
+ build/gitlab-ci: fix typo.
+
+ Show test-suite.log in gitlab-ci.
+ Useful when the build fails.
+
+ Add output check for tiff2ps.
+ Note: the reference files have been generated on the master branch.
+
+2019-03-23 Even Rouault
+
+ tif_read.c: potentially fix false positive from Coverity Scan. CID 1400288
+
+ tif_read.c: potentially fix false positive from Coverity Scan. CID 1400271
+
+ tif_zip.c: remove dead code. CID 1400360.
+
+ tif_webp.c: remove false positive warning about dereference before null check. CID 1400255
+
+ tif_pixarlog.c: remove dead code. CID 1400342.
+
+ tif_pixarlog.c: avoid false positive Coverity Scan warnings about overflow. CID 1400300 and 1400367
+
+ tif_lzw.c: silence CoverityScan false positive. CID 1400355.
+
+ tif_luv.c: silence CoverityScan false positive. CID 1400231, 1400251, 1400254, 1400272, 1400318, 1400356
+
+ TryChopUpUncompressedBigTiff(): avoid potential division by zero. master only. GDAL Coverity CID 1400263
+
+2019-03-22 Thomas Bernard
+
+ tif_luv.c: LogLuvSetupEncode() error must return 0.
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2848
+
+ If it wrongly returns 1, the processing of the incorrect file continues,
+ which causes problems.
+
+2019-03-22 Thomas Bernard
+
+ add a test for the fax2tiff tool.
+
+2019-02-28 Thomas Bernard
+
+ tiff2pdf.c: don't call t2p_tile_collapse_left() when the buffer size is wrong
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2785
+
+ Advertise https://libtiff.gitlab.io/libtiff/ as mirror.
+ I put it above the maptools.org mirror because
+ Even Rouault believes it will at some point be completely removed.
+
+2019-02-28 Even Rouault
+
+ Merge branch 'bug_2826' into 'master'
+ tiff2pdf.c: check colormap pointers when loading CMYK with colormap
+
+ See merge request libtiff/libtiff!65
+
+2019-02-28 Thomas Bernard
+
+ tiff2pdf.c: check colormap pointers.
+ Avoid access to uninitialized pointers
+ http://bugzilla.maptools.org/show_bug.cgi?id=2826
+
+2019-02-27 Even Rouault
+
+ Merge branch 'fix_warnings' into 'master'
+ tiff2ps.c: fix warning caused by integer promotion
+
+ See merge request libtiff/libtiff!68
+
+2019-02-23 Thomas Bernard
+
+ PSDataColorContig(): avoid heap buffer overrun.
+ fixes http://bugzilla.maptools.org/show_bug.cgi?id=2844
+ each iteration of the loop reads nc bytes
+
+2019-02-22 Thomas Bernard
+
+ tiff2ps.c: fix warning caused by integer promotion.
+ A uint8 value is promoted to int in (value << 24), so -fsanitize
+ yields runtime errors:
+ tiff2ps.c:2969:33: runtime error: left shift of 246 by 24 places cannot be represented in type 'int'
+
+2019-02-22 Even Rouault
+
+ Merge branch 'large_strile_improvements' into 'master'
+ Large strile support improvements
+
+ See merge request libtiff/libtiff!63
+
+2019-02-21 Even Rouault
+
+ Merge branch 'gitlab-pages' into 'master'
+ ci: Add pages job
+
+ See merge request libtiff/libtiff!45
+
+2019-02-19 Even Rouault
+
+ Merge branch 'issue_2833' into 'master'
+ tiffcp.c: check that (Tile Width)*(Samples/Pixel) does not overflow
+
+ See merge request libtiff/libtiff!60
+
+2019-02-19 Even Rouault
+
+ Merge branch 'issue_2831' into 'master'
+ tiffcrop.c: fix invertImage() for bps 2 and 4
+
+ See merge request libtiff/libtiff!61
+
+2019-02-19 Even Rouault
+
+ Merge branch 'issue_2842' into 'master'
+ move _TIFFClampDoubleToFloat() to tif_aux.c
+
+ See merge request libtiff/libtiff!62
+
+2019-02-19 Even Rouault
+
+ tif_zip.c: allow reading and writing strips/tiles with more than 4 GB of compressed or uncompressed data
+
+ tif_dirread.c: when strip chopping is enabled, extend this mechanism to multi-strip uncompressed files with strips larger than 2GB to expose them as strips of ~500 MB
+
+2019-02-19 Even Rouault
+
+ Merge branch 'size_t_typo' into 'master'
+ CMakeLists.txt: fix TIFF_SIZE_T
+
+ See merge request libtiff/libtiff!59
+
+2019-02-12 Thomas Bernard
+
+ move _TIFFClampDoubleToFloat() to tif_aux.c.
+ the same function was declared in tif_dir.c and tif_dirwrite.c
+
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2842
+
+2019-02-11 Thomas Bernard
+
+ tiffcrop.c: fix invertImage() for bps 2 and 4.
+ Too many bytes were processed, causing a heap buffer overrun
+ http://bugzilla.maptools.org/show_bug.cgi?id=2831
+ the loop counter must be
+ for (col = 0; col < width; col += 8 / bps)
+
+ Also the values were not properly calculated. It should be
+ 255-x, 15-x, 3-x for bps 8, 4, 2.
+
+ But anyway it is easier to invert all bits, as 255-x = ~x, etc.
+ (subtracting from a binary number composed of all 1s is the same as inverting
+ the bits)
+
+2019-02-11 Thomas Bernard
+
+ tiffcp.c: use INT_MAX.
+
+ check that (Tile Width)*(Samples/Pixel) does not overflow.
+ fixes bug 2833
+
+2019-02-03 Thomas Bernard
+
+ CMakeLists.txt: fix TIFF_SIZE_T.
+
+2019-02-02 Even Rouault
+
+ Merge branch 'master' into 'master'
+ Fix for simple memory leak that was assigned CVE-2019-6128.
+
+ See merge request libtiff/libtiff!50
+
+2019-02-02 Even Rouault
+
+ Merge branch 'bug2835' into 'master'
+ tiff2ps: fix heap-buffer-overflow
+
+ See merge request libtiff/libtiff!53
+
+2019-02-02 Even Rouault
+
+ Fix warning (use of uninitialized value) added per d0a842c5dbad2609aed43c701a12ed12461d3405 (fixes https://gitlab.com/libtiff/libtiff/merge_requests/54#note_137742985)
+
+2019-02-02 Yuri Aksenov
+
+ fix fax2tiff.
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2799
+ fixes d9bc8472e72549f29c0062c1cbd3d56f279f3be2
+
+2019-02-02 Even Rouault
+
+ Merge branch 'tiffcrop' into 'master'
+ tiffcrop: shut up clang warnings
+
+ See merge request libtiff/libtiff!52
+
+2019-02-01 Even Rouault
+
+ Merge branch 'bug2833' into 'master'
+ TIFFWriteDirectoryTagTransferfunction() : fix NULL dereferencing
+
+ See merge request libtiff/libtiff!54
+
+2019-02-01 Even Rouault
+
+ Merge branch 'gitignore' into 'master'
+ add test/ files to .gitignore
+
+ See merge request libtiff/libtiff!56
+
+2019-02-01 Even Rouault
+
+ Merge branch 'master' into 'master'
+ tif_dir: unset transferfunction field if necessary (CVE-2018-19210)
+
+ See merge request libtiff/libtiff!47
+
+2019-01-29 Thomas Bernard
+
+ add test/ files to .gitignore.
+
+2019-01-29 Thomas Bernard
+
+ TIFFWriteDirectoryTagTransferfunction() : fix NULL dereferencing.
+ http://bugzilla.maptools.org/show_bug.cgi?id=2833
+
+ We must check that the pointer is not NULL before calling memcmp() on the memory.
+
+2019-01-29 Thomas Bernard
+
+ tiff2ps: fix heap-buffer-overflow.
+ http://bugzilla.maptools.org/show_bug.cgi?id=2834
+
+ Usually the test (i < byte_count) is OK because byte_count is divisible by samplesperpixel.
+ But if that is not the case, (i + ncomps) < byte_count should be used, or
+ maybe (i + samplesperpixel) <= byte_count
+
+2019-01-28 Thomas Bernard
+
+ tiffcrop: shut up clang warnings.
+ Make building of the output filename a bit simpler
+ and remove the use of strcat()
+
+2019-01-23 Scott Gayou
+
+ Fix for simple memory leak that was assigned CVE-2019-6128.
+ pal2rgb failed to free memory on a few errors. This was reported
+ here: http://bugzilla.maptools.org/show_bug.cgi?id=2836.
+
+2019-01-05 Bob Friesenhahn
+
+ Fix tiff2ps error regarding "Inconsistent value of es" by allowing es to be zero. Problem was reported to the tiff mailing list by Julian H. Stacey on January 5, 2019.
+
+2018-12-13 Hugo Lefeuvre
+
+ tif_dir: unset transferfunction field if necessary.
+ The number of entries in the transfer table is determined as follows
+ (see the sketch after these entries):
+
+ (td->td_samplesperpixel - td->td_extrasamples) > 1 ? 3 : 1
+
+ This means that whenever td->td_samplesperpixel or td->td_extrasamples is
+ modified we also need to make sure that the number of required entries in
+ the transfer table didn't change.
+
+ If it changed and the number of entries is higher than before, we should
+ invalidate the transfer table field and free previously allocated values.
+ In the other case there's nothing to do; additional tf entries won't harm,
+ and properly written code will just ignore them since spp - es < 1.
+
+ For instance this situation might happen when reading an OJPEG-compressed
+ image with a missing SamplesPerPixel tag. In this case the SamplesPerPixel
+ field might be updated after setting the transfer table.
+
+ see http://bugzilla.maptools.org/show_bug.cgi?id=2500
+
+ This commit addresses CVE-2018-19210.
+
+2018-12-08 Bob Friesenhahn
+
+ Do not attempt to re-sync the zip stream after a reported data error from inflate().
+
+2018-12-07 Even Rouault
+
+ Merge branch 'resource-leaks' into 'master'
+ Fix two resource leaks
+
+ See merge request libtiff/libtiff!43
+
+2018-12-07 Even Rouault
+
+ Merge branch 'build-jbig' into 'master'
+ add jbig support to the fuzzer
+
+ See merge request libtiff/libtiff!42
+
+2018-12-01 Bob Friesenhahn
+
+ tiffcrop.c: Avoid new clang warning about tools/tiffcrop.c "size argument in 'strncat' call appears to be size of the source".
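A schematic illustration of the transfer-table rule quoted in the 2018-12-13 entry above; this is not the actual libtiff code, and the helper name is invented.

    /* Number of TransferFunction entries a directory needs, following
       (td->td_samplesperpixel - td->td_extrasamples) > 1 ? 3 : 1 */
    static int tf_entries(int samplesperpixel, int extrasamples)
    {
        return (samplesperpixel - extrasamples) > 1 ? 3 : 1;
    }

    /* Whenever samplesperpixel or extrasamples changes, tf_entries() must be
       recomputed: if it grew from 1 to 3, a previously set one-entry transfer
       table has to be invalidated and freed; if it shrank, the extra entries
       are harmless and properly written readers ignore them. */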
+ +2018-11-28 Even Rouault + + Merge branch 'webp_memleak' into 'master' + fixed mem leak in webp compression + + See merge request libtiff/libtiff!48 + +2018-11-28 Norman Barker + + fixed mem leak in webp compression. + +2018-11-20 Even Rouault + + Merge branch 'lossless_webp' into 'master' + fixed lossless webp compression config + + See merge request libtiff/libtiff!46 + +2018-11-20 Norman Barker + + fixed lossless webp compression config. + +2018-11-18 Bob Friesenhahn + + snprintf porting fix for Visual Studio 2003. + +2018-11-18 Roger Leigh + + ci: Add pages job. + +2018-11-10 Bob Friesenhahn + + Change references from defunct ftp site to https site. + 2018-11-10 Bob Friesenhahn * configure.ac: libtiff 4.0.10 released. diff --git a/3rdparty/libtiff/libport.h b/3rdparty/libtiff/libport.h index ff26263829..9f2dace144 100644 --- a/3rdparty/libtiff/libport.h +++ b/3rdparty/libtiff/libport.h @@ -24,6 +24,10 @@ #ifndef _LIBPORT_ #define _LIBPORT_ +#if defined(HAVE_CONFIG_H) +# include +#endif + int getopt(int argc, char * const argv[], const char *optstring); extern char *optarg; extern int opterr; @@ -36,16 +40,16 @@ int strcasecmp(const char *s1, const char *s2); # define HAVE_GETOPT 1 #endif -#if HAVE_STRTOL +#if !defined(HAVE_STRTOL) long strtol(const char *nptr, char **endptr, int base); #endif -#if HAVE_STRTOLL +#if !defined(HAVE_STRTOLL) long long strtoll(const char *nptr, char **endptr, int base); #endif -#if HAVE_STRTOUL +#if !defined(HAVE_STRTOUL) unsigned long strtoul(const char *nptr, char **endptr, int base); #endif -#if HAVE_STRTOULL +#if !defined(HAVE_STRTOULL) unsigned long long strtoull(const char *nptr, char **endptr, int base); #endif diff --git a/3rdparty/libtiff/tif_aux.c b/3rdparty/libtiff/tif_aux.c index 90d30214c6..c9f190545e 100644 --- a/3rdparty/libtiff/tif_aux.c +++ b/3rdparty/libtiff/tif_aux.c @@ -35,27 +35,61 @@ uint32 _TIFFMultiply32(TIFF* tif, uint32 first, uint32 second, const char* where) { - uint32 bytes = first * second; - - if (second && bytes / second != first) { + if (second && first > TIFF_UINT32_MAX / second) { TIFFErrorExt(tif->tif_clientdata, where, "Integer overflow in %s", where); - bytes = 0; + return 0; } - return bytes; + return first * second; } uint64 _TIFFMultiply64(TIFF* tif, uint64 first, uint64 second, const char* where) { - uint64 bytes = first * second; - - if (second && bytes / second != first) { + if (second && first > TIFF_UINT64_MAX / second) { TIFFErrorExt(tif->tif_clientdata, where, "Integer overflow in %s", where); - bytes = 0; + return 0; } - return bytes; + return first * second; +} + +tmsize_t +_TIFFMultiplySSize(TIFF* tif, tmsize_t first, tmsize_t second, const char* where) +{ + if( first <= 0 || second <= 0 ) + { + if( tif != NULL && where != NULL ) + { + TIFFErrorExt(tif->tif_clientdata, where, + "Invalid argument to _TIFFMultiplySSize() in %s", where); + } + return 0; + } + + if( first > TIFF_TMSIZE_T_MAX / second ) + { + if( tif != NULL && where != NULL ) + { + TIFFErrorExt(tif->tif_clientdata, where, + "Integer overflow in %s", where); + } + return 0; + } + return first * second; +} + +tmsize_t _TIFFCastUInt64ToSSize(TIFF* tif, uint64 val, const char* module) +{ + if( val > (uint64)TIFF_TMSIZE_T_MAX ) + { + if( tif != NULL && module != NULL ) + { + TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); + } + return 0; + } + return (tmsize_t)val; } void* @@ -63,13 +97,14 @@ _TIFFCheckRealloc(TIFF* tif, void* buffer, tmsize_t nmemb, tmsize_t elem_size, const char* what) { void* cp = NULL; - tmsize_t bytes = nmemb * 
elem_size; - + tmsize_t count = _TIFFMultiplySSize(tif, nmemb, elem_size, NULL); /* - * XXX: Check for integer overflow. + * Check for integer overflow. */ - if (nmemb && elem_size && bytes / elem_size == nmemb) - cp = _TIFFrealloc(buffer, bytes); + if (count != 0) + { + cp = _TIFFrealloc(buffer, count); + } if (cp == NULL) { TIFFErrorExt(tif->tif_clientdata, tif->tif_name, @@ -235,7 +270,7 @@ TIFFVGetFieldDefaulted(TIFF* tif, uint32 tag, va_list ap) return (1); case TIFFTAG_EXTRASAMPLES: *va_arg(ap, uint16 *) = td->td_extrasamples; - *va_arg(ap, uint16 **) = td->td_sampleinfo; + *va_arg(ap, const uint16 **) = td->td_sampleinfo; return (1); case TIFFTAG_MATTEING: *va_arg(ap, uint16 *) = @@ -257,8 +292,8 @@ TIFFVGetFieldDefaulted(TIFF* tif, uint32 tag, va_list ap) case TIFFTAG_YCBCRCOEFFICIENTS: { /* defaults are from CCIR Recommendation 601-1 */ - static float ycbcrcoeffs[] = { 0.299f, 0.587f, 0.114f }; - *va_arg(ap, float **) = ycbcrcoeffs; + static const float ycbcrcoeffs[] = { 0.299f, 0.587f, 0.114f }; + *va_arg(ap, const float **) = ycbcrcoeffs; return 1; } case TIFFTAG_YCBCRSUBSAMPLING: @@ -270,14 +305,14 @@ TIFFVGetFieldDefaulted(TIFF* tif, uint32 tag, va_list ap) return (1); case TIFFTAG_WHITEPOINT: { - static float whitepoint[2]; - /* TIFF 6.0 specification tells that it is no default value for the WhitePoint, but AdobePhotoshop TIFF Technical Note tells that it should be CIE D50. */ - whitepoint[0] = D50_X0 / (D50_X0 + D50_Y0 + D50_Z0); - whitepoint[1] = D50_Y0 / (D50_X0 + D50_Y0 + D50_Z0); - *va_arg(ap, float **) = whitepoint; + static const float whitepoint[] = { + D50_X0 / (D50_X0 + D50_Y0 + D50_Z0), + D50_Y0 / (D50_X0 + D50_Y0 + D50_Z0) + }; + *va_arg(ap, const float **) = whitepoint; return 1; } case TIFFTAG_TRANSFERFUNCTION: @@ -286,16 +321,16 @@ TIFFVGetFieldDefaulted(TIFF* tif, uint32 tag, va_list ap) TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "No space for \"TransferFunction\" tag"); return (0); } - *va_arg(ap, uint16 **) = td->td_transferfunction[0]; + *va_arg(ap, const uint16 **) = td->td_transferfunction[0]; if (td->td_samplesperpixel - td->td_extrasamples > 1) { - *va_arg(ap, uint16 **) = td->td_transferfunction[1]; - *va_arg(ap, uint16 **) = td->td_transferfunction[2]; + *va_arg(ap, const uint16 **) = td->td_transferfunction[1]; + *va_arg(ap, const uint16 **) = td->td_transferfunction[2]; } return (1); case TIFFTAG_REFERENCEBLACKWHITE: if (!td->td_refblackwhite && !TIFFDefaultRefBlackWhite(td)) return (0); - *va_arg(ap, float **) = td->td_refblackwhite; + *va_arg(ap, const float **) = td->td_refblackwhite; return (1); } return 0; diff --git a/3rdparty/libtiff/tif_compress.c b/3rdparty/libtiff/tif_compress.c index 8130ef08ef..915478f500 100644 --- a/3rdparty/libtiff/tif_compress.c +++ b/3rdparty/libtiff/tif_compress.c @@ -264,7 +264,7 @@ TIFFGetConfiguredCODECs() return NULL; } codecs = new_codecs; - _TIFFmemcpy(codecs + i - 1, cd, sizeof(TIFFCodec)); + _TIFFmemcpy(codecs + i - 1, cd->info, sizeof(TIFFCodec)); i++; } for (c = _TIFFBuiltinCODECS; c->name; c++) { diff --git a/3rdparty/libtiff/tif_dir.c b/3rdparty/libtiff/tif_dir.c index b4ecd44f95..347b7115cb 100644 --- a/3rdparty/libtiff/tif_dir.c +++ b/3rdparty/libtiff/tif_dir.c @@ -29,6 +29,7 @@ * (and also some miscellaneous stuff) */ #include "tiffiop.h" +#include /*--: for Rational2Double */ /* * These are used in the backwards compatibility code... 
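The _TIFFMultiply32()/_TIFFMultiply64()/_TIFFMultiplySSize() rewrite in the tif_aux.c hunks above (also picked up by setByteArray() in the next hunk) replaces the old multiply-then-divide overflow test with a pre-check against the type maximum, so an out-of-range product is never computed. A self-contained sketch of the pattern, with an invented function name and plain stdint types standing in for libtiff's typedefs:

    #include <stdint.h>

    /* Returns a*b, or 0 if the product would not fit in uint32_t.
       Testing a > UINT32_MAX / b before multiplying rejects the overflow
       up front instead of inspecting a wrapped-around result. */
    static uint32_t mul_checked_u32(uint32_t a, uint32_t b)
    {
        if (b != 0 && a > UINT32_MAX / b)
            return 0; /* would overflow */
        return a * b;
    }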
@@ -46,8 +47,8 @@ setByteArray(void** vpp, void* vp, size_t nmemb, size_t elem_size) *vpp = 0; } if (vp) { - tmsize_t bytes = (tmsize_t)(nmemb * elem_size); - if (elem_size && bytes / elem_size == nmemb) + tmsize_t bytes = _TIFFMultiplySSize(NULL, nmemb, elem_size, NULL); + if (bytes) *vpp = (void*) _TIFFmalloc(bytes); if (*vpp) _TIFFmemcpy(*vpp, vp, bytes); @@ -123,7 +124,7 @@ setExtraSamples(TIFF* tif, va_list ap, uint32* v) { TIFFWarningExt(tif->tif_clientdata,module, "ExtraSamples tag value is changing, " - "but TransferFunction was read with a different value. Cancelling it"); + "but TransferFunction was read with a different value. Canceling it"); TIFFClrFieldBit(tif,FIELD_TRANSFERFUNCTION); _TIFFfree(td->td_transferfunction[0]); td->td_transferfunction[0] = NULL; @@ -205,7 +206,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) /* * If the data require post-decoding processing to byte-swap * samples, set it up here. Note that since tags are required - * to be ordered, compression code can override this behaviour + * to be ordered, compression code can override this behavior * in the setup method if it wants to roll the post decoding * work in with its normal work. */ @@ -275,7 +276,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) { TIFFWarningExt(tif->tif_clientdata,module, "SamplesPerPixel tag value is changing, " - "but SMinSampleValue tag was read with a different value. Cancelling it"); + "but SMinSampleValue tag was read with a different value. Canceling it"); TIFFClrFieldBit(tif,FIELD_SMINSAMPLEVALUE); _TIFFfree(td->td_sminsamplevalue); td->td_sminsamplevalue = NULL; @@ -284,7 +285,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) { TIFFWarningExt(tif->tif_clientdata,module, "SamplesPerPixel tag value is changing, " - "but SMaxSampleValue tag was read with a different value. Cancelling it"); + "but SMaxSampleValue tag was read with a different value. Canceling it"); TIFFClrFieldBit(tif,FIELD_SMAXSAMPLEVALUE); _TIFFfree(td->td_smaxsamplevalue); td->td_smaxsamplevalue = NULL; @@ -296,7 +297,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) { TIFFWarningExt(tif->tif_clientdata,module, "SamplesPerPixel tag value is changing, " - "but TransferFunction was read with a different value. Cancelling it"); + "but TransferFunction was read with a different value. Canceling it"); TIFFClrFieldBit(tif,FIELD_TRANSFERFUNCTION); _TIFFfree(td->td_transferfunction[0]); td->td_transferfunction[0] = NULL; @@ -393,7 +394,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) if (tif->tif_mode != O_RDONLY) goto badvalue32; TIFFWarningExt(tif->tif_clientdata, tif->tif_name, - "Nonstandard tile width %d, convert file", v32); + "Nonstandard tile width %u, convert file", v32); } td->td_tilewidth = v32; tif->tif_flags |= TIFF_ISTILED; @@ -404,7 +405,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) if (tif->tif_mode != O_RDONLY) goto badvalue32; TIFFWarningExt(tif->tif_clientdata, tif->tif_name, - "Nonstandard tile length %d, convert file", v32); + "Nonstandard tile length %u, convert file", v32); } td->td_tilelength = v32; tif->tif_flags |= TIFF_ISTILED; @@ -559,6 +560,10 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) * Set custom value ... save a copy of the custom tag value. */ tv_size = _TIFFDataSize(fip->field_type); + /*--: Rational2Double: For Rationals evaluate "set_field_type" to determine internal storage size. 
*/ + if (fip->field_type == TIFF_RATIONAL || fip->field_type == TIFF_SRATIONAL) { + tv_size = _TIFFSetGetFieldSize(fip->set_field_type); + } if (tv_size == 0) { status = 0; TIFFErrorExt(tif->tif_clientdata, module, @@ -638,6 +643,7 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) || fip->field_writecount == TIFF_VARIABLE2 || fip->field_writecount == TIFF_SPP || tv->count > 1) { + /*--: Rational2Double: For Rationals tv_size is set above to 4 or 8 according to fip->set_field_type! */ _TIFFmemcpy(tv->value, va_arg(ap, void *), tv->count * tv_size); } else { @@ -698,6 +704,22 @@ _TIFFVSetField(TIFF* tif, uint32 tag, va_list ap) break; case TIFF_RATIONAL: case TIFF_SRATIONAL: + /*-- Rational2Double: For Rationals tv_size is set above to 4 or 8 according to fip->set_field_type! */ + { + if (tv_size == 8) { + double v2 = va_arg(ap, double); + _TIFFmemcpy(val, &v2, tv_size); + } else { + /*-- default should be tv_size == 4 */ + float v3 = (float)va_arg(ap, double); + _TIFFmemcpy(val, &v3, tv_size); + /*-- ToDo: After Testing, this should be removed and tv_size==4 should be set as default. */ + if (tv_size != 4) { + TIFFErrorExt(0,"TIFFLib: _TIFFVSetField()", "Rational2Double: .set_field_type in not 4 but %d", tv_size); + } + } + } + break; case TIFF_FLOAT: { float v2 = _TIFFClampDoubleToFloat(va_arg(ap, double)); @@ -1011,19 +1033,19 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) *va_arg(ap, uint16*) = td->td_halftonehints[1]; break; case TIFFTAG_COLORMAP: - *va_arg(ap, uint16**) = td->td_colormap[0]; - *va_arg(ap, uint16**) = td->td_colormap[1]; - *va_arg(ap, uint16**) = td->td_colormap[2]; + *va_arg(ap, const uint16**) = td->td_colormap[0]; + *va_arg(ap, const uint16**) = td->td_colormap[1]; + *va_arg(ap, const uint16**) = td->td_colormap[2]; break; case TIFFTAG_STRIPOFFSETS: case TIFFTAG_TILEOFFSETS: _TIFFFillStriles( tif ); - *va_arg(ap, uint64**) = td->td_stripoffset; + *va_arg(ap, const uint64**) = td->td_stripoffset_p; break; case TIFFTAG_STRIPBYTECOUNTS: case TIFFTAG_TILEBYTECOUNTS: _TIFFFillStriles( tif ); - *va_arg(ap, uint64**) = td->td_stripbytecount; + *va_arg(ap, const uint64**) = td->td_stripbytecount_p; break; case TIFFTAG_MATTEING: *va_arg(ap, uint16*) = @@ -1032,7 +1054,7 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) break; case TIFFTAG_EXTRASAMPLES: *va_arg(ap, uint16*) = td->td_extrasamples; - *va_arg(ap, uint16**) = td->td_sampleinfo; + *va_arg(ap, const uint16**) = td->td_sampleinfo; break; case TIFFTAG_TILEWIDTH: *va_arg(ap, uint32*) = td->td_tilewidth; @@ -1067,7 +1089,7 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) break; case TIFFTAG_SUBIFD: *va_arg(ap, uint16*) = td->td_nsubifd; - *va_arg(ap, uint64**) = td->td_subifd; + *va_arg(ap, const uint64**) = td->td_subifd; break; case TIFFTAG_YCBCRPOSITIONING: *va_arg(ap, uint16*) = td->td_ycbcrpositioning; @@ -1077,20 +1099,20 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) *va_arg(ap, uint16*) = td->td_ycbcrsubsampling[1]; break; case TIFFTAG_TRANSFERFUNCTION: - *va_arg(ap, uint16**) = td->td_transferfunction[0]; + *va_arg(ap, const uint16**) = td->td_transferfunction[0]; if (td->td_samplesperpixel - td->td_extrasamples > 1) { - *va_arg(ap, uint16**) = td->td_transferfunction[1]; - *va_arg(ap, uint16**) = td->td_transferfunction[2]; + *va_arg(ap, const uint16**) = td->td_transferfunction[1]; + *va_arg(ap, const uint16**) = td->td_transferfunction[2]; } else { - *va_arg(ap, uint16**) = NULL; - *va_arg(ap, uint16**) = NULL; + *va_arg(ap, const uint16**) = NULL; + *va_arg(ap, const uint16**) = NULL; 
} break; case TIFFTAG_REFERENCEBLACKWHITE: - *va_arg(ap, float**) = td->td_refblackwhite; + *va_arg(ap, const float**) = td->td_refblackwhite; break; case TIFFTAG_INKNAMES: - *va_arg(ap, char**) = td->td_inknames; + *va_arg(ap, const char**) = td->td_inknames; break; default: { @@ -1132,7 +1154,7 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) *va_arg(ap, uint32*) = (uint32)tv->count; else /* Assume TIFF_VARIABLE */ *va_arg(ap, uint16*) = (uint16)tv->count; - *va_arg(ap, void **) = tv->value; + *va_arg(ap, const void **) = tv->value; ret_val = 1; } else if (fip->field_tag == TIFFTAG_DOTRANGE && strcmp(fip->field_name,"DotRange") == 0) { @@ -1200,6 +1222,23 @@ _TIFFVGetField(TIFF* tif, uint32 tag, va_list ap) break; case TIFF_RATIONAL: case TIFF_SRATIONAL: + { + /*-- Rational2Double: For Rationals evaluate "set_field_type" to determine internal storage size and return value size. */ + int tv_size = _TIFFSetGetFieldSize(fip->set_field_type); + if (tv_size == 8) { + *va_arg(ap, double*) = *(double *)val; + ret_val = 1; + } else { + /*-- default should be tv_size == 4 */ + *va_arg(ap, float*) = *(float *)val; + ret_val = 1; + /*-- ToDo: After Testing, this should be removed and tv_size==4 should be set as default. */ + if (tv_size != 4) { + TIFFErrorExt(0,"TIFFLib: _TIFFVGetField()", "Rational2Double: .set_field_type in not 4 but %d", tv_size); + } + } + } + break; case TIFF_FLOAT: *va_arg(ap, float*) = *(float *)val; @@ -1282,8 +1321,9 @@ TIFFFreeDirectory(TIFF* tif) CleanupField(td_transferfunction[0]); CleanupField(td_transferfunction[1]); CleanupField(td_transferfunction[2]); - CleanupField(td_stripoffset); - CleanupField(td_stripbytecount); + CleanupField(td_stripoffset_p); + CleanupField(td_stripbytecount_p); + td->td_stripoffsetbyteallocsize = 0; TIFFClrFieldBit(tif, FIELD_YCBCRSUBSAMPLING); TIFFClrFieldBit(tif, FIELD_YCBCRPOSITIONING); @@ -1296,10 +1336,8 @@ TIFFFreeDirectory(TIFF* tif) td->td_customValueCount = 0; CleanupField(td_customValues); -#if defined(DEFER_STRILE_LOAD) _TIFFmemset( &(td->td_stripoffset_entry), 0, sizeof(TIFFDirEntry)); _TIFFmemset( &(td->td_stripbytecount_entry), 0, sizeof(TIFFDirEntry)); -#endif } #undef CleanupField @@ -1365,6 +1403,17 @@ TIFFCreateEXIFDirectory(TIFF* tif) return TIFFCreateCustomDirectory(tif, exifFieldArray); } +/* + * Creates the EXIF GPS custom directory + */ +int +TIFFCreateGPSDirectory(TIFF* tif) +{ + const TIFFFieldArray* gpsFieldArray; + gpsFieldArray = _TIFFGetGpsFields(); + return TIFFCreateCustomDirectory(tif, gpsFieldArray); +} + /* * Setup a default directory structure. */ @@ -1387,7 +1436,9 @@ TIFFDefaultDirectory(TIFF* tif) td->td_tilewidth = 0; td->td_tilelength = 0; td->td_tiledepth = 1; +#ifdef STRIPBYTECOUNTSORTED_UNUSED td->td_stripbytecountsorted = 1; /* Our own arrays always sorted. 
*/ +#endif td->td_resolutionunit = RESUNIT_INCH; td->td_sampleformat = SAMPLEFORMAT_UINT; td->td_imagedepth = 1; diff --git a/3rdparty/libtiff/tif_dir.h b/3rdparty/libtiff/tif_dir.h index b2f5e69488..f608dd713b 100644 --- a/3rdparty/libtiff/tif_dir.h +++ b/3rdparty/libtiff/tif_dir.h @@ -58,6 +58,7 @@ typedef struct { uint32 toff_long; uint64 toff_long8; } tdir_offset; /* either offset or the data itself if fits */ + uint8 tdir_ignore; /* flag status to ignore tag when parsing tags in tif_dirread.c */ } TIFFDirEntry; /* @@ -97,13 +98,14 @@ typedef struct { * number of striles */ uint32 td_stripsperimage; uint32 td_nstrips; /* size of offset & bytecount arrays */ - uint64* td_stripoffset; - uint64* td_stripbytecount; + uint64* td_stripoffset_p; /* should be accessed with TIFFGetStrileOffset */ + uint64* td_stripbytecount_p; /* should be accessed with TIFFGetStrileByteCount */ + uint32 td_stripoffsetbyteallocsize; /* number of elements currently allocated for td_stripoffset/td_stripbytecount. Only used if TIFF_LAZYSTRILELOAD is set */ +#ifdef STRIPBYTECOUNTSORTED_UNUSED int td_stripbytecountsorted; /* is the bytecount array sorted ascending? */ -#if defined(DEFER_STRILE_LOAD) +#endif TIFFDirEntry td_stripoffset_entry; /* for deferred loading */ TIFFDirEntry td_stripbytecount_entry; /* for deferred loading */ -#endif uint16 td_nsubifd; uint64* td_subifd; /* YCbCr parameters */ @@ -118,6 +120,8 @@ typedef struct { int td_customValueCount; TIFFTagValue *td_customValues; + + unsigned char td_deferstrilearraywriting; /* see TIFFDeferStrileArrayWriting() */ } TIFFDirectory; /* @@ -257,6 +261,7 @@ extern "C" { extern const TIFFFieldArray* _TIFFGetFields(void); extern const TIFFFieldArray* _TIFFGetExifFields(void); +extern const TIFFFieldArray* _TIFFGetGpsFields(void); extern void _TIFFSetupFields(TIFF* tif, const TIFFFieldArray* infoarray); extern void _TIFFPrintFieldInfo(TIFF*, FILE*); @@ -265,6 +270,7 @@ extern int _TIFFFillStriles(TIFF*); typedef enum { tfiatImage, tfiatExif, + tfiatGps, /* EXIF-GPS fields array type */ tfiatOther } TIFFFieldArrayType; diff --git a/3rdparty/libtiff/tif_dirinfo.c b/3rdparty/libtiff/tif_dirinfo.c index e1f6b23e9a..7217042c25 100644 --- a/3rdparty/libtiff/tif_dirinfo.c +++ b/3rdparty/libtiff/tif_dirinfo.c @@ -47,9 +47,19 @@ #endif static const TIFFFieldArray tiffFieldArray; static const TIFFFieldArray exifFieldArray; +static const TIFFFieldArray gpsFieldArray; #ifdef _MSC_VER #pragma warning( pop ) #endif +/*--: Rational2Double: -- + * The Rational2Double upgraded libtiff functionality allows the definition and achievement of true double-precision accuracy + * for TIFF tags of RATIONAL type and field_bit=FIELD_CUSTOM using the set_field_type = TIFF_SETGET_DOUBLE. + * Unfortunately, that changes the old implemented interface for TIFFGetField(). + * In order to keep the old TIFFGetField() interface behavior those tags have to be redefined with set_field_type = TIFF_SETGET_FLOAT! + * + * Rational custom arrays are already defined as _Cxx_FLOAT, thus can stay. 
+ * + */ static const TIFFField tiffFields[] = { @@ -75,12 +85,12 @@ tiffFields[] = { { TIFFTAG_STRIPBYTECOUNTS, -1, -1, TIFF_LONG8, 0, TIFF_SETGET_UNDEFINED, TIFF_SETGET_UNDEFINED, FIELD_STRIPBYTECOUNTS, 0, 0, "StripByteCounts", NULL }, { TIFFTAG_MINSAMPLEVALUE, -2, -1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_MINSAMPLEVALUE, 1, 0, "MinSampleValue", NULL }, { TIFFTAG_MAXSAMPLEVALUE, -2, -1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_MAXSAMPLEVALUE, 1, 0, "MaxSampleValue", NULL }, - { TIFFTAG_XRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_RESOLUTION, 1, 0, "XResolution", NULL }, - { TIFFTAG_YRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_RESOLUTION, 1, 0, "YResolution", NULL }, + { TIFFTAG_XRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_RESOLUTION, 1, 0, "XResolution", NULL }, + { TIFFTAG_YRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_RESOLUTION, 1, 0, "YResolution", NULL }, { TIFFTAG_PLANARCONFIG, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_PLANARCONFIG, 0, 0, "PlanarConfiguration", NULL }, { TIFFTAG_PAGENAME, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "PageName", NULL }, - { TIFFTAG_XPOSITION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_POSITION, 1, 0, "XPosition", NULL }, - { TIFFTAG_YPOSITION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_POSITION, 1, 0, "YPosition", NULL }, + { TIFFTAG_XPOSITION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_POSITION, 1, 0, "XPosition", NULL }, + { TIFFTAG_YPOSITION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_POSITION, 1, 0, "YPosition", NULL }, { TIFFTAG_FREEOFFSETS, -1, -1, TIFF_LONG8, 0, TIFF_SETGET_UNDEFINED, TIFF_SETGET_UNDEFINED, FIELD_IGNORE, 0, 0, "FreeOffsets", NULL }, { TIFFTAG_FREEBYTECOUNTS, -1, -1, TIFF_LONG8, 0, TIFF_SETGET_UNDEFINED, TIFF_SETGET_UNDEFINED, FIELD_IGNORE, 0, 0, "FreeByteCounts", NULL }, { TIFFTAG_GRAYRESPONSEUNIT, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UNDEFINED, TIFF_SETGET_UNDEFINED, FIELD_IGNORE, 1, 0, "GrayResponseUnit", NULL }, @@ -135,14 +145,18 @@ tiffFields[] = { { TIFFTAG_PIXAR_MATRIX_WORLDTOSCREEN, 16, 16, TIFF_FLOAT, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "MatrixWorldToScreen", NULL }, { TIFFTAG_PIXAR_MATRIX_WORLDTOCAMERA, 16, 16, TIFF_FLOAT, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "MatrixWorldToCamera", NULL }, { TIFFTAG_CFAREPEATPATTERNDIM, 2, 2, TIFF_SHORT, 0, TIFF_SETGET_C0_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "CFARepeatPatternDim", NULL }, - { TIFFTAG_CFAPATTERN, 4, 4, TIFF_BYTE, 0, TIFF_SETGET_C0_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "CFAPattern" , NULL}, + { TIFFTAG_CFAPATTERN, -1, -1, TIFF_BYTE, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "CFAPattern" , NULL}, { TIFFTAG_COPYRIGHT, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Copyright", NULL }, /* end Pixar tags */ - { TIFFTAG_RICHTIFFIPTC, -3, -3, TIFF_LONG, 0, TIFF_SETGET_C32_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "RichTIFFIPTC", NULL }, + { TIFFTAG_RICHTIFFIPTC, -3, -3, TIFF_UNDEFINED, 0, TIFF_SETGET_C32_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "RichTIFFIPTC", NULL }, { TIFFTAG_PHOTOSHOP, -3, -3, TIFF_BYTE, 0, 
TIFF_SETGET_C32_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "Photoshop", NULL }, - { TIFFTAG_EXIFIFD, 1, 1, TIFF_IFD8, 0, TIFF_SETGET_IFD8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "EXIFIFDOffset", (TIFFFieldArray*) &exifFieldArray }, + /*--: EXIFIFD and GPSIFD specified as TIFF_LONG by Aware-Systems and not TIFF_IFD8 as in original LibTiff. + * However, for IFD-like tags, libtiff uses the data type TIFF_IFD8 in tiffFields[]-tag definition combined with + * a special handling procedure in order to write either a 32-bit value and the TIFF_IFD type-id into ClassicTIFF files + * or a 64-bit value and the TIFF_IFD8 type-id into BigTIFF files. */ + { TIFFTAG_EXIFIFD, 1, 1, TIFF_IFD8, 0, TIFF_SETGET_IFD8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "EXIFIFDOffset", (TIFFFieldArray*) &exifFieldArray }, { TIFFTAG_ICCPROFILE, -3, -3, TIFF_UNDEFINED, 0, TIFF_SETGET_C32_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "ICC Profile", NULL }, - { TIFFTAG_GPSIFD, 1, 1, TIFF_IFD8, 0, TIFF_SETGET_IFD8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "GPSIFDOffset", NULL }, + { TIFFTAG_GPSIFD, 1, 1, TIFF_IFD8, 0, TIFF_SETGET_IFD8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "GPSIFDOffset", (TIFFFieldArray*) &gpsFieldArray }, { TIFFTAG_FAXRECVPARAMS, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UINT32, FIELD_CUSTOM, TRUE, FALSE, "FaxRecvParams", NULL }, { TIFFTAG_FAXSUBADDRESS, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_ASCII, FIELD_CUSTOM, TRUE, FALSE, "FaxSubAddress", NULL }, { TIFFTAG_FAXRECVTIME, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UINT32, FIELD_CUSTOM, TRUE, FALSE, "FaxRecvTime", NULL }, @@ -163,7 +177,7 @@ tiffFields[] = { { TIFFTAG_BLACKLEVELDELTAV, -1, -1, TIFF_SRATIONAL, 0, TIFF_SETGET_C16_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "BlackLevelDeltaV", NULL }, { TIFFTAG_WHITELEVEL, -1, -1, TIFF_LONG, 0, TIFF_SETGET_C16_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "WhiteLevel", NULL }, { TIFFTAG_DEFAULTSCALE, 2, 2, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "DefaultScale", NULL }, - { TIFFTAG_BESTQUALITYSCALE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BestQualityScale", NULL }, + { TIFFTAG_BESTQUALITYSCALE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BestQualityScale", NULL }, { TIFFTAG_DEFAULTCROPORIGIN, 2, 2, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "DefaultCropOrigin", NULL }, { TIFFTAG_DEFAULTCROPSIZE, 2, 2, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "DefaultCropSize", NULL }, { TIFFTAG_COLORMATRIX1, -1, -1, TIFF_SRATIONAL, 0, TIFF_SETGET_C16_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "ColorMatrix1", NULL }, @@ -175,16 +189,16 @@ tiffFields[] = { { TIFFTAG_ANALOGBALANCE, -1, -1, TIFF_RATIONAL, 0, TIFF_SETGET_C16_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "AnalogBalance", NULL }, { TIFFTAG_ASSHOTNEUTRAL, -1, -1, TIFF_RATIONAL, 0, TIFF_SETGET_C16_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "AsShotNeutral", NULL }, { TIFFTAG_ASSHOTWHITEXY, 2, 2, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "AsShotWhiteXY", NULL }, - { TIFFTAG_BASELINEEXPOSURE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineExposure", NULL }, - { TIFFTAG_BASELINENOISE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineNoise", 
NULL }, - { TIFFTAG_BASELINESHARPNESS, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineSharpness", NULL }, + { TIFFTAG_BASELINEEXPOSURE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineExposure", NULL }, + { TIFFTAG_BASELINENOISE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineNoise", NULL }, + { TIFFTAG_BASELINESHARPNESS, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BaselineSharpness", NULL }, { TIFFTAG_BAYERGREENSPLIT, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "BayerGreenSplit", NULL }, - { TIFFTAG_LINEARRESPONSELIMIT, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "LinearResponseLimit", NULL }, + { TIFFTAG_LINEARRESPONSELIMIT, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "LinearResponseLimit", NULL }, { TIFFTAG_CAMERASERIALNUMBER, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CameraSerialNumber", NULL }, { TIFFTAG_LENSINFO, 4, 4, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "LensInfo", NULL }, - { TIFFTAG_CHROMABLURRADIUS, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "ChromaBlurRadius", NULL }, - { TIFFTAG_ANTIALIASSTRENGTH, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "AntiAliasStrength", NULL }, - { TIFFTAG_SHADOWSCALE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "ShadowScale", NULL }, + { TIFFTAG_CHROMABLURRADIUS, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "ChromaBlurRadius", NULL }, + { TIFFTAG_ANTIALIASSTRENGTH, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "AntiAliasStrength", NULL }, + { TIFFTAG_SHADOWSCALE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "ShadowScale", NULL }, { TIFFTAG_DNGPRIVATEDATA, -1, -1, TIFF_BYTE, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 1, "DNGPrivateData", NULL }, { TIFFTAG_MAKERNOTESAFETY, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "MakerNoteSafety", NULL }, { TIFFTAG_CALIBRATIONILLUMINANT1, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 0, 0, "CalibrationIlluminant1", NULL }, @@ -217,47 +231,68 @@ tiffFields[] = { /* begin pseudo tags */ }; +/* + * EXIF tags (Version 2.31, July 2016 plus version 2.32 May 2019) + */ static const TIFFField exifFields[] = { - { EXIFTAG_EXPOSURETIME, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureTime", NULL }, - { EXIFTAG_FNUMBER, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FNumber", NULL }, + { EXIFTAG_EXPOSURETIME, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureTime", NULL }, + { EXIFTAG_FNUMBER, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FNumber", NULL }, { EXIFTAG_EXPOSUREPROGRAM, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureProgram", NULL }, { EXIFTAG_SPECTRALSENSITIVITY, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, 
"SpectralSensitivity", NULL }, { EXIFTAG_ISOSPEEDRATINGS, -1, -1, TIFF_SHORT, 0, TIFF_SETGET_C16_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "ISOSpeedRatings", NULL }, { EXIFTAG_OECF, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "OptoelectricConversionFactor", NULL }, + { EXIFTAG_SENSITIVITYTYPE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SensitivityType", NULL }, + { EXIFTAG_STANDARDOUTPUTSENSITIVITY, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "StandardOutputSensitivity", NULL }, + { EXIFTAG_RECOMMENDEDEXPOSUREINDEX, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "RecommendedExposureIndex", NULL }, + { EXIFTAG_ISOSPEED, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ISOSpeed", NULL }, + { EXIFTAG_ISOSPEEDLATITUDEYYY, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ISOSpeedLatitudeyyy", NULL }, + { EXIFTAG_ISOSPEEDLATITUDEZZZ, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ISOSpeedLatitudezzz", NULL }, { EXIFTAG_EXIFVERSION, 4, 4, TIFF_UNDEFINED, 0, TIFF_SETGET_C0_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExifVersion", NULL }, { EXIFTAG_DATETIMEORIGINAL, 20, 20, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "DateTimeOriginal", NULL }, { EXIFTAG_DATETIMEDIGITIZED, 20, 20, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "DateTimeDigitized", NULL }, + { EXIFTAG_OFFSETTIME, 7, 7, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "OffsetTime", NULL }, + { EXIFTAG_OFFSETTIMEORIGINAL, 7, 7, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "OffsetTimeOriginal", NULL }, + { EXIFTAG_OFFSETTIMEDIGITIZED, 7, 7, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "OffsetTimeDigitized", NULL }, { EXIFTAG_COMPONENTSCONFIGURATION, 4, 4, TIFF_UNDEFINED, 0, TIFF_SETGET_C0_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ComponentsConfiguration", NULL }, - { EXIFTAG_COMPRESSEDBITSPERPIXEL, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CompressedBitsPerPixel", NULL }, - { EXIFTAG_SHUTTERSPEEDVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ShutterSpeedValue", NULL }, - { EXIFTAG_APERTUREVALUE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ApertureValue", NULL }, - { EXIFTAG_BRIGHTNESSVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "BrightnessValue", NULL }, - { EXIFTAG_EXPOSUREBIASVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureBiasValue", NULL }, - { EXIFTAG_MAXAPERTUREVALUE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "MaxApertureValue", NULL }, - { EXIFTAG_SUBJECTDISTANCE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubjectDistance", NULL }, + { EXIFTAG_COMPRESSEDBITSPERPIXEL, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CompressedBitsPerPixel", NULL }, + { EXIFTAG_SHUTTERSPEEDVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ShutterSpeedValue", NULL }, + { EXIFTAG_APERTUREVALUE, 
1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ApertureValue", NULL }, + { EXIFTAG_BRIGHTNESSVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "BrightnessValue", NULL }, + { EXIFTAG_EXPOSUREBIASVALUE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureBiasValue", NULL }, + { EXIFTAG_MAXAPERTUREVALUE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "MaxApertureValue", NULL }, + /*--: EXIFTAG_SUBJECTDISTANCE: LibTiff returns value of "-1" if numerator equals 4294967295 (0xFFFFFFFF) to indicate infinite distance! + * However, there are two other EXIF tags where numerator indicates a special value and six other cases where the denominator indicates special values, + * which are not treated within LibTiff!! */ + { EXIFTAG_SUBJECTDISTANCE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubjectDistance", NULL }, { EXIFTAG_METERINGMODE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "MeteringMode", NULL }, { EXIFTAG_LIGHTSOURCE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "LightSource", NULL }, { EXIFTAG_FLASH, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Flash", NULL }, - { EXIFTAG_FOCALLENGTH, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalLength", NULL }, + { EXIFTAG_FOCALLENGTH, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalLength", NULL }, { EXIFTAG_SUBJECTAREA, -1, -1, TIFF_SHORT, 0, TIFF_SETGET_C16_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "SubjectArea", NULL }, { EXIFTAG_MAKERNOTE, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "MakerNote", NULL }, { EXIFTAG_USERCOMMENT, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "UserComment", NULL }, { EXIFTAG_SUBSECTIME, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubSecTime", NULL }, { EXIFTAG_SUBSECTIMEORIGINAL, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubSecTimeOriginal", NULL }, { EXIFTAG_SUBSECTIMEDIGITIZED, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubSecTimeDigitized", NULL }, + { EXIFTAG_TEMPERATURE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Temperature", NULL }, + { EXIFTAG_HUMIDITY, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Humidity", NULL }, + { EXIFTAG_PRESSURE, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Pressure", NULL }, + { EXIFTAG_WATERDEPTH, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "WaterDepth", NULL }, + { EXIFTAG_ACCELERATION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Acceleration", NULL }, + { EXIFTAG_CAMERAELEVATIONANGLE, 1, 1, TIFF_SRATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CameraElevationAngle", NULL }, { EXIFTAG_FLASHPIXVERSION, 4, 4, TIFF_UNDEFINED, 0, TIFF_SETGET_C0_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FlashpixVersion", NULL }, { EXIFTAG_COLORSPACE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, 
FIELD_CUSTOM, 1, 0, "ColorSpace", NULL }, { EXIFTAG_PIXELXDIMENSION, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "PixelXDimension", NULL }, { EXIFTAG_PIXELYDIMENSION, 1, 1, TIFF_LONG, 0, TIFF_SETGET_UINT32, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "PixelYDimension", NULL }, { EXIFTAG_RELATEDSOUNDFILE, 13, 13, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "RelatedSoundFile", NULL }, - { EXIFTAG_FLASHENERGY, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FlashEnergy", NULL }, + { EXIFTAG_FLASHENERGY, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FlashEnergy", NULL }, { EXIFTAG_SPATIALFREQUENCYRESPONSE, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "SpatialFrequencyResponse", NULL }, - { EXIFTAG_FOCALPLANEXRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalPlaneXResolution", NULL }, - { EXIFTAG_FOCALPLANEYRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalPlaneYResolution", NULL }, + { EXIFTAG_FOCALPLANEXRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalPlaneXResolution", NULL }, + { EXIFTAG_FOCALPLANEYRESOLUTION, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalPlaneYResolution", NULL }, { EXIFTAG_FOCALPLANERESOLUTIONUNIT, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalPlaneResolutionUnit", NULL }, { EXIFTAG_SUBJECTLOCATION, 2, 2, TIFF_SHORT, 0, TIFF_SETGET_C0_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubjectLocation", NULL }, - { EXIFTAG_EXPOSUREINDEX, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureIndex", NULL }, + { EXIFTAG_EXPOSUREINDEX, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureIndex", NULL }, { EXIFTAG_SENSINGMETHOD, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SensingMethod", NULL }, { EXIFTAG_FILESOURCE, 1, 1, TIFF_UNDEFINED, 0, TIFF_SETGET_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FileSource", NULL }, { EXIFTAG_SCENETYPE, 1, 1, TIFF_UNDEFINED, 0, TIFF_SETGET_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SceneType", NULL }, @@ -265,22 +300,79 @@ exifFields[] = { { EXIFTAG_CUSTOMRENDERED, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CustomRendered", NULL }, { EXIFTAG_EXPOSUREMODE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ExposureMode", NULL }, { EXIFTAG_WHITEBALANCE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "WhiteBalance", NULL }, - { EXIFTAG_DIGITALZOOMRATIO, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "DigitalZoomRatio", NULL }, + { EXIFTAG_DIGITALZOOMRATIO, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "DigitalZoomRatio", NULL }, { EXIFTAG_FOCALLENGTHIN35MMFILM, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "FocalLengthIn35mmFilm", NULL }, { EXIFTAG_SCENECAPTURETYPE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SceneCaptureType", NULL }, - { EXIFTAG_GAINCONTROL, 1, 1, TIFF_RATIONAL, 0, 
TIFF_SETGET_DOUBLE, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "GainControl", NULL }, + { EXIFTAG_GAINCONTROL, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "GainControl", NULL }, { EXIFTAG_CONTRAST, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Contrast", NULL }, { EXIFTAG_SATURATION, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Saturation", NULL }, { EXIFTAG_SHARPNESS, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Sharpness", NULL }, { EXIFTAG_DEVICESETTINGDESCRIPTION, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "DeviceSettingDescription", NULL }, { EXIFTAG_SUBJECTDISTANCERANGE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SubjectDistanceRange", NULL }, - { EXIFTAG_IMAGEUNIQUEID, 33, 33, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ImageUniqueID", NULL } + { EXIFTAG_IMAGEUNIQUEID, 33, 33, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "ImageUniqueID", NULL }, + { EXIFTAG_CAMERAOWNERNAME, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CameraOwnerName", NULL }, + { EXIFTAG_BODYSERIALNUMBER, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "BodySerialNumber", NULL }, + { EXIFTAG_LENSSPECIFICATION, 4, 4, TIFF_RATIONAL, 0, TIFF_SETGET_C0_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "LensSpecification", NULL }, + { EXIFTAG_LENSMAKE, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "LensMake", NULL }, + { EXIFTAG_LENSMODEL, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "LensModel", NULL }, + { EXIFTAG_LENSSERIALNUMBER, -1, -1, TIFF_ASCII, 0, TIFF_SETGET_ASCII, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "LensSerialNumber", NULL }, + { EXIFTAG_GAMMA, 1, 1, TIFF_RATIONAL, 0, TIFF_SETGET_FLOAT, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "Gamma", NULL }, + { EXIFTAG_COMPOSITEIMAGE, 1, 1, TIFF_SHORT, 0, TIFF_SETGET_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "CompositeImage", NULL }, + { EXIFTAG_SOURCEIMAGENUMBEROFCOMPOSITEIMAGE, 2, 2, TIFF_SHORT, 0, TIFF_SETGET_C0_UINT16, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 0, "SourceImageNumberOfCompositeImage", NULL }, + { EXIFTAG_SOURCEEXPOSURETIMESOFCOMPOSITEIMAGE, -1, -1, TIFF_UNDEFINED, 0, TIFF_SETGET_C16_UINT8, TIFF_SETGET_UNDEFINED, FIELD_CUSTOM, 1, 1, "SourceExposureTimesOfCompositeImage", NULL } +}; +/* + * EXIF-GPS tags (Version 2.31, July 2016; nothing changed for version 2.32 May 2019) + */ + +static TIFFField +gpsFields[] = { + /* For the GPS tag definitions in gpsFields[] the standard definition for Rationals is TIFF_SETGET_DOUBLE and TIFF_SETGET_C0_FLOAT. + *-- ATTENTION: After the upgrade with Rational2Double, the GPSTAG values can now be written and also read in double precision! + * In order to achieve double precision for GPS tags: + * Standard definitions for GPSTAG is kept to TIFF_SETGET_DOUBLE + * and TIFF_SETGET_C0_FLOAT is changed to TIFF_SETGET_C0_DOUBLE. 
+ */ + { GPSTAG_VERSIONID , 4, 4, TIFF_BYTE , 0, TIFF_SETGET_C0_UINT8 , TIFF_SETGET_UINT8 , FIELD_CUSTOM , 1, 0, "VersionID", NULL }, + { GPSTAG_LATITUDEREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "LatitudeRef", NULL }, + { GPSTAG_LATITUDE , 3, 3, TIFF_RATIONAL , 0, TIFF_SETGET_C0_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Latitude", NULL }, + { GPSTAG_LONGITUDEREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "LongitudeRef", NULL }, + { GPSTAG_LONGITUDE , 3, 3, TIFF_RATIONAL , 0, TIFF_SETGET_C0_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Longitude", NULL }, + { GPSTAG_ALTITUDEREF , 1, 1, TIFF_BYTE , 0, TIFF_SETGET_UINT8 , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "AltitudeRef", NULL }, + { GPSTAG_ALTITUDE , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Altitude", NULL }, + { GPSTAG_TIMESTAMP , 3, 3, TIFF_RATIONAL , 0, TIFF_SETGET_C0_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "TimeStamp", NULL }, + { GPSTAG_SATELLITES , -1, -1, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Satellites", NULL }, + { GPSTAG_STATUS , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Status", NULL }, + { GPSTAG_MEASUREMODE , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "MeasureMode", NULL }, + { GPSTAG_DOP , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DOP", NULL }, + { GPSTAG_SPEEDREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "SpeedRef", NULL }, + { GPSTAG_SPEED , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Speed", NULL }, + { GPSTAG_TRACKREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "TrackRef", NULL }, + { GPSTAG_TRACK , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Track", NULL }, + { GPSTAG_IMGDIRECTIONREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "ImgDirectionRef", NULL }, + { GPSTAG_IMGDIRECTION , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "ImgDirection", NULL }, + { GPSTAG_MAPDATUM , -1, -1, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "MapDatum", NULL }, + { GPSTAG_DESTLATITUDEREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestLatitudeRef", NULL }, + { GPSTAG_DESTLATITUDE , 3, 3, TIFF_RATIONAL , 0, TIFF_SETGET_C0_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestLatitude", NULL }, + { GPSTAG_DESTLONGITUDEREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestLongitudeRef", NULL }, + { GPSTAG_DESTLONGITUDE , 3, 3, TIFF_RATIONAL , 0, TIFF_SETGET_C0_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestLongitude", NULL }, + { GPSTAG_DESTBEARINGREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestBearingRef", NULL }, + { GPSTAG_DESTBEARING , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestBearing", NULL }, + { GPSTAG_DESTDISTANCEREF , 2, 2, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestDistanceRef", NULL }, + { GPSTAG_DESTDISTANCE 
, 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DestDistance", NULL }, + { GPSTAG_PROCESSINGMETHOD , -1, -1, TIFF_UNDEFINED , 0, TIFF_SETGET_C16_UINT8 , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 1, "ProcessingMethod", NULL }, + { GPSTAG_AREAINFORMATION , -1, -1, TIFF_UNDEFINED , 0, TIFF_SETGET_C16_UINT8 , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 1, "AreaInformation", NULL }, + { GPSTAG_DATESTAMP , 11, 11, TIFF_ASCII , 0, TIFF_SETGET_ASCII , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "DateStamp", NULL }, + { GPSTAG_DIFFERENTIAL , 1, 1, TIFF_SHORT , 0, TIFF_SETGET_UINT16 , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "Differential", NULL }, + { GPSTAG_GPSHPOSITIONINGERROR , 1, 1, TIFF_RATIONAL , 0, TIFF_SETGET_DOUBLE , TIFF_SETGET_UNDEFINED , FIELD_CUSTOM , 1, 0, "HorizontalPositioningError", NULL } }; static const TIFFFieldArray tiffFieldArray = { tfiatImage, 0, TIFFArrayCount(tiffFields), (TIFFField*) tiffFields }; static const TIFFFieldArray exifFieldArray = { tfiatExif, 0, TIFFArrayCount(exifFields), (TIFFField*) exifFields }; +static const TIFFFieldArray +gpsFieldArray = { tfiatGps, 0, TIFFArrayCount(gpsFields), (TIFFField*) gpsFields }; /* * We have our own local lfind() equivalent to avoid subtle differences @@ -313,6 +405,12 @@ _TIFFGetExifFields(void) return(&exifFieldArray); } +const TIFFFieldArray* +_TIFFGetGpsFields(void) +{ + return(&gpsFieldArray); +} + void _TIFFSetupFields(TIFF* tif, const TIFFFieldArray* fieldarray) { @@ -502,6 +600,82 @@ _TIFFDataSize(TIFFDataType type) } } +/* + * Rational2Double: + * Return size of TIFFSetGetFieldType in bytes. + * + * XXX: TIFF_RATIONAL values for FIELD_CUSTOM are stored internally as 4-byte float. + * However, some of them should be stored internally as 8-byte double. + * This is now managed by the SetGetField of the tag-definition! 
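+ *
+ * Usage sketch (illustrative only; "fip" is assumed to be the tag's
+ * TIFFField definition): the returned size is what callers use to
+ * dimension the internal value buffer of a custom tag, e.g.
+ *
+ *     int tv_size = _TIFFSetGetFieldSize(fip->set_field_type);
+ *     void* val = _TIFFCheckMalloc(tif, count, tv_size, "custom tag value");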
+ */ +int +_TIFFSetGetFieldSize(TIFFSetGetFieldType setgettype) +{ + switch (setgettype) + { + case TIFF_SETGET_UNDEFINED: + case TIFF_SETGET_ASCII: + case TIFF_SETGET_C0_ASCII: + case TIFF_SETGET_C16_ASCII: + case TIFF_SETGET_C32_ASCII: + case TIFF_SETGET_OTHER: + return 0; + case TIFF_SETGET_UINT8: + case TIFF_SETGET_SINT8: + case TIFF_SETGET_C0_UINT8: + case TIFF_SETGET_C0_SINT8: + case TIFF_SETGET_C16_UINT8: + case TIFF_SETGET_C16_SINT8: + case TIFF_SETGET_C32_UINT8: + case TIFF_SETGET_C32_SINT8: + return 1; + case TIFF_SETGET_UINT16: + case TIFF_SETGET_SINT16: + case TIFF_SETGET_C0_UINT16: + case TIFF_SETGET_C0_SINT16: + case TIFF_SETGET_C16_UINT16: + case TIFF_SETGET_C16_SINT16: + case TIFF_SETGET_C32_UINT16: + case TIFF_SETGET_C32_SINT16: + return 2; + case TIFF_SETGET_INT: + case TIFF_SETGET_UINT32: + case TIFF_SETGET_SINT32: + case TIFF_SETGET_FLOAT: + case TIFF_SETGET_UINT16_PAIR: + case TIFF_SETGET_C0_UINT32: + case TIFF_SETGET_C0_SINT32: + case TIFF_SETGET_C0_FLOAT: + case TIFF_SETGET_C16_UINT32: + case TIFF_SETGET_C16_SINT32: + case TIFF_SETGET_C16_FLOAT: + case TIFF_SETGET_C32_UINT32: + case TIFF_SETGET_C32_SINT32: + case TIFF_SETGET_C32_FLOAT: + return 4; + case TIFF_SETGET_UINT64: + case TIFF_SETGET_SINT64: + case TIFF_SETGET_DOUBLE: + case TIFF_SETGET_IFD8: + case TIFF_SETGET_C0_UINT64: + case TIFF_SETGET_C0_SINT64: + case TIFF_SETGET_C0_DOUBLE: + case TIFF_SETGET_C0_IFD8: + case TIFF_SETGET_C16_UINT64: + case TIFF_SETGET_C16_SINT64: + case TIFF_SETGET_C16_DOUBLE: + case TIFF_SETGET_C16_IFD8: + case TIFF_SETGET_C32_UINT64: + case TIFF_SETGET_C32_SINT64: + case TIFF_SETGET_C32_DOUBLE: + case TIFF_SETGET_C32_IFD8: + return 8; + default: + return 0; + } +} /*-- _TIFFSetGetFieldSize --- */ + + const TIFFField* TIFFFindField(TIFF* tif, uint32 tag, TIFFDataType dt) { @@ -1062,10 +1236,6 @@ _TIFFCheckFieldIsValidForCodec(TIFF *tif, ttag_t tag) if (tag == TIFFTAG_LERC_PARAMETERS) return 1; break; - case COMPRESSION_WEBP: - if (tag == TIFFTAG_PREDICTOR) - return 1; - break; } return 0; } diff --git a/3rdparty/libtiff/tif_dirread.c b/3rdparty/libtiff/tif_dirread.c index 1fdcb0997a..ba127ca917 100644 --- a/3rdparty/libtiff/tif_dirread.c +++ b/3rdparty/libtiff/tif_dirread.c @@ -29,9 +29,6 @@ */ /* Suggested pending improvements: - * - add a field 'ignore' to the TIFFDirEntry structure, to flag status, - * eliminating current use of the IGNORE value, and therefore eliminating - * current irrational behaviour on tags with tag id code 0 * - add a field 'field_info' to the TIFFDirEntry structure, and set that with * the pointer to the appropriate TIFFField structure early on in * TIFFReadDirectory, so as to eliminate current possibly repetitive lookup. @@ -41,9 +38,13 @@ #include "tiffiop.h" #include <float.h> -#define IGNORE 0 /* tag placeholder used below */ #define FAILED_FII ((uint32) -1) +/* + * Largest 64-bit signed integer value.
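+ * (TIFF_UINT64_MAX >> 1 evaluates to 0x7FFFFFFFFFFFFFFF, i.e. INT64_MAX.)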
+ */ +#define TIFF_INT64_MAX ((int64)(TIFF_UINT64_MAX >> 1)) + #ifdef HAVE_IEEEFP # define TIFFCvtIEEEFloatToNative(tif, n, fp) # define TIFFCvtIEEEDoubleToNative(tif, n, dp) @@ -206,6 +207,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryByte(TIFF* tif, TIFFDirEntry* di switch (direntry->tdir_type) { case TIFF_BYTE: + case TIFF_UNDEFINED: /* Support to read TIFF_UNDEFINED with field_readcount==1 */ TIFFReadDirEntryCheckedByte(tif,direntry,value); return(TIFFReadDirEntryErrOk); case TIFF_SBYTE: @@ -637,7 +639,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryFloat(TIFF* tif, TIFFDirEntry* d err=TIFFReadDirEntryCheckedDouble(tif,direntry,&m); if (err!=TIFFReadDirEntryErrOk) return(err); - if ((m > FLT_MAX) || (m < FLT_MIN)) + if ((m > FLT_MAX) || (m < -FLT_MAX)) return(TIFFReadDirEntryErrRange); *value=(float)m; return(TIFFReadDirEntryErrOk); @@ -836,6 +838,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryArrayWithLimit( uint32 datasize; void* data; uint64 target_count64; + int original_datasize_clamped; typesize=TIFFDataWidth(direntry->tdir_type); target_count64 = (direntry->tdir_count > maxcount) ? @@ -848,6 +851,12 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryArrayWithLimit( } (void) desttypesize; + /* We just want to know if the original tag size is more than 4 bytes + * (classic TIFF) or 8 bytes (BigTIFF) + */ + original_datasize_clamped = + ((direntry->tdir_count > 10) ? 10 : (int)direntry->tdir_count) * typesize; + /* * As a sanity check, make sure we have no more than a 2GB tag array * in either the current data type or the dest data type. This also @@ -862,7 +871,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryArrayWithLimit( datasize=(*count)*typesize; assert((tmsize_t)datasize>0); - if( isMapped(tif) && datasize > (uint32)tif->tif_size ) + if( isMapped(tif) && datasize > (uint64)tif->tif_size ) return TIFFReadDirEntryErrIo; if( !isMapped(tif) && @@ -879,7 +888,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryArrayWithLimit( } if (!(tif->tif_flags&TIFF_BIGTIFF)) { - if (datasize<=4) + if (original_datasize_clamped<=4) _TIFFmemcpy(data,&direntry->tdir_offset,datasize); else { @@ -900,7 +909,7 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryArrayWithLimit( } else { - if (datasize<=8) + if (original_datasize_clamped<=8) _TIFFmemcpy(data,&direntry->tdir_offset,datasize); else { @@ -3288,11 +3297,6 @@ static enum TIFFReadDirEntryErr TIFFReadDirEntryCheckRangeLongSlong(int32 value) return(TIFFReadDirEntryErrOk); } -/* - * Largest 32-bit unsigned integer value. - */ -#define TIFF_UINT32_MAX 0xFFFFFFFFU - static enum TIFFReadDirEntryErr TIFFReadDirEntryCheckRangeLongLong8(uint64 value) { @@ -3311,8 +3315,6 @@ TIFFReadDirEntryCheckRangeLongSlong8(int64 value) return(TIFFReadDirEntryErrOk); } -#undef TIFF_UINT32_MAX - static enum TIFFReadDirEntryErr TIFFReadDirEntryCheckRangeSlongLong(uint32 value) { @@ -3378,11 +3380,6 @@ TIFFReadDirEntryCheckRangeLong8Slong8(int64 value) return(TIFFReadDirEntryErrOk); } -/* - * Largest 64-bit signed integer value. 
- */ -#define TIFF_INT64_MAX ((int64)(((uint64) ~0) >> 1)) - static enum TIFFReadDirEntryErr TIFFReadDirEntryCheckRangeSlong8Long8(uint64 value) { @@ -3392,8 +3389,6 @@ TIFFReadDirEntryCheckRangeSlong8Long8(uint64 value) return(TIFFReadDirEntryErrOk); } -#undef TIFF_INT64_MAX - static enum TIFFReadDirEntryErr TIFFReadDirEntryData(TIFF* tif, uint64 offset, tmsize_t size, void* dest) { @@ -3406,13 +3401,13 @@ TIFFReadDirEntryData(TIFF* tif, uint64 offset, tmsize_t size, void* dest) } else { size_t ma,mb; ma=(size_t)offset; + if( (uint64)ma!=offset || + ma > (~(size_t)0) - (size_t)size ) + { + return TIFFReadDirEntryErrIo; + } mb=ma+size; - if (((uint64)ma!=offset) - || (mb < ma) - || (mb - ma != (size_t) size) - || (mb < (size_t)size) - || (mb > (size_t)tif->tif_size) - ) + if (mb > (uint64)tif->tif_size) return(TIFFReadDirEntryErrIo); _TIFFmemcpy(dest,tif->tif_base+ma,size); } @@ -3535,6 +3530,49 @@ static int _TIFFGetMaxColorChannels( uint16 photometric ) } } +static int ByteCountLooksBad(TIFF* tif) +{ + /* + * Assume we have wrong StripByteCount value (in case + * of single strip) in following cases: + * - it is equal to zero along with StripOffset; + * - it is larger than file itself (in case of uncompressed + * image); + * - it is smaller than the size of the bytes per row + * multiplied on the number of rows. The last case should + * not be checked in the case of writing new image, + * because we may do not know the exact strip size + * until the whole image will be written and directory + * dumped out. + */ + uint64 bytecount = TIFFGetStrileByteCount(tif, 0); + uint64 offset = TIFFGetStrileOffset(tif, 0); + uint64 filesize; + + if( offset == 0 ) + return 0; + if (bytecount == 0) + return 1; + if ( tif->tif_dir.td_compression != COMPRESSION_NONE ) + return 0; + filesize = TIFFGetFileSize(tif); + if( offset <= filesize && bytecount > filesize - offset ) + return 1; + if( tif->tif_mode == O_RDONLY ) + { + uint64 scanlinesize = TIFFScanlineSize64(tif); + if( tif->tif_dir.td_imagelength > 0 && + scanlinesize > TIFF_UINT64_MAX / tif->tif_dir.td_imagelength ) + { + return 1; + } + if( bytecount < scanlinesize * tif->tif_dir.td_imagelength) + return 1; + } + return 0; +} + + /* * Read the next TIFF directory from a file and convert it to the internal * format. We read directories sequentially. 
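The ByteCountLooksBad() helper introduced above is also careful about unsigned wrap-around: it tests offset <= filesize && bytecount > filesize - offset rather than the naive offset + bytecount > filesize, whose 64-bit sum could overflow and slip past the check. A minimal sketch of the same pattern (hypothetical helper name, shown only for illustration):

    #include <stdint.h>

    /* Overflow-safe test that [offset, offset + bytecount) lies within a
     * file of 'filesize' bytes: once offset <= filesize is known, the
     * subtraction cannot wrap. */
    static int range_fits_in_file(uint64_t offset, uint64_t bytecount,
                                  uint64_t filesize)
    {
        return offset <= filesize && bytecount <= filesize - offset;
    }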
@@ -3581,14 +3619,17 @@ TIFFReadDirectory(TIFF* tif) uint16 nb; for (na=ma+1, nb=mb+1; nb<dircount; na++, nb++) { - if (ma->tdir_tag==na->tdir_tag) - na->tdir_tag=IGNORE; + if (ma->tdir_tag == na->tdir_tag) { + na->tdir_ignore = TRUE; + } } } } tif->tif_flags &= ~TIFF_BEENWRITING; /* reset before new dir */ tif->tif_flags &= ~TIFF_BUF4WRITE; /* reset before new dir */ + tif->tif_flags &= ~TIFF_CHOPPEDUPARRAYS; + /* free any old stuff and reinit */ TIFFFreeDirectory(tif); TIFFDefaultDirectory(tif); @@ -3621,7 +3662,7 @@ TIFFReadDirectory(TIFF* tif) { if (!TIFFFetchNormalTag(tif,dp,0)) goto bad; - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } dp=TIFFReadDirectoryFindEntry(tif,dir,dircount,TIFFTAG_COMPRESSION); if (dp) @@ -3644,7 +3685,7 @@ TIFFReadDirectory(TIFF* tif) } if (!TIFFSetField(tif,TIFFTAG_COMPRESSION,value)) goto bad; - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } else { @@ -3656,7 +3697,7 @@ */ for (di=0, dp=dir; di<dircount; di++, dp++) { - if (dp->tdir_tag!=IGNORE) + if (!dp->tdir_ignore) { TIFFReadDirectoryFindFieldInfo(tif,dp->tdir_tag,&fii); if (fii == FAILED_FII) @@ -3664,8 +3705,8 @@ TIFFReadDirectory(TIFF* tif) TIFFWarningExt(tif->tif_clientdata, module, "Unknown field with tag %d (0x%x) encountered", dp->tdir_tag,dp->tdir_tag); - /* the following knowingly leaks the - anonymous field structure */ + /* the following knowingly leaks the + anonymous field structure */ if (!_TIFFMergeFields(tif, _TIFFCreateAnonField(tif, dp->tdir_tag, @@ -3676,18 +3717,18 @@ TIFFReadDirectory(TIFF* tif) "Registering anonymous field with tag %d (0x%x) failed", dp->tdir_tag, dp->tdir_tag); - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } else { TIFFReadDirectoryFindFieldInfo(tif,dp->tdir_tag,&fii); assert(fii != FAILED_FII); } } } - if (dp->tdir_tag!=IGNORE) + if (!dp->tdir_ignore) { fip=tif->tif_fields[fii]; if (fip->field_bit==FIELD_IGNORE) - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; else { switch (dp->tdir_tag) @@ -3709,12 +3750,12 @@ TIFFReadDirectory(TIFF* tif) case TIFFTAG_EXTRASAMPLES: if (!TIFFFetchNormalTag(tif,dp,0)) goto bad; - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; + break; + default: + if( !_TIFFCheckFieldIsValidForCodec(tif, dp->tdir_tag) ) + dp->tdir_ignore = TRUE; break; - default: - if( !_TIFFCheckFieldIsValidForCodec(tif, dp->tdir_tag) ) - dp->tdir_tag=IGNORE; - break; } } } @@ -3730,8 +3771,8 @@ TIFFReadDirectory(TIFF* tif) if ((tif->tif_dir.td_compression==COMPRESSION_OJPEG)&& (tif->tif_dir.td_planarconfig==PLANARCONFIG_SEPARATE)) { - if (!_TIFFFillStriles(tif)) - goto bad; + if (!_TIFFFillStriles(tif)) + goto bad; dp=TIFFReadDirectoryFindEntry(tif,dir,dircount,TIFFTAG_STRIPOFFSETS); if ((dp!=0)&&(dp->tdir_count==1)) { @@ -3803,190 +3844,240 @@ */ for (di=0, dp=dir; di<dircount; di++, dp++) { - switch (dp->tdir_tag) - { - case IGNORE: - break; - case TIFFTAG_MINSAMPLEVALUE: - case TIFFTAG_MAXSAMPLEVALUE: - case TIFFTAG_BITSPERSAMPLE: - case TIFFTAG_DATATYPE: - case TIFFTAG_SAMPLEFORMAT: - /* - * The MinSampleValue, MaxSampleValue, BitsPerSample - * DataType and SampleFormat tags are supposed to be - * written as one value/sample, but some vendors - * incorrectly write one value only -- so we accept - * that as well (yuck). Other vendors write correct - * value for NumberOfSamples, but incorrect one for - * BitsPerSample and friends, and we will read this - * too.
- */ - { - uint16 value; - enum TIFFReadDirEntryErr err; - err=TIFFReadDirEntryShort(tif,dp,&value); - if (err==TIFFReadDirEntryErrCount) - err=TIFFReadDirEntryPersampleShort(tif,dp,&value); - if (err!=TIFFReadDirEntryErrOk) + if (!dp->tdir_ignore) { + switch (dp->tdir_tag) + { + case TIFFTAG_MINSAMPLEVALUE: + case TIFFTAG_MAXSAMPLEVALUE: + case TIFFTAG_BITSPERSAMPLE: + case TIFFTAG_DATATYPE: + case TIFFTAG_SAMPLEFORMAT: + /* + * The MinSampleValue, MaxSampleValue, BitsPerSample + * DataType and SampleFormat tags are supposed to be + * written as one value/sample, but some vendors + * incorrectly write one value only -- so we accept + * that as well (yuck). Other vendors write correct + * value for NumberOfSamples, but incorrect one for + * BitsPerSample and friends, and we will read this + * too. + */ { - fip = TIFFFieldWithTag(tif,dp->tdir_tag); - TIFFReadDirEntryOutputErr(tif,err,module,fip ? fip->field_name : "unknown tagname",0); - goto bad; - } - if (!TIFFSetField(tif,dp->tdir_tag,value)) - goto bad; - if( dp->tdir_tag == TIFFTAG_BITSPERSAMPLE ) - bitspersample_read = TRUE; - } - break; - case TIFFTAG_SMINSAMPLEVALUE: - case TIFFTAG_SMAXSAMPLEVALUE: - { - - double *data = NULL; - enum TIFFReadDirEntryErr err; - uint32 saved_flags; - int m; - if (dp->tdir_count != (uint64)tif->tif_dir.td_samplesperpixel) - err = TIFFReadDirEntryErrCount; - else - err = TIFFReadDirEntryDoubleArray(tif, dp, &data); - if (err!=TIFFReadDirEntryErrOk) - { - fip = TIFFFieldWithTag(tif,dp->tdir_tag); - TIFFReadDirEntryOutputErr(tif,err,module,fip ? fip->field_name : "unknown tagname",0); - goto bad; - } - saved_flags = tif->tif_flags; - tif->tif_flags |= TIFF_PERSAMPLE; - m = TIFFSetField(tif,dp->tdir_tag,data); - tif->tif_flags = saved_flags; - _TIFFfree(data); - if (!m) - goto bad; - } - break; - case TIFFTAG_STRIPOFFSETS: - case TIFFTAG_TILEOFFSETS: -#if defined(DEFER_STRILE_LOAD) - _TIFFmemcpy( &(tif->tif_dir.td_stripoffset_entry), - dp, sizeof(TIFFDirEntry) ); -#else - if( tif->tif_dir.td_stripoffset != NULL ) - { - TIFFErrorExt(tif->tif_clientdata, module, - "tif->tif_dir.td_stripoffset is " - "already allocated. Likely duplicated " - "StripOffsets/TileOffsets tag"); - goto bad; - } - if (!TIFFFetchStripThing(tif,dp,tif->tif_dir.td_nstrips,&tif->tif_dir.td_stripoffset)) - goto bad; -#endif - break; - case TIFFTAG_STRIPBYTECOUNTS: - case TIFFTAG_TILEBYTECOUNTS: -#if defined(DEFER_STRILE_LOAD) - _TIFFmemcpy( &(tif->tif_dir.td_stripbytecount_entry), - dp, sizeof(TIFFDirEntry) ); -#else - if( tif->tif_dir.td_stripbytecount != NULL ) - { - TIFFErrorExt(tif->tif_clientdata, module, - "tif->tif_dir.td_stripbytecount is " - "already allocated. Likely duplicated " - "StripByteCounts/TileByteCounts tag"); - goto bad; - } - if (!TIFFFetchStripThing(tif,dp,tif->tif_dir.td_nstrips,&tif->tif_dir.td_stripbytecount)) - goto bad; -#endif - break; - case TIFFTAG_COLORMAP: - case TIFFTAG_TRANSFERFUNCTION: - { - enum TIFFReadDirEntryErr err; - uint32 countpersample; - uint32 countrequired; - uint32 incrementpersample; - uint16* value=NULL; - /* It would be dangerous to instantiate those tag values */ - /* since if td_bitspersample has not yet been read (due to */ - /* unordered tags), it could be read afterwards with a */ - /* values greater than the default one (1), which may cause */ - /* crashes in user code */ - if( !bitspersample_read ) - { - fip = TIFFFieldWithTag(tif,dp->tdir_tag); - TIFFWarningExt(tif->tif_clientdata,module, - "Ignoring %s since BitsPerSample tag not found", - fip ? 
fip->field_name : "unknown tagname"); - continue; - } - /* ColorMap or TransferFunction for high bit */ - /* depths do not make much sense and could be */ - /* used as a denial of service vector */ - if (tif->tif_dir.td_bitspersample > 24) - { - fip = TIFFFieldWithTag(tif,dp->tdir_tag); - TIFFWarningExt(tif->tif_clientdata,module, - "Ignoring %s because BitsPerSample=%d>24", - fip ? fip->field_name : "unknown tagname", - tif->tif_dir.td_bitspersample); - continue; - } - countpersample=(1U<tif_dir.td_bitspersample); - if ((dp->tdir_tag==TIFFTAG_TRANSFERFUNCTION)&&(dp->tdir_count==(uint64)countpersample)) - { - countrequired=countpersample; - incrementpersample=0; - } - else - { - countrequired=3*countpersample; - incrementpersample=countpersample; - } - if (dp->tdir_count!=(uint64)countrequired) - err=TIFFReadDirEntryErrCount; - else - err=TIFFReadDirEntryShortArray(tif,dp,&value); - if (err!=TIFFReadDirEntryErrOk) - { - fip = TIFFFieldWithTag(tif,dp->tdir_tag); - TIFFReadDirEntryOutputErr(tif,err,module,fip ? fip->field_name : "unknown tagname",1); - } - else - { - TIFFSetField(tif,dp->tdir_tag,value,value+incrementpersample,value+2*incrementpersample); - _TIFFfree(value); - } - } - break; -/* BEGIN REV 4.0 COMPATIBILITY */ - case TIFFTAG_OSUBFILETYPE: - { - uint16 valueo; - uint32 value; - if (TIFFReadDirEntryShort(tif,dp,&valueo)==TIFFReadDirEntryErrOk) - { - switch (valueo) + uint16 value; + enum TIFFReadDirEntryErr err; + err=TIFFReadDirEntryShort(tif,dp,&value); + if (err==TIFFReadDirEntryErrCount) + err=TIFFReadDirEntryPersampleShort(tif,dp,&value); + if (err!=TIFFReadDirEntryErrOk) { - case OFILETYPE_REDUCEDIMAGE: value=FILETYPE_REDUCEDIMAGE; break; - case OFILETYPE_PAGE: value=FILETYPE_PAGE; break; - default: value=0; break; + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFReadDirEntryOutputErr(tif,err,module,fip ? fip->field_name : "unknown tagname",0); + goto bad; } - if (value!=0) - TIFFSetField(tif,TIFFTAG_SUBFILETYPE,value); + if (!TIFFSetField(tif,dp->tdir_tag,value)) + goto bad; + if( dp->tdir_tag == TIFFTAG_BITSPERSAMPLE ) + bitspersample_read = TRUE; } - } - break; + break; + case TIFFTAG_SMINSAMPLEVALUE: + case TIFFTAG_SMAXSAMPLEVALUE: + { + + double *data = NULL; + enum TIFFReadDirEntryErr err; + uint32 saved_flags; + int m; + if (dp->tdir_count != (uint64)tif->tif_dir.td_samplesperpixel) + err = TIFFReadDirEntryErrCount; + else + err = TIFFReadDirEntryDoubleArray(tif, dp, &data); + if (err!=TIFFReadDirEntryErrOk) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFReadDirEntryOutputErr(tif,err,module,fip ? fip->field_name : "unknown tagname",0); + goto bad; + } + saved_flags = tif->tif_flags; + tif->tif_flags |= TIFF_PERSAMPLE; + m = TIFFSetField(tif,dp->tdir_tag,data); + tif->tif_flags = saved_flags; + _TIFFfree(data); + if (!m) + goto bad; + } + break; + case TIFFTAG_STRIPOFFSETS: + case TIFFTAG_TILEOFFSETS: + switch( dp->tdir_type ) + { + case TIFF_SHORT: + case TIFF_LONG: + case TIFF_LONG8: + break; + default: + /* Warn except if directory typically created with TIFFDeferStrileArrayWriting() */ + if( !(tif->tif_mode == O_RDWR && + dp->tdir_count == 0 && + dp->tdir_type == 0 && + dp->tdir_offset.toff_long8 == 0) ) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFWarningExt(tif->tif_clientdata,module, + "Invalid data type for tag %s", + fip ? 
fip->field_name : "unknown tagname"); + } + break; + } + _TIFFmemcpy( &(tif->tif_dir.td_stripoffset_entry), + dp, sizeof(TIFFDirEntry) ); + break; + case TIFFTAG_STRIPBYTECOUNTS: + case TIFFTAG_TILEBYTECOUNTS: + switch( dp->tdir_type ) + { + case TIFF_SHORT: + case TIFF_LONG: + case TIFF_LONG8: + break; + default: + /* Warn except if directory typically created with TIFFDeferStrileArrayWriting() */ + if( !(tif->tif_mode == O_RDWR && + dp->tdir_count == 0 && + dp->tdir_type == 0 && + dp->tdir_offset.toff_long8 == 0) ) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFWarningExt(tif->tif_clientdata,module, + "Invalid data type for tag %s", + fip ? fip->field_name : "unknown tagname"); + } + break; + } + _TIFFmemcpy( &(tif->tif_dir.td_stripbytecount_entry), + dp, sizeof(TIFFDirEntry) ); + break; + case TIFFTAG_COLORMAP: + case TIFFTAG_TRANSFERFUNCTION: + { + enum TIFFReadDirEntryErr err; + uint32 countpersample; + uint32 countrequired; + uint32 incrementpersample; + uint16* value=NULL; + /* It would be dangerous to instantiate those tag values */ + /* since if td_bitspersample has not yet been read (due to */ + /* unordered tags), it could be read afterwards with a */ + /* values greater than the default one (1), which may cause */ + /* crashes in user code */ + if( !bitspersample_read ) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFWarningExt(tif->tif_clientdata,module, + "Ignoring %s since BitsPerSample tag not found", + fip ? fip->field_name : "unknown tagname"); + continue; + } + /* ColorMap or TransferFunction for high bit */ + /* depths do not make much sense and could be */ + /* used as a denial of service vector */ + if (tif->tif_dir.td_bitspersample > 24) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFWarningExt(tif->tif_clientdata,module, + "Ignoring %s because BitsPerSample=%d>24", + fip ? fip->field_name : "unknown tagname", + tif->tif_dir.td_bitspersample); + continue; + } + countpersample=(1U<tif_dir.td_bitspersample); + if ((dp->tdir_tag==TIFFTAG_TRANSFERFUNCTION)&&(dp->tdir_count==(uint64)countpersample)) + { + countrequired=countpersample; + incrementpersample=0; + } + else + { + countrequired=3*countpersample; + incrementpersample=countpersample; + } + if (dp->tdir_count!=(uint64)countrequired) + err=TIFFReadDirEntryErrCount; + else + err=TIFFReadDirEntryShortArray(tif,dp,&value); + if (err!=TIFFReadDirEntryErrOk) + { + fip = TIFFFieldWithTag(tif,dp->tdir_tag); + TIFFReadDirEntryOutputErr(tif,err,module,fip ? 
fip->field_name : "unknown tagname",1); + } + else + { + TIFFSetField(tif,dp->tdir_tag,value,value+incrementpersample,value+2*incrementpersample); + _TIFFfree(value); + } + } + break; +/* BEGIN REV 4.0 COMPATIBILITY */ + case TIFFTAG_OSUBFILETYPE: + { + uint16 valueo; + uint32 value; + if (TIFFReadDirEntryShort(tif,dp,&valueo)==TIFFReadDirEntryErrOk) + { + switch (valueo) + { + case OFILETYPE_REDUCEDIMAGE: value=FILETYPE_REDUCEDIMAGE; break; + case OFILETYPE_PAGE: value=FILETYPE_PAGE; break; + default: value=0; break; + } + if (value!=0) + TIFFSetField(tif,TIFFTAG_SUBFILETYPE,value); + } + } + break; /* END REV 4.0 COMPATIBILITY */ - default: - (void) TIFFFetchNormalTag(tif, dp, TRUE); - break; - } - } + default: + (void) TIFFFetchNormalTag(tif, dp, TRUE); + break; + } + } /* -- if (!dp->tdir_ignore) */ + } /* -- for-loop -- */ + + if( tif->tif_mode == O_RDWR && + tif->tif_dir.td_stripoffset_entry.tdir_tag != 0 && + tif->tif_dir.td_stripoffset_entry.tdir_count == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_type == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_offset.toff_long8 == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_tag != 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_count == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_type == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_offset.toff_long8 == 0 ) + { + /* Directory typically created with TIFFDeferStrileArrayWriting() */ + TIFFSetupStrips(tif); + } + else if( !(tif->tif_flags&TIFF_DEFERSTRILELOAD) ) + { + if( tif->tif_dir.td_stripoffset_entry.tdir_tag != 0 ) + { + if (!TIFFFetchStripThing(tif,&(tif->tif_dir.td_stripoffset_entry), + tif->tif_dir.td_nstrips, + &tif->tif_dir.td_stripoffset_p)) + { + goto bad; + } + } + if( tif->tif_dir.td_stripbytecount_entry.tdir_tag != 0 ) + { + if (!TIFFFetchStripThing(tif,&(tif->tif_dir.td_stripbytecount_entry), + tif->tif_dir.td_nstrips, + &tif->tif_dir.td_stripbytecount_p)) + { + goto bad; + } + } + } + /* * OJPEG hack: * - If a) compression is OJPEG, and b) photometric tag is missing, @@ -4129,33 +4220,10 @@ TIFFReadDirectory(TIFF* tif) "\"StripByteCounts\" field, calculating from imagelength"); if (EstimateStripByteCounts(tif, dir, dircount) < 0) goto bad; - /* - * Assume we have wrong StripByteCount value (in case - * of single strip) in following cases: - * - it is equal to zero along with StripOffset; - * - it is larger than file itself (in case of uncompressed - * image); - * - it is smaller than the size of the bytes per row - * multiplied on the number of rows. The last case should - * not be checked in the case of writing new image, - * because we may do not know the exact strip size - * until the whole image will be written and directory - * dumped out. 
- */ - #define BYTECOUNTLOOKSBAD \ - ( (tif->tif_dir.td_stripbytecount[0] == 0 && tif->tif_dir.td_stripoffset[0] != 0) || \ - (tif->tif_dir.td_compression == COMPRESSION_NONE && \ - (tif->tif_dir.td_stripoffset[0] <= TIFFGetFileSize(tif) && \ - tif->tif_dir.td_stripbytecount[0] > TIFFGetFileSize(tif) - tif->tif_dir.td_stripoffset[0])) || \ - (tif->tif_mode == O_RDONLY && \ - tif->tif_dir.td_compression == COMPRESSION_NONE && \ - tif->tif_dir.td_stripbytecount[0] < TIFFScanlineSize64(tif) * tif->tif_dir.td_imagelength) ) } else if (tif->tif_dir.td_nstrips == 1 && !(tif->tif_flags&TIFF_ISTILED) - && _TIFFFillStriles(tif) - && tif->tif_dir.td_stripoffset[0] != 0 - && BYTECOUNTLOOKSBAD) { + && ByteCountLooksBad(tif)) { /* * XXX: Plexus (and others) sometimes give a value of * zero for a tag when they don't know what the @@ -4167,13 +4235,13 @@ TIFFReadDirectory(TIFF* tif) if(EstimateStripByteCounts(tif, dir, dircount) < 0) goto bad; -#if !defined(DEFER_STRILE_LOAD) - } else if (tif->tif_dir.td_planarconfig == PLANARCONFIG_CONTIG + } else if (!(tif->tif_flags&TIFF_DEFERSTRILELOAD) + && tif->tif_dir.td_planarconfig == PLANARCONFIG_CONTIG && tif->tif_dir.td_nstrips > 2 && tif->tif_dir.td_compression == COMPRESSION_NONE - && tif->tif_dir.td_stripbytecount[0] != tif->tif_dir.td_stripbytecount[1] - && tif->tif_dir.td_stripbytecount[0] != 0 - && tif->tif_dir.td_stripbytecount[1] != 0 ) { + && TIFFGetStrileByteCount(tif, 0) != TIFFGetStrileByteCount(tif, 1) + && TIFFGetStrileByteCount(tif, 0) != 0 + && TIFFGetStrileByteCount(tif, 1) != 0 ) { /* * XXX: Some vendors fill StripByteCount array with * absolutely wrong values (it can be equal to @@ -4188,7 +4256,6 @@ TIFFReadDirectory(TIFF* tif) "Wrong \"StripByteCounts\" field, ignoring and calculating from imagelength"); if (EstimateStripByteCounts(tif, dir, dircount) < 0) goto bad; -#endif /* !defined(DEFER_STRILE_LOAD) */ } } if (dir) @@ -4203,26 +4270,27 @@ TIFFReadDirectory(TIFF* tif) else tif->tif_dir.td_maxsamplevalue = (uint16)((1L<<tif->tif_dir.td_bitspersample)-1); } + +#ifdef STRIPBYTECOUNTSORTED_UNUSED /* * XXX: We can optimize checking for the strip bounds using the sorted * bytecounts array. See also comments for TIFFAppendToStrip() * function in tif_write.c. */ -#if !defined(DEFER_STRILE_LOAD) - if (tif->tif_dir.td_nstrips > 1) { + if (!(tif->tif_flags&TIFF_DEFERSTRILELOAD) && tif->tif_dir.td_nstrips > 1) { uint32 strip; tif->tif_dir.td_stripbytecountsorted = 1; for (strip = 1; strip < tif->tif_dir.td_nstrips; strip++) { - if (tif->tif_dir.td_stripoffset[strip - 1] > - tif->tif_dir.td_stripoffset[strip]) { + if (TIFFGetStrileOffset(tif, strip - 1) > + TIFFGetStrileOffset(tif, strip)) { tif->tif_dir.td_stripbytecountsorted = 0; break; } } } -#endif /* !defined(DEFER_STRILE_LOAD) */ - +#endif + /* * An opportunity for compression mode dependent tag fixup */ @@ -4241,13 +4309,11 @@ TIFFReadDirectory(TIFF* tif) (tif->tif_dir.td_nstrips==1)&& (tif->tif_dir.td_compression==COMPRESSION_NONE)&& ((tif->tif_flags&(TIFF_STRIPCHOP|TIFF_ISTILED))==TIFF_STRIPCHOP)) - { - if ( !_TIFFFillStriles(tif) || !tif->tif_dir.td_stripbytecount ) - return 0; - ChopUpSingleUncompressedStrip(tif); - } + { + ChopUpSingleUncompressedStrip(tif); + } - /* There are also uncompressed stripped files with strips larger than */ + /* There are also uncompressed striped files with strips larger than */ /* 2 GB, which make them unfriendly with a lot of code. If possible, */ /* try to expose smaller "virtual" strips.
*/ if( tif->tif_dir.td_planarconfig == PLANARCONFIG_CONTIG && @@ -4255,8 +4321,6 @@ TIFFReadDirectory(TIFF* tif) (tif->tif_flags&(TIFF_STRIPCHOP|TIFF_ISTILED)) == TIFF_STRIPCHOP && TIFFStripSize64(tif) > 0x7FFFFFFFUL ) { - if ( !_TIFFFillStriles(tif) || !tif->tif_dir.td_stripbytecount ) - return 0; TryChopUpUncompressedBigTiff(tif); } @@ -4384,6 +4448,7 @@ TIFFReadCustomDirectory(TIFF* tif, toff_t diroff, uint16 di; const TIFFField* fip; uint32 fii; + (*tif->tif_cleanup)(tif); /* cleanup any previous compression state */ _TIFFSetupFields(tif, infoarray); dircount=TIFFFetchDirectory(tif,diroff,&dir,NULL); if (!dircount) @@ -4410,17 +4475,17 @@ TIFFReadCustomDirectory(TIFF* tif, toff_t diroff, TIFFWarningExt(tif->tif_clientdata, module, "Registering anonymous field with tag %d (0x%x) failed", dp->tdir_tag, dp->tdir_tag); - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } else { TIFFReadDirectoryFindFieldInfo(tif,dp->tdir_tag,&fii); assert( fii != FAILED_FII ); } } - if (dp->tdir_tag!=IGNORE) + if (!dp->tdir_ignore) { fip=tif->tif_fields[fii]; if (fip->field_bit==FIELD_IGNORE) - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; else { /* check data type */ @@ -4440,7 +4505,7 @@ TIFFReadCustomDirectory(TIFF* tif, toff_t diroff, TIFFWarningExt(tif->tif_clientdata, module, "Wrong data type %d for \"%s\"; tag ignored", dp->tdir_type,fip->field_name); - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } else { @@ -4454,21 +4519,21 @@ TIFFReadCustomDirectory(TIFF* tif, toff_t diroff, else expected=(uint32)fip->field_readcount; if (!CheckDirCount(tif,dp,expected)) - dp->tdir_tag=IGNORE; + dp->tdir_ignore = TRUE; } } } - switch (dp->tdir_tag) - { - case IGNORE: - break; - case EXIFTAG_SUBJECTDISTANCE: - (void) TIFFFetchSubjectDistance(tif,dp); - break; - default: - (void) TIFFFetchNormalTag(tif, dp, TRUE); - break; - } + if (!dp->tdir_ignore) { + switch (dp->tdir_tag) + { + case EXIFTAG_SUBJECTDISTANCE: + (void)TIFFFetchSubjectDistance(tif, dp); + break; + default: + (void)TIFFFetchNormalTag(tif, dp, TRUE); + break; + } + } /*-- if (!dp->tdir_ignore) */ } } if (dir) @@ -4488,6 +4553,17 @@ TIFFReadEXIFDirectory(TIFF* tif, toff_t diroff) return TIFFReadCustomDirectory(tif, diroff, exifFieldArray); } +/* + *--: EXIF-GPS custom directory reading as another special case of custom IFD. 
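+ *
+ * Sketch of the intended call pattern (assumes the GPS IFD offset has
+ * already been fetched from the main directory's GPSIFD tag; error
+ * handling omitted):
+ *
+ *     toff_t gps_diroff = 0;
+ *     if (TIFFGetField(tif, TIFFTAG_GPSIFD, &gps_diroff))
+ *         TIFFReadGPSDirectory(tif, gps_diroff);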
+ */ +int +TIFFReadGPSDirectory(TIFF* tif, toff_t diroff) +{ + const TIFFFieldArray* gpsFieldArray; + gpsFieldArray = _TIFFGetGpsFields(); + return TIFFReadCustomDirectory(tif, diroff, gpsFieldArray); +} + static int EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) { @@ -4501,12 +4577,12 @@ EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) if( !_TIFFFillStrilesInternal( tif, 0 ) ) return -1; - if (td->td_stripbytecount) - _TIFFfree(td->td_stripbytecount); - td->td_stripbytecount = (uint64*) + if (td->td_stripbytecount_p) + _TIFFfree(td->td_stripbytecount_p); + td->td_stripbytecount_p = (uint64*) _TIFFCheckMalloc(tif, td->td_nstrips, sizeof (uint64), "for \"StripByteCounts\" array"); - if( td->td_stripbytecount == NULL ) + if( td->td_stripbytecount_p == NULL ) return -1; if (td->td_compression != COMPRESSION_NONE) { @@ -4530,6 +4606,8 @@ EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) dp->tdir_type); return -1; } + if( dp->tdir_count > TIFF_UINT64_MAX / typewidth ) + return -1; datasize=(uint64)typewidth*dp->tdir_count; if (!(tif->tif_flags&TIFF_BIGTIFF)) { @@ -4541,6 +4619,8 @@ EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) if (datasize<=8) datasize=0; } + if( space > TIFF_UINT64_MAX - datasize ) + return -1; space+=datasize; } if( filesize < space ) @@ -4551,7 +4631,7 @@ EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) if (td->td_planarconfig == PLANARCONFIG_SEPARATE) space /= td->td_samplesperpixel; for (strip = 0; strip < td->td_nstrips; strip++) - td->td_stripbytecount[strip] = space; + td->td_stripbytecount_p[strip] = space; /* * This gross hack handles the case where the offset to * the last strip is past the place where we think the strip @@ -4560,18 +4640,30 @@ EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount) * of data in the strip and trim this number back accordingly. */ strip--; - if (td->td_stripoffset[strip]+td->td_stripbytecount[strip] > filesize) - td->td_stripbytecount[strip] = filesize - td->td_stripoffset[strip]; + if (td->td_stripoffset_p[strip] > TIFF_UINT64_MAX - td->td_stripbytecount_p[strip]) + return -1; + if (td->td_stripoffset_p[strip]+td->td_stripbytecount_p[strip] > filesize) { + if( td->td_stripoffset_p[strip] >= filesize ) { + /* Not sure what we should do in that case... 
*/ + td->td_stripbytecount_p[strip] = 0; + } else { + td->td_stripbytecount_p[strip] = filesize - td->td_stripoffset_p[strip]; + } + } } else if (isTiled(tif)) { uint64 bytespertile = TIFFTileSize64(tif); for (strip = 0; strip < td->td_nstrips; strip++) - td->td_stripbytecount[strip] = bytespertile; + td->td_stripbytecount_p[strip] = bytespertile; } else { uint64 rowbytes = TIFFScanlineSize64(tif); uint32 rowsperstrip = td->td_imagelength/td->td_stripsperimage; for (strip = 0; strip < td->td_nstrips; strip++) - td->td_stripbytecount[strip] = rowbytes * rowsperstrip; + { + if( rowbytes > 0 && rowsperstrip > TIFF_UINT64_MAX / rowbytes ) + return -1; + td->td_stripbytecount_p[strip] = rowbytes * rowsperstrip; + } } TIFFSetFieldBit(tif, FIELD_STRIPBYTECOUNTS); if (!TIFFFieldSet(tif, FIELD_ROWSPERSTRIP)) @@ -4765,12 +4857,13 @@ TIFFFetchDirectory(TIFF* tif, uint64 diroff, TIFFDirEntry** pdir, } } else { tmsize_t m; - tmsize_t off = (tmsize_t) tif->tif_diroff; - if ((uint64)off!=tif->tif_diroff) + tmsize_t off; + if (tif->tif_diroff > (uint64)TIFF_INT64_MAX) { TIFFErrorExt(tif->tif_clientdata,module,"Can not read TIFF directory count"); return(0); } + off = (tmsize_t) tif->tif_diroff; /* * Check for integer overflow when validating the dir_off, @@ -4888,6 +4981,7 @@ TIFFFetchDirectory(TIFF* tif, uint64 diroff, TIFFDirEntry** pdir, mb=dir; for (n=0; n<dircount; n++) { + mb->tdir_ignore = FALSE; if (tif->tif_flags&TIFF_SWAB) TIFFSwabShort((uint16*)ma); mb->tdir_tag=*(uint16*)ma; @@ -4902,6 +4996,7 @@ TIFFFetchDirectory(TIFF* tif, uint64 diroff, TIFFDirEntry** pdir, TIFFSwabLong((uint32*)ma); mb->tdir_count=(uint64)(*(uint32*)ma); ma+=sizeof(uint32); + mb->tdir_offset.toff_long8=0; *(uint32*)(&mb->tdir_offset)=*(uint32*)ma; ma+=sizeof(uint32); } @@ -5104,6 +5199,7 @@ TIFFFetchNormalTag(TIFF* tif, TIFFDirEntry* dp, int recover) if (err==TIFFReadDirEntryErrOk) { int m; + assert(data); /* avoid Clang static Analyzer false positive */ m=TIFFSetField(tif,dp->tdir_tag,data[0],data[1]); _TIFFfree(data); if (!m) @@ -5187,7 +5283,7 @@ TIFFFetchNormalTag(TIFF* tif, TIFFDirEntry* dp, int recover) assert(fip->field_readcount>=1); assert(fip->field_passcount==0); if (dp->tdir_count!=(uint64)fip->field_readcount) - /* corrupt file */; + /* corrupt file */; else { err=TIFFReadDirEntryFloatArray(tif,dp,&data); @@ -5203,6 +5299,29 @@ TIFFFetchNormalTag(TIFF* tif, TIFFDirEntry* dp, int recover) } } break; + /*--: Rational2Double: Extend for Double Arrays and Rational-Arrays read into Double-Arrays. */ + case TIFF_SETGET_C0_DOUBLE: + { + double* data; + assert(fip->field_readcount>=1); + assert(fip->field_passcount==0); + if (dp->tdir_count!=(uint64)fip->field_readcount) + /* corrupt file */; + else + { + err=TIFFReadDirEntryDoubleArray(tif,dp,&data); + if (err==TIFFReadDirEntryErrOk) + { + int m; + m=TIFFSetField(tif,dp->tdir_tag,data); + if (data!=0) + _TIFFfree(data); + if (!m) + return(0); + } + } + } + break; + case TIFF_SETGET_C16_ASCII: { uint8* data; @@ -5695,7 +5814,7 @@ TIFFFetchSubjectDistance(TIFF* tif, TIFFDirEntry* dir) TIFFSwabArrayOfLong(m.i,2); if (m.i[0]==0) n=0.0; - else if (m.i[0]==0xFFFFFFFF) + else if (m.i[0]==0xFFFFFFFF || m.i[1]==0) /* * XXX: Numerator 0xFFFFFFFF means that we have infinite * distance.
Indicate that with a negative floating point @@ -5719,10 +5838,22 @@ static void allocChoppedUpStripArrays(TIFF* tif, uint32 nstrips, TIFFDirectory *td = &tif->tif_dir; uint64 bytecount; uint64 offset; + uint64 last_offset; + uint64 last_bytecount; uint32 i; uint64 *newcounts; uint64 *newoffsets; + offset = TIFFGetStrileOffset(tif, 0); + last_offset = TIFFGetStrileOffset(tif, td->td_nstrips-1); + last_bytecount = TIFFGetStrileByteCount(tif, td->td_nstrips-1); + if( last_offset > TIFF_UINT64_MAX - last_bytecount || + last_offset + last_bytecount < offset ) + { + return; + } + bytecount = last_offset + last_bytecount - offset; + newcounts = (uint64*) _TIFFCheckMalloc(tif, nstrips, sizeof (uint64), "for chopped \"StripByteCounts\" array"); newoffsets = (uint64*) _TIFFCheckMalloc(tif, nstrips, sizeof (uint64), @@ -5743,9 +5874,6 @@ static void allocChoppedUpStripArrays(TIFF* tif, uint32 nstrips, * Fill the strip information arrays with new bytecounts and offsets * that reflect the broken-up format. */ - offset = td->td_stripoffset[0]; - bytecount = td->td_stripoffset[td->td_nstrips-1] + - td->td_stripbytecount[td->td_nstrips-1] - offset; for (i = 0; i < nstrips; i++) { if (stripbytes > bytecount) @@ -5762,11 +5890,14 @@ static void allocChoppedUpStripArrays(TIFF* tif, uint32 nstrips, td->td_stripsperimage = td->td_nstrips = nstrips; TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, rowsperstrip); - _TIFFfree(td->td_stripbytecount); - _TIFFfree(td->td_stripoffset); - td->td_stripbytecount = newcounts; - td->td_stripoffset = newoffsets; + _TIFFfree(td->td_stripbytecount_p); + _TIFFfree(td->td_stripoffset_p); + td->td_stripbytecount_p = newcounts; + td->td_stripoffset_p = newoffsets; +#ifdef STRIPBYTECOUNTSORTED_UNUSED td->td_stripbytecountsorted = 1; +#endif + tif->tif_flags |= TIFF_CHOPPEDUPARRAYS; } @@ -5788,13 +5919,13 @@ ChopUpSingleUncompressedStrip(TIFF* tif) uint32 nstrips; uint32 rowsperstrip; - bytecount = td->td_stripbytecount[0]; + bytecount = TIFFGetStrileByteCount(tif, 0); /* On a newly created file, just re-opened to be filled, we */ /* don't want strip chop to trigger as it is going to cause issues */ /* later ( StripOffsets and StripByteCounts improperly filled) . */ if( bytecount == 0 && tif->tif_mode != O_RDONLY ) return; - offset = td->td_stripoffset[0]; + offset = TIFFGetStrileOffset(tif, 0); assert(td->td_planarconfig == PLANARCONFIG_CONTIG); if ((td->td_photometric == PHOTOMETRIC_YCBCR)&& (!isUpSampled(tif))) @@ -5869,7 +6000,7 @@ static void TryChopUpUncompressedBigTiff( TIFF* tif ) /* On a newly created file, just re-opened to be filled, we */ /* don't want strip chop to trigger as it is going to cause issues */ /* later ( StripOffsets and StripByteCounts improperly filled) . 
*/ - if( td->td_stripbytecount[0] == 0 && tif->tif_mode != O_RDONLY ) + if( TIFFGetStrileByteCount(tif, 0) == 0 && tif->tif_mode != O_RDONLY ) return; if ((td->td_photometric == PHOTOMETRIC_YCBCR)&& @@ -5889,7 +6020,7 @@ static void TryChopUpUncompressedBigTiff( TIFF* tif ) { if( i == td->td_nstrips - 1 ) { - if( td->td_stripbytecount[i] < TIFFVStripSize64( + if( TIFFGetStrileByteCount(tif, i) < TIFFVStripSize64( tif, td->td_imagelength - i * td->td_rowsperstrip ) ) { return; @@ -5897,12 +6028,12 @@ static void TryChopUpUncompressedBigTiff( TIFF* tif ) } else { - if( td->td_stripbytecount[i] != stripsize ) + if( TIFFGetStrileByteCount(tif, i) != stripsize ) { return; } - if( i > 0 && td->td_stripoffset[i] != - td->td_stripoffset[i-1] + td->td_stripbytecount[i - 1] ) + if( i > 0 && TIFFGetStrileOffset(tif, i) != + TIFFGetStrileOffset(tif, i-1) + TIFFGetStrileByteCount(tif, i-1) ) { return; } @@ -5924,18 +6055,367 @@ static void TryChopUpUncompressedBigTiff( TIFF* tif ) /* If we are going to allocate a lot of memory, make sure that the */ /* file is as big as needed */ if( tif->tif_mode == O_RDONLY && - nstrips > 1000000 && - (td->td_stripoffset[td->td_nstrips-1] > TIFFGetFileSize(tif) || - td->td_stripoffset[td->td_nstrips-1] + - td->td_stripbytecount[td->td_nstrips-1] > TIFFGetFileSize(tif)) ) + nstrips > 1000000 ) { - return; + uint64 last_offset = TIFFGetStrileOffset(tif, td->td_nstrips-1); + uint64 filesize = TIFFGetFileSize(tif); + uint64 last_bytecount = TIFFGetStrileByteCount(tif, td->td_nstrips-1); + if( last_offset > filesize || + last_bytecount > filesize - last_offset ) + { + return; + } } allocChoppedUpStripArrays(tif, nstrips, stripbytes, rowsperstrip); } +TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW +static uint64 _TIFFUnsanitizedAddUInt64AndInt(uint64 a, int b) +{ + return a + b; +} + +/* Read the value of [Strip|Tile]Offset or [Strip|Tile]ByteCount around + * strip/tile of number strile. Also fetch the neighbouring values using a + * 4096 byte page size. 
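+ *
+ * Worked example (illustrative numbers): for a TIFF_LONG (4-byte) array
+ * whose data starts at file offset 10000, strile 30 sits at
+ * 10000 + 4*30 = 10120, so the page-aligned window [8192, 12288) is read
+ * and every array entry falling inside it is decoded, which makes the
+ * neighbouring striles available at no extra I/O cost.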
+ */ +static +int _TIFFPartialReadStripArray( TIFF* tif, TIFFDirEntry* dirent, + int strile, uint64* panVals ) +{ + static const char module[] = "_TIFFPartialReadStripArray"; +#define IO_CACHE_PAGE_SIZE 4096 + + size_t sizeofval; + const int bSwab = (tif->tif_flags & TIFF_SWAB) != 0; + int sizeofvalint; + uint64 nBaseOffset; + uint64 nOffset; + uint64 nOffsetStartPage; + uint64 nOffsetEndPage; + tmsize_t nToRead; + tmsize_t nRead; + uint64 nLastStripOffset; + int iStartBefore; + int i; + const uint32 arraySize = tif->tif_dir.td_stripoffsetbyteallocsize; + unsigned char buffer[2 * IO_CACHE_PAGE_SIZE]; + + assert( dirent->tdir_count > 4 ); + + if( dirent->tdir_type == TIFF_SHORT ) + { + sizeofval = sizeof(uint16); + } + else if( dirent->tdir_type == TIFF_LONG ) + { + sizeofval = sizeof(uint32); + } + else if( dirent->tdir_type == TIFF_LONG8 ) + { + sizeofval = sizeof(uint64); + } + else if( dirent->tdir_type == TIFF_SLONG8 ) + { + /* Non conformant but used by some images as in */ + /* https://github.com/OSGeo/gdal/issues/2165 */ + sizeofval = sizeof(int64); + } + else + { + TIFFErrorExt(tif->tif_clientdata, module, + "Invalid type for [Strip|Tile][Offset/ByteCount] tag"); + panVals[strile] = 0; + return 0; + } + sizeofvalint = (int)(sizeofval); + + if( tif->tif_flags&TIFF_BIGTIFF ) + { + uint64 offset = dirent->tdir_offset.toff_long8; + if( bSwab ) + TIFFSwabLong8(&offset); + nBaseOffset = offset; + } + else + { + uint32 offset = dirent->tdir_offset.toff_long; + if( bSwab ) + TIFFSwabLong(&offset); + nBaseOffset = offset; + } + /* To avoid later unsigned integer overflows */ + if( nBaseOffset > (uint64)TIFF_INT64_MAX ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot read offset/size for strile %d", strile); + panVals[strile] = 0; + return 0; + } + nOffset = nBaseOffset + sizeofval * strile; + nOffsetStartPage = + (nOffset / IO_CACHE_PAGE_SIZE) * IO_CACHE_PAGE_SIZE; + nOffsetEndPage = nOffsetStartPage + IO_CACHE_PAGE_SIZE; + + if( nOffset + sizeofval > nOffsetEndPage ) + nOffsetEndPage += IO_CACHE_PAGE_SIZE; +#undef IO_CACHE_PAGE_SIZE + + nLastStripOffset = nBaseOffset + arraySize * sizeofval; + if( nLastStripOffset < nOffsetEndPage ) + nOffsetEndPage = nLastStripOffset; + if( nOffsetStartPage >= nOffsetEndPage ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot read offset/size for strile %d", strile); + panVals[strile] = 0; + return 0; + } + if (!SeekOK(tif,nOffsetStartPage)) + { + panVals[strile] = 0; + return 0; + } + + nToRead = (tmsize_t)(nOffsetEndPage - nOffsetStartPage); + nRead = TIFFReadFile(tif, buffer, nToRead); + if( nRead < nToRead ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot read offset/size for strile around ~%d", strile); + return 0; + } + iStartBefore = -(int)((nOffset - nOffsetStartPage) / sizeofval); + if( strile + iStartBefore < 0 ) + iStartBefore = -strile; + for( i = iStartBefore; + (uint32)(strile + i) < arraySize && + _TIFFUnsanitizedAddUInt64AndInt(nOffset, (i + 1) * sizeofvalint) <= nOffsetEndPage; + ++i ) + { + if( dirent->tdir_type == TIFF_SHORT ) + { + uint16 val; + memcpy(&val, + buffer + (nOffset - nOffsetStartPage) + i * sizeofvalint, + sizeof(val)); + if( bSwab ) + TIFFSwabShort(&val); + panVals[strile + i] = val; + } + else if( dirent->tdir_type == TIFF_LONG ) + { + uint32 val; + memcpy(&val, + buffer + (nOffset - nOffsetStartPage) + i * sizeofvalint, + sizeof(val)); + if( bSwab ) + TIFFSwabLong(&val); + panVals[strile + i] = val; + } + else if( dirent->tdir_type == TIFF_LONG8 ) + { + uint64 val; + memcpy(&val, + buffer + 
(nOffset - nOffsetStartPage) + i * sizeofvalint, + sizeof(val)); + if( bSwab ) + TIFFSwabLong8(&val); + panVals[strile + i] = val; + } + else /* if( dirent->tdir_type == TIFF_SLONG8 ) */ + { + /* Non conformant data type */ + int64 val; + memcpy(&val, + buffer + (nOffset - nOffsetStartPage) + i * sizeofvalint, + sizeof(val)); + if( bSwab ) + TIFFSwabLong8((uint64*) &val); + panVals[strile + i] = (uint64) val; + } + } + return 1; +} + +static int _TIFFFetchStrileValue(TIFF* tif, + uint32 strile, + TIFFDirEntry* dirent, + uint64** parray) +{ + static const char module[] = "_TIFFFetchStrileValue"; + TIFFDirectory *td = &tif->tif_dir; + if( strile >= dirent->tdir_count ) + { + return 0; + } + if( strile >= td->td_stripoffsetbyteallocsize ) + { + uint32 nStripArrayAllocBefore = td->td_stripoffsetbyteallocsize; + uint32 nStripArrayAllocNew; + uint64 nArraySize64; + size_t nArraySize; + uint64* offsetArray; + uint64* bytecountArray; + + if( strile > 1000000 ) + { + uint64 filesize = TIFFGetFileSize(tif); + /* Avoid excessive memory allocation attempt */ + /* For such a big blockid we need at least a TIFF_LONG per strile */ + /* for the offset array. */ + if( strile > filesize / sizeof(uint32) ) + { + TIFFErrorExt(tif->tif_clientdata, module, "File too short"); + return 0; + } + } + + if( td->td_stripoffsetbyteallocsize == 0 && + td->td_nstrips < 1024 * 1024 ) + { + nStripArrayAllocNew = td->td_nstrips; + } + else + { +#define TIFF_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define TIFF_MIN(a,b) (((a)<(b)) ? (a) : (b)) + nStripArrayAllocNew = TIFF_MAX(strile + 1, 1024U * 512U ); + if( nStripArrayAllocNew < 0xFFFFFFFFU / 2 ) + nStripArrayAllocNew *= 2; + nStripArrayAllocNew = TIFF_MIN(nStripArrayAllocNew, td->td_nstrips); + } + assert( strile < nStripArrayAllocNew ); + nArraySize64 = (uint64)sizeof(uint64) * nStripArrayAllocNew; + nArraySize = (size_t)(nArraySize64); +#if SIZEOF_SIZE_T == 4 + if( nArraySize != nArraySize64 ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot allocate strip offset and bytecount arrays"); + return 0; + } +#endif + offsetArray = (uint64*)( + _TIFFrealloc( td->td_stripoffset_p, nArraySize ) ); + bytecountArray = (uint64*)( + _TIFFrealloc( td->td_stripbytecount_p, nArraySize ) ); + if( offsetArray ) + td->td_stripoffset_p = offsetArray; + if( bytecountArray ) + td->td_stripbytecount_p = bytecountArray; + if( offsetArray && bytecountArray ) + { + td->td_stripoffsetbyteallocsize = nStripArrayAllocNew; + /* Initialize new entries to ~0 / -1 */ + memset(td->td_stripoffset_p + nStripArrayAllocBefore, + 0xFF, + (td->td_stripoffsetbyteallocsize - nStripArrayAllocBefore) * sizeof(uint64) ); + memset(td->td_stripbytecount_p + nStripArrayAllocBefore, + 0xFF, + (td->td_stripoffsetbyteallocsize - nStripArrayAllocBefore) * sizeof(uint64) ); + } + else + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot allocate strip offset and bytecount arrays"); + _TIFFfree(td->td_stripoffset_p); + td->td_stripoffset_p = NULL; + _TIFFfree(td->td_stripbytecount_p); + td->td_stripbytecount_p = NULL; + td->td_stripoffsetbyteallocsize = 0; + } + } + if( *parray == NULL || strile >= td->td_stripoffsetbyteallocsize ) + return 0; + + if( ~((*parray)[strile]) == 0 ) + { + if( !_TIFFPartialReadStripArray( tif, dirent, strile, *parray ) ) + { + (*parray)[strile] = 0; + return 0; + } + } + + return 1; +} + +static uint64 _TIFFGetStrileOffsetOrByteCountValue(TIFF *tif, uint32 strile, + TIFFDirEntry* dirent, + uint64** parray, + int *pbErr) +{ + TIFFDirectory *td = &tif->tif_dir; + if( pbErr ) + *pbErr = 
0; + if( (tif->tif_flags&TIFF_DEFERSTRILELOAD) && !(tif->tif_flags&TIFF_CHOPPEDUPARRAYS) ) + { + if( !(tif->tif_flags&TIFF_LAZYSTRILELOAD) || + /* If the values may fit in the toff_long/toff_long8 member */ + /* then use _TIFFFillStriles to simplify _TIFFFetchStrileValue */ + dirent->tdir_count <= 4 ) + { + if( !_TIFFFillStriles(tif) ) + { + if( pbErr ) + *pbErr = 1; + /* Do not return, as we want this function to always */ + /* return the same value if called several times with */ + /* the same arguments */ + } + } + else + { + if( !_TIFFFetchStrileValue(tif, strile, dirent, parray) ) + { + if( pbErr ) + *pbErr = 1; + return 0; + } + } + } + if( *parray == NULL || strile >= td->td_nstrips ) + { + if( pbErr ) + *pbErr = 1; + return 0; + } + return (*parray)[strile]; +} + +/* Return the value of the TileOffsets/StripOffsets array for the specified tile/strile */ +uint64 TIFFGetStrileOffset(TIFF *tif, uint32 strile) +{ + return TIFFGetStrileOffsetWithErr(tif, strile, NULL); +} + +/* Return the value of the TileOffsets/StripOffsets array for the specified tile/strile */ +uint64 TIFFGetStrileOffsetWithErr(TIFF *tif, uint32 strile, int *pbErr) +{ + TIFFDirectory *td = &tif->tif_dir; + return _TIFFGetStrileOffsetOrByteCountValue(tif, strile, + &(td->td_stripoffset_entry), + &(td->td_stripoffset_p), pbErr); +} + +/* Return the value of the TileByteCounts/StripByteCounts array for the specified tile/strile */ +uint64 TIFFGetStrileByteCount(TIFF *tif, uint32 strile) +{ + return TIFFGetStrileByteCountWithErr(tif, strile, NULL); +} + +/* Return the value of the TileByteCounts/StripByteCounts array for the specified tile/strile */ +uint64 TIFFGetStrileByteCountWithErr(TIFF *tif, uint32 strile, int *pbErr) +{ + TIFFDirectory *td = &tif->tif_dir; + return _TIFFGetStrileOffsetOrByteCountValue(tif, strile, + &(td->td_stripbytecount_entry), + &(td->td_stripbytecount_p), pbErr); +} + int _TIFFFillStriles( TIFF *tif ) { @@ -5944,51 +6424,64 @@ int _TIFFFillStriles( TIFF *tif ) static int _TIFFFillStrilesInternal( TIFF *tif, int loadStripByteCount ) { -#if defined(DEFER_STRILE_LOAD) - register TIFFDirectory *td = &tif->tif_dir; - int return_value = 1; + register TIFFDirectory *td = &tif->tif_dir; + int return_value = 1; - if( td->td_stripoffset != NULL ) - return 1; - - if( td->td_stripoffset_entry.tdir_count == 0 ) - return 0; - - if (!TIFFFetchStripThing(tif,&(td->td_stripoffset_entry), - td->td_nstrips,&td->td_stripoffset)) - { - return_value = 0; - } - - if (loadStripByteCount && - !TIFFFetchStripThing(tif,&(td->td_stripbytecount_entry), - td->td_nstrips,&td->td_stripbytecount)) - { - return_value = 0; - } - - _TIFFmemset( &(td->td_stripoffset_entry), 0, sizeof(TIFFDirEntry)); - _TIFFmemset( &(td->td_stripbytecount_entry), 0, sizeof(TIFFDirEntry)); - - if (tif->tif_dir.td_nstrips > 1 && return_value == 1 ) { - uint32 strip; - - tif->tif_dir.td_stripbytecountsorted = 1; - for (strip = 1; strip < tif->tif_dir.td_nstrips; strip++) { - if (tif->tif_dir.td_stripoffset[strip - 1] > - tif->tif_dir.td_stripoffset[strip]) { - tif->tif_dir.td_stripbytecountsorted = 0; - break; - } - } - } - - return return_value; -#else /* !defined(DEFER_STRILE_LOAD) */ - (void) tif; - (void) loadStripByteCount; + /* Do not do anything if TIFF_DEFERSTRILELOAD is not set */ + if( !(tif->tif_flags&TIFF_DEFERSTRILELOAD) || (tif->tif_flags&TIFF_CHOPPEDUPARRAYS) != 0 ) return 1; -#endif + + if( tif->tif_flags&TIFF_LAZYSTRILELOAD ) + { + /* In case of lazy loading, reload completely the arrays */ + _TIFFfree(td->td_stripoffset_p); + 
_TIFFfree(td->td_stripbytecount_p); + td->td_stripoffset_p = NULL; + td->td_stripbytecount_p = NULL; + td->td_stripoffsetbyteallocsize = 0; + tif->tif_flags &= ~TIFF_LAZYSTRILELOAD; + } + + /* If stripoffset array is already loaded, exit with success */ + if( td->td_stripoffset_p != NULL ) + return 1; + + /* If tdir_count was canceled, then we already got there, but in error */ + if( td->td_stripoffset_entry.tdir_count == 0 ) + return 0; + + if (!TIFFFetchStripThing(tif,&(td->td_stripoffset_entry), + td->td_nstrips,&td->td_stripoffset_p)) + { + return_value = 0; + } + + if (loadStripByteCount && + !TIFFFetchStripThing(tif,&(td->td_stripbytecount_entry), + td->td_nstrips,&td->td_stripbytecount_p)) + { + return_value = 0; + } + + _TIFFmemset( &(td->td_stripoffset_entry), 0, sizeof(TIFFDirEntry)); + _TIFFmemset( &(td->td_stripbytecount_entry), 0, sizeof(TIFFDirEntry)); + +#ifdef STRIPBYTECOUNTSORTED_UNUSED + if (tif->tif_dir.td_nstrips > 1 && return_value == 1 ) { + uint32 strip; + + tif->tif_dir.td_stripbytecountsorted = 1; + for (strip = 1; strip < tif->tif_dir.td_nstrips; strip++) { + if (tif->tif_dir.td_stripoffset_p[strip - 1] > + tif->tif_dir.td_stripoffset_p[strip]) { + tif->tif_dir.td_stripbytecountsorted = 0; + break; + } + } + } +#endif + + return return_value; } diff --git a/3rdparty/libtiff/tif_dirwrite.c b/3rdparty/libtiff/tif_dirwrite.c index 83c01b24f2..f481250e3b 100644 --- a/3rdparty/libtiff/tif_dirwrite.c +++ b/3rdparty/libtiff/tif_dirwrite.c @@ -28,6 +28,8 @@ * Directory Write Support Routines. */ #include "tiffiop.h" +#include <float.h> /*--: for Rational2Double */ +#include <math.h> /*--: for Rational2Double */ #ifdef HAVE_IEEEFP #define TIFFCvtNativeToIEEEFloat(tif, n, fp) @@ -154,6 +156,19 @@ static int TIFFWriteDirectoryTagCheckedSlong8Array(TIFF* tif, uint32* ndir, TIFF static int TIFFWriteDirectoryTagCheckedRational(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, double value); static int TIFFWriteDirectoryTagCheckedRationalArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, float* value); static int TIFFWriteDirectoryTagCheckedSrationalArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, float* value); + +/*--: Rational2Double: New functions to support true double-precision for custom rational tag types.
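+ (A sketch with assumed example values: DoubleToRational() can map 0.34375 exactly to num=11/denom=32, while values with no exact 32-bit rational representation are approximated.)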
*/ +static int TIFFWriteDirectoryTagRationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value); +static int TIFFWriteDirectoryTagSrationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value); +static int TIFFWriteDirectoryTagCheckedRationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value); +static int TIFFWriteDirectoryTagCheckedSrationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value); +static void DoubleToRational(double value, uint32 *num, uint32 *denom); +static void DoubleToSrational(double value, int32 *num, int32 *denom); +#if 0 +static void DoubleToRational_direct(double value, unsigned long *num, unsigned long *denom); +static void DoubleToSrational_direct(double value, long *num, long *denom); +#endif + #ifdef notdef static int TIFFWriteDirectoryTagCheckedFloat(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, float value); #endif @@ -181,6 +196,51 @@ TIFFWriteDirectory(TIFF* tif) return TIFFWriteDirectorySec(tif,TRUE,TRUE,NULL); } +/* + * This is an advanced writing function that must be used in a particular + * sequence, and generally together with TIFFForceStrileArrayWriting(), + * to make its intended effect. Its aim is to modify the location + * where the [Strip/Tile][Offsets/ByteCounts] arrays are located in the file. + * More precisely, when TIFFWriteCheck() will be called, the tag entries for + * those arrays will be written with type = count = offset = 0 as a temporary + * value. + * + * Its effect is only valid for the current directory, and before + * TIFFWriteDirectory() is first called, and will be reset when + * changing directory. + * + * The typical sequence of calls is: + * TIFFOpen() + * [ TIFFCreateDirectory(tif) ] + * Set fields with calls to TIFFSetField(tif, ...) + * TIFFDeferStrileArrayWriting(tif) + * TIFFWriteCheck(tif, ...) + * TIFFWriteDirectory(tif) + * ... potentially create other directories and come back to the above directory + * TIFFForceStrileArrayWriting(tif): emit the arrays at the end of file + * + * Returns 1 in case of success, 0 otherwise. + */ +int TIFFDeferStrileArrayWriting(TIFF* tif) +{ + static const char module[] = "TIFFDeferStrileArrayWriting"; + if (tif->tif_mode == O_RDONLY) + { + TIFFErrorExt(tif->tif_clientdata, tif->tif_name, + "File opened in read-only mode"); + return 0; + } + if( tif->tif_diroff != 0 ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Directory has already been written"); + return 0; + } + + tif->tif_dir.td_deferstrilearraywriting = TRUE; + return 1; +} + /* * Similar to TIFFWriteDirectory(), writes the directory out * but leaves all data structures in memory so that it can be @@ -192,7 +252,7 @@ TIFFCheckpointDirectory(TIFF* tif) { int rc; /* Setup the strips arrays, if they haven't already been. 
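(TIFFSetupStrips() is assumed to rebuild td_stripoffset_p and td_stripbytecount_p from the current directory fields, so the checkpoint below always has strile arrays to write out.)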
 /* * Similar to TIFFWriteDirectory(), writes the directory out * but leaves all data structures in memory so that it can be * @@ -192,7 +252,7 @@ TIFFCheckpointDirectory(TIFF* tif) { int rc; /* Setup the strips arrays, if they haven't already been. */ - if (tif->tif_dir.td_stripoffset == NULL) + if (tif->tif_dir.td_stripoffset_p == NULL) (void) TIFFSetupStrips(tif); rc = TIFFWriteDirectorySec(tif,TRUE,FALSE,NULL); (void) TIFFSetWriteOffset(tif, TIFFSeekFile(tif, 0, SEEK_END)); @@ -527,12 +587,12 @@ TIFFWriteDirectorySec(TIFF* tif, int isimage, int imagedone, uint64* pdiroff) { if (!isTiled(tif)) { - if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_STRIPBYTECOUNTS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripbytecount)) + if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_STRIPBYTECOUNTS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripbytecount_p)) goto bad; } else { - if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_TILEBYTECOUNTS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripbytecount)) + if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_TILEBYTECOUNTS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripbytecount_p)) goto bad; } } @@ -540,7 +600,7 @@ TIFFWriteDirectorySec(TIFF* tif, int isimage, int imagedone, uint64* pdiroff) { if (!isTiled(tif)) { - /* td_stripoffset might be NULL in an odd OJPEG case. See + /* td_stripoffset_p might be NULL in an odd OJPEG case. See * tif_dirread.c around line 3634. * XXX: OJPEG hack. * If a) compression is OJPEG, b) it's not a tiled TIFF, @@ -551,13 +611,13 @@ TIFFWriteDirectorySec(TIFF* tif, int isimage, int imagedone, uint64* pdiroff) * We can get here when using tiffset on such a file. * See http://bugzilla.maptools.org/show_bug.cgi?id=2500 */ - if (tif->tif_dir.td_stripoffset != NULL && - !TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_STRIPOFFSETS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripoffset)) + if (tif->tif_dir.td_stripoffset_p != NULL && - !TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_STRIPOFFSETS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripoffset_p)) goto bad; } else { - if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_TILEOFFSETS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripoffset)) + if (!TIFFWriteDirectoryTagLongLong8Array(tif,&ndir,dir,TIFFTAG_TILEOFFSETS,tif->tif_dir.td_nstrips,tif->tif_dir.td_stripoffset_p)) goto bad; } } @@ -751,12 +811,42 @@ TIFFWriteDirectorySec(TIFF* tif, int isimage, int imagedone, uint64* pdiroff) goto bad; break; case TIFF_RATIONAL: - if (!TIFFWriteDirectoryTagRationalArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) - goto bad; + { + /*-- Rational2Double: For Rationals evaluate "set_field_type" to determine internal storage size. */ + int tv_size; + tv_size = _TIFFSetGetFieldSize(tif->tif_dir.td_customValues[m].info->set_field_type); + if (tv_size == 8) { + if (!TIFFWriteDirectoryTagRationalDoubleArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) + goto bad; + } else { + /*-- default should be tv_size == 4 */ + if (!TIFFWriteDirectoryTagRationalArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) + goto bad; + /*-- ToDo: After Testing, this should be removed and tv_size==4 should be set as default. */ + if (tv_size != 4) { + TIFFErrorExt(0,"TIFFLib: _TIFFWriteDirectorySec()", "Rational2Double: .set_field_type is not 4 but %d", tv_size); + } + } + } break; case TIFF_SRATIONAL: - if (!TIFFWriteDirectoryTagSrationalArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) - goto bad; + { + /*-- Rational2Double: For Rationals evaluate "set_field_type" to determine internal storage size.
*/ + int tv_size; + tv_size = _TIFFSetGetFieldSize(tif->tif_dir.td_customValues[m].info->set_field_type); + if (tv_size == 8) { + if (!TIFFWriteDirectoryTagSrationalDoubleArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) + goto bad; + } else { + /*-- default should be tv_size == 4 */ + if (!TIFFWriteDirectoryTagSrationalArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) + goto bad; + /*-- ToDo: After Testing, this should be removed and tv_size==4 should be set as default. */ + if (tv_size != 4) { + TIFFErrorExt(0,"TIFFLib: _TIFFWriteDirectorySec()", "Rational2Double: .set_field_type is not 4 but %d", tv_size); + } + } + } break; case TIFF_FLOAT: if (!TIFFWriteDirectoryTagFloatArray(tif,&ndir,dir,tag,count,tif->tif_dir.td_customValues[m].value)) goto bad; @@ -1515,6 +1605,29 @@ TIFFWriteDirectoryTagSrationalArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, return(TIFFWriteDirectoryTagCheckedSrationalArray(tif,ndir,dir,tag,count,value)); } +/*-- Rational2Double: additional write functions */ +static int +TIFFWriteDirectoryTagRationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value) +{ + if (dir==NULL) + { + (*ndir)++; + return(1); + } + return(TIFFWriteDirectoryTagCheckedRationalDoubleArray(tif,ndir,dir,tag,count,value)); +} + +static int +TIFFWriteDirectoryTagSrationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value) +{ + if (dir==NULL) + { + (*ndir)++; + return(1); + } + return(TIFFWriteDirectoryTagCheckedSrationalDoubleArray(tif,ndir,dir,tag,count,value)); +} + #ifdef notdef static int TIFFWriteDirectoryTagFloat(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, float value) { @@ -1651,22 +1764,52 @@ TIFFWriteDirectoryTagShortLong(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint1 return(TIFFWriteDirectoryTagCheckedLong(tif,ndir,dir,tag,value)); } +static int _WriteAsType(TIFF* tif, uint64 strile_size, uint64 uncompressed_threshold) +{ + const uint16 compression = tif->tif_dir.td_compression; + if ( compression == COMPRESSION_NONE ) + { + return strile_size > uncompressed_threshold; + } + else if ( compression == COMPRESSION_JPEG || + compression == COMPRESSION_LZW || + compression == COMPRESSION_ADOBE_DEFLATE || + compression == COMPRESSION_LZMA || + compression == COMPRESSION_LERC || + compression == COMPRESSION_ZSTD || + compression == COMPRESSION_WEBP ) + { + /* For a few select compression types, we assume that in the worst */ + /* case the compressed size will be 10 times the uncompressed size */ + /* This is overly pessimistic! */ + return strile_size >= uncompressed_threshold / 10; + } + return 1; +} + +static int WriteAsLong8(TIFF* tif, uint64 strile_size) +{ + return _WriteAsType(tif, strile_size, 0xFFFFFFFFU); +} + +static int WriteAsLong4(TIFF* tif, uint64 strile_size) +{ + return _WriteAsType(tif, strile_size, 0xFFFFU); +} +
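/* Editor's note: a self-contained sketch (not part of the patch) of the
 * heuristic above, with the same thresholds: 0xFFFFFFFF decides LONG8 vs
 * LONG, 0xFFFF decides LONG vs SHORT, and for the listed codecs a
 * pessimistic 10x worst-case expansion of the strile size is assumed. */
#include <stdint.h>
#include <stdio.h>
static int write_as_type_sketch(uint64_t strile_size, int compressed, uint64_t threshold)
{
    if (!compressed)
        return strile_size > threshold;   /* COMPRESSION_NONE: exact check */
    return strile_size >= threshold / 10; /* assume 10x worst-case expansion */
}
int main(void)
{
    /* A 1 MiB deflate strip: too big for SHORT byte counts, fine for LONG. */
    printf("LONG8 needed: %d\n", write_as_type_sketch(1 << 20, 1, 0xFFFFFFFFU)); /* 0 */
    printf("LONG needed:  %d\n", write_as_type_sketch(1 << 20, 1, 0xFFFFU));     /* 1 */
    return 0;
}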
 /************************************************************************/ /* TIFFWriteDirectoryTagLongLong8Array() */ /* */ -/* Write out LONG8 array as LONG8 for BigTIFF or LONG for */ -/* Classic TIFF with some checking. */ +/* Write out LONG8 array and write a SHORT/LONG/LONG8 depending */ +/* on strile size and Classic/BigTIFF mode. */ /************************************************************************/ static int TIFFWriteDirectoryTagLongLong8Array(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, uint64* value) { static const char module[] = "TIFFWriteDirectoryTagLongLong8Array"; - uint64* ma; - uint32 mb; - uint32* p; - uint32* q; int o; + int write_aslong4; /* is this just a counting pass? */ if (dir==NULL) @@ -1675,37 +1818,105 @@ TIFFWriteDirectoryTagLongLong8Array(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, return(1); } - /* We always write LONG8 for BigTIFF, no checking needed. */ - if( tif->tif_flags&TIFF_BIGTIFF ) - return TIFFWriteDirectoryTagCheckedLong8Array(tif,ndir,dir, - tag,count,value); - - /* - ** For classic tiff we want to verify everything is in range for LONG - ** and convert to long format. - */ - - p = _TIFFmalloc(count*sizeof(uint32)); - if (p==NULL) + if( tif->tif_dir.td_deferstrilearraywriting ) { - TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); - return(0); + return TIFFWriteDirectoryTagData(tif, ndir, dir, tag, TIFF_NOTYPE, 0, 0, NULL); } - for (q=p, ma=value, mb=0; mb<count; ma++, mb++, q++) + if( tif->tif_flags&TIFF_BIGTIFF ) { - if (*ma>0xFFFFFFFF) + int write_aslong8 = 1; + /* In the case of ByteCounts array, we may be able to write them on */ + /* LONG if the strip/tilesize is not too big. */ + /* Also do that for count > 1 in the case someone would want to create */ + /* a single-strip file with a growing height, in which case using */ + /* LONG8 will be safer. */ + if( count > 1 && tag == TIFFTAG_STRIPBYTECOUNTS ) { - TIFFErrorExt(tif->tif_clientdata,module, - "Attempt to write value larger than 0xFFFFFFFF in Classic TIFF file."); - _TIFFfree(p); + write_aslong8 = WriteAsLong8(tif, TIFFStripSize64(tif)); + } + else if( count > 1 && tag == TIFFTAG_TILEBYTECOUNTS ) + { + write_aslong8 = WriteAsLong8(tif, TIFFTileSize64(tif)); + } + if( write_aslong8 ) + { + return TIFFWriteDirectoryTagCheckedLong8Array(tif,ndir,dir, + tag,count,value); + } + } + + write_aslong4 = 1; + if( count > 1 && tag == TIFFTAG_STRIPBYTECOUNTS ) + { + write_aslong4 = WriteAsLong4(tif, TIFFStripSize64(tif)); + } + else if( count > 1 && tag == TIFFTAG_TILEBYTECOUNTS ) + { + write_aslong4 = WriteAsLong4(tif, TIFFTileSize64(tif)); + } + if( write_aslong4 ) + { + /* + ** For classic tiff we want to verify everything is in range for LONG + ** and convert to long format.
+ */ + + uint32* p = _TIFFmalloc(count*sizeof(uint32)); + uint32* q; + uint64* ma; + uint32 mb; + + if (p==NULL) + { + TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); return(0); } - *q= (uint32)(*ma); - } - o=TIFFWriteDirectoryTagCheckedLongArray(tif,ndir,dir,tag,count,p); - _TIFFfree(p); + for (q=p, ma=value, mb=0; mb<count; ma++, mb++, q++) + { + if (*ma>0xFFFFFFFF) + { + TIFFErrorExt(tif->tif_clientdata,module, + "Attempt to write value larger than 0xFFFFFFFF in LONG array."); + _TIFFfree(p); + return(0); + } + *q= (uint32)(*ma); + } + + o=TIFFWriteDirectoryTagCheckedLongArray(tif,ndir,dir,tag,count,p); + _TIFFfree(p); + } + else + { + uint16* p = _TIFFmalloc(count*sizeof(uint16)); + uint16* q; + uint64* ma; + uint32 mb; + + if (p==NULL) + { + TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); + return(0); + } + + for (q=p, ma=value, mb=0; mb<count; ma++, mb++, q++) + { + if (*ma>0xFFFF) + { + /* Should not happen normally given the check we did before */ + TIFFErrorExt(tif->tif_clientdata,module, + "Attempt to write value larger than 0xFFFF in SHORT array."); + _TIFFfree(p); + return(0); + } + *q= (uint16)(*ma); + } + + o=TIFFWriteDirectoryTagCheckedShortArray(tif,ndir,dir,tag,count,p); + _TIFFfree(p); + } return(o); } @@ -2175,19 +2386,20 @@ TIFFWriteDirectoryTagCheckedSlong8Array(TIFF* tif, uint32* ndir, TIFFDirEntry* d static int TIFFWriteDirectoryTagCheckedRational(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, double value) { - static const char module[] = "TIFFWriteDirectoryTagCheckedRational"; + static const char module[] = "TIFFWriteDirectoryTagCheckedRational"; uint32 m[2]; assert(sizeof(uint32)==4); - if( value < 0 ) - { - TIFFErrorExt(tif->tif_clientdata,module,"Negative value is illegal"); - return 0; - } - else if( value != value ) - { - TIFFErrorExt(tif->tif_clientdata,module,"Not-a-number value is illegal"); - return 0; - } + if (value < 0) + { + TIFFErrorExt(tif->tif_clientdata, module, "Negative value is illegal"); + return 0; + } + else if (value != value) + { + TIFFErrorExt(tif->tif_clientdata, module, "Not-a-number value is illegal"); + return 0; + } +#ifdef not_def else if (value==0.0) { m[0]=0; @@ -2208,6 +2420,15 @@ TIFFWriteDirectoryTagCheckedRational(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, m[0]=0xFFFFFFFF; m[1]=(uint32)(0xFFFFFFFF/value); } +#else + /*--Rational2Double: New function also used for non-custom rational tags.
+ * However, it could be omitted here, because TIFFWriteDirectoryTagCheckedRational() is not used by code for custom tags, + * only by code for named-tiff-tags like FIELD_RESOLUTION and FIELD_POSITION */ + else { + DoubleToRational(value, &m[0], &m[1]); + } +#endif + if (tif->tif_flags&TIFF_SWAB) { TIFFSwabLong(&m[0]); @@ -2234,6 +2455,7 @@ TIFFWriteDirectoryTagCheckedRational(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, } for (na=value, nb=m, nc=0; nc<count; na++, nb+=2, nc++) { + DoubleToRational(*na, &nb[0], &nb[1]); } if (tif->tif_flags&TIFF_SWAB) TIFFSwabArrayOfLong(m,count*2); @@ -2281,6 +2507,7 @@ TIFFWriteDirectoryTagCheckedSrationalArray(TIFF* tif, uint32* ndir, TIFFDirEntry } for (na=value, nb=m, nc=0; nc<count; na++, nb+=2, nc++) { + DoubleToSrational(*na, &nb[0], &nb[1]); } if (tif->tif_flags&TIFF_SWAB) TIFFSwabArrayOfLong((uint32*)m,count*2); @@ -2325,6 +2556,400 @@ TIFFWriteDirectoryTagCheckedSrationalArray(TIFF* tif, uint32* ndir, TIFFDirEntry return(o); } +/*-- Rational2Double: additional write functions for double arrays */ +static int +TIFFWriteDirectoryTagCheckedRationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value) +{ + static const char module[] = "TIFFWriteDirectoryTagCheckedRationalDoubleArray"; + uint32* m; + double* na; + uint32* nb; + uint32 nc; + int o; + assert(sizeof(uint32)==4); + m=_TIFFmalloc(count*2*sizeof(uint32)); + if (m==NULL) + { + TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); + return(0); + } + for (na=value, nb=m, nc=0; nc<count; na++, nb+=2, nc++) + { + DoubleToRational(*na, &nb[0], &nb[1]); + } + if (tif->tif_flags&TIFF_SWAB) + TIFFSwabArrayOfLong(m,count*2); + o=TIFFWriteDirectoryTagData(tif,ndir,dir,tag,TIFF_RATIONAL,count,count*8,&m[0]); + _TIFFfree(m); + return(o); +} /*-- TIFFWriteDirectoryTagCheckedRationalDoubleArray() ------- */ + +static int +TIFFWriteDirectoryTagCheckedSrationalDoubleArray(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, uint32 count, double* value) +{ + static const char module[] = "TIFFWriteDirectoryTagCheckedSrationalDoubleArray"; + int32* m; + double* na; + int32* nb; + uint32 nc; + int o; + assert(sizeof(int32)==4); + m=_TIFFmalloc(count*2*sizeof(int32)); + if (m==NULL) + { + TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); + return(0); + } + for (na=value, nb=m, nc=0; nc<count; na++, nb+=2, nc++) + { + DoubleToSrational(*na, &nb[0], &nb[1]); + } + if (tif->tif_flags&TIFF_SWAB) + TIFFSwabArrayOfLong((uint32*)m,count*2); + o=TIFFWriteDirectoryTagData(tif,ndir,dir,tag,TIFF_SRATIONAL,count,count*8,&m[0]); + _TIFFfree(m); + return(o); +} /*--- TIFFWriteDirectoryTagCheckedSrationalDoubleArray() -------- */ +
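/* Editor's note: an illustrative sketch (not part of the patch) of the
 * layout the checked double-array writers above produce: each RATIONAL
 * becomes two adjacent uint32 words (numerator, denominator). It relies on
 * the static DoubleToRational() defined further below in this file. */
static void pack_rationals_sketch(void)
{
    double vals[2] = { 0.5, 300.0 };
    uint32 packed[4]; /* num0, den0, num1, den1 */
    int i;
    for (i = 0; i < 2; i++)
        DoubleToRational(vals[i], &packed[2 * i], &packed[2 * i + 1]);
    /* expected: { 1, 2, 300, 1 }; count*8 bytes then go to
     * TIFFWriteDirectoryTagData() as a TIFF_RATIONAL array */
    (void)packed;
}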
+#if 0 +static +void DoubleToRational_direct(double value, unsigned long *num, unsigned long *denom) +{ + /*--- OLD Code for debugging and comparison ---- */ + /* code merged from TIFFWriteDirectoryTagCheckedRationalArray() and TIFFWriteDirectoryTagCheckedRational() */ + + /* First check for zero and also check for negative numbers (which are illegal for RATIONAL) + * and also check for "not-a-number". In each case just set it to zero, so that rational arrays are also supported. + */ + if (value<=0.0 || value != value) + { + *num=0; + *denom=1; + } + else if (value <= 0xFFFFFFFFU && (value==(double)(uint32)(value))) /* check for integer values */ + { + *num=(uint32)(value); + *denom=1; + } + else if (value<1.0) + { + *num = (uint32)((value) * (double)0xFFFFFFFFU); + *denom=0xFFFFFFFFU; + } + else + { + *num=0xFFFFFFFFU; + *denom=(uint32)((double)0xFFFFFFFFU/(value)); + } +} /*-- DoubleToRational_direct() -------------- */ +#endif + +#if 0 +static +void DoubleToSrational_direct(double value, long *num, long *denom) +{ + /*--- OLD Code for debugging and comparison -- SIGNED-version ----*/ + /* code was amended from original TIFFWriteDirectoryTagCheckedSrationalArray() */ + + /* First check for zero and also check for negative numbers (which are illegal for RATIONAL) + * and also check for "not-a-number". In each case just set it to zero, so that rational arrays are also supported. + */ + if (value<0.0) + { + if (value==(int32)(value)) + { + *num=(int32)(value); + *denom=1; + } + else if (value>-1.0) + { + *num=-(int32)((-value) * (double)0x7FFFFFFF); + *denom=0x7FFFFFFF; + } + else + { + *num=-0x7FFFFFFF; + *denom=(int32)((double)0x7FFFFFFF / (-value)); + } + } + else + { + if (value==(int32)(value)) + { + *num=(int32)(value); + *denom=1; + } + else if (value<1.0) + { + *num=(int32)((value) *(double)0x7FFFFFFF); + *denom=0x7FFFFFFF; + } + else + { + *num=0x7FFFFFFF; + *denom=(int32)((double)0x7FFFFFFF / (value)); + } + } +} /*-- DoubleToSrational_direct() --------------*/ +#endif + +//#define DOUBLE2RAT_DEBUGOUTPUT +/** ----- Rational2Double: Double To Rational Conversion ---------------------------------------------------------- +* There is a mathematical theorem to convert real numbers into a rational (integer fraction) number. +* This is called "continued fraction" and uses the Euclidean algorithm to find the greatest common divisor (GCD). +* (ref. e.g. https://de.wikipedia.org/wiki/Kettenbruch or https://en.wikipedia.org/wiki/Continued_fraction +* https://en.wikipedia.org/wiki/Euclidean_algorithm) +* The following functions implement the +* - ToRationalEuclideanGCD() auxiliary function which mainly implements the Euclidean GCD +* - DoubleToRational() conversion function for un-signed rationals +* - DoubleToSrational() conversion function for signed rationals +------------------------------------------------------------------------------------------------------------------*/ +
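/* Editor's note: a standalone toy version (not part of the patch) of the
 * continued-fraction idea described above: repeated Euclidean steps yield
 * convergents p/q that approximate x. It omits the range clamping and the
 * dual small/large-range strategy of the real functions below. */
#include <math.h>
#include <stdio.h>
static void to_fraction_sketch(double x, unsigned long long max_den,
                               unsigned long long *num, unsigned long long *den)
{
    unsigned long long p0 = 0, p1 = 1, q0 = 1, q1 = 0; /* convergent seeds */
    int i;
    for (i = 0; i < 64; i++) {
        double fl = floor(x);
        unsigned long long a = (unsigned long long)fl;
        unsigned long long p2, q2;
        if (a * q1 + q0 > max_den) /* next denominator would leave the range */
            break;
        p2 = a * p1 + p0; /* advance convergents: p2 = a*p1 + p0 */
        q2 = a * q1 + q0;
        p0 = p1; p1 = p2;
        q0 = q1; q1 = q2;
        if (x == fl)
            break;            /* exact fraction reached */
        x = 1.0 / (x - fl);   /* continue with the fractional part */
    }
    *num = p1;
    *den = q1;
}
int main(void)
{
    unsigned long long n, d;
    to_fraction_sketch(3.14159265358979, 0xFFFFFFFFULL, &n, &d);
    printf("%llu/%llu\n", n, d); /* a close rational approximation of pi */
    return 0;
}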
+/**---- ToRationalEuclideanGCD() ----------------------------------------- +* Calculates the rational fractional of a double input value +* using the Euclidean algorithm to find the greatest common divisor (GCD) +------------------------------------------------------------------------*/ +static +void ToRationalEuclideanGCD(double value, int blnUseSignedRange, int blnUseSmallRange, unsigned long long *ullNum, unsigned long long *ullDenom) +{ + /* Internally, the integer variables can be bigger than the external ones, + * as long as the result will fit into the external variable size. + */ + unsigned long long val, numSum[3] = { 0, 1, 0 }, denomSum[3] = { 1, 0, 0 }; + unsigned long long aux, bigNum, bigDenom; + unsigned long long returnLimit; + int i; + unsigned long long nMax; + double fMax; + unsigned long maxDenom; + /*-- nMax and fMax define the initial accuracy of the starting fractional, + * or better, the highest integer numbers used within the starting fractional (bigNum/bigDenom). + * There are two approaches, which can accidentally lead to different accuracies just depending on the value. + * Therefore, blnUseSmallRange steers this behavior. + * For long long nMax = ((9223372036854775807-1)/2); for long nMax = ((2147483647-1)/2); + */ + if (blnUseSmallRange) { + nMax = (unsigned long long)((2147483647 - 1) / 2); /* for ULONG range */ + } + else { + nMax = ((9223372036854775807 - 1) / 2); /* for ULLONG range */ + } + fMax = (double)nMax; + + /*-- For the Euclidean GCD define the denominator range, so that it stays within size of unsigned long variables. + * maxDenom should be LONG_MAX for negative values and ULONG_MAX for positive ones. + * Also the final returned value of ullNum and ullDenom is limited according to signed- or unsigned-range. + */ + if (blnUseSignedRange) { + maxDenom = 2147483647UL; /*LONG_MAX = 0x7FFFFFFFUL*/ + returnLimit = maxDenom; + } + else { + maxDenom = 0xFFFFFFFFUL; /*ULONG_MAX = 0xFFFFFFFFUL*/ + returnLimit = maxDenom; + } + + /*-- First generate a rational fraction (bigNum/bigDenom) which represents the value + * as a rational number with the highest accuracy. Therefore, unsigned long long (uint64) is needed. + * This rational fraction is then reduced using the Euclidean algorithm to find the greatest common divisor (GCD). + * bigNum = big numerator of value without fraction (or cut residual fraction) + * bigDenom = big denominator of value + *-- Break criteria so that the uint64 cast to "bigNum" introduces no error and bigDenom has no overflow, + * and stop with enlargement of fraction when the double-value of it reaches an integer number without fractional part. + */ + bigDenom = 1; + while ((value != floor(value)) && (value < fMax) && (bigDenom < nMax)) { + bigDenom <<= 1; + value *= 2; + } + bigNum = (unsigned long long)value; + + /*-- Start Euclidean algorithm to find the greatest common divisor (GCD) -- */ +#define MAX_ITERATIONS 64 + for (i = 0; i < MAX_ITERATIONS; i++) { + /* if bigDenom is not zero, calculate integer part of fraction. */ + if (bigDenom == 0) { + val = 0; + break; + } + else { + val = bigNum / bigDenom; + } + + /* Set bigDenom to remainder of bigNum/bigDenom and bigNum to previous denominator bigDenom. */ + aux = bigNum; + bigNum = bigDenom; + bigDenom = aux % bigDenom; + + /* calculate next denominator and check for its given maximum */ + aux = val; + if (denomSum[1] * val + denomSum[0] >= maxDenom) { + aux = (maxDenom - denomSum[0]) / denomSum[1]; + if (aux * 2 >= val || denomSum[1] >= maxDenom) + i = (MAX_ITERATIONS + 1); /* exit but execute rest of for-loop */ + else + break; + } + /* calculate next numerator to numSum2 and save previous one to numSum0; numSum1 just copy of numSum2. */ + numSum[2] = aux * numSum[1] + numSum[0]; + numSum[0] = numSum[1]; + numSum[1] = numSum[2]; + /* calculate next denominator to denomSum2 and save previous one to denomSum0; denomSum1 just copy of denomSum2.
*/ + denomSum[2] = aux * denomSum[1] + denomSum[0]; + denomSum[0] = denomSum[1]; + denomSum[1] = denomSum[2]; + } + + /*-- Check and adapt for final variable size and return values; reduces internal accuracy; denominator is kept in ULONG-range with maxDenom -- */ + while (numSum[1] > returnLimit || denomSum[1] > returnLimit) { + numSum[1] = numSum[1] / 2; + denomSum[1] = denomSum[1] / 2; + } + + /* return values */ + *ullNum = numSum[1]; + *ullDenom = denomSum[1]; + +} /*-- ToRationalEuclideanGCD() -------------- */ + + +/**---- DoubleToRational() ----------------------------------------------- +* Calculates the rational fractional of a double input value +* for UN-SIGNED rationals, +* using the Euclidean algorithm to find the greatest common divisor (GCD) +------------------------------------------------------------------------*/ +static +void DoubleToRational(double value, uint32 *num, uint32 *denom) +{ + /*---- UN-SIGNED RATIONAL ---- */ + double dblDiff, dblDiff2; + unsigned long long ullNum, ullDenom, ullNum2, ullDenom2; + + /*-- Check for negative values. If so it is an error. */ + /* Test written that way to catch NaN */ + if (!(value >= 0)) { + *num = *denom = 0; + TIFFErrorExt(0, "TIFFLib: DoubleToRational()", " Negative Value for Unsigned Rational given."); + return; + } + + /*-- Check for too big numbers (> ULONG_MAX) -- */ + if (value > 0xFFFFFFFFUL) { + *num = 0xFFFFFFFFU; + *denom = 0; + return; + } + /*-- Check for easy integer numbers -- */ + if (value == (uint32)(value)) { + *num = (uint32)value; + *denom = 1; + return; + } + /*-- Check for too small numbers for "unsigned long" type rationals -- */ + if (value < 1.0 / (double)0xFFFFFFFFUL) { + *num = 0; + *denom = 0xFFFFFFFFU; + return; + } + + /*-- There are two approaches using the Euclidean algorithm, + * which can accidentally lead to different accuracies just depending on the value. + * Try both and decide which one is better. + */ + ToRationalEuclideanGCD(value, FALSE, FALSE, &ullNum, &ullDenom); + ToRationalEuclideanGCD(value, FALSE, TRUE, &ullNum2, &ullDenom2); + /*-- Double-check that the returned values fit into ULONG: */ + if (ullNum > 0xFFFFFFFFUL || ullDenom > 0xFFFFFFFFUL || ullNum2 > 0xFFFFFFFFUL || ullDenom2 > 0xFFFFFFFFUL) { +#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) + TIFFErrorExt(0, "TIFFLib: DoubleToRational()", " Num or Denom exceeds ULONG: val=%14.6f, num=%I64u, denom=%I64u | num2=%I64u, denom2=%I64u", value, ullNum, ullDenom, ullNum2, ullDenom2); +#else + TIFFErrorExt(0, "TIFFLib: DoubleToRational()", " Num or Denom exceeds ULONG: val=%14.6f, num=%12llu, denom=%12llu | num2=%12llu, denom2=%12llu", value, ullNum, ullDenom, ullNum2, ullDenom2); +#endif + assert(0); + } + + /* Check which one has higher accuracy and take that.
*/ + dblDiff = fabs(value - ((double)ullNum / (double)ullDenom)); + dblDiff2 = fabs(value - ((double)ullNum2 / (double)ullDenom2)); + if (dblDiff < dblDiff2) { + *num = (uint32)ullNum; + *denom = (uint32)ullDenom; + } + else { + *num = (uint32)ullNum2; + *denom = (uint32)ullDenom2; + } +} /*-- DoubleToRational() -------------- */ + +/**---- DoubleToSrational() ----------------------------------------------- +* Calculates the rational fractional of a double input value +* for SIGNED rationals, +* using the Euclidean algorithm to find the greatest common divisor (GCD) +------------------------------------------------------------------------*/ +static +void DoubleToSrational(double value, int32 *num, int32 *denom) +{ + /*---- SIGNED RATIONAL ----*/ + int neg = 1; + double dblDiff, dblDiff2; + unsigned long long ullNum, ullDenom, ullNum2, ullDenom2; + + /*-- Check for negative values, and then use the positive one for internal calculations, but take the sign into account before returning. */ + if (value < 0) { neg = -1; value = -value; } + + /*-- Check for too big numbers (> LONG_MAX) -- */ + if (value > 0x7FFFFFFFL) { + *num = 0x7FFFFFFFL; + *denom = 0; + return; + } + /*-- Check for easy numbers -- */ + if (value == (int32)(value)) { + *num = (int32)(neg * value); + *denom = 1; + return; + } + /*-- Check for too small numbers for "long" type rationals -- */ + if (value < 1.0 / (double)0x7FFFFFFFL) { + *num = 0; + *denom = 0x7FFFFFFFL; + return; + } + + /*-- There are two approaches using the Euclidean algorithm, + * which can accidentally lead to different accuracies just depending on the value. + * Try both and decide which one is better. + * Furthermore, set behavior of ToRationalEuclideanGCD() to the range of signed-long. + */ + ToRationalEuclideanGCD(value, TRUE, FALSE, &ullNum, &ullDenom); + ToRationalEuclideanGCD(value, TRUE, TRUE, &ullNum2, &ullDenom2); + /*-- Double-check that the returned values fit into LONG: */ + if (ullNum > 0x7FFFFFFFL || ullDenom > 0x7FFFFFFFL || ullNum2 > 0x7FFFFFFFL || ullDenom2 > 0x7FFFFFFFL) { +#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) + TIFFErrorExt(0, "TIFFLib: DoubleToSrational()", " Num or Denom exceeds LONG: val=%14.6f, num=%I64u, denom=%I64u | num2=%I64u, denom2=%I64u", neg*value, ullNum, ullDenom, ullNum2, ullDenom2); +#else + TIFFErrorExt(0, "TIFFLib: DoubleToSrational()", " Num or Denom exceeds LONG: val=%14.6f, num=%12llu, denom=%12llu | num2=%12llu, denom2=%12llu", neg*value, ullNum, ullDenom, ullNum2, ullDenom2); +#endif + assert(0); + } + + /* Check which one has higher accuracy and take that.
*/ + dblDiff = fabs(value - ((double)ullNum / (double)ullDenom)); + dblDiff2 = fabs(value - ((double)ullNum2 / (double)ullDenom2)); + if (dblDiff < dblDiff2) { + *num = (int32)(neg * (long)ullNum); + *denom = (int32)ullDenom; + } + else { + *num = (int32)(neg * (long)ullNum2); + *denom = (int32)ullDenom2; + } +} /*-- DoubleToSrational() --------------*/ + + + + + #ifdef notdef static int TIFFWriteDirectoryTagCheckedFloat(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag, float value) @@ -2420,7 +3045,12 @@ TIFFWriteDirectoryTagData(TIFF* tif, uint32* ndir, TIFFDirEntry* dir, uint16 tag dir[m].tdir_count=count; dir[m].tdir_offset.toff_long8 = 0; if (datalength<=((tif->tif_flags&TIFF_BIGTIFF)?0x8U:0x4U)) - _TIFFmemcpy(&dir[m].tdir_offset,data,datalength); + { + if( data && datalength ) + { + _TIFFmemcpy(&dir[m].tdir_offset,data,datalength); + } + } else { uint64 na,nb; @@ -2812,13 +3442,60 @@ _TIFFRewriteField(TIFF* tif, uint16 tag, TIFFDataType in_datatype, TIFFSwabLong8( &entry_offset ); } +/* -------------------------------------------------------------------- */ +/* When a dummy tag was written due to TIFFDeferStrileArrayWriting() */ +/* -------------------------------------------------------------------- */ + if( entry_offset == 0 && entry_count == 0 && entry_type == 0 ) + { + if( tag == TIFFTAG_TILEOFFSETS || tag == TIFFTAG_STRIPOFFSETS ) + { + entry_type = (tif->tif_flags&TIFF_BIGTIFF) ? TIFF_LONG8 : TIFF_LONG; + } + else + { + int write_aslong8 = 1; + if( count > 1 && tag == TIFFTAG_STRIPBYTECOUNTS ) + { + write_aslong8 = WriteAsLong8(tif, TIFFStripSize64(tif)); + } + else if( count > 1 && tag == TIFFTAG_TILEBYTECOUNTS ) + { + write_aslong8 = WriteAsLong8(tif, TIFFTileSize64(tif)); + } + if( write_aslong8 ) + { + entry_type = TIFF_LONG8; + } + else + { + int write_aslong4 = 1; + if( count > 1 && tag == TIFFTAG_STRIPBYTECOUNTS ) + { + write_aslong4 = WriteAsLong4(tif, TIFFStripSize64(tif)); + } + else if( count > 1 && tag == TIFFTAG_TILEBYTECOUNTS ) + { + write_aslong4 = WriteAsLong4(tif, TIFFTileSize64(tif)); + } + if( write_aslong4 ) + { + entry_type = TIFF_LONG; + } + else + { + entry_type = TIFF_SHORT; + } + } + } + } + /* -------------------------------------------------------------------- */ /* What data type do we want to write this as? */ /* -------------------------------------------------------------------- */ if( TIFFDataWidth(in_datatype) == 8 && !(tif->tif_flags&TIFF_BIGTIFF) ) { if( in_datatype == TIFF_LONG8 ) - datatype = TIFF_LONG; + datatype = entry_type == TIFF_SHORT ? TIFF_SHORT : TIFF_LONG; else if( in_datatype == TIFF_SLONG8 ) datatype = TIFF_SLONG; else if( in_datatype == TIFF_IFD8 ) @@ -2826,8 +3503,21 @@ _TIFFRewriteField(TIFF* tif, uint16 tag, TIFFDataType in_datatype, else datatype = in_datatype; } - else - datatype = in_datatype; + else + { + if( in_datatype == TIFF_LONG8 && + (entry_type == TIFF_SHORT || entry_type == TIFF_LONG || + entry_type == TIFF_LONG8 ) ) + datatype = entry_type; + else if( in_datatype == TIFF_SLONG8 && + (entry_type == TIFF_SLONG || entry_type == TIFF_SLONG8 ) ) + datatype = entry_type; + else if( in_datatype == TIFF_IFD8 && + (entry_type == TIFF_IFD || entry_type == TIFF_IFD8 ) ) + datatype = entry_type; + else + datatype = in_datatype; + } /* -------------------------------------------------------------------- */ /* Prepare buffer of actual data to write. 
This includes */ @@ -2876,6 +3566,29 @@ _TIFFRewriteField(TIFF* tif, uint16 tag, TIFFDataType in_datatype, } } } + else if( datatype == TIFF_SHORT && in_datatype == TIFF_LONG8 ) + { + tmsize_t i; + + for( i = 0; i < count; i++ ) + { + ((uint16 *) buf_to_write)[i] = + (uint16) ((uint64 *) data)[i]; + if( (uint64) ((uint16 *) buf_to_write)[i] != ((uint64 *) data)[i] ) + { + _TIFFfree( buf_to_write ); + TIFFErrorExt( tif->tif_clientdata, module, + "Value exceeds 16bit range of output type." ); + return 0; + } + } + } + else + { + TIFFErrorExt( tif->tif_clientdata, module, + "Unhandled type conversion." ); + return 0; + } if( TIFFDataWidth(datatype) > 1 && (tif->tif_flags&TIFF_SWAB) ) { @@ -2907,6 +3620,23 @@ _TIFFRewriteField(TIFF* tif, uint16 tag, TIFFDataType in_datatype, } } + if( (tag == TIFFTAG_TILEOFFSETS || tag == TIFFTAG_STRIPOFFSETS) && + tif->tif_dir.td_stripoffset_entry.tdir_count == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_type == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_offset.toff_long8 == 0 ) + { + tif->tif_dir.td_stripoffset_entry.tdir_type = datatype; + tif->tif_dir.td_stripoffset_entry.tdir_count = count; + } + else if( (tag == TIFFTAG_TILEBYTECOUNTS || tag == TIFFTAG_STRIPBYTECOUNTS) && + tif->tif_dir.td_stripbytecount_entry.tdir_count == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_type == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_offset.toff_long8 == 0 ) + { + tif->tif_dir.td_stripbytecount_entry.tdir_type = datatype; + tif->tif_dir.td_stripbytecount_entry.tdir_count = count; + } + /* -------------------------------------------------------------------- */ /* If the tag type, and count match, then we just write it out */ /* over the old values without altering the directory entry at */ @@ -2958,6 +3688,7 @@ _TIFFRewriteField(TIFF* tif, uint16 tag, TIFFDataType in_datatype, /* Adjust the directory entry. */ /* -------------------------------------------------------------------- */ entry_type = datatype; + entry_count = (uint64)count; memcpy( direntry_raw + 2, &entry_type, sizeof(uint16) ); if (tif->tif_flags&TIFF_SWAB) TIFFSwabShort( (uint16 *) (direntry_raw + 2) ); diff --git a/3rdparty/libtiff/tif_fax3.c b/3rdparty/libtiff/tif_fax3.c index d11c968444..9ab5b26ad3 100644 --- a/3rdparty/libtiff/tif_fax3.c +++ b/3rdparty/libtiff/tif_fax3.c @@ -73,6 +73,7 @@ typedef struct { int EOLcnt; /* count of EOL codes recognized */ TIFFFaxFillFunc fill; /* fill routine */ uint32* runs; /* b&w runs for current/previous row */ + uint32 nruns; /* size of the refruns / curruns arrays */ uint32* refruns; /* runs for reference line */ uint32* curruns; /* runs for current line */ @@ -160,7 +161,9 @@ Fax3PreDecode(TIFF* tif, uint16 s) */ sp->bitmap = TIFFGetBitRevTable(tif->tif_dir.td_fillorder != FILLORDER_LSB2MSB); + sp->curruns = sp->runs; if (sp->refruns) { /* init reference line to white */ + sp->refruns = sp->runs + sp->nruns; sp->refruns[0] = (uint32) sp->b.rowpixels; sp->refruns[1] = 0; } @@ -218,8 +221,12 @@ Fax3PrematureEOF(const char* module, TIFF* tif, uint32 line, uint32 a0) #define Nop -/* +/** * Decode the requested amount of G3 1D-encoded data. 
+ * @param buf destination buffer + * @param occ available bytes in destination buffer + * @param s number of planes (ignored) + * @returns 1 for success, -1 in case of error */ static int Fax3Decode1D(TIFF* tif, uint8* buf, tmsize_t occ, uint16 s) @@ -300,7 +307,9 @@ Fax3Decode2D(TIFF* tif, uint8* buf, tmsize_t occ, uint16 s) else EXPAND2D(EOF2Da); (*sp->fill)(buf, thisrun, pa, lastx); - SETVALUE(0); /* imaginary change for reference */ + if (pa < thisrun + sp->nruns) { + SETVALUE(0); /* imaginary change for reference */ + } SWAP(uint32*, sp->curruns, sp->refruns); buf += sp->b.rowbytes; occ -= sp->b.rowbytes; @@ -506,7 +515,7 @@ Fax3SetupState(TIFF* tif) int needsRefLine; Fax3CodecState* dsp = (Fax3CodecState*) Fax3State(tif); tmsize_t rowbytes; - uint32 rowpixels, nruns; + uint32 rowpixels; if (td->td_bitspersample != 1) { TIFFErrorExt(tif->tif_clientdata, module, @@ -523,6 +532,13 @@ Fax3SetupState(TIFF* tif) rowbytes = TIFFScanlineSize(tif); rowpixels = td->td_imagewidth; } + if ((uint64)rowbytes < ((uint64)rowpixels + 7) / 8) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Inconsistent number of bytes per row : rowbytes=%lu rowpixels=%lu", + (unsigned long)(rowbytes), (unsigned long)(rowpixels)); + return (0); + } sp->rowbytes = rowbytes; sp->rowpixels = rowpixels; /* @@ -539,26 +555,26 @@ Fax3SetupState(TIFF* tif) TIFFroundup and TIFFSafeMultiply return zero on integer overflow */ dsp->runs=(uint32*) NULL; - nruns = TIFFroundup_32(rowpixels,32); + dsp->nruns = TIFFroundup_32(rowpixels,32); if (needsRefLine) { - nruns = TIFFSafeMultiply(uint32,nruns,2); + dsp->nruns = TIFFSafeMultiply(uint32,dsp->nruns,2); } - if ((nruns == 0) || (TIFFSafeMultiply(uint32,nruns,2) == 0)) { + if ((dsp->nruns == 0) || (TIFFSafeMultiply(uint32,dsp->nruns,2) == 0)) { TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Row pixels integer overflow (rowpixels %u)", rowpixels); return (0); } dsp->runs = (uint32*) _TIFFCheckMalloc(tif, - TIFFSafeMultiply(uint32,nruns,2), + TIFFSafeMultiply(uint32,dsp->nruns,2), sizeof (uint32), "for Group 3/4 run arrays"); if (dsp->runs == NULL) return (0); - memset( dsp->runs, 0, TIFFSafeMultiply(uint32,nruns,2)*sizeof(uint32)); + memset( dsp->runs, 0, TIFFSafeMultiply(uint32,dsp->nruns,2)*sizeof(uint32)); dsp->curruns = dsp->runs; if (needsRefLine) - dsp->refruns = dsp->runs + nruns; + dsp->refruns = dsp->runs + dsp->nruns; else dsp->refruns = NULL; if (td->td_compression == COMPRESSION_CCITTFAX3 @@ -594,15 +610,19 @@ Fax3SetupState(TIFF* tif) */ #define Fax3FlushBits(tif, sp) { \ - if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize) \ - (void) TIFFFlushData1(tif); \ + if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize) { \ + if( !TIFFFlushData1(tif) ) \ + return 0; \ + } \ *(tif)->tif_rawcp++ = (uint8) (sp)->data; \ (tif)->tif_rawcc++; \ (sp)->data = 0, (sp)->bit = 8; \ } #define _FlushBits(tif) { \ - if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize) \ - (void) TIFFFlushData1(tif); \ + if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize) { \ + if( !TIFFFlushData1(tif) ) \ + return 0; \ + } \ *(tif)->tif_rawcp++ = (uint8) data; \ (tif)->tif_rawcc++; \ data = 0, bit = 8; \ @@ -627,7 +647,7 @@ static const int _msbmask[9] = * the output stream. Values are * assumed to be at most 16 bits. 
*/ -static void +static int Fax3PutBits(TIFF* tif, unsigned int bits, unsigned int length) { Fax3CodecState* sp = EncoderState(tif); @@ -638,6 +658,7 @@ Fax3PutBits(TIFF* tif, unsigned int bits, unsigned int length) sp->data = data; sp->bit = bit; + return 1; } /* @@ -662,7 +683,7 @@ Fax3PutBits(TIFF* tif, unsigned int bits, unsigned int length) * appropriate table that holds the make-up and * terminating codes is supplied. */ -static void +static int putspan(TIFF* tif, int32 span, const tableentry* tab) { Fax3CodecState* sp = EncoderState(tif); @@ -700,6 +721,8 @@ putspan(TIFF* tif, int32 span, const tableentry* tab) sp->data = data; sp->bit = bit; + + return 1; } /* @@ -708,7 +731,7 @@ putspan(TIFF* tif, int32 span, const tableentry* tab) * here. We also handle writing the tag bit for the next * scanline when doing 2d encoding. */ -static void +static int Fax3PutEOL(TIFF* tif) { Fax3CodecState* sp = EncoderState(tif); @@ -742,6 +765,8 @@ Fax3PutEOL(TIFF* tif) sp->data = data; sp->bit = bit; + + return 1; } /* @@ -991,12 +1016,14 @@ Fax3Encode1DRow(TIFF* tif, unsigned char* bp, uint32 bits) for (;;) { span = find0span(bp, bs, bits); /* white span */ - putspan(tif, span, TIFFFaxWhiteCodes); + if( !putspan(tif, span, TIFFFaxWhiteCodes) ) + return 0; bs += span; if (bs >= bits) break; span = find1span(bp, bs, bits); /* black span */ - putspan(tif, span, TIFFFaxBlackCodes); + if( !putspan(tif, span, TIFFFaxBlackCodes) ) + return 0; bs += span; if (bs >= bits) break; @@ -1048,21 +1075,28 @@ Fax3Encode2DRow(TIFF* tif, unsigned char* bp, unsigned char* rp, uint32 bits) (b1 < a1 && a1 - b1 <= 3U) ? -(int32)(a1 - b1) : 0x7FFFFFFF; if (!(-3 <= d && d <= 3)) { /* horizontal mode */ a2 = finddiff2(bp, a1, bits, PIXEL(bp,a1)); - putcode(tif, &horizcode); + if( !putcode(tif, &horizcode) ) + return 0; if (a0+a1 == 0 || PIXEL(bp, a0) == 0) { - putspan(tif, a1-a0, TIFFFaxWhiteCodes); - putspan(tif, a2-a1, TIFFFaxBlackCodes); + if( !putspan(tif, a1-a0, TIFFFaxWhiteCodes) ) + return 0; + if( !putspan(tif, a2-a1, TIFFFaxBlackCodes) ) + return 0; } else { - putspan(tif, a1-a0, TIFFFaxBlackCodes); - putspan(tif, a2-a1, TIFFFaxWhiteCodes); + if( !putspan(tif, a1-a0, TIFFFaxBlackCodes) ) + return 0; + if( !putspan(tif, a2-a1, TIFFFaxWhiteCodes) ) + return 0; } a0 = a2; } else { /* vertical mode */ - putcode(tif, &vcodes[d+3]); + if( !putcode(tif, &vcodes[d+3]) ) + return 0; a0 = a1; } } else { /* pass mode */ - putcode(tif, &passcode); + if( !putcode(tif, &passcode) ) + return 0; a0 = b2; } if (a0 >= bits) @@ -1091,7 +1125,10 @@ Fax3Encode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) } while (cc > 0) { if ((sp->b.mode & FAXMODE_NOEOL) == 0) - Fax3PutEOL(tif); + { + if( !Fax3PutEOL(tif) ) + return 0; + } if (is2DEncoding(sp)) { if (sp->tag == G3_1D) { if (!Fax3Encode1DRow(tif, bp, sp->b.rowpixels)) @@ -1128,8 +1165,8 @@ Fax3PostEncode(TIFF* tif) return (1); } -static void -Fax3Close(TIFF* tif) +static int +_Fax3Close(TIFF* tif) { if ((Fax3State(tif)->mode & FAXMODE_NORTC) == 0 && tif->tif_rawcp) { Fax3CodecState* sp = EncoderState(tif); @@ -1145,6 +1182,13 @@ Fax3Close(TIFF* tif) Fax3PutBits(tif, code, length); Fax3FlushBits(tif, sp); } + return 1; +} + +static void +Fax3Close(TIFF* tif) +{ + _Fax3Close(tif); } static void @@ -1453,6 +1497,13 @@ Fax4Decode(TIFF* tif, uint8* buf, tmsize_t occ, uint16 s) EXPAND2D(EOFG4); if (EOLcnt) goto EOFG4; + if (((lastx + 7) >> 3) > (int)occ) /* check for buffer overrun */ + { + TIFFErrorExt(tif->tif_clientdata, module, + "Buffer overrun detected : %d bytes available, %d bits 
needed", + (int)occ, lastx); + return -1; + } (*sp->fill)(buf, thisrun, pa, lastx); SETVALUE(0); /* imaginary change for reference */ SWAP(uint32*, sp->curruns, sp->refruns); @@ -1468,6 +1519,13 @@ Fax4Decode(TIFF* tif, uint8* buf, tmsize_t occ, uint16 s) fputs( "Bad EOFB\n", stderr ); #endif ClrBits( 13 ); + if (((lastx + 7) >> 3) > (int)occ) /* check for buffer overrun */ + { + TIFFErrorExt(tif->tif_clientdata, module, + "Buffer overrun detected : %d bytes available, %d bits needed", + (int)occ, lastx); + return -1; + } (*sp->fill)(buf, thisrun, pa, lastx); UNCACHE_STATE(tif, sp); return ( sp->line ? 1 : -1); /* don't error on badly-terminated strips */ diff --git a/3rdparty/libtiff/tif_fax3.h b/3rdparty/libtiff/tif_fax3.h index abadcd97a2..701716cc18 100644 --- a/3rdparty/libtiff/tif_fax3.h +++ b/3rdparty/libtiff/tif_fax3.h @@ -240,6 +240,11 @@ static const char* StateNames[] = { * current row and reset decoding state. */ #define SETVALUE(x) do { \ + if (pa >= thisrun + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ *pa++ = RunLength + (x); \ a0 += (x); \ RunLength = 0; \ @@ -377,6 +382,11 @@ done1d: \ */ #define CHECK_b1 do { \ if (pa != thisrun) while (b1 <= a0 && b1 < lastx) { \ + if( pb + 1 >= sp->refruns + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ b1 += pb[0] + pb[1]; \ pb += 2; \ } \ @@ -387,10 +397,20 @@ done1d: \ */ #define EXPAND2D(eoflab) do { \ while (a0 < lastx) { \ + if (pa >= thisrun + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ LOOKUP8(7, TIFFFaxMainTable, eof2d); \ switch (TabEnt->State) { \ case S_Pass: \ CHECK_b1; \ + if( pb + 1 >= sp->refruns + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ b1 += *pb++; \ RunLength += b1 - a0; \ a0 = b1; \ @@ -469,20 +489,28 @@ done1d: \ case S_V0: \ CHECK_b1; \ SETVALUE(b1 - a0); \ + if( pb >= sp->refruns + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ b1 += *pb++; \ break; \ case S_VR: \ CHECK_b1; \ SETVALUE(b1 - a0 + TabEnt->Param); \ + if( pb >= sp->refruns + sp->nruns) { \ + TIFFErrorExt(tif->tif_clientdata, module, "Buffer overflow at line %u of %s %u", \ + sp->line, isTiled(tif) ? "tile" : "strip", isTiled(tif) ? 
tif->tif_curtile : tif->tif_curstrip); \ + return (-1); \ + } \ b1 += *pb++; \ break; \ case S_VL: \ CHECK_b1; \ - if (b1 <= (int) (a0 + TabEnt->Param)) { \ - if (b1 < (int) (a0 + TabEnt->Param) || pa != thisrun) { \ - unexpected("VL", a0); \ - goto eol2d; \ - } \ + if (b1 < (int) (a0 + TabEnt->Param)) { \ + unexpected("VL", a0); \ + goto eol2d; \ } \ SETVALUE(b1 - a0 - TabEnt->Param); \ b1 -= *--pb; \ @@ -529,6 +557,7 @@ eol2d: \ CLEANUP_RUNS(); \ } while (0) #endif /* _FAX3_ */ +/* vim: set ts=8 sts=4 sw=4 noet: */ /* * Local Variables: * mode: c diff --git a/3rdparty/libtiff/tif_fax3sm.c b/3rdparty/libtiff/tif_fax3sm.c index 822191ecf4..ba2fc532e8 100644 --- a/3rdparty/libtiff/tif_fax3sm.c +++ b/3rdparty/libtiff/tif_fax3sm.c @@ -1,5 +1,6 @@ /* WARNING, this file was automatically generated by the mkg3states program */ +#include <stdint.h> #include "tiff.h" #include "tif_fax3.h" const TIFFFaxTabEnt TIFFFaxMainTable[128] = { diff --git a/3rdparty/libtiff/tif_flush.c b/3rdparty/libtiff/tif_flush.c index 881fac5121..f7fa2072ab 100644 --- a/3rdparty/libtiff/tif_flush.c +++ b/3rdparty/libtiff/tif_flush.c @@ -45,36 +45,8 @@ TIFFFlush(TIFF* tif) && !(tif->tif_flags & TIFF_DIRTYDIRECT) && tif->tif_mode == O_RDWR ) { - uint64 *offsets=NULL, *sizes=NULL; - - if( TIFFIsTiled(tif) ) - { - if( TIFFGetField( tif, TIFFTAG_TILEOFFSETS, &offsets ) - && TIFFGetField( tif, TIFFTAG_TILEBYTECOUNTS, &sizes ) - && _TIFFRewriteField( tif, TIFFTAG_TILEOFFSETS, TIFF_LONG8, - tif->tif_dir.td_nstrips, offsets ) - && _TIFFRewriteField( tif, TIFFTAG_TILEBYTECOUNTS, TIFF_LONG8, - tif->tif_dir.td_nstrips, sizes ) ) - { - tif->tif_flags &= ~TIFF_DIRTYSTRIP; - tif->tif_flags &= ~TIFF_BEENWRITING; - return 1; - } - } - else - { - if( TIFFGetField( tif, TIFFTAG_STRIPOFFSETS, &offsets ) - && TIFFGetField( tif, TIFFTAG_STRIPBYTECOUNTS, &sizes ) - && _TIFFRewriteField( tif, TIFFTAG_STRIPOFFSETS, TIFF_LONG8, - tif->tif_dir.td_nstrips, offsets ) - && _TIFFRewriteField( tif, TIFFTAG_STRIPBYTECOUNTS, TIFF_LONG8, - tif->tif_dir.td_nstrips, sizes ) ) - { - tif->tif_flags &= ~TIFF_DIRTYSTRIP; - tif->tif_flags &= ~TIFF_BEENWRITING; - return 1; - } - } + if( TIFFForceStrileArrayWriting(tif) ) + return 1; } if ((tif->tif_flags & (TIFF_DIRTYDIRECT|TIFF_DIRTYSTRIP)) @@ -84,6 +56,92 @@ ... return (1); } +/* + * This is an advanced writing function that must be used in a particular + * sequence, and together with TIFFDeferStrileArrayWriting(), + * to achieve its intended effect. Its aim is to force the writing of + * the [Strip/Tile][Offsets/ByteCounts] arrays at the end of the file, when + * they have not yet been rewritten. + * + * The typical sequence of calls is: + * TIFFOpen() + * [ TIFFCreateDirectory(tif) ] + * Set fields with calls to TIFFSetField(tif, ...) + * TIFFDeferStrileArrayWriting(tif) + * TIFFWriteCheck(tif, ...) + * TIFFWriteDirectory(tif) + * ... potentially create other directories and come back to the above directory + * TIFFForceStrileArrayWriting(tif) + * + * Returns 1 in case of success, 0 otherwise.
+ */ +int TIFFForceStrileArrayWriting(TIFF* tif) +{ + static const char module[] = "TIFFForceStrileArrayWriting"; + const int isTiled = TIFFIsTiled(tif); + + if (tif->tif_mode == O_RDONLY) + { + TIFFErrorExt(tif->tif_clientdata, tif->tif_name, + "File opened in read-only mode"); + return 0; + } + if( tif->tif_diroff == 0 ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Directory has not yet been written"); + return 0; + } + if( (tif->tif_flags & TIFF_DIRTYDIRECT) != 0 ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Directory has changes other than the strile arrays. " + "TIFFRewriteDirectory() should be called instead"); + return 0; + } + + if( !(tif->tif_flags & TIFF_DIRTYSTRIP) ) + { + if( !(tif->tif_dir.td_stripoffset_entry.tdir_tag != 0 && + tif->tif_dir.td_stripoffset_entry.tdir_count == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_type == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_offset.toff_long8 == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_tag != 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_count == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_type == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_offset.toff_long8 == 0) ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Function not called together with " + "TIFFDeferStrileArrayWriting()"); + return 0; + } + + if (tif->tif_dir.td_stripoffset_p == NULL && !TIFFSetupStrips(tif)) + return 0; + } + + if( _TIFFRewriteField( tif, + isTiled ? TIFFTAG_TILEOFFSETS : + TIFFTAG_STRIPOFFSETS, + TIFF_LONG8, + tif->tif_dir.td_nstrips, + tif->tif_dir.td_stripoffset_p ) + && _TIFFRewriteField( tif, + isTiled ? TIFFTAG_TILEBYTECOUNTS : + TIFFTAG_STRIPBYTECOUNTS, + TIFF_LONG8, + tif->tif_dir.td_nstrips, + tif->tif_dir.td_stripbytecount_p ) ) + { + tif->tif_flags &= ~TIFF_DIRTYSTRIP; + tif->tif_flags &= ~TIFF_BEENWRITING; + return 1; + } + + return 0; +} +
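/* Editor's note: a small sketch (not part of the patch) of the simplified
 * TIFFFlush() path above: rewriting strip data of an existing file and
 * flushing now goes through TIFFForceStrileArrayWriting() instead of the
 * removed open-coded _TIFFRewriteField() calls. The file name and the
 * minimal error handling are illustrative only. */
#include "tiffio.h"
static int rewrite_first_strip_sketch(unsigned char* buf, tmsize_t size)
{
    int ok;
    TIFF* tif = TIFFOpen("in.tif", "r+");     /* update mode */
    if (!tif) return 0;
    TIFFWriteEncodedStrip(tif, 0, buf, size); /* marks strile arrays dirty */
    ok = TIFFFlush(tif);                      /* strile-only rewrite path */
    TIFFClose(tif);
    return ok;
}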
 /* * Flush buffered data to the file. * diff --git a/3rdparty/libtiff/tif_getimage.c b/3rdparty/libtiff/tif_getimage.c index 6a9d5a7c0c..3460af744e 100644 --- a/3rdparty/libtiff/tif_getimage.c +++ b/3rdparty/libtiff/tif_getimage.c @@ -29,6 +29,7 @@ */ #include "tiffiop.h" #include <stdio.h> +#include <limits.h> static int gtTileContig(TIFFRGBAImage*, uint32*, uint32, uint32); static int gtTileSeparate(TIFFRGBAImage*, uint32*, uint32, uint32); @@ -645,12 +646,20 @@ gtTileContig(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) flip = setorientation(img); if (flip & FLIP_VERTICALLY) { - y = h - 1; - toskew = -(int32)(tw + w); + if ((tw + w) > INT_MAX) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", "unsupported tile size (too wide)"); + return (0); + } + y = h - 1; + toskew = -(int32)(tw + w); } else { - y = 0; - toskew = -(int32)(tw - w); + if (tw > (INT_MAX + w)) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", "unsupported tile size (too wide)"); + return (0); + } + y = 0; + toskew = -(int32)(tw - w); } /* @@ -755,9 +764,8 @@ gtTileSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) uint32 leftmost_tw; tilesize = TIFFTileSize(tif); - bufsize = TIFFSafeMultiply(tmsize_t,alpha?4:3,tilesize); + bufsize = _TIFFMultiplySSize(tif, alpha?4:3,tilesize, "gtTileSeparate"); if (bufsize == 0) { - TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Integer overflow in %s", "gtTileSeparate"); return (0); } @@ -766,10 +774,18 @@ gtTileSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) flip = setorientation(img); if (flip & FLIP_VERTICALLY) { + if ((tw + w) > INT_MAX) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", "unsupported tile size (too wide)"); + return (0); + } y = h - 1; toskew = -(int32)(tw + w); } else { + if (tw > (INT_MAX + w)) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", "unsupported tile size (too wide)"); + return (0); + } y = 0; toskew = -(int32)(tw - w); } @@ -937,6 +953,10 @@ gtStripContig(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) flip = setorientation(img); if (flip & FLIP_VERTICALLY) { + if ( w > INT_MAX ) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Width overflow"); + return (0); + } y = h - 1; toskew = -(int32)(w + w); } else { @@ -950,16 +970,23 @@ gtStripContig(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) fromskew = (w < imagewidth ? imagewidth - w : 0); for (row = 0; row < h; row += nrow) { + uint32 temp; rowstoread = rowsperstrip - (row + img->row_offset) % rowsperstrip; nrow = (row + rowstoread > h ?
h - row : rowstoread); nrowsub = nrow; if ((nrowsub%subsamplingver)!=0) nrowsub+=subsamplingver-nrowsub%subsamplingver; + temp = (row + img->row_offset)%rowsperstrip + nrowsub; + if( scanline > 0 && temp > (size_t)(TIFF_TMSIZE_T_MAX / scanline) ) + { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Integer overflow in gtStripContig"); + return 0; + } if (_TIFFReadEncodedStripAndAllocBuffer(tif, TIFFComputeStrip(tif,row+img->row_offset, 0), (void**)(&buf), maxstripsize, - ((row + img->row_offset)%rowsperstrip + nrowsub) * scanline)==(tmsize_t)(-1) + temp * scanline)==(tmsize_t)(-1) && (buf == NULL || img->stoponerr)) { ret = 0; @@ -1019,14 +1046,17 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) uint16 colorchannels; stripsize = TIFFStripSize(tif); - bufsize = TIFFSafeMultiply(tmsize_t,alpha?4:3,stripsize); + bufsize = _TIFFMultiplySSize(tif,alpha?4:3,stripsize, "gtStripSeparate"); if (bufsize == 0) { - TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Integer overflow in %s", "gtStripSeparate"); return (0); } flip = setorientation(img); if (flip & FLIP_VERTICALLY) { + if ( w > INT_MAX ) { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Width overflow"); + return (0); + } y = h - 1; toskew = -(int32)(w + w); } @@ -1053,15 +1083,22 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) fromskew = (w < imagewidth ? imagewidth - w : 0); for (row = 0; row < h; row += nrow) { + uint32 temp; rowstoread = rowsperstrip - (row + img->row_offset) % rowsperstrip; nrow = (row + rowstoread > h ? h - row : rowstoread); offset_row = row + img->row_offset; + temp = (row + img->row_offset)%rowsperstrip + nrow; + if( scanline > 0 && temp > (size_t)(TIFF_TMSIZE_T_MAX / scanline) ) + { + TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "Integer overflow in gtStripSeparate"); + return 0; + } if( buf == NULL ) { if (_TIFFReadEncodedStripAndAllocBuffer( tif, TIFFComputeStrip(tif, offset_row, 0), (void**) &buf, bufsize, - ((row + img->row_offset)%rowsperstrip + nrow) * scanline)==(tmsize_t)(-1) + temp * scanline)==(tmsize_t)(-1) && (buf == NULL || img->stoponerr)) { ret = 0; @@ -1081,7 +1118,7 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) } } else if (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 0), - p0, ((row + img->row_offset)%rowsperstrip + nrow) * scanline)==(tmsize_t)(-1) + p0, temp * scanline)==(tmsize_t)(-1) && img->stoponerr) { ret = 0; @@ -1089,7 +1126,7 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) } if (colorchannels > 1 && TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 1), - p1, ((row + img->row_offset)%rowsperstrip + nrow) * scanline) == (tmsize_t)(-1) + p1, temp * scanline) == (tmsize_t)(-1) && img->stoponerr) { ret = 0; @@ -1097,7 +1134,7 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) } if (colorchannels > 1 && TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 2), - p2, ((row + img->row_offset)%rowsperstrip + nrow) * scanline) == (tmsize_t)(-1) + p2, temp * scanline) == (tmsize_t)(-1) && img->stoponerr) { ret = 0; @@ -1106,7 +1143,7 @@ gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h) if (alpha) { if (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, colorchannels), - pa, ((row + img->row_offset)%rowsperstrip + nrow) * scanline)==(tmsize_t)(-1) + pa, temp * scanline)==(tmsize_t)(-1) && img->stoponerr) { ret = 0; @@ -2957,7 +2994,7 @@ TIFFReadRGBATileExt(TIFF* tif, uint32 col, 
uint32 row, uint32 * raster, int stop if( !TIFFIsTiled( tif ) ) { TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), - "Can't use TIFFReadRGBATile() with stripped file."); + "Can't use TIFFReadRGBATile() with striped file."); return (0); } diff --git a/3rdparty/libtiff/tif_jbig.c b/3rdparty/libtiff/tif_jbig.c index 7ffe8851e8..a3500e0b6f 100644 --- a/3rdparty/libtiff/tif_jbig.c +++ b/3rdparty/libtiff/tif_jbig.c @@ -199,6 +199,7 @@ static int JBIGEncode(TIFF* tif, uint8* buffer, tmsize_t size, uint16 s) int TIFFInitJBIG(TIFF* tif, int scheme) { + (void)scheme; assert(scheme == COMPRESSION_JBIG); /* diff --git a/3rdparty/libtiff/tif_jpeg.c b/3rdparty/libtiff/tif_jpeg.c index f2ddc331a0..6711137a92 100644 --- a/3rdparty/libtiff/tif_jpeg.c +++ b/3rdparty/libtiff/tif_jpeg.c @@ -466,7 +466,8 @@ std_empty_output_buffer(j_compress_ptr cinfo) } #endif - TIFFFlushData1(tif); + if( !TIFFFlushData1(tif) ) + return FALSE; sp->dest.next_output_byte = (JOCTET*) tif->tif_rawdata; sp->dest.free_in_buffer = (size_t) tif->tif_rawdatasize; @@ -780,12 +781,9 @@ JPEGFixupTagsSubsampling(TIFF* tif) */ static const char module[] = "JPEGFixupTagsSubsampling"; struct JPEGFixupTagsSubsamplingData m; + uint64 fileoffset = TIFFGetStrileOffset(tif, 0); - _TIFFFillStriles( tif ); - - if( tif->tif_dir.td_stripbytecount == NULL - || tif->tif_dir.td_stripoffset == NULL - || tif->tif_dir.td_stripbytecount[0] == 0 ) + if( fileoffset == 0 ) { /* Do not even try to check if the first strip/tile does not yet exist, as occurs when GDAL has created a new NULL file @@ -804,9 +802,9 @@ JPEGFixupTagsSubsampling(TIFF* tif) } m.buffercurrentbyte=NULL; m.bufferbytesleft=0; - m.fileoffset=tif->tif_dir.td_stripoffset[0]; + m.fileoffset=fileoffset; m.filepositioned=0; - m.filebytesleft=tif->tif_dir.td_stripbytecount[0]; + m.filebytesleft=TIFFGetStrileByteCount(tif, 0); if (!JPEGFixupTagsSubsamplingSec(&m)) TIFFWarningExt(tif->tif_clientdata,module, "Unable to auto-correct subsampling values, likely corrupt JPEG compressed data in first strip/tile; auto-correcting skipped"); @@ -940,7 +938,10 @@ JPEGFixupTagsSubsamplingReadByte(struct JPEGFixupTagsSubsamplingData* data, uint return(0); if (!data->filepositioned) { - TIFFSeekFile(data->tif,data->fileoffset,SEEK_SET); + if (TIFFSeekFile(data->tif,data->fileoffset,SEEK_SET) == (toff_t)-1) + { + return 0; + } data->filepositioned=1; } m=data->buffersize; @@ -1209,35 +1210,37 @@ JPEGPreDecode(TIFF* tif, uint16 s) /* store for all coefficients */ /* See call to jinit_d_coef_controller() from master_selection() */ /* in libjpeg */ - toff_t nRequiredMemory = (toff_t)sp->cinfo.d.image_width * - sp->cinfo.d.image_height * - sp->cinfo.d.num_components * - ((td->td_bitspersample+7)/8); - /* BLOCK_SMOOTHING_SUPPORTED is generally defined, so we need */ - /* to replicate the logic of jinit_d_coef_controller() */ - if( sp->cinfo.d.progressive_mode ) - nRequiredMemory *= 3; -#ifndef TIFF_LIBJPEG_LARGEST_MEM_ALLOC -#define TIFF_LIBJPEG_LARGEST_MEM_ALLOC (100 * 1024 * 1024) -#endif + /* 1 MB for regular libjpeg usage */ + toff_t nRequiredMemory = 1024 * 1024; - if( nRequiredMemory > TIFF_LIBJPEG_LARGEST_MEM_ALLOC && + for (ci = 0; ci < sp->cinfo.d.num_components; ci++) { + const jpeg_component_info *compptr = &(sp->cinfo.d.comp_info[ci]); + if( compptr->h_samp_factor > 0 && compptr->v_samp_factor > 0 ) + { + nRequiredMemory += (toff_t)( + ((compptr->width_in_blocks + compptr->h_samp_factor - 1) / compptr->h_samp_factor)) * + ((compptr->height_in_blocks + compptr->v_samp_factor - 1) / 
compptr->v_samp_factor) * + sizeof(JBLOCK); + } + } + + if( sp->cinfo.d.mem->max_memory_to_use > 0 && + nRequiredMemory > (toff_t)(sp->cinfo.d.mem->max_memory_to_use) && getenv("LIBTIFF_ALLOW_LARGE_LIBJPEG_MEM_ALLOC") == NULL ) { - TIFFErrorExt(tif->tif_clientdata, module, - "Reading this strip would require libjpeg to allocate " - "at least %u bytes. " - "This is disabled since above the %u threshold. " - "You may override this restriction by defining the " - "LIBTIFF_ALLOW_LARGE_LIBJPEG_MEM_ALLOC environment variable, " - "or recompile libtiff by defining the " - "TIFF_LIBJPEG_LARGEST_MEM_ALLOC macro to a value greater " - "than %u", - (unsigned)nRequiredMemory, - (unsigned)TIFF_LIBJPEG_LARGEST_MEM_ALLOC, - (unsigned)TIFF_LIBJPEG_LARGEST_MEM_ALLOC); - return (0); + TIFFErrorExt(tif->tif_clientdata, module, + "Reading this image would require libjpeg to allocate " + "at least %u bytes. " + "This is disabled since above the %u threshold. " + "You may override this restriction by defining the " + "LIBTIFF_ALLOW_LARGE_LIBJPEG_MEM_ALLOC environment variable, " + "or setting the JPEGMEM environment variable to a value greater " + "or equal to '%uM'", + (unsigned)(nRequiredMemory), + (unsigned)(sp->cinfo.d.mem->max_memory_to_use), + (unsigned)((nRequiredMemory + 1000000 - 1) / 1000000)); + return 0; } } @@ -1566,7 +1569,7 @@ JPEGDecodeRaw(TIFF* tif, uint8* buf, tmsize_t cc, uint16 s) JSAMPLE *outptr = (JSAMPLE*)tmpbuf + clumpoffset; #else JSAMPLE *outptr = (JSAMPLE*)buf + clumpoffset; - if (cc < (tmsize_t) (clumpoffset + samples_per_clump*(clumps_per_line-1) + hsamp)) { + if (cc < (tmsize_t)(clumpoffset + (tmsize_t)samples_per_clump*(clumps_per_line-1) + hsamp)) { TIFFErrorExt(tif->tif_clientdata, "JPEGDecodeRaw", "application buffer not large enough for all data, possible subsampling issue"); return 0; @@ -2126,8 +2129,8 @@ JPEGEncodeRaw(TIFF* tif, uint8* buf, tmsize_t cc, uint16 s) /* data is expected to be supplied in multiples of a clumpline */ /* a clumpline is equivalent to v_sampling desubsampled scanlines */ /* TODO: the following calculation of bytesperclumpline, should substitute calculation of sp->bytesperline, except that it is per v_sampling lines */ - bytesperclumpline = (((sp->cinfo.c.image_width+sp->h_sampling-1)/sp->h_sampling) - *(sp->h_sampling*sp->v_sampling+2)*sp->cinfo.c.data_precision+7) + bytesperclumpline = ((((tmsize_t)sp->cinfo.c.image_width+sp->h_sampling-1)/sp->h_sampling) + *((tmsize_t)sp->h_sampling*sp->v_sampling+2)*sp->cinfo.c.data_precision+7) /8; nrows = ( cc / bytesperclumpline ) * sp->v_sampling; @@ -2347,7 +2350,7 @@ JPEGVGetField(TIFF* tif, uint32 tag, va_list ap) switch (tag) { case TIFFTAG_JPEGTABLES: *va_arg(ap, uint32*) = sp->jpegtables_length; - *va_arg(ap, void**) = sp->jpegtables; + *va_arg(ap, const void**) = sp->jpegtables; break; case TIFFTAG_JPEGQUALITY: *va_arg(ap, int*) = sp->jpegquality; @@ -2482,6 +2485,7 @@ TIFFInitJPEG(TIFF* tif, int scheme) { JPEGState* sp; + (void)scheme; assert(scheme == COMPRESSION_JPEG); /* diff --git a/3rdparty/libtiff/tif_luv.c b/3rdparty/libtiff/tif_luv.c index 192fa26188..3bd02e88e4 100644 --- a/3rdparty/libtiff/tif_luv.c +++ b/3rdparty/libtiff/tif_luv.c @@ -193,6 +193,7 @@ LogL16Decode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s) tmsize_t cc; int rc; + (void)s; assert(s == 0); assert(sp != NULL); @@ -266,6 +267,7 @@ LogLuvDecode24(TIFF* tif, uint8* op, tmsize_t occ, uint16 s) unsigned char* bp; uint32* tp; + (void)s; assert(s == 0); assert(sp != NULL); @@ -326,6 +328,7 @@ LogLuvDecode32(TIFF* tif, uint8* op, 
tmsize_t occ, uint16 s) tmsize_t cc; int rc; + (void)s; assert(s == 0); sp = DecoderState(tif); assert(sp != NULL); @@ -447,6 +450,7 @@ LogL16Encode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) int rc=0, mask; tmsize_t beg; + (void)s; assert(s == 0); assert(sp != NULL); npixels = cc / sp->pixel_size; @@ -541,6 +545,7 @@ LogLuvEncode24(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) uint8* op; uint32* tp; + (void)s; assert(s == 0); assert(sp != NULL); npixels = cc / sp->pixel_size; @@ -598,6 +603,7 @@ LogLuvEncode32(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) int rc=0, mask; tmsize_t beg; + (void)s; assert(s == 0); assert(sp != NULL); @@ -742,7 +748,7 @@ LogLuvEncodeTile(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) #undef exp2 /* Conflict with C'99 function */ #define exp2(x) exp(M_LN2*(x)) -static int itrunc(double x, int m) +static int tiff_itrunc(double x, int m) { if( m == SGILOGENCODE_NODITHER ) return (int)x; @@ -777,9 +783,9 @@ LogL16fromY(double Y, int em) /* get 16-bit LogL from Y */ if (Y <= -1.8371976e19) return (0xffff); if (Y > 5.4136769e-20) - return itrunc(256.*(log2(Y) + 64.), em); + return tiff_itrunc(256.*(log2(Y) + 64.), em); if (Y < -5.4136769e-20) - return (~0x7fff | itrunc(256.*(log2(-Y) + 64.), em)); + return (~0x7fff | tiff_itrunc(256.*(log2(-Y) + 64.), em)); return (0); } @@ -855,7 +861,7 @@ LogL10fromY(double Y, int em) /* get 10-bit LogL from Y */ else if (Y <= .00024283) return (0); else - return itrunc(64.*(log2(Y) + 12.), em); + return tiff_itrunc(64.*(log2(Y) + 12.), em); } #define NANGLES 100 @@ -925,12 +931,12 @@ uv_encode(double u, double v, int em) /* encode (u',v') coordinates */ if (v < UV_VSTART) return oog_encode(u, v); - vi = itrunc((v - UV_VSTART)*(1./UV_SQSIZ), em); + vi = tiff_itrunc((v - UV_VSTART)*(1./UV_SQSIZ), em); if (vi >= UV_NVS) return oog_encode(u, v); if (u < uv_row[vi].ustart) return oog_encode(u, v); - ui = itrunc((u - uv_row[vi].ustart)*(1./UV_SQSIZ), em); + ui = tiff_itrunc((u - uv_row[vi].ustart)*(1./UV_SQSIZ), em); if (ui >= uv_row[vi].nus) return oog_encode(u, v); @@ -1099,7 +1105,7 @@ Luv24fromLuv48(LogLuvState* sp, uint8* op, tmsize_t n) else if (sp->encode_meth == SGILOGENCODE_NODITHER) Le = (luv3[0]-3314) >> 2; else - Le = itrunc(.25*(luv3[0]-3314.), sp->encode_meth); + Le = tiff_itrunc(.25*(luv3[0]-3314.), sp->encode_meth); Ce = uv_encode((luv3[1]+.5)/(1<<15), (luv3[2]+.5)/(1<<15), sp->encode_meth); @@ -1155,10 +1161,10 @@ LogLuv32fromXYZ(float XYZ[3], int em) v = 9.*XYZ[1] / s; } if (u <= 0.) ue = 0; - else ue = itrunc(UVSCALE*u, em); + else ue = tiff_itrunc(UVSCALE*u, em); if (ue > 255) ue = 255; if (v <= 0.) 
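The (void)s; and (void)scheme; lines added throughout these codec files exist because the parameter's only use is the assert that follows; when libtiff is built with NDEBUG the assert disappears and an unused-parameter warning would fire. The idiom in isolation:

    #include <assert.h>

    #define COMPRESSION_EXAMPLE 42   /* illustrative codec id */

    static int init_codec(int scheme)
    {
        (void)scheme;   /* only read by the assert; silences the unused-
                           parameter warning when NDEBUG removes it */
        assert(scheme == COMPRESSION_EXAMPLE);
        return 1;
    }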
ve = 0; - else ve = itrunc(UVSCALE*v, em); + else ve = tiff_itrunc(UVSCALE*v, em); if (ve > 255) ve = 255; /* combine encodings */ return (Le << 16 | ue << 8 | ve); @@ -1238,8 +1244,8 @@ Luv32fromLuv48(LogLuvState* sp, uint8* op, tmsize_t n) } while (n-- > 0) { *luv++ = (uint32)luv3[0] << 16 | - (itrunc(luv3[1]*(UVSCALE/(1<<15)), sp->encode_meth) << 8 & 0xff00) | - (itrunc(luv3[2]*(UVSCALE/(1<<15)), sp->encode_meth) & 0xff); + (tiff_itrunc(luv3[1]*(UVSCALE/(1<<15)), sp->encode_meth) << 8 & 0xff00) | + (tiff_itrunc(luv3[2]*(UVSCALE/(1<<15)), sp->encode_meth) & 0xff); luv3 += 3; } } @@ -1269,16 +1275,10 @@ LogL16GuessDataFmt(TIFFDirectory *td) return (SGILOGDATAFMT_UNKNOWN); } - -#define TIFF_SIZE_T_MAX ((size_t) ~ ((size_t)0)) -#define TIFF_TMSIZE_T_MAX (tmsize_t)(TIFF_SIZE_T_MAX >> 1) - static tmsize_t multiply_ms(tmsize_t m1, tmsize_t m2) { - if( m1 == 0 || m2 > TIFF_TMSIZE_T_MAX / m1 ) - return 0; - return m1 * m2; + return _TIFFMultiplySSize(NULL, m1, m2, NULL); } static int @@ -1512,7 +1512,7 @@ LogLuvSetupEncode(TIFF* tif) switch (td->td_photometric) { case PHOTOMETRIC_LOGLUV: if (!LogLuvInitState(tif)) - break; + return (0); if (td->td_compression == COMPRESSION_SGILOG24) { tif->tif_encoderow = LogLuvEncode24; switch (sp->user_datafmt) { @@ -1545,7 +1545,7 @@ LogLuvSetupEncode(TIFF* tif) break; case PHOTOMETRIC_LOGL: if (!LogL16InitState(tif)) - break; + return (0); tif->tif_encoderow = LogL16Encode; switch (sp->user_datafmt) { case SGILOGDATAFMT_FLOAT: @@ -1561,7 +1561,7 @@ LogLuvSetupEncode(TIFF* tif) TIFFErrorExt(tif->tif_clientdata, module, "Inappropriate photometric interpretation %d for SGILog compression; %s", td->td_photometric, "must be either LogLUV or LogL"); - break; + return (0); } sp->encoder_state = 1; return (1); diff --git a/3rdparty/libtiff/tif_lzma.c b/3rdparty/libtiff/tif_lzma.c index 3f6096b62a..e150bd635d 100644 --- a/3rdparty/libtiff/tif_lzma.c +++ b/3rdparty/libtiff/tif_lzma.c @@ -300,7 +300,8 @@ LZMAEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) } if (sp->stream.avail_out == 0) { tif->tif_rawcc = tif->tif_rawdatasize; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (size_t)tif->tif_rawdatasize; /* this is a safe typecast, as check is made already in LZMAPreEncode */ } @@ -328,7 +329,8 @@ LZMAPostEncode(TIFF* tif) if ((tmsize_t)sp->stream.avail_out != tif->tif_rawdatasize) { tif->tif_rawcc = tif->tif_rawdatasize - sp->stream.avail_out; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (size_t)tif->tif_rawdatasize; /* this is a safe typecast, as check is made already in ZIPPreEncode */ } @@ -418,6 +420,7 @@ TIFFInitLZMA(TIFF* tif, int scheme) LZMAState* sp; lzma_stream tmp_stream = LZMA_STREAM_INIT; + (void)scheme; assert( scheme == COMPRESSION_LZMA ); /* diff --git a/3rdparty/libtiff/tif_lzw.c b/3rdparty/libtiff/tif_lzw.c index 21064f29ae..d92d0fd354 100644 --- a/3rdparty/libtiff/tif_lzw.c +++ b/3rdparty/libtiff/tif_lzw.c @@ -214,19 +214,16 @@ LZWSetupDecode(TIFF* tif) return (0); } - DecoderState(tif)->dec_codetab = NULL; - DecoderState(tif)->dec_decode = NULL; + sp = DecoderState(tif); + sp->dec_codetab = NULL; + sp->dec_decode = NULL; /* * Setup predictor setup. 
*/ (void) TIFFPredictorInit(tif); - - sp = DecoderState(tif); } - assert(sp != NULL); - if (sp->dec_codetab == NULL) { sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); if (sp->dec_codetab == NULL) { @@ -1161,6 +1158,7 @@ int TIFFInitLZW(TIFF* tif, int scheme) { static const char module[] = "TIFFInitLZW"; + (void)scheme; assert(scheme == COMPRESSION_LZW); /* * Allocate state block so tag methods have storage to record values. @@ -1218,7 +1216,7 @@ bad: * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #endif /* LZW_SUPPORT */ diff --git a/3rdparty/libtiff/tif_ojpeg.c b/3rdparty/libtiff/tif_ojpeg.c index 27385d8c47..133d1f1c49 100644 --- a/3rdparty/libtiff/tif_ojpeg.c +++ b/3rdparty/libtiff/tif_ojpeg.c @@ -74,7 +74,7 @@ or errors, up to the point where either these values are read, or it's clear they aren't there. This means that some of the data is read twice, but we feel speed in correcting these values is important enough to warrant this sacrifice. Although - there is currently no define or other configuration mechanism to disable this behaviour, + there is currently no define or other configuration mechanism to disable this behavior, the actual header scanning is build to robustly respond with error report if it should encounter an uncorrected mismatch of subsampling values. See OJPEGReadHeaderInfoSecStreamSof. @@ -243,6 +243,7 @@ typedef enum { typedef struct { TIFF* tif; int decoder_ok; + int error_in_raw_data_decoding; #ifndef LIBJPEG_ENCAP_EXTERNAL JMP_BUF exit_jmpbuf; #endif @@ -420,6 +421,7 @@ TIFFInitOJPEG(TIFF* tif, int scheme) static const char module[]="TIFFInitOJPEG"; OJPEGState* sp; + (void)scheme; assert(scheme==COMPRESSION_OJPEG); /* @@ -497,15 +499,15 @@ OJPEGVGetField(TIFF* tif, uint32 tag, va_list ap) break; case TIFFTAG_JPEGQTABLES: *va_arg(ap,uint32*)=(uint32)sp->qtable_offset_count; - *va_arg(ap,void**)=(void*)sp->qtable_offset; + *va_arg(ap,const void**)=(const void*)sp->qtable_offset; break; case TIFFTAG_JPEGDCTABLES: *va_arg(ap,uint32*)=(uint32)sp->dctable_offset_count; - *va_arg(ap,void**)=(void*)sp->dctable_offset; + *va_arg(ap,const void**)=(const void*)sp->dctable_offset; break; case TIFFTAG_JPEGACTABLES: *va_arg(ap,uint32*)=(uint32)sp->actable_offset_count; - *va_arg(ap,void**)=(void*)sp->actable_offset; + *va_arg(ap,const void**)=(const void*)sp->actable_offset; break; case TIFFTAG_JPEGPROC: *va_arg(ap,uint16*)=(uint16)sp->jpeg_proc; @@ -657,7 +659,7 @@ static int OJPEGSetupDecode(TIFF* tif) { static const char module[]="OJPEGSetupDecode"; - TIFFWarningExt(tif->tif_clientdata,module,"Depreciated and troublesome old-style JPEG compression mode, please convert to new-style JPEG compression and notify vendor of writing software"); + TIFFWarningExt(tif->tif_clientdata,module,"Deprecated and troublesome old-style JPEG compression mode, please convert to new-style JPEG compression and notify vendor of writing software"); return(1); } @@ -678,7 +680,7 @@ OJPEGPreDecode(TIFF* tif, uint16 s) if (OJPEGReadSecondarySos(tif,s)==0) return(0); } - if isTiled(tif) + if (isTiled(tif)) m=tif->tif_curtile; else m=tif->tif_curstrip; @@ -742,6 +744,7 @@ OJPEGPreDecodeSkipRaw(TIFF* tif) } m-=sp->subsampling_convert_clines-sp->subsampling_convert_state; 
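The "if isTiled(tif)" spellings fixed above only ever compiled because tiffiop.h wraps the macro body in its own parentheses; writing the parentheses at the call site keeps the code valid C even if the macro is ever replaced by a function. Roughly (macro body and flag value approximated from tiffiop.h):

    #include <stdint.h>

    #define TIFF_ISTILED 0x00400              /* illustrative flag value */
    struct tiff_like { uint32_t tif_flags; };
    /* The outer parentheses are what made "if isTiled(tif)" parse at all: */
    #define isTiled(t) (((t)->tif_flags & TIFF_ISTILED) != 0)

    static int demo(const struct tiff_like *t)
    {
        if (isTiled(t))        /* parentheses now explicit at the call site */
            return 1;
        return 0;
    }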
sp->subsampling_convert_state=0; + sp->error_in_raw_data_decoding=0; } while (m>=sp->subsampling_convert_clines) { @@ -792,6 +795,10 @@ OJPEGDecode(TIFF* tif, uint8* buf, tmsize_t cc, uint16 s) TIFFErrorExt(tif->tif_clientdata,module,"Cannot decode: decoder not correctly initialized"); return 0; } + if( sp->error_in_raw_data_decoding ) + { + return 0; + } if (sp->libjpeg_jpeg_query_style==0) { if (OJPEGDecodeRaw(tif,buf,cc)==0) @@ -832,7 +839,10 @@ OJPEGDecodeRaw(TIFF* tif, uint8* buf, tmsize_t cc) if (sp->subsampling_convert_state==0) { if (jpeg_read_raw_data_encap(sp,&(sp->libjpeg_jpeg_decompress_struct),sp->subsampling_convert_ycbcrimage,sp->subsampling_ver*8)==0) + { + sp->error_in_raw_data_decoding = 1; return(0); + } } oy=sp->subsampling_convert_ybuf+sp->subsampling_convert_state*sp->subsampling_ver*sp->subsampling_convert_ylinelen; ocb=sp->subsampling_convert_cbbuf+sp->subsampling_convert_state*sp->subsampling_convert_clinelen; @@ -990,7 +1000,6 @@ OJPEGSubsamplingCorrect(TIFF* tif) OJPEGState* sp=(OJPEGState*)tif->tif_data; uint8 mh; uint8 mv; - _TIFFFillStriles( tif ); assert(sp->subsamplingcorrect_done==0); if ((tif->tif_dir.td_samplesperpixel!=3) || ((tif->tif_dir.td_photometric!=PHOTOMETRIC_YCBCR) && @@ -1046,7 +1055,7 @@ OJPEGReadHeaderInfo(TIFF* tif) assert(sp->readheader_done==0); sp->image_width=tif->tif_dir.td_imagewidth; sp->image_length=tif->tif_dir.td_imagelength; - if isTiled(tif) + if (isTiled(tif)) { sp->strile_width=tif->tif_dir.td_tilewidth; sp->strile_length=tif->tif_dir.td_tilelength; @@ -1056,6 +1065,8 @@ OJPEGReadHeaderInfo(TIFF* tif) { sp->strile_width=sp->image_width; sp->strile_length=tif->tif_dir.td_rowsperstrip; + if( sp->strile_length == (uint32)-1 ) + sp->strile_length = sp->image_length; sp->strile_length_total=sp->image_length; } if (tif->tif_dir.td_samplesperpixel==1) @@ -1082,6 +1093,12 @@ OJPEGReadHeaderInfo(TIFF* tif) } if (sp->strile_lengthimage_length) { + if (((sp->subsampling_hor!=1) && (sp->subsampling_hor!=2) && (sp->subsampling_hor!=4)) || + ((sp->subsampling_ver!=1) && (sp->subsampling_ver!=2) && (sp->subsampling_ver!=4))) + { + TIFFErrorExt(tif->tif_clientdata,module,"Invalid subsampling values"); + return(0); + } if (sp->strile_length%(sp->subsampling_ver*8)!=0) { TIFFErrorExt(tif->tif_clientdata,module,"Incompatible vertical subsampling and image strip/tile length"); @@ -1197,7 +1214,13 @@ OJPEGWriteHeaderInfo(TIFF* tif) sp->subsampling_convert_ybuflen=sp->subsampling_convert_ylinelen*sp->subsampling_convert_ylines; sp->subsampling_convert_cbuflen=sp->subsampling_convert_clinelen*sp->subsampling_convert_clines; sp->subsampling_convert_ycbcrbuflen=sp->subsampling_convert_ybuflen+2*sp->subsampling_convert_cbuflen; - sp->subsampling_convert_ycbcrbuf=_TIFFmalloc(sp->subsampling_convert_ycbcrbuflen); + /* The calloc is not normally necessary, except in some edge/broken cases */ + /* for example for a tiled image of height 1 with a tile height of 1 and subsampling_hor=subsampling_ver=2 */ + /* In that case, libjpeg will only fill the 8 first lines of the 16 lines */ + /* See https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=16844 */ + /* Even if this case is allowed (?), its handling is broken because OJPEGPreDecode() should also likely */ + /* reset subsampling_convert_state to 0 when changing tile. 
*/ + sp->subsampling_convert_ycbcrbuf=_TIFFcalloc(1, sp->subsampling_convert_ycbcrbuflen); if (sp->subsampling_convert_ycbcrbuf==0) { TIFFErrorExt(tif->tif_clientdata,module,"Out of memory"); @@ -1223,10 +1246,11 @@ OJPEGWriteHeaderInfo(TIFF* tif) *m++=sp->subsampling_convert_cbbuf+n*sp->subsampling_convert_clinelen; for (n=0; nsubsampling_convert_clines; n++) *m++=sp->subsampling_convert_crbuf+n*sp->subsampling_convert_clinelen; - sp->subsampling_convert_clinelenout=((sp->strile_width+sp->subsampling_hor-1)/sp->subsampling_hor); + sp->subsampling_convert_clinelenout=sp->strile_width/sp->subsampling_hor + ((sp->strile_width % sp->subsampling_hor) != 0 ? 1 : 0); sp->subsampling_convert_state=0; + sp->error_in_raw_data_decoding=0; sp->bytes_per_line=sp->subsampling_convert_clinelenout*(sp->subsampling_ver*sp->subsampling_hor+2); - sp->lines_per_strile=((sp->strile_length+sp->subsampling_ver-1)/sp->subsampling_ver); + sp->lines_per_strile=sp->strile_length/sp->subsampling_ver + ((sp->strile_length % sp->subsampling_ver) != 0 ? 1 : 0); sp->subsampling_convert_log=1; } } @@ -1240,6 +1264,26 @@ OJPEGWriteHeaderInfo(TIFF* tif) } if (jpeg_start_decompress_encap(sp,&(sp->libjpeg_jpeg_decompress_struct))==0) return(0); + if(sp->libjpeg_jpeg_decompress_struct.image_width != sp->strile_width ) { + TIFFErrorExt(tif->tif_clientdata,module, + "jpeg_start_decompress() returned image_width = %d, " + "expected %d", + sp->libjpeg_jpeg_decompress_struct.image_width, + sp->strile_width); + return 0; + } + if(sp->libjpeg_jpeg_decompress_struct.max_h_samp_factor != sp->subsampling_hor || + sp->libjpeg_jpeg_decompress_struct.max_v_samp_factor != sp->subsampling_ver) { + TIFFErrorExt(tif->tif_clientdata,module, + "jpeg_start_decompress() returned max_h_samp_factor = %d " + "and max_v_samp_factor = %d, expected %d and %d", + sp->libjpeg_jpeg_decompress_struct.max_h_samp_factor, + sp->libjpeg_jpeg_decompress_struct.max_v_samp_factor, + sp->subsampling_hor, + sp->subsampling_ver); + return 0; + } + sp->writeheader_done=1; return(1); } @@ -1272,7 +1316,9 @@ OJPEGReadHeaderInfoSec(TIFF* tif) } else { - if ((sp->jpeg_interchange_format_length==0) || (sp->jpeg_interchange_format+sp->jpeg_interchange_format_length>sp->file_size)) + if ((sp->jpeg_interchange_format_length==0) || + (sp->jpeg_interchange_format > TIFF_UINT64_MAX - sp->jpeg_interchange_format_length) || + (sp->jpeg_interchange_format+sp->jpeg_interchange_format_length>sp->file_size)) sp->jpeg_interchange_format_length=sp->file_size-sp->jpeg_interchange_format; } } @@ -1989,32 +2035,30 @@ OJPEGReadBufferFill(OJPEGState* sp) sp->in_buffer_source=osibsStrile; break; case osibsStrile: - if (!_TIFFFillStriles( sp->tif ) - || sp->tif->tif_dir.td_stripoffset == NULL - || sp->tif->tif_dir.td_stripbytecount == NULL) - return 0; - if (sp->in_buffer_next_strile==sp->in_buffer_strile_count) sp->in_buffer_source=osibsEof; else { - sp->in_buffer_file_pos=sp->tif->tif_dir.td_stripoffset[sp->in_buffer_next_strile]; + int err = 0; + sp->in_buffer_file_pos=TIFFGetStrileOffsetWithErr(sp->tif, sp->in_buffer_next_strile, &err); + if( err ) + return 0; if (sp->in_buffer_file_pos!=0) { + uint64 bytecount = TIFFGetStrileByteCountWithErr(sp->tif, sp->in_buffer_next_strile, &err); + if( err ) + return 0; if (sp->in_buffer_file_pos>=sp->file_size) sp->in_buffer_file_pos=0; - else if (sp->tif->tif_dir.td_stripbytecount==NULL) + else if (bytecount==0) sp->in_buffer_file_togo=sp->file_size-sp->in_buffer_file_pos; else { - if (sp->tif->tif_dir.td_stripbytecount == 0) { - 
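OJPEGDecode above now consults a sticky error_in_raw_data_decoding flag: once jpeg_read_raw_data() has failed for a strile, later decode calls fail immediately instead of re-entering libjpeg with poisoned state, and the flag is cleared whenever a new strile is set up. The shape of the pattern:

    struct ojpeg_like {
        int decoder_ok;
        int error_in_raw_data;      /* sticky until the next strile reset */
    };

    static int raw_read(void) { return 0; }  /* stand-in for the libjpeg call */

    static int decode(struct ojpeg_like *sp)
    {
        if (!sp->decoder_ok || sp->error_in_raw_data)
            return 0;               /* fail fast on every later call */
        if (raw_read() == 0) {
            sp->error_in_raw_data = 1;
            return 0;
        }
        return 1;
    }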
TIFFErrorExt(sp->tif->tif_clientdata,sp->tif->tif_name,"Strip byte counts are missing"); - return(0); - } - sp->in_buffer_file_togo=sp->tif->tif_dir.td_stripbytecount[sp->in_buffer_next_strile]; + sp->in_buffer_file_togo=bytecount; if (sp->in_buffer_file_togo==0) sp->in_buffer_file_pos=0; - else if (sp->in_buffer_file_pos+sp->in_buffer_file_togo>sp->file_size) + else if (sp->in_buffer_file_pos > TIFF_UINT64_MAX - sp->in_buffer_file_togo || + sp->in_buffer_file_pos+sp->in_buffer_file_togo>sp->file_size) sp->in_buffer_file_togo=sp->file_size-sp->in_buffer_file_pos; } } diff --git a/3rdparty/libtiff/tif_open.c b/3rdparty/libtiff/tif_open.c index c574c452aa..a0e31583a6 100644 --- a/3rdparty/libtiff/tif_open.c +++ b/3rdparty/libtiff/tif_open.c @@ -104,6 +104,7 @@ TIFFClientOpen( } n; n.a8[0]=1; n.a8[1]=0; + (void)n; #ifdef WORDS_BIGENDIAN assert(n.a16==256); #else @@ -131,6 +132,7 @@ TIFFClientOpen( if (!readproc || !writeproc || !seekproc || !closeproc || !sizeproc) { TIFFErrorExt(clientdata, module, "One of the client procedures is NULL pointer."); + _TIFFfree(tif); goto bad2; } tif->tif_readproc = readproc; @@ -164,7 +166,7 @@ TIFFClientOpen( /* * Process library-specific flags in the open mode string. * The following flags may be used to control intrinsic library - * behaviour that may or may not be desirable (usually for + * behavior that may or may not be desirable (usually for * compatibility with some application that claims to support * TIFF but only supports some brain dead idea of what the * vendor thinks TIFF is): @@ -181,6 +183,8 @@ TIFFClientOpen( * 'h' read TIFF header only, do not load the first IFD * '4' ClassicTIFF for creating a file (default) * '8' BigTIFF for creating a file + * 'D' enable use of deferred strip/tile offset/bytecount array loading. + * 'O' on-demand loading of values instead of whole array loading (implies D) * * The use of the 'l' and 'b' flags is strongly discouraged. * These flags are provided solely because numerous vendors, @@ -203,7 +207,7 @@ TIFFClientOpen( * not do right now. * * The 'M' and 'm' flags are provided because some virtual memory - * systems exhibit poor behaviour when large images are mapped. + * systems exhibit poor behavior when large images are mapped. * These options permit clients to control the use of memory-mapped * files on a per-file basis. * @@ -262,7 +266,22 @@ TIFFClientOpen( if (m&O_CREAT) tif->tif_flags |= TIFF_BIGTIFF; break; + case 'D': + tif->tif_flags |= TIFF_DEFERSTRILELOAD; + break; + case 'O': + if( m == O_RDONLY ) + tif->tif_flags |= (TIFF_LAZYSTRILELOAD | TIFF_DEFERSTRILELOAD); + break; } + +#ifdef DEFER_STRILE_LOAD + /* Compatibility with old DEFER_STRILE_LOAD compilation flag */ + /* Probably unneeded, since to the best of my knowledge (E. Rouault) */ + /* GDAL was the only user of this, and will now use the new 'D' flag */ + tif->tif_flags |= TIFF_DEFERSTRILELOAD; +#endif + /* * Read in TIFF header. 
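The new 'D' and 'O' open-mode letters documented above enable deferred strip/tile offset/bytecount handling: 'D' postpones loading the arrays until first use, and 'O' (read-only opens, implies 'D') fetches individual values on demand. A hedged usage sketch against this vendored libtiff (file name illustrative):

    #include <tiffio.h>

    int main(void)
    {
        /* 'O': strile offsets/byte counts are read lazily, one value at a
         * time, instead of materializing whole arrays at open time. */
        TIFF *tif = TIFFOpen("big_sparse.tif", "rO");
        if (!tif)
            return 1;
        uint64 off = TIFFGetStrileOffset(tif, 0);
        uint64 cnt = TIFFGetStrileByteCount(tif, 0);
        (void)off; (void)cnt;
        TIFFClose(tif);
        return 0;
    }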
*/ diff --git a/3rdparty/libtiff/tif_pixarlog.c b/3rdparty/libtiff/tif_pixarlog.c index b1e48d99c9..f291201505 100644 --- a/3rdparty/libtiff/tif_pixarlog.c +++ b/3rdparty/libtiff/tif_pixarlog.c @@ -634,16 +634,10 @@ PixarLogGuessDataFmt(TIFFDirectory *td) return guess; } -#define TIFF_SIZE_T_MAX ((size_t) ~ ((size_t)0)) -#define TIFF_TMSIZE_T_MAX (tmsize_t)(TIFF_SIZE_T_MAX >> 1) - static tmsize_t multiply_ms(tmsize_t m1, tmsize_t m2) { - assert(m1 >= 0 && m2 >= 0); - if( m1 == 0 || m2 > TIFF_TMSIZE_T_MAX / m1 ) - return 0; - return m1 * m2; + return _TIFFMultiplySSize(NULL, m1, m2, NULL); } static tmsize_t @@ -1153,7 +1147,7 @@ PixarLogEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) llen = sp->stride * td->td_imagewidth; /* Check against the number of elements (of size uint16) of sp->tbuf */ - if( n > (tmsize_t)(td->td_rowsperstrip * llen) ) + if( n > ((tmsize_t)td->td_rowsperstrip * llen) ) { TIFFErrorExt(tif->tif_clientdata, module, "Too many input bytes provided"); @@ -1206,7 +1200,8 @@ PixarLogEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) } if (sp->stream.avail_out == 0) { tif->tif_rawcc = tif->tif_rawdatasize; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (uInt) tif->tif_rawdatasize; /* this is a safe typecast, as check is made already in PixarLogPreEncode */ } @@ -1236,7 +1231,8 @@ PixarLogPostEncode(TIFF* tif) if ((tmsize_t)sp->stream.avail_out != tif->tif_rawdatasize) { tif->tif_rawcc = tif->tif_rawdatasize - sp->stream.avail_out; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (uInt) tif->tif_rawdatasize; /* this is a safe typecast, as check is made already in PixarLogPreEncode */ } @@ -1404,6 +1400,7 @@ TIFFInitPixarLog(TIFF* tif, int scheme) PixarLogState* sp; + (void)scheme; assert(scheme == COMPRESSION_PIXARLOG); /* diff --git a/3rdparty/libtiff/tif_predict.c b/3rdparty/libtiff/tif_predict.c index b775663a7b..c023397459 100644 --- a/3rdparty/libtiff/tif_predict.c +++ b/3rdparty/libtiff/tif_predict.c @@ -116,7 +116,7 @@ PredictorSetupDecode(TIFF* tif) TIFFDirectory* td = &tif->tif_dir; /* Note: when PredictorSetup() fails, the effets of setupdecode() */ - /* will not be "cancelled" so setupdecode() might be robust to */ + /* will not be "canceled" so setupdecode() might be robust to */ /* be called several times. 
*/ if (!(*sp->setupdecode)(tif) || !PredictorSetup(tif)) return 0; @@ -270,8 +270,8 @@ PredictorSetupEncode(TIFF* tif) } /* Remarks related to C standard compliance in all below functions : */ -/* - to avoid any undefined behaviour, we only operate on unsigned types */ -/* since the behaviour of "overflows" is defined (wrap over) */ +/* - to avoid any undefined behavior, we only operate on unsigned types */ +/* since the behavior of "overflows" is defined (wrap over) */ /* - when storing into the byte stream, we explicitly mask with 0xff so */ /* as to make icc -check=conversions happy (not necessary by the standard) */ diff --git a/3rdparty/libtiff/tif_print.c b/3rdparty/libtiff/tif_print.c index 1d86adbf05..a0737941f4 100644 --- a/3rdparty/libtiff/tif_print.c +++ b/3rdparty/libtiff/tif_print.c @@ -652,8 +652,6 @@ TIFFPrintDirectory(TIFF* tif, FILE* fd, long flags) if (tif->tif_tagmethods.printdir) (*tif->tif_tagmethods.printdir)(tif, fd, flags); - _TIFFFillStriles( tif ); - if ((flags & TIFFPRINT_STRIPS) && TIFFFieldSet(tif,FIELD_STRIPOFFSETS)) { uint32 s; @@ -665,13 +663,13 @@ TIFFPrintDirectory(TIFF* tif, FILE* fd, long flags) #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) fprintf(fd, " %3lu: [%8I64u, %8I64u]\n", (unsigned long) s, - td->td_stripoffset ? (unsigned __int64) td->td_stripoffset[s] : 0, - td->td_stripbytecount ? (unsigned __int64) td->td_stripbytecount[s] : 0); + (unsigned __int64) TIFFGetStrileOffset(tif, s), + (unsigned __int64) TIFFGetStrileByteCount(tif, s)); #else fprintf(fd, " %3lu: [%8llu, %8llu]\n", (unsigned long) s, - td->td_stripoffset ? (unsigned long long) td->td_stripoffset[s] : 0, - td->td_stripbytecount ? (unsigned long long) td->td_stripbytecount[s] : 0); + (unsigned long long) TIFFGetStrileOffset(tif, s), + (unsigned long long) TIFFGetStrileByteCount(tif, s)); #endif } } diff --git a/3rdparty/libtiff/tif_read.c b/3rdparty/libtiff/tif_read.c index 79c470cbf1..c4c868b1c5 100644 --- a/3rdparty/libtiff/tif_read.c +++ b/3rdparty/libtiff/tif_read.c @@ -29,9 +29,6 @@ #include "tiffiop.h" #include -#define TIFF_SIZE_T_MAX ((size_t) ~ ((size_t)0)) -#define TIFF_TMSIZE_T_MAX (tmsize_t)(TIFF_SIZE_T_MAX >> 1) - int TIFFFillStrip(TIFF* tif, uint32 strip); int TIFFFillTile(TIFF* tif, uint32 tile); static int TIFFStartStrip(TIFF* tif, uint32 strip); @@ -49,6 +46,8 @@ TIFFReadRawTile1(TIFF* tif, uint32 tile, void* buf, tmsize_t size, const char* m #define THRESHOLD_MULTIPLIER 10 #define MAX_THRESHOLD (THRESHOLD_MULTIPLIER * THRESHOLD_MULTIPLIER * THRESHOLD_MULTIPLIER * INITIAL_THRESHOLD) +#define TIFF_INT64_MAX ((((int64)0x7FFFFFFF) << 32) | 0xFFFFFFFF) + /* Read 'size' bytes in tif_rawdata buffer starting at offset 'rawdata_offset' * Returns 1 in case of success, 0 otherwise. */ static int TIFFReadAndRealloc( TIFF* tif, tmsize_t size, @@ -61,6 +60,22 @@ static int TIFFReadAndRealloc( TIFF* tif, tmsize_t size, #endif tmsize_t already_read = 0; + +#if SIZEOF_SIZE_T != 8 + /* On 32 bit processes, if the request is large enough, check against */ + /* file size */ + if( size > 1000 * 1000 * 1000 ) + { + uint64 filesize = TIFFGetFileSize(tif); + if( (uint64)size >= filesize ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Chunk size requested is larger than file size."); + return 0; + } + } +#endif + /* On 64 bit processes, read first a maximum of 1 MB, then 10 MB, etc */ /* so as to avoid allocating too much memory in case the file is too */ /* short. 
We could ask for the file size, but this might be */ @@ -175,17 +190,14 @@ TIFFFillStripPartial( TIFF *tif, int strip, tmsize_t read_ahead, int restart ) tmsize_t to_read; tmsize_t read_ahead_mod; /* tmsize_t bytecountm; */ - - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - + /* * Expand raw data buffer, if needed, to hold data * strip coming from file (perhaps should set upper * bound on the size of a buffer we'll use?). */ - /* bytecountm=(tmsize_t) td->td_stripbytecount[strip]; */ + /* bytecountm=(tmsize_t) TIFFGetStrileByteCount(tif, strip); */ /* Not completely sure where the * 2 comes from, but probably for */ /* an exponentional growth strategy of tif_rawdatasize */ @@ -229,7 +241,7 @@ TIFFFillStripPartial( TIFF *tif, int strip, tmsize_t read_ahead, int restart ) /* ** Seek to the point in the file where more data should be read. */ - read_offset = td->td_stripoffset[strip] + read_offset = TIFFGetStrileOffset(tif, strip) + tif->tif_rawdataoff + tif->tif_rawdataloaded; if (!SeekOK(tif, read_offset)) { @@ -246,10 +258,10 @@ TIFFFillStripPartial( TIFF *tif, int strip, tmsize_t read_ahead, int restart ) to_read = read_ahead_mod - unused_data; else to_read = tif->tif_rawdatasize - unused_data; - if( (uint64) to_read > td->td_stripbytecount[strip] + if( (uint64) to_read > TIFFGetStrileByteCount(tif, strip) - tif->tif_rawdataoff - tif->tif_rawdataloaded ) { - to_read = (tmsize_t) td->td_stripbytecount[strip] + to_read = (tmsize_t) TIFFGetStrileByteCount(tif, strip) - tif->tif_rawdataoff - tif->tif_rawdataloaded; } @@ -288,7 +300,7 @@ TIFFFillStripPartial( TIFF *tif, int strip, tmsize_t read_ahead, int restart ) /* For JPEG, if there are multiple scans (can generally be known */ /* with the read_ahead used), we need to read the whole strip */ if( tif->tif_dir.td_compression==COMPRESSION_JPEG && - (uint64)tif->tif_rawcc < td->td_stripbytecount[strip] ) + (uint64)tif->tif_rawcc < TIFFGetStrileByteCount(tif, strip) ) { if( TIFFJPEGIsFullStripRequired(tif) ) { @@ -347,9 +359,7 @@ TIFFSeek(TIFF* tif, uint32 row, uint16 sample ) * read it a few lines at a time? 
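TIFFReadAndRealloc above gains two defenses: on 32-bit builds, a request over ~1 GB is first checked against the actual file size, and on 64-bit builds the buffer grows in stages (1 MB, then 10 MB, ...) so a corrupt byte count fails after a small read rather than after one giant allocation. A self-contained sketch of the staged strategy, using stdio in place of libtiff's read proc:

    #include <stdio.h>
    #include <stdlib.h>

    /* Read up to `want` bytes in geometrically growing slices so a bogus
     * byte count from a corrupt file fails after ~1 MB, not a huge malloc. */
    static size_t staged_read(FILE *f, char **buf, size_t want)
    {
        size_t threshold = 1024 * 1024, got = 0;
        *buf = NULL;
        while (got < want) {
            size_t target = want < threshold ? want : threshold;
            char *p = realloc(*buf, target);
            if (!p) { free(*buf); *buf = NULL; return 0; }
            *buf = p;
            size_t n = fread(*buf + got, 1, target - got, f);
            got += n;
            if (n == 0)
                break;          /* short file: stop early, return what we got */
            threshold *= 10;    /* 1 MB, 10 MB, 100 MB, ... */
        }
        return got;
    }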
*/ #if defined(CHUNKY_STRIP_READ_SUPPORT) - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - whole_strip = tif->tif_dir.td_stripbytecount[strip] < 10 + whole_strip = TIFFGetStrileByteCount(tif, strip) < 10 || isMapped(tif); if( td->td_compression == COMPRESSION_LERC || td->td_compression == COMPRESSION_JBIG ) @@ -402,7 +412,7 @@ TIFFSeek(TIFF* tif, uint32 row, uint16 sample ) else if( !whole_strip ) { if( ((tif->tif_rawdata + tif->tif_rawdataloaded) - tif->tif_rawcp) < read_ahead - && (uint64) tif->tif_rawdataoff+tif->tif_rawdataloaded < td->td_stripbytecount[strip] ) + && (uint64) tif->tif_rawdataoff+tif->tif_rawdataloaded < TIFFGetStrileByteCount(tif, strip) ) { if( !TIFFFillStripPartial(tif,strip,read_ahead,0) ) return 0; @@ -599,16 +609,11 @@ static tmsize_t TIFFReadRawStrip1(TIFF* tif, uint32 strip, void* buf, tmsize_t size, const char* module) { - TIFFDirectory *td = &tif->tif_dir; - - if (!_TIFFFillStriles( tif )) - return ((tmsize_t)(-1)); - assert((tif->tif_flags&TIFF_NOREADRAW)==0); if (!isMapped(tif)) { tmsize_t cc; - if (!SeekOK(tif, td->td_stripoffset[strip])) { + if (!SeekOK(tif, TIFFGetStrileOffset(tif, strip))) { TIFFErrorExt(tif->tif_clientdata, module, "Seek error at scanline %lu, strip %lu", (unsigned long) tif->tif_row, (unsigned long) strip); @@ -634,8 +639,8 @@ TIFFReadRawStrip1(TIFF* tif, uint32 strip, void* buf, tmsize_t size, } else { tmsize_t ma = 0; tmsize_t n; - if ((td->td_stripoffset[strip] > (uint64)TIFF_TMSIZE_T_MAX)|| - ((ma=(tmsize_t)td->td_stripoffset[strip])>tif->tif_size)) + if ((TIFFGetStrileOffset(tif, strip) > (uint64)TIFF_TMSIZE_T_MAX)|| + ((ma=(tmsize_t)TIFFGetStrileOffset(tif, strip))>tif->tif_size)) { n=0; } @@ -679,12 +684,10 @@ static tmsize_t TIFFReadRawStripOrTile2(TIFF* tif, uint32 strip_or_tile, int is_strip, tmsize_t size, const char* module) { - TIFFDirectory *td = &tif->tif_dir; - assert( !isMapped(tif) ); assert((tif->tif_flags&TIFF_NOREADRAW)==0); - if (!SeekOK(tif, td->td_stripoffset[strip_or_tile])) { + if (!SeekOK(tif, TIFFGetStrileOffset(tif, strip_or_tile))) { if( is_strip ) { TIFFErrorExt(tif->tif_clientdata, module, @@ -720,7 +723,7 @@ TIFFReadRawStrip(TIFF* tif, uint32 strip, void* buf, tmsize_t size) { static const char module[] = "TIFFReadRawStrip"; TIFFDirectory *td = &tif->tif_dir; - uint64 bytecount; + uint64 bytecount64; tmsize_t bytecountm; if (!TIFFCheckRead(tif, 0)) @@ -738,31 +741,23 @@ TIFFReadRawStrip(TIFF* tif, uint32 strip, void* buf, tmsize_t size) "Compression scheme does not support access to raw uncompressed data"); return ((tmsize_t)(-1)); } - bytecount = td->td_stripbytecount[strip]; - if ((int64)bytecount <= 0) { -#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) - TIFFErrorExt(tif->tif_clientdata, module, - "%I64u: Invalid strip byte count, strip %lu", - (unsigned __int64) bytecount, - (unsigned long) strip); -#else - TIFFErrorExt(tif->tif_clientdata, module, - "%llu: Invalid strip byte count, strip %lu", - (unsigned long long) bytecount, - (unsigned long) strip); -#endif - return ((tmsize_t)(-1)); - } - bytecountm = (tmsize_t)bytecount; - if ((uint64)bytecountm!=bytecount) { - TIFFErrorExt(tif->tif_clientdata, module, "Integer overflow"); - return ((tmsize_t)(-1)); - } - if (size != (tmsize_t)(-1) && size < bytecountm) + bytecount64 = TIFFGetStrileByteCount(tif, strip); + if (size != (tmsize_t)(-1) && (uint64)size <= bytecount64) bytecountm = size; + else + bytecountm = _TIFFCastUInt64ToSSize(tif, bytecount64, module); + if( bytecountm == 0 ) { + return 
((tmsize_t)(-1)); + } return (TIFFReadRawStrip1(tif, strip, buf, bytecountm, module)); } +TIFF_NOSANITIZE_UNSIGNED_INT_OVERFLOW +static uint64 NoSanitizeSubUInt64(uint64 a, uint64 b) +{ + return a - b; +} + /* * Read the specified strip and setup for decoding. The data buffer is * expanded, as necessary, to hold the strip's data. @@ -773,13 +768,10 @@ TIFFFillStrip(TIFF* tif, uint32 strip) static const char module[] = "TIFFFillStrip"; TIFFDirectory *td = &tif->tif_dir; - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - if ((tif->tif_flags&TIFF_NOREADRAW)==0) { - uint64 bytecount = td->td_stripbytecount[strip]; - if ((int64)bytecount <= 0) { + uint64 bytecount = TIFFGetStrileByteCount(tif, strip); + if( bytecount == 0 || bytecount > (uint64)TIFF_INT64_MAX ) { #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) TIFFErrorExt(tif->tif_clientdata, module, "Invalid strip byte count %I64u, strip %lu", @@ -806,7 +798,7 @@ TIFFFillStrip(TIFF* tif, uint32 strip) (bytecount - 4096) / 10 > (uint64)stripsize ) { uint64 newbytecount = (uint64)stripsize * 10 + 4096; - if( (int64)newbytecount >= 0 ) + if( newbytecount == 0 || newbytecount > (uint64)TIFF_INT64_MAX ) { #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) TIFFWarningExt(tif->tif_clientdata, module, @@ -831,13 +823,13 @@ TIFFFillStrip(TIFF* tif, uint32 strip) * We must check for overflow, potentially causing * an OOB read. Instead of simple * - * td->td_stripoffset[strip]+bytecount > tif->tif_size + * TIFFGetStrileOffset(tif, strip)+bytecount > tif->tif_size * * comparison (which can overflow) we do the following * two comparisons: */ if (bytecount > (uint64)tif->tif_size || - td->td_stripoffset[strip] > (uint64)tif->tif_size - bytecount) { + TIFFGetStrileOffset(tif, strip) > (uint64)tif->tif_size - bytecount) { /* * This error message might seem strange, but * it's what would happen if a read were done @@ -849,7 +841,7 @@ TIFFFillStrip(TIFF* tif, uint32 strip) "Read error on strip %lu; " "got %I64u bytes, expected %I64u", (unsigned long) strip, - (unsigned __int64) tif->tif_size - td->td_stripoffset[strip], + (unsigned __int64) NoSanitizeSubUInt64(tif->tif_size, TIFFGetStrileOffset(tif, strip)), (unsigned __int64) bytecount); #else TIFFErrorExt(tif->tif_clientdata, module, @@ -857,7 +849,7 @@ TIFFFillStrip(TIFF* tif, uint32 strip) "Read error on strip %lu; " "got %llu bytes, expected %llu", (unsigned long) strip, - (unsigned long long) tif->tif_size - td->td_stripoffset[strip], + (unsigned long long) NoSanitizeSubUInt64(tif->tif_size, TIFFGetStrileOffset(tif, strip)), (unsigned long long) bytecount); #endif tif->tif_curstrip = NOSTRIP; @@ -886,7 +878,7 @@ TIFFFillStrip(TIFF* tif, uint32 strip) } tif->tif_flags &= ~TIFF_MYBUFFER; tif->tif_rawdatasize = (tmsize_t)bytecount; - tif->tif_rawdata = tif->tif_base + (tmsize_t)td->td_stripoffset[strip]; + tif->tif_rawdata = tif->tif_base + (tmsize_t)TIFFGetStrileOffset(tif, strip); tif->tif_rawdataoff = 0; tif->tif_rawdataloaded = (tmsize_t) bytecount; @@ -1101,16 +1093,11 @@ _TIFFReadEncodedTileAndAllocBuffer(TIFF* tif, uint32 tile, static tmsize_t TIFFReadRawTile1(TIFF* tif, uint32 tile, void* buf, tmsize_t size, const char* module) { - TIFFDirectory *td = &tif->tif_dir; - - if (!_TIFFFillStriles( tif )) - return ((tmsize_t)(-1)); - assert((tif->tif_flags&TIFF_NOREADRAW)==0); if (!isMapped(tif)) { tmsize_t cc; - if (!SeekOK(tif, td->td_stripoffset[tile])) { + if (!SeekOK(tif, TIFFGetStrileOffset(tif, tile))) { 
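TIFFReadRawStrip and TIFFReadRawTile above replace the repeated cast-and-compare dance with _TIFFCastUInt64ToSSize, which yields 0 (after reporting an error) when a uint64 byte count cannot be represented in the signed tmsize_t. The round-trip test it relies on, in isolation:

    #include <stddef.h>
    #include <stdint.h>

    typedef ptrdiff_t tmsize_t;   /* stand-in for libtiff's signed size type */

    /* 0 means "does not fit"; the real helper also calls TIFFErrorExt. */
    static tmsize_t cast_u64_to_ssize(uint64_t val)
    {
        tmsize_t n = (tmsize_t)val;   /* narrowing checked by the round trip */
        if (n < 0 || (uint64_t)n != val)
            return 0;
        return n;
    }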
TIFFErrorExt(tif->tif_clientdata, module, "Seek error at row %lu, col %lu, tile %lu", (unsigned long) tif->tif_row, @@ -1140,9 +1127,9 @@ TIFFReadRawTile1(TIFF* tif, uint32 tile, void* buf, tmsize_t size, const char* m } else { tmsize_t ma,mb; tmsize_t n; - ma=(tmsize_t)td->td_stripoffset[tile]; + ma=(tmsize_t)TIFFGetStrileOffset(tif, tile); mb=ma+size; - if ((td->td_stripoffset[tile] > (uint64)TIFF_TMSIZE_T_MAX)||(ma>tif->tif_size)) + if ((TIFFGetStrileOffset(tif, tile) > (uint64)TIFF_TMSIZE_T_MAX)||(ma>tif->tif_size)) n=0; else if ((mbtif->tif_size)) n=tif->tif_size-ma; @@ -1198,13 +1185,12 @@ TIFFReadRawTile(TIFF* tif, uint32 tile, void* buf, tmsize_t size) "Compression scheme does not support access to raw uncompressed data"); return ((tmsize_t)(-1)); } - bytecount64 = td->td_stripbytecount[tile]; - if (size != (tmsize_t)(-1) && (uint64)size < bytecount64) - bytecount64 = (uint64)size; - bytecountm = (tmsize_t)bytecount64; - if ((uint64)bytecountm!=bytecount64) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); + bytecount64 = TIFFGetStrileByteCount(tif, tile); + if (size != (tmsize_t)(-1) && (uint64)size <= bytecount64) + bytecountm = size; + else + bytecountm = _TIFFCastUInt64ToSSize(tif, bytecount64, module); + if( bytecountm == 0 ) { return ((tmsize_t)(-1)); } return (TIFFReadRawTile1(tif, tile, buf, bytecountm, module)); @@ -1220,13 +1206,10 @@ TIFFFillTile(TIFF* tif, uint32 tile) static const char module[] = "TIFFFillTile"; TIFFDirectory *td = &tif->tif_dir; - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - if ((tif->tif_flags&TIFF_NOREADRAW)==0) { - uint64 bytecount = td->td_stripbytecount[tile]; - if ((int64)bytecount <= 0) { + uint64 bytecount = TIFFGetStrileByteCount(tif, tile); + if( bytecount == 0 || bytecount > (uint64)TIFF_INT64_MAX ) { #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) TIFFErrorExt(tif->tif_clientdata, module, "%I64u: Invalid tile byte count, tile %lu", @@ -1253,7 +1236,7 @@ TIFFFillTile(TIFF* tif, uint32 tile) (bytecount - 4096) / 10 > (uint64)stripsize ) { uint64 newbytecount = (uint64)stripsize * 10 + 4096; - if( (int64)newbytecount >= 0 ) + if( newbytecount == 0 || newbytecount > (uint64)TIFF_INT64_MAX ) { #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) TIFFWarningExt(tif->tif_clientdata, module, @@ -1278,13 +1261,13 @@ TIFFFillTile(TIFF* tif, uint32 tile) * We must check for overflow, potentially causing * an OOB read. 
Instead of simple * - * td->td_stripoffset[tile]+bytecount > tif->tif_size + * TIFFGetStrileOffset(tif, tile)+bytecount > tif->tif_size * * comparison (which can overflow) we do the following * two comparisons: */ if (bytecount > (uint64)tif->tif_size || - td->td_stripoffset[tile] > (uint64)tif->tif_size - bytecount) { + TIFFGetStrileOffset(tif, tile) > (uint64)tif->tif_size - bytecount) { tif->tif_curtile = NOTILE; return (0); } @@ -1313,7 +1296,7 @@ TIFFFillTile(TIFF* tif, uint32 tile) tif->tif_rawdatasize = (tmsize_t)bytecount; tif->tif_rawdata = - tif->tif_base + (tmsize_t)td->td_stripoffset[tile]; + tif->tif_base + (tmsize_t)TIFFGetStrileOffset(tif, tile); tif->tif_rawdataoff = 0; tif->tif_rawdataloaded = (tmsize_t) bytecount; tif->tif_flags |= TIFF_BUFFERMMAP; @@ -1440,9 +1423,6 @@ TIFFStartStrip(TIFF* tif, uint32 strip) { TIFFDirectory *td = &tif->tif_dir; - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - if ((tif->tif_flags & TIFF_CODERSETUP) == 0) { if (!(*tif->tif_setupdecode)(tif)) return (0); @@ -1463,10 +1443,18 @@ TIFFStartStrip(TIFF* tif, uint32 strip) if( tif->tif_rawdataloaded > 0 ) tif->tif_rawcc = tif->tif_rawdataloaded; else - tif->tif_rawcc = (tmsize_t)td->td_stripbytecount[strip]; + tif->tif_rawcc = (tmsize_t)TIFFGetStrileByteCount(tif, strip); } - return ((*tif->tif_predecode)(tif, - (uint16)(strip / td->td_stripsperimage))); + if ((*tif->tif_predecode)(tif, + (uint16)(strip / td->td_stripsperimage)) == 0 ) { + /* Needed for example for scanline access, if tif_predecode */ + /* fails, and we try to read the same strip again. Without invalidating */ + /* tif_curstrip, we'd call tif_decoderow() on a possibly invalid */ + /* codec state. */ + tif->tif_curstrip = NOSTRIP; + return 0; + } + return 1; } /* @@ -1480,9 +1468,6 @@ TIFFStartTile(TIFF* tif, uint32 tile) TIFFDirectory *td = &tif->tif_dir; uint32 howmany32; - if (!_TIFFFillStriles( tif ) || !tif->tif_dir.td_stripbytecount) - return 0; - if ((tif->tif_flags & TIFF_CODERSETUP) == 0) { if (!(*tif->tif_setupdecode)(tif)) return (0); @@ -1513,7 +1498,7 @@ TIFFStartTile(TIFF* tif, uint32 tile) if( tif->tif_rawdataloaded > 0 ) tif->tif_rawcc = tif->tif_rawdataloaded; else - tif->tif_rawcc = (tmsize_t)td->td_stripbytecount[tile]; + tif->tif_rawcc = (tmsize_t)TIFFGetStrileByteCount(tif, tile); } return ((*tif->tif_predecode)(tif, (uint16)(tile/td->td_stripsperimage))); @@ -1528,13 +1513,100 @@ TIFFCheckRead(TIFF* tif, int tiles) } if (tiles ^ isTiled(tif)) { TIFFErrorExt(tif->tif_clientdata, tif->tif_name, tiles ? - "Can not read tiles from a stripped image" : + "Can not read tiles from a striped image" : "Can not read scanlines from a tiled image"); return (0); } return (1); } +/* Use the provided input buffer (inbuf, insize) and decompress it into + * (outbuf, outsize). + * This function replaces the use of TIFFReadEncodedStrip()/TIFFReadEncodedTile() + * when the user can provide the buffer for the input data, for example when + * he wants to avoid libtiff to read the strile offset/count values from the + * [Strip|Tile][Offsets/ByteCounts] array. + * inbuf content must be writable (if bit reversal is needed) + * Returns 1 in case of success, 0 otherwise. 
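The comment block above spells out why the offset and byte count are compared in two steps: "offset + bytecount > size" can wrap in uint64 arithmetic and let an out-of-bounds read through. The safe predicate, stated on its own:

    #include <stdint.h>

    /* Does [offset, offset + count) lie within a file of `size` bytes?
     * Naive "offset + count > size" can wrap around; this cannot. */
    static int fits_in_file(uint64_t offset, uint64_t count, uint64_t size)
    {
        return count <= size && offset <= size - count;
    }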
+ */ +int TIFFReadFromUserBuffer(TIFF* tif, uint32 strile, + void* inbuf, tmsize_t insize, + void* outbuf, tmsize_t outsize) +{ + static const char module[] = "TIFFReadFromUserBuffer"; + TIFFDirectory *td = &tif->tif_dir; + int ret = 1; + uint32 old_tif_flags = tif->tif_flags; + tmsize_t old_rawdatasize = tif->tif_rawdatasize; + void* old_rawdata = tif->tif_rawdata; + + if (tif->tif_mode == O_WRONLY) { + TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "File not open for reading"); + return 0; + } + if (tif->tif_flags&TIFF_NOREADRAW) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Compression scheme does not support access to raw uncompressed data"); + return 0; + } + + tif->tif_flags &= ~TIFF_MYBUFFER; + tif->tif_flags |= TIFF_BUFFERMMAP; + tif->tif_rawdatasize = insize; + tif->tif_rawdata = inbuf; + tif->tif_rawdataoff = 0; + tif->tif_rawdataloaded = insize; + + if (!isFillOrder(tif, td->td_fillorder) && + (tif->tif_flags & TIFF_NOBITREV) == 0) + { + TIFFReverseBits(inbuf, insize); + } + + if( TIFFIsTiled(tif) ) + { + if( !TIFFStartTile(tif, strile) || + !(*tif->tif_decodetile)(tif, (uint8*) outbuf, outsize, + (uint16)(strile/td->td_stripsperimage)) ) + { + ret = 0; + } + } + else + { + uint32 rowsperstrip=td->td_rowsperstrip; + uint32 stripsperplane; + if (rowsperstrip>td->td_imagelength) + rowsperstrip=td->td_imagelength; + stripsperplane= TIFFhowmany_32_maxuint_compat(td->td_imagelength, rowsperstrip); + if( !TIFFStartStrip(tif, strile) || + !(*tif->tif_decodestrip)(tif, (uint8*) outbuf, outsize, + (uint16)(strile/stripsperplane)) ) + { + ret = 0; + } + } + if( ret ) + { + (*tif->tif_postdecode)(tif, (uint8*) outbuf, outsize); + } + + if (!isFillOrder(tif, td->td_fillorder) && + (tif->tif_flags & TIFF_NOBITREV) == 0) + { + TIFFReverseBits(inbuf, insize); + } + + tif->tif_flags = old_tif_flags; + tif->tif_rawdatasize = old_rawdatasize; + tif->tif_rawdata = old_rawdata; + tif->tif_rawdataoff = 0; + tif->tif_rawdataloaded = 0; + + return ret; +} + void _TIFFNoPostDecode(TIFF* tif, uint8* buf, tmsize_t cc) { diff --git a/3rdparty/libtiff/tif_strip.c b/3rdparty/libtiff/tif_strip.c index 5b76fba56d..c08c60a792 100644 --- a/3rdparty/libtiff/tif_strip.c +++ b/3rdparty/libtiff/tif_strip.c @@ -129,15 +129,8 @@ TIFFVStripSize(TIFF* tif, uint32 nrows) { static const char module[] = "TIFFVStripSize"; uint64 m; - tmsize_t n; m=TIFFVStripSize64(tif,nrows); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* @@ -147,8 +140,7 @@ uint64 TIFFRawStripSize64(TIFF* tif, uint32 strip) { static const char module[] = "TIFFRawStripSize64"; - TIFFDirectory* td = &tif->tif_dir; - uint64 bytecount = td->td_stripbytecount[strip]; + uint64 bytecount = TIFFGetStrileByteCount(tif, strip); if (bytecount == 0) { @@ -211,15 +203,8 @@ TIFFStripSize(TIFF* tif) { static const char module[] = "TIFFStripSize"; uint64 m; - tmsize_t n; m=TIFFStripSize64(tif); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* @@ -330,14 +315,8 @@ TIFFScanlineSize(TIFF* tif) { static const char module[] = "TIFFScanlineSize"; uint64 m; - tmsize_t n; m=TIFFScanlineSize64(tif); - n=(tmsize_t)m; - if ((uint64)n!=m) { - TIFFErrorExt(tif->tif_clientdata,module,"Integer arithmetic overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* @@ -366,15 +345,8 @@ 
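TIFFReadFromUserBuffer, added above, decodes one strile from compressed bytes the caller supplies, bypassing libtiff's own strile offset/bytecount lookups; note the documented requirement that the input buffer be writable, since bit reversal may happen in place. A hedged usage sketch (buffer acquisition left to the caller):

    #include <tiffio.h>

    /* Decode one strip whose raw bytes were fetched by the caller's own
     * I/O layer. `raw` must be writable (in-place bit reversal). */
    static int decode_from_memory(TIFF *tif, uint32 strip,
                                  void *raw, tmsize_t rawsize)
    {
        tmsize_t outsize = TIFFStripSize(tif);
        void *out = _TIFFmalloc(outsize);
        if (out == NULL)
            return 0;
        int ok = TIFFReadFromUserBuffer(tif, strip, raw, rawsize,
                                        out, outsize);
        /* ... consume `out` on success ... */
        _TIFFfree(out);
        return ok;
    }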
TIFFRasterScanlineSize(TIFF* tif) { static const char module[] = "TIFFRasterScanlineSize"; uint64 m; - tmsize_t n; m=TIFFRasterScanlineSize64(tif); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer arithmetic overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* vim: set ts=8 sts=8 sw=8 noet: */ diff --git a/3rdparty/libtiff/tif_thunder.c b/3rdparty/libtiff/tif_thunder.c index 2388dbb66b..db6383a81a 100644 --- a/3rdparty/libtiff/tif_thunder.c +++ b/3rdparty/libtiff/tif_thunder.c @@ -122,17 +122,17 @@ ThunderDecode(TIFF* tif, uint8* op, tmsize_t maxpixels) break; case THUNDER_2BITDELTAS: /* 2-bit deltas */ if ((delta = ((n >> 4) & 3)) != DELTA2_SKIP) - SETPIXEL(op, lastpixel + twobitdeltas[delta]); + SETPIXEL(op, (unsigned)((int)lastpixel + twobitdeltas[delta])); if ((delta = ((n >> 2) & 3)) != DELTA2_SKIP) - SETPIXEL(op, lastpixel + twobitdeltas[delta]); + SETPIXEL(op, (unsigned)((int)lastpixel + twobitdeltas[delta])); if ((delta = (n & 3)) != DELTA2_SKIP) - SETPIXEL(op, lastpixel + twobitdeltas[delta]); + SETPIXEL(op, (unsigned)((int)lastpixel + twobitdeltas[delta])); break; case THUNDER_3BITDELTAS: /* 3-bit deltas */ if ((delta = ((n >> 3) & 7)) != DELTA3_SKIP) - SETPIXEL(op, lastpixel + threebitdeltas[delta]); + SETPIXEL(op, (unsigned)((int)lastpixel + threebitdeltas[delta])); if ((delta = (n & 7)) != DELTA3_SKIP) - SETPIXEL(op, lastpixel + threebitdeltas[delta]); + SETPIXEL(op, (unsigned)((int)lastpixel + threebitdeltas[delta])); break; case THUNDER_RAW: /* raw data */ SETPIXEL(op, n); diff --git a/3rdparty/libtiff/tif_tile.c b/3rdparty/libtiff/tif_tile.c index 58fe9354a3..661cc77154 100644 --- a/3rdparty/libtiff/tif_tile.c +++ b/3rdparty/libtiff/tif_tile.c @@ -181,15 +181,8 @@ TIFFTileRowSize(TIFF* tif) { static const char module[] = "TIFFTileRowSize"; uint64 m; - tmsize_t n; m=TIFFTileRowSize64(tif); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* @@ -248,15 +241,8 @@ TIFFVTileSize(TIFF* tif, uint32 nrows) { static const char module[] = "TIFFVTileSize"; uint64 m; - tmsize_t n; m=TIFFVTileSize64(tif,nrows); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* @@ -272,15 +258,8 @@ TIFFTileSize(TIFF* tif) { static const char module[] = "TIFFTileSize"; uint64 m; - tmsize_t n; m=TIFFTileSize64(tif); - n=(tmsize_t)m; - if ((uint64)n!=m) - { - TIFFErrorExt(tif->tif_clientdata,module,"Integer overflow"); - n=0; - } - return(n); + return _TIFFCastUInt64ToSSize(tif, m, module); } /* diff --git a/3rdparty/libtiff/tif_unix.c b/3rdparty/libtiff/tif_unix.c index 874f1feb26..bea1ef7802 100644 --- a/3rdparty/libtiff/tif_unix.c +++ b/3rdparty/libtiff/tif_unix.c @@ -162,7 +162,7 @@ _tiffMapProc(thandle_t fd, void** pbase, toff_t* psize) { uint64 size64 = _tiffSizeProc(fd); tmsize_t sizem = (tmsize_t)size64; - if ((uint64)sizem==size64) { + if (size64 && (uint64)sizem==size64) { fd_as_handle_union_t fdh; fdh.h = fd; *pbase = (void*) diff --git a/3rdparty/libtiff/tif_webp.c b/3rdparty/libtiff/tif_webp.c index 22665f2d2b..a00478f6b9 100644 --- a/3rdparty/libtiff/tif_webp.c +++ b/3rdparty/libtiff/tif_webp.c @@ -267,6 +267,12 @@ TWebPPreDecode(TIFF* tif, uint16 s) segment_height = td->td_rowsperstrip; } + if( segment_width > 16383 || segment_height > 16383 ) { + 
TIFFErrorExt(tif->tif_clientdata, module, + "WEBP maximum image dimensions are 16383 x 16383."); + return 0; + } + if( (sp->state & LSTATE_INIT_DECODE) == 0 ) tif->tif_setupdecode(tif); @@ -333,7 +339,7 @@ TWebPSetupEncode(TIFF* tif) } /* check bits per sample and data type */ - if ((nBitsPerSample != 8) && (sampleFormat != 1)) { + if ((nBitsPerSample != 8) || (sampleFormat != SAMPLEFORMAT_UINT)) { TIFFErrorExt(tif->tif_clientdata, module, "WEBP driver requires 8 bit unsigned data"); return 0; @@ -356,7 +362,7 @@ TWebPSetupEncode(TIFF* tif) } if (!WebPConfigInitInternal(&sp->sEncoderConfig, WEBP_PRESET_DEFAULT, - sp->quality_level, + (float)sp->quality_level, WEBP_ENCODER_ABI_VERSION)) { TIFFErrorExt(tif->tif_clientdata, module, "Error creating WebP encoder configuration."); @@ -579,7 +585,7 @@ TWebPVSetField(TIFF* tif, uint32 tag, va_list ap) #if WEBP_ENCODER_ABI_VERSION >= 0x0100 sp->lossless = va_arg(ap, int); if (sp->lossless){ - sp->quality_level = 100.0f; + sp->quality_level = 100; } return 1; #else @@ -628,6 +634,7 @@ TIFFInitWebP(TIFF* tif, int scheme) static const char module[] = "TIFFInitWebP"; WebPState* sp; + (void)scheme; assert( scheme == COMPRESSION_WEBP ); /* @@ -656,7 +663,7 @@ TIFFInitWebP(TIFF* tif, int scheme) tif->tif_tagmethods.vsetfield = TWebPVSetField; /* hook for codec tags */ /* Default values for codec-specific fields */ - sp->quality_level = 75.0f; /* default comp. level */ + sp->quality_level = 75; /* default comp. level */ sp->lossless = 0; /* default to false */ sp->state = 0; sp->nSamples = 0; diff --git a/3rdparty/libtiff/tif_win32.c b/3rdparty/libtiff/tif_win32.c index 088880e7c4..8964569394 100644 --- a/3rdparty/libtiff/tif_win32.c +++ b/3rdparty/libtiff/tif_win32.c @@ -27,34 +27,38 @@ * Scott Wagner (wagner@itek.com), Itek Graphix, Rochester, NY USA */ -/* - CreateFileA/CreateFileW return type 'HANDLE'. - - thandle_t is declared like - - DECLARE_HANDLE(thandle_t); - - in tiffio.h. - - Windows (from winnt.h) DECLARE_HANDLE logic looks like - - #ifdef STRICT - typedef void *HANDLE; - #define DECLARE_HANDLE(name) struct name##__ { int unused; }; typedef struct name##__ *name - #else - typedef PVOID HANDLE; - #define DECLARE_HANDLE(name) typedef HANDLE name - #endif - - See http://bugzilla.maptools.org/show_bug.cgi?id=1941 for problems in WIN64 - builds resulting from this. Unfortunately, the proposed patch was lost. - -*/ - #include "tiffiop.h" #include +/* + CreateFileA/CreateFileW return type 'HANDLE' while TIFFFdOpen() takes 'int', + which is formally incompatible and can even seemingly be of different size: + HANDLE is 64 bit under Win64, while int is still 32 bits there. + + However, only the lower 32 bits of a HANDLE are significant under Win64 as, + for interoperability reasons, they must have the same values in 32- and + 64-bit programs running on the same system, see + + https://docs.microsoft.com/en-us/windows/win32/winprog64/interprocess-communication + + Because of this, it is safe to define the following trivial functions for + casting between ints and HANDLEs, which are only really needed to avoid + compiler warnings (and, perhaps, to make the code slightly more clear). + Note that using the intermediate cast to "intptr_t" is crucial for warning + avoidance, as this integer type has the same size as HANDLE in all builds. 
+*/ + +static inline thandle_t thandle_from_int(int ifd) +{ + return (thandle_t)(intptr_t)ifd; +} + +static inline int thandle_to_int(thandle_t fd) +{ + return (int)(intptr_t)fd; +} + static tmsize_t _tiffReadProc(thandle_t fd, void* buf, tmsize_t size) { @@ -151,9 +155,11 @@ _tiffCloseProc(thandle_t fd) static uint64 _tiffSizeProc(thandle_t fd) { - ULARGE_INTEGER m; - m.LowPart=GetFileSize(fd,&m.HighPart); - return(m.QuadPart); + LARGE_INTEGER m; + if (GetFileSizeEx(fd,&m)) + return(m.QuadPart); + else + return(0); } static int @@ -185,7 +191,7 @@ _tiffMapProc(thandle_t fd, void** pbase, toff_t* psize) size = _tiffSizeProc(fd); sizem = (tmsize_t)size; - if ((uint64)sizem!=size) + if (!size || (uint64)sizem!=size) return (0); /* By passing in 0 for the maximum file size, it specifies that we @@ -237,7 +243,7 @@ TIFFFdOpen(int ifd, const char* name, const char* mode) break; } } - tif = TIFFClientOpen(name, mode, (thandle_t)ifd, /* FIXME: WIN64 cast to pointer warning */ + tif = TIFFClientOpen(name, mode, thandle_from_int(ifd), _tiffReadProc, _tiffWriteProc, _tiffSeekProc, _tiffCloseProc, _tiffSizeProc, fSuppressMap ? _tiffDummyMapProc : _tiffMapProc, @@ -282,7 +288,7 @@ TIFFOpen(const char* name, const char* mode) return ((TIFF *)0); } - tif = TIFFFdOpen((int)fd, name, mode); /* FIXME: WIN64 cast from pointer to int warning */ + tif = TIFFFdOpen(thandle_to_int(fd), name, mode); if(!tif) CloseHandle(fd); return tif; @@ -337,7 +343,7 @@ TIFFOpenW(const wchar_t* name, const char* mode) NULL, NULL); } - tif = TIFFFdOpen((int)fd, /* FIXME: WIN64 cast from pointer to int warning */ + tif = TIFFFdOpen(thandle_to_int(fd), (mbname != NULL) ? mbname : "", mode); if(!tif) CloseHandle(fd); diff --git a/3rdparty/libtiff/tif_write.c b/3rdparty/libtiff/tif_write.c index a31ecd12c1..3af69ab4e7 100644 --- a/3rdparty/libtiff/tif_write.c +++ b/3rdparty/libtiff/tif_write.c @@ -128,10 +128,10 @@ TIFFWriteScanline(TIFF* tif, void* buf, uint32 row, uint16 sample) tif->tif_rawcc = 0; tif->tif_rawcp = tif->tif_rawdata; - if( td->td_stripbytecount[strip] > 0 ) + if( td->td_stripbytecount_p[strip] > 0 ) { /* if we are writing over existing tiles, zero length */ - td->td_stripbytecount[strip] = 0; + td->td_stripbytecount_p[strip] = 0; /* this forces TIFFAppendToStrip() to do a seek */ tif->tif_curoff = 0; @@ -176,6 +176,32 @@ TIFFWriteScanline(TIFF* tif, void* buf, uint32 row, uint16 sample) return (status); } +/* Make sure that at the first attempt of rewriting a tile/strip, we will have */ +/* more bytes available in the output buffer than the previous byte count, */ +/* so that TIFFAppendToStrip() will detect the overflow when it is called the first */ +/* time if the new compressed tile is bigger than the older one. (GDAL #4771) */ +static int _TIFFReserveLargeEnoughWriteBuffer(TIFF* tif, uint32 strip_or_tile) +{ + TIFFDirectory *td = &tif->tif_dir; + if( td->td_stripbytecount_p[strip_or_tile] > 0 ) + { + /* The +1 is to ensure at least one extra bytes */ + /* The +4 is because the LZW encoder flushes 4 bytes before the limit */ + uint64 safe_buffer_size = (uint64)(td->td_stripbytecount_p[strip_or_tile] + 1 + 4); + if( tif->tif_rawdatasize <= (tmsize_t)safe_buffer_size ) + { + if( !(TIFFWriteBufferSetup(tif, NULL, + (tmsize_t)TIFFroundup_64(safe_buffer_size, 1024))) ) + return 0; + } + + /* Force TIFFAppendToStrip() to consider placing data at end + of file. */ + tif->tif_curoff = 0; + } + return 1; +} + /* * Encode the supplied data and write it to the * specified strip. 
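_TIFFReserveLargeEnoughWriteBuffer above hoists duplicated logic out of TIFFWriteEncodedStrip and TIFFWriteEncodedTile: when rewriting an existing strile, the raw buffer is sized to the previous byte count plus 1 (so growth is detectable) plus 4 (the LZW encoder flushes four bytes before reaching the limit), rounded up to 1 KB, so TIFFAppendToStrip notices on the first attempt that the new compressed data is larger (GDAL #4771). The arithmetic in isolation:

    #include <stdint.h>

    /* Sketch of TIFFroundup_64: round x up to a multiple of mult. */
    static uint64_t roundup64(uint64_t x, uint64_t mult)
    {
        return ((x + mult - 1) / mult) * mult;
    }

    /* Previous strile held `prev` bytes: reserve prev + 1 + 4, 1 KB-aligned. */
    static uint64_t rewrite_buffer_size(uint64_t prev)
    {
        return roundup64(prev + 1 + 4, 1024);
    }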
@@ -222,6 +248,13 @@ TIFFWriteEncodedStrip(TIFF* tif, uint32 strip, void* data, tmsize_t cc) tif->tif_flags |= TIFF_BUF4WRITE; tif->tif_curstrip = strip; + if( !_TIFFReserveLargeEnoughWriteBuffer(tif, strip) ) { + return ((tmsize_t)(-1)); + } + + tif->tif_rawcc = 0; + tif->tif_rawcp = tif->tif_rawdata; + if (td->td_stripsperimage == 0) { TIFFErrorExt(tif->tif_clientdata, module, "Zero strips per image"); return ((tmsize_t) -1); @@ -234,27 +267,6 @@ TIFFWriteEncodedStrip(TIFF* tif, uint32 strip, void* data, tmsize_t cc) tif->tif_flags |= TIFF_CODERSETUP; } - if( td->td_stripbytecount[strip] > 0 ) - { - /* Make sure that at the first attempt of rewriting the tile, we will have */ - /* more bytes available in the output buffer than the previous byte count, */ - /* so that TIFFAppendToStrip() will detect the overflow when it is called the first */ - /* time if the new compressed tile is bigger than the older one. (GDAL #4771) */ - if( tif->tif_rawdatasize <= (tmsize_t)td->td_stripbytecount[strip] ) - { - if( !(TIFFWriteBufferSetup(tif, NULL, - (tmsize_t)TIFFroundup_64((uint64)(td->td_stripbytecount[strip] + 1), 1024))) ) - return ((tmsize_t)(-1)); - } - - /* Force TIFFAppendToStrip() to consider placing data at end - of file. */ - tif->tif_curoff = 0; - } - - tif->tif_rawcc = 0; - tif->tif_rawcp = tif->tif_rawdata; - tif->tif_flags &= ~TIFF_POSTENCODE; /* shortcut to avoid an extra memcpy() */ @@ -402,22 +414,8 @@ TIFFWriteEncodedTile(TIFF* tif, uint32 tile, void* data, tmsize_t cc) tif->tif_flags |= TIFF_BUF4WRITE; tif->tif_curtile = tile; - if( td->td_stripbytecount[tile] > 0 ) - { - /* Make sure that at the first attempt of rewriting the tile, we will have */ - /* more bytes available in the output buffer than the previous byte count, */ - /* so that TIFFAppendToStrip() will detect the overflow when it is called the first */ - /* time if the new compressed tile is bigger than the older one. (GDAL #4771) */ - if( tif->tif_rawdatasize <= (tmsize_t) td->td_stripbytecount[tile] ) - { - if( !(TIFFWriteBufferSetup(tif, NULL, - (tmsize_t)TIFFroundup_64((uint64)(td->td_stripbytecount[tile] + 1), 1024))) ) - return ((tmsize_t)(-1)); - } - - /* Force TIFFAppendToStrip() to consider placing data at end - of file. */ - tif->tif_curoff = 0; + if( !_TIFFReserveLargeEnoughWriteBuffer(tif, tile) ) { + return ((tmsize_t)(-1)); } tif->tif_rawcc = 0; @@ -535,22 +533,29 @@ TIFFSetupStrips(TIFF* tif) isUnspecified(tif, FIELD_ROWSPERSTRIP) ? td->td_samplesperpixel : TIFFNumberOfStrips(tif); td->td_nstrips = td->td_stripsperimage; + /* TIFFWriteDirectoryTagData has a limitation to 0x80000000U bytes */ + if( td->td_nstrips >= 0x80000000U / ((tif->tif_flags&TIFF_BIGTIFF)?0x8U:0x4U) ) + { + TIFFErrorExt(tif->tif_clientdata, "TIFFSetupStrips", + "Too large Strip/Tile Offsets/ByteCounts arrays"); + return 0; + } if (td->td_planarconfig == PLANARCONFIG_SEPARATE) td->td_stripsperimage /= td->td_samplesperpixel; - td->td_stripoffset = (uint64 *) + td->td_stripoffset_p = (uint64 *) _TIFFCheckMalloc(tif, td->td_nstrips, sizeof (uint64), "for \"StripOffsets\" array"); - td->td_stripbytecount = (uint64 *) + td->td_stripbytecount_p = (uint64 *) _TIFFCheckMalloc(tif, td->td_nstrips, sizeof (uint64), "for \"StripByteCounts\" array"); - if (td->td_stripoffset == NULL || td->td_stripbytecount == NULL) + if (td->td_stripoffset_p == NULL || td->td_stripbytecount_p == NULL) return (0); /* * Place data at the end-of-file * (by setting offsets to zero). 
*/ - _TIFFmemset(td->td_stripoffset, 0, td->td_nstrips*sizeof (uint64)); - _TIFFmemset(td->td_stripbytecount, 0, td->td_nstrips*sizeof (uint64)); + _TIFFmemset(td->td_stripoffset_p, 0, td->td_nstrips*sizeof (uint64)); + _TIFFmemset(td->td_stripbytecount_p, 0, td->td_nstrips*sizeof (uint64)); TIFFSetFieldBit(tif, FIELD_STRIPOFFSETS); TIFFSetFieldBit(tif, FIELD_STRIPBYTECOUNTS); return (1); @@ -572,7 +577,7 @@ TIFFWriteCheck(TIFF* tif, int tiles, const char* module) } if (tiles ^ isTiled(tif)) { TIFFErrorExt(tif->tif_clientdata, module, tiles ? - "Can not write tiles to a stripped image" : + "Can not write tiles to a striped image" : "Can not write scanlines to a tiled image"); return (0); } @@ -610,7 +615,7 @@ TIFFWriteCheck(TIFF* tif, int tiles, const char* module) return (0); } } - if (tif->tif_dir.td_stripoffset == NULL && !TIFFSetupStrips(tif)) { + if (tif->tif_dir.td_stripoffset_p == NULL && !TIFFSetupStrips(tif)) { tif->tif_dir.td_nstrips = 0; TIFFErrorExt(tif->tif_clientdata, module, "No space for %s arrays", isTiled(tif) ? "tile" : "strip"); @@ -628,6 +633,20 @@ TIFFWriteCheck(TIFF* tif, int tiles, const char* module) if (tif->tif_scanlinesize == 0) return (0); tif->tif_flags |= TIFF_BEENWRITING; + + if( tif->tif_dir.td_stripoffset_entry.tdir_tag != 0 && + tif->tif_dir.td_stripoffset_entry.tdir_count == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_type == 0 && + tif->tif_dir.td_stripoffset_entry.tdir_offset.toff_long8 == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_tag != 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_count == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_type == 0 && + tif->tif_dir.td_stripbytecount_entry.tdir_offset.toff_long8 == 0 && + !(tif->tif_flags & TIFF_DIRTYDIRECT) ) + { + TIFFForceStrileArrayWriting(tif); + } + return (1); } @@ -649,6 +668,10 @@ TIFFWriteBufferSetup(TIFF* tif, void* bp, tmsize_t size) if (size == (tmsize_t)(-1)) { size = (isTiled(tif) ? 
tif->tif_tilesize : TIFFStripSize(tif)); + + /* Adds 10% margin for cases where compression would expand a bit */ + if( size < TIFF_TMSIZE_T_MAX - size / 10 ) + size += size / 10; /* * Make raw data buffer at least 8K */ @@ -684,9 +707,9 @@ TIFFGrowStrips(TIFF* tif, uint32 delta, const char* module) uint64* new_stripbytecount; assert(td->td_planarconfig == PLANARCONFIG_CONTIG); - new_stripoffset = (uint64*)_TIFFrealloc(td->td_stripoffset, + new_stripoffset = (uint64*)_TIFFrealloc(td->td_stripoffset_p, (td->td_nstrips + delta) * sizeof (uint64)); - new_stripbytecount = (uint64*)_TIFFrealloc(td->td_stripbytecount, + new_stripbytecount = (uint64*)_TIFFrealloc(td->td_stripbytecount_p, (td->td_nstrips + delta) * sizeof (uint64)); if (new_stripoffset == NULL || new_stripbytecount == NULL) { if (new_stripoffset) @@ -697,11 +720,11 @@ TIFFGrowStrips(TIFF* tif, uint32 delta, const char* module) TIFFErrorExt(tif->tif_clientdata, module, "No space to expand strip arrays"); return (0); } - td->td_stripoffset = new_stripoffset; - td->td_stripbytecount = new_stripbytecount; - _TIFFmemset(td->td_stripoffset + td->td_nstrips, + td->td_stripoffset_p = new_stripoffset; + td->td_stripbytecount_p = new_stripbytecount; + _TIFFmemset(td->td_stripoffset_p + td->td_nstrips, 0, delta*sizeof (uint64)); - _TIFFmemset(td->td_stripbytecount + td->td_nstrips, + _TIFFmemset(td->td_stripbytecount_p + td->td_nstrips, 0, delta*sizeof (uint64)); td->td_nstrips += delta; tif->tif_flags |= TIFF_DIRTYDIRECT; @@ -720,12 +743,12 @@ TIFFAppendToStrip(TIFF* tif, uint32 strip, uint8* data, tmsize_t cc) uint64 m; int64 old_byte_count = -1; - if (td->td_stripoffset[strip] == 0 || tif->tif_curoff == 0) { + if (td->td_stripoffset_p[strip] == 0 || tif->tif_curoff == 0) { assert(td->td_nstrips > 0); - if( td->td_stripbytecount[strip] != 0 - && td->td_stripoffset[strip] != 0 - && td->td_stripbytecount[strip] >= (uint64) cc ) + if( td->td_stripbytecount_p[strip] != 0 + && td->td_stripoffset_p[strip] != 0 + && td->td_stripbytecount_p[strip] >= (uint64) cc ) { /* * There is already tile data on disk, and the new tile @@ -734,7 +757,7 @@ TIFFAppendToStrip(TIFF* tif, uint32 strip, uint8* data, tmsize_t cc) * more data to append to this strip before we are done * depending on how we are getting called. */ - if (!SeekOK(tif, td->td_stripoffset[strip])) { + if (!SeekOK(tif, td->td_stripoffset_p[strip])) { TIFFErrorExt(tif->tif_clientdata, module, "Seek error at scanline %lu", (unsigned long)tif->tif_row); @@ -747,17 +770,17 @@ TIFFAppendToStrip(TIFF* tif, uint32 strip, uint8* data, tmsize_t cc) * Seek to end of file, and set that as our location to * write this strip. */ - td->td_stripoffset[strip] = TIFFSeekFile(tif, 0, SEEK_END); + td->td_stripoffset_p[strip] = TIFFSeekFile(tif, 0, SEEK_END); tif->tif_flags |= TIFF_DIRTYSTRIP; } - tif->tif_curoff = td->td_stripoffset[strip]; + tif->tif_curoff = td->td_stripoffset_p[strip]; /* * We are starting a fresh strip/tile, so set the size to zero. 
*/ - old_byte_count = td->td_stripbytecount[strip]; - td->td_stripbytecount[strip] = 0; + old_byte_count = td->td_stripbytecount_p[strip]; + td->td_stripbytecount_p[strip] = 0; } m = tif->tif_curoff+cc; @@ -774,9 +797,9 @@ TIFFAppendToStrip(TIFF* tif, uint32 strip, uint8* data, tmsize_t cc) return (0); } tif->tif_curoff = m; - td->td_stripbytecount[strip] += cc; + td->td_stripbytecount_p[strip] += cc; - if( (int64) td->td_stripbytecount[strip] != old_byte_count ) + if( (int64) td->td_stripbytecount_p[strip] != old_byte_count ) tif->tif_flags |= TIFF_DIRTYSTRIP; return (1); diff --git a/3rdparty/libtiff/tif_zip.c b/3rdparty/libtiff/tif_zip.c index c75077349e..e71c312c80 100644 --- a/3rdparty/libtiff/tif_zip.c +++ b/3rdparty/libtiff/tif_zip.c @@ -29,24 +29,22 @@ * * ZIP (aka Deflate) Compression Support * - * This file is simply an interface to the zlib library written by + * This file is an interface to the zlib library written by * Jean-loup Gailly and Mark Adler. You must use version 1.0 or later - * of the library: this code assumes the 1.0 API and also depends on - * the ability to write the zlib header multiple times (one per strip) - * which was not possible with versions prior to 0.95. Note also that - * older versions of this codec avoided this bug by suppressing the header - * entirely. This means that files written with the old library cannot - * be read; they should be converted to a different compression scheme - * and then reconverted. + * of the library. * - * The data format used by the zlib library is described in the files - * zlib-3.1.doc, deflate-1.1.doc and gzip-4.1.doc, available in the - * directory ftp://ftp.uu.net/pub/archiving/zip/doc. The library was - * last found at ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib-0.99.tar.gz. + * Optionally, libdeflate (https://github.com/ebiggers/libdeflate) may be used + * to do the compression and decompression, but only for whole strips and tiles. + * For scanline access, zlib will be used as a fallback.
*/ #include "tif_predict.h" #include "zlib.h" +#if LIBDEFLATE_SUPPORT +#include "libdeflate.h" +#endif +#define LIBDEFLATE_MAX_COMPRESSION_LEVEL 12 + #include /* @@ -70,6 +68,12 @@ typedef struct { z_stream stream; int zipquality; /* compression level */ int state; /* state flags */ + int subcodec; /* DEFLATE_SUBCODEC_ZLIB or DEFLATE_SUBCODEC_LIBDEFLATE */ +#if LIBDEFLATE_SUPPORT + int libdeflate_state; /* -1 = until first time ZIPEncode() / ZIPDecode() is called, 0 = use zlib, 1 = use libdeflate */ + struct libdeflate_decompressor* libdeflate_dec; + struct libdeflate_compressor* libdeflate_enc; +#endif #define ZSTATE_INIT_DECODE 0x01 #define ZSTATE_INIT_ENCODE 0x02 @@ -132,6 +136,9 @@ ZIPPreDecode(TIFF* tif, uint16 s) if( (sp->state & ZSTATE_INIT_DECODE) == 0 ) tif->tif_setupdecode( tif ); +#if LIBDEFLATE_SUPPORT + sp->libdeflate_state = -1; +#endif sp->stream.next_in = tif->tif_rawdata; assert(sizeof(sp->stream.avail_in)==4); /* if this assert gets raised, we need to simplify this code to reflect a ZLib that is likely updated @@ -151,6 +158,77 @@ ZIPDecode(TIFF* tif, uint8* op, tmsize_t occ, uint16 s) assert(sp != NULL); assert(sp->state == ZSTATE_INIT_DECODE); +#if LIBDEFLATE_SUPPORT + if( sp->libdeflate_state == 1 ) + return 0; + + /* If we have libdeflate support and we are asked to read a whole */ + /* strip/tile, then go for using it */ + do { + TIFFDirectory *td = &tif->tif_dir; + + if( sp->libdeflate_state == 0 ) + break; + if( sp->subcodec == DEFLATE_SUBCODEC_ZLIB ) + break; + + /* Check if we are in the situation where we can use libdeflate */ + if (isTiled(tif)) { + if( TIFFTileSize64(tif) != (uint64)occ ) + break; + } else { + uint32 strip_height = td->td_imagelength - tif->tif_row; + if (strip_height > td->td_rowsperstrip) + strip_height = td->td_rowsperstrip; + if( TIFFVStripSize64(tif, strip_height) != (uint64)occ ) + break; + } + + /* Check for overflow */ + if( (size_t)tif->tif_rawcc != (uint64)tif->tif_rawcc ) + break; + if( (size_t)occ != (uint64)occ ) + break; + + /* Go for decompression using libdeflate */ + { + enum libdeflate_result res; + if( sp->libdeflate_dec == NULL ) + { + sp->libdeflate_dec = libdeflate_alloc_decompressor(); + if( sp->libdeflate_dec == NULL ) + { + break; + } + } + + sp->libdeflate_state = 1; + + res = libdeflate_zlib_decompress( + sp->libdeflate_dec, tif->tif_rawcp, (size_t)tif->tif_rawcc, op, (size_t)occ, NULL); + + tif->tif_rawcp += tif->tif_rawcc; + tif->tif_rawcc = 0; + + /* We accept LIBDEFLATE_INSUFFICIENT_SPACE as a return */ + /* There are odd files in the wild where the last strip, when */ + /* it is smaller in height than td_rowsperstrip, actually contains */ + /* data for td_rowsperstrip lines. Just ignore that silently.
*/ + if( res != LIBDEFLATE_SUCCESS && + res != LIBDEFLATE_INSUFFICIENT_SPACE ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Decoding error at scanline %lu", + (unsigned long) tif->tif_row); + return 0; + } + + return 1; + } + } while(0); + sp->libdeflate_state = 0; +#endif /* LIBDEFLATE_SUPPORT */ + sp->stream.next_in = tif->tif_rawcp; sp->stream.next_out = op; @@ -198,6 +276,7 @@ ZIPSetupEncode(TIFF* tif) { static const char module[] = "ZIPSetupEncode"; ZIPState* sp = EncoderState(tif); + int cappedQuality; assert(sp != NULL); if (sp->state & ZSTATE_INIT_DECODE) { @@ -205,7 +284,11 @@ ZIPSetupEncode(TIFF* tif) sp->state = 0; } - if (deflateInit(&sp->stream, sp->zipquality) != Z_OK) { + cappedQuality = sp->zipquality; + if( cappedQuality > Z_BEST_COMPRESSION ) + cappedQuality = Z_BEST_COMPRESSION; + + if (deflateInit(&sp->stream, cappedQuality) != Z_OK) { TIFFErrorExt(tif->tif_clientdata, module, "%s", SAFE_MSG(sp)); return (0); } else { @@ -227,6 +310,9 @@ ZIPPreEncode(TIFF* tif, uint16 s) if( sp->state != ZSTATE_INIT_ENCODE ) tif->tif_setupencode( tif ); +#if LIBDEFLATE_SUPPORT + sp->libdeflate_state = -1; +#endif sp->stream.next_out = tif->tif_rawdata; assert(sizeof(sp->stream.avail_out)==4); /* if this assert gets raised, we need to simplify this code to reflect a ZLib that is likely updated @@ -249,6 +335,95 @@ ZIPEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) assert(sp->state == ZSTATE_INIT_ENCODE); (void) s; + +#if LIBDEFLATE_SUPPORT + if( sp->libdeflate_state == 1 ) + return 0; + + /* If we have libdeflate support and we are asked to write a whole */ + /* strip/tile, then go for using it */ + do { + TIFFDirectory *td = &tif->tif_dir; + + if( sp->libdeflate_state == 0 ) + break; + if( sp->subcodec == DEFLATE_SUBCODEC_ZLIB ) + break; + + /* Libdeflate does not support the 0-compression level */ + if( sp->zipquality == Z_NO_COMPRESSION ) + break; + + /* Check if we are in the situation where we can use libdeflate */ + if (isTiled(tif)) { + if( TIFFTileSize64(tif) != (uint64)cc ) + break; + } else { + uint32 strip_height = td->td_imagelength - tif->tif_row; + if (strip_height > td->td_rowsperstrip) + strip_height = td->td_rowsperstrip; + if( TIFFVStripSize64(tif, strip_height) != (uint64)cc ) + break; + } + + /* Check for overflow */ + if( (size_t)tif->tif_rawdatasize != (uint64)tif->tif_rawdatasize ) + break; + if( (size_t)cc != (uint64)cc ) + break; + + /* Go for compression using libdeflate */ + { + size_t nCompressedBytes; + if( sp->libdeflate_enc == NULL ) + { + /* To get results as good as zlib, we asked for an extra */ + /* level of compression */ + sp->libdeflate_enc = libdeflate_alloc_compressor( + sp->zipquality == Z_DEFAULT_COMPRESSION ? 7 : + sp->zipquality >= 6 && sp->zipquality <= 9 ? sp->zipquality + 1 : + sp->zipquality); + if( sp->libdeflate_enc == NULL ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Cannot allocate compressor"); + break; + } + } + + /* Make sure the output buffer is large enough for the worst case. */ + /* In TIFFWriteBufferSetup(), when libtiff allocates the buffer */ + /* we've taken a 10% margin over the uncompressed size, which should */ + /* be large enough even for the worst-case scenario.
*/ + if( libdeflate_zlib_compress_bound(sp->libdeflate_enc, (size_t)cc) > + (size_t)tif->tif_rawdatasize) + { + break; + } + + sp->libdeflate_state = 1; + nCompressedBytes = libdeflate_zlib_compress( + sp->libdeflate_enc, bp, (size_t)cc, tif->tif_rawdata, (size_t)tif->tif_rawdatasize); + + if( nCompressedBytes == 0 ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Encoder error at scanline %lu", + (unsigned long) tif->tif_row); + return 0; + } + + tif->tif_rawcc = nCompressedBytes; + + if( !TIFFFlushData1(tif) ) + return 0; + + return 1; + } + } while(0); + sp->libdeflate_state = 0; +#endif /* LIBDEFLATE_SUPPORT */ + sp->stream.next_in = bp; assert(sizeof(sp->stream.avail_in)==4); /* if this assert gets raised, we need to simplify this code to reflect a ZLib that is likely updated @@ -265,7 +440,8 @@ ZIPEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) } if (sp->stream.avail_out == 0) { tif->tif_rawcc = tif->tif_rawdatasize; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (uint64)tif->tif_rawdatasize <= 0xFFFFFFFFU ? (uInt)tif->tif_rawdatasize : 0xFFFFFFFFU; } @@ -285,6 +461,11 @@ ZIPPostEncode(TIFF* tif) ZIPState *sp = EncoderState(tif); int state; +#if LIBDEFLATE_SUPPORT + if( sp->libdeflate_state == 1 ) + return 1; +#endif + sp->stream.avail_in = 0; do { state = deflate(&sp->stream, Z_FINISH); @@ -294,7 +475,8 @@ ZIPPostEncode(TIFF* tif) if ((tmsize_t)sp->stream.avail_out != tif->tif_rawdatasize) { tif->tif_rawcc = tif->tif_rawdatasize - sp->stream.avail_out; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->stream.next_out = tif->tif_rawdata; sp->stream.avail_out = (uint64)tif->tif_rawdatasize <= 0xFFFFFFFFU ? (uInt)tif->tif_rawdatasize : 0xFFFFFFFFU; } @@ -327,6 +509,14 @@ ZIPCleanup(TIFF* tif) inflateEnd(&sp->stream); sp->state = 0; } + +#if LIBDEFLATE_SUPPORT + if( sp->libdeflate_dec ) + libdeflate_free_decompressor(sp->libdeflate_dec); + if( sp->libdeflate_enc ) + libdeflate_free_compressor(sp->libdeflate_enc); +#endif + _TIFFfree(sp); tif->tif_data = NULL; @@ -342,15 +532,55 @@ ZIPVSetField(TIFF* tif, uint32 tag, va_list ap) switch (tag) { case TIFFTAG_ZIPQUALITY: sp->zipquality = (int) va_arg(ap, int); - if ( sp->state&ZSTATE_INIT_ENCODE ) { + if( sp->zipquality < Z_DEFAULT_COMPRESSION || + sp->zipquality > LIBDEFLATE_MAX_COMPRESSION_LEVEL ) { + TIFFErrorExt(tif->tif_clientdata, module, + "Invalid ZipQuality value. 
Should be in [-1,%d] range", + LIBDEFLATE_MAX_COMPRESSION_LEVEL); + return 0; + } + + if ( sp->state&ZSTATE_INIT_ENCODE ) { + int cappedQuality = sp->zipquality; + if( cappedQuality > Z_BEST_COMPRESSION ) + cappedQuality = Z_BEST_COMPRESSION; if (deflateParams(&sp->stream, - sp->zipquality, Z_DEFAULT_STRATEGY) != Z_OK) { + cappedQuality, Z_DEFAULT_STRATEGY) != Z_OK) { TIFFErrorExt(tif->tif_clientdata, module, "ZLib error: %s", SAFE_MSG(sp)); return (0); } } + +#if LIBDEFLATE_SUPPORT + if( sp->libdeflate_enc ) + { + libdeflate_free_compressor(sp->libdeflate_enc); + sp->libdeflate_enc = NULL; + } +#endif + return (1); + + case TIFFTAG_DEFLATE_SUBCODEC: + sp->subcodec = (int) va_arg(ap, int); + if( sp->subcodec != DEFLATE_SUBCODEC_ZLIB && + sp->subcodec != DEFLATE_SUBCODEC_LIBDEFLATE ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "Invalid DeflateCodec value."); + return 0; + } +#if !LIBDEFLATE_SUPPORT + if( sp->subcodec == DEFLATE_SUBCODEC_LIBDEFLATE ) + { + TIFFErrorExt(tif->tif_clientdata, module, + "DeflateCodec = DEFLATE_SUBCODEC_LIBDEFLATE unsupported in this build"); + return 0; + } +#endif + return 1; + default: return (*sp->vsetparent)(tif, tag, ap); } @@ -366,6 +596,11 @@ ZIPVGetField(TIFF* tif, uint32 tag, va_list ap) case TIFFTAG_ZIPQUALITY: *va_arg(ap, int*) = sp->zipquality; break; + + case TIFFTAG_DEFLATE_SUBCODEC: + *va_arg(ap, int*) = sp->subcodec; + break; + default: return (*sp->vgetparent)(tif, tag, ap); } @@ -374,6 +609,7 @@ ZIPVGetField(TIFF* tif, uint32 tag, va_list ap) static const TIFFField zipFields[] = { { TIFFTAG_ZIPQUALITY, 0, 0, TIFF_ANY, 0, TIFF_SETGET_INT, TIFF_SETGET_UNDEFINED, FIELD_PSEUDO, TRUE, FALSE, "", NULL }, + { TIFFTAG_DEFLATE_SUBCODEC, 0, 0, TIFF_ANY, 0, TIFF_SETGET_INT, TIFF_SETGET_UNDEFINED, FIELD_PSEUDO, TRUE, FALSE, "", NULL }, }; int @@ -384,6 +620,9 @@ TIFFInitZIP(TIFF* tif, int scheme) assert( (scheme == COMPRESSION_DEFLATE) || (scheme == COMPRESSION_ADOBE_DEFLATE)); +#ifdef NDEBUG + (void)scheme; +#endif /* * Merge codec-specific tag information. @@ -397,7 +636,7 @@ TIFFInitZIP(TIFF* tif, int scheme) /* * Allocate state block so tag methods have storage to record values. */ - tif->tif_data = (uint8*) _TIFFmalloc(sizeof (ZIPState)); + tif->tif_data = (uint8*) _TIFFcalloc(sizeof (ZIPState), 1); if (tif->tif_data == NULL) goto bad; sp = ZState(tif); @@ -417,6 +656,11 @@ TIFFInitZIP(TIFF* tif, int scheme) /* Default values for codec-specific fields */ sp->zipquality = Z_DEFAULT_COMPRESSION; /* default comp. level */ sp->state = 0; +#if LIBDEFLATE_SUPPORT + sp->subcodec = DEFLATE_SUBCODEC_LIBDEFLATE; +#else + sp->subcodec = DEFLATE_SUBCODEC_ZLIB; +#endif /* * Install codec methods. 
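/* Usage aside (a hedged sketch, not part of the patch): how application code
   might drive the knobs added above, assuming a libtiff >= 4.2 built with
   libdeflate support. With libdeflate, TIFFTAG_ZIPQUALITY accepts -1
   (Z_DEFAULT_COMPRESSION) through LIBDEFLATE_MAX_COMPRESSION_LEVEL (12); the
   zlib path caps the level at Z_BEST_COMPRESSION (9), as ZIPSetupEncode()
   above shows. */
#include <tiffio.h>

static int configure_deflate(TIFF* tif, int force_zlib)
{
    if (!TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_ADOBE_DEFLATE))
        return 0;
    /* Levels 10-12 only take effect on the libdeflate subcodec. */
    if (!TIFFSetField(tif, TIFFTAG_ZIPQUALITY, force_zlib ? 9 : 12))
        return 0;
#ifdef TIFFTAG_DEFLATE_SUBCODEC
    /* Request bit-exact zlib behaviour when the caller needs it. */
    if (force_zlib &&
        !TIFFSetField(tif, TIFFTAG_DEFLATE_SUBCODEC, DEFLATE_SUBCODEC_ZLIB))
        return 0;
#endif
    return 1;
}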
diff --git a/3rdparty/libtiff/tif_zstd.c b/3rdparty/libtiff/tif_zstd.c index 21c935e2da..66135e03c1 100644 --- a/3rdparty/libtiff/tif_zstd.c +++ b/3rdparty/libtiff/tif_zstd.c @@ -260,7 +260,8 @@ ZSTDEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) } if( sp->out_buffer.pos == sp->out_buffer.size ) { tif->tif_rawcc = tif->tif_rawdatasize; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->out_buffer.dst = tif->tif_rawcp; sp->out_buffer.pos = 0; } @@ -289,7 +290,8 @@ ZSTDPostEncode(TIFF* tif) } if( sp->out_buffer.pos > 0 ) { tif->tif_rawcc = sp->out_buffer.pos; - TIFFFlushData1(tif); + if (!TIFFFlushData1(tif)) + return 0; sp->out_buffer.dst = tif->tif_rawcp; sp->out_buffer.pos = 0; } diff --git a/3rdparty/libtiff/tiff.h b/3rdparty/libtiff/tiff.h index 5b0a0c90f6..2d4a47679d 100644 --- a/3rdparty/libtiff/tiff.h +++ b/3rdparty/libtiff/tiff.h @@ -119,6 +119,11 @@ typedef struct { * Tag data type information. * * Note: RATIONALs are the ratio of two 32-bit integer values. + *--: + * Note2: TIFF_IFD8 data type is used in tiffFields[]-tag definition in order to distinguish the write-handling + of those tags between ClassicTIFF and BigTiff: + For ClassicTIFF libtiff writes a 32-bit value and the TIFF_IFD type-id into the file + For BigTIFF libtiff writes a 64-bit value and the TIFF_IFD8 type-id into the file */ typedef enum { TIFF_NOTYPE = 0, /* placeholder */ @@ -375,6 +380,7 @@ typedef enum { January 2004 */ #define TIFFTAG_OPIIMAGEID 32781 /* %OPI ImageID [Adobe TIFF technote] */ +#define TIFFTAG_TIFFANNOTATIONDATA 32932 /* http://web.archive.org/web/20050309141348/http://www.kofile.com/support%20pro/faqs/annospec.htm */ /* tags 32952-32956 are private tags registered to Island Graphics */ #define TIFFTAG_REFPTS 32953 /* image reference points */ #define TIFFTAG_REGIONTACKPOINT 32954 /* region-xform tack point */ @@ -409,8 +415,23 @@ typedef enum { #define TIFFTAG_CFAPATTERN 33422 /* color filter array pattern */ /* tag 33432 is listed in the 6.0 spec w/ unknown ownership */ #define TIFFTAG_COPYRIGHT 33432 /* copyright string */ +/* Tags 33445-33452 are used for GEL fileformat, see + * http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf + */ +#define TIFFTAG_MD_FILETAG 33445 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_SCALEPIXEL 33446 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_COLORTABLE 33447 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_LABNAME 33448 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_SAMPLEINFO 33449 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_PREPDATE 33450 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_PREPTIME 33451 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ +#define TIFFTAG_MD_FILEUNITS 33452 /* http://research.stowers-institute.org/mcm/efg/ScientificSoftware/Utility/TiffTags/GEL-FileFormat.pdf */ /* IPTC TAG from RichTIFF specifications */ #define TIFFTAG_RICHTIFFIPTC 33723 +#define TIFFTAG_INGR_PACKET_DATA_TAG 33918 /* Intergraph Application specific storage. 
*/ +#define TIFFTAG_INGR_FLAG_REGISTERS 33919 /* Intergraph Application specific flags. */ +#define TIFFTAG_IRASB_TRANSORMATION_MATRIX 33920 /* Originally part of Intergraph's GeoTIFF tags, but likely understood by IrasB only. */ +#define TIFFTAG_MODELTIEPOINTTAG 33922 /* GeoTIFF */ /* 34016-34029 are reserved for ANSI IT8 TIFF/IT > 1) + +/* + * Largest 32-bit unsigned integer value. + */ +#define TIFF_UINT32_MAX 0xFFFFFFFFU + +/* + * Largest 64-bit unsigned integer value. + */ +#define TIFF_UINT64_MAX (((uint64)(TIFF_UINT32_MAX)) << 32 | TIFF_UINT32_MAX) + typedef struct client_info { struct client_info *next; void *data; @@ -127,6 +140,9 @@ struct tiff { #define TIFF_DIRTYSTRIP 0x200000U /* stripoffsets/stripbytecount dirty*/ #define TIFF_PERSAMPLE 0x400000U /* get/set per sample tags as arrays */ #define TIFF_BUFFERMMAP 0x800000U /* read buffer (tif_rawdata) points into mmap() memory */ + #define TIFF_DEFERSTRILELOAD 0x1000000U /* defer strip/tile offset/bytecount array loading. */ + #define TIFF_LAZYSTRILELOAD 0x2000000U /* lazy/ondemand loading of strip/tile offset/bytecount values. Only used if TIFF_DEFERSTRILELOAD is set and in read-only mode */ + #define TIFF_CHOPPEDUPARRAYS 0x4000000U /* set when allocChoppedUpStripArrays() has modified strip array */ uint64 tif_diroff; /* file offset of current directory */ uint64 tif_nextdiroff; /* file offset of following directory */ uint64* tif_dirlist; /* list of offsets to already seen directories to prevent IFD looping */ @@ -258,7 +274,7 @@ struct tiff { #define TIFFhowmany8_64(x) (((x)&0x07)?((uint64)(x)>>3)+1:(uint64)(x)>>3) #define TIFFroundup_64(x, y) (TIFFhowmany_64(x,y)*(y)) -/* Safe multiply which returns zero if there is an integer overflow */ +/* Safe multiply which returns zero if there is an *unsigned* integer overflow. This macro is not safe for *signed* integer types */ #define TIFFSafeMultiply(t,v,m) ((((t)(m) != (t)0) && (((t)(((v)*(m))/(m))) == (t)(v))) ? (t)((v)*(m)) : (t)0) #define TIFFmax(A,B) ((A)>(B)?(A):(B)) @@ -351,6 +367,9 @@ extern uint32 _TIFFDefaultStripSize(TIFF* tif, uint32 s); extern void _TIFFDefaultTileSize(TIFF* tif, uint32* tw, uint32* th); extern int _TIFFDataSize(TIFFDataType type); +/*--: Rational2Double: Return size of TIFFSetGetFieldType in bytes. */ +extern int _TIFFSetGetFieldSize(TIFFSetGetFieldType setgettype); + extern void _TIFFsetByteArray(void**, void*, uint32); extern void _TIFFsetString(char**, char*); extern void _TIFFsetShortArray(uint16**, uint16*, uint32); @@ -368,6 +387,8 @@ extern TIFFErrorHandlerExt _TIFFerrorHandlerExt; extern uint32 _TIFFMultiply32(TIFF*, uint32, uint32, const char*); extern uint64 _TIFFMultiply64(TIFF*, uint64, uint64, const char*); +extern tmsize_t _TIFFMultiplySSize(TIFF*, tmsize_t, tmsize_t, const char*); +extern tmsize_t _TIFFCastUInt64ToSSize(TIFF*, uint64, const char*); extern void* _TIFFCheckMalloc(TIFF*, tmsize_t, tmsize_t, const char*); extern void* _TIFFCheckRealloc(TIFF*, void*, tmsize_t, tmsize_t, const char*); diff --git a/3rdparty/libtiff/tiffvers.h b/3rdparty/libtiff/tiffvers.h index 403d61be04..0cce798b83 100644 --- a/3rdparty/libtiff/tiffvers.h +++ b/3rdparty/libtiff/tiffvers.h @@ -1,4 +1,4 @@ -#define TIFFLIB_VERSION_STR "LIBTIFF, Version 4.0.10\nCopyright (c) 1988-1996 Sam Leffler\nCopyright (c) 1991-1996 Silicon Graphics, Inc." +#define TIFFLIB_VERSION_STR "LIBTIFF, Version 4.2.0\nCopyright (c) 1988-1996 Sam Leffler\nCopyright (c) 1991-1996 Silicon Graphics, Inc." 
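/* Aside (a hedged sketch, not part of the patch): downstream code typically
   gates compile-time feature use on the date-based TIFFLIB_VERSION bumped
   above; runtime checks should instead parse the string from TIFFGetVersion(),
   as the comment below notes. */
#include <tiffvers.h>

#if TIFFLIB_VERSION >= 20201219 /* libtiff 4.2.0 or newer */
/* Safe to reference additions from this update, e.g. TIFFTAG_DEFLATE_SUBCODEC. */
#endif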
/* * This define can be used in code that requires * compilation-related definitions specific to a @@ -6,4 +6,4 @@ * version checking should be done based on the * string returned by TIFFGetVersion. */ -#define TIFFLIB_VERSION 20181110 +#define TIFFLIB_VERSION 20201219 diff --git a/3rdparty/libwebp/src/dec/io_dec.c b/3rdparty/libwebp/src/dec/io_dec.c index e603f19c98..29dc6345df 100644 --- a/3rdparty/libwebp/src/dec/io_dec.c +++ b/3rdparty/libwebp/src/dec/io_dec.c @@ -25,21 +25,16 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* output = p->output; const WebPYUVABuffer* const buf = &output->u.YUVA; - uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride; - uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride; - uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride; + uint8_t* const y_dst = buf->y + (size_t)io->mb_y * buf->y_stride; + uint8_t* const u_dst = buf->u + (size_t)(io->mb_y >> 1) * buf->u_stride; + uint8_t* const v_dst = buf->v + (size_t)(io->mb_y >> 1) * buf->v_stride; const int mb_w = io->mb_w; const int mb_h = io->mb_h; const int uv_w = (mb_w + 1) / 2; const int uv_h = (mb_h + 1) / 2; - int j; - for (j = 0; j < mb_h; ++j) { - memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w); - } - for (j = 0; j < uv_h; ++j) { - memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w); - memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w); - } + WebPCopyPlane(io->y, io->y_stride, y_dst, buf->y_stride, mb_w, mb_h); + WebPCopyPlane(io->u, io->uv_stride, u_dst, buf->u_stride, uv_w, uv_h); + WebPCopyPlane(io->v, io->uv_stride, v_dst, buf->v_stride, uv_w, uv_h); return io->mb_h; } @@ -47,7 +42,7 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { WebPDecBuffer* const output = p->output; WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* const dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPSamplerProcessPlane(io->y, io->y_stride, io->u, io->v, io->uv_stride, dst, buf->stride, io->mb_w, io->mb_h, @@ -62,7 +57,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { int num_lines_out = io->mb_h; // a priori guess const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; + uint8_t* dst = buf->rgba + (size_t)io->mb_y * buf->stride; WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; const uint8_t* cur_y = io->y; const uint8_t* cur_u = io->u; @@ -133,7 +128,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p, const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int mb_w = io->mb_w; const int mb_h = io->mb_h; - uint8_t* dst = buf->a + io->mb_y * buf->a_stride; + uint8_t* dst = buf->a + (size_t)io->mb_y * buf->a_stride; int j; (void)expected_num_lines_out; assert(expected_num_lines_out == mb_h); @@ -186,7 +181,7 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p, (colorspace == MODE_ARGB || colorspace == MODE_Argb); const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; uint8_t* const dst = base_rgba + (alpha_first ? 
0 : 3); const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w, @@ -210,7 +205,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p, const WEBP_CSP_MODE colorspace = p->output->colorspace; const WebPRGBABuffer* const buf = &p->output->u.RGBA; int num_rows; - const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows); + const size_t start_y = GetAlphaSourceRow(io, &alpha, &num_rows); uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; @@ -276,9 +271,9 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) { static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p, int expected_num_lines_out) { const WebPYUVABuffer* const buf = &p->output->u.YUVA; - uint8_t* const dst_a = buf->a + p->last_y * buf->a_stride; + uint8_t* const dst_a = buf->a + (size_t)p->last_y * buf->a_stride; if (io->a != NULL) { - uint8_t* const dst_y = buf->y + p->last_y * buf->y_stride; + uint8_t* const dst_y = buf->y + (size_t)p->last_y * buf->y_stride; const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a); assert(expected_num_lines_out == num_lines_out); if (num_lines_out > 0) { // unmultiply the Y @@ -356,7 +351,7 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { const WebPYUV444Converter convert = WebPYUV444Converters[p->output->colorspace]; const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + y_pos * buf->stride; + uint8_t* dst = buf->rgba + (size_t)y_pos * buf->stride; int num_lines_out = 0; // For RGB rescaling, because of the YUV420, current scan position // U/V can be +1/-1 line from the Y one. Hence the double test. @@ -383,15 +378,15 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { while (j < mb_h) { const int y_lines_in = WebPRescalerImport(p->scaler_y, mb_h - j, - io->y + j * io->y_stride, io->y_stride); + io->y + (size_t)j * io->y_stride, io->y_stride); j += y_lines_in; if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) { - const int u_lines_in = - WebPRescalerImport(p->scaler_u, uv_mb_h - uv_j, - io->u + uv_j * io->uv_stride, io->uv_stride); - const int v_lines_in = - WebPRescalerImport(p->scaler_v, uv_mb_h - uv_j, - io->v + uv_j * io->uv_stride, io->uv_stride); + const int u_lines_in = WebPRescalerImport( + p->scaler_u, uv_mb_h - uv_j, io->u + (size_t)uv_j * io->uv_stride, + io->uv_stride); + const int v_lines_in = WebPRescalerImport( + p->scaler_v, uv_mb_h - uv_j, io->v + (size_t)uv_j * io->uv_stride, + io->uv_stride); (void)v_lines_in; // remove a gcc warning assert(u_lines_in == v_lines_in); uv_j += u_lines_in; @@ -403,7 +398,7 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * buf->stride; const WEBP_CSP_MODE colorspace = p->output->colorspace; const int alpha_first = (colorspace == MODE_ARGB || colorspace == MODE_Argb); @@ -431,7 +426,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) { static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos, int max_lines_out) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride; + uint8_t* const base_rgba = buf->rgba + (size_t)y_pos * 
buf->stride; #if (WEBP_SWAP_16BIT_CSP == 1) uint8_t* alpha_dst = base_rgba; #else @@ -470,7 +465,7 @@ static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p, int lines_left = expected_num_out_lines; const int y_end = p->last_y + lines_left; while (lines_left > 0) { - const int row_offset = scaler->src_y - io->mb_y; + const int64_t row_offset = (int64_t)scaler->src_y - io->mb_y; WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y, io->a + row_offset * io->width, io->width); lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left); diff --git a/3rdparty/libwebp/src/dec/vp8_dec.c b/3rdparty/libwebp/src/dec/vp8_dec.c index 57efb69041..8f73697478 100644 --- a/3rdparty/libwebp/src/dec/vp8_dec.c +++ b/3rdparty/libwebp/src/dec/vp8_dec.c @@ -494,13 +494,11 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } -static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) { - if (GetCoeffs == NULL) { - if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { - GetCoeffs = GetCoeffsAlt; - } else { - GetCoeffs = GetCoeffsFast; - } +WEBP_DSP_INIT_FUNC(InitGetCoeffs) { + if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { + GetCoeffs = GetCoeffsAlt; + } else { + GetCoeffs = GetCoeffsFast; } } diff --git a/3rdparty/libwebp/src/dec/vp8i_dec.h b/3rdparty/libwebp/src/dec/vp8i_dec.h index 600a684410..a0c0af1579 100644 --- a/3rdparty/libwebp/src/dec/vp8i_dec.h +++ b/3rdparty/libwebp/src/dec/vp8i_dec.h @@ -31,7 +31,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 -#define DEC_MIN_VERSION 1 +#define DEC_MIN_VERSION 2 #define DEC_REV_VERSION 0 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). diff --git a/3rdparty/libwebp/src/dec/vp8l_dec.c b/3rdparty/libwebp/src/dec/vp8l_dec.c index 93615d4ed2..2d603b4379 100644 --- a/3rdparty/libwebp/src/dec/vp8l_dec.c +++ b/3rdparty/libwebp/src/dec/vp8l_dec.c @@ -947,7 +947,6 @@ static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) { break; default: goto Copy; - break; } CopySmallPattern8b(src, dst, length, pattern); return; diff --git a/3rdparty/libwebp/src/demux/anim_decode.c b/3rdparty/libwebp/src/demux/anim_decode.c index 05dd707371..3dcacc35d6 100644 --- a/3rdparty/libwebp/src/demux/anim_decode.c +++ b/3rdparty/libwebp/src/demux/anim_decode.c @@ -346,12 +346,15 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec, { const uint8_t* in = iter.fragment.bytes; const size_t in_size = iter.fragment.size; - const size_t out_offset = - (iter.y_offset * width + iter.x_offset) * NUM_CHANNELS; + const uint32_t stride = width * NUM_CHANNELS; // at most 25 + 2 bits + const uint64_t out_offset = (uint64_t)iter.y_offset * stride + + (uint64_t)iter.x_offset * NUM_CHANNELS; // 53b + const uint64_t size = (uint64_t)iter.height * stride; // at most 25 + 27b WebPDecoderConfig* const config = &dec->config_; WebPRGBABuffer* const buf = &config->output.u.RGBA; - buf->stride = NUM_CHANNELS * width; - buf->size = buf->stride * iter.height; + if ((size_t)size != size) goto Error; + buf->stride = (int)stride; + buf->size = (size_t)size; buf->rgba = dec->curr_frame_ + out_offset; if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) { diff --git a/3rdparty/libwebp/src/demux/demux.c b/3rdparty/libwebp/src/demux/demux.c index 1b3cc2e0a8..860e2ce761 100644 --- a/3rdparty/libwebp/src/demux/demux.c +++ b/3rdparty/libwebp/src/demux/demux.c @@ -24,7 +24,7 @@ #include "src/webp/format_constants.h" #define DMUX_MAJ_VERSION 1 -#define DMUX_MIN_VERSION 1 +#define DMUX_MIN_VERSION 2 #define 
DMUX_REV_VERSION 0 typedef struct { @@ -312,6 +312,7 @@ static ParseStatus ParseAnimationFrame( int bits; MemBuffer* const mem = &dmux->mem_; Frame* frame; + size_t start_offset; ParseStatus status = NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame); if (status != PARSE_OK) return status; @@ -332,7 +333,11 @@ static ParseStatus ParseAnimationFrame( // Store a frame only if the animation flag is set there is some data for // this frame is available. + start_offset = mem->start_; status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame); + if (status != PARSE_ERROR && mem->start_ - start_offset > anmf_payload_size) { + status = PARSE_ERROR; + } if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) { added_frame = AddFrame(dmux, frame); if (added_frame) { diff --git a/3rdparty/libwebp/src/dsp/alpha_processing.c b/3rdparty/libwebp/src/dsp/alpha_processing.c index 819d1391f2..3a27990ddc 100644 --- a/3rdparty/libwebp/src/dsp/alpha_processing.c +++ b/3rdparty/libwebp/src/dsp/alpha_processing.c @@ -359,6 +359,11 @@ static int HasAlpha32b_C(const uint8_t* src, int length) { return 0; } +static void AlphaReplace_C(uint32_t* src, int length, uint32_t color) { + int x; + for (x = 0; x < length; ++x) if ((src[x] >> 24) == 0) src[x] = color; +} + //------------------------------------------------------------------------------ // Simple channel manipulations. @@ -400,6 +405,7 @@ void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int (*WebPHasAlpha8b)(const uint8_t* src, int length); int (*WebPHasAlpha32b)(const uint8_t* src, int length); +void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); //------------------------------------------------------------------------------ // Init function @@ -428,6 +434,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { WebPHasAlpha8b = HasAlpha8b_C; WebPHasAlpha32b = HasAlpha32b_C; + WebPAlphaReplace = AlphaReplace_C; // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
if (VP8GetCPUInfo != NULL) { @@ -469,4 +476,5 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { assert(WebPPackRGB != NULL); assert(WebPHasAlpha8b != NULL); assert(WebPHasAlpha32b != NULL); + assert(WebPAlphaReplace != NULL); } diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c index 2871c56d84..f6c6e0fb1a 100644 --- a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c +++ b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c @@ -265,6 +265,27 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) { return 0; } +static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) { + const __m128i m_color = _mm_set1_epi32(color); + const __m128i zero = _mm_setzero_si128(); + int i = 0; + for (; i + 8 <= length; i += 8) { + const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0)); + const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 4)); + const __m128i b0 = _mm_srai_epi32(a0, 24); + const __m128i b1 = _mm_srai_epi32(a1, 24); + const __m128i c0 = _mm_cmpeq_epi32(b0, zero); + const __m128i c1 = _mm_cmpeq_epi32(b1, zero); + const __m128i d0 = _mm_and_si128(c0, m_color); + const __m128i d1 = _mm_and_si128(c1, m_color); + const __m128i e0 = _mm_andnot_si128(c0, a0); + const __m128i e1 = _mm_andnot_si128(c1, a1); + _mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0)); + _mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1)); + } + for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color; +} + // ----------------------------------------------------------------------------- // Apply alpha value to rows @@ -334,6 +355,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) { WebPHasAlpha8b = HasAlpha8b_SSE2; WebPHasAlpha32b = HasAlpha32b_SSE2; + WebPAlphaReplace = AlphaReplace_SSE2; } #else // !WEBP_USE_SSE2 diff --git a/3rdparty/libwebp/src/dsp/cpu.c b/3rdparty/libwebp/src/dsp/cpu.c index 0fa5b6a5ce..4ca90d88bf 100644 --- a/3rdparty/libwebp/src/dsp/cpu.c +++ b/3rdparty/libwebp/src/dsp/cpu.c @@ -55,12 +55,18 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) : "a"(info_type), "c"(0)); } -#elif (defined(_M_X64) || defined(_M_IX86)) && \ - defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + +#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 #include #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 -#elif defined(WEBP_MSC_SSE2) +#define WEBP_HAVE_MSC_CPUID +#elif _MSC_VER > 1310 +#include #define GetCPUInfo __cpuid +#define WEBP_HAVE_MSC_CPUID +#endif + #endif // NaCl has no support for xgetbv or the raw opcode. @@ -94,7 +100,7 @@ static WEBP_INLINE uint64_t xgetbv(void) { #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. #endif -#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) +#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID) // helper function for run-time detection of slow SSSE3 platforms static int CheckSlowModel(int info) { @@ -179,6 +185,30 @@ static int AndroidCPUInfo(CPUFeature feature) { return 0; } VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; +#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test +// Use compile flags as an indicator of SIMD support instead of a runtime check. 
+static int wasmCPUInfo(CPUFeature feature) { + switch (feature) { +#ifdef WEBP_USE_SSE2 + case kSSE2: + return 1; +#endif +#ifdef WEBP_USE_SSE41 + case kSSE3: + case kSlowSSSE3: + case kSSE4_1: + return 1; +#endif +#ifdef WEBP_USE_NEON + case kNEON: + return 1; +#endif + default: + break; + } + return 0; +} +VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; #elif defined(WEBP_USE_NEON) // define a dummy function to enable turning off NEON at runtime by setting // VP8DecGetCPUInfo = NULL diff --git a/3rdparty/libwebp/src/dsp/dec_neon.c b/3rdparty/libwebp/src/dsp/dec_neon.c index 239ec4167e..fa851707e2 100644 --- a/3rdparty/libwebp/src/dsp/dec_neon.c +++ b/3rdparty/libwebp/src/dsp/dec_neon.c @@ -1283,12 +1283,12 @@ static void DC4_NEON(uint8_t* dst) { // DC const uint8x8_t A = vld1_u8(dst - BPS); // top row const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p1 = vpadd_u16(p0, p0); - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t sum = vaddq_u16(s01, vcombine_u16(p1, p1)); const uint8x8_t dc0 = vrshrn_n_u16(sum, 3); // (sum + 4) >> 3 @@ -1429,8 +1429,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row #if defined(__aarch64__) - const uint16x8_t B = vmovl_u8(A); - const uint16_t p2 = vaddvq_u16(B); + const uint16_t p2 = vaddlv_u8(A); sum_top = vdupq_n_u16(p2); #else const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top @@ -1441,18 +1440,18 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { } if (do_left) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + 4 * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + 5 * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + 6 * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + 7 * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + 4 * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + 5 * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + 6 * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + 7 * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); 
sum_left = vaddq_u16(s01, s23); @@ -1512,29 +1511,34 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x16_t A = vld1q_u8(dst - BPS); // top row +#if defined(__aarch64__) + const uint16_t p3 = vaddlvq_u8(A); + sum_top = vdupq_n_u16(p3); +#else const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); const uint16x4_t p2 = vpadd_u16(p1, p1); const uint16x4_t p3 = vpadd_u16(p2, p2); sum_top = vcombine_u16(p3, p3); +#endif } if (do_left) { int i; sum_left = vdupq_n_u16(0); for (i = 0; i < 16; i += 8) { - const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + (i + 0) * BPS - 1)); - const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + (i + 1) * BPS - 1)); - const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + (i + 2) * BPS - 1)); - const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + (i + 3) * BPS - 1)); - const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + (i + 4) * BPS - 1)); - const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + (i + 5) * BPS - 1)); - const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + (i + 6) * BPS - 1)); - const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + (i + 7) * BPS - 1)); - const uint16x8_t s0 = vaddq_u16(L0, L1); - const uint16x8_t s1 = vaddq_u16(L2, L3); - const uint16x8_t s2 = vaddq_u16(L4, L5); - const uint16x8_t s3 = vaddq_u16(L6, L7); + const uint8x8_t L0 = vld1_u8(dst + (i + 0) * BPS - 1); + const uint8x8_t L1 = vld1_u8(dst + (i + 1) * BPS - 1); + const uint8x8_t L2 = vld1_u8(dst + (i + 2) * BPS - 1); + const uint8x8_t L3 = vld1_u8(dst + (i + 3) * BPS - 1); + const uint8x8_t L4 = vld1_u8(dst + (i + 4) * BPS - 1); + const uint8x8_t L5 = vld1_u8(dst + (i + 5) * BPS - 1); + const uint8x8_t L6 = vld1_u8(dst + (i + 6) * BPS - 1); + const uint8x8_t L7 = vld1_u8(dst + (i + 7) * BPS - 1); + const uint16x8_t s0 = vaddl_u8(L0, L1); + const uint16x8_t s1 = vaddl_u8(L2, L3); + const uint16x8_t s2 = vaddl_u8(L4, L5); + const uint16x8_t s3 = vaddl_u8(L6, L7); const uint16x8_t s01 = vaddq_u16(s0, s1); const uint16x8_t s23 = vaddq_u16(s2, s3); const uint16x8_t sum = vaddq_u16(s01, s23); diff --git a/3rdparty/libwebp/src/dsp/dsp.h b/3rdparty/libwebp/src/dsp/dsp.h index a784de334a..298c721ae2 100644 --- a/3rdparty/libwebp/src/dsp/dsp.h +++ b/3rdparty/libwebp/src/dsp/dsp.h @@ -51,9 +51,7 @@ extern "C" { # define __has_builtin(x) 0 #endif -// for now, none of the optimizations below are available in emscripten -#if !defined(EMSCRIPTEN) - +#if !defined(HAVE_CONFIG_H) #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) #define WEBP_MSC_SSE2 // Visual C++ SSE2 targets @@ -63,6 +61,7 @@ extern "C" { (defined(_M_X64) || defined(_M_IX86)) #define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets #endif +#endif // WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp // files without intrinsics, allowing the corresponding Init() to be called. @@ -76,6 +75,9 @@ extern "C" { #define WEBP_USE_SSE41 #endif +#undef WEBP_MSC_SSE41 +#undef WEBP_MSC_SSE2 + // The intrinsics currently cause compiler errors with arm-nacl-gcc and the // inline assembly would need to be modified for use with Native Client. #if (defined(__ARM_NEON__) || \ @@ -110,8 +112,6 @@ extern "C" { #define WEBP_USE_MSA #endif -#endif /* EMSCRIPTEN */ - #ifndef WEBP_DSP_OMIT_C_CODE #define WEBP_DSP_OMIT_C_CODE 1 #endif @@ -193,6 +193,12 @@ extern "C" { #endif #endif +// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. +// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. 
+#if !defined(WEBP_OFFSET_PTR) +#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) +#endif + // Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) #if !defined(WEBP_SWAP_16BIT_CSP) #define WEBP_SWAP_16BIT_CSP 0 @@ -632,6 +638,8 @@ extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, extern int (*WebPHasAlpha8b)(const uint8_t* src, int length); // This function returns true if src[4*i] contains a value different from 0xff. extern int (*WebPHasAlpha32b)(const uint8_t* src, int length); +// replaces transparent values in src[] by 'color'. +extern void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); // To be called first before using the above. void WebPInitAlphaProcessing(void); diff --git a/3rdparty/libwebp/src/dsp/lossless.c b/3rdparty/libwebp/src/dsp/lossless.c index aad5f43ec9..46b220e2ed 100644 --- a/3rdparty/libwebp/src/dsp/lossless.c +++ b/3rdparty/libwebp/src/dsp/lossless.c @@ -107,62 +107,62 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { //------------------------------------------------------------------------------ // Predictors -static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) { (void)top; (void)left; return ARGB_BLACK; } -static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) { (void)top; return left; } -static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) { (void)left; return top[0]; } -static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) { (void)left; return top[1]; } -static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) { (void)left; return top[-1]; } -static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average3(left, top[0], top[1]); return pred; } -static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[-1]); return pred; } -static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(left, top[0]); return pred; } -static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Average4(left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = Select(top[0], left, top[-1]); 
return pred; } -static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) { const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); return pred; } @@ -182,18 +182,18 @@ static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, out[i] = left = VP8LAddPixels(in[i], left); } } -GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C) -GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C) -GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C) -GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C) -GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C) -GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C) -GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C) -GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C) -GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C) -GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C) -GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C) -GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor2_C, PredictorAdd2_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor3_C, PredictorAdd3_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor4_C, PredictorAdd4_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor5_C, PredictorAdd5_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor6_C, PredictorAdd6_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor7_C, PredictorAdd7_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor8_C, PredictorAdd8_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor9_C, PredictorAdd9_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor10_C, PredictorAdd10_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor11_C, PredictorAdd11_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor12_C, PredictorAdd12_C) +GENERATE_PREDICTOR_ADD(VP8LPredictor13_C, PredictorAdd13_C) //------------------------------------------------------------------------------ @@ -562,7 +562,6 @@ VP8LPredictorFunc VP8LPredictors[16]; // exposed plain-C implementations VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16]; -VP8LPredictorFunc VP8LPredictors_C[16]; VP8LTransformColorInverseFunc VP8LTransformColorInverse; @@ -600,8 +599,7 @@ extern void VP8LDspInitMSA(void); } while (0); WEBP_DSP_INIT_FUNC(VP8LDspInit) { - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors) - COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C) + COPY_PREDICTOR_ARRAY(VP8LPredictor, VP8LPredictors) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd) COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C) diff --git a/3rdparty/libwebp/src/dsp/lossless.h b/3rdparty/libwebp/src/dsp/lossless.h index f709cc86b2..ebd316d1ed 100644 --- a/3rdparty/libwebp/src/dsp/lossless.h +++ b/3rdparty/libwebp/src/dsp/lossless.h @@ -30,7 +30,22 @@ extern "C" { typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); extern VP8LPredictorFunc VP8LPredictors[16]; -extern VP8LPredictorFunc VP8LPredictors_C[16]; + +uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top); +uint32_t 
VP8LPredictor6_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top); + // These Add/Sub function expects upper[-1] and out[-1] to be readable. typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, const uint32_t* upper, int num_pixels, diff --git a/3rdparty/libwebp/src/dsp/lossless_common.h b/3rdparty/libwebp/src/dsp/lossless_common.h index 9c2ebe6809..96a106f9ee 100644 --- a/3rdparty/libwebp/src/dsp/lossless_common.h +++ b/3rdparty/libwebp/src/dsp/lossless_common.h @@ -184,19 +184,6 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ } \ } -// It subtracts the prediction from the input pixel and stores the residual -// in the output pixel. -#define GENERATE_PREDICTOR_SUB(PREDICTOR, PREDICTOR_SUB) \ -static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int x; \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \ - out[x] = VP8LSubPixels(in[x], pred); \ - } \ -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/3rdparty/libwebp/src/dsp/lossless_enc.c b/3rdparty/libwebp/src/dsp/lossless_enc.c index 9c36055afc..a0c7ab9117 100644 --- a/3rdparty/libwebp/src/dsp/lossless_enc.c +++ b/3rdparty/libwebp/src/dsp/lossless_enc.c @@ -702,140 +702,6 @@ void VP8LHistogramAdd(const VP8LHistogram* const a, //------------------------------------------------------------------------------ // Image transforms. -static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { - return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1); -} - -static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { - return Average2(Average2(a0, a2), a1); -} - -static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, - uint32_t a2, uint32_t a3) { - return Average2(Average2(a0, a1), Average2(a2, a3)); -} - -static WEBP_INLINE uint32_t Clip255(uint32_t a) { - if (a < 256) { - return a; - } - // return 0, when a is a negative integer. - // return 255, when a is positive. 
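// Note on the branchless tail below: inputs reach Clip255() from signed
// arithmetic in [-255, 510], so a "negative" value wraps to a uint32_t with
// its top bits set and ~a >> 24 yields 0 (e.g. a == (uint32_t)-1), while a
// positive overflow in [256, 510] yields ~a >> 24 == 255.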
- return ~a >> 24; -} - -static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { - return Clip255(a + b - c); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, - uint32_t c2) { - const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); - const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, - (c1 >> 16) & 0xff, - (c2 >> 16) & 0xff); - const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, - (c1 >> 8) & 0xff, - (c2 >> 8) & 0xff); - const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { - return Clip255(a + (a - b) / 2); -} - -static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, - uint32_t c2) { - const uint32_t ave = Average2(c0, c1); - const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); - const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); - const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); - const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); - return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b; -} - -// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined. -#if defined(__arm__) && \ - (LOCAL_GCC_VERSION == 0x409 || LOCAL_GCC_VERSION == 0x408) -# define LOCAL_INLINE __attribute__ ((noinline)) -#else -# define LOCAL_INLINE WEBP_INLINE -#endif - -static LOCAL_INLINE int Sub3(int a, int b, int c) { - const int pb = b - c; - const int pa = a - c; - return abs(pb) - abs(pa); -} - -#undef LOCAL_INLINE - -static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { - const int pa_minus_pb = - Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + - Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + - Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + - Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); - return (pa_minus_pb <= 0) ? 
a : b; -} - -//------------------------------------------------------------------------------ -// Predictors - -static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { - (void)left; - return top[0]; -} -static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { - (void)left; - return top[1]; -} -static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { - (void)left; - return top[-1]; -} -static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); - return pred; -} -static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); - return pred; -} -static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); - return pred; -} -static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[-1], top[0]); - (void)left; - return pred; -} -static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(top[0], top[1]); - (void)left; - return pred; -} -static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); - return pred; -} -static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); - return pred; -} -static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); - return pred; -} -static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); - return pred; -} - -//------------------------------------------------------------------------------ - static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; @@ -850,18 +716,33 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper, (void)upper; } -GENERATE_PREDICTOR_SUB(Predictor2, PredictorSub2_C) -GENERATE_PREDICTOR_SUB(Predictor3, PredictorSub3_C) -GENERATE_PREDICTOR_SUB(Predictor4, PredictorSub4_C) -GENERATE_PREDICTOR_SUB(Predictor5, PredictorSub5_C) -GENERATE_PREDICTOR_SUB(Predictor6, PredictorSub6_C) -GENERATE_PREDICTOR_SUB(Predictor7, PredictorSub7_C) -GENERATE_PREDICTOR_SUB(Predictor8, PredictorSub8_C) -GENERATE_PREDICTOR_SUB(Predictor9, PredictorSub9_C) -GENERATE_PREDICTOR_SUB(Predictor10, PredictorSub10_C) -GENERATE_PREDICTOR_SUB(Predictor11, PredictorSub11_C) -GENERATE_PREDICTOR_SUB(Predictor12, PredictorSub12_C) -GENERATE_PREDICTOR_SUB(Predictor13, PredictorSub13_C) +// It subtracts the prediction from the input pixel and stores the residual +// in the output pixel. 
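// For instance, GENERATE_PREDICTOR_SUB(2) below expands to PredictorSub2_C(),
// whose residual is in[x] minus the pixel above (VP8LPredictor2_C() returns
// top[0]), computed per ARGB channel by VP8LSubPixels().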
+#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \ +static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ + const uint32_t* upper, \ + int num_pixels, uint32_t* out) { \ + int x; \ + assert(upper != NULL); \ + for (x = 0; x < num_pixels; ++x) { \ + const uint32_t pred = \ + VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \ + out[x] = VP8LSubPixels(in[x], pred); \ + } \ +} + +GENERATE_PREDICTOR_SUB(2) +GENERATE_PREDICTOR_SUB(3) +GENERATE_PREDICTOR_SUB(4) +GENERATE_PREDICTOR_SUB(5) +GENERATE_PREDICTOR_SUB(6) +GENERATE_PREDICTOR_SUB(7) +GENERATE_PREDICTOR_SUB(8) +GENERATE_PREDICTOR_SUB(9) +GENERATE_PREDICTOR_SUB(10) +GENERATE_PREDICTOR_SUB(11) +GENERATE_PREDICTOR_SUB(12) +GENERATE_PREDICTOR_SUB(13) //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c index e676f6fdc9..90c263735f 100644 --- a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -249,6 +249,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { } \ } while (0) +#if !(defined(__i386__) || defined(_M_IX86)) static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { int i; double retval = 0.; @@ -300,6 +301,8 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); return (float)retval; } +#endif // !(defined(__i386__) || defined(_M_IX86)) + #undef ANALYZE_X_OR_Y #undef ANALYZE_XY @@ -460,20 +463,22 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, (void)upper; } -#define GENERATE_PREDICTOR_1(X, IN) \ -static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int i; \ - for (i = 0; i + 4 <= num_pixels; i += 4) { \ - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ - const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ - const __m128i res = _mm_sub_epi8(src, pred); \ - _mm_storeu_si128((__m128i*)&out[i], res); \ - } \ - if (i != num_pixels) { \ - VP8LPredictorsSub_C[(X)](in + i, upper + i, num_pixels - i, out + i); \ - } \ -} +#define GENERATE_PREDICTOR_1(X, IN) \ + static void PredictorSub##X##_SSE2(const uint32_t* const in, \ + const uint32_t* const upper, \ + int num_pixels, uint32_t* const out) { \ + int i; \ + for (i = 0; i + 4 <= num_pixels; i += 4) { \ + const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ + const __m128i pred = _mm_loadu_si128((const __m128i*)&(IN)); \ + const __m128i res = _mm_sub_epi8(src, pred); \ + _mm_storeu_si128((__m128i*)&out[i], res); \ + } \ + if (i != num_pixels) { \ + VP8LPredictorsSub_C[(X)](in + i, WEBP_OFFSET_PTR(upper, i), \ + num_pixels - i, out + i); \ + } \ + } GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L GENERATE_PREDICTOR_1(2, upper[i]) // Predictor2: T @@ -657,7 +662,12 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2; VP8LAddVector = AddVector_SSE2; VP8LAddVectorEq = AddVectorEq_SSE2; + // TODO(https://crbug.com/webp/499): this function produces different results + // from the C code due to use of double/float resulting in output differences + // when compared to -noasm. 
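// Note: on 32-bit x86 the double/float accumulation above can go through x87
// registers with 80-bit extended precision, so intermediate rounding differs
// from x86-64/SSE2 builds; keeping the plain-C entropy path on i386 keeps the
// output bit-exact with -noasm.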
+#if !(defined(__i386__) || defined(_M_IX86)) VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2; +#endif VP8LVectorMismatch = VectorMismatch_SSE2; VP8LBundleColorMap = BundleColorMap_SSE2; diff --git a/3rdparty/libwebp/src/enc/analysis_enc.c b/3rdparty/libwebp/src/enc/analysis_enc.c index 687757ae03..ebb784261c 100644 --- a/3rdparty/libwebp/src/enc/analysis_enc.c +++ b/3rdparty/libwebp/src/enc/analysis_enc.c @@ -126,16 +126,6 @@ static void InitHistogram(VP8Histogram* const histo) { histo->last_non_zero = 1; } -static void MergeHistograms(const VP8Histogram* const in, - VP8Histogram* const out) { - if (in->max_value > out->max_value) { - out->max_value = in->max_value; - } - if (in->last_non_zero > out->last_non_zero) { - out->last_non_zero = in->last_non_zero; - } -} - //------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram @@ -285,49 +275,6 @@ static int FastMBAnalyze(VP8EncIterator* const it) { return 0; } -static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, - int best_alpha) { - uint8_t modes[16]; - const int max_mode = MAX_INTRA4_MODE; - int i4_alpha; - VP8Histogram total_histo; - int cur_histo = 0; - InitHistogram(&total_histo); - - VP8IteratorStartI4(it); - do { - int mode; - int best_mode_alpha = DEFAULT_ALPHA; - VP8Histogram histos[2]; - const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; - - VP8MakeIntra4Preds(it); - for (mode = 0; mode < max_mode; ++mode) { - int alpha; - - InitHistogram(&histos[cur_histo]); - VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1, &histos[cur_histo]); - alpha = GetAlpha(&histos[cur_histo]); - if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { - best_mode_alpha = alpha; - modes[it->i4_] = mode; - cur_histo ^= 1; // keep track of best histo so far. - } - } - // accumulate best histogram - MergeHistograms(&histos[cur_histo ^ 1], &total_histo); - // Note: we reuse the original samples for predictors - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - - i4_alpha = GetAlpha(&total_histo); - if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { - VP8SetIntra4Mode(it, modes); - best_alpha = i4_alpha; - } - return best_alpha; -} - static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { int best_alpha = DEFAULT_ALPHA; int smallest_alpha = 0; @@ -371,13 +318,6 @@ static void MBAnalyze(VP8EncIterator* const it, best_alpha = FastMBAnalyze(it); } else { best_alpha = MBAnalyzeBestIntra16Mode(it); - if (enc->method_ >= 5) { - // We go and make a fast decision for intra4/intra16. - // It's usually not a good and definitive pick, but helps seeding the - // stats about level bit-cost. - // TODO(skal): improve criterion. 
- best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha); - } } best_uv_alpha = MBAnalyzeBestUVMode(it); diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.c b/3rdparty/libwebp/src/enc/backward_references_enc.c index d445b40fc5..519b36a091 100644 --- a/3rdparty/libwebp/src/enc/backward_references_enc.c +++ b/3rdparty/libwebp/src/enc/backward_references_enc.c @@ -11,13 +11,14 @@ // #include +#include #include -#include "src/enc/backward_references_enc.h" -#include "src/enc/histogram_enc.h" +#include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include "src/dsp/dsp.h" +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" @@ -103,6 +104,20 @@ void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) { } } +// Swaps the content of two VP8LBackwardRefs. +static void BackwardRefsSwap(VP8LBackwardRefs* const refs1, + VP8LBackwardRefs* const refs2) { + const int point_to_refs1 = + (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_); + const int point_to_refs2 = + (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_); + const VP8LBackwardRefs tmp = *refs1; + *refs1 = *refs2; + *refs2 = tmp; + if (point_to_refs2) refs1->tail_ = &refs1->refs_; + if (point_to_refs1) refs2->tail_ = &refs2->refs_; +} + void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) { assert(refs != NULL); memset(refs, 0, sizeof(*refs)); @@ -154,6 +169,22 @@ static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) { return b; } +// Return 1 on success, 0 on error. +static int BackwardRefsClone(const VP8LBackwardRefs* const from, + VP8LBackwardRefs* const to) { + const PixOrCopyBlock* block_from = from->refs_; + VP8LClearBackwardRefs(to); + while (block_from != NULL) { + PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to); + if (block_to == NULL) return 0; + memcpy(block_to->start_, block_from->start_, + block_from->size_ * sizeof(PixOrCopy)); + block_to->size_ = block_from->size_; + block_from = block_from->next_; + } + return 1; +} + extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v); void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, @@ -753,12 +784,18 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } } } else { + int code, extra_bits, extra_bits_value; // We should compute the contribution of the (distance,length) // histograms but those are the same independently from the cache size. // As those constant contributions are in the end added to the other - // histogram contributions, we can safely ignore them. + // histogram contributions, we can ignore them, except for the length + // prefix that is part of the literal_ histogram. int len = PixOrCopyLength(v); uint32_t argb_prev = *argb ^ 0xffffffffu; + VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value); + for (i = 0; i <= cache_bits_max; ++i) { + ++histos[i]->literal_[NUM_LITERAL_CODES + code]; + } // Update the color caches. 
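// (VP8LPrefixEncode() above maps a copy length to its prefix code and extra
// bits -- e.g. length 5 becomes code 4 with one extra bit -- and that code
// indexes literal_[] past the NUM_LITERAL_CODES plain-literal slots; the loop
// below then updates every candidate cache size during the same scan of the
// references.)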
do { if (*argb != argb_prev) { @@ -842,16 +879,21 @@ extern int VP8LBackwardReferencesTraceBackwards( int xsize, int ysize, const uint32_t* const argb, int cache_bits, const VP8LHashChain* const hash_chain, const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst); -static VP8LBackwardRefs* GetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best, - VP8LBackwardRefs* worst) { - const int cache_bits_initial = *cache_bits; - double bit_cost_best = -1; +static int GetBackwardReferences(int width, int height, + const uint32_t* const argb, int quality, + int lz77_types_to_try, int cache_bits_max, + int do_no_cache, + const VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs, + int* const cache_bits_best) { VP8LHistogram* histo = NULL; - int lz77_type, lz77_type_best = 0; + int i, lz77_type; + // Index 0 is for a color cache, index 1 for no cache (if needed). + int lz77_types_best[2] = {0, 0}; + double bit_costs_best[2] = {DBL_MAX, DBL_MAX}; VP8LHashChain hash_chain_box; + VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1]; + int status = 0; memset(&hash_chain_box, 0, sizeof(hash_chain_box)); histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS); @@ -860,86 +902,129 @@ static VP8LBackwardRefs* GetBackwardReferences( for (lz77_type = 1; lz77_types_to_try; lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { int res = 0; - double bit_cost; - int cache_bits_tmp = cache_bits_initial; + double bit_cost = 0.; if ((lz77_types_to_try & lz77_type) == 0) continue; switch (lz77_type) { case kLZ77RLE: - res = BackwardReferencesRle(width, height, argb, 0, worst); + res = BackwardReferencesRle(width, height, argb, 0, refs_tmp); break; case kLZ77Standard: // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color // cache is not that different in practice. - res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst); + res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, + refs_tmp); break; case kLZ77Box: if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error; res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain, - &hash_chain_box, worst); + &hash_chain_box, refs_tmp); break; default: assert(0); } if (!res) goto Error; - // Next, try with a color cache and update the references. - if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) { - goto Error; - } - if (cache_bits_tmp > 0) { - if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) { - goto Error; + // Start with the no color cache case. + for (i = 1; i >= 0; --i) { + int cache_bits = (i == 1) ? 0 : cache_bits_max; + + if (i == 1 && !do_no_cache) continue; + + if (i == 0) { + // Try with a color cache. + if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) { + goto Error; + } + if (cache_bits > 0) { + if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) { + goto Error; + } + } + } + + if (i == 0 && do_no_cache && cache_bits == 0) { + // No need to re-compute bit_cost as it was computed at i == 1. + } else { + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost = VP8LHistogramEstimateBits(histo); + } + + if (bit_cost < bit_costs_best[i]) { + if (i == 1) { + // Do not swap as the full cache analysis would have the wrong + // VP8LBackwardRefs to start with. 
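// (BackwardRefsClone() copies block by block, so refs_tmp stays usable as
// scratch for the color-cache pass that follows at i == 0.)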
+ if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error; + } else { + BackwardRefsSwap(refs_tmp, &refs[0]); + } + bit_costs_best[i] = bit_cost; + lz77_types_best[i] = lz77_type; + if (i == 0) *cache_bits_best = cache_bits; } } - - // Keep the best backward references. - VP8LHistogramCreate(histo, worst, cache_bits_tmp); - bit_cost = VP8LHistogramEstimateBits(histo); - if (lz77_type_best == 0 || bit_cost < bit_cost_best) { - VP8LBackwardRefs* const tmp = worst; - worst = best; - best = tmp; - bit_cost_best = bit_cost; - *cache_bits = cache_bits_tmp; - lz77_type_best = lz77_type; - } } - assert(lz77_type_best > 0); + assert(lz77_types_best[0] > 0); + assert(!do_no_cache || lz77_types_best[1] > 0); // Improve on simple LZ77 but only for high quality (TraceBackwards is // costly). - if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) && - quality >= 25) { - const VP8LHashChain* const hash_chain_tmp = - (lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box; - if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits, - hash_chain_tmp, best, worst)) { - double bit_cost_trace; - VP8LHistogramCreate(histo, worst, *cache_bits); - bit_cost_trace = VP8LHistogramEstimateBits(histo); - if (bit_cost_trace < bit_cost_best) best = worst; + for (i = 1; i >= 0; --i) { + if (i == 1 && !do_no_cache) continue; + if ((lz77_types_best[i] == kLZ77Standard || + lz77_types_best[i] == kLZ77Box) && + quality >= 25) { + const VP8LHashChain* const hash_chain_tmp = + (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; + const int cache_bits = (i == 1) ? 0 : *cache_bits_best; + if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, + hash_chain_tmp, &refs[i], + refs_tmp)) { + double bit_cost_trace; + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_costs_best[i]) { + BackwardRefsSwap(refs_tmp, &refs[i]); + } + } + } + + BackwardReferences2DLocality(width, &refs[i]); + + if (i == 1 && lz77_types_best[0] == lz77_types_best[1] && + *cache_bits_best == 0) { + // If the best cache size is 0 and we have the same best LZ77, just copy + // the data over and stop here. + if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error; + break; } } - - BackwardReferences2DLocality(width, best); + status = 1; Error: VP8LHashChainClear(&hash_chain_box); VP8LFreeHistogram(histo); - return best; + return status; } -VP8LBackwardRefs* VP8LGetBackwardReferences( +WebPEncodingError VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2) { + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best) { if (low_effort) { - return GetBackwardReferencesLowEffort(width, height, argb, cache_bits, - hash_chain, refs_tmp1); + VP8LBackwardRefs* refs_best; + *cache_bits_best = cache_bits_max; + refs_best = GetBackwardReferencesLowEffort( + width, height, argb, cache_bits_best, hash_chain, refs); + if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + // Set it in first position. 
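// (BackwardRefsSwap() is O(1): it exchanges the two structs and only
// re-points their internal tail_ fields, rather than copying blocks.)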
+ BackwardRefsSwap(refs_best, &refs[0]); } else { - return GetBackwardReferences(width, height, argb, quality, - lz77_types_to_try, cache_bits, hash_chain, - refs_tmp1, refs_tmp2); + if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, + cache_bits_max, do_no_cache, hash_chain, refs, + cache_bits_best)) { + return VP8_ENC_ERROR_OUT_OF_MEMORY; + } } + return VP8_ENC_OK; } diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.h b/3rdparty/libwebp/src/enc/backward_references_enc.h index 103ddfdcb7..4c0267b41e 100644 --- a/3rdparty/libwebp/src/enc/backward_references_enc.h +++ b/3rdparty/libwebp/src/enc/backward_references_enc.h @@ -16,6 +16,7 @@ #include #include #include "src/webp/types.h" +#include "src/webp/encode.h" #include "src/webp/format_constants.h" #ifdef __cplusplus @@ -218,14 +219,19 @@ enum VP8LLZ77Type { // Evaluates best possible backward references for specified quality. // The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache // bits to use (passing 0 implies disabling the local color cache). -// The optimal cache bits is evaluated and set for the *cache_bits parameter. -// The return value is the pointer to the best of the two backward refs viz, -// refs[0] or refs[1]. -VP8LBackwardRefs* VP8LGetBackwardReferences( +// The optimal cache bits is evaluated and set for the *cache_bits_best +// parameter with the matching refs_best. +// If do_no_cache == 0, refs is an array of 2 values and the best +// VP8LBackwardRefs is put in the first element. +// If do_no_cache != 0, refs is an array of 3 values and the best +// VP8LBackwardRefs is put in the first element, the best value with no-cache in +// the second element. +// In both cases, the last element is used as temporary internally. +WebPEncodingError VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int* const cache_bits, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2); + int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, + const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, + int* const cache_bits_best); #ifdef __cplusplus } diff --git a/3rdparty/libwebp/src/enc/config_enc.c b/3rdparty/libwebp/src/enc/config_enc.c index 9d4828978e..3518b41403 100644 --- a/3rdparty/libwebp/src/enc/config_enc.c +++ b/3rdparty/libwebp/src/enc/config_enc.c @@ -39,6 +39,8 @@ int WebPConfigInitInternal(WebPConfig* config, config->partitions = 0; config->segments = 4; config->pass = 1; + config->qmin = 0; + config->qmax = 100; config->show_compressed = 0; config->preprocessing = 0; config->autofilter = 0; @@ -106,6 +108,9 @@ int WebPValidateConfig(const WebPConfig* config) { if (config->filter_type < 0 || config->filter_type > 1) return 0; if (config->autofilter < 0 || config->autofilter > 1) return 0; if (config->pass < 1 || config->pass > 10) return 0; + if (config->qmin < 0 || config->qmax > 100 || config->qmin > config->qmax) { + return 0; + } if (config->show_compressed < 0 || config->show_compressed > 1) return 0; if (config->preprocessing < 0 || config->preprocessing > 7) return 0; if (config->partitions < 0 || config->partitions > 3) return 0; diff --git a/3rdparty/libwebp/src/enc/frame_enc.c b/3rdparty/libwebp/src/enc/frame_enc.c index 1aec376e44..af538d83ba 100644 --- a/3rdparty/libwebp/src/enc/frame_enc.c +++ b/3rdparty/libwebp/src/enc/frame_enc.c @@ -31,10 +31,15 @@ // we allow 2k of extra head-room 
in PARTITION0 limit. #define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11) +static float Clamp(float v, float min, float max) { + return (v < min) ? min : (v > max) ? max : v; +} + typedef struct { // struct for organizing convergence in either size or PSNR int is_first; float dq; float q, last_q; + float qmin, qmax; double value, last_value; // PSNR or size double target; int do_size_search; @@ -47,7 +52,9 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { s->is_first = 1; s->dq = 10.f; - s->q = s->last_q = enc->config_->quality; + s->qmin = 1.f * enc->config_->qmin; + s->qmax = 1.f * enc->config_->qmax; + s->q = s->last_q = Clamp(enc->config_->quality, s->qmin, s->qmax); s->target = do_size_search ? (double)target_size : (target_PSNR > 0.) ? target_PSNR : 40.; // default, just in case @@ -56,10 +63,6 @@ static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) { return do_size_search; } -static float Clamp(float v, float min, float max) { - return (v < min) ? min : (v > max) ? max : v; -} - static float ComputeNextQ(PassStats* const s) { float dq; if (s->is_first) { @@ -75,7 +78,7 @@ static float ComputeNextQ(PassStats* const s) { s->dq = Clamp(dq, -30.f, 30.f); s->last_q = s->q; s->last_value = s->value; - s->q = Clamp(s->q + s->dq, 0.f, 100.f); + s->q = Clamp(s->q + s->dq, s->qmin, s->qmax); return s->q; } @@ -848,9 +851,10 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { } #if (DEBUG_SEARCH > 0) - printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf\n", + printf("#%2d metric:%.1lf -> %.1lf last_q=%.2lf q=%.2lf dq=%.2lf " + " range:[%.1f, %.1f]\n", num_pass_left, stats.last_value, stats.value, - stats.last_q, stats.q, stats.dq); + stats.last_q, stats.q, stats.dq, stats.qmin, stats.qmax); #endif if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) { ++num_pass_left; diff --git a/3rdparty/libwebp/src/enc/histogram_enc.c b/3rdparty/libwebp/src/enc/histogram_enc.c index a4e6bf3a98..edc6e4faa4 100644 --- a/3rdparty/libwebp/src/enc/histogram_enc.c +++ b/3rdparty/libwebp/src/enc/histogram_enc.c @@ -208,6 +208,7 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, } else if (PixOrCopyIsCacheIdx(v)) { const int literal_ix = NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v); + assert(histo->palette_code_bits_ != 0); ++histo->literal_[literal_ix]; } else { int code, extra_bits; diff --git a/3rdparty/libwebp/src/enc/picture_csp_enc.c b/3rdparty/libwebp/src/enc/picture_csp_enc.c index 718e014ed2..35eede9635 100644 --- a/3rdparty/libwebp/src/enc/picture_csp_enc.c +++ b/3rdparty/libwebp/src/enc/picture_csp_enc.c @@ -61,16 +61,14 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height, // Checking for the presence of non-opaque alpha. 
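// A minimal sketch of driving the qmin/qmax knobs wired in above through the
// public API (WebPConfigInit()/WebPValidateConfig(); error handling elided):
//
//   WebPConfig config;
//   if (!WebPConfigInit(&config)) return 0;
//   config.target_size = 30000;  // size search over several passes...
//   config.pass = 6;
//   config.qmin = 40;            // ...with quality confined to [40, 90]
//   config.qmax = 90;
//   if (!WebPValidateConfig(&config)) return 0;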
int WebPPictureHasTransparency(const WebPPicture* picture) { if (picture == NULL) return 0; - if (!picture->use_argb) { - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); - } else { + if (picture->use_argb) { const int alpha_offset = ALPHA_OFFSET; return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, picture->width, picture->height, 4, picture->argb_stride * sizeof(*picture->argb)); } - return 0; + return CheckNonOpaque(picture->a, picture->width, picture->height, + 1, picture->a_stride); } //------------------------------------------------------------------------------ @@ -90,8 +88,9 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { static int kLinearToGammaTab[kGammaTabSize + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; +static void InitGammaTables(void); -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { +WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { int v; const double scale = (double)(1 << kGammaTabFix) / kGammaScale; @@ -181,8 +180,9 @@ static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; #define GAMMA_TO_LINEAR_BITS 14 static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX static volatile int kGammaTablesSOk = 0; +static void InitGammaTablesS(void); -static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) { +WEBP_DSP_INIT_FUNC(InitGammaTablesS) { assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values if (!kGammaTablesSOk) { int v; diff --git a/3rdparty/libwebp/src/enc/picture_tools_enc.c b/3rdparty/libwebp/src/enc/picture_tools_enc.c index d0e8a495da..38cb01534a 100644 --- a/3rdparty/libwebp/src/enc/picture_tools_enc.c +++ b/3rdparty/libwebp/src/enc/picture_tools_enc.c @@ -83,6 +83,19 @@ static int SmoothenBlock(const uint8_t* a_ptr, int a_stride, uint8_t* y_ptr, return (count == 0); } +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color) { + if (pic != NULL && pic->use_argb) { + int y = pic->height; + uint32_t* argb = pic->argb; + color &= 0xffffffu; // force alpha=0 + WebPInitAlphaProcessing(); + while (y-- > 0) { + WebPAlphaReplace(argb, pic->width, color); + argb += pic->argb_stride; + } + } +} + void WebPCleanupTransparentArea(WebPPicture* pic) { int x, y, w, h; if (pic == NULL) return; @@ -165,24 +178,6 @@ void WebPCleanupTransparentArea(WebPPicture* pic) { #undef SIZE #undef SIZE2 -void WebPCleanupTransparentAreaLossless(WebPPicture* const pic) { - int x, y, w, h; - uint32_t* argb; - assert(pic != NULL && pic->use_argb); - w = pic->width; - h = pic->height; - argb = pic->argb; - - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - if ((argb[x] & 0xff000000) == 0) { - argb[x] = 0x00000000; - } - } - argb += pic->argb_stride; - } -} - //------------------------------------------------------------------------------ // Blend color and remove transparency info diff --git a/3rdparty/libwebp/src/enc/vp8i_enc.h b/3rdparty/libwebp/src/enc/vp8i_enc.h index fedcaeea27..0e35562a8c 100644 --- a/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -31,7 +31,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 -#define ENC_MIN_VERSION 1 +#define ENC_MIN_VERSION 2 #define ENC_REV_VERSION 0 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level @@ -505,9 +505,9 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); // Returns false in case of error (invalid param, out-of-memory). 
int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); -// Clean-up the RGB samples under fully transparent area, to help lossless -// compressibility (no guarantee, though). Assumes that pic->use_argb is true. -void WebPCleanupTransparentAreaLossless(WebPPicture* const pic); +// Replace samples that are fully transparent by 'color' to help compressibility +// (no guarantee, though). Assumes pic->use_argb is true. +void WebPReplaceTransparentPixels(WebPPicture* const pic, uint32_t color); //------------------------------------------------------------------------------ diff --git a/3rdparty/libwebp/src/enc/vp8l_enc.c b/3rdparty/libwebp/src/enc/vp8l_enc.c index 2efd403f77..0b44ebe46e 100644 --- a/3rdparty/libwebp/src/enc/vp8l_enc.c +++ b/3rdparty/libwebp/src/enc/vp8l_enc.c @@ -144,7 +144,8 @@ typedef enum { kSubGreen = 2, kSpatialSubGreen = 3, kPalette = 4, - kNumEntropyIx = 5 + kPaletteAndSpatial = 5, + kNumEntropyIx = 6 } EntropyIx; typedef enum { @@ -354,11 +355,15 @@ static int GetTransformBits(int method, int histo_bits) { } // Set of parameters to be used in each iteration of the cruncher. -#define CRUNCH_CONFIGS_LZ77_MAX 2 +#define CRUNCH_SUBCONFIGS_MAX 2 +typedef struct { + int lz77_; + int do_no_cache_; +} CrunchSubConfig; typedef struct { int entropy_idx_; - int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX]; - int lz77s_types_to_try_size_; + CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX]; + int sub_configs_size_; } CrunchConfig; #define CRUNCH_CONFIGS_MAX kNumEntropyIx @@ -376,6 +381,9 @@ static int EncoderAnalyze(VP8LEncoder* const enc, int i; int use_palette; int n_lz77s; + // If set to 0, analyze the cache with the computed cache value. If 1, also + // analyze with no-cache. + int do_no_cache = 0; assert(pic != NULL && pic->argb != NULL); use_palette = @@ -402,10 +410,13 @@ static int EncoderAnalyze(VP8LEncoder* const enc, return 0; } if (method == 6 && config->quality == 100) { + do_no_cache = 1; // Go brute force on all transforms. *crunch_configs_size = 0; for (i = 0; i < kNumEntropyIx; ++i) { - if (i != kPalette || use_palette) { + // We can only apply kPalette or kPaletteAndSpatial if we can indeed use + // a palette. + if ((i != kPalette && i != kPaletteAndSpatial) || use_palette) { assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX); crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i; } @@ -414,17 +425,28 @@ static int EncoderAnalyze(VP8LEncoder* const enc, // Only choose the guessed best transform. *crunch_configs_size = 1; crunch_configs[0].entropy_idx_ = min_entropy_ix; + if (config->quality >= 75 && method == 5) { + // Test with and without color cache. + do_no_cache = 1; + // If we have a palette, also check in combination with spatial. + if (min_entropy_ix == kPalette) { + *crunch_configs_size = 2; + crunch_configs[1].entropy_idx_ = kPaletteAndSpatial; + } + } } } // Fill in the different LZ77s. - assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX); + assert(n_lz77s <= CRUNCH_SUBCONFIGS_MAX); for (i = 0; i < *crunch_configs_size; ++i) { int j; for (j = 0; j < n_lz77s; ++j) { - crunch_configs[i].lz77s_types_to_try_[j] = + assert(j < CRUNCH_SUBCONFIGS_MAX); + crunch_configs[i].sub_configs_[j].lz77_ = (j == 0) ? 
kLZ77Standard | kLZ77RLE : kLZ77Box; + crunch_configs[i].sub_configs_[j].do_no_cache_ = do_no_cache; } - crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s; + crunch_configs[i].sub_configs_size_ = n_lz77s; } return 1; } @@ -440,7 +462,7 @@ static int EncoderInit(VP8LEncoder* const enc) { int i; if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0; - for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); + for (i = 0; i < 4; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size); return 1; } @@ -769,13 +791,10 @@ static WebPEncodingError StoreImageToBitMask( } // Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, - const uint32_t* const argb, - VP8LHashChain* const hash_chain, - VP8LBackwardRefs* const refs_tmp1, - VP8LBackwardRefs* const refs_tmp2, - int width, int height, - int quality, int low_effort) { +static WebPEncodingError EncodeImageNoHuffman( + VP8LBitWriter* const bw, const uint32_t* const argb, + VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array, + int width, int height, int quality, int low_effort) { int i; int max_tokens = 0; WebPEncodingError err = VP8_ENC_OK; @@ -798,13 +817,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, - kLZ77Standard | kLZ77RLE, &cache_bits, - hash_chain, refs_tmp1, refs_tmp2); - if (refs == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + err = VP8LGetBackwardReferences( + width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE, + cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits); + if (err != VP8_ENC_OK) goto Error; + refs = &refs_array[0]; histogram_image = VP8LAllocateHistogramSet(1, cache_bits); if (histogram_image == NULL) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; @@ -860,11 +877,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw, static WebPEncodingError EncodeImageInternal( VP8LBitWriter* const bw, const uint32_t* const argb, - VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width, + VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width, int height, int quality, int low_effort, int use_cache, const CrunchConfig* const config, int* cache_bits, int histogram_bits, size_t init_byte_position, int* const hdr_size, int* const data_size) { - WebPEncodingError err = VP8_ENC_OK; + WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY; const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); @@ -876,103 +893,103 @@ static WebPEncodingError EncodeImageInternal( 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; - VP8LBackwardRefs* refs_best; - VP8LBackwardRefs* refs_tmp; uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(histogram_image_xysize, sizeof(*histogram_symbols)); - int lz77s_idx; + int sub_configs_idx; + int cache_bits_init, write_histogram_image; VP8LBitWriter bw_init = *bw, bw_best; int hdr_size_tmp; + VP8LHashChain hash_chain_histogram; // histogram image hash chain + size_t bw_size_best = ~(size_t)0; assert(histogram_bits >= MIN_HUFFMAN_BITS); assert(histogram_bits <= MAX_HUFFMAN_BITS); assert(hdr_size != NULL); assert(data_size != NULL); - if (histogram_symbols == NULL) { - err = 
VP8_ENC_ERROR_OUT_OF_MEMORY; + // Make sure we can allocate the different objects. + memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram)); + if (huff_tree == NULL || histogram_symbols == NULL || + !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) || + !VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort)) { goto Error; } - if (use_cache) { // If the value is different from zero, it has been set during the // palette analysis. - if (*cache_bits == 0) *cache_bits = MAX_COLOR_CACHE_BITS; + cache_bits_init = (*cache_bits == 0) ? MAX_COLOR_CACHE_BITS : *cache_bits; } else { - *cache_bits = 0; + cache_bits_init = 0; } - // 'best_refs' is the reference to the best backward refs and points to one - // of refs_array[0] or refs_array[1]. - // Calculate backward references from ARGB image. - if (huff_tree == NULL || - !VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort) || - !VP8LBitWriterInit(&bw_best, 0) || - (config->lz77s_types_to_try_size_ > 1 && + // If several iterations will happen, clone into bw_best. + if (!VP8LBitWriterInit(&bw_best, 0) || + ((config->sub_configs_size_ > 1 || + config->sub_configs_[0].do_no_cache_) && !VP8LBitWriterClone(bw, &bw_best))) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } - for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_; - ++lz77s_idx) { - refs_best = VP8LGetBackwardReferences( - width, height, argb, quality, low_effort, - config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain, - &refs_array[0], &refs_array[1]); - if (refs_best == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Keep the best references aside and use the other element from the first - // two as a temporary for later usage. - refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0]; + for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_; + ++sub_configs_idx) { + const CrunchSubConfig* const sub_config = + &config->sub_configs_[sub_configs_idx]; + int cache_bits_best, i_cache; + err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, + sub_config->lz77_, cache_bits_init, + sub_config->do_no_cache_, hash_chain, + &refs_array[0], &cache_bits_best); + if (err != VP8_ENC_OK) goto Error; - histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits); - tmp_histo = VP8LAllocateHistogram(*cache_bits); - if (histogram_image == NULL || tmp_histo == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) { + const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0; + // Speed-up: no need to study the no-cache case if it was already studied + // in i_cache == 0. + if (i_cache == 1 && cache_bits_best == 0) break; - // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort, - histogram_bits, *cache_bits, histogram_image, - tmp_histo, histogram_symbols)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Create Huffman bit lengths and codes for each histogram image. - histogram_image_size = histogram_image->size; - bit_array_size = 5 * histogram_image_size; - huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, - sizeof(*huffman_codes)); - // Note: some histogram_image entries may point to tmp_histos[], so the - // latter need to outlive the following call to GetHuffBitLengthsAndCodes(). 
- if (huffman_codes == NULL || - !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } - // Free combined histograms. - VP8LFreeHistogramSet(histogram_image); - histogram_image = NULL; + // Reset the bit writer for this iteration. + VP8LBitWriterReset(&bw_init, bw); - // Free scratch histograms. - VP8LFreeHistogram(tmp_histo); - tmp_histo = NULL; + // Build histogram image and symbols from backward references. + histogram_image = + VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp); + tmp_histo = VP8LAllocateHistogram(cache_bits_tmp); + if (histogram_image == NULL || tmp_histo == NULL || + !VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache], + quality, low_effort, histogram_bits, + cache_bits_tmp, histogram_image, tmp_histo, + histogram_symbols)) { + goto Error; + } + // Create Huffman bit lengths and codes for each histogram image. + histogram_image_size = histogram_image->size; + bit_array_size = 5 * histogram_image_size; + huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size, + sizeof(*huffman_codes)); + // Note: some histogram_image entries may point to tmp_histos[], so the + // latter need to outlive the following call to + // GetHuffBitLengthsAndCodes(). + if (huffman_codes == NULL || + !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + goto Error; + } + // Free combined histograms. + VP8LFreeHistogramSet(histogram_image); + histogram_image = NULL; - // Color Cache parameters. - if (*cache_bits > 0) { - VP8LPutBits(bw, 1, 1); - VP8LPutBits(bw, *cache_bits, 4); - } else { - VP8LPutBits(bw, 0, 1); - } + // Free scratch histograms. + VP8LFreeHistogram(tmp_histo); + tmp_histo = NULL; - // Huffman image + meta huffman. - { - const int write_histogram_image = (histogram_image_size > 1); + // Color Cache parameters. + if (cache_bits_tmp > 0) { + VP8LPutBits(bw, 1, 1); + VP8LPutBits(bw, cache_bits_tmp, 4); + } else { + VP8LPutBits(bw, 0, 1); + } + + // Huffman image + meta huffman. + write_histogram_image = (histogram_image_size > 1); VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { uint32_t* const histogram_argb = @@ -980,10 +997,7 @@ static WebPEncodingError EncodeImageInternal( sizeof(*histogram_argb)); int max_index = 0; uint32_t i; - if (histogram_argb == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; - } + if (histogram_argb == NULL) goto Error; for (i = 0; i < histogram_image_xysize; ++i) { const int symbol_index = histogram_symbols[i] & 0xffff; histogram_argb[i] = (symbol_index << 8); @@ -995,65 +1009,64 @@ static WebPEncodingError EncodeImageInternal( VP8LPutBits(bw, histogram_bits - 2, 3); err = EncodeImageNoHuffman( - bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2], + bw, histogram_argb, &hash_chain_histogram, &refs_array[2], VP8LSubSampleSize(width, histogram_bits), VP8LSubSampleSize(height, histogram_bits), quality, low_effort); WebPSafeFree(histogram_argb); if (err != VP8_ENC_OK) goto Error; } - } - // Store Huffman codes. - { - int i; - int max_tokens = 0; - // Find maximum number of symbols for the huffman tree-set. - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - if (max_tokens < codes->num_symbols) { - max_tokens = codes->num_symbols; + // Store Huffman codes. + { + int i; + int max_tokens = 0; + // Find maximum number of symbols for the huffman tree-set. 
+ for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + if (max_tokens < codes->num_symbols) { + max_tokens = codes->num_symbols; + } + } + tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); + if (tokens == NULL) goto Error; + for (i = 0; i < 5 * histogram_image_size; ++i) { + HuffmanTreeCode* const codes = &huffman_codes[i]; + StoreHuffmanCode(bw, huff_tree, tokens, codes); + ClearHuffmanTreeIfOnlyOneSymbol(codes); } } - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + // Store actual literals. + hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); + err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], + histogram_symbols, huffman_codes); + if (err != VP8_ENC_OK) goto Error; + // Keep track of the smallest image so far. + if (VP8LBitWriterNumBytes(bw) < bw_size_best) { + bw_size_best = VP8LBitWriterNumBytes(bw); + *cache_bits = cache_bits_tmp; + *hdr_size = hdr_size_tmp; + *data_size = + (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); + VP8LBitWriterSwap(bw, &bw_best); } - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - StoreHuffmanCode(bw, huff_tree, tokens, codes); - ClearHuffmanTreeIfOnlyOneSymbol(codes); + WebPSafeFree(tokens); + tokens = NULL; + if (huffman_codes != NULL) { + WebPSafeFree(huffman_codes->codes); + WebPSafeFree(huffman_codes); + huffman_codes = NULL; } } - // Store actual literals. - hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); - err = StoreImageToBitMask(bw, width, histogram_bits, refs_best, - histogram_symbols, huffman_codes); - // Keep track of the smallest image so far. - if (lz77s_idx == 0 || - VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) { - *hdr_size = hdr_size_tmp; - *data_size = - (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size); - VP8LBitWriterSwap(bw, &bw_best); - } - // Reset the bit writer for the following iteration if any. 
- if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw); - WebPSafeFree(tokens); - tokens = NULL; - if (huffman_codes != NULL) { - WebPSafeFree(huffman_codes->codes); - WebPSafeFree(huffman_codes); - huffman_codes = NULL; - } } VP8LBitWriterSwap(bw, &bw_best); + err = VP8_ENC_OK; Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); VP8LFreeHistogram(tmp_histo); + VP8LHashChainClear(&hash_chain_histogram); if (huffman_codes != NULL) { WebPSafeFree(huffman_codes->codes); WebPSafeFree(huffman_codes); @@ -1095,8 +1108,7 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, pred_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, quality, low_effort); } @@ -1116,8 +1128,7 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, VP8LPutBits(bw, ccolor_transform_bits - 2, 3); return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], // cast const away - (VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height, + (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, quality, low_effort); } @@ -1464,8 +1475,8 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, } tmp_palette[0] = palette[0]; return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, - &enc->refs_[0], &enc->refs_[1], palette_size, 1, - 20 /* quality */, low_effort); + &enc->refs_[0], palette_size, 1, /*quality=*/20, + low_effort); } // ----------------------------------------------------------------------------- @@ -1491,7 +1502,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) { if (enc != NULL) { int i; VP8LHashChainClear(&enc->hash_chain_); - for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); + for (i = 0; i < 4; ++i) VP8LBackwardRefsClear(&enc->refs_[i]); ClearTransformBuffer(enc); WebPSafeFree(enc); } @@ -1541,7 +1552,7 @@ static int EncodeStreamHook(void* input, void* data2) { int data_size = 0; int use_delta_palette = 0; int idx; - size_t best_size = 0; + size_t best_size = ~(size_t)0; VP8LBitWriter bw_init = *bw, bw_best; (void)data2; @@ -1553,11 +1564,13 @@ static int EncodeStreamHook(void* input, void* data2) { for (idx = 0; idx < num_crunch_configs; ++idx) { const int entropy_idx = crunch_configs[idx].entropy_idx_; - enc->use_palette_ = (entropy_idx == kPalette); + enc->use_palette_ = + (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial); enc->use_subtract_green_ = (entropy_idx == kSubGreen) || (entropy_idx == kSpatialSubGreen); - enc->use_predict_ = - (entropy_idx == kSpatial) || (entropy_idx == kSpatialSubGreen); + enc->use_predict_ = (entropy_idx == kSpatial) || + (entropy_idx == kSpatialSubGreen) || + (entropy_idx == kPaletteAndSpatial); if (low_effort) { enc->use_cross_color_ = 0; } else { @@ -1640,7 +1653,7 @@ static int EncodeStreamHook(void* input, void* data2) { if (err != VP8_ENC_OK) goto Error; // If we are better than what we already have. - if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) { + if (VP8LBitWriterNumBytes(bw) < best_size) { best_size = VP8LBitWriterNumBytes(bw); // Store the BitWriter. 
VP8LBitWriterSwap(bw, &bw_best); @@ -1816,7 +1829,7 @@ Error: } #undef CRUNCH_CONFIGS_MAX -#undef CRUNCH_CONFIGS_LZ77_MAX +#undef CRUNCH_SUBCONFIGS_MAX int VP8LEncodeImage(const WebPConfig* const config, const WebPPicture* const picture) { diff --git a/3rdparty/libwebp/src/enc/vp8li_enc.h b/3rdparty/libwebp/src/enc/vp8li_enc.h index d2d0fc509c..94210ce9f3 100644 --- a/3rdparty/libwebp/src/enc/vp8li_enc.h +++ b/3rdparty/libwebp/src/enc/vp8li_enc.h @@ -71,7 +71,7 @@ typedef struct { uint32_t palette_[MAX_PALETTE_SIZE]; // Some 'scratch' (potentially large) objects. - struct VP8LBackwardRefs refs_[3]; // Backward Refs array for temporaries. + struct VP8LBackwardRefs refs_[4]; // Backward Refs array for temporaries. VP8LHashChain hash_chain_; // HashChain data for constructing // backward references. } VP8LEncoder; diff --git a/3rdparty/libwebp/src/enc/webp_enc.c b/3rdparty/libwebp/src/enc/webp_enc.c index 9f4b10c26c..ce2db2e94b 100644 --- a/3rdparty/libwebp/src/enc/webp_enc.c +++ b/3rdparty/libwebp/src/enc/webp_enc.c @@ -400,7 +400,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { } if (!config->exact) { - WebPCleanupTransparentAreaLossless(pic); + WebPReplaceTransparentPixels(pic, 0x000000); } ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem. diff --git a/3rdparty/libwebp/src/mux/muxi.h b/3rdparty/libwebp/src/mux/muxi.h index ad3e1bdb97..2289822e8f 100644 --- a/3rdparty/libwebp/src/mux/muxi.h +++ b/3rdparty/libwebp/src/mux/muxi.h @@ -28,7 +28,7 @@ extern "C" { // Defines and constants. #define MUX_MAJ_VERSION 1 -#define MUX_MIN_VERSION 1 +#define MUX_MIN_VERSION 2 #define MUX_REV_VERSION 0 // Chunk object. diff --git a/3rdparty/libwebp/src/mux/muxread.c b/3rdparty/libwebp/src/mux/muxread.c index ae3b876bc5..0101fde15d 100644 --- a/3rdparty/libwebp/src/mux/muxread.c +++ b/3rdparty/libwebp/src/mux/muxread.c @@ -155,7 +155,6 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, break; default: goto Fail; - break; } subchunk_size = ChunkDiskSize(&subchunk); bytes += subchunk_size; @@ -264,7 +263,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err; ChunkRelease(&chunk); goto PushImage; - break; default: // A non-image chunk. if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before // getting all chunks of an image. 
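The relaxed stride assertion in the utils.c hunk below lets callers pass a
negative stride and fill a plane bottom-up, which is how a vertical flip is
done without an extra copy. A minimal sketch under that assumption --
FlipCopyPlane() is a hypothetical helper, not part of libwebp; only the
WebPCopyPlane() declaration matches src/utils/utils.h:

#include <stdint.h>

extern void WebPCopyPlane(const uint8_t* src, int src_stride,
                          uint8_t* dst, int dst_stride, int width, int height);

static void FlipCopyPlane(const uint8_t* src, int src_stride,
                          uint8_t* dst, int dst_stride, int width, int height) {
  // Aim at the last destination row and walk upwards: the stride is negative,
  // but its magnitude still covers one full row, which is exactly what
  // assert(abs(dst_stride) >= width) permits.
  WebPCopyPlane(src, src_stride, dst + (height - 1) * dst_stride, -dst_stride,
                width, height);
}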
diff --git a/3rdparty/libwebp/src/utils/utils.c b/3rdparty/libwebp/src/utils/utils.c
index 764f752b82..6080e19e21 100644
--- a/3rdparty/libwebp/src/utils/utils.c
+++ b/3rdparty/libwebp/src/utils/utils.c
@@ -231,7 +231,7 @@ void WebPFree(void* ptr) {
 void WebPCopyPlane(const uint8_t* src, int src_stride,
                    uint8_t* dst, int dst_stride, int width, int height) {
   assert(src != NULL && dst != NULL);
-  assert(src_stride >= width && dst_stride >= width);
+  assert(abs(src_stride) >= width && abs(dst_stride) >= width);
   while (height-- > 0) {
     memcpy(dst, src, width);
     src += src_stride;
diff --git a/3rdparty/libwebp/src/webp/decode.h b/3rdparty/libwebp/src/webp/decode.h
index 80dd0ef0cc..44fcd64a84 100644
--- a/3rdparty/libwebp/src/webp/decode.h
+++ b/3rdparty/libwebp/src/webp/decode.h
@@ -453,7 +453,7 @@ struct WebPDecoderOptions {
   int scaled_width, scaled_height;    // final resolution
   int use_threads;                    // if true, use multi-threaded decoding
   int dithering_strength;             // dithering strength (0=Off, 100=full)
-  int flip;                           // flip output vertically
+  int flip;                           // if true, flip output vertically
   int alpha_dithering_strength;       // alpha dithering strength in [0..100]
 
   uint32_t pad[5];                    // padding for later use
diff --git a/3rdparty/libwebp/src/webp/encode.h b/3rdparty/libwebp/src/webp/encode.h
index 655166e7d4..b4c599df87 100644
--- a/3rdparty/libwebp/src/webp/encode.h
+++ b/3rdparty/libwebp/src/webp/encode.h
@@ -148,7 +148,8 @@ struct WebPConfig {
   int use_delta_palette;  // reserved for future lossless feature
   int use_sharp_yuv;      // if needed, use sharp (and slow) RGB->YUV conversion
 
-  uint32_t pad[2];        // padding for later use
+  int qmin;               // minimum permissible quality factor
+  int qmax;               // maximum permissible quality factor
 };
 
 // Enumerate some predefined settings for WebPConfig, depending on the type
@@ -291,6 +292,11 @@ typedef enum WebPEncodingError {
 #define WEBP_MAX_DIMENSION 16383
 
 // Main exchange structure (input samples, output bytes, statistics)
+//
+// Once WebPPictureInit() has been called, it's ok to make all the INPUT fields
+// (use_argb, y/u/v, argb, ...) point to user-owned data, even if
+// WebPPictureAlloc() has been called. Depending on the value use_argb,
+// it's guaranteed that either *argb or *y/*u/*v content will be kept untouched.
 struct WebPPicture {
   // INPUT
   //////////////
diff --git a/3rdparty/openjpeg/CHANGELOG.md b/3rdparty/openjpeg/CHANGELOG.md
index e45b324658..4187b06730 100644
--- a/3rdparty/openjpeg/CHANGELOG.md
+++ b/3rdparty/openjpeg/CHANGELOG.md
@@ -1,5 +1,92 @@
 # Changelog
 
+## [v2.4.0](https://github.com/uclouvain/openjpeg/releases/v2.4.0) (2020-12-28)
+
+[Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.3.1...v2.4.0)
+
+**Closed issues:**
+
+- OPENJPEG\_INSTALL\_DOC\_DIR does not control a destination directory where HTML docs would be installed. [\#1309](https://github.com/uclouvain/openjpeg/issues/1309)
+- Heap-buffer-overflow in lib/openjp2/pi.c:312 [\#1302](https://github.com/uclouvain/openjpeg/issues/1302)
+- Heap-buffer-overflow in lib/openjp2/t2.c:973 [\#1299](https://github.com/uclouvain/openjpeg/issues/1299)
+- Heap-buffer-overflow in lib/openjp2/pi.c:623 [\#1293](https://github.com/uclouvain/openjpeg/issues/1293)
+- Global-buffer-overflow in lib/openjp2/dwt.c:1980 [\#1286](https://github.com/uclouvain/openjpeg/issues/1286)
+- Heap-buffer-overflow in lib/openjp2/tcd.c:2417 [\#1284](https://github.com/uclouvain/openjpeg/issues/1284)
+- Heap-buffer-overflow in lib/openjp2/mqc.c:499 [\#1283](https://github.com/uclouvain/openjpeg/issues/1283)
+- Openjpeg could not encode 32bit RGB float image [\#1281](https://github.com/uclouvain/openjpeg/issues/1281)
+- Openjpeg could not encode 32bit RGB float image [\#1280](https://github.com/uclouvain/openjpeg/issues/1280)
+- ISO/IEC 15444-1:2019 \(E\) compared with 'cio.h' [\#1277](https://github.com/uclouvain/openjpeg/issues/1277)
+- Test-suite failure due to hash mismatch [\#1264](https://github.com/uclouvain/openjpeg/issues/1264)
+- Heap use-after-free [\#1261](https://github.com/uclouvain/openjpeg/issues/1261)
+- Memory leak when failing to allocate object... [\#1259](https://github.com/uclouvain/openjpeg/issues/1259)
+- Memory leak of Tier 1 handle when OpenJPEG fails to set it as TLS... [\#1257](https://github.com/uclouvain/openjpeg/issues/1257)
+- Any plan to build release for CVE-2020-8112/CVE-2020-6851 [\#1247](https://github.com/uclouvain/openjpeg/issues/1247)
+- failing to convert 16-bit file: opj\_t2\_encode\_packet\(\): only 5251 bytes remaining in output buffer. 5621 needed. [\#1243](https://github.com/uclouvain/openjpeg/issues/1243)
+- CMake+VS2017 Compile OK, thirdparty Compile OK, but thirdparty not install [\#1239](https://github.com/uclouvain/openjpeg/issues/1239)
+- New release to solve CVE-2019-6988 ? [\#1238](https://github.com/uclouvain/openjpeg/issues/1238)
+- Many tests fail to pass after the update of libtiff to version 4.1.0 [\#1233](https://github.com/uclouvain/openjpeg/issues/1233)
+- Another heap buffer overflow in libopenjp2 [\#1231](https://github.com/uclouvain/openjpeg/issues/1231)
+- Heap buffer overflow in libopenjp2 [\#1228](https://github.com/uclouvain/openjpeg/issues/1228)
+- Endianness of binary volume \(JP3D\) [\#1224](https://github.com/uclouvain/openjpeg/issues/1224)
+- New release to resolve CVE-2019-12973 [\#1222](https://github.com/uclouvain/openjpeg/issues/1222)
+- how to set the block size,like 128,256 ? [\#1216](https://github.com/uclouvain/openjpeg/issues/1216)
+- compress YUV files to motion jpeg2000 standard [\#1213](https://github.com/uclouvain/openjpeg/issues/1213)
+- Repair/update Java wrapper, and include in release [\#1208](https://github.com/uclouvain/openjpeg/issues/1208)
+- abc [\#1206](https://github.com/uclouvain/openjpeg/issues/1206)
+- Slow decoding [\#1202](https://github.com/uclouvain/openjpeg/issues/1202)
+- Installation question [\#1201](https://github.com/uclouvain/openjpeg/issues/1201)
+- Typo in test\_decode\_area - \*ptilew is assigned instead of \*ptileh [\#1195](https://github.com/uclouvain/openjpeg/issues/1195)
+- Creating a J2K file with one POC is broken [\#1191](https://github.com/uclouvain/openjpeg/issues/1191)
+- Make fails on Arch Linux [\#1174](https://github.com/uclouvain/openjpeg/issues/1174)
+- Heap buffer overflow in opj\_t1\_clbl\_decode\_processor\(\) triggered with Ghostscript [\#1158](https://github.com/uclouvain/openjpeg/issues/1158)
+- opj\_stream\_get\_number\_byte\_left: Assertion `p\_stream-\>m\_byte\_offset \>= 0' failed. [\#1151](https://github.com/uclouvain/openjpeg/issues/1151)
+- The fuzzer ignores too many inputs [\#1079](https://github.com/uclouvain/openjpeg/issues/1079)
+- out of bounds read [\#1068](https://github.com/uclouvain/openjpeg/issues/1068)
+
+**Merged pull requests:**
+
+- Change defined WIN32 [\#1310](https://github.com/uclouvain/openjpeg/pull/1310) ([Jamaika1](https://github.com/Jamaika1))
+- docs: fix simple typo, producted -\> produced [\#1308](https://github.com/uclouvain/openjpeg/pull/1308) ([timgates42](https://github.com/timgates42))
+- Set ${OPENJPEG\_INSTALL\_DOC\_DIR} to DESTINATION of HTMLs [\#1307](https://github.com/uclouvain/openjpeg/pull/1307) ([lemniscati](https://github.com/lemniscati))
+- Use INC\_DIR for OPENJPEG\_INCLUDE\_DIRS \(fixes uclouvain\#1174\) [\#1306](https://github.com/uclouvain/openjpeg/pull/1306) ([matthew-sharp](https://github.com/matthew-sharp))
+- pi.c: avoid out of bounds access with POC \(fixes \#1302\) [\#1304](https://github.com/uclouvain/openjpeg/pull/1304) ([rouault](https://github.com/rouault))
+- Encoder: grow again buffer size [\#1303](https://github.com/uclouvain/openjpeg/pull/1303) ([zodf0055980](https://github.com/zodf0055980))
+- opj\_j2k\_write\_sod\(\): avoid potential heap buffer overflow \(fixes \#1299\) \(probably master only\) [\#1301](https://github.com/uclouvain/openjpeg/pull/1301) ([rouault](https://github.com/rouault))
+- pi.c: avoid out of bounds access with POC \(refs https://github.com/uclouvain/openjpeg/issues/1293\#issuecomment-737122836\) [\#1300](https://github.com/uclouvain/openjpeg/pull/1300) ([rouault](https://github.com/rouault))
+- opj\_t2\_encode\_packet\(\): avoid out of bound access of \#1297, but likely not the proper fix [\#1298](https://github.com/uclouvain/openjpeg/pull/1298) ([rouault](https://github.com/rouault))
+- opj\_t2\_encode\_packet\(\): avoid out of bound access of \#1294, but likely not the proper fix [\#1296](https://github.com/uclouvain/openjpeg/pull/1296) ([rouault](https://github.com/rouault))
+- opj\_j2k\_setup\_encoder\(\): validate POC compno0 and compno1 \(fixes \#1293\) [\#1295](https://github.com/uclouvain/openjpeg/pull/1295) ([rouault](https://github.com/rouault))
+- Encoder: avoid global buffer overflow on irreversible conversion when… [\#1292](https://github.com/uclouvain/openjpeg/pull/1292) ([rouault](https://github.com/rouault))
+- Decoding: deal with some SPOT6 images that have tiles with a single tile-part with TPsot == 0 and TNsot == 0, and with missing EOC [\#1291](https://github.com/uclouvain/openjpeg/pull/1291) ([rouault](https://github.com/rouault))
+- Free p\_tcd\_marker\_info to avoid memory leak [\#1288](https://github.com/uclouvain/openjpeg/pull/1288) ([zodf0055980](https://github.com/zodf0055980))
+- Encoder: grow again buffer size [\#1287](https://github.com/uclouvain/openjpeg/pull/1287) ([zodf0055980](https://github.com/zodf0055980))
+- Encoder: avoid uint32 overflow when allocating memory for codestream buffer \(fixes \#1243\) [\#1276](https://github.com/uclouvain/openjpeg/pull/1276) ([rouault](https://github.com/rouault))
+- Java compatibility from 1.5 to 1.6 [\#1263](https://github.com/uclouvain/openjpeg/pull/1263) ([jiapei100](https://github.com/jiapei100))
+- opj\_decompress: fix double-free on input directory with mix of valid and invalid images [\#1262](https://github.com/uclouvain/openjpeg/pull/1262) ([rouault](https://github.com/rouault))
+- openjp2: Plug image leak when failing to allocate codestream index. [\#1260](https://github.com/uclouvain/openjpeg/pull/1260) ([sebras](https://github.com/sebras))
+- openjp2: Plug memory leak when setting data as TLS fails. [\#1258](https://github.com/uclouvain/openjpeg/pull/1258) ([sebras](https://github.com/sebras))
+- openjp2: Error out if failing to create Tier 1 handle. [\#1256](https://github.com/uclouvain/openjpeg/pull/1256) ([sebras](https://github.com/sebras))
+- Testing for invalid values of width, height, numcomps [\#1254](https://github.com/uclouvain/openjpeg/pull/1254) ([szukw000](https://github.com/szukw000))
+- Single-threaded performance improvements in forward DWT for 5-3 and 9-7 \(and other improvements\) [\#1253](https://github.com/uclouvain/openjpeg/pull/1253) ([rouault](https://github.com/rouault))
+- Add support for multithreading in encoder [\#1248](https://github.com/uclouvain/openjpeg/pull/1248) ([rouault](https://github.com/rouault))
+- Add support for generation of PLT markers in encoder [\#1246](https://github.com/uclouvain/openjpeg/pull/1246) ([rouault](https://github.com/rouault))
+- Fix warnings about signed/unsigned casts in pi.c [\#1244](https://github.com/uclouvain/openjpeg/pull/1244) ([rouault](https://github.com/rouault))
+- opj\_decompress: add sanity checks to avoid segfault in case of decoding error [\#1240](https://github.com/uclouvain/openjpeg/pull/1240) ([rouault](https://github.com/rouault))
+- ignore wrong icc [\#1236](https://github.com/uclouvain/openjpeg/pull/1236) ([szukw000](https://github.com/szukw000))
+- Implement writing of IMF profiles [\#1235](https://github.com/uclouvain/openjpeg/pull/1235) ([rouault](https://github.com/rouault))
+- tests: add alternate checksums for libtiff 4.1 [\#1234](https://github.com/uclouvain/openjpeg/pull/1234) ([rouault](https://github.com/rouault))
+- opj\_tcd\_init\_tile\(\): avoid integer overflow [\#1232](https://github.com/uclouvain/openjpeg/pull/1232) ([rouault](https://github.com/rouault))
+- tests/fuzzers: link fuzz binaries using $LIB\_FUZZING\_ENGINE. [\#1230](https://github.com/uclouvain/openjpeg/pull/1230) ([Dor1s](https://github.com/Dor1s))
+- opj\_j2k\_update\_image\_dimensions\(\): reject images whose coordinates are beyond INT\_MAX \(fixes \#1228\) [\#1229](https://github.com/uclouvain/openjpeg/pull/1229) ([rouault](https://github.com/rouault))
+- Fix resource leaks [\#1226](https://github.com/uclouvain/openjpeg/pull/1226) ([dodys](https://github.com/dodys))
+- abi-check.sh: fix false postive ABI error, and display output error log [\#1218](https://github.com/uclouvain/openjpeg/pull/1218) ([rouault](https://github.com/rouault))
+- pi.c: avoid integer overflow, resulting in later invalid access to memory in opj\_t2\_decode\_packets\(\) [\#1217](https://github.com/uclouvain/openjpeg/pull/1217) ([rouault](https://github.com/rouault))
+- Add check to validate SGcod/SPcoc/SPcod parameter values. [\#1211](https://github.com/uclouvain/openjpeg/pull/1211) ([sebras](https://github.com/sebras))
+- Fix buffer overflow reading an image file less than four characters [\#1196](https://github.com/uclouvain/openjpeg/pull/1196) ([robert-ancell](https://github.com/robert-ancell))
+- compression: emit POC marker when only one single POC is requested \(f… [\#1192](https://github.com/uclouvain/openjpeg/pull/1192) ([rouault](https://github.com/rouault))
+- Fix several potential vulnerabilities [\#1185](https://github.com/uclouvain/openjpeg/pull/1185) ([Young-X](https://github.com/Young-X))
+- openjp2/j2k: Report error if all wanted components are not decoded. [\#1164](https://github.com/uclouvain/openjpeg/pull/1164) ([sebras](https://github.com/sebras))
+
 ## [v2.3.1](https://github.com/uclouvain/openjpeg/releases/v2.3.1) (2019-04-02)
 
 [Full Changelog](https://github.com/uclouvain/openjpeg/compare/v2.3.0...v2.3.1)
diff --git a/3rdparty/openjpeg/CMakeLists.txt b/3rdparty/openjpeg/CMakeLists.txt
index b38bf28f05..fe766101d0 100644
--- a/3rdparty/openjpeg/CMakeLists.txt
+++ b/3rdparty/openjpeg/CMakeLists.txt
@@ -18,8 +18,8 @@ ocv_warnings_disable(CMAKE_C_FLAGS
 #-----------------------------------------------------------------------------
 # OPENJPEG version number, useful for packaging and doxygen doc:
 set(OPENJPEG_VERSION_MAJOR 2)
-set(OPENJPEG_VERSION_MINOR 3)
-set(OPENJPEG_VERSION_BUILD 1)
+set(OPENJPEG_VERSION_MINOR 4)
+set(OPENJPEG_VERSION_BUILD 0)
 set(OPENJPEG_VERSION
   "${OPENJPEG_VERSION_MAJOR}.${OPENJPEG_VERSION_MINOR}.${OPENJPEG_VERSION_BUILD}")
 set(PACKAGE_VERSION
@@ -43,6 +43,7 @@ set(PACKAGE_VERSION
 #     2.2.0   |   7
 #     2.3.0   |   7
 #     2.3.1   |   7
+#     2.4.0   |   7
 # above is the recommendation by the OPJ team. If you really need to override this default,
 # you can specify your own OPENJPEG_SOVERSION at cmake configuration time:
 # cmake -DOPENJPEG_SOVERSION:STRING=42 /path/to/openjpeg
diff --git a/3rdparty/openjpeg/openjp2/CMakeLists.txt b/3rdparty/openjpeg/openjp2/CMakeLists.txt
index 7decabe210..321d318642 100644
--- a/3rdparty/openjpeg/openjp2/CMakeLists.txt
+++ b/3rdparty/openjpeg/openjp2/CMakeLists.txt
@@ -33,7 +33,11 @@ endif()
 #     set(WIN32 YES)
 #   endif()
 
-ocv_warnings_disable(CMAKE_C_FLAGS -Wundef -Wstrict-prototypes -Wcast-function-type)
+ocv_warnings_disable(CMAKE_C_FLAGS
+    -Wundef -Wstrict-prototypes -Wcast-function-type
+    -Wshadow  # v2.4.0: GCC
+    -Wunused-function  # v2.4.0: Clang
+)
 
 add_library(${OPENJPEG_LIBRARY_NAME} STATIC ${OPENJPEG_SRCS})
diff --git a/3rdparty/openjpeg/openjp2/dwt.c b/3rdparty/openjpeg/openjp2/dwt.c
index 5930d1c71e..4164ba090e 100644
--- a/3rdparty/openjpeg/openjp2/dwt.c
+++ b/3rdparty/openjpeg/openjp2/dwt.c
@@ -87,12 +87,14 @@ typedef struct dwt_local {
     OPJ_INT32 cas;  /* 0 = start on even coord, 1 = start on odd coord */
 } opj_dwt_t;
 
-typedef union {
-    OPJ_FLOAT32 f[4];
-} opj_v4_t;
+#define NB_ELTS_V8 8
 
-typedef struct v4dwt_local {
-    opj_v4_t* wavelet ;
+typedef union {
+    OPJ_FLOAT32 f[NB_ELTS_V8];
+} opj_v8_t;
+
+typedef struct v8dwt_local {
+    opj_v8_t* wavelet ;
     OPJ_INT32 dn ;  /* number of elements in high pass band */
     OPJ_INT32 sn ;  /* number of elements in low pass band */
     OPJ_INT32 cas ;  /* 0 = start on even coord, 1 = start on odd coord */
@@ -100,45 +102,34 @@ typedef struct v4dwt_local {
     OPJ_UINT32 win_l_x1; /* end coord in low pass band */
     OPJ_UINT32 win_h_x0; /* start coord in high pass band */
     OPJ_UINT32 win_h_x1; /* end coord in high pass band */
-} opj_v4dwt_t ;
+} opj_v8dwt_t ;
 
-static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */
-static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */
-static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */
-static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */
+/* From table F.4 from the standard */
+static const OPJ_FLOAT32 opj_dwt_alpha = -1.586134342f;
+static const OPJ_FLOAT32 opj_dwt_beta = -0.052980118f;
+static const OPJ_FLOAT32 opj_dwt_gamma = 0.882911075f;
+static const OPJ_FLOAT32 opj_dwt_delta = 0.443506852f;
 
-static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */
-static const OPJ_FLOAT32 opj_c13318 = 1.625732422f;
+static const OPJ_FLOAT32 opj_K = 1.230174105f;
+static const OPJ_FLOAT32 opj_invK = (OPJ_FLOAT32)(1.0 / 1.230174105);
 
 /*@}*/
 
-/**
-Virtual function type for wavelet transform in 1-D
-*/
-typedef void (*DWT1DFN)(const opj_dwt_t* v);
-
 /** @name Local static functions */
 /*@{*/
 
 /**
 Forward lazy transform (horizontal)
 */
-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
+                                   OPJ_INT32 * OPJ_RESTRICT b,
+                                   OPJ_INT32 dn,
                                    OPJ_INT32 sn, OPJ_INT32 cas);
-/**
-Forward lazy transform (vertical)
-*/
-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
-                                   OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas);
-/**
-Forward 5-3 wavelet transform in 1-D
-*/
-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                             OPJ_INT32 cas);
+
 /**
 Forward 9-7 wavelet transform in 1-D
 */
-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
                                   OPJ_INT32 cas);
 /**
 Explicit calculation of the Quantization Stepsizes
@@ -155,8 +146,29 @@ static OPJ_BOOL
opj_dwt_decode_partial_tile( opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); -static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, - void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)); +/* Forward transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ +typedef void (*opj_encode_and_deinterleave_v_fnptr_type)( + void *array, + void *tmp, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols); + +/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ +typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)( + void *row, + void *tmp, + OPJ_UINT32 width, + OPJ_BOOL even); + +static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t * tilec, + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, + opj_encode_and_deinterleave_h_one_row_fnptr_type + p_encode_and_deinterleave_h_one_row); static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, OPJ_UINT32 i); @@ -164,42 +176,6 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, /* */ /* Inverse 9-7 wavelet transform in 1-D. */ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt); - -static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 remaining_height); - -static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 nb_elts_read); - -#ifdef __SSE__ -static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - const __m128 c); - -static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - OPJ_UINT32 m, __m128 c); - -#else -static void opj_v4dwt_decode_step1(opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - const OPJ_FLOAT32 c); - -static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, - OPJ_UINT32 start, - OPJ_UINT32 end, - OPJ_UINT32 m, - OPJ_FLOAT32 c); - -#endif /*@}*/ @@ -246,12 +222,14 @@ static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { /* */ /* Forward lazy transform (horizontal). */ /* */ -static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a, + OPJ_INT32 * OPJ_RESTRICT b, + OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { OPJ_INT32 i; - OPJ_INT32 * l_dest = b; - OPJ_INT32 * l_src = a + cas; + OPJ_INT32 * OPJ_RESTRICT l_dest = b; + const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas; for (i = 0; i < sn; ++i) { *l_dest++ = *l_src; @@ -267,40 +245,13 @@ static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, } } -/* */ -/* Forward lazy transform (vertical). */ -/* */ -static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, - OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) -{ - OPJ_INT32 i = sn; - OPJ_INT32 * l_dest = b; - OPJ_INT32 * l_src = a + cas; - - while (i--) { - *l_dest = *l_src; - l_dest += x; - l_src += 2; - } /* b[i*x]=a[2*i+cas]; */ - - l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x; - l_src = a + 1 - cas; - - i = dn; - while (i--) { - *l_dest = *l_src; - l_dest += x; - l_src += 2; - } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ -} - #ifdef STANDARD_SLOW_VERSION /* */ /* Inverse lazy transform (horizontal). 
*/ /* */ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) { - OPJ_INT32 *ai = a; + const OPJ_INT32 *ai = a; OPJ_INT32 *bi = h->mem + h->cas; OPJ_INT32 i = h->sn; while (i--) { @@ -321,7 +272,7 @@ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) /* */ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) { - OPJ_INT32 *ai = a; + const OPJ_INT32 *ai = a; OPJ_INT32 *bi = v->mem + v->cas; OPJ_INT32 i = v->sn; while (i--) { @@ -341,37 +292,6 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) #endif /* STANDARD_SLOW_VERSION */ -/* */ -/* Forward 5-3 wavelet transform in 1-D. */ -/* */ -static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, - OPJ_INT32 cas) -{ - OPJ_INT32 i; - - if (!cas) { - if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; - } - for (i = 0; i < sn; i++) { - OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; - } - } - } else { - if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ - OPJ_S(0) *= 2; - } else { - for (i = 0; i < dn; i++) { - OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; - } - for (i = 0; i < sn; i++) { - OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; - } - } - } -} - #ifdef STANDARD_SLOW_VERSION /* */ /* Inverse 5-3 wavelet transform in 1-D. */ @@ -1033,57 +953,137 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, #endif } - -/* */ -/* Forward 9-7 wavelet transform in 1-D. */ -/* */ -static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, - OPJ_INT32 cas) +#if 0 +static void opj_dwt_encode_step1(OPJ_FLOAT32* fw, + OPJ_UINT32 end, + const OPJ_FLOAT32 c) { - OPJ_INT32 i; - if (!cas) { - if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); - } - for (i = 0; i < sn; i++) { - OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); - } - for (i = 0; i < dn; i++) { - OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); - } - for (i = 0; i < sn; i++) { - OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); - } - for (i = 0; i < dn; i++) { - OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ - } - for (i = 0; i < sn; i++) { - OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ - } + OPJ_UINT32 i = 0; + for (; i < end; ++i) { + fw[0] *= c; + fw += 2; + } +} +#else +static void opj_dwt_encode_step1_combined(OPJ_FLOAT32* fw, + OPJ_UINT32 iters_c1, + OPJ_UINT32 iters_c2, + const OPJ_FLOAT32 c1, + const OPJ_FLOAT32 c2) +{ + OPJ_UINT32 i = 0; + const OPJ_UINT32 iters_common = opj_uint_min(iters_c1, iters_c2); + assert((((OPJ_SIZE_T)fw) & 0xf) == 0); + assert(opj_int_abs((OPJ_INT32)iters_c1 - (OPJ_INT32)iters_c2) <= 1); + for (; i + 3 < iters_common; i += 4) { +#ifdef __SSE__ + const __m128 vcst = _mm_set_ps(c2, c1, c2, c1); + *(__m128*)fw = _mm_mul_ps(*(__m128*)fw, vcst); + *(__m128*)(fw + 4) = _mm_mul_ps(*(__m128*)(fw + 4), vcst); +#else + fw[0] *= c1; + fw[1] *= c2; + fw[2] *= c1; + fw[3] *= c2; + fw[4] *= c1; + fw[5] *= c2; + fw[6] *= c1; + fw[7] *= c2; +#endif + fw += 8; + } + for (; i < iters_common; i++) { + fw[0] *= c1; + fw[1] *= c2; + fw += 2; + } + if (i < iters_c1) { + fw[0] *= c1; + } else if (i < iters_c2) { + fw[1] *= c2; + } +} + +#endif + +static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, + OPJ_UINT32 end, + OPJ_UINT32 m, + OPJ_FLOAT32 c) +{ + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); + if (imax 
> 0) { + fw[-1] += (fl[0] + fw[0]) * c; + fw += 2; + i = 1; + for (; i + 3 < imax; i += 4) { + fw[-1] += (fw[-2] + fw[0]) * c; + fw[1] += (fw[0] + fw[2]) * c; + fw[3] += (fw[2] + fw[4]) * c; + fw[5] += (fw[4] + fw[6]) * c; + fw += 8; } - } else { - if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < dn; i++) { - OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); - } - for (i = 0; i < sn; i++) { - OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); - } - for (i = 0; i < dn; i++) { - OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); - } - for (i = 0; i < sn; i++) { - OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); - } - for (i = 0; i < dn; i++) { - OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ - } - for (i = 0; i < sn; i++) { - OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ - } + for (; i < imax; ++i) { + fw[-1] += (fw[-2] + fw[0]) * c; + fw += 2; } } + if (m < end) { + assert(m + 1 == end); + fw[-1] += (2 * fw[-2]) * c; + } +} + +static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn, + OPJ_INT32 cas) +{ + OPJ_FLOAT32* w = (OPJ_FLOAT32*)aIn; + OPJ_INT32 a, b; + assert(dn + sn > 1); + if (cas == 0) { + a = 0; + b = 1; + } else { + a = 1; + b = 0; + } + opj_dwt_encode_step2(w + a, w + b + 1, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_alpha); + opj_dwt_encode_step2(w + b, w + a + 1, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_beta); + opj_dwt_encode_step2(w + a, w + b + 1, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_gamma); + opj_dwt_encode_step2(w + b, w + a + 1, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_delta); +#if 0 + opj_dwt_encode_step1(w + b, (OPJ_UINT32)dn, + opj_K); + opj_dwt_encode_step1(w + a, (OPJ_UINT32)sn, + opj_invK); +#else + if (a == 0) { + opj_dwt_encode_step1_combined(w, + (OPJ_UINT32)sn, + (OPJ_UINT32)dn, + opj_invK, + opj_K); + } else { + opj_dwt_encode_step1_combined(w, + (OPJ_UINT32)dn, + (OPJ_UINT32)sn, + opj_K, + opj_invK); + } +#endif } static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, @@ -1102,41 +1102,650 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, ========================================================== */ +/** Process one line for the horizontal pass of the 5x3 forward transform */ +static +void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn, + void* tmpIn, + OPJ_UINT32 width, + OPJ_BOOL even) +{ + OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 
1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); + + if (even) { + if (width > 1) { + OPJ_INT32 i; + for (i = 0; i < sn - 1; i++) { + tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1); + } + if ((width % 2) == 0) { + tmp[sn + i] = row[2 * i + 1] - row[(i) * 2]; + } + row[0] += (tmp[sn] + tmp[sn] + 2) >> 2; + for (i = 1; i < dn; i++) { + row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2); + } + if ((width % 2) == 1) { + row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2); + } + memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); + } + } else { + if (width == 1) { + row[0] *= 2; + } else { + OPJ_INT32 i; + tmp[sn + 0] = row[0] - row[1]; + for (i = 1; i < sn; i++) { + tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1); + } + if ((width % 2) == 1) { + tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1]; + } + + for (i = 0; i < dn - 1; i++) { + row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2); + } + if ((width % 2) == 0) { + row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2); + } + memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); + } + } +} + +/** Process one line for the horizontal pass of the 9x7 forward transform */ +static +void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn, + void* tmpIn, + OPJ_UINT32 width, + OPJ_BOOL even) +{ + OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn; + OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); + if (width == 1) { + return; + } + memcpy(tmp, row, width * sizeof(OPJ_FLOAT32)); + opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1); + opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp, + (OPJ_INT32 * OPJ_RESTRICT)row, + dn, sn, even ? 0 : 1); +} + +typedef struct { + opj_dwt_t h; + OPJ_UINT32 rw; /* Width of the resolution to process */ + OPJ_UINT32 w; /* Width of tiledp */ + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; + opj_encode_and_deinterleave_h_one_row_fnptr_type p_function; +} opj_dwt_encode_h_job_t; + +static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt_encode_h_job_t* job; + (void)tls; + + job = (opj_dwt_encode_h_job_t*)user_data; + for (j = job->min_j; j < job->max_j; j++) { + OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j * job->w; + (*job->p_function)(aj, job->h.mem, job->rw, + job->h.cas == 0 ? 
OPJ_TRUE : OPJ_FALSE); + } + + opj_aligned_free(job->h.mem); + opj_free(job); +} + +typedef struct { + opj_dwt_t v; + OPJ_UINT32 rh; + OPJ_UINT32 w; + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v; +} opj_dwt_encode_v_job_t; + +static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt_encode_v_job_t* job; + (void)tls; + + job = (opj_dwt_encode_v_job_t*)user_data; + for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) { + (*job->p_encode_and_deinterleave_v)(job->tiledp + j, + job->v.mem, + job->rh, + job->v.cas == 0, + job->w, + NB_ELTS_V8); + } + if (j < job->max_j) { + (*job->p_encode_and_deinterleave_v)(job->tiledp + j, + job->v.mem, + job->rh, + job->v.cas == 0, + job->w, + job->max_j - j); + } + + opj_aligned_free(job->v.mem); + opj_free(job); +} + +/** Fetch up to cols <= NB_ELTS_V8 for each line, and put them in tmpOut */ +/* that has a NB_ELTS_V8 interleave factor. */ +static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn, + void *tmpOut, + OPJ_UINT32 height, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) +{ + const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut; + if (cols == NB_ELTS_V8) { + OPJ_UINT32 k; + for (k = 0; k < height; ++k) { + memcpy(tmp + NB_ELTS_V8 * k, + array + k * stride_width, + NB_ELTS_V8 * sizeof(OPJ_INT32)); + } + } else { + OPJ_UINT32 k; + for (k = 0; k < height; ++k) { + OPJ_UINT32 c; + for (c = 0; c < cols; c++) { + tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width]; + } + for (; c < NB_ELTS_V8; c++) { + tmp[NB_ELTS_V8 * k + c] = 0; + } + } + } +} + +/* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */ +/* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */ +/* columns. */ +static INLINE void opj_dwt_deinterleave_v_cols( + const OPJ_INT32 * OPJ_RESTRICT src, + OPJ_INT32 * OPJ_RESTRICT dst, + OPJ_INT32 dn, + OPJ_INT32 sn, + OPJ_UINT32 stride_width, + OPJ_INT32 cas, + OPJ_UINT32 cols) +{ + OPJ_INT32 k; + OPJ_INT32 i = sn; + OPJ_INT32 * OPJ_RESTRICT l_dest = dst; + const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8; + OPJ_UINT32 c; + + for (k = 0; k < 2; k++) { + while (i--) { + if (cols == NB_ELTS_V8) { + memcpy(l_dest, l_src, NB_ELTS_V8 * sizeof(OPJ_INT32)); + } else { + c = 0; + switch (cols) { + case 7: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 6: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 5: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 4: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 3: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + case 2: + l_dest[c] = l_src[c]; + c++; /* fallthru */ + default: + l_dest[c] = l_src[c]; + break; + } + } + l_dest += stride_width; + l_src += 2 * NB_ELTS_V8; + } + + l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width; + l_src = src + (1 - cas) * NB_ELTS_V8; + i = dn; + } +} + + +/* Forward 5-3 transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +static void opj_dwt_encode_and_deinterleave_v( + void *arrayIn, + void *tmpIn, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) +{ + OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn; + OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn; + const OPJ_UINT32 sn = (height + (even ? 
1 : 0)) >> 1; + const OPJ_UINT32 dn = height - sn; + + opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); + +#define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c] +#define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c] + +#ifdef __SSE2__ + if (height == 1) { + if (!even) { + OPJ_UINT32 c; + for (c = 0; c < NB_ELTS_V8; c++) { + tmp[c] *= 2; + } + } + } else if (even) { + OPJ_UINT32 c; + OPJ_UINT32 i; + i = 0; + if (i + 1 < sn) { + __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1); + for (; i + 1 < sn; i++) { + __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Di_0 = _mm_sub_epi32(xmm_Di_0, + _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1)); + xmm_Di_1 = _mm_sub_epi32(xmm_Di_1, + _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1)); + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; + xmm_Si_0 = xmm_Sip1_0; + xmm_Si_1 = xmm_Sip1_1; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) -= OPJ_Sc(i); + } + } + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; + } + i = 1; + if (i < dn) { + __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); + const __m128i xmm_two = _mm_set1_epi32(2); + for (; i < dn; i++) { + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Si_0 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Si_0 = _mm_add_epi32(xmm_Si_0, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2)); + xmm_Si_1 = _mm_add_epi32(xmm_Si_1, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2)); + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; + xmm_Dim1_0 = xmm_Di_0; + xmm_Dim1_1 = xmm_Di_1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; + } + } + } else { + OPJ_UINT32 c; + OPJ_UINT32 i; + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) -= OPJ_Dc(0); + } + i = 1; + if (i < sn) { + __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + + (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); + for (; i < sn; i++) { + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Si_0 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Si_1 = *(const __m128i*)(tmp + + (i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Si_0 = _mm_sub_epi32(xmm_Si_0, + _mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1)); + xmm_Si_1 = _mm_sub_epi32(xmm_Si_1, + _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1)); + *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; + 
*(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; + xmm_Dim1_0 = xmm_Di_0; + xmm_Dim1_1 = xmm_Di_1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= OPJ_Dc(i - 1); + } + } + i = 0; + if (i + 1 < dn) { + __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0)); + __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1)); + const __m128i xmm_two = _mm_set1_epi32(2); + for (; i + 1 < dn; i++) { + __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + + (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); + __m128i xmm_Di_0 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 0); + __m128i xmm_Di_1 = *(const __m128i*)(tmp + + (1 + i * 2) * NB_ELTS_V8 + 4 * 1); + xmm_Di_0 = _mm_add_epi32(xmm_Di_0, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2)); + xmm_Di_1 = _mm_add_epi32(xmm_Di_1, + _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2)); + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; + *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; + xmm_Si_0 = xmm_Sip1_0; + xmm_Si_1 = xmm_Sip1_1; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; + } + } + } +#else + if (even) { + OPJ_UINT32 c; + if (height > 1) { + OPJ_UINT32 i; + for (i = 0; i + 1 < sn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) -= OPJ_Sc(i); + } + } + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; + } + for (i = 1; i < dn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; + } + } + } + } else { + OPJ_UINT32 c; + if (height == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) *= 2; + } + } else { + OPJ_UINT32 i; + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(0) -= OPJ_Dc(0); + } + for (i = 1; i < sn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1; + } + } + if (((height) % 2) == 1) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Sc(i) -= OPJ_Dc(i - 1); + } + } + for (i = 0; i + 1 < dn; i++) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2; + } + } + if (((height) % 2) == 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; + } + } + } + } +#endif + + if (cols == NB_ELTS_V8) { + opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, NB_ELTS_V8); + } else { + opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 
0 : 1, cols); + } +} + +static void opj_v8dwt_encode_step1(OPJ_FLOAT32* fw, + OPJ_UINT32 end, + const OPJ_FLOAT32 cst) +{ + OPJ_UINT32 i; +#ifdef __SSE__ + __m128* vw = (__m128*) fw; + const __m128 vcst = _mm_set1_ps(cst); + for (i = 0; i < end; ++i) { + vw[0] = _mm_mul_ps(vw[0], vcst); + vw[1] = _mm_mul_ps(vw[1], vcst); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + } +#else + OPJ_UINT32 c; + for (i = 0; i < end; ++i) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[i * 2 * NB_ELTS_V8 + c] *= cst; + } + } +#endif +} + +static void opj_v8dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, + OPJ_UINT32 end, + OPJ_UINT32 m, + OPJ_FLOAT32 cst) +{ + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); +#ifdef __SSE__ + __m128* vw = (__m128*) fw; + __m128 vcst = _mm_set1_ps(cst); + if (imax > 0) { + __m128* vl = (__m128*) fl; + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), vcst)); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + i = 1; + + for (; i < imax; ++i) { + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), vcst)); + vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); + } + } + if (m < end) { + assert(m + 1 == end); + vcst = _mm_add_ps(vcst, vcst); + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(vw[-4], vcst)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(vw[-3], vcst)); + } +#else + OPJ_INT32 c; + if (imax > 0) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * NB_ELTS_V8 + c] += (fl[0 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * + cst; + } + fw += 2 * NB_ELTS_V8; + i = 1; + for (; i < imax; ++i) { + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * NB_ELTS_V8 + c] += (fw[-2 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * + cst; + } + fw += 2 * NB_ELTS_V8; + } + } + if (m < end) { + assert(m + 1 == end); + for (c = 0; c < NB_ELTS_V8; c++) { + fw[-1 * NB_ELTS_V8 + c] += (2 * fw[-2 * NB_ELTS_V8 + c]) * cst; + } + } +#endif +} + +/* Forward 9-7 transform, for the vertical pass, processing cols columns */ +/* where cols <= NB_ELTS_V8 */ +static void opj_dwt_encode_and_deinterleave_v_real( + void *arrayIn, + void *tmpIn, + OPJ_UINT32 height, + OPJ_BOOL even, + OPJ_UINT32 stride_width, + OPJ_UINT32 cols) +{ + OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn; + OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn; + const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 
1 : 0)) >> 1); + const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn); + OPJ_INT32 a, b; + + if (height == 1) { + return; + } + + opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); + + if (even) { + a = 0; + b = 1; + } else { + a = 1; + b = 0; + } + opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, + tmp + (b + 1) * NB_ELTS_V8, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_alpha); + opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, + tmp + (a + 1) * NB_ELTS_V8, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_beta); + opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, + tmp + (b + 1) * NB_ELTS_V8, + (OPJ_UINT32)dn, + (OPJ_UINT32)opj_int_min(dn, sn - b), + opj_dwt_gamma); + opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, + tmp + (a + 1) * NB_ELTS_V8, + (OPJ_UINT32)sn, + (OPJ_UINT32)opj_int_min(sn, dn - a), + opj_dwt_delta); + opj_v8dwt_encode_step1(tmp + b * NB_ELTS_V8, (OPJ_UINT32)dn, + opj_K); + opj_v8dwt_encode_step1(tmp + a * NB_ELTS_V8, (OPJ_UINT32)sn, + opj_invK); + + + if (cols == NB_ELTS_V8) { + opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, + (OPJ_INT32*)array, + (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, NB_ELTS_V8); + } else { + opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, + (OPJ_INT32*)array, + (OPJ_INT32)dn, (OPJ_INT32)sn, + stride_width, even ? 0 : 1, cols); + } +} + /* */ /* Forward 5-3 wavelet transform in 2-D. */ /* */ -static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, - void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)) +static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t * tilec, + opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, + opj_encode_and_deinterleave_h_one_row_fnptr_type + p_encode_and_deinterleave_h_one_row) { - OPJ_INT32 i, j, k; - OPJ_INT32 *a = 00; - OPJ_INT32 *aj = 00; + OPJ_INT32 i; OPJ_INT32 *bj = 00; - OPJ_INT32 w, l; + OPJ_UINT32 w; + OPJ_INT32 l; - OPJ_INT32 rw; /* width of the resolution level computed */ - OPJ_INT32 rh; /* height of the resolution level computed */ OPJ_SIZE_T l_data_size; opj_tcd_resolution_t * l_cur_res = 0; opj_tcd_resolution_t * l_last_res = 0; + const int num_threads = opj_thread_pool_get_thread_count(tp); + OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data; - w = tilec->x1 - tilec->x0; + w = (OPJ_UINT32)(tilec->x1 - tilec->x0); l = (OPJ_INT32)tilec->numresolutions - 1; - a = tilec->data; l_cur_res = tilec->resolutions + l; l_last_res = l_cur_res - 1; l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) { /* FIXME event manager error callback */ return OPJ_FALSE; } - l_data_size *= sizeof(OPJ_INT32); - bj = (OPJ_INT32*)opj_malloc(l_data_size); + l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32); + bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ /* in that case, so do not error out */ if (l_data_size != 0 && ! 
bj) { @@ -1145,43 +1754,135 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, i = l; while (i--) { - OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ - OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ + OPJ_UINT32 j; + OPJ_UINT32 rw; /* width of the resolution level computed */ + OPJ_UINT32 rh; /* height of the resolution level computed */ + OPJ_UINT32 + rw1; /* width of the resolution level once lower than computed one */ + OPJ_UINT32 + rh1; /* height of the resolution level once lower than computed one */ OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ OPJ_INT32 dn, sn; - rw = l_cur_res->x1 - l_cur_res->x0; - rh = l_cur_res->y1 - l_cur_res->y0; - rw1 = l_last_res->x1 - l_last_res->x0; - rh1 = l_last_res->y1 - l_last_res->y0; + rw = (OPJ_UINT32)(l_cur_res->x1 - l_cur_res->x0); + rh = (OPJ_UINT32)(l_cur_res->y1 - l_cur_res->y0); + rw1 = (OPJ_UINT32)(l_last_res->x1 - l_last_res->x0); + rh1 = (OPJ_UINT32)(l_last_res->y1 - l_last_res->y0); cas_row = l_cur_res->x0 & 1; cas_col = l_cur_res->y0 & 1; - sn = rh1; - dn = rh - rh1; - for (j = 0; j < rw; ++j) { - aj = a + j; - for (k = 0; k < rh; ++k) { - bj[k] = aj[k * w]; + sn = (OPJ_INT32)rh1; + dn = (OPJ_INT32)(rh - rh1); + + /* Perform vertical pass */ + if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { + for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) { + p_encode_and_deinterleave_v(tiledp + j, + bj, + rh, + cas_col == 0, + w, + NB_ELTS_V8); } + if (j < rw) { + p_encode_and_deinterleave_v(tiledp + j, + bj, + rh, + cas_col == 0, + w, + rw - j); + } + } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; - (*p_function)(bj, dn, sn, cas_col); + if (rw < num_jobs) { + num_jobs = rw; + } + step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; - opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); + for (j = 0; j < num_jobs; j++) { + opj_dwt_encode_v_job_t* job; + + job = (opj_dwt_encode_v_job_t*) opj_malloc(sizeof(opj_dwt_encode_v_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); + if (!job->v.mem) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->v.dn = dn; + job->v.sn = sn; + job->v.cas = cas_col; + job->rh = rh; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j; + job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v; + opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job); + } + opj_thread_pool_wait_completion(tp, 0); } - sn = rw1; - dn = rw - rw1; + sn = (OPJ_INT32)rw1; + dn = (OPJ_INT32)(rw - rw1); - for (j = 0; j < rh; j++) { - aj = a + j * w; - for (k = 0; k < rw; k++) { - bj[k] = aj[k]; + /* Perform horizontal pass */ + if (num_threads <= 1 || rh <= 1) { + for (j = 0; j < rh; j++) { + OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w; + (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw, + cas_row == 0 ? 
OPJ_TRUE : OPJ_FALSE); } - (*p_function)(bj, dn, sn, cas_row); - opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); + } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; + + if (rh < num_jobs) { + num_jobs = rh; + } + step_j = (rh / num_jobs); + + for (j = 0; j < num_jobs; j++) { + opj_dwt_encode_h_job_t* job; + + job = (opj_dwt_encode_h_job_t*) opj_malloc(sizeof(opj_dwt_encode_h_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); + if (!job->h.mem) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(bj); + return OPJ_FALSE; + } + job->h.dn = dn; + job->h.sn = sn; + job->h.cas = cas_row; + job->rw = rw; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1U) * step_j; /* this can overflow */ + if (j == (num_jobs - 1U)) { /* this will take care of the overflow */ + job->max_j = rh; + } + job->p_function = p_encode_and_deinterleave_h_one_row; + opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job); + } + opj_thread_pool_wait_completion(tp, 0); } l_cur_res = l_last_res; @@ -1189,15 +1890,18 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, --l_last_res; } - opj_free(bj); + opj_aligned_free(bj); return OPJ_TRUE; } /* Forward 5-3 wavelet transform in 2-D. */ /* */ -OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) +OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec) { - return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1); + return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, + opj_dwt_encode_and_deinterleave_v, + opj_dwt_encode_and_deinterleave_h_one_row); } /* */ @@ -1213,21 +1917,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, } } - -/* */ -/* Get gain of 5-3 wavelet transform. */ -/* */ -OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) -{ - if (orient == 0) { - return 0; - } - if (orient == 1 || orient == 2) { - return 1; - } - return 2; -} - /* */ /* Get norm of 5-3 wavelet. */ /* */ @@ -1247,18 +1936,12 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) /* */ /* Forward 9-7 wavelet transform in 2-D. */ /* */ -OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) +OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec) { - return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1_real); -} - -/* */ -/* Get gain of 9-7 wavelet transform. 
*/ -/* */ -OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) -{ - (void)orient; - return 0; + return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, + opj_dwt_encode_and_deinterleave_v_real, + opj_dwt_encode_and_deinterleave_h_one_row_real); } /* */ @@ -1293,7 +1976,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { stepsize = 1.0; } else { - OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; + OPJ_FLOAT64 norm = opj_dwt_getnorm_real(level, orient); stepsize = (1 << (gain)) / norm; } opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), @@ -1328,15 +2011,15 @@ typedef struct { OPJ_INT32 * OPJ_RESTRICT tiledp; OPJ_UINT32 min_j; OPJ_UINT32 max_j; -} opj_dwd_decode_h_job_t; +} opj_dwt_decode_h_job_t; static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) { OPJ_UINT32 j; - opj_dwd_decode_h_job_t* job; + opj_dwt_decode_h_job_t* job; (void)tls; - job = (opj_dwd_decode_h_job_t*)user_data; + job = (opj_dwt_decode_h_job_t*)user_data; for (j = job->min_j; j < job->max_j; j++) { opj_idwt53_h(&job->h, &job->tiledp[j * job->w]); } @@ -1352,15 +2035,15 @@ typedef struct { OPJ_INT32 * OPJ_RESTRICT tiledp; OPJ_UINT32 min_j; OPJ_UINT32 max_j; -} opj_dwd_decode_v_job_t; +} opj_dwt_decode_v_job_t; static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) { OPJ_UINT32 j; - opj_dwd_decode_v_job_t* job; + opj_dwt_decode_v_job_t* job; (void)tls; - job = (opj_dwd_decode_v_job_t*)user_data; + job = (opj_dwt_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, @@ -1447,9 +2130,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, step_j = (rh / num_jobs); for (j = 0; j < num_jobs; j++) { - opj_dwd_decode_h_job_t* job; + opj_dwt_decode_h_job_t* job; - job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); + job = (opj_dwt_decode_h_job_t*) opj_malloc(sizeof(opj_dwt_decode_h_job_t)); if (!job) { /* It would be nice to fallback to single thread case, but */ /* unfortunately some jobs may be launched and have modified */ @@ -1502,9 +2185,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, step_j = (rw / num_jobs); for (j = 0; j < num_jobs; j++) { - opj_dwd_decode_v_job_t* job; + opj_dwt_decode_v_job_t* job; - job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); + job = (opj_dwt_decode_v_job_t*) opj_malloc(sizeof(opj_dwt_decode_v_job_t)); if (!job) { /* It would be nice to fallback to single thread case, but */ /* unfortunately some jobs may be launched and have modified */ @@ -2168,7 +2851,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( return OPJ_TRUE; } -static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, +static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_UINT32 width, OPJ_UINT32 remaining_height) @@ -2179,39 +2862,69 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 x1 = dwt->win_l_x1; for (k = 0; k < 2; ++k) { - if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 && - ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) { + if (remaining_height >= NB_ELTS_V8 && ((OPJ_SIZE_T) a & 0x0f) == 0 && + ((OPJ_SIZE_T) bi & 0x0f) == 0) { /* Fast code path */ for (i = x0; i < x1; ++i) { OPJ_UINT32 j = i; - bi[i * 8 ] = a[j]; + OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; + dst[0] = a[j]; j += width; - bi[i * 8 + 1] = a[j]; + dst[1] 
= a[j]; j += width; - bi[i * 8 + 2] = a[j]; + dst[2] = a[j]; j += width; - bi[i * 8 + 3] = a[j]; + dst[3] = a[j]; + j += width; + dst[4] = a[j]; + j += width; + dst[5] = a[j]; + j += width; + dst[6] = a[j]; + j += width; + dst[7] = a[j]; } } else { /* Slow code path */ for (i = x0; i < x1; ++i) { OPJ_UINT32 j = i; - bi[i * 8 ] = a[j]; + OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; + dst[0] = a[j]; j += width; if (remaining_height == 1) { continue; } - bi[i * 8 + 1] = a[j]; + dst[1] = a[j]; j += width; if (remaining_height == 2) { continue; } - bi[i * 8 + 2] = a[j]; + dst[2] = a[j]; j += width; if (remaining_height == 3) { continue; } - bi[i * 8 + 3] = a[j]; /* This one*/ + dst[3] = a[j]; + j += width; + if (remaining_height == 4) { + continue; + } + dst[4] = a[j]; + j += width; + if (remaining_height == 5) { + continue; + } + dst[5] = a[j]; + j += width; + if (remaining_height == 6) { + continue; + } + dst[6] = a[j]; + j += width; + if (remaining_height == 7) { + continue; + } + dst[7] = a[j]; } } @@ -2222,7 +2935,7 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, } } -static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, +static void opj_v8dwt_interleave_partial_h(opj_v8dwt_t* dwt, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_line, OPJ_UINT32 remaining_height) @@ -2235,25 +2948,25 @@ static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, dwt->win_l_x1, sa_line + i + 1, /* Nasty cast from float* to int32* */ (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, - 8, 0, OPJ_TRUE); + 2 * NB_ELTS_V8, 0, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_line + i, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_line + i + 1, /* Nasty cast from float* to int32* */ (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, - 8, 0, OPJ_TRUE); + 2 * NB_ELTS_V8, 0, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } } -static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, - OPJ_FLOAT32* OPJ_RESTRICT a, - OPJ_UINT32 width, - OPJ_UINT32 nb_elts_read) +static INLINE void opj_v8dwt_interleave_v(opj_v8dwt_t* OPJ_RESTRICT dwt, + OPJ_FLOAT32* OPJ_RESTRICT a, + OPJ_UINT32 width, + OPJ_UINT32 nb_elts_read) { - opj_v4_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; + opj_v8_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; OPJ_UINT32 i; for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { @@ -2270,7 +2983,7 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, } } -static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, +static void opj_v8dwt_interleave_partial_v(opj_v8dwt_t* OPJ_RESTRICT dwt, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_col, OPJ_UINT32 nb_elts_read) @@ -2280,44 +2993,36 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, sa_col, dwt->win_l_x0, sa_col + nb_elts_read, dwt->win_l_x1, (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0), - 1, 8, OPJ_TRUE); + 1, 2 * NB_ELTS_V8, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0), - 1, 8, OPJ_TRUE); + 1, 2 * NB_ELTS_V8, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } #ifdef __SSE__ -static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, +static void opj_v8dwt_decode_step1_sse(opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, const __m128 c) { __m128* OPJ_RESTRICT vw = (__m128*) w; - OPJ_UINT32 i; - /* 4x unrolled loop */ - vw 
+= 2 * start; - for (i = start; i + 3 < end; i += 4, vw += 8) { - __m128 xmm0 = _mm_mul_ps(vw[0], c); - __m128 xmm2 = _mm_mul_ps(vw[2], c); - __m128 xmm4 = _mm_mul_ps(vw[4], c); - __m128 xmm6 = _mm_mul_ps(vw[6], c); - vw[0] = xmm0; - vw[2] = xmm2; - vw[4] = xmm4; - vw[6] = xmm6; - } - for (; i < end; ++i, vw += 2) { + OPJ_UINT32 i = start; + /* To be adapted if NB_ELTS_V8 changes */ + vw += 4 * start; + /* Note: attempt at loop unrolling x2 doesn't help */ + for (; i < end; ++i, vw += 4) { vw[0] = _mm_mul_ps(vw[0], c); + vw[1] = _mm_mul_ps(vw[1], c); } } -static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, +static void opj_v8dwt_decode_step2_sse(opj_v8_t* l, opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, OPJ_UINT32 m, @@ -2325,74 +3030,58 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, { __m128* OPJ_RESTRICT vl = (__m128*) l; __m128* OPJ_RESTRICT vw = (__m128*) w; + /* To be adapted if NB_ELTS_V8 changes */ OPJ_UINT32 i; OPJ_UINT32 imax = opj_uint_min(end, m); - __m128 tmp1, tmp2, tmp3; if (start == 0) { - tmp1 = vl[0]; + if (imax >= 1) { + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), c)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), c)); + vw += 4; + start = 1; + } } else { - vw += start * 2; - tmp1 = vw[-3]; + vw += start * 4; } i = start; - - /* 4x loop unrolling */ - for (; i + 3 < imax; i += 4) { - __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; - tmp2 = vw[-1]; - tmp3 = vw[ 0]; - tmp4 = vw[ 1]; - tmp5 = vw[ 2]; - tmp6 = vw[ 3]; - tmp7 = vw[ 4]; - tmp8 = vw[ 5]; - tmp9 = vw[ 6]; - vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); - vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c)); - vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c)); - vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c)); - tmp1 = tmp9; - vw += 8; - } - + /* Note: attempt at loop unrolling x2 doesn't help */ for (; i < imax; ++i) { - tmp2 = vw[-1]; - tmp3 = vw[ 0]; - vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); - tmp1 = tmp3; - vw += 2; + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), c)); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), c)); + vw += 4; } if (m < end) { assert(m + 1 == end); c = _mm_add_ps(c, c); - c = _mm_mul_ps(c, vw[-2]); - vw[-1] = _mm_add_ps(vw[-1], c); + vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(c, vw[-4])); + vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(c, vw[-3])); } } #else -static void opj_v4dwt_decode_step1(opj_v4_t* w, +static void opj_v8dwt_decode_step1(opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, const OPJ_FLOAT32 c) { OPJ_FLOAT32* OPJ_RESTRICT fw = (OPJ_FLOAT32*) w; OPJ_UINT32 i; + /* To be adapted if NB_ELTS_V8 changes */ for (i = start; i < end; ++i) { - OPJ_FLOAT32 tmp1 = fw[i * 8 ]; - OPJ_FLOAT32 tmp2 = fw[i * 8 + 1]; - OPJ_FLOAT32 tmp3 = fw[i * 8 + 2]; - OPJ_FLOAT32 tmp4 = fw[i * 8 + 3]; - fw[i * 8 ] = tmp1 * c; - fw[i * 8 + 1] = tmp2 * c; - fw[i * 8 + 2] = tmp3 * c; - fw[i * 8 + 3] = tmp4 * c; + fw[i * 2 * 8 ] = fw[i * 2 * 8 ] * c; + fw[i * 2 * 8 + 1] = fw[i * 2 * 8 + 1] * c; + fw[i * 2 * 8 + 2] = fw[i * 2 * 8 + 2] * c; + fw[i * 2 * 8 + 3] = fw[i * 2 * 8 + 3] * c; + fw[i * 2 * 8 + 4] = fw[i * 2 * 8 + 4] * c; + fw[i * 2 * 8 + 5] = fw[i * 2 * 8 + 5] * c; + fw[i * 2 * 8 + 6] = fw[i * 2 * 8 + 6] * c; + fw[i * 2 * 8 + 7] = fw[i * 2 * 8 + 7] * c; } } -static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, +static void opj_v8dwt_decode_step2(opj_v8_t* l, opj_v8_t* w, OPJ_UINT32 start, OPJ_UINT32 end, 
OPJ_UINT32 m, @@ -2403,36 +3092,33 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_UINT32 i; OPJ_UINT32 imax = opj_uint_min(end, m); if (start > 0) { - fw += 8 * start; - fl = fw - 8; + fw += 2 * NB_ELTS_V8 * start; + fl = fw - 2 * NB_ELTS_V8; } + /* To be adapted if NB_ELTS_V8 changes */ for (i = start; i < imax; ++i) { - OPJ_FLOAT32 tmp1_1 = fl[0]; - OPJ_FLOAT32 tmp1_2 = fl[1]; - OPJ_FLOAT32 tmp1_3 = fl[2]; - OPJ_FLOAT32 tmp1_4 = fl[3]; - OPJ_FLOAT32 tmp2_1 = fw[-4]; - OPJ_FLOAT32 tmp2_2 = fw[-3]; - OPJ_FLOAT32 tmp2_3 = fw[-2]; - OPJ_FLOAT32 tmp2_4 = fw[-1]; - OPJ_FLOAT32 tmp3_1 = fw[0]; - OPJ_FLOAT32 tmp3_2 = fw[1]; - OPJ_FLOAT32 tmp3_3 = fw[2]; - OPJ_FLOAT32 tmp3_4 = fw[3]; - fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); - fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); - fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); - fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); + fw[-8] = fw[-8] + ((fl[0] + fw[0]) * c); + fw[-7] = fw[-7] + ((fl[1] + fw[1]) * c); + fw[-6] = fw[-6] + ((fl[2] + fw[2]) * c); + fw[-5] = fw[-5] + ((fl[3] + fw[3]) * c); + fw[-4] = fw[-4] + ((fl[4] + fw[4]) * c); + fw[-3] = fw[-3] + ((fl[5] + fw[5]) * c); + fw[-2] = fw[-2] + ((fl[6] + fw[6]) * c); + fw[-1] = fw[-1] + ((fl[7] + fw[7]) * c); fl = fw; - fw += 8; + fw += 2 * NB_ELTS_V8; } if (m < end) { assert(m + 1 == end); c += c; - fw[-4] = fw[-4] + fl[0] * c; - fw[-3] = fw[-3] + fl[1] * c; - fw[-2] = fw[-2] + fl[2] * c; - fw[-1] = fw[-1] + fl[3] * c; + fw[-8] = fw[-8] + fl[0] * c; + fw[-7] = fw[-7] + fl[1] * c; + fw[-6] = fw[-6] + fl[2] * c; + fw[-5] = fw[-5] + fl[3] * c; + fw[-4] = fw[-4] + fl[4] * c; + fw[-3] = fw[-3] + fl[5] * c; + fw[-2] = fw[-2] + fl[6] * c; + fw[-1] = fw[-1] + fl[7] * c; } } @@ -2441,9 +3127,17 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, /* */ /* Inverse 9-7 wavelet transform in 1-D. 
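
The eight explicit update lines per iteration above are one lifting step applied to all eight lanes at once. On a single lane the same update takes the textbook form (illustrative helper, assuming `other` exposes n + 1 readable values):

    /* Illustration only: one 9/7 lifting update. Each coefficient of one
       parity band is incremented by c times the sum of its two
       neighbours in the other band; dwt.c runs this on 8 lanes at once. */
    static void lift_update(float* band, const float* other,
                            unsigned n, float c)
    {
        unsigned i;
        for (i = 0; i < n; ++i) {
            band[i] += c * (other[i] + other[i + 1]);
        }
    }
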
*/ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) +static void opj_v8dwt_decode(opj_v8dwt_t* OPJ_RESTRICT dwt) { OPJ_INT32 a, b; + /* BUG_WEIRD_TWO_INVK (look for this identifier in tcd.c) */ + /* Historic value for 2 / opj_invK */ + /* Normally, we should use invK, but if we do so, we have failures in the */ + /* conformance test, due to MSE and peak errors significantly higher than */ + /* accepted value */ + /* Due to using two_invK instead of invK, we have to compensate in tcd.c */ + /* the computation of the stepsize for the non LL subbands */ + const float two_invK = 1.625732422f; if (dwt->cas == 0) { if (!((dwt->dn > 0) || (dwt->sn > 1))) { return; @@ -2458,60 +3152,147 @@ static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) b = 0; } #ifdef __SSE__ - opj_v4dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + opj_v8dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, _mm_set1_ps(opj_K)); - opj_v4dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, - _mm_set1_ps(opj_c13318)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + opj_v8dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, + _mm_set1_ps(two_invK)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - _mm_set1_ps(opj_dwt_delta)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + _mm_set1_ps(-opj_dwt_delta)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - _mm_set1_ps(opj_dwt_gamma)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + _mm_set1_ps(-opj_dwt_gamma)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - _mm_set1_ps(opj_dwt_beta)); - opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + _mm_set1_ps(-opj_dwt_beta)); + opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - _mm_set1_ps(opj_dwt_alpha)); + _mm_set1_ps(-opj_dwt_alpha)); #else - opj_v4dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + opj_v8dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, opj_K); - opj_v4dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, - opj_c13318); - opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + opj_v8dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, + two_invK); + opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - opj_dwt_delta); - opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + -opj_dwt_delta); + opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), - opj_dwt_gamma); - opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + -opj_dwt_gamma); + opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, dwt->win_l_x0, dwt->win_l_x1, (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), - opj_dwt_beta); - opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + -opj_dwt_beta); + opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, dwt->win_h_x0, dwt->win_h_x1, (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - 
b), - opj_dwt_alpha); + -opj_dwt_alpha); #endif } +typedef struct { + opj_v8dwt_t h; + OPJ_UINT32 rw; + OPJ_UINT32 w; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 nb_rows; +} opj_dwt97_decode_h_job_t; + +static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt97_decode_h_job_t* job; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 w; + (void)tls; + + job = (opj_dwt97_decode_h_job_t*)user_data; + w = job->w; + + assert((job->nb_rows % NB_ELTS_V8) == 0); + + aj = job->aj; + for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) { + OPJ_UINT32 k; + opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8); + opj_v8dwt_decode(&job->h); + + /* To be adapted if NB_ELTS_V8 changes */ + for (k = 0; k < job->rw; k++) { + aj[k ] = job->h.wavelet[k].f[0]; + aj[k + (OPJ_SIZE_T)w ] = job->h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = job->h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = job->h.wavelet[k].f[3]; + } + for (k = 0; k < job->rw; k++) { + aj[k + (OPJ_SIZE_T)w * 4] = job->h.wavelet[k].f[4]; + aj[k + (OPJ_SIZE_T)w * 5] = job->h.wavelet[k].f[5]; + aj[k + (OPJ_SIZE_T)w * 6] = job->h.wavelet[k].f[6]; + aj[k + (OPJ_SIZE_T)w * 7] = job->h.wavelet[k].f[7]; + } + + aj += w * NB_ELTS_V8; + } + + opj_aligned_free(job->h.wavelet); + opj_free(job); +} + + +typedef struct { + opj_v8dwt_t v; + OPJ_UINT32 rh; + OPJ_UINT32 w; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + OPJ_UINT32 nb_columns; +} opj_dwt97_decode_v_job_t; + +static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls) +{ + OPJ_UINT32 j; + opj_dwt97_decode_v_job_t* job; + OPJ_FLOAT32 * OPJ_RESTRICT aj; + (void)tls; + + job = (opj_dwt97_decode_v_job_t*)user_data; + + assert((job->nb_columns % NB_ELTS_V8) == 0); + + aj = job->aj; + for (j = 0; j + NB_ELTS_V8 <= job->nb_columns; j += NB_ELTS_V8) { + OPJ_UINT32 k; + + opj_v8dwt_interleave_v(&job->v, aj, job->w, NB_ELTS_V8); + opj_v8dwt_decode(&job->v); + + for (k = 0; k < job->rh; ++k) { + memcpy(&aj[k * (OPJ_SIZE_T)job->w], &job->v.wavelet[k], + NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); + } + aj += NB_ELTS_V8; + } + + opj_aligned_free(job->v.wavelet); + opj_free(job); +} + /* */ /* Inverse 9-7 wavelet transform in 2-D. 
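
The two worker functions above each consume a pre-sliced strip of NB_ELTS_V8 rows or columns. The slicing arithmetic used by the submission loops further down is easiest to verify numerically (standalone example; the input values are made up for illustration):

    #include <stdio.h>

    #define NB_ELTS_V8 8

    int main(void)
    {
        unsigned rh = 103;          /* rows at this resolution (example) */
        unsigned num_threads = 4;
        unsigned num_jobs = num_threads;
        unsigned step_j, last, tail;

        if (rh / NB_ELTS_V8 < num_jobs) {
            num_jobs = rh / NB_ELTS_V8;
        }
        step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
        /* the last job takes whatever full strips remain */
        last = (rh & ~(unsigned)(NB_ELTS_V8 - 1)) - (num_jobs - 1) * step_j;
        tail = rh & (NB_ELTS_V8 - 1);   /* rows left to the caller */

        printf("jobs=%u step=%u last=%u tail=%u\n",
               num_jobs, step_j, last, tail);
        /* prints: jobs=4 step=24 last=24 tail=7 (3*24 + 24 + 7 = 103) */
        return 0;
    }
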
*/ /* */ static -OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, +OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp, + opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { - opj_v4dwt_t h; - opj_v4dwt_t v; + opj_v8dwt_t h; + opj_v8dwt_t v; opj_tcd_resolution_t* res = tilec->resolutions; @@ -2525,20 +3306,19 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); OPJ_SIZE_T l_data_size; + const int num_threads = opj_thread_pool_get_thread_count(tp); + + if (numres == 1) { + return OPJ_TRUE; + } l_data_size = opj_dwt_max_resolution(res, numres); /* overflow check */ - if (l_data_size > (SIZE_MAX - 5U)) { + if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { /* FIXME event manager error callback */ return OPJ_FALSE; } - l_data_size += 5U; - /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); + h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); if (!h.wavelet) { /* FIXME event manager error callback */ return OPJ_FALSE; @@ -2566,35 +3346,80 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, h.win_l_x1 = (OPJ_UINT32)h.sn; h.win_h_x0 = 0; h.win_h_x1 = (OPJ_UINT32)h.dn; - for (j = 0; j + 3 < rh; j += 4) { - OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); - opj_v4dwt_decode(&h); - for (k = 0; k < rw; k++) { - aj[k ] = h.wavelet[k].f[0]; - aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; - aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; - aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; + if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) { + for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { + OPJ_UINT32 k; + opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8); + opj_v8dwt_decode(&h); + + /* To be adapted if NB_ELTS_V8 changes */ + for (k = 0; k < rw; k++) { + aj[k ] = h.wavelet[k].f[0]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; + } + for (k = 0; k < rw; k++) { + aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4]; + aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5]; + aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6]; + aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7]; + } + + aj += w * NB_ELTS_V8; } + } else { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; - aj += w * 4; + if ((rh / NB_ELTS_V8) < num_jobs) { + num_jobs = rh / NB_ELTS_V8; + } + step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; + for (j = 0; j < num_jobs; j++) { + opj_dwt97_decode_h_job_t* job; + + job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!job->h.wavelet) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->h.dn = h.dn; + job->h.sn = h.sn; + job->h.cas = h.cas; + job->h.win_l_x0 = h.win_l_x0; + job->h.win_l_x1 = h.win_l_x1; + job->h.win_h_x0 = h.win_h_x0; + job->h.win_h_x1 = h.win_h_x1; + job->rw = rw; + job->w = w; + job->aj = aj; + job->nb_rows = (j + 1 == num_jobs) ? 
(rh & (OPJ_UINT32)~ + (NB_ELTS_V8 - 1)) - j * step_j : step_j; + aj += w * job->nb_rows; + opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job); + } + opj_thread_pool_wait_completion(tp, 0); + j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1); } if (j < rh) { OPJ_UINT32 k; - opj_v4dwt_interleave_h(&h, aj, w, rh - j); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_h(&h, aj, w, rh - j); + opj_v8dwt_decode(&h); for (k = 0; k < rw; k++) { - switch (rh - j) { - case 3: - aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; - /* FALLTHRU */ - case 2: - aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; - /* FALLTHRU */ - case 1: - aj[k] = h.wavelet[k].f[0]; + OPJ_UINT32 l; + for (l = 0; l < rh - j; l++) { + aj[k + (OPJ_SIZE_T)w * l ] = h.wavelet[k].f[l]; } } } @@ -2607,25 +3432,71 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, v.win_h_x1 = (OPJ_UINT32)v.dn; aj = (OPJ_FLOAT32*) tilec->data; - for (j = rw; j > 3; j -= 4) { - OPJ_UINT32 k; + if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { + for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) { + OPJ_UINT32 k; - opj_v4dwt_interleave_v(&v, aj, w, 4); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8); + opj_v8dwt_decode(&v); - for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + for (k = 0; k < rh; ++k) { + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); + } + aj += NB_ELTS_V8; } - aj += 4; + } else { + /* "bench_dwt -I" shows that scaling is poor, likely due to RAM + transfer being the limiting factor. So limit the number of + threads. + */ + OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U); + OPJ_UINT32 step_j; + + if ((rw / NB_ELTS_V8) < num_jobs) { + num_jobs = rw / NB_ELTS_V8; + } + step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; + for (j = 0; j < num_jobs; j++) { + opj_dwt97_decode_v_job_t* job; + + job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t)); + if (!job) { + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!job->v.wavelet) { + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.wavelet); + return OPJ_FALSE; + } + job->v.dn = v.dn; + job->v.sn = v.sn; + job->v.cas = v.cas; + job->v.win_l_x0 = v.win_l_x0; + job->v.win_l_x1 = v.win_l_x1; + job->v.win_h_x0 = v.win_h_x0; + job->v.win_h_x1 = v.win_h_x1; + job->rh = rh; + job->w = w; + job->aj = aj; + job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~ + (NB_ELTS_V8 - 1)) - j * step_j : step_j; + aj += job->nb_columns; + opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job); + } + opj_thread_pool_wait_completion(tp, 0); } - if (rw & 0x03) { + if (rw & (NB_ELTS_V8 - 1)) { OPJ_UINT32 k; - j = rw & 0x03; + j = rw & (NB_ELTS_V8 - 1); - opj_v4dwt_interleave_v(&v, aj, w, j); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_v(&v, aj, w, j); + opj_v8dwt_decode(&v); for (k = 0; k < rh; ++k) { memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], @@ -2643,8 +3514,8 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { opj_sparse_array_int32_t* sa; - opj_v4dwt_t h; - opj_v4dwt_t v; + opj_v8dwt_t h; + opj_v8dwt_t v; OPJ_UINT32 resno; /* This value matches the maximum left/right extension given in tables */ /* F.2 and F.3 of the standard. 
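
Both the whole-tile path above and the windowed path below now use a single division-based guard in place of the old `+ 5U` two-step check. The pattern in isolation (sketch; checked_array_alloc is a hypothetical wrapper, not an OpenJPEG function):

    #include <stdint.h>
    #include <stdlib.h>

    /* Sketch of the same guard: refuse any element count whose byte
       size would wrap around size_t, then allocate. */
    static void* checked_array_alloc(size_t n, size_t elem_size)
    {
        if (elem_size != 0 && n > SIZE_MAX / elem_size) {
            return NULL;            /* n * elem_size would overflow */
        }
        return malloc(n * elem_size);
    }
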
Note: in opj_tcd_is_subband_area_of_interest() */ @@ -2694,19 +3565,12 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, l_data_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ - if (l_data_size > (SIZE_MAX - 5U)) { + if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); return OPJ_FALSE; } - l_data_size += 5U; - /* overflow check */ - if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { - /* FIXME event manager error callback */ - opj_sparse_array_int32_free(sa); - return OPJ_FALSE; - } - h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); + h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); if (!h.wavelet) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); @@ -2801,17 +3665,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, h.win_l_x1 = win_ll_x1; h.win_h_x0 = win_hl_x0; h.win_h_x1 = win_hl_x1; - for (j = 0; j + 3 < rh; j += 4) { - if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || - (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && + for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { + if ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || + (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { - opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_partial_h(&h, sa, j, opj_uint_min(NB_ELTS_V8, rh - j)); + opj_v8dwt_decode(&h); if (!opj_sparse_array_int32_write(sa, win_tr_x0, j, - win_tr_x1, j + 4, + win_tr_x1, j + NB_ELTS_V8, (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], - 4, 1, OPJ_TRUE)) { + NB_ELTS_V8, 1, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2821,16 +3685,16 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, } if (j < rh && - ((j + 3 >= win_ll_y0 && j < win_ll_y1) || - (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && + ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || + (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn))) { - opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); - opj_v4dwt_decode(&h); + opj_v8dwt_interleave_partial_h(&h, sa, j, rh - j); + opj_v8dwt_decode(&h); if (!opj_sparse_array_int32_write(sa, win_tr_x0, j, win_tr_x1, rh, (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], - 4, 1, OPJ_TRUE)) { + NB_ELTS_V8, 1, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2842,17 +3706,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, v.win_l_x1 = win_ll_y1; v.win_h_x0 = win_lh_y0; v.win_h_x1 = win_lh_y1; - for (j = win_tr_x0; j < win_tr_x1; j += 4) { - OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); + for (j = win_tr_x0; j < win_tr_x1; j += NB_ELTS_V8) { + OPJ_UINT32 nb_elts = opj_uint_min(NB_ELTS_V8, win_tr_x1 - j); - opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); - opj_v4dwt_decode(&v); + opj_v8dwt_interleave_partial_v(&v, sa, j, nb_elts); + opj_v8dwt_decode(&v); if (!opj_sparse_array_int32_write(sa, j, win_tr_y0, j + nb_elts, win_tr_y1, (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0], - 1, 4, OPJ_TRUE)) { + 1, NB_ELTS_V8, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.wavelet); @@ -2885,7 +3749,7 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, OPJ_UINT32 numres) { if 
(p_tcd->whole_tile_decoding) { - return opj_dwt_decode_tile_97(tilec, numres); + return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres); } else { return opj_dwt_decode_partial_97(tilec, numres); } diff --git a/3rdparty/openjpeg/openjp2/dwt.h b/3rdparty/openjpeg/openjp2/dwt.h index 4f63e524a6..215061e6b9 100644 --- a/3rdparty/openjpeg/openjp2/dwt.h +++ b/3rdparty/openjpeg/openjp2/dwt.h @@ -56,9 +56,11 @@ DWT.C are used by some function in TCD.C. /** Forward 5-3 wavelet transform in 2-D. Apply a reversible DWT transform to a component of an image. +@param p_tcd TCD handle @param tilec Tile component information (current tile) */ -OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec); +OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec); /** Inverse 5-3 wavelet transform in 2-D. @@ -71,12 +73,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); -/** -Get the gain of a subband for the reversible 5-3 DWT. -@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) -@return Returns 0 if orient = 0, returns 1 if orient = 1 or 2, returns 2 otherwise -*/ -OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) ; /** Get the norm of a wavelet function of a subband at a specified level for the reversible 5-3 DWT. @param level Level of the wavelet function @@ -87,9 +83,11 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient); /** Forward 9-7 wavelet transform in 2-D. Apply an irreversible DWT transform to a component of an image. +@param p_tcd TCD handle @param tilec Tile component information (current tile) */ -OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec); +OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t * tilec); /** Inverse 9-7 wavelet transform in 2-D. Apply an irreversible inverse DWT transform to a component of an image. @@ -101,12 +99,6 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres); -/** -Get the gain of a subband for the irreversible 9-7 DWT. -@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) -@return Returns the gain of the 9-7 wavelet transform -*/ -OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient); /** Get the norm of a wavelet function of a subband at a specified level for the irreversible 9-7 DWT @param level Level of the wavelet function diff --git a/3rdparty/openjpeg/openjp2/j2k.c b/3rdparty/openjpeg/openjp2/j2k.c index 4169cd672b..8e343ab2e3 100644 --- a/3rdparty/openjpeg/openjp2/j2k.c +++ b/3rdparty/openjpeg/openjp2/j2k.c @@ -400,14 +400,14 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager); static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager); @@ -832,14 +832,14 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, * * @param p_j2k J2K codec. * @param p_data Output buffer - * @param p_total_data_size Output buffer size + * @param total_data_size Output buffer size * @param p_data_written Number of bytes written into stream * @param p_stream the stream to write data to. * @param p_manager the user event manager. 
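
For readers tracking the API removals in dwt.h above: the deleted opj_dwt_getgain implemented exactly the mapping its doc comment stated. Reconstructed from that comment as an illustration (5-3 reversible case):

    /* 0 -> LL, 1 -> HL, 2 -> LH, 3 -> HH; reconstructed illustration */
    static unsigned dwt_getgain_53(unsigned orient)
    {
        if (orient == 0) {
            return 0;               /* LL */
        }
        if (orient == 1 || orient == 2) {
            return 1;               /* HL / LH */
        }
        return 2;                   /* HH */
    }
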
*/ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, OPJ_UINT32 * p_data_written, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager); @@ -879,11 +879,13 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, /** * Writes the SOD marker (Start of data) * + * This also writes optional PLT markers (before SOD) + * * @param p_j2k J2K codec. * @param p_tile_coder FIXME DOC * @param p_data FIXME DOC * @param p_data_written FIXME DOC - * @param p_total_data_size FIXME DOC + * @param total_data_size FIXME DOC * @param p_stream the stream to write data to. * @param p_manager the user event manager. */ @@ -891,7 +893,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_tcd_t * p_tile_coder, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager); @@ -1219,6 +1221,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, * A nice message is outputted at errors. * * @param p_pocs the progression order changes. + * @param tileno the tile number of interest * @param p_nb_pocs the number of progression order changes. * @param p_nb_resolutions the number of resolutions. * @param numcomps the number of components @@ -1228,6 +1231,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, * @return true if the pocs are valid. */ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + OPJ_UINT32 tileno, OPJ_UINT32 p_nb_pocs, OPJ_UINT32 p_nb_resolutions, OPJ_UINT32 numcomps, @@ -1282,6 +1286,13 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, opj_event_mgr_t *p_manager); +static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, + opj_image_t *image, opj_event_mgr_t *p_manager); + +static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, + opj_image_t *image, + opj_event_mgr_t *p_manager); + /** * Checks for invalid number of tile-parts in SOT marker (TPsot==TNsot). See issue 254. * @@ -1615,6 +1626,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order) } static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + OPJ_UINT32 tileno, OPJ_UINT32 p_nb_pocs, OPJ_UINT32 p_nb_resolutions, OPJ_UINT32 p_num_comps, @@ -1628,7 +1640,8 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, OPJ_UINT32 step_r = p_num_comps * step_c; OPJ_UINT32 step_l = p_nb_resolutions * step_r; OPJ_BOOL loss = OPJ_FALSE; - OPJ_UINT32 layno0 = 0; + + assert(p_nb_pocs > 0); packet_array = (OPJ_UINT32*) opj_calloc(step_l * p_num_layers, sizeof(OPJ_UINT32)); @@ -1638,63 +1651,37 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, return OPJ_FALSE; } - if (p_nb_pocs == 0) { - opj_free(packet_array); - return OPJ_TRUE; - } + /* iterate through all the pocs that match our tile of interest. 
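
opj_j2k_check_poc_val flags every (layer, resolution, component) cell that a progression covers in one flat array, with index = layno * step_l + resno * step_r + compno * step_c. A worked example with tiny, made-up dimensions:

    #include <assert.h>

    int main(void)
    {
        /* example: 2 components, 3 resolutions, 2 layers */
        const unsigned step_c = 1;
        const unsigned step_r = 2 * step_c;  /* num_comps * step_c */
        const unsigned step_l = 3 * step_r;  /* num_res   * step_r */
        unsigned packet_array[2 * 6] = {0};  /* num_layers * step_l */

        /* mark the cell for layno=1, resno=2, compno=0 */
        packet_array[1 * step_l + 2 * step_r + 0 * step_c] = 1;
        assert(packet_array[10] == 1);       /* 6 + 4 + 0 */
        return 0;
    }
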
*/ + for (i = 0; i < p_nb_pocs; ++i) { + const opj_poc_t *poc = &p_pocs[i]; + if (tileno + 1 == poc->tile) { + index = step_r * poc->resno0; - index = step_r * p_pocs->resno0; - /* take each resolution for each poc */ - for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { - OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; + /* take each resolution for each poc */ + for (resno = poc->resno0 ; + resno < opj_uint_min(poc->resno1, p_nb_resolutions); ++resno) { + OPJ_UINT32 res_index = index + poc->compno0 * step_c; - /* take each comp of each resolution for each poc */ - for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { - OPJ_UINT32 comp_index = res_index + layno0 * step_l; + /* take each comp of each resolution for each poc */ + for (compno = poc->compno0 ; + compno < opj_uint_min(poc->compno1, p_num_comps); ++compno) { + /* The layer index always starts at zero for every progression. */ + const OPJ_UINT32 layno0 = 0; + OPJ_UINT32 comp_index = res_index + layno0 * step_l; - /* and finally take each layer of each res of ... */ - for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { - /*index = step_r * resno + step_c * compno + step_l * layno;*/ - packet_array[comp_index] = 1; - comp_index += step_l; - } + /* and finally take each layer of each res of ... */ + for (layno = layno0; layno < opj_uint_min(poc->layno1, p_num_layers); + ++layno) { + packet_array[comp_index] = 1; + comp_index += step_l; + } - res_index += step_c; - } - - index += step_r; - } - ++p_pocs; - - /* iterate through all the pocs */ - for (i = 1; i < p_nb_pocs ; ++i) { - OPJ_UINT32 l_last_layno1 = (p_pocs - 1)->layno1 ; - - layno0 = (p_pocs->layno1 > l_last_layno1) ? l_last_layno1 : 0; - index = step_r * p_pocs->resno0; - - /* take each resolution for each poc */ - for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { - OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; - - /* take each comp of each resolution for each poc */ - for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { - OPJ_UINT32 comp_index = res_index + layno0 * step_l; - - /* and finally take each layer of each res of ... 
*/ - for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { - /*index = step_r * resno + step_c * compno + step_l * layno;*/ - packet_array[comp_index] = 1; - comp_index += step_l; + res_index += step_c; } - res_index += step_c; + index += step_r; } - - index += step_r; } - - ++p_pocs; } index = 0; @@ -1702,7 +1689,13 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, for (resno = 0; resno < p_nb_resolutions; ++resno) { for (compno = 0; compno < p_num_comps; ++compno) { loss |= (packet_array[index] != 1); - /*index = step_r * resno + step_c * compno + step_l * layno;*/ +#ifdef DEBUG_VERBOSE + if (packet_array[index] != 1) { + fprintf(stderr, + "Missing packet in POC: layno=%d resno=%d compno=%d\n", + layno, resno, compno); + } +#endif index += step_c; } } @@ -2714,6 +2707,12 @@ static OPJ_BOOL opj_j2k_read_cod(opj_j2k_t *p_j2k, opj_read_bytes(p_header_data, &l_tcp->mct, 1); /* SGcod (C) */ ++p_header_data; + if (l_tcp->mct > 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid multiple component transformation\n"); + return OPJ_FALSE; + } + p_header_size -= 5; for (i = 0; i < l_image->numcomps; ++i) { l_tcp->tccps[i].csty = l_tcp->csty & J2K_CCP_CSTY_PRT; @@ -3452,6 +3451,28 @@ static OPJ_UINT32 opj_j2k_get_specific_header_sizes(opj_j2k_t *p_j2k) l_nb_bytes += opj_j2k_get_max_poc_size(p_j2k); + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + /* Reserve space for PLT markers */ + + OPJ_UINT32 i; + const opj_cp_t * l_cp = &(p_j2k->m_cp); + OPJ_UINT32 l_max_packet_count = 0; + for (i = 0; i < l_cp->th * l_cp->tw; ++i) { + l_max_packet_count = opj_uint_max(l_max_packet_count, + opj_get_encoding_packet_count(p_j2k->m_private_image, l_cp, i)); + } + /* Minimum 6 bytes per PLT marker, and at a minimum (taking a pessimistic */ + /* estimate of 4 bytes for a packet size), one can write */ + /* (65536-6) / 4 = 16382 paquet sizes per PLT marker */ + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT = + 6 * opj_uint_ceildiv(l_max_packet_count, 16382); + /* Maximum 5 bytes per packet to encode a full UINT32 */ + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += + l_nb_bytes += 5 * l_max_packet_count; + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += 1; + l_nb_bytes += p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; + } + /*** DEVELOPER CORNER, Add room for your headers ***/ return l_nb_bytes; @@ -4205,7 +4226,7 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, OPJ_UINT32 * p_data_written, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager @@ -4218,7 +4239,7 @@ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, OPJ_UNUSED(p_stream); - if (p_total_data_size < 12) { + if (total_data_size < 12) { opj_event_msg(p_manager, EVT_ERROR, "Not enough bytes in output buffer to write SOT marker\n"); return OPJ_FALSE; @@ -4611,17 +4632,105 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, return OPJ_TRUE; } +/** + * Write one or more PLT markers in the provided buffer + */ +static OPJ_BOOL opj_j2k_write_plt_in_memory(opj_j2k_t *p_j2k, + opj_tcd_marker_info_t* marker_info, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, + opj_event_mgr_t * p_manager) +{ + OPJ_BYTE Zplt = 0; + OPJ_UINT16 Lplt; + OPJ_BYTE* p_data_start = p_data; + OPJ_BYTE* p_data_Lplt = p_data + 2; + OPJ_UINT32 i; + + OPJ_UNUSED(p_j2k); + + opj_write_bytes(p_data, J2K_MS_PLT, 2); + p_data += 2; + + /* Reserve space for Lplt */ 
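
The loop that follows emits each packet length in the Iplt form: seven payload bits per byte, continuation bit 0x80 on every byte except the last, most-significant group first. The same encoding as a standalone sketch:

    #include <stdio.h>

    /* Illustration of the Iplt encoding used below: returns the number
       of bytes written to out (at most 5 for a 32-bit value). */
    static unsigned encode_iplt(unsigned packet_size, unsigned char out[5])
    {
        unsigned char tmp[5];
        unsigned n = 0, i;

        tmp[n++] = (unsigned char)(packet_size & 0x7f);
        packet_size >>= 7;
        while (packet_size > 0) {
            tmp[n++] = (unsigned char)((packet_size & 0x7f) | 0x80);
            packet_size >>= 7;
        }
        for (i = 0; i < n; i++) {       /* reverse: MSB group first */
            out[i] = tmp[n - 1 - i];
        }
        return n;
    }

    int main(void)
    {
        unsigned char buf[5];
        unsigned n = encode_iplt(300, buf);
        printf("%u bytes: %02X %02X\n", n,
               (unsigned)buf[0], (unsigned)buf[1]);
        /* prints: 2 bytes: 82 2C */
        return 0;
    }
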
+ p_data += 2; + + opj_write_bytes(p_data, Zplt, 1); + p_data += 1; + + Lplt = 3; + + for (i = 0; i < marker_info->packet_count; i++) { + OPJ_BYTE var_bytes[5]; + OPJ_UINT8 var_bytes_size = 0; + OPJ_UINT32 packet_size = marker_info->p_packet_size[i]; + + /* Packet size written in variable-length way, starting with LSB */ + var_bytes[var_bytes_size] = (OPJ_BYTE)(packet_size & 0x7f); + var_bytes_size ++; + packet_size >>= 7; + while (packet_size > 0) { + var_bytes[var_bytes_size] = (OPJ_BYTE)((packet_size & 0x7f) | 0x80); + var_bytes_size ++; + packet_size >>= 7; + } + + /* Check if that can fit in the current PLT marker. If not, finish */ + /* current one, and start a new one */ + if (Lplt + var_bytes_size > 65535) { + if (Zplt == 255) { + opj_event_msg(p_manager, EVT_ERROR, + "More than 255 PLT markers would be needed for current tile-part !\n"); + return OPJ_FALSE; + } + + /* Patch Lplt */ + opj_write_bytes(p_data_Lplt, Lplt, 2); + + /* Start new segment */ + opj_write_bytes(p_data, J2K_MS_PLT, 2); + p_data += 2; + + /* Reserve space for Lplt */ + p_data_Lplt = p_data; + p_data += 2; + + Zplt ++; + opj_write_bytes(p_data, Zplt, 1); + p_data += 1; + + Lplt = 3; + } + + Lplt = (OPJ_UINT16)(Lplt + var_bytes_size); + + /* Serialize variable-length packet size, starting with MSB */ + for (; var_bytes_size > 0; --var_bytes_size) { + opj_write_bytes(p_data, var_bytes[var_bytes_size - 1], 1); + p_data += 1; + } + } + + *p_data_written = (OPJ_UINT32)(p_data - p_data_start); + + /* Patch Lplt */ + opj_write_bytes(p_data_Lplt, Lplt, 2); + + return OPJ_TRUE; +} + static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_tcd_t * p_tile_coder, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, const opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager ) { opj_codestream_info_t *l_cstr_info = 00; OPJ_UINT32 l_remaining_data; + opj_tcd_marker_info_t* marker_info = NULL; /* preconditions */ assert(p_j2k != 00); @@ -4630,7 +4739,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, OPJ_UNUSED(p_stream); - if (p_total_data_size < 4) { + if (total_data_size < 4) { opj_event_msg(p_manager, EVT_ERROR, "Not enough bytes in output buffer to write SOD marker\n"); return OPJ_FALSE; @@ -4638,10 +4747,9 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, opj_write_bytes(p_data, J2K_MS_SOD, 2); /* SOD */ - p_data += 2; /* make room for the EOF marker */ - l_remaining_data = p_total_data_size - 4; + l_remaining_data = total_data_size - 4; /* update tile coder */ p_tile_coder->tp_num = @@ -4688,15 +4796,69 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, *p_data_written = 0; - if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, p_data, + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + marker_info = opj_tcd_marker_info_create( + p_j2k->m_specific_param.m_encoder.m_PLT); + if (marker_info == NULL) { + opj_event_msg(p_manager, EVT_ERROR, + "Cannot encode tile: opj_tcd_marker_info_create() failed\n"); + return OPJ_FALSE; + } + } + + if (l_remaining_data < + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough bytes in output buffer to write SOD marker\n"); + opj_tcd_marker_info_destroy(marker_info); + return OPJ_FALSE; + } + l_remaining_data -= p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; + + if (! 
opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, + p_data + 2, p_data_written, l_remaining_data, l_cstr_info, + marker_info, p_manager)) { opj_event_msg(p_manager, EVT_ERROR, "Cannot encode tile\n"); + opj_tcd_marker_info_destroy(marker_info); return OPJ_FALSE; } + /* For SOD */ *p_data_written += 2; + if (p_j2k->m_specific_param.m_encoder.m_PLT) { + OPJ_UINT32 l_data_written_PLT = 0; + OPJ_BYTE* p_PLT_buffer = (OPJ_BYTE*)opj_malloc( + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); + if (!p_PLT_buffer) { + opj_event_msg(p_manager, EVT_ERROR, "Cannot allocate memory\n"); + opj_tcd_marker_info_destroy(marker_info); + return OPJ_FALSE; + } + if (!opj_j2k_write_plt_in_memory(p_j2k, + marker_info, + p_PLT_buffer, + &l_data_written_PLT, + p_manager)) { + opj_tcd_marker_info_destroy(marker_info); + opj_free(p_PLT_buffer); + return OPJ_FALSE; + } + + assert(l_data_written_PLT <= + p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); + + /* Move PLT marker(s) before SOD */ + memmove(p_data + l_data_written_PLT, p_data, *p_data_written); + memcpy(p_data, p_PLT_buffer, l_data_written_PLT); + opj_free(p_PLT_buffer); + *p_data_written += l_data_written_PLT; + } + + opj_tcd_marker_info_destroy(marker_info); + return OPJ_TRUE; } @@ -5046,7 +5208,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, OPJ_FLOAT32 * l_rates = 0; OPJ_FLOAT32 l_sot_remove; OPJ_UINT32 l_bits_empty, l_size_pixel; - OPJ_UINT32 l_tile_size = 0; + OPJ_UINT64 l_tile_size = 0; OPJ_UINT32 l_last_res; OPJ_FLOAT32(* l_tp_stride_func)(opj_tcp_t *) = 00; @@ -5090,25 +5252,12 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_rates = l_tcp->rates; /* Modification of the RATE >> */ - if (*l_rates > 0.0f) { - *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * - (OPJ_UINT32)(l_y1 - l_y0))) - / - ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) - ) - - - l_offset; - } - - ++l_rates; - - for (k = 1; k < l_tcp->numlayers; ++k) { + for (k = 0; k < l_tcp->numlayers; ++k) { if (*l_rates > 0.0f) { - *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * - (OPJ_UINT32)(l_y1 - l_y0))) - / - ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) - ) + *l_rates = (OPJ_FLOAT32)(((OPJ_FLOAT64)l_size_pixel * (OPJ_UINT32)( + l_x1 - l_x0) * + (OPJ_UINT32)(l_y1 - l_y0)) + / ((*l_rates) * (OPJ_FLOAT32)l_bits_empty)) - l_offset; } @@ -5168,12 +5317,11 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_tile_size = 0; for (i = 0; i < l_image->numcomps; ++i) { - l_tile_size += (opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) - * - opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) - * - l_img_comp->prec - ); + l_tile_size += (OPJ_UINT64)opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) + * + opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) + * + l_img_comp->prec; ++l_img_comp; } @@ -5184,7 +5332,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, /* bin/test_tile_encoder 1 256 256 32 32 8 0 reversible_with_precinct.j2k 4 4 3 0 0 1 16 16 */ /* TODO revise this to take into account the overhead linked to the */ /* number of packets and number of code blocks in packets */ - l_tile_size = (OPJ_UINT32)(l_tile_size * 1.4 / 8); + l_tile_size = (OPJ_UINT64)((double)l_tile_size * 1.4 / 8); /* Arbitrary amount to make the following work: */ /* bin/test_tile_encoder 1 256 256 17 16 8 0 reversible_no_precinct.j2k 4 4 3 0 0 1 */ @@ -5192,14 +5340,21 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, l_tile_size += opj_j2k_get_specific_header_sizes(p_j2k); - p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = 
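
Since the tile body is encoded before the PLT size is known, the markers are built in a side buffer and spliced in front of the tile data with a shift-then-copy, as above. The idiom in isolation (sketch; names are hypothetical and the caller guarantees capacity):

    #include <string.h>

    /* Insert hdr_len bytes in front of payload_len bytes already at buf;
       buf must have room for payload_len + hdr_len. */
    static size_t prepend(unsigned char* buf, size_t payload_len,
                          const unsigned char* hdr, size_t hdr_len)
    {
        memmove(buf + hdr_len, buf, payload_len); /* shift payload right */
        memcpy(buf, hdr, hdr_len);                /* header into the gap */
        return payload_len + hdr_len;
    }
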
l_tile_size; + if (l_tile_size > UINT_MAX) { + l_tile_size = UINT_MAX; + } + + p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = (OPJ_UINT32)l_tile_size; p_j2k->m_specific_param.m_encoder.m_encoded_tile_data = (OPJ_BYTE *) opj_malloc(p_j2k->m_specific_param.m_encoder.m_encoded_tile_size); if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data == 00) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough memory to allocate m_encoded_tile_data. %u MB required\n", + (OPJ_UINT32)(l_tile_size / 1024 / 1024)); return OPJ_FALSE; } - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer = (OPJ_BYTE *) opj_malloc(5 * p_j2k->m_specific_param.m_encoder.m_total_tile_parts); @@ -6627,7 +6782,7 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, } /* Precincts */ - parameters->csty |= 0x01; + parameters->csty |= J2K_CP_CSTY_PRT; if (parameters->numresolution == 1) { parameters->res_spec = 1; parameters->prcw_init[0] = 128; @@ -6753,6 +6908,589 @@ static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, return OPJ_TRUE; } +static int opj_j2k_get_imf_max_NL(opj_cparameters_t *parameters, + opj_image_t *image) +{ + /* Decomposition levels */ + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32) + parameters->cp_tdx : image->x1; + switch (profile) { + case OPJ_PROFILE_IMF_2K: + return 5; + case OPJ_PROFILE_IMF_4K: + return 6; + case OPJ_PROFILE_IMF_8K: + return 7; + case OPJ_PROFILE_IMF_2K_R: { + if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + case OPJ_PROFILE_IMF_4K_R: { + if (XTsiz >= 4096) { + return 6; + } else if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + case OPJ_PROFILE_IMF_8K_R: { + if (XTsiz >= 8192) { + return 7; + } else if (XTsiz >= 4096) { + return 6; + } else if (XTsiz >= 2048) { + return 5; + } else if (XTsiz >= 1024) { + return 4; + } + break; + } + default: + break; + } + return -1; +} + +static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, + opj_image_t *image, opj_event_mgr_t *p_manager) +{ + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + + OPJ_UNUSED(p_manager); + + /* Override defaults set by opj_set_default_encoder_parameters */ + if (parameters->cblockw_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKW && + parameters->cblockh_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKH) { + parameters->cblockw_init = 32; + parameters->cblockh_init = 32; + } + + /* One tile part for each component */ + parameters->tp_flag = 'C'; + parameters->tp_on = 1; + + if (parameters->prog_order == OPJ_COMP_PARAM_DEFAULT_PROG_ORDER) { + parameters->prog_order = OPJ_CPRL; + } + + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + /* 9-7 transform */ + parameters->irreversible = 1; + } + + /* Adjust the number of resolutions if set to its defaults */ + if (parameters->numresolution == OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION && + image->x0 == 0 && + image->y0 == 0) { + const int max_NL = opj_j2k_get_imf_max_NL(parameters, image); + if (max_NL >= 0 && parameters->numresolution > max_NL) { + parameters->numresolution = max_NL + 1; + } + + /* Note: below is generic logic */ + if (!parameters->tile_size_on) { + while (parameters->numresolution > 0) { + if 
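
opj_j2k_get_imf_max_NL above caps the decomposition count per profile; for the reduced-resolution (_R) profiles the cap follows the tile width. The 2K_R rule in isolation (sketch of the same thresholds):

    /* Max decomposition levels for IMF 2K_R as a function of XTsiz
       (the tile width, falling back to image width when tiling is off). */
    static int imf_2k_r_max_nl(unsigned XTsiz)
    {
        if (XTsiz >= 2048) {
            return 5;
        }
        if (XTsiz >= 1024) {
            return 4;
        }
        return -1;                  /* tile too small for the profile */
    }
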
(image->x1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { + parameters->numresolution --; + continue; + } + if (image->y1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { + parameters->numresolution --; + continue; + } + break; + } + } + } + + /* Set defaults precincts */ + if (parameters->csty == 0) { + parameters->csty |= J2K_CP_CSTY_PRT; + if (parameters->numresolution == 1) { + parameters->res_spec = 1; + parameters->prcw_init[0] = 128; + parameters->prch_init[0] = 128; + } else { + int i; + parameters->res_spec = parameters->numresolution - 1; + for (i = 0; i < parameters->res_spec; i++) { + parameters->prcw_init[i] = 256; + parameters->prch_init[i] = 256; + } + } + } +} + +/* Table A.53 from JPEG2000 standard */ +static const OPJ_UINT16 tabMaxSubLevelFromMainLevel[] = { + 15, /* unspecified */ + 1, + 1, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 +}; + +static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, + opj_image_t *image, + opj_event_mgr_t *p_manager) +{ + OPJ_UINT32 i; + const OPJ_UINT16 rsiz = parameters->rsiz; + const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); + const OPJ_UINT16 mainlevel = OPJ_GET_IMF_MAINLEVEL(rsiz); + const OPJ_UINT16 sublevel = OPJ_GET_IMF_SUBLEVEL(rsiz); + const int NL = parameters->numresolution - 1; + const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32) + parameters->cp_tdx : image->x1; + OPJ_BOOL ret = OPJ_TRUE; + + /* Validate mainlevel */ + if (mainlevel > OPJ_IMF_MAINLEVEL_MAX) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require mainlevel <= 11.\n" + "-> %d is thus not compliant\n" + "-> Non-IMF codestream will be generated\n", + mainlevel); + ret = OPJ_FALSE; + } + + /* Validate sublevel */ + assert(sizeof(tabMaxSubLevelFromMainLevel) == + (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0])); + if (sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require sublevel <= %d for mainlevel = %d.\n" + "-> %d is thus not compliant\n" + "-> Non-IMF codestream will be generated\n", + tabMaxSubLevelFromMainLevel[mainlevel], + mainlevel, + sublevel); + ret = OPJ_FALSE; + } + + /* Number of components */ + if (image->numcomps > 3) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require at most 3 components.\n" + "-> Number of components of input image (%d) is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->numcomps); + ret = OPJ_FALSE; + } + + if (image->x0 != 0 || image->y0 != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require image origin to be at 0,0.\n" + "-> %d,%d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->x0, image->y0 != 0); + ret = OPJ_FALSE; + } + + if (parameters->cp_tx0 != 0 || parameters->cp_ty0 != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require tile origin to be at 0,0.\n" + "-> %d,%d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tx0, parameters->cp_ty0); + ret = OPJ_FALSE; + } + + if (parameters->tile_size_on) { + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + if ((OPJ_UINT32)parameters->cp_tdx < image->x1 || + (OPJ_UINT32)parameters->cp_tdy < image->y1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K single tile profiles require tile to be greater or equal to image size.\n" + "-> %d,%d is lesser than %d,%d\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tdx, + 
parameters->cp_tdy, + image->x1, + image->y1); + ret = OPJ_FALSE; + } + } else { + if ((OPJ_UINT32)parameters->cp_tdx >= image->x1 && + (OPJ_UINT32)parameters->cp_tdy >= image->y1) { + /* ok */ + } else if (parameters->cp_tdx == 1024 && + parameters->cp_tdy == 1024) { + /* ok */ + } else if (parameters->cp_tdx == 2048 && + parameters->cp_tdy == 2048 && + (profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K)) { + /* ok */ + } else if (parameters->cp_tdx == 4096 && + parameters->cp_tdy == 4096 && + profile == OPJ_PROFILE_IMF_8K) { + /* ok */ + } else { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R/4K_R/8K_R single/multiple tile profiles " + "require tile to be greater or equal to image size,\n" + "or to be (1024,1024), or (2048,2048) for 4K_R/8K_R " + "or (4096,4096) for 8K_R.\n" + "-> %d,%d is non conformant\n" + "-> Non-IMF codestream will be generated\n", + parameters->cp_tdx, + parameters->cp_tdy); + ret = OPJ_FALSE; + } + } + } + + /* Bitdepth */ + for (i = 0; i < image->numcomps; i++) { + if (!(image->comps[i].bpp >= 8 && image->comps[i].bpp <= 16) || + (image->comps[i].sgnd)) { + char signed_str[] = "signed"; + char unsigned_str[] = "unsigned"; + char *tmp_str = image->comps[i].sgnd ? signed_str : unsigned_str; + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require precision of each component to b in [8-16] bits unsigned" + "-> At least component %d of input image (%d bits, %s) is not compliant\n" + "-> Non-IMF codestream will be generated\n", + i, image->comps[i].bpp, tmp_str); + ret = OPJ_FALSE; + } + } + + /* Sub-sampling */ + for (i = 0; i < image->numcomps; i++) { + if (i == 0 && image->comps[i].dx != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz1 == 1. Here it is set to %d.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dx); + ret = OPJ_FALSE; + } + if (i == 1 && image->comps[i].dx != 1 && image->comps[i].dx != 2) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz2 == 1 or 2. Here it is set to %d.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dx); + ret = OPJ_FALSE; + } + if (i > 1 && image->comps[i].dx != image->comps[i - 1].dx) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require XRSiz%d to be the same as XRSiz2. " + "Here it is set to %d instead of %d.\n" + "-> Non-IMF codestream will be generated\n", + i + 1, image->comps[i].dx, image->comps[i - 1].dx); + ret = OPJ_FALSE; + } + if (image->comps[i].dy != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require YRsiz == 1. 
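
The branchy acceptance test above boils down to: a tile must cover the whole image, or be one of a short list of square sizes that widens with the profile. As a compact predicate (sketch; the enum is a stand-in for the OPJ_PROFILE_IMF_* constants):

    typedef enum { IMF_2K_R, IMF_4K_R, IMF_8K_R } imf_r_profile;

    /* Accepted tile geometry for the reduced-resolution IMF profiles. */
    static int imf_r_tile_ok(imf_r_profile p, unsigned tdx, unsigned tdy,
                             unsigned img_w, unsigned img_h)
    {
        if (tdx >= img_w && tdy >= img_h) {
            return 1;               /* single tile covering the image */
        }
        if (tdx == 1024 && tdy == 1024) {
            return 1;
        }
        if (tdx == 2048 && tdy == 2048 && p != IMF_2K_R) {
            return 1;
        }
        if (tdx == 4096 && tdy == 4096 && p == IMF_8K_R) {
            return 1;
        }
        return 0;
    }
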
" + "Here it is set to %d for component i.\n" + "-> Non-IMF codestream will be generated\n", + image->comps[i].dy, i); + ret = OPJ_FALSE; + } + } + + /* Image size */ + switch (profile) { + case OPJ_PROFILE_IMF_2K: + case OPJ_PROFILE_IMF_2K_R: + if (((image->comps[0].w > 2048) | (image->comps[0].h > 1556))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/2K_R profile require:\n" + "width <= 2048 and height <= 1556\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_4K: + case OPJ_PROFILE_IMF_4K_R: + if (((image->comps[0].w > 4096) | (image->comps[0].h > 3112))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K/4K_R profile require:\n" + "width <= 4096 and height <= 3112\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_8K: + case OPJ_PROFILE_IMF_8K_R: + if (((image->comps[0].w > 8192) | (image->comps[0].h > 6224))) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 8K/8K_R profile require:\n" + "width <= 8192 and height <= 6224\n" + "-> Input image size %d x %d is not compliant\n" + "-> Non-IMF codestream will be generated\n", + image->comps[0].w, image->comps[0].h); + ret = OPJ_FALSE; + } + break; + default : + assert(0); + return OPJ_FALSE; + } + + if (parameters->roi_compno != -1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid RGN / region of interest marker.\n" + "-> Compression parameters specify a ROI\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + + if (parameters->cblockw_init != 32 || parameters->cblockh_init != 32) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require code block size to be 32x32.\n" + "-> Compression parameters set it to %dx%d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->cblockw_init, + parameters->cblockh_init); + ret = OPJ_FALSE; + } + + if (parameters->prog_order != OPJ_CPRL) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile require progression order to be CPRL.\n" + "-> Compression parameters set it to %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->prog_order); + ret = OPJ_FALSE; + } + + if (parameters->numpocs != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid POC markers.\n" + "-> Compression parameters set %d POC.\n" + "-> Non-IMF codestream will be generated\n", + parameters->numpocs); + ret = OPJ_FALSE; + } + + /* Codeblock style: no mode switch enabled */ + if (parameters->mode != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profile forbid mode switch in code block style.\n" + "-> Compression parameters set code block style to %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->mode); + ret = OPJ_FALSE; + } + + if (profile == OPJ_PROFILE_IMF_2K || + profile == OPJ_PROFILE_IMF_4K || + profile == OPJ_PROFILE_IMF_8K) { + /* Expect 9-7 transform */ + if (parameters->irreversible != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles require 9-7 Irreversible Transform.\n" + "-> Compression parameters set it to reversible.\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + } else { + /* Expect 5-3 transform */ + if (parameters->irreversible != 0) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles require 5-3 reversible Transform.\n" + "-> Compression parameters set it 
to irreversible.\n" + "-> Non-IMF codestream will be generated\n"); + ret = OPJ_FALSE; + } + } + + /* Number of layers */ + if (parameters->tcp_numlayers != 1) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K/4K/8K profiles require 1 single quality layer.\n" + "-> Number of layers is %d.\n" + "-> Non-IMF codestream will be generated\n", + parameters->tcp_numlayers); + ret = OPJ_FALSE; + } + + /* Decomposition levels */ + switch (profile) { + case OPJ_PROFILE_IMF_2K: + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K profile requires 1 <= NL <= 5:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_4K: + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K profile requires 1 <= NL <= 6:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_8K: + if (!(NL >= 1 && NL <= 7)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 8K profile requires 1 <= NL <= 7:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + break; + case OPJ_PROFILE_IMF_2K_R: { + if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R profile requires 1 <= NL <= 5 for XTsiz >= 2048:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 2K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + case OPJ_PROFILE_IMF_4K_R: { + if (XTsiz >= 4096) { + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz >= 4096:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + case OPJ_PROFILE_IMF_8K_R: { + if (XTsiz >= 8192) { + if (!(NL >= 1 && NL <= 7)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 7 for XTsiz >= 8192:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 4096) { + if (!(NL >= 1 && NL <= 6)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz in [4096,8192[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 2048) { + if (!(NL >= 1 && NL <= 5)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile 
requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else if (XTsiz >= 1024) { + if (!(NL >= 1 && NL <= 4)) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" + "-> Number of decomposition levels is %d.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + break; + } + default: + break; + } + + if (parameters->numresolution == 1) { + if (parameters->res_spec != 1 || + parameters->prcw_init[0] != 128 || + parameters->prch_init[0] != 128) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" + "-> Supplied values are different from that.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } else { + int i; + for (i = 0; i < parameters->res_spec; i++) { + if (parameters->prcw_init[i] != 256 || + parameters->prch_init[i] != 256) { + opj_event_msg(p_manager, EVT_WARNING, + "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" + "-> Supplied values are different from that.\n" + "-> Non-IMF codestream will be generated\n", + NL); + ret = OPJ_FALSE; + } + } + } + + return ret; +} + + OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, opj_cparameters_t *parameters, opj_image_t *image, @@ -6945,6 +7683,15 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, } else { OPJ_FLOAT32 temp_rate; OPJ_BOOL cap = OPJ_FALSE; + + if (OPJ_IS_IMF(parameters->rsiz) && parameters->max_cs_size > 0 && + parameters->tcp_numlayers == 1 && parameters->tcp_rates[0] == 0) { + parameters->tcp_rates[0] = (OPJ_FLOAT32)(image->numcomps * image->comps[0].w * + image->comps[0].h * image->comps[0].prec) / + (OPJ_FLOAT32)(((OPJ_UINT32)parameters->max_cs_size) * 8 * image->comps[0].dx * + image->comps[0].dy); + } + temp_rate = (OPJ_FLOAT32)(((double)image->numcomps * image->comps[0].w * image->comps[0].h * image->comps[0].prec) / (((double)parameters->max_cs_size) * 8 * image->comps[0].dx * @@ -6985,9 +7732,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, "JPEG 2000 Broadcast profiles not yet supported\n"); parameters->rsiz = OPJ_PROFILE_NONE; } else if (OPJ_IS_IMF(parameters->rsiz)) { - opj_event_msg(p_manager, EVT_WARNING, - "JPEG 2000 IMF profiles not yet supported\n"); - parameters->rsiz = OPJ_PROFILE_NONE; + opj_j2k_set_imf_parameters(parameters, image, p_manager); + if (!opj_j2k_is_imf_compliant(parameters, image, p_manager)) { + parameters->rsiz = OPJ_PROFILE_NONE; + } } else if (OPJ_IS_PART2(parameters->rsiz)) { if (parameters->rsiz == ((OPJ_PROFILE_PART2) | (OPJ_EXTENSION_NONE))) { opj_event_msg(p_manager, EVT_WARNING, @@ -7079,6 +7827,14 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, */ if (parameters->tile_size_on) { + if (cp->tdx == 0) { + opj_event_msg(p_manager, EVT_ERROR, "Invalid tile width\n"); + return OPJ_FALSE; + } + if (cp->tdy == 0) { + opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n"); + return OPJ_FALSE; + } cp->tw = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->x1 - cp->tx0), (OPJ_INT32)cp->tdx); cp->th = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->y1 - cp->ty0), @@ -7157,20 +7913,13 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, "Not enough memory to allocate tile coding parameters\n"); return OPJ_FALSE; } - if (parameters->numpocs) { - /* initialisation of POC */ - opj_j2k_check_poc_val(parameters->POC, parameters->numpocs, - (OPJ_UINT32)parameters->numresolution, 
image->numcomps, - (OPJ_UINT32)parameters->tcp_numlayers, p_manager); - /* TODO MSD use the return value*/ - } for (tileno = 0; tileno < cp->tw * cp->th; tileno++) { opj_tcp_t *tcp = &cp->tcps[tileno]; tcp->numlayers = (OPJ_UINT32)parameters->tcp_numlayers; for (j = 0; j < tcp->numlayers; j++) { - if (OPJ_IS_CINEMA(cp->rsiz)) { + if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { if (cp->m_specific_param.m_enc.m_fixed_quality) { tcp->distoratio[j] = parameters->tcp_distoratio[j]; } @@ -7197,16 +7946,22 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, if (parameters->numpocs) { /* initialisation of POC */ - tcp->POC = 1; for (i = 0; i < parameters->numpocs; i++) { if (tileno + 1 == parameters->POC[i].tile) { opj_poc_t *tcp_poc = &tcp->pocs[numpocs_tile]; + if (parameters->POC[numpocs_tile].compno0 >= image->numcomps) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid compno0 for POC %d\n", i); + return OPJ_FALSE; + } + tcp_poc->resno0 = parameters->POC[numpocs_tile].resno0; tcp_poc->compno0 = parameters->POC[numpocs_tile].compno0; tcp_poc->layno1 = parameters->POC[numpocs_tile].layno1; tcp_poc->resno1 = parameters->POC[numpocs_tile].resno1; - tcp_poc->compno1 = parameters->POC[numpocs_tile].compno1; + tcp_poc->compno1 = opj_uint_min(parameters->POC[numpocs_tile].compno1, + image->numcomps); tcp_poc->prg1 = parameters->POC[numpocs_tile].prg1; tcp_poc->tile = parameters->POC[numpocs_tile].tile; @@ -7214,7 +7969,16 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, } } - tcp->numpocs = numpocs_tile - 1 ; + if (numpocs_tile) { + + /* TODO MSD use the return value*/ + opj_j2k_check_poc_val(parameters->POC, tileno, parameters->numpocs, + (OPJ_UINT32)parameters->numresolution, image->numcomps, + (OPJ_UINT32)parameters->tcp_numlayers, p_manager); + + tcp->POC = 1; + tcp->numpocs = numpocs_tile - 1 ; + } } else { tcp->numpocs = 0; } @@ -7542,6 +8306,8 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream, /*Allocate and initialize some elements of codestrem index*/ if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { + opj_image_destroy(*p_image); + *p_image = NULL; return OPJ_FALSE; } @@ -8628,6 +9394,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, OPJ_UINT32 l_marker_size; const opj_dec_memory_marker_handler_t * l_marker_handler = 00; opj_tcp_t * l_tcp = NULL; + const OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; /* preconditions */ assert(p_stream != 00); @@ -8803,7 +9570,6 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, return OPJ_FALSE; } if (l_correction_needed) { - OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; OPJ_UINT32 l_tile_no; p_j2k->m_specific_param.m_decoder.m_can_decode = 0; @@ -8818,27 +9584,42 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, "Non conformant codestream TPsot==TNsot.\n"); } } - if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { - /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ - if (opj_stream_read_data(p_stream, - p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { - opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); - return OPJ_FALSE; - } - - /* Read 2 bytes from buffer as the new marker ID */ - opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, - &l_current_marker, 2); - } } else { /* Indicate we will try to read a new tile-part header*/ p_j2k->m_specific_param.m_decoder.m_skip_data = 0; p_j2k->m_specific_param.m_decoder.m_can_decode = 0; p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT; + } + if (! 
p_j2k->m_specific_param.m_decoder.m_can_decode) { /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ if (opj_stream_read_data(p_stream, p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { + + /* Deal with likely non conformant SPOT6 files, where the last */ + /* row of tiles have TPsot == 0 and TNsot == 0, and missing EOC, */ + /* but no other tile-parts were found. */ + if (p_j2k->m_current_tile_number + 1 == l_nb_tiles) { + OPJ_UINT32 l_tile_no; + for (l_tile_no = 0U; l_tile_no < l_nb_tiles; ++l_tile_no) { + if (p_j2k->m_cp.tcps[l_tile_no].m_current_tile_part_number == 0 && + p_j2k->m_cp.tcps[l_tile_no].m_nb_tile_parts == 0) { + break; + } + } + if (l_tile_no < l_nb_tiles) { + opj_event_msg(p_manager, EVT_INFO, + "Tile %u has TPsot == 0 and TNsot == 0, " + "but no other tile-parts were found. " + "EOC is also missing.\n", + l_tile_no); + p_j2k->m_current_tile_number = l_tile_no; + l_current_marker = J2K_MS_EOC; + p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_EOC; + break; + } + } + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); return OPJ_FALSE; } @@ -8857,9 +9638,8 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, } } - /* FIXME DOC ???*/ + /* Deal with tiles that have a single tile-part with TPsot == 0 and TNsot == 0 */ if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { - OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw; l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number; while ((p_j2k->m_current_tile_number < l_nb_tiles) && (l_tcp->m_data == 00)) { @@ -9236,6 +10016,14 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image, l_img_comp = p_image->comps; for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) { OPJ_INT32 l_h, l_w; + if (p_image->x0 > (OPJ_UINT32)INT_MAX || + p_image->y0 > (OPJ_UINT32)INT_MAX || + p_image->x1 > (OPJ_UINT32)INT_MAX || + p_image->y1 > (OPJ_UINT32)INT_MAX) { + opj_event_msg(p_manager, EVT_ERROR, + "Image coordinates above INT_MAX are not supported\n"); + return OPJ_FALSE; + } l_img_comp->x0 = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)p_image->x0, (OPJ_INT32)l_img_comp->dx); @@ -9754,9 +10542,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, - 1); /* SPcox (D) */ - ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ + /* SPcod (D) / SPcoc (A) */ + opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, 1); + ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ if (l_tccp->numresolutions > OPJ_J2K_MAXRLVLS) { opj_event_msg(p_manager, EVT_ERROR, "Invalid value for numresolutions : %d, max value is set in openjpeg.h at %d\n", @@ -9777,11 +10565,13 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); /* SPcoc (E) */ + /* SPcod (E) / SPcoc (B) */ + opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); ++l_current_ptr; l_tccp->cblkw += 2; - opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); /* SPcoc (F) */ + /* SPcod (F) / SPcoc (C) */ + opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); ++l_current_ptr; l_tccp->cblkh += 2; @@ -9792,8 +10582,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - - opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); /* SPcoc (G) */ + /* SPcod (G) / SPcoc (D) */ + opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); ++l_current_ptr; if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't 
read */ opj_event_msg(p_manager, EVT_ERROR, @@ -9801,9 +10591,16 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } - opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); /* SPcoc (H) */ + /* SPcod (H) / SPcoc (E) */ + opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); ++l_current_ptr; + if (l_tccp->qmfbid > 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Error reading SPCod SPCoc element, Invalid transformation found\n"); + return OPJ_FALSE; + } + *p_header_size = *p_header_size - 5; /* use custom precinct size ? */ @@ -9813,8 +10610,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, return OPJ_FALSE; } + /* SPcod (I_i) / SPcoc (F_i) */ for (i = 0; i < l_tccp->numresolutions; ++i) { - opj_read_bytes(l_current_ptr, &l_tmp, 1); /* SPcoc (I_i) */ + opj_read_bytes(l_current_ptr, &l_tmp, 1); ++l_current_ptr; /* Precinct exponent 0 is only allowed for lowest resolution level (Table A.21) */ if ((i != 0) && (((l_tmp & 0xf) == 0) || ((l_tmp >> 4) == 0))) { @@ -10657,6 +11455,42 @@ static OPJ_BOOL opj_j2k_allocate_tile_element_cstr_index(opj_j2k_t *p_j2k) return OPJ_TRUE; } +static OPJ_BOOL opj_j2k_are_all_used_components_decoded(opj_j2k_t *p_j2k, + opj_event_mgr_t * p_manager) +{ + OPJ_UINT32 compno; + OPJ_BOOL decoded_all_used_components = OPJ_TRUE; + + if (p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode) { + for (compno = 0; + compno < p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode; compno++) { + OPJ_UINT32 dec_compno = + p_j2k->m_specific_param.m_decoder.m_comps_indices_to_decode[compno]; + if (p_j2k->m_output_image->comps[dec_compno].data == NULL) { + opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n", + dec_compno); + decoded_all_used_components = OPJ_FALSE; + } + } + } else { + for (compno = 0; compno < p_j2k->m_output_image->numcomps; compno++) { + if (p_j2k->m_output_image->comps[compno].data == NULL) { + opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n", + compno); + decoded_all_used_components = OPJ_FALSE; + } + } + } + + if (decoded_all_used_components == OPJ_FALSE) { + opj_event_msg(p_manager, EVT_ERROR, "Failed to decode all used components\n"); + return OPJ_FALSE; + } + + return OPJ_TRUE; +} + + static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager) @@ -10768,6 +11602,10 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, } } + if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) { + return OPJ_FALSE; + } + return OPJ_TRUE; } @@ -10896,6 +11734,10 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, } + if (! 
opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) { + return OPJ_FALSE; + } + return OPJ_TRUE; } @@ -11182,6 +12024,42 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, return OPJ_FALSE; } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL opj_j2k_encoder_set_extra_options( + opj_j2k_t *p_j2k, + const char* const* p_options, + opj_event_mgr_t * p_manager) +{ + const char* const* p_option_iter; + + if (p_options == NULL) { + return OPJ_TRUE; + } + + for (p_option_iter = p_options; *p_option_iter != NULL; ++p_option_iter) { + if (strncmp(*p_option_iter, "PLT=", 4) == 0) { + if (strcmp(*p_option_iter, "PLT=YES") == 0) { + p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_TRUE; + } else if (strcmp(*p_option_iter, "PLT=NO") == 0) { + p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_FALSE; + } else { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for option: %s.\n", *p_option_iter); + return OPJ_FALSE; + } + } else { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid option: %s.\n", *p_option_iter); + return OPJ_FALSE; + } + } + + return OPJ_TRUE; +} + +/* ----------------------------------------------------------------------- */ + OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, opj_stream_private_t *p_stream, opj_event_mgr_t * p_manager) @@ -11239,7 +12117,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, } } } - l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd); + l_current_tile_size = opj_tcd_get_encoder_input_buffer_size(p_j2k->m_tcd); if (!l_reuse_data) { if (l_current_tile_size > l_max_tile_size) { OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, @@ -11567,7 +12445,7 @@ static OPJ_BOOL opj_j2k_setup_end_compress(opj_j2k_t *p_j2k, return OPJ_FALSE; } - if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { + if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_updated_tlm, p_manager)) { return OPJ_FALSE; @@ -11650,7 +12528,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, return OPJ_FALSE; } - if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { + if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_tlm, p_manager)) { return OPJ_FALSE; @@ -11677,7 +12555,8 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, } /* DEVELOPER CORNER, insert your custom procedures */ - if (p_j2k->m_cp.rsiz & OPJ_EXTENSION_MCT) { + if ((p_j2k->m_cp.rsiz & (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) == + (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) { if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, (opj_procedure)opj_j2k_write_mct_data_group, p_manager)) { return OPJ_FALSE; @@ -11707,7 +12586,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager) { @@ -11731,7 +12610,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, l_current_nb_bytes_written = 0; l_begin_data = p_data; - if (! opj_j2k_write_sot(p_j2k, p_data, p_total_data_size, + if (! 
opj_j2k_write_sot(p_j2k, p_data, total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { return OPJ_FALSE; @@ -11739,7 +12618,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; if (!OPJ_IS_CINEMA(l_cp->rsiz)) { #if 0 @@ -11749,29 +12628,29 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, p_manager); l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_current_nb_bytes_written = 0; opj_j2k_write_qcc_in_memory(p_j2k, compno, p_data, &l_current_nb_bytes_written, p_manager); l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; } #endif - if (l_cp->tcps[p_j2k->m_current_tile_number].numpocs) { + if (l_cp->tcps[p_j2k->m_current_tile_number].POC) { l_current_nb_bytes_written = 0; opj_j2k_write_poc_in_memory(p_j2k, p_data, &l_current_nb_bytes_written, p_manager); l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; } } l_current_nb_bytes_written = 0; if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } @@ -11782,7 +12661,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, opj_write_bytes(l_begin_data + 6, l_nb_bytes_written, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_nb_bytes_written); } @@ -11792,7 +12671,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, OPJ_BYTE * p_data, OPJ_UINT32 * p_data_written, - OPJ_UINT32 p_total_data_size, + OPJ_UINT32 total_data_size, opj_stream_private_t *p_stream, struct opj_event_mgr * p_manager ) @@ -11825,7 +12704,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_begin_data = p_data; if (! opj_j2k_write_sot(p_j2k, p_data, - p_total_data_size, + total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { @@ -11834,25 +12713,25 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; l_current_nb_bytes_written = 0; if (! 
opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } p_data += l_current_nb_bytes_written; l_nb_bytes_written += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; /* Writing Psot in SOT marker */ opj_write_bytes(l_begin_data + 6, l_part_tile_size, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_part_tile_size); } @@ -11871,7 +12750,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_begin_data = p_data; if (! opj_j2k_write_sot(p_j2k, p_data, - p_total_data_size, + total_data_size, &l_current_nb_bytes_written, p_stream, p_manager)) { return OPJ_FALSE; @@ -11879,26 +12758,26 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; l_current_nb_bytes_written = 0; if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, - p_total_data_size, p_stream, p_manager)) { + total_data_size, p_stream, p_manager)) { return OPJ_FALSE; } l_nb_bytes_written += l_current_nb_bytes_written; p_data += l_current_nb_bytes_written; - p_total_data_size -= l_current_nb_bytes_written; + total_data_size -= l_current_nb_bytes_written; l_part_tile_size += l_current_nb_bytes_written; /* Writing Psot in SOT marker */ opj_write_bytes(l_begin_data + 6, l_part_tile_size, 4); /* PSOT */ - if (OPJ_IS_CINEMA(l_cp->rsiz)) { + if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { opj_j2k_update_tlm(p_j2k, l_part_tile_size); } diff --git a/3rdparty/openjpeg/openjp2/j2k.h b/3rdparty/openjpeg/openjp2/j2k.h index 5d393c9813..9eb50b50da 100644 --- a/3rdparty/openjpeg/openjp2/j2k.h +++ b/3rdparty/openjpeg/openjp2/j2k.h @@ -531,8 +531,14 @@ typedef struct opj_j2k_enc { OPJ_BYTE * m_header_tile_data; /* size of the encoded_data */ + OPJ_UINT32 m_header_tile_data_size; + /* whether to generate PLT markers */ + OPJ_BOOL m_PLT; + + /* reserved bytes in m_encoded_tile_size for PLT markers */ + OPJ_UINT32 m_reserved_bytes_for_PLT; } opj_j2k_enc_t; @@ -577,15 +583,16 @@ typedef struct opj_j2k { /** the current tile coder/decoder **/ struct opj_tcd * m_tcd; - /** Number of threads to use */ - int m_num_threads; - /** Thread pool */ opj_thread_pool_t* m_tp; + /** Image width coming from JP2 IHDR box. 0 from a pure codestream */ OPJ_UINT32 ihdr_w; + + /** Image height coming from JP2 IHDR box. 0 from a pure codestream */ OPJ_UINT32 ihdr_h; - OPJ_UINT32 enumcs; + + /** Set to 1 by the decoder initialization if OPJ_DPARAMETERS_DUMP_FLAG is set */ unsigned int dump_state; } opj_j2k_t; @@ -827,6 +834,19 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager); +/** + * Specify extra options for the encoder. + * + * @param p_j2k the jpeg2000 codec. + * @param p_options options + * @param p_manager the user event manager + * + * @see opj_encoder_set_extra_options() for more details. + */ +OPJ_BOOL opj_j2k_encoder_set_extra_options( + opj_j2k_t *p_j2k, + const char* const* p_options, + opj_event_mgr_t * p_manager); /** * Writes a tile. 
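The j2k.h hunk above declares opj_j2k_encoder_set_extra_options(), the codec-level half of the new extra-options mechanism; its public wrapper opj_encoder_set_extra_options() and the single PLT=YES/NO option it currently understands are added to openjpeg.h further down in this patch. A minimal caller-side sketch follows; it is illustrative rather than part of the patch: the helper name and the chosen IMF main/sub levels are made up, and image, parameters and stream are assumed to come from the usual libopenjp2 setup calls (opj_image_create(), opj_set_default_encoder_parameters(), opj_stream_create_default_file_stream()).

#include <openjpeg.h>

/* Hypothetical helper: encode one image as IMF 2K with PLT markers.
 * image, parameters and stream are assumed to be fully prepared by
 * the caller as described above. */
static OPJ_BOOL encode_imf_with_plt(opj_image_t *image,
                                    opj_cparameters_t *parameters,
                                    opj_stream_t *stream)
{
    /* NULL-terminated list of KEY=VALUE strings, as the API requires */
    const char * const options[] = { "PLT=YES", NULL };
    opj_codec_t *codec;
    OPJ_BOOL ok;

    /* IMF 2K profile with illustrative mainlevel 2 and sublevel 1: per the
     * OPJ_GET_IMF_* macros added to openjpeg.h below, the profile sits in
     * bits 8-15 of rsiz, the sublevel in bits 4-7, the mainlevel in bits 0-3. */
    parameters->rsiz = (OPJ_UINT16)(OPJ_PROFILE_IMF_2K | (1 << 4) | 2);

    codec = opj_create_compress(OPJ_CODEC_JP2);
    if (codec == NULL) {
        return OPJ_FALSE;
    }
    if (!opj_setup_encoder(codec, parameters, image) ||
            /* must be called after opj_setup_encoder() and before
               opj_start_compress(), as the doc comment below states */
            !opj_encoder_set_extra_options(codec, options)) {
        opj_destroy_codec(codec);
        return OPJ_FALSE;
    }
    ok = opj_start_compress(codec, image, stream) &&
         opj_encode(codec, stream) &&
         opj_end_compress(codec, stream);
    opj_destroy_codec(codec);
    return ok;
}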
diff --git a/3rdparty/openjpeg/openjp2/jp2.c b/3rdparty/openjpeg/openjp2/jp2.c index 4402ffe3c5..7c065ba742 100644 --- a/3rdparty/openjpeg/openjp2/jp2.c +++ b/3rdparty/openjpeg/openjp2/jp2.c @@ -586,6 +586,12 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2, opj_read_bytes(p_image_header_data, &(jp2->numcomps), 2); /* NC */ p_image_header_data += 2; + if (jp2->h < 1 || jp2->w < 1 || jp2->numcomps < 1) { + opj_event_msg(p_manager, EVT_ERROR, + "Wrong values for: w(%d) h(%d) numcomps(%d) (ihdr)\n", + jp2->w, jp2->h, jp2->numcomps); + return OPJ_FALSE; + } if ((jp2->numcomps - 1U) >= 16384U) { /* unsigned underflow is well defined: 1U <= jp2->numcomps <= 16384U */ opj_event_msg(p_manager, EVT_ERROR, "Invalid number of components (ihdr)\n"); @@ -1584,9 +1590,7 @@ static OPJ_BOOL opj_jp2_read_colr(opj_jp2_t *jp2, "COLR BOX meth value is not a regular value (%d), " "so we will ignore the entire Colour Specification box. \n", jp2->meth); } - if (jp2->color.jp2_has_colr) { - jp2->j2k->enumcs = jp2->enumcs; - } + return OPJ_TRUE; } @@ -3236,6 +3240,18 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, return opj_j2k_set_decoded_resolution_factor(p_jp2->j2k, res_factor, p_manager); } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL opj_jp2_encoder_set_extra_options( + opj_jp2_t *p_jp2, + const char* const* p_options, + opj_event_mgr_t * p_manager) +{ + return opj_j2k_encoder_set_extra_options(p_jp2->j2k, p_options, p_manager); +} + +/* ----------------------------------------------------------------------- */ + /* JPIP specific */ #ifdef USE_JPIP diff --git a/3rdparty/openjpeg/openjp2/jp2.h b/3rdparty/openjpeg/openjp2/jp2.h index 34abd5118e..9e7fa56674 100644 --- a/3rdparty/openjpeg/openjp2/jp2.h +++ b/3rdparty/openjpeg/openjp2/jp2.h @@ -459,6 +459,20 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager); +/** + * Specify extra options for the encoder. + * + * @param p_jp2 the jpeg2000 codec. + * @param p_options options + * @param p_manager the user event manager + * + * @see opj_encoder_set_extra_options() for more details. + */ +OPJ_BOOL opj_jp2_encoder_set_extra_options( + opj_jp2_t *p_jp2, + const char* const* p_options, + opj_event_mgr_t * p_manager); + /* TODO MSD: clean these 3 functions */ /** diff --git a/3rdparty/openjpeg/openjp2/libopenjp2.pc.cmake.in b/3rdparty/openjpeg/openjp2/libopenjp2.pc.cmake.in deleted file mode 100644 index 62159b00a4..0000000000 --- a/3rdparty/openjpeg/openjp2/libopenjp2.pc.cmake.in +++ /dev/null @@ -1,14 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -bindir=${prefix}/@OPENJPEG_INSTALL_BIN_DIR@ -mandir=${prefix}/@OPENJPEG_INSTALL_MAN_DIR@ -docdir=${prefix}/@OPENJPEG_INSTALL_DOC_DIR@ -libdir=${prefix}/@OPENJPEG_INSTALL_LIB_DIR@ -includedir=${prefix}/@OPENJPEG_INSTALL_INCLUDE_DIR@ - -Name: openjp2 -Description: JPEG2000 library (Part 1 and 2) -URL: http://www.openjpeg.org/ -Version: @OPENJPEG_VERSION@ -Libs: -L${libdir} -lopenjp2 -Libs.private: -lm -Cflags: -I${includedir} diff --git a/3rdparty/openjpeg/openjp2/mct.c b/3rdparty/openjpeg/openjp2/mct.c index b79d4b87c4..88c8f40920 100644 --- a/3rdparty/openjpeg/openjp2/mct.c +++ b/3rdparty/openjpeg/openjp2/mct.c @@ -183,7 +183,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { - OPJ_UINT32 i; + OPJ_SIZE_T i; for (i = 0; i < n; ++i) { OPJ_INT32 y = c0[i]; OPJ_INT32 u = c1[i]; @@ -209,175 +209,72 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) /* */ /* Forward irreversible MCT. 
*/ /* */ -#ifdef __SSE4_1__ void opj_mct_encode_real( - OPJ_INT32* OPJ_RESTRICT c0, - OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, + OPJ_FLOAT32* OPJ_RESTRICT c0, + OPJ_FLOAT32* OPJ_RESTRICT c1, + OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { OPJ_SIZE_T i; - const OPJ_SIZE_T len = n; +#ifdef __SSE__ + const __m128 YR = _mm_set1_ps(0.299f); + const __m128 YG = _mm_set1_ps(0.587f); + const __m128 YB = _mm_set1_ps(0.114f); + const __m128 UR = _mm_set1_ps(-0.16875f); + const __m128 UG = _mm_set1_ps(-0.331260f); + const __m128 UB = _mm_set1_ps(0.5f); + const __m128 VR = _mm_set1_ps(0.5f); + const __m128 VG = _mm_set1_ps(-0.41869f); + const __m128 VB = _mm_set1_ps(-0.08131f); + for (i = 0; i < (n >> 3); i ++) { + __m128 r, g, b, y, u, v; - const __m128i ry = _mm_set1_epi32(2449); - const __m128i gy = _mm_set1_epi32(4809); - const __m128i by = _mm_set1_epi32(934); - const __m128i ru = _mm_set1_epi32(1382); - const __m128i gu = _mm_set1_epi32(2714); - /* const __m128i bu = _mm_set1_epi32(4096); */ - /* const __m128i rv = _mm_set1_epi32(4096); */ - const __m128i gv = _mm_set1_epi32(3430); - const __m128i bv = _mm_set1_epi32(666); - const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), - _MM_SHUFFLE(1, 0, 1, 0)); + r = _mm_load_ps(c0); + g = _mm_load_ps(c1); + b = _mm_load_ps(c2); + y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), + _mm_mul_ps(b, YB)); + u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), + _mm_mul_ps(b, UB)); + v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), + _mm_mul_ps(b, VB)); + _mm_store_ps(c0, y); + _mm_store_ps(c1, u); + _mm_store_ps(c2, v); + c0 += 4; + c1 += 4; + c2 += 4; - for (i = 0; i < (len & ~3U); i += 4) { - __m128i lo, hi; - __m128i y, u, v; - __m128i r = _mm_load_si128((const __m128i *) & (c0[i])); - __m128i g = _mm_load_si128((const __m128i *) & (c1[i])); - __m128i b = _mm_load_si128((const __m128i *) & (c2[i])); - - lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, ry); - hi = _mm_mul_epi32(hi, ry); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_blend_epi16(lo, hi, 0xCC); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gy); - hi = _mm_mul_epi32(hi, gy); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, by); - hi = _mm_mul_epi32(hi, by); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); - _mm_store_si128((__m128i *) & (c0[i]), y); - - /*lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, mulround); - hi = _mm_mul_epi32(hi, mulround);*/ - lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0))); - hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1))); - lo = _mm_slli_epi64(lo, 12); - hi = _mm_slli_epi64(hi, 12); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_blend_epi16(lo, hi, 0xCC); - - lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, ru); 
- hi = _mm_mul_epi32(hi, ru); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gu); - hi = _mm_mul_epi32(hi, gu); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); - _mm_store_si128((__m128i *) & (c1[i]), u); - - /*lo = r; - hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, mulround); - hi = _mm_mul_epi32(hi, mulround);*/ - lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0))); - hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1))); - lo = _mm_slli_epi64(lo, 12); - hi = _mm_slli_epi64(hi, 12); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_blend_epi16(lo, hi, 0xCC); - - lo = g; - hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, gv); - hi = _mm_mul_epi32(hi, gv); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); - - lo = b; - hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); - lo = _mm_mul_epi32(lo, bv); - hi = _mm_mul_epi32(hi, bv); - lo = _mm_add_epi64(lo, mulround); - hi = _mm_add_epi64(hi, mulround); - lo = _mm_srli_epi64(lo, 13); - hi = _mm_slli_epi64(hi, 32 - 13); - v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); - _mm_store_si128((__m128i *) & (c2[i]), v); + r = _mm_load_ps(c0); + g = _mm_load_ps(c1); + b = _mm_load_ps(c2); + y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), + _mm_mul_ps(b, YB)); + u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), + _mm_mul_ps(b, UB)); + v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), + _mm_mul_ps(b, VB)); + _mm_store_ps(c0, y); + _mm_store_ps(c1, u); + _mm_store_ps(c2, v); + c0 += 4; + c1 += 4; + c2 += 4; } - for (; i < len; ++i) { - OPJ_INT32 r = c0[i]; - OPJ_INT32 g = c1[i]; - OPJ_INT32 b = c2[i]; - OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, - 4809) + opj_int_fix_mul(b, 934); - OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, - 2714) + opj_int_fix_mul(b, 4096); - OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, - 3430) - opj_int_fix_mul(b, 666); - c0[i] = y; - c1[i] = u; - c2[i] = v; - } -} -#else -void opj_mct_encode_real( - OPJ_INT32* OPJ_RESTRICT c0, - OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, - OPJ_SIZE_T n) -{ - OPJ_UINT32 i; - for (i = 0; i < n; ++i) { - OPJ_INT32 r = c0[i]; - OPJ_INT32 g = c1[i]; - OPJ_INT32 b = c2[i]; - OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, - 4809) + opj_int_fix_mul(b, 934); - OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, - 2714) + opj_int_fix_mul(b, 4096); - OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, - 3430) - opj_int_fix_mul(b, 666); - c0[i] = y; - c1[i] = u; - c2[i] = v; - } -} + n &= 7; #endif + for (i = 0; i < n; ++i) { + OPJ_FLOAT32 r = c0[i]; + OPJ_FLOAT32 g = c1[i]; + OPJ_FLOAT32 b = c2[i]; + OPJ_FLOAT32 y = 0.299f * r + 0.587f * g + 0.114f * b; + OPJ_FLOAT32 u = -0.16875f * r - 0.331260f * g + 0.5f * b; + OPJ_FLOAT32 v = 0.5f * r - 0.41869f * g - 
0.08131f * b; + c0[i] = y; + c1[i] = u; + c2[i] = v; + } +} /* */ /* Inverse irreversible MCT. */ @@ -388,7 +285,7 @@ void opj_mct_decode_real( OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n) { - OPJ_UINT32 i; + OPJ_SIZE_T i; #ifdef __SSE__ __m128 vrv, vgu, vgv, vbu; vrv = _mm_set1_ps(1.402f); diff --git a/3rdparty/openjpeg/openjp2/mct.h b/3rdparty/openjpeg/openjp2/mct.h index 2e37ce7333..3e1f5e4946 100644 --- a/3rdparty/openjpeg/openjp2/mct.h +++ b/3rdparty/openjpeg/openjp2/mct.h @@ -85,8 +85,9 @@ Apply an irreversible multi-component transform to an image @param c2 Samples blue component @param n Number of samples for each component */ -void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); +void opj_mct_encode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, + OPJ_FLOAT32* OPJ_RESTRICT c1, + OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply an irreversible multi-component inverse transform to an image @param c0 Samples for luminance component diff --git a/3rdparty/openjpeg/openjp2/mqc.c b/3rdparty/openjpeg/openjp2/mqc.c index 6299b171d8..4cbfabd033 100644 --- a/3rdparty/openjpeg/openjp2/mqc.c +++ b/3rdparty/openjpeg/openjp2/mqc.c @@ -46,27 +46,6 @@ /** @name Local static functions */ /*@{*/ -/** -Output a byte, doing bit-stuffing if necessary. -After a 0xff byte, the next byte must be smaller than 0x90. -@param mqc MQC handle -*/ -static void opj_mqc_byteout(opj_mqc_t *mqc); -/** -Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 -@param mqc MQC handle -*/ -static void opj_mqc_renorme(opj_mqc_t *mqc); -/** -Encode the most probable symbol -@param mqc MQC handle -*/ -static void opj_mqc_codemps(opj_mqc_t *mqc); -/** -Encode the most least symbol -@param mqc MQC handle -*/ -static void opj_mqc_codelps(opj_mqc_t *mqc); /** Fill mqc->c with 1's for flushing @param mqc MQC handle @@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = { ========================================================== */ -static void opj_mqc_byteout(opj_mqc_t *mqc) -{ - /* bp is initialized to start - 1 in opj_mqc_init_enc() */ - /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */ - assert(mqc->bp >= mqc->start - 1); - if (*mqc->bp == 0xff) { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); - mqc->c &= 0xfffff; - mqc->ct = 7; - } else { - if ((mqc->c & 0x8000000) == 0) { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); - mqc->c &= 0x7ffff; - mqc->ct = 8; - } else { - (*mqc->bp)++; - if (*mqc->bp == 0xff) { - mqc->c &= 0x7ffffff; - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); - mqc->c &= 0xfffff; - mqc->ct = 7; - } else { - mqc->bp++; - *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); - mqc->c &= 0x7ffff; - mqc->ct = 8; - } - } - } -} - -static void opj_mqc_renorme(opj_mqc_t *mqc) -{ - do { - mqc->a <<= 1; - mqc->c <<= 1; - mqc->ct--; - if (mqc->ct == 0) { - opj_mqc_byteout(mqc); - } - } while ((mqc->a & 0x8000) == 0); -} - -static void opj_mqc_codemps(opj_mqc_t *mqc) -{ - mqc->a -= (*mqc->curctx)->qeval; - if ((mqc->a & 0x8000) == 0) { - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->a = (*mqc->curctx)->qeval; - } else { - mqc->c += (*mqc->curctx)->qeval; - } - *mqc->curctx = (*mqc->curctx)->nmps; - opj_mqc_renorme(mqc); - } else { - mqc->c += (*mqc->curctx)->qeval; - } -} - -static void opj_mqc_codelps(opj_mqc_t *mqc) -{ - mqc->a -= (*mqc->curctx)->qeval; - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->c += (*mqc->curctx)->qeval; - } else { - mqc->a = (*mqc->curctx)->qeval; - } - 
*mqc->curctx = (*mqc->curctx)->nlps; - opj_mqc_renorme(mqc); -} - static void opj_mqc_setbits(opj_mqc_t *mqc) { OPJ_UINT32 tempc = mqc->c + mqc->a; @@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp) mqc->end_of_byte_stream_counter = 0; } -void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) -{ - if ((*mqc->curctx)->mps == d) { - opj_mqc_codemps(mqc); - } else { - opj_mqc_codelps(mqc); - } -} void opj_mqc_flush(opj_mqc_t *mqc) { @@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc) } } -#define BYPASS_CT_INIT 0xDEADBEEF - void opj_mqc_bypass_init_enc(opj_mqc_t *mqc) { /* This function is normally called after at least one opj_mqc_flush() */ @@ -475,6 +370,43 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc) } } +static INLINE void opj_mqc_renorme(opj_mqc_t *mqc) +{ + opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode the most probable symbol +@param mqc MQC handle +*/ +static INLINE void opj_mqc_codemps(opj_mqc_t *mqc) +{ + opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode the least probable symbol +@param mqc MQC handle +*/ +static INLINE void opj_mqc_codelps(opj_mqc_t *mqc) +{ + opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); +} + +/** +Encode a symbol using the MQ-coder +@param mqc MQC handle +@param d The symbol to be encoded (0 or 1) +*/ +static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) +{ + if ((*mqc->curctx)->mps == d) { + opj_mqc_codemps(mqc); + } else { + opj_mqc_codelps(mqc); + } +} + void opj_mqc_segmark_enc(opj_mqc_t *mqc) { OPJ_UINT32 i; @@ -557,4 +489,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb, mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)]; } - +void opj_mqc_byteout(opj_mqc_t *mqc) +{ + /* bp is initialized to start - 1 in opj_mqc_init_enc() */ + /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */ + assert(mqc->bp >= mqc->start - 1); + if (*mqc->bp == 0xff) { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); + mqc->c &= 0xfffff; + mqc->ct = 7; + } else { + if ((mqc->c & 0x8000000) == 0) { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); + mqc->c &= 0x7ffff; + mqc->ct = 8; + } else { + (*mqc->bp)++; + if (*mqc->bp == 0xff) { + mqc->c &= 0x7ffffff; + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); + mqc->c &= 0xfffff; + mqc->ct = 7; + } else { + mqc->bp++; + *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); + mqc->c &= 0x7ffff; + mqc->ct = 8; + } + } + } +} \ No newline at end of file diff --git a/3rdparty/openjpeg/openjp2/mqc.h index 69a2a79dc0..9850fed031 100644 --- a/3rdparty/openjpeg/openjp2/mqc.h +++ b/3rdparty/openjpeg/openjp2/mqc.h @@ -96,6 +96,8 @@ typedef struct opj_mqc { OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA]; } opj_mqc_t; +#define BYPASS_CT_INIT 0xDEADBEEF + #include "mqc_inl.h" /** @name Exported functions */ /*@{*/ @@ -135,12 +137,7 @@ Set the current context used for coding/decoding @param ctxno Number that identifies the context */ #define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] -/** -Encode a symbol using the MQ-coder -@param mqc MQC handle -@param d The symbol to be encoded (0 or 1) -*/ -void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d); + /** Flush the encoder, so that all remaining data is written @param mqc MQC handle diff --git a/3rdparty/openjpeg/openjp2/mqc_inl.h b/3rdparty/openjpeg/openjp2/mqc_inl.h index 310a3287fd..0031b94be3 100644 --- a/3rdparty/openjpeg/openjp2/mqc_inl.h +++ b/3rdparty/openjpeg/openjp2/mqc_inl.h @@ -156,13 
+156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc) } \ } -#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ +#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ register const opj_mqc_state_t **curctx = mqc->curctx; \ register OPJ_UINT32 c = mqc->c; \ register OPJ_UINT32 a = mqc->a; \ register OPJ_UINT32 ct = mqc->ct -#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ +#define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ mqc->curctx = curctx; \ mqc->c = c; \ mqc->a = a; \ @@ -193,4 +193,90 @@ Decode a symbol #define opj_mqc_decode(d, mqc) \ opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct) +/** +Output a byte, doing bit-stuffing if necessary. +After a 0xff byte, the next byte must be smaller than 0x90. +@param mqc MQC handle +*/ +void opj_mqc_byteout(opj_mqc_t *mqc); + +/** +Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 +@param mqc MQC handle +@param a_ value of mqc->a +@param c_ value of mqc->c_ +@param ct_ value of mqc->ct_ +*/ +#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \ +{ \ + do { \ + a_ <<= 1; \ + c_ <<= 1; \ + ct_--; \ + if (ct_ == 0) { \ + mqc->c = c_; \ + opj_mqc_byteout(mqc); \ + c_ = mqc->c; \ + ct_ = mqc->ct; \ + } \ + } while( (a_ & 0x8000) == 0); \ +} + +#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \ +{ \ + a -= (*curctx)->qeval; \ + if ((a & 0x8000) == 0) { \ + if (a < (*curctx)->qeval) { \ + a = (*curctx)->qeval; \ + } else { \ + c += (*curctx)->qeval; \ + } \ + *curctx = (*curctx)->nmps; \ + opj_mqc_renorme_macro(mqc, a, c, ct); \ + } else { \ + c += (*curctx)->qeval; \ + } \ +} + +#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \ +{ \ + a -= (*curctx)->qeval; \ + if (a < (*curctx)->qeval) { \ + c += (*curctx)->qeval; \ + } else { \ + a = (*curctx)->qeval; \ + } \ + *curctx = (*curctx)->nlps; \ + opj_mqc_renorme_macro(mqc, a, c, ct); \ +} + +#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \ +{ \ + if ((*curctx)->mps == (d)) { \ + opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \ + } else { \ + opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \ + } \ +} + + +#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \ +{\ + if (ct == BYPASS_CT_INIT) {\ + ct = 8;\ + }\ + ct--;\ + c = c + ((d) << ct);\ + if (ct == 0) {\ + *mqc->bp = (OPJ_BYTE)c;\ + ct = 8;\ + /* If the previous byte was 0xff, make sure that the next msb is 0 */ \ + if (*mqc->bp == 0xff) {\ + ct = 7;\ + }\ + mqc->bp++;\ + c = 0;\ + }\ +} + #endif /* OPJ_MQC_INL_H */ diff --git a/3rdparty/openjpeg/openjp2/openjpeg.c b/3rdparty/openjpeg/openjp2/openjpeg.c index 7b12303423..9c9b6eb0c0 100644 --- a/3rdparty/openjpeg/openjp2/openjpeg.c +++ b/3rdparty/openjpeg/openjp2/openjpeg.c @@ -652,6 +652,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) struct opj_image *, struct opj_event_mgr *)) opj_j2k_setup_encoder; + l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( + *)(void *, + const char* const*, + struct opj_event_mgr *)) opj_j2k_encoder_set_extra_options; + + l_codec->opj_set_threads = + (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_j2k_set_threads; + l_codec->m_codec = opj_j2k_create_compress(); if (! 
l_codec->m_codec) { opj_free(l_codec); @@ -690,6 +698,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) struct opj_image *, struct opj_event_mgr *)) opj_jp2_setup_encoder; + l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( + *)(void *, + const char* const*, + struct opj_event_mgr *)) opj_jp2_encoder_set_extra_options; + + l_codec->opj_set_threads = + (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_jp2_set_threads; + l_codec->m_codec = opj_jp2_create(OPJ_FALSE); if (! l_codec->m_codec) { opj_free(l_codec); @@ -718,11 +734,11 @@ void OPJ_CALLCONV opj_set_default_encoder_parameters(opj_cparameters_t parameters->cp_cinema = OPJ_OFF; /* DEPRECATED */ parameters->rsiz = OPJ_PROFILE_NONE; parameters->max_comp_size = 0; - parameters->numresolution = 6; + parameters->numresolution = OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION; parameters->cp_rsiz = OPJ_STD_RSIZ; /* DEPRECATED */ - parameters->cblockw_init = 64; - parameters->cblockh_init = 64; - parameters->prog_order = OPJ_LRCP; + parameters->cblockw_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKW; + parameters->cblockh_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKH; + parameters->prog_order = OPJ_COMP_PARAM_DEFAULT_PROG_ORDER; parameters->roi_compno = -1; /* no ROI */ parameters->subsampling_dx = 1; parameters->subsampling_dy = 1; @@ -788,6 +804,27 @@ OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, return OPJ_FALSE; } +/* ----------------------------------------------------------------------- */ + +OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(opj_codec_t *p_codec, + const char* const* options) +{ + if (p_codec) { + opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; + + if (! l_codec->is_decompressor) { + return l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options( + l_codec->m_codec, + options, + &(l_codec->m_event_mgr)); + } + } + + return OPJ_FALSE; +} + +/* ----------------------------------------------------------------------- */ + OPJ_BOOL OPJ_CALLCONV opj_start_compress(opj_codec_t *p_codec, opj_image_t * p_image, opj_stream_t *p_stream) diff --git a/3rdparty/openjpeg/openjp2/openjpeg.h b/3rdparty/openjpeg/openjp2/openjpeg.h index 53a0e10c54..269ac329ae 100644 --- a/3rdparty/openjpeg/openjp2/openjpeg.h +++ b/3rdparty/openjpeg/openjp2/openjpeg.h @@ -78,7 +78,7 @@ Most compilers implement their own version of this keyword ... 
#if defined(OPJ_STATIC) || !defined(_WIN32) /* http://gcc.gnu.org/wiki/Visibility */ -# if __GNUC__ >= 4 +# if !defined(_WIN32) && __GNUC__ >= 4 # if defined(OPJ_STATIC) /* static library uses "hidden" */ # define OPJ_API __attribute__ ((visibility ("hidden"))) # else @@ -204,11 +204,11 @@ typedef size_t OPJ_SIZE_T; #define OPJ_PROFILE_BC_MULTI 0x0200 /** Multi Tile Broadcast profile defined in 15444-1 AMD3 */ #define OPJ_PROFILE_BC_MULTI_R 0x0300 /** Multi Tile Reversible Broadcast profile defined in 15444-1 AMD3 */ #define OPJ_PROFILE_IMF_2K 0x0400 /** 2K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_4K 0x0401 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_8K 0x0402 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_2K_R 0x0403 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_4K 0x0500 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_8K 0x0600 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_2K_R 0x0700 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ #define OPJ_PROFILE_IMF_4K_R 0x0800 /** 4K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ -#define OPJ_PROFILE_IMF_8K_R 0x0801 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ +#define OPJ_PROFILE_IMF_8K_R 0x0900 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ /** * JPEG 2000 Part-2 extensions @@ -225,6 +225,36 @@ typedef size_t OPJ_SIZE_T; #define OPJ_IS_IMF(v) (((v) >= OPJ_PROFILE_IMF_2K)&&((v) <= ((OPJ_PROFILE_IMF_8K_R) | (0x009b)))) #define OPJ_IS_PART2(v) ((v) & OPJ_PROFILE_PART2) +#define OPJ_GET_IMF_PROFILE(v) ((v) & 0xff00) /** Extract IMF profile without mainlevel/sublevel */ +#define OPJ_GET_IMF_MAINLEVEL(v) ((v) & 0xf) /** Extract IMF main level */ +#define OPJ_GET_IMF_SUBLEVEL(v) (((v) >> 4) & 0xf) /** Extract IMF sub level */ + +#define OPJ_IMF_MAINLEVEL_MAX 11 /** Maximum main level */ + +/** Max. Components Sampling Rate (MSamples/sec) per IMF main level */ +#define OPJ_IMF_MAINLEVEL_1_MSAMPLESEC 65 /** MSamples/sec for IMF main level 1 */ +#define OPJ_IMF_MAINLEVEL_2_MSAMPLESEC 130 /** MSamples/sec for IMF main level 2 */ +#define OPJ_IMF_MAINLEVEL_3_MSAMPLESEC 195 /** MSamples/sec for IMF main level 3 */ +#define OPJ_IMF_MAINLEVEL_4_MSAMPLESEC 260 /** MSamples/sec for IMF main level 4 */ +#define OPJ_IMF_MAINLEVEL_5_MSAMPLESEC 520 /** MSamples/sec for IMF main level 5 */ +#define OPJ_IMF_MAINLEVEL_6_MSAMPLESEC 1200 /** MSamples/sec for IMF main level 6 */ +#define OPJ_IMF_MAINLEVEL_7_MSAMPLESEC 2400 /** MSamples/sec for IMF main level 7 */ +#define OPJ_IMF_MAINLEVEL_8_MSAMPLESEC 4800 /** MSamples/sec for IMF main level 8 */ +#define OPJ_IMF_MAINLEVEL_9_MSAMPLESEC 9600 /** MSamples/sec for IMF main level 9 */ +#define OPJ_IMF_MAINLEVEL_10_MSAMPLESEC 19200 /** MSamples/sec for IMF main level 10 */ +#define OPJ_IMF_MAINLEVEL_11_MSAMPLESEC 38400 /** MSamples/sec for IMF main level 11 */ + +/** Max. 
compressed Bit Rate (Mbits/s) per IMF sub level */ +#define OPJ_IMF_SUBLEVEL_1_MBITSSEC 200 /** Mbits/s for IMF sub level 1 */ +#define OPJ_IMF_SUBLEVEL_2_MBITSSEC 400 /** Mbits/s for IMF sub level 2 */ +#define OPJ_IMF_SUBLEVEL_3_MBITSSEC 800 /** Mbits/s for IMF sub level 3 */ +#define OPJ_IMF_SUBLEVEL_4_MBITSSEC 1600 /** Mbits/s for IMF sub level 4 */ +#define OPJ_IMF_SUBLEVEL_5_MBITSSEC 3200 /** Mbits/s for IMF sub level 5 */ +#define OPJ_IMF_SUBLEVEL_6_MBITSSEC 6400 /** Mbits/s for IMF sub level 6 */ +#define OPJ_IMF_SUBLEVEL_7_MBITSSEC 12800 /** Mbits/s for IMF sub level 7 */ +#define OPJ_IMF_SUBLEVEL_8_MBITSSEC 25600 /** Mbits/s for IMF sub level 8 */ +#define OPJ_IMF_SUBLEVEL_9_MBITSSEC 51200 /** Mbits/s for IMF sub level 9 */ + /** * JPEG 2000 codestream and component size limits in cinema profiles * */ @@ -318,6 +348,10 @@ typedef void (*opj_msg_callback)(const char *msg, void *client_data); ========================================================== */ +#ifndef OPJ_UINT32_SEMANTICALLY_BUT_INT32 +#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_INT32 +#endif + /** * Progression order changes * @@ -333,10 +367,10 @@ typedef struct opj_poc { OPJ_PROG_ORDER prg1, prg; /** Progression order string*/ OPJ_CHAR progorder[5]; - /** Tile number */ + /** Tile number (starting at 1) */ OPJ_UINT32 tile; /** Start and end values for Tile width and height*/ - OPJ_INT32 tx0, tx1, ty0, ty1; + OPJ_UINT32_SEMANTICALLY_BUT_INT32 tx0, tx1, ty0, ty1; /** Start value, initialised in pi_initialise_encode*/ OPJ_UINT32 layS, resS, compS, prcS; /** End value, initialised in pi_initialise_encode */ @@ -1314,15 +1348,14 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, * number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called, * this function will override the behaviour of the environment variable. * - * Currently this function must be called after opj_setup_decoder() and - * before opj_read_header(). + * This function must be called after opj_setup_decoder() and + * before opj_read_header() for the decoding side, or after opj_setup_encoder() + * and before opj_start_compress() for the encoding side. * - * Note: currently only has effect on the decompressor. - * - * @param p_codec decompressor handler + * @param p_codec decompressor or compressor handler * @param num_threads number of threads. * - * @return OPJ_TRUE if the decoder is correctly set + * @return OPJ_TRUE if the function is successful. */ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, int num_threads); @@ -1546,6 +1579,33 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, opj_cparameters_t *parameters, opj_image_t *image); + +/** + * Specify extra options for the encoder. + * + * This may be called after opj_setup_encoder() and before opj_start_compress() + * + * This is the way to add new options in a fully ABI compatible way, without + * extending the opj_cparameters_t structure. + * + * Currently supported options are: + *
<ul> + * <li>PLT=YES/NO. Defaults to NO. If set to YES, PLT marker segments, + * indicating the length of each packet in the tile-part header, will be + * written. Since 2.3.2</li> + * </ul>
+ * + * @param p_codec Compressor handle + * @param p_options Compression options. This should be a NULL terminated + * array of strings. Each string is of the form KEY=VALUE. + * + * @return OPJ_TRUE in case of success. + * @since 2.3.2 + */ +OPJ_API OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options( + opj_codec_t *p_codec, + const char* const* p_options); + /** * Start to compress the current image. * @param p_codec Compressor handle diff --git a/3rdparty/openjpeg/openjp2/opj_codec.h b/3rdparty/openjpeg/openjp2/opj_codec.h index b962b12163..8a8af9119e 100644 --- a/3rdparty/openjpeg/openjp2/opj_codec.h +++ b/3rdparty/openjpeg/openjp2/opj_codec.h @@ -148,6 +148,11 @@ typedef struct opj_codec_private { opj_cparameters_t * p_param, struct opj_image * p_image, struct opj_event_mgr * p_manager); + + OPJ_BOOL(* opj_encoder_set_extra_options)(void * p_codec, + const char* const* p_options, + struct opj_event_mgr * p_manager); + } m_compression; } m_codec_data; /** FIXME DOC*/ diff --git a/3rdparty/openjpeg/openjp2/opj_common.h b/3rdparty/openjpeg/openjp2/opj_common.h index a051339154..ee8adf4725 100644 --- a/3rdparty/openjpeg/openjp2/opj_common.h +++ b/3rdparty/openjpeg/openjp2/opj_common.h @@ -38,4 +38,10 @@ */ #define OPJ_COMMON_CBLK_DATA_EXTRA 2 /**< Margin for a fake FFFF marker */ + +#define OPJ_COMP_PARAM_DEFAULT_CBLOCKW 64 +#define OPJ_COMP_PARAM_DEFAULT_CBLOCKH 64 +#define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER OPJ_LRCP +#define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION 6 + #endif /* OPJ_COMMMON_H */ diff --git a/3rdparty/openjpeg/openjp2/opj_intmath.h b/3rdparty/openjpeg/openjp2/opj_intmath.h index 754b5512ff..afe69d90c0 100644 --- a/3rdparty/openjpeg/openjp2/opj_intmath.h +++ b/3rdparty/openjpeg/openjp2/opj_intmath.h @@ -208,6 +208,16 @@ static INLINE OPJ_INT32 opj_int_floordivpow2(OPJ_INT32 a, OPJ_INT32 b) { return a >> b; } + +/** +Divide an integer by a power of 2 and round downwards +@return Returns a divided by 2^b +*/ +static INLINE OPJ_UINT32 opj_uint_floordivpow2(OPJ_UINT32 a, OPJ_UINT32 b) +{ + return a >> b; +} + /** Get logarithm of an integer and round downwards @return Returns log2(a) diff --git a/3rdparty/openjpeg/openjp2/pi.c b/3rdparty/openjpeg/openjp2/pi.c index 4a6ed68e2b..4f7dd50f16 100644 --- a/3rdparty/openjpeg/openjp2/pi.c +++ b/3rdparty/openjpeg/openjp2/pi.c @@ -36,6 +36,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_UINT32 + #include "opj_includes.h" /** @defgroup PI PI - Implementation of a packet iterator */ @@ -91,10 +93,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi); */ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -118,10 +120,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, OPJ_UINT32 p_num_comps, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -144,10 +146,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, static void opj_get_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -176,10 +178,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, static void opj_get_all_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -192,10 +194,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, * @param p_image the image used to initialize the packet iterator (in fact only the number of components is relevant. * @param p_cp the coding parameters. * @param tileno the index of the tile from which creating the packet iterator. 
+ * @param manager Event manager */ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *p_image, const opj_cp_t *p_cp, - OPJ_UINT32 tileno); + OPJ_UINT32 tileno, + opj_event_mgr_t* manager); /** * FIXME DOC */ @@ -230,18 +234,19 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, ========================================================== */ -static void opj_pi_emit_error(opj_pi_iterator_t * pi, const char* msg) -{ - (void)pi; - (void)msg; -} - static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) { opj_pi_comp_t *comp = NULL; opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_lrcp(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; res = &comp->resolutions[pi->resno]; @@ -272,7 +277,7 @@ static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) /* include should be resized when a POC arises, or */ /* the POC should be rejected */ if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -295,6 +300,13 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_rlcp(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; res = &comp->resolutions[pi->resno]; @@ -318,7 +330,7 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -341,6 +353,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_rpcl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { goto LABEL_SKIP; } else { @@ -376,16 +395,16 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) pi->poc.tx1 = pi->tx1; } for (pi->resno = pi->poc.resno0; pi->resno < pi->poc.resno1; pi->resno++) { - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; comp = &pi->comps[pi->compno]; if (pi->resno >= comp->numresolutions) { continue; @@ -404,10 +423,10 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) 
(comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -421,12 +440,12 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.3 Resolution level-position-component-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -438,18 +457,18 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, res->pdx); + prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + (comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = prci + prcj * res->pw; for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -473,6 +492,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_pcrl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; goto LABEL_SKIP; @@ -508,19 +534,19 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) pi->poc.ty1 = pi->ty1; pi->poc.tx1 = pi->tx1; } - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { comp = &pi->comps[pi->compno]; 
for (pi->resno = pi->poc.resno0; pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; res = &comp->resolutions[pi->resno]; levelno = comp->numresolutions - 1 - pi->resno; /* Avoids division by zero */ @@ -535,10 +561,10 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) (comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -552,12 +578,12 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.4 Position-component-resolution level-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -569,18 +595,18 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, res->pdx); + prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + (comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = prci + prcj * res->pw; for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -604,6 +630,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) opj_pi_resolution_t *res = NULL; OPJ_UINT32 index = 0; + if (pi->poc.compno0 >= pi->numcomps || + pi->poc.compno1 >= pi->numcomps + 1) { + opj_event_msg(pi->manager, EVT_ERROR, + "opj_pi_next_cprl(): invalid compno0/compno1\n"); + return OPJ_FALSE; + } + if (!pi->first) { comp = &pi->comps[pi->compno]; goto LABEL_SKIP; @@ -639,17 +672,17 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) pi->poc.ty1 = pi->ty1; pi->poc.tx1 = pi->tx1; } - for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; - pi->y += (OPJ_INT32)(pi->dy - 
(OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { - for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; - pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { + for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; + pi->y += (pi->dy - (pi->y % pi->dy))) { + for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; + pi->x += (pi->dx - (pi->x % pi->dx))) { for (pi->resno = pi->poc.resno0; pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { OPJ_UINT32 levelno; - OPJ_INT32 trx0, try0; - OPJ_INT32 trx1, try1; + OPJ_UINT32 trx0, try0; + OPJ_UINT32 trx1, try1; OPJ_UINT32 rpx, rpy; - OPJ_INT32 prci, prcj; + OPJ_UINT32 prci, prcj; res = &comp->resolutions[pi->resno]; levelno = comp->numresolutions - 1 - pi->resno; /* Avoids division by zero on id_000004,sig_06,src_000679,op_arith8,pos_49,val_-17 */ @@ -663,10 +696,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) (comp->dy << levelno) > INT_MAX) { continue; } - trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); - try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); - trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); - try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); + trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); + try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); + trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); + try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); rpx = res->pdx + levelno; rpy = res->pdy + levelno; @@ -680,12 +713,12 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) } /* See ISO-15441. B.12.1.5 Component-position-resolution level-layer progression */ - if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && - ((try0 << levelno) % (1 << rpy))))) { + if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && + ((try0 << levelno) % (1U << rpy))))) { continue; } - if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && - ((trx0 << levelno) % (1 << rpx))))) { + if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && + ((trx0 << levelno) % (1U << rpx))))) { continue; } @@ -697,18 +730,18 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) continue; } - prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, - (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) - - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); - prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, - (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) - - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); - pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); + prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, + (comp->dx << levelno)), res->pdx) + - opj_uint_floordivpow2(trx0, res->pdx); + prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, + (comp->dy << levelno)), res->pdy) + - opj_uint_floordivpow2(try0, res->pdy); + pi->precno = (OPJ_UINT32)(prci + prcj * res->pw); for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * pi->step_c + pi->precno * pi->step_p; if (index >= pi->include_size) { - opj_pi_emit_error(pi, "Invalid access to pi->include"); + opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); return OPJ_FALSE; } if (!pi->include[index]) { @@ -729,10 +762,10 @@ LABEL_SKIP: static void opj_get_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - 
OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -768,12 +801,12 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, /* find extent of tile */ l_tx0 = p_cp->tx0 + p * p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ - *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); - *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + *p_tx0 = opj_uint_max(l_tx0, p_image->x0); + *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); l_ty0 = p_cp->ty0 + q * p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ - *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); - *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + *p_ty0 = opj_uint_max(l_ty0, p_image->y0); + *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); /* max precision is 0 (can only grow) */ *p_max_prec = 0; @@ -786,17 +819,17 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, for (compno = 0; compno < p_image->numcomps; ++compno) { /* arithmetic variables to calculate */ OPJ_UINT32 l_level_no; - OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; - OPJ_INT32 l_px0, l_py0, l_px1, py1; + OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; + OPJ_UINT32 l_px0, l_py0, l_px1, py1; OPJ_UINT32 l_pdx, l_pdy; OPJ_UINT32 l_pw, l_ph; OPJ_UINT32 l_product; - OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; - l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); - l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); - l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); - l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); + l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); + l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); + l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); + l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); if (l_tccp->numresolutions > *p_max_res) { *p_max_res = l_tccp->numresolutions; @@ -820,19 +853,19 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, /* various calculations of extents */ l_level_no = l_tccp->numresolutions - 1 - resno; - l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); - l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); - l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); - l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); + l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); + l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); + l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); + l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); - l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; - l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; - l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; + l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; + l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; + l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; - py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; + py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; - l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); - l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); + l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); + l_ph = (l_ry0 == l_ry1) ? 
0 : ((py1 - l_py0) >> l_pdy); l_product = l_pw * l_ph; @@ -850,10 +883,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, static void opj_get_all_encoding_parameters(const opj_image_t *p_image, const opj_cp_t *p_cp, OPJ_UINT32 tileno, - OPJ_INT32 * p_tx0, - OPJ_INT32 * p_tx1, - OPJ_INT32 * p_ty0, - OPJ_INT32 * p_ty1, + OPJ_UINT32 * p_tx0, + OPJ_UINT32 * p_tx1, + OPJ_UINT32 * p_ty0, + OPJ_UINT32 * p_ty1, OPJ_UINT32 * p_dx_min, OPJ_UINT32 * p_dy_min, OPJ_UINT32 * p_max_prec, @@ -894,12 +927,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, /* here calculation of tx0, tx1, ty0, ty1, maxprec, l_dx and l_dy */ l_tx0 = p_cp->tx0 + p * p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ - *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); - *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + *p_tx0 = opj_uint_max(l_tx0, p_image->x0); + *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); l_ty0 = p_cp->ty0 + q * p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ - *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); - *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + *p_ty0 = opj_uint_max(l_ty0, p_image->y0); + *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); /* max precision and resolution is 0 (can only grow)*/ *p_max_prec = 0; @@ -912,18 +945,18 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, for (compno = 0; compno < p_image->numcomps; ++compno) { /* aritmetic variables to calculate*/ OPJ_UINT32 l_level_no; - OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; - OPJ_INT32 l_px0, l_py0, l_px1, py1; + OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; + OPJ_UINT32 l_px0, l_py0, l_px1, py1; OPJ_UINT32 l_product; - OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; OPJ_UINT32 l_pdx, l_pdy, l_pw, l_ph; - lResolutionPtr = p_resolutions[compno]; + lResolutionPtr = p_resolutions ? 
p_resolutions[compno] : NULL; - l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); - l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); - l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); - l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); + l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); + l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); + l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); + l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); if (l_tccp->numresolutions > *p_max_res) { *p_max_res = l_tccp->numresolutions; @@ -939,33 +972,37 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, /* precinct width and height*/ l_pdx = l_tccp->prcw[resno]; l_pdy = l_tccp->prch[resno]; - *lResolutionPtr++ = l_pdx; - *lResolutionPtr++ = l_pdy; + if (lResolutionPtr) { + *lResolutionPtr++ = l_pdx; + *lResolutionPtr++ = l_pdy; + } if (l_pdx + l_level_no < 32 && l_img_comp->dx <= UINT_MAX / (1u << (l_pdx + l_level_no))) { l_dx = l_img_comp->dx * (1u << (l_pdx + l_level_no)); /* take the minimum size for l_dx for each comp and resolution*/ - *p_dx_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dx_min, (OPJ_INT32)l_dx); + *p_dx_min = opj_uint_min(*p_dx_min, l_dx); } if (l_pdy + l_level_no < 32 && l_img_comp->dy <= UINT_MAX / (1u << (l_pdy + l_level_no))) { l_dy = l_img_comp->dy * (1u << (l_pdy + l_level_no)); - *p_dy_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dy_min, (OPJ_INT32)l_dy); + *p_dy_min = opj_uint_min(*p_dy_min, l_dy); } /* various calculations of extents*/ - l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); - l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); - l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); - l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); - l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; - l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; - l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; - py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; - l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); - l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); - *lResolutionPtr++ = l_pw; - *lResolutionPtr++ = l_ph; + l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); + l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); + l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); + l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); + l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; + l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; + l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; + py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; + l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); + l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy); + if (lResolutionPtr) { + *lResolutionPtr++ = l_pw; + *lResolutionPtr++ = l_ph; + } l_product = l_pw * l_ph; /* update precision*/ @@ -981,7 +1018,8 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, const opj_cp_t *cp, - OPJ_UINT32 tileno) + OPJ_UINT32 tileno, + opj_event_mgr_t* manager) { /* loop*/ OPJ_UINT32 pino, compno; @@ -1015,6 +1053,8 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, l_current_pi = l_pi; for (pino = 0; pino < l_poc_bound ; ++pino) { + l_current_pi->manager = manager; + l_current_pi->comps = (opj_pi_comp_t*) opj_calloc(image->numcomps, sizeof(opj_pi_comp_t)); if (! 
l_current_pi->comps) { @@ -1045,10 +1085,10 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -1125,10 +1165,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, OPJ_UINT32 p_num_comps, OPJ_UINT32 p_tileno, - OPJ_INT32 p_tx0, - OPJ_INT32 p_tx1, - OPJ_INT32 p_ty0, - OPJ_INT32 p_ty1, + OPJ_UINT32 p_tx0, + OPJ_UINT32 p_tx1, + OPJ_UINT32 p_ty0, + OPJ_UINT32 p_ty1, OPJ_UINT32 p_max_prec, OPJ_UINT32 p_max_res, OPJ_UINT32 p_dx_min, @@ -1167,10 +1207,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, l_current_poc->prg = l_tcp->prg; l_current_poc->prcS = 0; l_current_poc->prcE = p_max_prec; - l_current_poc->txS = (OPJ_UINT32)p_tx0; - l_current_poc->txE = (OPJ_UINT32)p_tx1; - l_current_poc->tyS = (OPJ_UINT32)p_ty0; - l_current_poc->tyE = (OPJ_UINT32)p_ty1; + l_current_poc->txS = p_tx0; + l_current_poc->txE = p_tx1; + l_current_poc->tyS = p_ty0; + l_current_poc->tyE = p_ty1; l_current_poc->dx = p_dx_min; l_current_poc->dy = p_dy_min; ++ l_current_poc; @@ -1352,7 +1392,8 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, */ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, opj_cp_t *p_cp, - OPJ_UINT32 p_tile_no) + OPJ_UINT32 p_tile_no, + opj_event_mgr_t* manager) { OPJ_UINT32 numcomps = p_image->numcomps; @@ -1367,7 +1408,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, /* encoding prameters to set */ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; OPJ_UINT32 l_bound; OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ; @@ -1407,7 +1448,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, } /* memory allocation for pi */ - l_pi = opj_pi_create(p_image, p_cp, p_tile_no); + l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); if (!l_pi) { opj_free(l_tmp_data); opj_free(l_tmp_ptr); @@ -1548,11 +1589,34 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, } +OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 p_tile_no) +{ + OPJ_UINT32 l_max_res; + OPJ_UINT32 l_max_prec; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_dx_min, l_dy_min; + + /* preconditions in debug*/ + assert(p_cp != 00); + assert(p_image != 00); + assert(p_tile_no < p_cp->tw * p_cp->th); + + /* get encoding parameters*/ + opj_get_all_encoding_parameters(p_image, p_cp, p_tile_no, &l_tx0, &l_tx1, + &l_ty0, &l_ty1, &l_dx_min, &l_dy_min, &l_max_prec, &l_max_res, NULL); + + return p_cp->tcps[p_tile_no].numlayers * l_max_prec * p_image->numcomps * + l_max_res; +} + opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, opj_cp_t *p_cp, OPJ_UINT32 p_tile_no, - J2K_T2_MODE p_t2_mode) + J2K_T2_MODE p_t2_mode, + opj_event_mgr_t* manager) { OPJ_UINT32 numcomps = p_image->numcomps; @@ -1567,7 +1631,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, /* encoding prameters to set*/ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; OPJ_UINT32 l_bound; OPJ_UINT32 
l_step_p, l_step_c, l_step_r, l_step_l ; @@ -1606,7 +1670,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, } /* memory allocation for pi*/ - l_pi = opj_pi_create(p_image, p_cp, p_tile_no); + l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); if (!l_pi) { opj_free(l_tmp_data); opj_free(l_tmp_ptr); @@ -1761,7 +1825,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.prg = tcp->prg; if (!(cp->m_specific_param.m_enc.m_tp_on && ((!OPJ_IS_CINEMA(cp->rsiz) && - (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz)))) { + !OPJ_IS_IMF(cp->rsiz) && + (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)))) { pi[pino].poc.resno0 = tcp->resS; pi[pino].poc.resno1 = tcp->resE; pi[pino].poc.compno0 = tcp->compS; @@ -1770,10 +1835,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.layno1 = tcp->layE; pi[pino].poc.precno0 = tcp->prcS; pi[pino].poc.precno1 = tcp->prcE; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; - pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; - pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; + pi[pino].poc.tx0 = tcp->txS; + pi[pino].poc.ty0 = tcp->tyS; + pi[pino].poc.tx1 = tcp->txE; + pi[pino].poc.ty1 = tcp->tyE; } else { for (i = tppos + 1; i < 4; i++) { switch (prog[i]) { @@ -1797,10 +1862,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.precno1 = tcp->prcE; break; default: - pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; - pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; - pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; + pi[pino].poc.tx0 = tcp->txS; + pi[pino].poc.ty0 = tcp->tyS; + pi[pino].poc.tx1 = tcp->txE; + pi[pino].poc.ty1 = tcp->tyE; break; } break; @@ -1840,10 +1905,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, default: tcp->tx0_t = tcp->txS; tcp->ty0_t = tcp->tyS; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; break; @@ -1875,10 +1940,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, pi[pino].poc.precno1 = tcp->prc_t; break; default: - pi[pino].poc.tx0 = (OPJ_INT32)(tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx)); - pi[pino].poc.tx1 = (OPJ_INT32)tcp->tx0_t ; - pi[pino].poc.ty0 = (OPJ_INT32)(tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy)); - pi[pino].poc.ty1 = (OPJ_INT32)tcp->ty0_t ; + pi[pino].poc.tx0 = tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx); + pi[pino].poc.tx1 = tcp->tx0_t ; + pi[pino].poc.ty0 = tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy); + pi[pino].poc.ty1 = tcp->ty0_t ; break; } break; @@ -1965,8 +2030,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, if (tcp->ty0_t >= tcp->tyE) { if (opj_pi_check_next_level(i - 1, cp, tileno, pino, prog)) { tcp->ty0_t = tcp->tyS; - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; incr_top = 1; resetX = 1; @@ -1975,21 +2040,21 @@ void opj_pi_create_encode(opj_pi_iterator_t 
*pi, resetX = 0; } } else { - pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; - pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); + pi[pino].poc.ty0 = tcp->ty0_t; + pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; incr_top = 0; resetX = 1; } if (resetX == 1) { tcp->tx0_t = tcp->txS; - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; } } else { - pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; - pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); + pi[pino].poc.tx0 = tcp->tx0_t; + pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; incr_top = 0; } @@ -2042,7 +2107,7 @@ void opj_pi_update_encoding_parameters(const opj_image_t *p_image, /* encoding parameters to set */ OPJ_UINT32 l_max_res; OPJ_UINT32 l_max_prec; - OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; OPJ_UINT32 l_dx_min, l_dy_min; /* pointers */ diff --git a/3rdparty/openjpeg/openjp2/pi.h b/3rdparty/openjpeg/openjp2/pi.h index 8c0dc25c19..0320523b76 100644 --- a/3rdparty/openjpeg/openjp2/pi.h +++ b/3rdparty/openjpeg/openjp2/pi.h @@ -102,11 +102,13 @@ typedef struct opj_pi_iterator { /** Components*/ opj_pi_comp_t *comps; /** FIXME DOC*/ - OPJ_INT32 tx0, ty0, tx1, ty1; + OPJ_UINT32 tx0, ty0, tx1, ty1; /** FIXME DOC*/ - OPJ_INT32 x, y; + OPJ_UINT32 x, y; /** FIXME DOC*/ OPJ_UINT32 dx, dy; + /** event manager */ + opj_event_mgr_t* manager; } opj_pi_iterator_t; /** @name Exported functions */ @@ -119,13 +121,15 @@ typedef struct opj_pi_iterator { * @param cp the coding parameters. * @param tileno index of the tile being encoded. * @param t2_mode the type of pass for generating the packet iterator + * @param manager Event manager * * @return a list of packet iterator that points to the first packet of the tile (not true). */ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *image, opj_cp_t *cp, OPJ_UINT32 tileno, - J2K_T2_MODE t2_mode); + J2K_T2_MODE t2_mode, + opj_event_mgr_t* manager); /** * Updates the encoding parameters of the codec. @@ -161,12 +165,14 @@ Create a packet iterator for Decoder @param image Raw image for which the packets will be listed @param cp Coding parameters @param tileno Number that identifies the tile for which to list the packets +@param manager Event manager @return Returns a packet iterator that points to the first packet of the tile @see opj_pi_destroy */ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t * image, opj_cp_t * cp, - OPJ_UINT32 tileno); + OPJ_UINT32 tileno, + opj_event_mgr_t* manager); /** * Destroys a packet iterator array. * @@ -182,6 +188,17 @@ Modify the packet iterator to point to the next packet @return Returns false if pi pointed to the last packet or else returns true */ OPJ_BOOL opj_pi_next(opj_pi_iterator_t * pi); + +/** + * Return the number of packets in the tile. + * @param image the image being encoded. + * @param cp Coding parameters + * @param tileno Number that identifies the tile. 
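/* A worked sizing example (an illustration, not part of the patch): the
 * count returned by opj_get_encoding_packet_count() is the product of the
 * per-tile maxima, numlayers * max_prec * numcomps * max_res, so it is an
 * upper bound on the packets actually emitted. For instance 3 layers,
 * 12 precincts max, 3 components and 6 resolutions give
 * 3 * 12 * 3 * 6 = 648 entries; the t2.c hunk further below uses exactly
 * this value to size the p_packet_size array for PLT marker generation. */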
+ */ +OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 p_tile_no); + /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/3rdparty/openjpeg/openjp2/t1.c b/3rdparty/openjpeg/openjp2/t1.c index f6f7671190..1bea54b0d5 100644 --- a/3rdparty/openjpeg/openjp2/t1.c +++ b/3rdparty/openjpeg/openjp2/t1.c @@ -61,6 +61,13 @@ #define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] +/* Macros to deal with signed integer with just MSB bit set for + * negative values (smr = signed magnitude representation) */ +#define opj_smr_abs(x) (((OPJ_UINT32)(x)) & 0x7FFFFFFFU) +#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31) +#define opj_to_smr(x) ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U)) + + /** @name Local static functions */ /*@{*/ @@ -177,18 +184,18 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( const OPJ_FLOAT64 * mct_norms, OPJ_UINT32 mct_numcomps); -static void opj_t1_encode_cblk(opj_t1_t *t1, - opj_tcd_cblk_enc_t* cblk, - OPJ_UINT32 orient, - OPJ_UINT32 compno, - OPJ_UINT32 level, - OPJ_UINT32 qmfbid, - OPJ_FLOAT64 stepsize, - OPJ_UINT32 cblksty, - OPJ_UINT32 numcomps, - opj_tcd_tile_t * tile, - const OPJ_FLOAT64 * mct_norms, - OPJ_UINT32 mct_numcomps); +/** Return "cumwmsedec" that should be used to increase tile->distotile */ +static double opj_t1_encode_cblk(opj_t1_t *t1, + opj_tcd_cblk_enc_t* cblk, + OPJ_UINT32 orient, + OPJ_UINT32 compno, + OPJ_UINT32 level, + OPJ_UINT32 qmfbid, + OPJ_FLOAT64 stepsize, + OPJ_UINT32 cblksty, + OPJ_UINT32 numcomps, + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps); /** Decode 1 code-block @@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, /** Encode significant pass */ -static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 ci, - OPJ_UINT32 vsc) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 const flags = *flagsp; - - if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && - (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - v = (opj_int_abs(*datap) & one) ? 1 : 0; -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " ctxt1=%d\n", ctxt1); -#endif - opj_mqc_setcurctx(mqc, ctxt1); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v); - } - if (v) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, - flagsp[-1], flagsp[1], - ci); - OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); - v = *datap < 0 ? 
1U : 0U; - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " ctxt2=%d\n", ctxt2); -#endif - opj_mqc_setcurctx(mqc, ctxt2); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - OPJ_UINT32 spb = opj_t1_getspb(lu); -#ifdef DEBUG_ENC_SIG - fprintf(stderr, " spb=%d\n", spb); -#endif - opj_mqc_encode(mqc, v ^ spb); - } - opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); - } - *flagsp |= T1_PI_THIS << (ci * 3U); - } +#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \ +{ \ + OPJ_UINT32 v; \ + const OPJ_UINT32 ci = (ciIn); \ + const OPJ_UINT32 vsc = (vscIn); \ + const OPJ_INT32* l_datap = (datapIn); \ + opj_flag_t* flagsp = (flagspIn); \ + OPJ_UINT32 const flags = *flagsp; \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \ + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \ + v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt1); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + } \ + if (v) { \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + *flagsp, \ + flagsp[-1], flagsp[1], \ + ci); \ + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \ + v = opj_smr_sign(*l_datap); \ + *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ + (OPJ_UINT32)bpno); \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt2); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + OPJ_UINT32 spb = opj_t1_getspb(lu); \ +/* #ifdef DEBUG_ENC_SIG */ \ +/* fprintf(stderr, " spb=%d\n", spb); */ \ +/* #endif */ \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ + } \ + opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \ + } \ + *flagsp |= T1_PI_THIS << (ci * 3U); \ + } \ } static INLINE void opj_t1_dec_sigpass_step_raw( @@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); OPJ_UINT32 const extra = 2; + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; *nmsedec = 0; #ifdef DEBUG_ENC_SIG fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { + const OPJ_UINT32 w = t1->w; #ifdef DEBUG_ENC_SIG fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < w; ++i, ++f, datap += 4) { #ifdef DEBUG_ENC_SIG fprintf(stderr, " i=%d\n", i); #endif if (*f == 0U) { /* Nothing to do for any of the 4 data points */ - f++; continue; } - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 0) * t1->data_stride) + i], + &datap[0], bpno, one, nmsedec, type, 0, cblksty & J2K_CCP_CBLKSTY_VSC); - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 1) * t1->data_stride) + i], + &datap[1], bpno, one, nmsedec, type, 1, 0); - opj_t1_enc_sigpass_step( - t1, + 
opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 2) * t1->data_stride) + i], + &datap[2], bpno, one, nmsedec, type, 2, 0); - opj_t1_enc_sigpass_step( - t1, + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[((k + 3) * t1->data_stride) + i], + &datap[3], bpno, one, nmsedec, type, 3, 0); - ++f; } - f += extra; } if (k < t1->h) { @@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, #ifdef DEBUG_ENC_SIG fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, ++f) { #ifdef DEBUG_ENC_SIG fprintf(stderr, " i=%d\n", i); #endif if (*f == 0U) { /* Nothing to do for any of the 4 data points */ - f++; + datap += (t1->h - k); continue; } - for (j = k; j < t1->h; ++j) { - opj_t1_enc_sigpass_step( - t1, + for (j = k; j < t1->h; ++j, ++datap) { + opj_t1_enc_sigpass_step_macro( + mqc, curctx, a, c, ct, f, - &t1->data[(j * t1->data_stride) + i], + &datap[0], bpno, one, nmsedec, @@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, j - k, (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); } - ++f; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } static void opj_t1_dec_sigpass_raw( @@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw( register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \ const OPJ_UINT32 l_w = w; \ opj_mqc_t* mqc = &(t1->mqc); \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ @@ -651,7 +652,7 @@ static void opj_t1_dec_sigpass_raw( } \ } \ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (j = 0; j < h - k; ++j) { \ @@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc( /** Encode refinement pass step */ -static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 const shift_flags = - (*flagsp >> (ci * 3U)); - - if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { - OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); - *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); - v = (opj_int_abs(*datap) & one) ? 1 : 0; -#ifdef DEBUG_ENC_REF - fprintf(stderr, " ctxt=%d\n", ctxt); -#endif - opj_mqc_setcurctx(mqc, ctxt); - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v); - } - *flagsp |= T1_MU_THIS << (ci * 3U); - } +#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \ +{\ + OPJ_UINT32 v; \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \ + const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \ + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \ + OPJ_UINT32 abs_data = opj_smr_abs(*datap); \ + *nmsedec += opj_t1_getnmsedec_ref(abs_data, \ + (OPJ_UINT32)bpno); \ + v = ((OPJ_INT32)abs_data & one) ? 
1 : 0; \ +/* #ifdef DEBUG_ENC_REF */ \ +/* fprintf(stderr, " ctxt=%d\n", ctxt); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt); \ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ + opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ + } else { \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + } \ + flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \ + } \ } @@ -807,100 +797,104 @@ static void opj_t1_enc_refpass( const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); const OPJ_UINT32 extra = 2U; + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; *nmsedec = 0; #ifdef DEBUG_ENC_REF fprintf(stderr, "enc_refpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { #ifdef DEBUG_ENC_REF fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++, datap += 4) { + const OPJ_UINT32 flags = *f; + OPJ_UINT32 flagsUpdated = flags; #ifdef DEBUG_ENC_REF fprintf(stderr, " i=%d\n", i); #endif - if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { /* none significant */ - f++; continue; } - if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == + if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { /* all processed by sigpass */ - f++; continue; } - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 0) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[0], bpno, one, nmsedec, type, 0); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 1) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[1], bpno, one, nmsedec, type, 1); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 2) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[2], bpno, one, nmsedec, type, 2); - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[((k + 3) * t1->data_stride) + i], + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + flags, flagsUpdated, + &datap[3], bpno, one, nmsedec, type, 3); - ++f; + *f = flagsUpdated; } - f += extra; } if (k < t1->h) { OPJ_UINT32 j; + const OPJ_UINT32 remaining_lines = t1->h - k; #ifdef DEBUG_ENC_REF fprintf(stderr, " k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, ++f) { #ifdef DEBUG_ENC_REF fprintf(stderr, " i=%d\n", i); #endif if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { /* none significant */ - f++; + datap += remaining_lines; continue; } - for (j = k; j < t1->h; ++j) { - opj_t1_enc_refpass_step( - t1, - f, - &t1->data[(j * t1->data_stride) + i], + for (j = 0; j < remaining_lines; ++j, datap ++) { + opj_t1_enc_refpass_step_macro( + mqc, curctx, a, c, ct, + *f, *f, + &datap[0], bpno, one, nmsedec, type, - j - k); + j); } - ++f; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } @@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw( register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ const OPJ_UINT32 l_w = w; \ opj_mqc_t* mqc = &(t1->mqc); \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ poshalf = one >> 1; \ @@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw( } \ } 
\ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (j = 0; j < h - k; ++j) { \ @@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc( /** Encode clean-up pass step */ -static void opj_t1_enc_clnpass_step( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_UINT32 agg, - OPJ_UINT32 runlen, - OPJ_UINT32 lim, - OPJ_UINT32 cblksty) -{ - OPJ_UINT32 v; - OPJ_UINT32 ci; - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | - T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - - if ((*flagsp & check) == check) { - if (runlen == 0) { - *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 1) { - *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 2) { - *flagsp &= ~(T1_PI_2 | T1_PI_3); - } else if (runlen == 3) { - *flagsp &= ~(T1_PI_3); - } - return; - } - - for (ci = runlen; ci < lim; ++ci) { - OPJ_UINT32 vsc; - opj_flag_t flags; - OPJ_UINT32 ctxt1; - - flags = *flagsp; - - if ((agg != 0) && (ci == runlen)) { - goto LABEL_PARTIAL; - } - - if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); -#ifdef DEBUG_ENC_CLN - printf(" ctxt1=%d\n", ctxt1); -#endif - opj_mqc_setcurctx(mqc, ctxt1); - v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_encode(mqc, v); - if (v) { - OPJ_UINT32 ctxt2, spb; - OPJ_UINT32 lu; -LABEL_PARTIAL: - lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, - flagsp[-1], flagsp[1], - ci); - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)bpno); - ctxt2 = opj_t1_getctxno_sc(lu); -#ifdef DEBUG_ENC_CLN - printf(" ctxt2=%d\n", ctxt2); -#endif - opj_mqc_setcurctx(mqc, ctxt2); - - v = *datap < 0 ? 1U : 0U; - spb = opj_t1_getspb(lu); -#ifdef DEBUG_ENC_CLN - printf(" spb=%d\n", spb); -#endif - opj_mqc_encode(mqc, v ^ spb); - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); - } - } - *flagsp &= ~(T1_PI_THIS << (3U * ci)); - datap += t1->data_stride; - } +#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \ +{ \ + OPJ_UINT32 v; \ + OPJ_UINT32 ci; \ + opj_flag_t* const flagsp = (flagspIn); \ + const OPJ_INT32* l_datap = (datapIn); \ + const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \ + T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + \ + if ((*flagsp & check) == check) { \ + if (runlen == 0) { \ + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + } else if (runlen == 1) { \ + *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \ + } else if (runlen == 2) { \ + *flagsp &= ~(T1_PI_2 | T1_PI_3); \ + } else if (runlen == 3) { \ + *flagsp &= ~(T1_PI_3); \ + } \ + } \ + else \ + for (ci = runlen; ci < lim; ++ci) { \ + OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \ + if ((agg != 0) && (ci == runlen)) { \ + goto_PARTIAL = OPJ_TRUE; \ + } \ + else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" ctxt1=%d\n", ctxt1); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt1); \ + v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 
1 : 0; \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ + if (v) { \ + goto_PARTIAL = OPJ_TRUE; \ + } \ + } \ + if( goto_PARTIAL ) { \ + OPJ_UINT32 vsc; \ + OPJ_UINT32 ctxt2, spb; \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + *flagsp, \ + flagsp[-1], flagsp[1], \ + ci); \ + *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ + (OPJ_UINT32)bpno); \ + ctxt2 = opj_t1_getctxno_sc(lu); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" ctxt2=%d\n", ctxt2); */ \ +/* #endif */ \ + opj_t1_setcurctx(curctx, ctxt2); \ + \ + v = opj_smr_sign(*l_datap); \ + spb = opj_t1_getspb(lu); \ +/* #ifdef DEBUG_ENC_CLN */ \ +/* printf(" spb=%d\n", spb); */\ +/* #endif */ \ + opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \ + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \ + } \ + *flagsp &= ~(T1_PI_THIS << (3U * ci)); \ + l_datap ++; \ + } \ } #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ @@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass( { OPJ_UINT32 i, k; const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); - OPJ_UINT32 agg, runlen; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ + opj_mqc_t* mqc = &(t1->mqc); + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + const OPJ_INT32* datap = t1->data; + opj_flag_t *f = &T1_FLAGS(0, 0); + const OPJ_UINT32 extra = 2U; *nmsedec = 0; #ifdef DEBUG_ENC_CLN printf("enc_clnpass: bpno=%d\n", bpno); #endif - for (k = 0; k < (t1->h & ~3U); k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { #ifdef DEBUG_ENC_CLN printf(" k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++) { + OPJ_UINT32 agg, runlen; #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); #endif - agg = !(T1_FLAGS(i, k)); + agg = !*f; #ifdef DEBUG_ENC_CLN printf(" agg=%d\n", agg); #endif if (agg) { - for (runlen = 0; runlen < 4; ++runlen) { - if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) { + for (runlen = 0; runlen < 4; ++runlen, ++datap) { + if (opj_smr_abs(*datap) & (OPJ_UINT32)one) { break; } } - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); - opj_mqc_encode(mqc, runlen != 4); + opj_t1_setcurctx(curctx, T1_CTXNO_AGG); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4); if (runlen == 4) { continue; } - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); - opj_mqc_encode(mqc, runlen >> 1); - opj_mqc_encode(mqc, runlen & 1); + opj_t1_setcurctx(curctx, T1_CTXNO_UNI); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1); + opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1); } else { runlen = 0; } - opj_t1_enc_clnpass_step( - t1, - &T1_FLAGS(i, k), - &t1->data[((k + runlen) * t1->data_stride) + i], + opj_t1_enc_clnpass_step_macro( + mqc, curctx, a, c, ct, + f, + datap, bpno, one, nmsedec, @@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass( runlen, 4U, cblksty); + datap += 4 - runlen; } } if (k < t1->h) { - agg = 0; - runlen = 0; + const OPJ_UINT32 agg = 0; + const OPJ_UINT32 runlen = 0; #ifdef DEBUG_ENC_CLN printf(" k=%d\n", k); #endif - for (i = 0; i < t1->w; ++i) { + for (i = 0; i < t1->w; ++i, f++) { #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); printf(" agg=%d\n", agg); #endif - opj_t1_enc_clnpass_step( - t1, - &T1_FLAGS(i, k), - &t1->data[((k + runlen) * t1->data_stride) + i], + opj_t1_enc_clnpass_step_macro( + mqc, curctx, a, c, ct, + f, + datap, bpno, one, nmsedec, @@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass( runlen, t1->h - k, cblksty); + datap += t1->h - k; } } + + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); } 
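/* The three encoder passes above (sigpass, refpass, clnpass) now share one
 * shape: the MQ-coder state is pulled into locals once per pass and written
 * back once at the end, and coefficients are read in the signed-magnitude
 * form produced by opj_to_smr(). A deliberately simplified sketch of that
 * shape, assuming DOWNLOAD_MQC_VARIABLES/UPLOAD_MQC_VARIABLES copy the coder
 * state (curctx, a, c, ct) to and from locals as their names suggest; the
 * real passes iterate stripe-by-stripe over the flags array rather than
 * linearly as here. */
static void sketch_encode_pass(opj_t1_t *t1, OPJ_INT32 one)
{
    opj_mqc_t* mqc = &(t1->mqc);
    const OPJ_INT32* datap = t1->data;
    const OPJ_UINT32 n = t1->w * t1->h;
    OPJ_UINT32 i;

    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); /* state -> locals */
    opj_t1_setcurctx(curctx, T1_CTXNO_UNI);        /* pick some context */
    for (i = 0; i < n; ++i, ++datap) {
        /* signed-magnitude representation: magnitude is the low 31 bits,
         * sign is the MSB, e.g. opj_to_smr(-5) == 0x80000005U,
         * opj_smr_abs(0x80000005U) == 5, opj_smr_sign(0x80000005U) == 1 */
        const OPJ_UINT32 mag  = opj_smr_abs(*datap);
        const OPJ_UINT32 sign = opj_smr_sign(*datap);
        opj_mqc_encode_macro(mqc, curctx, a, c, ct,
                             (mag & (OPJ_UINT32)one) ? 1 : 0);
        opj_mqc_encode_macro(mqc, curctx, a, c, ct, sign);
    }
    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);   /* locals -> state */
}
/* Keeping curctx/a/c/ct in locals across the whole pass is what the
 * per-sample helper functions were turned into macros for: the coder state
 * no longer bounces through memory on every coefficient. */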
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \ @@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass( opj_mqc_t* mqc = &(t1->mqc); \ register OPJ_INT32 *data = t1->data; \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ @@ -1319,7 +1305,7 @@ static void opj_t1_enc_clnpass( *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ if( k < h ) { \ for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ for (j = 0; j < h - k; ++j) { \ @@ -1426,7 +1412,11 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( if (qmfbid == 1) { w2 = opj_dwt_getnorm(level, orient); } else { /* if (qmfbid == 0) */ + const OPJ_INT32 log2_gain = (orient == 0) ? 0 : + (orient == 3) ? 2 : 1; w2 = opj_dwt_getnorm_real(level, orient); + /* Not sure this is right. But preserves past behaviour */ + stepsize /= (1 << log2_gain); } wmsedec = w1 * w2 * stepsize * (1 << bpno); @@ -1450,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( assert(w * h <= 4096); /* encoder uses tile buffer, so no need to allocate */ - if (!t1->encoder) { + { OPJ_UINT32 datasize = w * h; if (datasize > t1->datasize) { @@ -1560,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1) return; } - /* encoder uses tile buffer, so no need to free */ - if (!p_t1->encoder && p_t1->data) { + if (p_t1->data) { opj_aligned_free(p_t1->data); p_t1->data = 00; } @@ -1658,7 +1647,21 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); if (t1 == NULL) { t1 = opj_t1_create(OPJ_FALSE); - opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); + if (t1 == NULL) { + opj_event_msg(job->p_manager, EVT_ERROR, + "Cannot allocate Tier 1 handle\n"); + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) { + opj_event_msg(job->p_manager, EVT_ERROR, + "Unable to set t1 handle as TLS\n"); + opj_t1_destroy(t1); + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } } t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer; @@ -1725,10 +1728,11 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) datap[i] /= 2; } } else { /* if (tccp->qmfbid == 0) */ + const float stepsize = 0.5f * band->stepsize; i = 0; #ifdef __SSE2__ { - const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize); + const __m128 xmm_stepsize = _mm_set1_ps(stepsize); for (; i < (cblk_size & ~15U); i += 16) { __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( datap + 0))); @@ -1747,7 +1751,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } #endif for (; i < cblk_size; ++i) { - OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; + OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize; memcpy(datap, &tmp, sizeof(tmp)); datap++; } @@ -1773,12 +1777,13 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else { /* if (tccp->qmfbid == 0) */ + const float stepsize = 0.5f * band->stepsize; OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { - OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize; + OPJ_FLOAT32 tmp = 
(OPJ_FLOAT32) * datap * stepsize; *tiledp2 = tmp; datap++; tiledp2++; @@ -2100,124 +2105,232 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } +typedef struct { + OPJ_UINT32 compno; + OPJ_UINT32 resno; + opj_tcd_cblk_enc_t* cblk; + opj_tcd_tile_t *tile; + opj_tcd_band_t* band; + opj_tcd_tilecomp_t* tilec; + opj_tccp_t* tccp; + const OPJ_FLOAT64 * mct_norms; + OPJ_UINT32 mct_numcomps; + volatile OPJ_BOOL* pret; + opj_mutex_t* mutex; +} opj_t1_cblk_encode_processing_job_t; + +/** Procedure to deal with a asynchronous code-block encoding job. + * + * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure + * @param tls TLS handle. + */ +static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) +{ + opj_t1_cblk_encode_processing_job_t* job = + (opj_t1_cblk_encode_processing_job_t*)user_data; + opj_tcd_cblk_enc_t* cblk = job->cblk; + const opj_tcd_band_t* band = job->band; + const opj_tcd_tilecomp_t* tilec = job->tilec; + const opj_tccp_t* tccp = job->tccp; + const OPJ_UINT32 resno = job->resno; + opj_t1_t* t1; + const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + + OPJ_INT32* OPJ_RESTRICT tiledp; + OPJ_UINT32 cblk_w; + OPJ_UINT32 cblk_h; + OPJ_UINT32 i, j; + + OPJ_INT32 x = cblk->x0 - band->x0; + OPJ_INT32 y = cblk->y0 - band->y0; + + if (!*(job->pret)) { + opj_free(job); + return; + } + + t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); + if (t1 == NULL) { + t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */ + opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); + } + + if (band->bandno & 1) { + opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; + x += pres->x1 - pres->x0; + } + if (band->bandno & 2) { + opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; + y += pres->y1 - pres->y0; + } + + if (!opj_t1_allocate_buffers( + t1, + (OPJ_UINT32)(cblk->x1 - cblk->x0), + (OPJ_UINT32)(cblk->y1 - cblk->y0))) { + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + + cblk_w = t1->w; + cblk_h = t1->h; + + tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; + + if (tccp->qmfbid == 1) { + /* Do multiplication on unsigned type, even if the + * underlying type is signed, to avoid potential + * int overflow on large value (the output will be + * incorrect in such situation, but whatever...) 
+ * This assumes complement-to-2 signed integer + * representation + * Fixes https://github.com/uclouvain/openjpeg/issues/1053 + */ + OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; + OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data; + /* Change from "natural" order to "zigzag" order of T1 passes */ + for (j = 0; j < (cblk_h & ~3U); j += 4) { + for (i = 0; i < cblk_w; ++i) { + t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data += 4; + } + } + if (j < cblk_h) { + for (i = 0; i < cblk_w; ++i) { + OPJ_UINT32 k; + for (k = j; k < cblk_h; k++) { + t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS; + t1data ++; + } + } + } + } else { /* if (tccp->qmfbid == 0) */ + OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp; + OPJ_INT32* OPJ_RESTRICT t1data = t1->data; + /* Change from "natural" order to "zigzag" order of T1 passes */ + for (j = 0; j < (cblk_h & ~3U); j += 4) { + for (i = 0; i < cblk_w; ++i) { + t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] / + band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); + t1data += 4; + } + } + if (j < cblk_h) { + for (i = 0; i < cblk_w; ++i) { + OPJ_UINT32 k; + for (k = j; k < cblk_h; k++) { + t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize) + * (1 << T1_NMSEDEC_FRACBITS)); + t1data ++; + } + } + } + } + + { + OPJ_FLOAT64 cumwmsedec = + opj_t1_encode_cblk( + t1, + cblk, + band->bandno, + job->compno, + tilec->numresolutions - 1 - resno, + tccp->qmfbid, + band->stepsize, + tccp->cblksty, + job->tile->numcomps, + job->mct_norms, + job->mct_numcomps); + if (job->mutex) { + opj_mutex_lock(job->mutex); + } + job->tile->distotile += cumwmsedec; + if (job->mutex) { + opj_mutex_unlock(job->mutex); + } + } + + opj_free(job); +} -OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, +OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, opj_tcd_tile_t *tile, opj_tcp_t *tcp, const OPJ_FLOAT64 * mct_norms, OPJ_UINT32 mct_numcomps ) { + volatile OPJ_BOOL ret = OPJ_TRUE; + opj_thread_pool_t* tp = tcd->thread_pool; OPJ_UINT32 compno, resno, bandno, precno, cblkno; + opj_mutex_t* mutex = opj_mutex_create(); tile->distotile = 0; /* fixed_quality */ for (compno = 0; compno < tile->numcomps; ++compno) { opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; opj_tccp_t* tccp = &tcp->tccps[compno]; - OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); for (resno = 0; resno < tilec->numresolutions; ++resno) { opj_tcd_resolution_t *res = &tilec->resolutions[resno]; for (bandno = 0; bandno < res->numbands; ++bandno) { opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; - OPJ_INT32 bandconst; /* Skip empty bands */ if (opj_tcd_is_band_empty(band)) { continue; } - - bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192)); for (precno = 0; precno < res->pw * res->ph; ++precno) { opj_tcd_precinct_t *prc = &band->precincts[precno]; for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { opj_tcd_cblk_enc_t* cblk = 
&prc->cblks.enc[cblkno]; - OPJ_INT32* OPJ_RESTRICT tiledp; - OPJ_UINT32 cblk_w; - OPJ_UINT32 cblk_h; - OPJ_UINT32 i, j, tileLineAdvance; - OPJ_SIZE_T tileIndex = 0; - OPJ_INT32 x = cblk->x0 - band->x0; - OPJ_INT32 y = cblk->y0 - band->y0; - if (band->bandno & 1) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - x += pres->x1 - pres->x0; + opj_t1_cblk_encode_processing_job_t* job = + (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1, + sizeof(opj_t1_cblk_encode_processing_job_t)); + if (!job) { + ret = OPJ_FALSE; + goto end; } - if (band->bandno & 2) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - y += pres->y1 - pres->y0; - } - - if (!opj_t1_allocate_buffers( - t1, - (OPJ_UINT32)(cblk->x1 - cblk->x0), - (OPJ_UINT32)(cblk->y1 - cblk->y0))) { - return OPJ_FALSE; - } - - cblk_w = t1->w; - cblk_h = t1->h; - tileLineAdvance = tile_w - cblk_w; - - tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; - t1->data = tiledp; - t1->data_stride = tile_w; - if (tccp->qmfbid == 1) { - /* Do multiplication on unsigned type, even if the - * underlying type is signed, to avoid potential - * int overflow on large value (the output will be - * incorrect in such situation, but whatever...) - * This assumes complement-to-2 signed integer - * representation - * Fixes https://github.com/uclouvain/openjpeg/issues/1053 - */ - OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS; - tileIndex++; - } - tileIndex += tileLineAdvance; - } - } else { /* if (tccp->qmfbid == 0) */ - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - OPJ_INT32 tmp = tiledp[tileIndex]; - tiledp[tileIndex] = - opj_int_fix_mul_t1( - tmp, - bandconst); - tileIndex++; - } - tileIndex += tileLineAdvance; - } - } - - opj_t1_encode_cblk( - t1, - cblk, - band->bandno, - compno, - tilec->numresolutions - 1 - resno, - tccp->qmfbid, - band->stepsize, - tccp->cblksty, - tile->numcomps, - tile, - mct_norms, - mct_numcomps); + job->compno = compno; + job->tile = tile; + job->resno = resno; + job->cblk = cblk; + job->band = band; + job->tilec = tilec; + job->tccp = tccp; + job->mct_norms = mct_norms; + job->mct_numcomps = mct_numcomps; + job->pret = &ret; + job->mutex = mutex; + opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job); } /* cblkno */ } /* precno */ } /* bandno */ } /* resno */ } /* compno */ - return OPJ_TRUE; + +end: + opj_thread_pool_wait_completion(tcd->thread_pool, 0); + if (mutex) { + opj_mutex_destroy(mutex); + } + + return ret; } /* Returns whether the pass (bpno, passtype) is terminated */ @@ -2252,18 +2365,17 @@ static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk, /** mod fixed_quality */ -static void opj_t1_encode_cblk(opj_t1_t *t1, - opj_tcd_cblk_enc_t* cblk, - OPJ_UINT32 orient, - OPJ_UINT32 compno, - OPJ_UINT32 level, - OPJ_UINT32 qmfbid, - OPJ_FLOAT64 stepsize, - OPJ_UINT32 cblksty, - OPJ_UINT32 numcomps, - opj_tcd_tile_t * tile, - const OPJ_FLOAT64 * mct_norms, - OPJ_UINT32 mct_numcomps) +static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1, + opj_tcd_cblk_enc_t* cblk, + OPJ_UINT32 orient, + OPJ_UINT32 compno, + OPJ_UINT32 level, + OPJ_UINT32 qmfbid, + OPJ_FLOAT64 stepsize, + OPJ_UINT32 cblksty, + OPJ_UINT32 numcomps, + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps) { OPJ_FLOAT64 cumwmsedec = 0.0; @@ -2277,6 +2389,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_UINT32 i, j; OPJ_BYTE type = T1_TYPE_MQ; OPJ_FLOAT64 
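Structurally, the rewritten opj_t1_encode_cblks() is a fan-out/fan-in: one heap-allocated job per code block, a volatile shared success flag that lets later jobs bail out early, and a mutex that serializes only the distotile accumulation (the per-thread opj_t1_t scratch object is cached via TLS). A minimal sketch of the same pattern using plain pthreads in place of the opj_thread_pool_*/opj_mutex_* wrappers; the job payload and the fake workload are illustrative only:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int value;                   /* stand-in for one code block's input */
    volatile int *pret;          /* shared success flag, like job->pret */
    double *accumulator;         /* shared sum, like tile->distotile */
    pthread_mutex_t *mutex;      /* guards only the accumulation */
} job_t;

static void *worker(void *user_data)
{
    job_t *job = (job_t *)user_data;
    if (*(job->pret)) {          /* skip the work if another job already failed */
        double d = (double)job->value * job->value;  /* fake workload */
        pthread_mutex_lock(job->mutex);
        *(job->accumulator) += d;                    /* short critical section */
        pthread_mutex_unlock(job->mutex);
    }
    free(job);                   /* each job frees itself, as above */
    return NULL;
}

int main(void)
{
    enum { N = 8 };
    pthread_t tid[N];
    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
    volatile int ret = 1;
    double total = 0.0;
    int i;

    for (i = 0; i < N; i++) {    /* fan-out: one job per "code block" */
        job_t *job = (job_t *)calloc(1, sizeof(job_t));
        job->value = i;
        job->pret = &ret;
        job->accumulator = &total;
        job->mutex = &mutex;
        pthread_create(&tid[i], NULL, worker, job);
    }
    for (i = 0; i < N; i++)      /* fan-in, like opj_thread_pool_wait_completion */
        pthread_join(tid[i], NULL);

    printf("total=%f ok=%d\n", total, ret);
    return 0;
}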
tempwmsedec; + OPJ_INT32* datap; #ifdef EXTRA_DEBUG printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n", @@ -2286,10 +2399,19 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); max = 0; - for (i = 0; i < t1->w; ++i) { - for (j = 0; j < t1->h; ++j) { - OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]); - max = opj_int_max(max, tmp); + datap = t1->data; + for (j = 0; j < t1->h; ++j) { + const OPJ_UINT32 w = t1->w; + for (i = 0; i < w; ++i, ++datap) { + OPJ_INT32 tmp = *datap; + if (tmp < 0) { + OPJ_UINT32 tmp_unsigned; + max = opj_int_max(max, -tmp); + tmp_unsigned = opj_to_smr(tmp); + memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32)); + } else { + max = opj_int_max(max, tmp); + } } } @@ -2297,7 +2419,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, T1_NMSEDEC_FRACBITS) : 0; if (cblk->numbps == 0) { cblk->totalpasses = 0; - return; + return cumwmsedec; } bpno = (OPJ_INT32)(cblk->numbps - 1); @@ -2343,7 +2465,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid, stepsize, numcomps, mct_norms, mct_numcomps) ; cumwmsedec += tempwmsedec; - tile->distotile += tempwmsedec; pass->distortiondec = cumwmsedec; if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) { @@ -2425,4 +2546,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, } } #endif + + return cumwmsedec; } diff --git a/3rdparty/openjpeg/openjp2/t1.h b/3rdparty/openjpeg/openjp2/t1.h index 171dfb0a7a..81ad0d00f1 100644 --- a/3rdparty/openjpeg/openjp2/t1.h +++ b/3rdparty/openjpeg/openjp2/t1.h @@ -198,7 +198,6 @@ typedef struct opj_t1 { OPJ_UINT32 h; OPJ_UINT32 datasize; OPJ_UINT32 flagssize; - OPJ_UINT32 data_stride; OPJ_BOOL encoder; /* Thre 3 variables below are only used by the decoder */ @@ -216,13 +215,13 @@ typedef struct opj_t1 { /** Encode the code-blocks of a tile -@param t1 T1 handle +@param tcd TCD handle @param tile The tile to encode @param tcp Tile coding parameters @param mct_norms FIXME DOC @param mct_numcomps Number of components used for MCT */ -OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, +OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, opj_tcd_tile_t *tile, opj_tcp_t *tcp, const OPJ_FLOAT64 * mct_norms, diff --git a/3rdparty/openjpeg/openjp2/t2.c b/3rdparty/openjpeg/openjp2/t2.c index 9825118cfd..1481e16f46 100644 --- a/3rdparty/openjpeg/openjp2/t2.c +++ b/3rdparty/openjpeg/openjp2/t2.c @@ -224,6 +224,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_len, opj_codestream_info_t *cstr_info, + opj_tcd_marker_info_t* p_marker_info, OPJ_UINT32 p_tp_num, OPJ_INT32 p_tp_pos, OPJ_UINT32 p_pino, @@ -244,7 +245,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, l_image->numcomps : 1; OPJ_UINT32 l_nb_pocs = l_tcp->numpocs + 1; - l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode); + l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode, p_manager); if (!l_pi) { return OPJ_FALSE; } @@ -310,6 +311,20 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, opj_pi_destroy(l_pi, l_nb_pocs); return OPJ_FALSE; } + + if (p_marker_info && p_marker_info->need_PLT) { + /* One time use intended */ + assert(p_marker_info->packet_count == 0); + assert(p_marker_info->p_packet_size == NULL); + + p_marker_info->p_packet_size = (OPJ_UINT32*) opj_malloc( + opj_get_encoding_packet_count(l_image, l_cp, p_tile_no) * sizeof(OPJ_UINT32)); + if (p_marker_info->p_packet_size == NULL) { + opj_pi_destroy(l_pi, l_nb_pocs); + return 
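Two details in the max-computation loop above are easy to miss: negative coefficients are rewritten in place into a sign-magnitude form (opj_to_smr) so the coding passes can test the sign bit cheaply, and the store goes through memcpy so that writing an unsigned bit pattern through the signed data pointer stays well-defined. A hedged sketch of such a conversion, assuming the conventional layout (sign in bit 31, magnitude in the low 31 bits); see the opj_to_smr definition in t1.h for the authoritative macro:

#include <stdint.h>

static uint32_t to_smr(int32_t v)
{
    /* sign-magnitude: top bit = sign, low 31 bits = |v|; the negation
       goes through int64_t so INT32_MIN cannot overflow (magnitudes are
       assumed to fit in 31 bits, as the encoder guarantees) */
    return (v >= 0) ? (uint32_t)v
                    : ((uint32_t)(-(int64_t)v) | 0x80000000u);
}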
OPJ_FALSE; + } + } + while (opj_pi_next(l_current_pi)) { if (l_current_pi->layno < p_maxlayers) { l_nb_bytes = 0; @@ -326,6 +341,11 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, * p_data_written += l_nb_bytes; + if (p_marker_info && p_marker_info->need_PLT) { + p_marker_info->p_packet_size[p_marker_info->packet_count] = l_nb_bytes; + p_marker_info->packet_count ++; + } + /* INDEX >> */ if (cstr_info) { if (cstr_info->index_write) { @@ -405,7 +425,7 @@ OPJ_BOOL opj_t2_decode_packets(opj_tcd_t* tcd, #endif /* create a packet iterator */ - l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no); + l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no, p_manager); if (!l_pi) { return OPJ_FALSE; } @@ -673,6 +693,14 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, OPJ_BOOL packet_empty = OPJ_FALSE; #endif +#ifdef DEBUG_VERBOSE + if (p_t2_mode == FINAL_PASS) { + fprintf(stderr, + "encode packet compno=%d, resno=%d, precno=%d, layno=%d\n", + compno, resno, precno, layno); + } +#endif + /* */ if (tcp->csty & J2K_CP_CSTY_SOP) { if (length < 6) { @@ -711,6 +739,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, continue; } + /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1294 */ + /* but likely not a proper fix. */ + if (precno >= res->pw * res->ph) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_t2_encode_packet(): accessing precno=%u >= %u\n", + precno, res->pw * res->ph); + return OPJ_FALSE; + } + prc = &band->precincts[precno]; opj_tgt_reset(prc->incltree); opj_tgt_reset(prc->imsbtree); @@ -778,6 +815,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, continue; } + /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1297 */ + /* but likely not a proper fix. */ + if (precno >= res->pw * res->ph) { + opj_event_msg(p_manager, EVT_ERROR, + "opj_t2_encode_packet(): accessing precno=%u >= %u\n", + precno, res->pw * res->ph); + return OPJ_FALSE; + } + prc = &band->precincts[precno]; l_nb_blocks = prc->cw * prc->ch; cblk = prc->cblks.enc; diff --git a/3rdparty/openjpeg/openjp2/t2.h b/3rdparty/openjpeg/openjp2/t2.h index 66500b1699..becfa91a4d 100644 --- a/3rdparty/openjpeg/openjp2/t2.h +++ b/3rdparty/openjpeg/openjp2/t2.h @@ -73,6 +73,7 @@ Encode the packets of a tile to a destination buffer @param p_data_written FIXME DOC @param len the length of the destination buffer @param cstr_info Codestream information structure +@param p_marker_info Marker information structure @param tpnum Tile part number of the current tile @param tppos The position of the tile part flag in the progression order @param pino FIXME DOC @@ -87,6 +88,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* t2, OPJ_UINT32 * p_data_written, OPJ_UINT32 len, opj_codestream_info_t *cstr_info, + opj_tcd_marker_info_t* p_marker_info, OPJ_UINT32 tpnum, OPJ_INT32 tppos, OPJ_UINT32 pino, diff --git a/3rdparty/openjpeg/openjp2/tcd.c b/3rdparty/openjpeg/openjp2/tcd.c index be3b84363f..6442669d60 100644 --- a/3rdparty/openjpeg/openjp2/tcd.c +++ b/3rdparty/openjpeg/openjp2/tcd.c @@ -112,7 +112,7 @@ void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img) * Initializes tile coding/decoding */ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, - OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, + OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, opj_event_mgr_t* manager); /** @@ -182,6 +182,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_dest_size,
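The p_packet_size[] array filled above is the raw material for PLT marker segments, where each packet length is written as a big-endian base-128 varint with the continuation bit set on every byte but the last (ISO/IEC 15444-1, A.7.3). A sketch of that final encoding step; encode_plt_length is an illustrative helper, not an OpenJPEG API:

#include <stdint.h>

static int encode_plt_length(uint32_t len, uint8_t *out)
{
    uint8_t tmp[5];               /* a 32-bit length needs at most 5 groups */
    int n = 0, i;
    do {                          /* collect 7-bit groups, little end first */
        tmp[n++] = (uint8_t)(len & 0x7Fu);
        len >>= 7;
    } while (len != 0);
    for (i = 0; i < n; i++)       /* emit big-endian, high bit on all but last */
        out[i] = (uint8_t)(tmp[n - 1 - i] | (i + 1 < n ? 0x80u : 0));
    return n;                     /* number of bytes written */
}

For example, a packet length of 300 encodes as the two bytes 0x82 0x2C.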
opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager); static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, @@ -573,9 +574,10 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, opj_tcd_makelayer(tcd, layno, thresh, 0); if (cp->m_specific_param.m_enc.m_fixed_quality) { /* fixed_quality */ - if (OPJ_IS_CINEMA(cp->rsiz)) { + if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, - p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, + p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, + tcd->cur_pino, THRESH_CALC, p_manager)) { lo = thresh; @@ -605,7 +607,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, } } else { if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, - p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, + p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, + tcd->cur_pino, THRESH_CALC, p_manager)) { /* TODO: what to do with l ??? seek / tell ??? */ /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */ @@ -718,10 +721,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) /* ----------------------------------------------------------------------- */ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, - OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, + OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, opj_event_mgr_t* manager) { - OPJ_UINT32(*l_gain_ptr)(OPJ_UINT32) = 00; OPJ_UINT32 compno, resno, bandno, precno, cblkno; opj_tcp_t * l_tcp = 00; opj_cp_t * l_cp = 00; @@ -737,7 +739,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, OPJ_UINT32 p, q; OPJ_UINT32 l_level_no; OPJ_UINT32 l_pdx, l_pdy; - OPJ_UINT32 l_gain; OPJ_INT32 l_x0b, l_y0b; OPJ_UINT32 l_tx0, l_ty0; /* extent of precincts , top left, bottom right**/ @@ -876,11 +877,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_level_no = l_tilec->numresolutions; l_res = l_tilec->resolutions; l_step_size = l_tccp->stepsizes; - if (l_tccp->qmfbid == 0) { - l_gain_ptr = &opj_dwt_getgain_real; - } else { - l_gain_ptr = &opj_dwt_getgain; - } /*fprintf(stderr, "\tlevel_no=%d\n",l_level_no);*/ for (resno = 0; resno < l_tilec->numresolutions; ++resno) { @@ -905,8 +901,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, /* p. 
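The rate-allocation fragments above (the lo = thresh branches) sit inside a bisection over the truncation threshold: each trial threshold is used to build layers and simulate packet emission, and the interval is narrowed depending on whether the result fits the rate budget. A generic sketch of that search, with a hypothetical fits() predicate standing in for the trial opj_t2_encode_packets() run (the real loop also tracks the best stable threshold and distortion targets):

static double search_threshold(double lo, double hi,
                               int (*fits)(double thresh, void *ctx),
                               void *ctx)
{
    int i;
    for (i = 0; i < 32; i++) {    /* bounded iteration, as in rateallocate */
        double mid = (lo + hi) / 2.0;
        if (fits(mid, ctx))
            hi = mid;             /* fits: try a smaller threshold, keep more data */
        else
            lo = mid;             /* too big: truncate more aggressively */
    }
    return hi;                    /* smallest threshold known to fit */
}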
64, B.6, ISO/IEC FDIS15444-1 : 2000 (18 august 2000) */ l_tl_prc_x_start = opj_int_floordivpow2(l_res->x0, (OPJ_INT32)l_pdx) << l_pdx; l_tl_prc_y_start = opj_int_floordivpow2(l_res->y0, (OPJ_INT32)l_pdy) << l_pdy; - l_br_prc_x_end = opj_int_ceildivpow2(l_res->x1, (OPJ_INT32)l_pdx) << l_pdx; - l_br_prc_y_end = opj_int_ceildivpow2(l_res->y1, (OPJ_INT32)l_pdy) << l_pdy; + { + OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->x1, + (OPJ_INT32)l_pdx)) << l_pdx; + if (tmp > (OPJ_UINT32)INT_MAX) { + opj_event_msg(manager, EVT_ERROR, "Integer overflow\n"); + return OPJ_FALSE; + } + l_br_prc_x_end = (OPJ_INT32)tmp; + } + { + OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->y1, + (OPJ_INT32)l_pdy)) << l_pdy; + if (tmp > (OPJ_UINT32)INT_MAX) { + opj_event_msg(manager, EVT_ERROR, "Integer overflow\n"); + return OPJ_FALSE; + } + l_br_prc_y_end = (OPJ_INT32)tmp; + } /*fprintf(stderr, "\t\t\tprc_x_start=%d, prc_y_start=%d, br_prc_x_end=%d, br_prc_y_end=%d \n", l_tl_prc_x_start, l_tl_prc_y_start, l_br_prc_x_end ,l_br_prc_y_end );*/ l_res->pw = (l_res->x0 == l_res->x1) ? 0U : (OPJ_UINT32)(( @@ -951,7 +963,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_band = l_res->bands; for (bandno = 0; bandno < l_res->numbands; ++bandno, ++l_band, ++l_step_size) { - OPJ_INT32 numbps; /*fprintf(stderr, "\t\t\tband_no=%d/%d\n", bandno, l_res->numbands );*/ if (resno == 0) { @@ -987,11 +998,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, } } - /** avoid an if with storing function pointer */ - l_gain = (*l_gain_ptr)(l_band->bandno); - numbps = (OPJ_INT32)(l_image_comp->prec + l_gain); - l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, - (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction; + { + /* Table E-1 - Sub-band gains */ + /* BUG_WEIRD_TWO_INVK (look for this identifier in dwt.c): */ + /* the test (!isEncoder && l_tccp->qmfbid == 0) is strongly */ + /* linked to the use of two_invK instead of invK */ + const OPJ_INT32 log2_gain = (!isEncoder && + l_tccp->qmfbid == 0) ? 0 : (l_band->bandno == 0) ? 0 : + (l_band->bandno == 3) ? 2 : 1; + + /* Nominal dynamic range. 
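The two new l_br_prc_x_end/l_br_prc_y_end blocks above replace a plain ceildivpow2-then-shift with a guarded version: the shift is done in unsigned arithmetic and range-checked against INT_MAX before narrowing back to the signed precinct coordinate. The same pattern in isolation (checked_ceil_align is illustrative; like opj_int_ceildivpow2, the ceil-division is widened to 64 bits, and very large shift amounts are assumed to be bounded as in the real code):

#include <limits.h>

/* Round v up to a multiple of 2^log2_step without signed overflow. */
static int checked_ceil_align(int v, int log2_step, int *out)
{
    unsigned int ceildiv =
        (unsigned int)(((long long)v + (1LL << log2_step) - 1) >> log2_step);
    unsigned int tmp = ceildiv << log2_step;    /* wraps instead of UB */
    if (tmp > (unsigned int)INT_MAX)
        return 0;                 /* would overflow the signed coordinate */
    *out = (int)tmp;
    return 1;
}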
Equation E-4 */ + const OPJ_INT32 Rb = (OPJ_INT32)l_image_comp->prec + log2_gain; + + /* Delta_b value of Equation E-3 in "E.1 Inverse quantization + * procedure" of the standard */ + l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, + (OPJ_INT32)(Rb - l_step_size->expn)))); + } + /* Mb value of Equation E-2 in "E.1 Inverse quantization * procedure" of the standard */ l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - @@ -1174,14 +1198,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, opj_event_mgr_t* p_manager) { - return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, 1.0F, + return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, sizeof(opj_tcd_cblk_enc_t), p_manager); } OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, opj_event_mgr_t* p_manager) { - return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, 0.5F, + return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, sizeof(opj_tcd_cblk_dec_t), p_manager); } @@ -1219,10 +1243,16 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t * /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */ /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */ + /* and +7 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 3) */ + /* and +26 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 7) */ + /* and +28 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 44) */ + /* and +33 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4) */ + /* and +63 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -IMF 2K) */ + /* and +74 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -n 8 -s 7,7 -I) */ /* TODO: is there a theoretical upper-bound for the compressed code */ /* block size ? */ - l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * - (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); + l_data_size = 74 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * + (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); if (l_data_size > p_code_block->data_size) { if (p_code_block->data) { @@ -1354,6 +1384,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_length, opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager) { @@ -1433,7 +1464,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, /* FIXME _ProfStart(PGROUP_T2); */ if (! 
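For reference, the stepsize computation above is a direct transcription of the inverse-quantization formulas of ISO/IEC 15444-1 Annex E; in the standard's notation:

\Delta_b = 2^{R_b - \varepsilon_b}\left(1 + \frac{\mu_b}{2^{11}}\right), \qquad R_b = B + \log_2(\mathrm{gain}_b)

where B is the component bit depth (l_image_comp->prec), \varepsilon_b and \mu_b are the quantizer exponent and mantissa (l_step_size->expn and l_step_size->mant), and the Table E-1 gains are 1 (LL), 2 (HL, LH) and 4 (HH), giving the log2_gain values 0, 1, 2 selected in the code. The Mb of Equation E-2 computed just after is M_b = G + \varepsilon_b - 1, with G the number of guard bits (l_tccp->numgbits).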
opj_tcd_t2_encode(p_tcd, p_dest, p_data_written, p_max_length, - p_cstr_info, p_manager)) { + p_cstr_info, p_marker_info, p_manager)) { return OPJ_FALSE; } /* FIXME _ProfStop(PGROUP_T2); */ @@ -2017,7 +2048,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcp_t * l_tcp = p_tcd->tcp; opj_tcd_tilecomp_t * l_tile_comp = l_tile->comps; - OPJ_UINT32 l_samples, i; + OPJ_SIZE_T l_samples; + OPJ_UINT32 i; if (l_tcp->mct == 0 || p_tcd->used_component != NULL) { return OPJ_TRUE; @@ -2030,8 +2062,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) /* A bit inefficient: we process more data than needed if */ /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */ /* but we would need to take into account a stride then */ - l_samples = (OPJ_UINT32)((res_comp0->x1 - res_comp0->x0) * - (res_comp0->y1 - res_comp0->y0)); + l_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) * + (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0); if (l_tile->numcomps >= 3) { if (l_tile_comp->minimum_num_resolutions != l_tile->comps[1].minimum_num_resolutions || @@ -2065,8 +2097,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) opj_tcd_resolution_t* res_comp0 = l_tile->comps[0].resolutions + p_tcd->image->comps[0].resno_decoded; - l_samples = (res_comp0->win_x1 - res_comp0->win_x0) * - (res_comp0->win_y1 - res_comp0->win_y0); + l_samples = (OPJ_SIZE_T)(res_comp0->win_x1 - res_comp0->win_x0) * + (OPJ_SIZE_T)(res_comp0->win_y1 - res_comp0->win_y0); if (l_tile->numcomps >= 3) { opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions + p_tcd->image->comps[1].resno_decoded; @@ -2332,7 +2364,7 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct) } } -OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) +OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd) { OPJ_UINT32 i; OPJ_SIZE_T l_data_size = 0; @@ -2390,7 +2422,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) } } else { for (i = 0; i < l_nb_elem; ++i) { - *l_current_ptr = (*l_current_ptr - l_tccp->m_dc_level_shift) * (1 << 11); + *((OPJ_FLOAT32 *) l_current_ptr) = (OPJ_FLOAT32)(*l_current_ptr - + l_tccp->m_dc_level_shift); ++l_current_ptr; } } @@ -2448,8 +2481,11 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) opj_free(l_data); } else if (l_tcp->tccps->qmfbid == 0) { - opj_mct_encode_real(l_tile->comps[0].data, l_tile->comps[1].data, - l_tile->comps[2].data, samples); + opj_mct_encode_real( + (OPJ_FLOAT32*)l_tile->comps[0].data, + (OPJ_FLOAT32*)l_tile->comps[1].data, + (OPJ_FLOAT32*)l_tile->comps[2].data, + samples); } else { opj_mct_encode(l_tile->comps[0].data, l_tile->comps[1].data, l_tile->comps[2].data, samples); @@ -2467,11 +2503,11 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; ++compno) { if (l_tccp->qmfbid == 1) { - if (! opj_dwt_encode(l_tile_comp)) { + if (! opj_dwt_encode(p_tcd, l_tile_comp)) { return OPJ_FALSE; } } else if (l_tccp->qmfbid == 0) { - if (! opj_dwt_encode_real(l_tile_comp)) { + if (! 
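The two l_samples hunks above fix the same class of bug: the product of two 32-bit extents overflows before the assignment widens it. Promoting each factor first keeps the whole multiplication in size_t range; a minimal form of the fix:

#include <stddef.h>
#include <stdint.h>

static size_t sample_count(int32_t x0, int32_t y0, int32_t x1, int32_t y1)
{
    /* cast each factor before multiplying; (x1 - x0) * (y1 - y0) evaluated
       in 32-bit arithmetic could wrap for large tiles */
    return (size_t)(x1 - x0) * (size_t)(y1 - y0);
}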
opj_dwt_encode_real(p_tcd, l_tile_comp)) { return OPJ_FALSE; } } @@ -2485,16 +2521,10 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) { - opj_t1_t * l_t1; const OPJ_FLOAT64 * l_mct_norms; OPJ_UINT32 l_mct_numcomps = 0U; opj_tcp_t * l_tcp = p_tcd->tcp; - l_t1 = opj_t1_create(OPJ_TRUE); - if (l_t1 == 00) { - return OPJ_FALSE; - } - if (l_tcp->mct == 1) { l_mct_numcomps = 3U; /* irreversible encoding */ @@ -2508,13 +2538,9 @@ static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) l_mct_norms = (const OPJ_FLOAT64 *)(l_tcp->mct_norms); } - if (! opj_t1_encode_cblks(l_t1, p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, - l_mct_numcomps)) { - opj_t1_destroy(l_t1); - return OPJ_FALSE; - } - - opj_t1_destroy(l_t1); + return opj_t1_encode_cblks(p_tcd, + p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, + l_mct_numcomps); return OPJ_TRUE; } @@ -2524,6 +2550,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_max_dest_size, opj_codestream_info_t *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager) { opj_t2_t * l_t2; @@ -2542,6 +2569,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, p_data_written, p_max_dest_size, p_cstr_info, + p_marker_info, p_tcd->tp_num, p_tcd->tp_pos, p_tcd->cur_pino, @@ -2600,7 +2628,7 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_UINT32 l_size_comp, l_remaining; OPJ_SIZE_T l_nb_elem; - l_data_size = opj_tcd_get_encoded_tile_size(p_tcd); + l_data_size = opj_tcd_get_encoder_input_buffer_size(p_tcd); if (l_data_size != p_src_length) { return OPJ_FALSE; } @@ -2802,3 +2830,30 @@ static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd, (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 && (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0))); } + +/* ----------------------------------------------------------------------- */ + +opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT) +{ + opj_tcd_marker_info_t *l_tcd_marker_info = + (opj_tcd_marker_info_t*) opj_calloc(1, sizeof(opj_tcd_marker_info_t)); + if (!l_tcd_marker_info) { + return NULL; + } + + l_tcd_marker_info->need_PLT = need_PLT; + + return l_tcd_marker_info; +} + +/* ----------------------------------------------------------------------- */ + +void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info) +{ + if (p_tcd_marker_info) { + opj_free(p_tcd_marker_info->p_packet_size); + opj_free(p_tcd_marker_info); + } +} + +/* ----------------------------------------------------------------------- */ diff --git a/3rdparty/openjpeg/openjp2/tcd.h b/3rdparty/openjpeg/openjp2/tcd.h index e3214c1d98..f1b52b8dac 100644 --- a/3rdparty/openjpeg/openjp2/tcd.h +++ b/3rdparty/openjpeg/openjp2/tcd.h @@ -284,6 +284,22 @@ typedef struct opj_tcd { OPJ_BOOL* used_component; } opj_tcd_t; +/** + * Structure to hold information needed to generate some markers. + * Used by encoder. 
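The opj_tcd_dc_level_shift_encode() rewrite above stops scaling the irreversible path by 2^11 fixed-point and instead stores the DC-shifted value as a real float, reusing the integer buffer in place. An isolated sketch of that rewrite; it relies on the same type-punning assumption the patch itself makes (sizeof(float) == sizeof(OPJ_INT32) and a compiler that tolerates the cast):

#include <stddef.h>
#include <stdint.h>

static void dc_shift_to_float(int32_t *buf, size_t n, int32_t dc_shift)
{
    size_t i;
    for (i = 0; i < n; i++) {
        /* write the float bit pattern into the same slot; downstream
           irreversible (qmfbid == 0) stages read the buffer as float */
        *((float *)&buf[i]) = (float)(buf[i] - dc_shift);
    }
}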
+ */ +typedef struct opj_tcd_marker_info { + /** In: Whether information to generate PLT markers is needed */ + OPJ_BOOL need_PLT; + + /** OUT: Number of elements in p_packet_size[] array */ + OPJ_UINT32 packet_count; + + /** OUT: Array of size packet_count, such that p_packet_size[i] is + * the size in bytes of the ith packet */ + OPJ_UINT32* p_packet_size; +} opj_tcd_marker_info_t; + /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ @@ -306,6 +322,21 @@ Destroy a previously created TCD handle */ void opj_tcd_destroy(opj_tcd_t *tcd); + +/** + * Create a new opj_tcd_marker_info_t* structure + * @param need_PLT Whether information is needed to generate PLT markers. + */ +opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT); + + +/** +Destroy a previously created opj_tcd_marker_info_t* structure +@param p_tcd_marker_info Structure to destroy +*/ +void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info); + + /** * Initialize the tile coder and may reuse some memory. * @param p_tcd TCD handle. @@ -364,6 +395,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, * @param p_data_written pointer to an int that is incremented by the number of bytes really written on p_dest * @param p_len Maximum length of the destination buffer * @param p_cstr_info Codestream information structure + * @param p_marker_info Marker information structure * @param p_manager the user event manager * @return true if the coding is successful. */ @@ -373,6 +405,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 * p_data_written, OPJ_UINT32 p_len, struct opj_codestream_info *p_cstr_info, + opj_tcd_marker_info_t* p_marker_info, opj_event_mgr_t *p_manager); @@ -415,9 +448,11 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, OPJ_UINT32 p_dest_length); /** - * + * Get the size in bytes of the input buffer provided before encoding. + * This must be the size provided to the p_src_length argument of + * opj_tcd_copy_tile_data() */ -OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); +OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd); /** * Initialize the tile coder and may reuse some memory. @@ -433,6 +468,8 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, /** * Copies tile data from the given memory block onto the system.
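Putting the new API together, a consumer creates the structure, passes it through the encode call, then reads back the per-packet sizes. A sketch of that lifecycle using only the declarations above (the other opj_tcd_encode_tile() arguments are elided):

/* Sketch only: types and functions are the ones declared above. */
opj_tcd_marker_info_t *mi = opj_tcd_marker_info_create(OPJ_TRUE);
if (mi != NULL) {
    /* ... opj_tcd_encode_tile(p_tcd, ..., mi, p_manager) fills the
       OUT fields during T2 encoding ... */
    OPJ_UINT32 i;
    for (i = 0; i < mi->packet_count; i++) {
        /* mi->p_packet_size[i]: byte size of the i-th packet, ready to
           be emitted into a PLT marker segment */
    }
    opj_tcd_marker_info_destroy(mi);   /* frees p_packet_size too */
}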
+ * + * p_src_length must be equal to opj_tcd_get_encoder_input_buffer_size() */ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, diff --git a/3rdparty/protobuf/CMakeLists.txt b/3rdparty/protobuf/CMakeLists.txt index c71bf9faff..f249d2dcc3 100644 --- a/3rdparty/protobuf/CMakeLists.txt +++ b/3rdparty/protobuf/CMakeLists.txt @@ -153,6 +153,11 @@ set_target_properties(libprotobuf ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH} ) +if(ANDROID) + # https://github.com/opencv/opencv/issues/17282 + target_link_libraries(libprotobuf INTERFACE "-landroid" "-llog") +endif() + get_protobuf_version(Protobuf_VERSION "${PROTOBUF_ROOT}/src") set(Protobuf_VERSION ${Protobuf_VERSION} CACHE INTERNAL "" FORCE) diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index a085b0f3ca..50f3e6ccf1 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -170,4 +170,4 @@ ocv_install_target(tbb EXPORT OpenCVModules ocv_install_3rdparty_licenses(tbb "${tbb_src_dir}/LICENSE" "${tbb_src_dir}/README") -ocv_tbb_read_version("${tbb_src_dir}/include") +ocv_tbb_read_version("${tbb_src_dir}/include" tbb) diff --git a/CMakeLists.txt b/CMakeLists.txt index 487efd5f7e..78327c1a70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,7 @@ endif() include(cmake/OpenCVMinDepVersions.cmake) -if(CMAKE_GENERATOR MATCHES Xcode AND XCODE_VERSION VERSION_GREATER 4.3) - cmake_minimum_required(VERSION 3.0 FATAL_ERROR) -elseif(CMAKE_SYSTEM_NAME MATCHES WindowsPhone OR CMAKE_SYSTEM_NAME MATCHES WindowsStore) +if(CMAKE_SYSTEM_NAME MATCHES WindowsPhone OR CMAKE_SYSTEM_NAME MATCHES WindowsStore) cmake_minimum_required(VERSION 3.1 FATAL_ERROR) #Required to resolve linker error issues due to incompatibility with CMake v3.0+ policies. #CMake fails to find _fseeko() which leads to subsequent linker error. 
@@ -215,7 +213,7 @@ OCV_OPTION(OPENCV_ENABLE_NONFREE "Enable non-free algorithms" OFF) OCV_OPTION(OPENCV_FORCE_3RDPARTY_BUILD "Force using 3rdparty code from source" OFF) OCV_OPTION(BUILD_ZLIB "Build zlib from source" (WIN32 OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_TIFF "Build libtiff from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) -OCV_OPTION(BUILD_OPENJPEG "Build OpenJPEG from source" (WIN32 OR ANDRIOD OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) +OCV_OPTION(BUILD_OPENJPEG "Build OpenJPEG from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_JASPER "Build libjasper from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_JPEG "Build libjpeg from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_PNG "Build libpng from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) @@ -398,11 +396,11 @@ OCV_OPTION(WITH_OPENCL_D3D11_NV "Include NVIDIA OpenCL D3D11 support" WITH_DIREC OCV_OPTION(WITH_LIBREALSENSE "Include Intel librealsense support" OFF VISIBLE_IF NOT WITH_INTELPERC VERIFY HAVE_LIBREALSENSE) -OCV_OPTION(WITH_VA "Include VA support" OFF - VISIBLE_IF UNIX AND NOT ANDROID +OCV_OPTION(WITH_VA "Include VA support" (X86_64 OR X86) + VISIBLE_IF UNIX AND NOT APPLE AND NOT ANDROID VERIFY HAVE_VA) -OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF - VISIBLE_IF UNIX AND NOT ANDROID +OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" (X86_64 OR X86) + VISIBLE_IF UNIX AND NOT APPLE AND NOT ANDROID VERIFY HAVE_VA_INTEL) OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF VISIBLE_IF (UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW) @@ -440,6 +438,9 @@ OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 20) VISIBLE_IF ANDROID VERIFY HAVE_ANDROID_MEDIANDK) +OCV_OPTION(WITH_ANDROID_NATIVE_CAMERA "Use Android NDK for Camera I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 23) + VISIBLE_IF ANDROID + VERIFY HAVE_ANDROID_NATIVE_CAMERA) OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS VERIFY HAVE_TENGINE) @@ -480,7 +481,7 @@ OCV_OPTION(INSTALL_TESTS "Install accuracy and performance test binar # OpenCV build options # =================================================== -OCV_OPTION(ENABLE_CCACHE "Use ccache" (UNIX AND NOT IOS AND (CMAKE_GENERATOR MATCHES "Makefile" OR CMAKE_GENERATOR MATCHES "Ninja")) ) +OCV_OPTION(ENABLE_CCACHE "Use ccache" (UNIX AND (CMAKE_GENERATOR MATCHES "Makefile" OR CMAKE_GENERATOR MATCHES "Ninja" OR CMAKE_GENERATOR MATCHES "Xcode")) ) OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers" MSVC IF (MSVC OR (NOT IOS AND NOT CMAKE_CROSSCOMPILING) ) ) OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) ) OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CV_GCC ) @@ -489,7 +490,7 @@ OCV_OPTION(OPENCV_ENABLE_MEMORY_SANITIZER "Better support for memory/address san OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC ) OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) OCV_OPTION(ENABLE_FAST_MATH "Enable compiler options for fast math optimizations on FP 
computations (not recommended)" OFF) -if(NOT IOS AND (NOT ANDROID OR OPENCV_ANDROID_USE_LEGACY_FLAGS)) # Use CPU_BASELINE instead +if(NOT IOS AND (NOT ANDROID OR OPENCV_ANDROID_USE_LEGACY_FLAGS) AND CMAKE_CROSSCOMPILING) # Use CPU_BASELINE instead OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) ) OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) ) endif() @@ -668,9 +669,18 @@ if(UNIX) CHECK_SYMBOL_EXISTS(memalign malloc.h HAVE_MEMALIGN) endif() # TODO: - # - _aligned_malloc() on Win32 # - std::aligned_alloc() C++17 / C11 endif() +elseif(WIN32) + include(CheckIncludeFile) + include(CheckSymbolExists) + + if(OPENCV_ENABLE_MEMALIGN) + CHECK_INCLUDE_FILE(malloc.h HAVE_MALLOC_H) + if(HAVE_MALLOC_H) + CHECK_SYMBOL_EXISTS(_aligned_malloc malloc.h HAVE_WIN32_ALIGNED_MALLOC) + endif() + endif() endif() include(cmake/OpenCVPCHSupport.cmake) @@ -736,8 +746,15 @@ if(ENABLE_FLAKE8 AND PYTHON_DEFAULT_AVAILABLE) include("${CMAKE_CURRENT_LIST_DIR}/cmake/FindFlake8.cmake") endif() if(FLAKE8_FOUND) + list(APPEND OPENCV_FLAKE8_EXCLUDES ".git" "__pycache__" "config.py" "*.config.py" "config-*.py") + list(APPEND OPENCV_FLAKE8_EXCLUDES "svgfig.py") # 3rdparty + if(NOT PYTHON3_VERSION_STRING VERSION_GREATER 3.6) + # Python 3.6+ (PEP 526): variable annotations (type hints) + list(APPEND OPENCV_FLAKE8_EXCLUDES "samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs") + endif() + string(REPLACE ";" "," OPENCV_FLAKE8_EXCLUDES_STR "${OPENCV_FLAKE8_EXCLUDES}") add_custom_target(check_flake8 - COMMAND "${FLAKE8_EXECUTABLE}" . --count --select=E9,E901,E999,F821,F822,F823 --show-source --statistics --exclude='.git,__pycache__,*.config.py,svgfig.py' + COMMAND "${FLAKE8_EXECUTABLE}" . 
--count --select=E9,E901,E999,F821,F822,F823 --show-source --statistics --exclude='${OPENCV_FLAKE8_EXCLUDES_STR}' WORKING_DIRECTORY "${OpenCV_SOURCE_DIR}" COMMENT "Running flake8" ) @@ -989,6 +1006,12 @@ if(COMMAND ocv_pylint_finalize) ocv_pylint_add_directory_recurse(${CMAKE_CURRENT_LIST_DIR}/samples/python/tutorial_code) ocv_pylint_finalize() endif() +if(TARGET check_pylint) + message(STATUS "Registered 'check_pylint' target: using ${PYLINT_EXECUTABLE} (ver: ${PYLINT_VERSION}), checks: ${PYLINT_TOTAL_TARGETS}") +endif() +if(TARGET check_flake8) + message(STATUS "Registered 'check_flake8' target: using ${FLAKE8_EXECUTABLE} (ver: ${FLAKE8_VERSION})") +endif() if(OPENCV_GENERATE_SETUPVARS) include(cmake/OpenCVGenSetupVars.cmake) @@ -1047,7 +1070,9 @@ endif() if(CMAKE_GENERATOR MATCHES Xcode) status(" Xcode:" ${XCODE_VERSION}) endif() -if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") +if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config") + status(" Configuration:" ${CMAKE_CONFIGURATION_TYPES}) +else() status(" Configuration:" ${CMAKE_BUILD_TYPE}) endif() @@ -1620,12 +1645,6 @@ endif() status("") status(" Python (for build):" PYTHON_DEFAULT_AVAILABLE THEN "${PYTHON_DEFAULT_EXECUTABLE}" ELSE NO) -if(PYLINT_FOUND AND PYLINT_EXECUTABLE) - status(" Pylint:" PYLINT_FOUND THEN "${PYLINT_EXECUTABLE} (ver: ${PYLINT_VERSION}, checks: ${PYLINT_TOTAL_TARGETS})" ELSE NO) -endif() -if(FLAKE8_FOUND AND FLAKE8_EXECUTABLE) - status(" Flake8:" FLAKE8_FOUND THEN "${FLAKE8_EXECUTABLE} (ver: ${FLAKE8_VERSION})" ELSE NO) -endif() # ========================== java ========================== if(BUILD_JAVA) diff --git a/README.md b/README.md index 0653a9e73e..b9897205ba 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,8 @@ * Homepage: <https://opencv.org> * Courses: <https://opencv.org/courses> * Docs: <https://docs.opencv.org/master/> -* Q&A forum: <http://answers.opencv.org> +* Q&A forum: <https://forum.opencv.org> + * previous forum (read only): <http://answers.opencv.org> * Issue tracking: <https://github.com/opencv/opencv/issues> * Additional OpenCV functionality: <https://github.com/opencv/opencv_contrib> diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 1504fa61c4..e9a7be7c66 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -59,3 +59,4 @@ ocv_add_app(annotation) ocv_add_app(visualisation) ocv_add_app(interactive-calibration) ocv_add_app(version) +ocv_add_app(model-diagnostics) diff --git a/apps/model-diagnostics/CMakeLists.txt b/apps/model-diagnostics/CMakeLists.txt new file mode 100644 index 0000000000..b48f8264ff --- /dev/null +++ b/apps/model-diagnostics/CMakeLists.txt @@ -0,0 +1,3 @@ +ocv_add_application(opencv_model_diagnostics + MODULES opencv_core opencv_dnn + SRCS model_diagnostics.cpp) diff --git a/apps/model-diagnostics/model_diagnostics.cpp b/apps/model-diagnostics/model_diagnostics.cpp new file mode 100644 index 0000000000..2ffeaa1ea5 --- /dev/null +++ b/apps/model-diagnostics/model_diagnostics.cpp @@ -0,0 +1,65 @@ +/************************************************* +USAGE: +./model_diagnostics -m <model file location> +**************************************************/ +#include <opencv2/dnn.hpp> +#include <opencv2/core/utils/filesystem.hpp> + +#include <iostream> + + +using namespace cv; +using namespace dnn; + + +static +int diagnosticsErrorCallback(int /*status*/, const char* /*func_name*/, + const char* /*err_msg*/, const char* /*file_name*/, + int /*line*/, void* /*userdata*/) +{ + fflush(stdout); + fflush(stderr); + return 0; +} + +static std::string checkFileExists(const std::string& fileName) +{ + if (fileName.empty() || utils::fs::exists(fileName)) + return fileName; + + CV_Error(Error::StsObjectNotFound, "File " + fileName + " was not found! " + "Please, specify a full path to the file."); +} + +std::string diagnosticKeys = + "{ model m | | Path to the model .onnx file.
}" + "{ config c | | Path to the model configuration file. }" + "{ framework f | | [Optional] Name of the model framework. }"; + + + +int main( int argc, const char** argv ) +{ + CommandLineParser argParser(argc, argv, diagnosticKeys); + argParser.about("Use this tool to run the diagnostics of provided ONNX model" + "to obtain the information about its support (supported layers)."); + + if (argc == 1) + { + argParser.printMessage(); + return 0; + } + + std::string model = checkFileExists(argParser.get("model")); + std::string config = checkFileExists(argParser.get("config")); + std::string frameworkId = argParser.get("framework"); + + CV_Assert(!model.empty()); + + enableModelDiagnostics(true); + redirectError(diagnosticsErrorCallback, NULL); + + Net ocvNet = readNet(model, config, frameworkId); + + return 0; +} diff --git a/cmake/FindCUDNN.cmake b/cmake/FindCUDNN.cmake index 90d8b2ea78..195781b957 100644 --- a/cmake/FindCUDNN.cmake +++ b/cmake/FindCUDNN.cmake @@ -72,14 +72,14 @@ if(CUDNN_INCLUDE_DIR) endif() string(REGEX MATCH "define CUDNN_MAJOR ([0-9]+)" _ "${CUDNN_H_CONTENTS}") - set(CUDNN_MAJOR_VERSION ${CMAKE_MATCH_1} CACHE INTERNAL "") + set(CUDNN_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE INTERNAL "") string(REGEX MATCH "define CUDNN_MINOR ([0-9]+)" _ "${CUDNN_H_CONTENTS}") - set(CUDNN_MINOR_VERSION ${CMAKE_MATCH_1} CACHE INTERNAL "") + set(CUDNN_VERSION_MINOR ${CMAKE_MATCH_1} CACHE INTERNAL "") string(REGEX MATCH "define CUDNN_PATCHLEVEL ([0-9]+)" _ "${CUDNN_H_CONTENTS}") - set(CUDNN_PATCH_VERSION ${CMAKE_MATCH_1} CACHE INTERNAL "") + set(CUDNN_VERSION_PATCH ${CMAKE_MATCH_1} CACHE INTERNAL "") set(CUDNN_VERSION - "${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}" + "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}" CACHE STRING "cuDNN version" diff --git a/cmake/FindFlake8.cmake b/cmake/FindFlake8.cmake index b18225a011..8063571393 100644 --- a/cmake/FindFlake8.cmake +++ b/cmake/FindFlake8.cmake @@ -12,9 +12,11 @@ find_host_program(FLAKE8_EXECUTABLE flake8 PATHS /usr/bin) -if(FLAKE8_EXECUTABLE) - execute_process(COMMAND ${FLAKE8_EXECUTABLE} --version OUTPUT_VARIABLE FLAKE8_VERSION_RAW ERROR_QUIET) - if(FLAKE8_VERSION_RAW MATCHES "^([0-9\\.]+[0-9])") +if(FLAKE8_EXECUTABLE AND NOT DEFINED FLAKE8_VERSION) + execute_process(COMMAND ${FLAKE8_EXECUTABLE} --version RESULT_VARIABLE _result OUTPUT_VARIABLE FLAKE8_VERSION_RAW) + if(NOT _result EQUAL 0) + ocv_clear_vars(FLAKE8_EXECUTABLE FLAKE8_VERSION) + elseif(FLAKE8_VERSION_RAW MATCHES "^([0-9\\.]+[0-9])") set(FLAKE8_VERSION "${CMAKE_MATCH_1}") else() set(FLAKE8_VERSION "unknown") @@ -22,6 +24,9 @@ if(FLAKE8_EXECUTABLE) endif() include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(Flake8 DEFAULT_MSG FLAKE8_EXECUTABLE) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(Flake8 + REQUIRED_VARS FLAKE8_EXECUTABLE + VERSION_VAR FLAKE8_VERSION +) mark_as_advanced(FLAKE8_EXECUTABLE FLAKE8_VERSION) diff --git a/cmake/FindPylint.cmake b/cmake/FindPylint.cmake index 7e26fe246e..ef4b4394ff 100644 --- a/cmake/FindPylint.cmake +++ b/cmake/FindPylint.cmake @@ -12,9 +12,11 @@ find_host_program(PYLINT_EXECUTABLE pylint PATHS /usr/bin) -if(PYLINT_EXECUTABLE) - execute_process(COMMAND ${PYLINT_EXECUTABLE} --version OUTPUT_VARIABLE PYLINT_VERSION_RAW ERROR_QUIET) - if(PYLINT_VERSION_RAW MATCHES "pylint([^,]*) ([0-9\\.]+[0-9])") +if(PYLINT_EXECUTABLE AND NOT DEFINED PYLINT_VERSION) + execute_process(COMMAND ${PYLINT_EXECUTABLE} --version RESULT_VARIABLE _result OUTPUT_VARIABLE PYLINT_VERSION_RAW) + if(NOT _result EQUAL 0) 
+ ocv_clear_vars(PYLINT_EXECUTABLE PYLINT_VERSION) + elseif(PYLINT_VERSION_RAW MATCHES "pylint([^,\n]*) ([0-9\\.]+[0-9])") set(PYLINT_VERSION "${CMAKE_MATCH_2}") else() set(PYLINT_VERSION "unknown") @@ -22,6 +24,9 @@ if(PYLINT_EXECUTABLE) endif() include(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(Pylint DEFAULT_MSG PYLINT_EXECUTABLE) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(Pylint + REQUIRED_VARS PYLINT_EXECUTABLE + VERSION_VAR PYLINT_VERSION +) mark_as_advanced(PYLINT_EXECUTABLE PYLINT_VERSION) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 929c5b5e51..40a058d74e 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -8,13 +8,27 @@ function(access_CMAKE_COMPILER_IS_CCACHE) endif() endfunction() variable_watch(CMAKE_COMPILER_IS_CCACHE access_CMAKE_COMPILER_IS_CCACHE) -if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE AND NOT CMAKE_GENERATOR MATCHES "Xcode") +if(ENABLE_CCACHE AND NOT OPENCV_COMPILER_IS_CCACHE) # This works fine with Unix Makefiles and Ninja generators find_host_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) message(STATUS "Looking for ccache - found (${CCACHE_PROGRAM})") get_property(__OLD_RULE_LAUNCH_COMPILE GLOBAL PROPERTY RULE_LAUNCH_COMPILE) - if(__OLD_RULE_LAUNCH_COMPILE) + if(CMAKE_GENERATOR MATCHES "Xcode") + configure_file("${CMAKE_CURRENT_LIST_DIR}/templates/xcode-launch-c.in" "${CMAKE_BINARY_DIR}/xcode-launch-c") + configure_file("${CMAKE_CURRENT_LIST_DIR}/templates/xcode-launch-cxx.in" "${CMAKE_BINARY_DIR}/xcode-launch-cxx") + execute_process(COMMAND chmod a+rx + "${CMAKE_BINARY_DIR}/xcode-launch-c" + "${CMAKE_BINARY_DIR}/xcode-launch-cxx" + ) + # Xcode project attributes + set(CMAKE_XCODE_ATTRIBUTE_CC "${CMAKE_BINARY_DIR}/xcode-launch-c") + set(CMAKE_XCODE_ATTRIBUTE_CXX "${CMAKE_BINARY_DIR}/xcode-launch-cxx") + set(CMAKE_XCODE_ATTRIBUTE_LD "${CMAKE_BINARY_DIR}/xcode-launch-c") + set(CMAKE_XCODE_ATTRIBUTE_LDPLUSPLUS "${CMAKE_BINARY_DIR}/xcode-launch-cxx") + set(OPENCV_COMPILER_IS_CCACHE 1) + message(STATUS "ccache: enable support through Xcode project properties") + elseif(__OLD_RULE_LAUNCH_COMPILE) message(STATUS "Can't replace CMake compiler launcher") else() set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") @@ -122,7 +136,6 @@ if(CV_GCC OR CV_CLANG) endif() add_extra_compiler_option(-Wsign-promo) add_extra_compiler_option(-Wuninitialized) - add_extra_compiler_option(-Winit-self) if(CV_GCC AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) add_extra_compiler_option(-Wno-psabi) endif() @@ -291,7 +304,9 @@ if(MSVC) endif() endif() -include(cmake/OpenCVCompilerOptimizations.cmake) +if(PROJECT_NAME STREQUAL "OpenCV") + include("${OpenCV_SOURCE_DIR}/cmake/OpenCVCompilerOptimizations.cmake") +endif() if(COMMAND ocv_compiler_optimization_options) ocv_compiler_optimization_options() endif() @@ -398,11 +413,11 @@ if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/ endif() if(ENABLE_BUILD_HARDENING) - include(${CMAKE_CURRENT_LIST_DIR}/OpenCVCompilerDefenses.cmake) + include("${CMAKE_CURRENT_LIST_DIR}/OpenCVCompilerDefenses.cmake") endif() if(MSVC) - include(cmake/OpenCVCRTLinkage.cmake) + include("${CMAKE_CURRENT_LIST_DIR}/OpenCVCRTLinkage.cmake") add_definitions(-D_VARIADIC_MAX=10) endif() diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index c838a40409..216c02c3cc 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ 
b/cmake/OpenCVDetectInferenceEngine.cmake @@ -62,7 +62,13 @@ function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg) if(find_prefix STREQUAL "_empty_") # foreach doesn't iterate over empty elements set(find_prefix "") endif() - foreach(find_suffix ${CMAKE_FIND_LIBRARY_SUFFIXES}) + if(NOT DEFINED INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES) # allow custom override + set(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + if(APPLE) + ocv_list_filterout(INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES "^.so$") # skip plugins (can't be linked) + endif() + endif() + foreach(find_suffix ${INFERENCE_ENGINE_FIND_LIBRARY_SUFFIXES}) ocv_ie_find_extra_libraries("${find_prefix}" "${find_suffix}") endforeach() if(NOT CMAKE_FIND_LIBRARY_SUFFIXES) @@ -129,9 +135,9 @@ endif() if(INF_ENGINE_TARGET) if(NOT INF_ENGINE_RELEASE) - message(WARNING "InferenceEngine version has not been set, 2021.1 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + message(WARNING "InferenceEngine version has not been set, 2021.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") endif() - set(INF_ENGINE_RELEASE "2021010000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + set(INF_ENGINE_RELEASE "2021030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) diff --git a/cmake/OpenCVDetectTBB.cmake b/cmake/OpenCVDetectTBB.cmake index 38137f44f0..fc564e981a 100644 --- a/cmake/OpenCVDetectTBB.cmake +++ b/cmake/OpenCVDetectTBB.cmake @@ -1,4 +1,4 @@ -# Search TBB library (4.1 - 4.4, 2017) +# Search TBB library: 4.1 - 4.4, 2017-2020, 2021+ (oneTBB) # # Own TBB (3rdparty/tbb): # - set cmake option BUILD_TBB to ON @@ -19,7 +19,7 @@ # - "tbb" target exists and added to OPENCV_LINKER_LIBS function(ocv_tbb_cmake_guess _found) - find_package(TBB QUIET COMPONENTS tbb PATHS "$ENV{TBBROOT}/cmake") + find_package(TBB QUIET COMPONENTS tbb PATHS "$ENV{TBBROOT}/cmake" "$ENV{TBBROOT}/lib/cmake/tbb") if(TBB_FOUND) if(NOT TARGET TBB::tbb) message(WARNING "No TBB::tbb target found!") @@ -28,11 +28,11 @@ function(ocv_tbb_cmake_guess _found) get_target_property(_lib TBB::tbb IMPORTED_LOCATION_RELEASE) message(STATUS "Found TBB (cmake): ${_lib}") get_target_property(_inc TBB::tbb INTERFACE_INCLUDE_DIRECTORIES) - ocv_tbb_read_version("${_inc}") add_library(tbb INTERFACE IMPORTED) set_target_properties(tbb PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbb ) + ocv_tbb_read_version("${_inc}" tbb) set(${_found} TRUE PARENT_SCOPE) endif() endfunction() @@ -66,7 +66,6 @@ function(ocv_tbb_env_guess _found) find_library(TBB_ENV_LIB_DEBUG NAMES "tbb_debug") if (TBB_ENV_INCLUDE AND (TBB_ENV_LIB OR TBB_ENV_LIB_DEBUG)) ocv_tbb_env_verify() - ocv_tbb_read_version("${TBB_ENV_INCLUDE}") add_library(tbb UNKNOWN IMPORTED) set_target_properties(tbb PROPERTIES IMPORTED_LOCATION "${TBB_ENV_LIB}" @@ -82,12 +81,23 @@ function(ocv_tbb_env_guess _found) get_filename_component(_dir "${TBB_ENV_LIB}" DIRECTORY) set_target_properties(tbb PROPERTIES INTERFACE_LINK_LIBRARIES "-L${_dir}") endif() + ocv_tbb_read_version("${TBB_ENV_INCLUDE}" tbb) + if(NOT (TBB_INTERFACE_VERSION LESS 12000)) # >= 12000, oneTBB 2021+ + # avoid "defaultlib" requirement of tbb12.lib (we are using absolute path to 'tbb.lib' only) + # 
https://github.com/oneapi-src/oneTBB/blame/2dba2072869a189b9fdab3ffa431d3ea49059a19/include/oneapi/tbb/detail/_config.h#L334 + if(NOT (CMAKE_VERSION VERSION_LESS "3.16.0")) # https://gitlab.kitware.com/cmake/cmake/-/issues/19434 + target_compile_definitions(tbb INTERFACE "__TBB_NO_IMPLICIT_LINKAGE=1") + else() + set_target_properties(tbb PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__TBB_NO_IMPLICIT_LINKAGE=1") + endif() + endif() message(STATUS "Found TBB (env): ${TBB_ENV_LIB}") set(${_found} TRUE PARENT_SCOPE) endif() endfunction() -function(ocv_tbb_read_version _path) +function(ocv_tbb_read_version _path _tgt) + find_file(TBB_VER_FILE oneapi/tbb/version.h "${_path}" NO_DEFAULT_PATH CMAKE_FIND_ROOT_PATH_BOTH) find_file(TBB_VER_FILE tbb/tbb_stddef.h "${_path}" NO_DEFAULT_PATH CMAKE_FIND_ROOT_PATH_BOTH) ocv_parse_header("${TBB_VER_FILE}" TBB_VERSION_LINES TBB_VERSION_MAJOR TBB_VERSION_MINOR TBB_INTERFACE_VERSION CACHE) endfunction() diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake index 9bc215f415..6bcd81d8b4 100644 --- a/cmake/OpenCVFindIPP.cmake +++ b/cmake/OpenCVFindIPP.cmake @@ -143,10 +143,25 @@ macro(ipp_detect_version) list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) else () add_library(ipp${name} STATIC IMPORTED) + set(_filename "${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}") set_target_properties(ipp${name} PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "" - IMPORTED_LOCATION ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX} + IMPORTED_LOCATION ${IPP_LIBRARY_DIR}/${_filename} ) + if("${name}" STREQUAL "core") # https://github.com/opencv/opencv/pull/19681 + if(OPENCV_FORCE_IPP_EXCLUDE_LIBS OR OPENCV_FORCE_IPP_EXCLUDE_LIBS_CORE + OR (UNIX AND NOT ANDROID AND NOT APPLE + AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|Intel" + ) + AND NOT OPENCV_SKIP_IPP_EXCLUDE_LIBS_CORE + ) + if(CMAKE_VERSION VERSION_LESS "3.13.0") + set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--exclude-libs,${_filename} ${CMAKE_SHARED_LINKER_FLAGS}") + else() + target_link_options(ipp${name} INTERFACE "LINKER:--exclude-libs,${_filename}") + endif() + endif() + endif() list(APPEND IPP_LIBRARIES ipp${name}) if (NOT BUILD_SHARED_LIBS AND (HAVE_IPP_ICV OR ";${OPENCV_INSTALL_EXTERNAL_DEPENDENCIES};" MATCHES ";ipp;")) # CMake doesn't support "install(TARGETS ${IPP_PREFIX}${name} " command with imported targets diff --git a/cmake/OpenCVFindLAPACK.cmake b/cmake/OpenCVFindLAPACK.cmake index 3fa23ef83f..4ff2dee4d8 100644 --- a/cmake/OpenCVFindLAPACK.cmake +++ b/cmake/OpenCVFindLAPACK.cmake @@ -51,17 +51,53 @@ macro(ocv_lapack_make_hdr _cblas_hdr _lapacke_hdr) endmacro() macro(ocv_lapack_run_check) + if(CMAKE_GENERATOR MATCHES "Visual Studio" # MSBuild + AND LAPACK_IMPL STREQUAL "MKL" + AND ";${LAPACK_LIBRARIES};" MATCHES ";tbb;" AND TARGET tbb + AND DEFINED TBB_INTERFACE_VERSION AND NOT (TBB_INTERFACE_VERSION LESS 12000) # oneTBB/oneAPI workaround + ) + # workaround DEFAULTLIB:tbb12.lib issue + get_target_property(_tbb_lib tbb IMPORTED_LOCATION) + if(NOT _tbb_lib) + get_target_property(_tbb_lib tbb IMPORTED_LOCATION_RELEASE) + endif() + if(_tbb_lib AND NOT OPENCV_SKIP_WORKAROUND_MKL_LINK_DIRECTORIES_TBB) + # MSBuild drops content of 'LIB' environment variable, + # so pass TBB library directory through `link_directories()` + get_filename_component(_tbb_lib_dir "${_tbb_lib}" DIRECTORY) + message(STATUS "MKL: adding '${_tbb_lib_dir}' to link directories (workaround DEFAULTLIB issue)") + 
link_directories("${_tbb_lib_dir}") + elseif(NOT OPENCV_SKIP_WORKAROUND_MKL_DEFAULTLIB) + # We may have tbb.lib for 'tbb' target, but not 'tbb12.lib' + ocv_update(OPENCV_MKL_IGNORE_DEFAULTLIB_TBB "tbb12.lib") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /NODEFAULTLIB:${OPENCV_MKL_IGNORE_DEFAULTLIB_TBB}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:${OPENCV_MKL_IGNORE_DEFAULTLIB_TBB}") + endif() + endif() + + # TODO add cache for try_compile() inputs/results + + get_property(__link_directories DIRECTORY PROPERTY LINK_DIRECTORIES) + if(LAPACK_LINK_LIBRARIES) + list(APPEND __link_directories ${LAPACK_LINK_LIBRARIES}) + endif() + try_compile(__VALID_LAPACK - "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/lapack_check.cpp" - CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LAPACK_INCLUDE_DIR}\;${CMAKE_BINARY_DIR}" - "-DLINK_DIRECTORIES:STRING=${LAPACK_LINK_LIBRARIES}" - "-DLINK_LIBRARIES:STRING=${LAPACK_LIBRARIES}" - OUTPUT_VARIABLE TRY_OUT + "${OpenCV_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/lapack_check.cpp" + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${LAPACK_INCLUDE_DIR}\;${CMAKE_BINARY_DIR}" + "-DLINK_DIRECTORIES:STRING=${__link_directories}" + LINK_LIBRARIES ${LAPACK_LIBRARIES} + OUTPUT_VARIABLE TRY_OUT ) if(NOT __VALID_LAPACK) - #message(FATAL_ERROR "LAPACK: check build log:\n${TRY_OUT}") - message(STATUS "${LAPACK_IMPL}: Can't build LAPACK check code. This LAPACK version is not supported.") + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "\nLAPACK(${LAPACK_IMPL}) check FAILED:\n" + " LAPACK_INCLUDE_DIR: '${LAPACK_INCLUDE_DIR}'\n" + " LAPACK_LIBRARIES: '${LAPACK_LIBRARIES}'\n" + " LAPACK_LINK_LIBRARIES: '${__link_directories}'\n" + " Output:\n${TRY_OUT}\n\n") + message(STATUS "LAPACK(${LAPACK_IMPL}): Can't build LAPACK check code. 
This LAPACK version is not supported.") unset(LAPACK_LIBRARIES) else() message(STATUS "${LAPACK_IMPL}: Support is enabled.") diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 28aa47ba9c..23a6ca6959 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -6,9 +6,18 @@ if(BUILD_ZLIB) ocv_clear_vars(ZLIB_FOUND) else() + ocv_clear_internal_cache_vars(ZLIB_LIBRARY ZLIB_INCLUDE_DIR) + if(ANDROID) + set(_zlib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + set(CMAKE_FIND_LIBRARY_SUFFIXES .so) + endif() find_package(ZLIB "${MIN_VER_ZLIB}") + if(ANDROID) + set(CMAKE_FIND_LIBRARY_SUFFIXES ${_zlib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) + unset(_zlib_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES) + endif() if(ZLIB_FOUND AND ANDROID) - if(ZLIB_LIBRARIES MATCHES "/usr/(lib|lib32|lib64)/libz.so$") + if(ZLIB_LIBRARIES MATCHES "/usr/lib.*/libz.so$") set(ZLIB_LIBRARIES z) endif() endif() @@ -31,11 +40,12 @@ if(WITH_JPEG) if(BUILD_JPEG) ocv_clear_vars(JPEG_FOUND) else() + ocv_clear_internal_cache_vars(JPEG_LIBRARY JPEG_INCLUDE_DIR) include(FindJPEG) endif() if(NOT JPEG_FOUND) - ocv_clear_vars(JPEG_LIBRARY JPEG_LIBRARIES JPEG_INCLUDE_DIR) + ocv_clear_vars(JPEG_LIBRARY JPEG_INCLUDE_DIR) if(NOT BUILD_JPEG_TURBO_DISABLE) set(JPEG_LIBRARY libjpeg-turbo CACHE INTERNAL "") @@ -76,6 +86,7 @@ if(WITH_TIFF) if(BUILD_TIFF) ocv_clear_vars(TIFF_FOUND) else() + ocv_clear_internal_cache_vars(TIFF_LIBRARY TIFF_INCLUDE_DIR) include(FindTIFF) if(TIFF_FOUND) ocv_parse_header("${TIFF_INCLUDE_DIR}/tiff.h" TIFF_VERSION_LINES TIFF_VERSION_CLASSIC TIFF_VERSION_BIG TIFF_VERSION TIFF_BIGTIFF_VERSION) @@ -119,6 +130,7 @@ if(WITH_WEBP) if(BUILD_WEBP) ocv_clear_vars(WEBP_FOUND WEBP_LIBRARY WEBP_LIBRARIES WEBP_INCLUDE_DIR) else() + ocv_clear_internal_cache_vars(WEBP_LIBRARY WEBP_INCLUDE_DIR) include(cmake/OpenCVFindWebP.cmake) if(WEBP_FOUND) set(HAVE_WEBP 1) @@ -212,6 +224,7 @@ if(WITH_PNG) if(BUILD_PNG) ocv_clear_vars(PNG_FOUND) else() + ocv_clear_internal_cache_vars(PNG_LIBRARY PNG_INCLUDE_DIR) include(FindPNG) if(PNG_FOUND) include(CheckIncludeFile) @@ -243,6 +256,7 @@ endif() if(WITH_OPENEXR) ocv_clear_vars(HAVE_OPENEXR) if(NOT BUILD_OPENEXR) + ocv_clear_internal_cache_vars(OPENEXR_INCLUDE_PATHS OPENEXR_LIBRARIES OPENEXR_ILMIMF_LIBRARY OPENEXR_VERSION) include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindOpenEXR.cmake") endif() diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index 3753084d28..a191afde58 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -29,7 +29,7 @@ if(WITH_IPP) if(OPENCV_FORCE_IPP_EXCLUDE_LIBS OR (HAVE_IPP_ICV AND UNIX AND NOT ANDROID AND NOT APPLE - AND (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|Intel" ) AND NOT OPENCV_SKIP_IPP_EXCLUDE_LIBS ) diff --git a/cmake/OpenCVFindMKL.cmake b/cmake/OpenCVFindMKL.cmake index 141481ed42..00fd637ca1 100644 --- a/cmake/OpenCVFindMKL.cmake +++ b/cmake/OpenCVFindMKL.cmake @@ -3,7 +3,14 @@ # installation/package # # Parameters: -# MKL_WITH_TBB +# MKL_ROOT_DIR / ENV{MKLROOT} +# MKL_INCLUDE_DIR +# MKL_LIBRARIES +# MKL_USE_SINGLE_DYNAMIC_LIBRARY - use single dynamic library mkl_rt.lib / libmkl_rt.so +# MKL_WITH_TBB / MKL_WITH_OPENMP +# +# Extra: +# MKL_LIB_FIND_PATHS # # On return this will define: # @@ -13,12 +20,6 @@ # MKL_LIBRARIES - MKL libraries that are used by OpenCV # -macro (mkl_find_lib VAR NAME DIRS) - find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH) - set(${VAR} 
${${VAR}}/${NAME}) - unset(${VAR} CACHE) -endmacro() - macro(mkl_fail) set(HAVE_MKL OFF) set(MKL_ROOT_DIR "${MKL_ROOT_DIR}" CACHE PATH "Path to MKL directory") @@ -39,43 +40,50 @@ macro(get_mkl_version VERSION_FILE) set(MKL_VERSION_STR "${MKL_VERSION_MAJOR}.${MKL_VERSION_MINOR}.${MKL_VERSION_UPDATE}" CACHE STRING "MKL version" FORCE) endmacro() +OCV_OPTION(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use MKL Single Dynamic Library through mkl_rt.lib / libmkl_rt.so" OFF) +OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF)#ON IF WITH_TBB) +OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF)#ON IF WITH_OPENMP) -if(NOT DEFINED MKL_USE_MULTITHREAD) - OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF)#ON IF WITH_TBB) - OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF)#ON IF WITH_OPENMP) +if(NOT MKL_ROOT_DIR AND DEFINED MKL_INCLUDE_DIR AND EXISTS "${MKL_INCLUDE_DIR}/mkl.h") + file(TO_CMAKE_PATH "${MKL_INCLUDE_DIR}" MKL_INCLUDE_DIR) + get_filename_component(MKL_ROOT_DIR "${MKL_INCLUDE_DIR}/.." ABSOLUTE) +endif() +if(NOT MKL_ROOT_DIR) + file(TO_CMAKE_PATH "${MKL_ROOT_DIR}" mkl_root_paths) + if(DEFINED ENV{MKLROOT}) + file(TO_CMAKE_PATH "$ENV{MKLROOT}" path) + list(APPEND mkl_root_paths "${path}") + endif() + + if(WITH_MKL AND NOT mkl_root_paths) + if(WIN32) + set(ProgramFilesx86 "ProgramFiles(x86)") + file(TO_CMAKE_PATH "$ENV{${ProgramFilesx86}}" path) + list(APPEND mkl_root_paths ${path}/IntelSWTools/compilers_and_libraries/windows/mkl) + endif() + if(UNIX) + list(APPEND mkl_root_paths "/opt/intel/mkl") + endif() + endif() + + find_path(MKL_ROOT_DIR include/mkl.h PATHS ${mkl_root_paths}) endif() -#check current MKL_ROOT_DIR if(NOT MKL_ROOT_DIR OR NOT EXISTS "${MKL_ROOT_DIR}/include/mkl.h") - set(mkl_root_paths "${MKL_ROOT_DIR}") - if(DEFINED ENV{MKLROOT}) - list(APPEND mkl_root_paths "$ENV{MKLROOT}") - endif() - - if(WITH_MKL AND NOT mkl_root_paths) - if(WIN32) - set(ProgramFilesx86 "ProgramFiles(x86)") - list(APPEND mkl_root_paths $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl) - endif() - if(UNIX) - list(APPEND mkl_root_paths "/opt/intel/mkl") - endif() - endif() - - find_path(MKL_ROOT_DIR include/mkl.h PATHS ${mkl_root_paths}) + mkl_fail() endif() -set(MKL_INCLUDE_DIRS "${MKL_ROOT_DIR}/include" CACHE PATH "Path to MKL include directory") +set(MKL_INCLUDE_DIR "${MKL_ROOT_DIR}/include" CACHE PATH "Path to MKL include directory") if(NOT MKL_ROOT_DIR OR NOT EXISTS "${MKL_ROOT_DIR}" - OR NOT EXISTS "${MKL_INCLUDE_DIRS}" - OR NOT EXISTS "${MKL_INCLUDE_DIRS}/mkl_version.h" + OR NOT EXISTS "${MKL_INCLUDE_DIR}" + OR NOT EXISTS "${MKL_INCLUDE_DIR}/mkl_version.h" ) - mkl_fail() + mkl_fail() endif() -get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h) +get_mkl_version(${MKL_INCLUDE_DIR}/mkl_version.h) #determine arch if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) @@ -95,52 +103,73 @@ else() set(MKL_ARCH_SUFFIX "c") endif() -if(MKL_VERSION_STR VERSION_GREATER "11.3.0" OR MKL_VERSION_STR VERSION_EQUAL "11.3.0") - set(mkl_lib_find_paths - ${MKL_ROOT_DIR}/lib) - foreach(MKL_ARCH ${MKL_ARCH_LIST}) - list(APPEND mkl_lib_find_paths - ${MKL_ROOT_DIR}/lib/${MKL_ARCH} - ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH} - ${MKL_ROOT_DIR}/${MKL_ARCH}) - endforeach() +set(mkl_lib_find_paths ${MKL_LIB_FIND_PATHS} ${MKL_ROOT_DIR}/lib) +foreach(MKL_ARCH ${MKL_ARCH_LIST}) + list(APPEND mkl_lib_find_paths + ${MKL_ROOT_DIR}/lib/${MKL_ARCH} + ${MKL_ROOT_DIR}/${MKL_ARCH} + ) +endforeach() - set(mkl_lib_list "mkl_intel_${MKL_ARCH_SUFFIX}") +if(DEFINED OPENCV_MKL_LIBRARIES) +
# custom list, user specified + set(mkl_lib_list ${OPENCV_MKL_LIBRARIES}) - if(MKL_WITH_TBB) - list(APPEND mkl_lib_list mkl_tbb_thread tbb) - elseif(MKL_WITH_OPENMP) - if(MSVC) - list(APPEND mkl_lib_list mkl_intel_thread libiomp5md) - else() - list(APPEND mkl_lib_list mkl_gnu_thread) - endif() +elseif(MKL_USE_SINGLE_DYNAMIC_LIBRARY AND NOT (MKL_VERSION_STR VERSION_LESS "10.3.0")) + + # https://software.intel.com/content/www/us/en/develop/articles/a-new-linking-model-single-dynamic-library-mkl_rt-since-intel-mkl-103.html + set(mkl_lib_list "mkl_rt") + +elseif(NOT (MKL_VERSION_STR VERSION_LESS "11.3.0")) + + set(mkl_lib_list "mkl_intel_${MKL_ARCH_SUFFIX}") + + if(MKL_WITH_TBB) + list(APPEND mkl_lib_list mkl_tbb_thread) + elseif(MKL_WITH_OPENMP) + if(MSVC) + list(APPEND mkl_lib_list mkl_intel_thread libiomp5md) else() - list(APPEND mkl_lib_list mkl_sequential) + list(APPEND mkl_lib_list mkl_gnu_thread) endif() + else() + list(APPEND mkl_lib_list mkl_sequential) + endif() - list(APPEND mkl_lib_list mkl_core) + list(APPEND mkl_lib_list mkl_core) else() - message(STATUS "MKL version ${MKL_VERSION_STR} is not supported") - mkl_fail() + message(STATUS "MKL version ${MKL_VERSION_STR} is not supported") + mkl_fail() endif() -set(MKL_LIBRARIES "") -foreach(lib ${mkl_lib_list}) - find_library(${lib} NAMES ${lib} ${lib}_dll HINTS ${mkl_lib_find_paths}) - mark_as_advanced(${lib}) - if(NOT ${lib}) - mkl_fail() +if(NOT MKL_LIBRARIES) + set(MKL_LIBRARIES "") + foreach(lib ${mkl_lib_list}) + set(lib_var_name MKL_LIBRARY_${lib}) + find_library(${lib_var_name} NAMES ${lib} ${lib}_dll HINTS ${mkl_lib_find_paths}) + mark_as_advanced(${lib_var_name}) + if(NOT ${lib_var_name}) + mkl_fail() endif() - list(APPEND MKL_LIBRARIES ${${lib}}) -endforeach() + list(APPEND MKL_LIBRARIES ${${lib_var_name}}) + endforeach() + list(APPEND MKL_LIBRARIES ${OPENCV_EXTRA_MKL_LIBRARIES}) +endif() + +if(MKL_WITH_TBB) + if(BUILD_TBB) + message(STATUS "MKL: reusing builtin TBB binaries is not supported. 
Consider disabling MKL_WITH_TBB flag to prevent build/runtime errors") + else() + list(APPEND MKL_LIBRARIES tbb) # tbb target is expected + endif() +endif() message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}") set(HAVE_MKL ON) set(MKL_ROOT_DIR "${MKL_ROOT_DIR}" CACHE PATH "Path to MKL directory") -set(MKL_INCLUDE_DIRS "${MKL_INCLUDE_DIRS}" CACHE PATH "Path to MKL include directory") -set(MKL_LIBRARIES "${MKL_LIBRARIES}" CACHE STRING "MKL libraries") -if(UNIX AND NOT MKL_LIBRARIES_DONT_HACK) +set(MKL_INCLUDE_DIRS "${MKL_INCLUDE_DIR}") +set(MKL_LIBRARIES "${MKL_LIBRARIES}") +if(UNIX AND NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY AND NOT MKL_LIBRARIES_DONT_HACK) #it's ugly but helps to avoid cyclic lib problem set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl") endif() diff --git a/cmake/OpenCVFindVA.cmake b/cmake/OpenCVFindVA.cmake index 9d0ceec2c5..08d034f690 100644 --- a/cmake/OpenCVFindVA.cmake +++ b/cmake/OpenCVFindVA.cmake @@ -2,21 +2,20 @@ # HAVE_VA - libva is available # HAVE_VA_INTEL - OpenCL/libva Intel interoperability extension is available -if(UNIX AND NOT ANDROID) - find_path( +find_path( VA_INCLUDE_DIR NAMES va/va.h - PATHS "/usr/include" + PATHS ${VA_ROOT_DIR} PATH_SUFFIXES include - DOC "Path to libva headers") -endif() + DOC "Path to libva headers" +) if(VA_INCLUDE_DIR) set(HAVE_VA TRUE) - if(NOT DEFINED VA_LIBRARIES) + if(NOT DEFINED VA_LIBRARIES AND NOT OPENCV_LIBVA_LINK) set(VA_LIBRARIES "va" "va-drm") endif() else() set(HAVE_VA FALSE) - message(WARNING "libva installation is not found.") + message(STATUS "libva: missing va.h header (VA_INCLUDE_DIR)") endif() diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index bd14aa2378..224953a1f3 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -98,15 +98,6 @@ macro(ocv_add_dependencies full_modname) endforeach() unset(__depsvar) - # hack for python - set(__python_idx) - list(FIND OPENCV_MODULE_${full_modname}_WRAPPERS "python" __python_idx) - if (NOT __python_idx EQUAL -1) - list(REMOVE_ITEM OPENCV_MODULE_${full_modname}_WRAPPERS "python") - list(APPEND OPENCV_MODULE_${full_modname}_WRAPPERS "python_bindings_generator" "python2" "python3") - endif() - unset(__python_idx) - ocv_list_unique(OPENCV_MODULE_${full_modname}_REQ_DEPS) ocv_list_unique(OPENCV_MODULE_${full_modname}_OPT_DEPS) ocv_list_unique(OPENCV_MODULE_${full_modname}_PRIVATE_REQ_DEPS) @@ -210,11 +201,6 @@ macro(ocv_add_module _name) set(OPENCV_MODULES_DISABLED_USER ${OPENCV_MODULES_DISABLED_USER} "${the_module}" CACHE INTERNAL "List of OpenCV modules explicitly disabled by user") endif() - # add reverse wrapper dependencies - foreach (wrapper ${OPENCV_MODULE_${the_module}_WRAPPERS}) - ocv_add_dependencies(opencv_${wrapper} OPTIONAL ${the_module}) - endforeach() - # stop processing of current file ocv_cmake_hook(POST_ADD_MODULE) ocv_cmake_hook(POST_ADD_MODULE_${the_module}) @@ -501,6 +487,21 @@ function(__ocv_resolve_dependencies) endforeach() endif() + # add reverse wrapper dependencies (BINDINGS) + foreach(the_module ${OPENCV_MODULES_BUILD}) + foreach (wrapper ${OPENCV_MODULE_${the_module}_WRAPPERS}) + if(wrapper STREQUAL "python") # hack for python (BINDINGS) + ocv_add_dependencies(opencv_python2 OPTIONAL ${the_module}) + ocv_add_dependencies(opencv_python3 OPTIONAL ${the_module}) + else() + ocv_add_dependencies(opencv_${wrapper} OPTIONAL ${the_module}) + endif() + if(DEFINED OPENCV_MODULE_opencv_${wrapper}_bindings_generator_CLASS) +
ocv_add_dependencies(opencv_${wrapper}_bindings_generator OPTIONAL ${the_module}) + endif() + endforeach() + endforeach() + # disable MODULES with unresolved dependencies set(has_changes ON) while(has_changes) @@ -878,7 +879,9 @@ endmacro() macro(_ocv_create_module) ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) - set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") + set(__module_headers ${OPENCV_MODULE_${the_module}_HEADERS}) + list(SORT __module_headers) # fix headers order, useful for bindings + set(OPENCV_MODULE_${the_module}_HEADERS ${__module_headers} CACHE INTERNAL "List of header files for ${the_module}") set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") # The condition we ought to be testing here is whether ocv_add_precompiled_headers will diff --git a/cmake/OpenCVPluginStandalone.cmake b/cmake/OpenCVPluginStandalone.cmake new file mode 100644 index 0000000000..15b7a8085e --- /dev/null +++ b/cmake/OpenCVPluginStandalone.cmake @@ -0,0 +1,131 @@ +# Standalone OpenCV plugins build scripts +# +# Useful OpenCV common build variables: +# - CMAKE_BUILD_TYPE=Release/Debug +# - BUILD_WITH_DEBUG_INFO=ON +# - ENABLE_BUILD_HARDENING=ON +# +# Plugin configuration variables: +# - OPENCV_PLUGIN_DEPS - set of extra dependencies (modules), used for include dirs, target_link_libraries +# - OPENCV_PLUGIN_SUFFIX +# - OPENCV_PLUGIN_NAME +# - OPENCV_PLUGIN_OUTPUT_NAME_FULL (overrides both OPENCV_PLUGIN_NAME / OPENCV_PLUGIN_SUFFIX) +# +#============================================= + +if(NOT OpenCV_SOURCE_DIR) + message(FATAL_ERROR "OpenCV_SOURCE_DIR must be set to build the plugin!") +endif() + +if(NOT DEFINED CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() +message(STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") + +set(BUILD_SHARED_LIBS ON CACHE BOOL "") +if(NOT BUILD_SHARED_LIBS) + message(FATAL_ERROR "Static plugin build does not make sense") +endif() + +# re-use OpenCV build scripts +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVUtils.cmake") +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCXXCompiler.cmake") +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVCompilerOptions.cmake") + +function(ocv_create_plugin module default_name dependency_target dependency_target_desc) + + set(OPENCV_PLUGIN_NAME ${default_name} CACHE STRING "") + set(OPENCV_PLUGIN_DESTINATION "" CACHE PATH "") + project(${OPENCV_PLUGIN_NAME} LANGUAGES CXX) + + if(NOT TARGET ${dependency_target}) + message(FATAL_ERROR "${dependency_target_desc} was not found! 
(missing target ${dependency_target})") + endif() + + set(modules_ROOT "${OpenCV_SOURCE_DIR}/modules") + set(module_ROOT "${modules_ROOT}/${module}") + + foreach(src ${ARGN}) + list(APPEND sources "${module_ROOT}/${src}") + endforeach() + + add_library(${OPENCV_PLUGIN_NAME} MODULE + "${sources}" + ${OPENCV_PLUGIN_EXTRA_SRC_FILES} + ) + + if(OPENCV_PLUGIN_DEPS) + foreach(d ${OPENCV_PLUGIN_DEPS}) + list(APPEND OPENCV_PLUGIN_EXTRA_INCLUDES "${modules_ROOT}/${d}/include") + endforeach() + endif() + + target_include_directories(${OPENCV_PLUGIN_NAME} PRIVATE + "${CMAKE_CURRENT_BINARY_DIR}" + "${module_ROOT}/src" + "${module_ROOT}/include" + ${OPENCV_PLUGIN_EXTRA_INCLUDES} + ) + target_compile_definitions(${OPENCV_PLUGIN_NAME} PRIVATE "BUILD_PLUGIN=1") + + target_link_libraries(${OPENCV_PLUGIN_NAME} PRIVATE ${dependency_target}) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES + CXX_STANDARD 11 + CXX_VISIBILITY_PRESET hidden + ) + + if(DEFINED OPENCV_PLUGIN_MODULE_PREFIX) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES PREFIX "${OPENCV_PLUGIN_MODULE_PREFIX}") + endif() + + if(APPLE) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + elseif(WIN32) + # Hack for Windows only, Linux/MacOS uses global symbol table (without exact .so binding) + find_package(OpenCV REQUIRED ${module} ${OPENCV_PLUGIN_DEPS}) + target_link_libraries(${OPENCV_PLUGIN_NAME} PRIVATE ${OpenCV_LIBRARIES}) + endif() + + if(NOT OpenCV_FOUND) # build against sources (Linux) + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/opencv2/opencv_modules.hpp" "#pragma once") + endif() + + if(WIN32) + ocv_update(OPENCV_DEBUG_POSTFIX d) + endif() + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}") + + if(DEFINED OPENCV_PLUGIN_SUFFIX) + # custom value + else() + if(WIN32) + ocv_update(OPENCV_PLUGIN_VERSION "${OpenCV_VERSION_MAJOR}${OpenCV_VERSION_MINOR}${OpenCV_VERSION_PATCH}") + if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8) + ocv_update(OPENCV_PLUGIN_ARCH "_64") + else() + ocv_update(OPENCV_PLUGIN_ARCH "") + endif() + else() + # empty + endif() + ocv_update(OPENCV_PLUGIN_SUFFIX "${OPENCV_PLUGIN_VERSION}${OPENCV_PLUGIN_ARCH}") + endif() + + if(OPENCV_PLUGIN_DESTINATION) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${OPENCV_PLUGIN_DESTINATION}") + message(STATUS "Output destination: ${OPENCV_PLUGIN_DESTINATION}") + endif() + + if(OPENCV_PLUGIN_OUTPUT_NAME_FULL) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES OUTPUT_NAME "${OPENCV_PLUGIN_OUTPUT_NAME_FULL}") + elseif(OPENCV_PLUGIN_OUTPUT_NAME) + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES OUTPUT_NAME "${OPENCV_PLUGIN_OUTPUT_NAME}${OPENCV_PLUGIN_SUFFIX}") + else() + set_target_properties(${OPENCV_PLUGIN_NAME} PROPERTIES OUTPUT_NAME "${OPENCV_PLUGIN_NAME}${OPENCV_PLUGIN_SUFFIX}") + endif() + + install(TARGETS ${OPENCV_PLUGIN_NAME} LIBRARY DESTINATION . COMPONENT plugins) + + message(STATUS "Library name: ${OPENCV_PLUGIN_NAME}") + +endfunction() diff --git a/cmake/OpenCVPylint.cmake b/cmake/OpenCVPylint.cmake index 50da730946..928926d340 100644 --- a/cmake/OpenCVPylint.cmake +++ b/cmake/OpenCVPylint.cmake @@ -122,7 +122,6 @@ function(ocv_pylint_finalize) list(LENGTH PYLINT_TARGET_ID __total) set(PYLINT_TOTAL_TARGETS "${__total}" CACHE INTERNAL "") - message(STATUS "Pylint: registered ${__total} targets. Build 'check_pylint' target to run checks (\"cmake --build . 
--target check_pylint\" or \"make check_pylint\")") configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/pylint.cmake.in" "${CMAKE_BINARY_DIR}/pylint.cmake" @ONLY) add_custom_target(check_pylint diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 2ad380236c..0951e06581 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -8,7 +8,20 @@ include(CMakeParseArguments) function(ocv_cmake_dump_vars) set(OPENCV_SUPPRESS_DEPRECATIONS 1) # suppress deprecation warnings from variable_watch() guards get_cmake_property(__variableNames VARIABLES) - cmake_parse_arguments(DUMP "" "TOFILE" "" ${ARGN}) + cmake_parse_arguments(DUMP "FORCE" "TOFILE" "" ${ARGN}) + + # avoid generation of excessive logs with "--trace" or "--trace-expand" parameters + # Note: `-DCMAKE_TRACE_MODE=1` should be passed to CMake through command line. It is not a CMake buildin variable for now (2020-12) + # Use `cmake . -UCMAKE_TRACE_MODE` to remove this variable from cache + if(CMAKE_TRACE_MODE AND NOT DUMP_FORCE) + if(DUMP_TOFILE) + file(WRITE ${CMAKE_BINARY_DIR}/${DUMP_TOFILE} "Skipped due to enabled CMAKE_TRACE_MODE") + else() + message(AUTHOR_WARNING "ocv_cmake_dump_vars() is skipped due to enabled CMAKE_TRACE_MODE") + endif() + return() + endif() + set(regex "${DUMP_UNPARSED_ARGUMENTS}") string(TOLOWER "${regex}" regex_lower) set(__VARS "") @@ -400,6 +413,24 @@ macro(ocv_clear_vars) endforeach() endmacro() + +# Clears passed variables with INTERNAL type from CMake cache +macro(ocv_clear_internal_cache_vars) + foreach(_var ${ARGN}) + get_property(_propertySet CACHE ${_var} PROPERTY TYPE SET) + if(_propertySet) + get_property(_type CACHE ${_var} PROPERTY TYPE) + if(_type STREQUAL "INTERNAL") + message("Cleaning INTERNAL cached variable: ${_var}") + unset(${_var} CACHE) + endif() + endif() + endforeach() + unset(_propertySet) + unset(_type) +endmacro() + + set(OCV_COMPILER_FAIL_REGEX "argument .* is not valid" # GCC 9+ (including support of unicode quotes) "command[- ]line option .* is valid for .* but not for C\\+\\+" # GNU @@ -533,7 +564,11 @@ macro(ocv_check_flag_support lang flag varname base_options) elseif("_${lang}_" MATCHES "_C_") set(_lang C) elseif("_${lang}_" MATCHES "_OBJCXX_") - set(_lang OBJCXX) + if(DEFINED CMAKE_OBJCXX_COMPILER) # CMake 3.16+ and enable_language(OBJCXX) call are required + set(_lang OBJCXX) + else() + set(_lang CXX) + endif() else() set(_lang ${lang}) endif() @@ -542,7 +577,9 @@ macro(ocv_check_flag_support lang flag varname base_options) string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") string(REGEX REPLACE " -|-|=| |\\.|," "_" ${varname} "${${varname}}") - ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) + if(DEFINED CMAKE_${_lang}_COMPILER) + ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) + endif() endmacro() macro(ocv_check_runtime_flag flag result) @@ -1540,6 +1577,30 @@ function(ocv_add_library target) endfunction() +function(ocv_add_external_target name inc link def) + if(BUILD_SHARED_LIBS) + set(imp IMPORTED) + endif() + add_library(ocv.3rdparty.${name} INTERFACE ${imp}) + set_target_properties(ocv.3rdparty.${name} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${inc}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${inc}" + INTERFACE_COMPILE_DEFINITIONS "${def}") + # When cmake version is greater than or equal to 3.11, INTERFACE_LINK_LIBRARIES no longer applies to interface library + # See https://github.com/opencv/opencv/pull/18658 + if (CMAKE_VERSION VERSION_LESS 3.11) 
+ set_target_properties(ocv.3rdparty.${name} PROPERTIES + INTERFACE_LINK_LIBRARIES "${link}") + else() + target_link_libraries(ocv.3rdparty.${name} INTERFACE ${link}) + endif() + # + if(NOT BUILD_SHARED_LIBS) + install(TARGETS ocv.3rdparty.${name} EXPORT OpenCVModules) + endif() +endfunction() + + macro(ocv_get_libname var_name) get_filename_component(__libname "${ARGN}" NAME) # libopencv_core.so.3.3 -> opencv_core diff --git a/cmake/android/android_gradle_projects.cmake b/cmake/android/android_gradle_projects.cmake index c595bee107..2e34a20d97 100644 --- a/cmake/android/android_gradle_projects.cmake +++ b/cmake/android/android_gradle_projects.cmake @@ -1,9 +1,16 @@ # https://developer.android.com/studio/releases/gradle-plugin -set(ANDROID_GRADLE_PLUGIN_VERSION "3.2.1" CACHE STRING "Android Gradle Plugin version (3.0+)") +set(ANDROID_GRADLE_PLUGIN_VERSION "3.2.1" CACHE STRING "Android Gradle Plugin version") message(STATUS "Android Gradle Plugin version: ${ANDROID_GRADLE_PLUGIN_VERSION}") +set(GRADLE_VERSION "5.6.4" CACHE STRING "Gradle version") +message(STATUS "Gradle version: ${GRADLE_VERSION}") + set(ANDROID_COMPILE_SDK_VERSION "26" CACHE STRING "Android compileSdkVersion") -set(ANDROID_MIN_SDK_VERSION "21" CACHE STRING "Android minSdkVersion") +if(ANDROID_NATIVE_API_LEVEL GREATER 21) + set(ANDROID_MIN_SDK_VERSION "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android minSdkVersion") +else() + set(ANDROID_MIN_SDK_VERSION "21" CACHE STRING "Android minSdkVersion") +endif() set(ANDROID_TARGET_SDK_VERSION "26" CACHE STRING "Android minSdkVersion") set(ANDROID_BUILD_BASE_DIR "${OpenCV_BINARY_DIR}/opencv_android" CACHE INTERNAL "") @@ -38,9 +45,11 @@ set(ANDROID_ABI_FILTER "${ANDROID_INSTALL_ABI_FILTER}") configure_file("${OpenCV_SOURCE_DIR}/samples/android/build.gradle.in" "${ANDROID_TMP_INSTALL_BASE_DIR}/${ANDROID_INSTALL_SAMPLES_DIR}/build.gradle" @ONLY) install(FILES "${ANDROID_TMP_INSTALL_BASE_DIR}/${ANDROID_INSTALL_SAMPLES_DIR}/build.gradle" DESTINATION "${ANDROID_INSTALL_SAMPLES_DIR}" COMPONENT samples) +configure_file("${OpenCV_SOURCE_DIR}/platforms/android/gradle-wrapper/gradle/wrapper/gradle-wrapper.properties.in" "${ANDROID_BUILD_BASE_DIR}/gradle/wrapper/gradle-wrapper.properties" @ONLY) +install(FILES "${ANDROID_BUILD_BASE_DIR}/gradle/wrapper/gradle-wrapper.properties" DESTINATION "${ANDROID_INSTALL_SAMPLES_DIR}/gradle/wrapper" COMPONENT samples) + set(GRADLE_WRAPPER_FILES "gradle/wrapper/gradle-wrapper.jar" - "gradle/wrapper/gradle-wrapper.properties" "gradlew.bat" "gradlew" "gradle.properties" diff --git a/cmake/platforms/OpenCV-Emscripten.cmake b/cmake/platforms/OpenCV-Emscripten.cmake new file mode 100644 index 0000000000..ec15fba799 --- /dev/null +++ b/cmake/platforms/OpenCV-Emscripten.cmake @@ -0,0 +1 @@ +set(OPENCV_SKIP_LINK_AS_NEEDED 1) diff --git a/cmake/templates/opencv_abi.xml.in b/cmake/templates/opencv_abi.xml.in index 711c4e99ee..c3a39d6dfe 100644 --- a/cmake/templates/opencv_abi.xml.in +++ b/cmake/templates/opencv_abi.xml.in @@ -26,7 +26,9 @@ opencv2/core/hal/*.impl.* opencv2/core/cuda* opencv2/core/opencl* + opencv2/core/parallel/backend/* opencv2/core/private* + opencv2/core/*quaternion* opencv/cxeigen.hpp opencv2/core/eigen.hpp opencv2/flann/hdf5.h diff --git a/cmake/templates/xcode-launch-c.in b/cmake/templates/xcode-launch-c.in new file mode 100644 index 0000000000..609dbf47b1 --- /dev/null +++ b/cmake/templates/xcode-launch-c.in @@ -0,0 +1,11 @@ +#!/bin/sh +# https://crascit.com/2016/04/09/using-ccache-with-cmake/ + +# Xcode generator doesn't include the 
compiler as the +# first argument, Ninja and Makefiles do. Handle both cases. +if [[ "$1" = "${CMAKE_C_COMPILER}" ]] ; then + shift +fi + +export CCACHE_CPP2=true +exec "${CCACHE_PROGRAM}" "${CMAKE_C_COMPILER}" "$@" diff --git a/cmake/templates/xcode-launch-cxx.in b/cmake/templates/xcode-launch-cxx.in new file mode 100644 index 0000000000..09233b3859 --- /dev/null +++ b/cmake/templates/xcode-launch-cxx.in @@ -0,0 +1,11 @@ +#!/bin/sh +# https://crascit.com/2016/04/09/using-ccache-with-cmake/ + +# Xcode generator doesn't include the compiler as the +# first argument, Ninja and Makefiles do. Handle both cases. +if [[ "$1" = "${CMAKE_CXX_COMPILER}" ]] ; then + shift +fi + +export CCACHE_CPP2=true +exec "${CCACHE_PROGRAM}" "${CMAKE_CXX_COMPILER}" "$@" diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index ec7d7cd3b3..a321be9878 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -39,7 +39,6 @@ ALIASES += end_toggle="@htmlonly[block] @endhtmlonly" ALIASES += prev_tutorial{1}="**Prev Tutorial:** \ref \1 \n" ALIASES += next_tutorial{1}="**Next Tutorial:** \ref \1 \n" ALIASES += youtube{1}="@htmlonly[block]
@endhtmlonly" -TCL_SUBST = OPTIMIZE_OUTPUT_FOR_C = NO OPTIMIZE_OUTPUT_JAVA = NO OPTIMIZE_FOR_FORTRAN = NO @@ -228,6 +227,7 @@ INCLUDE_PATH = INCLUDE_FILE_PATTERNS = PREDEFINED = __cplusplus=1 \ CVAPI(x)=x \ + CV_API_CALL= \ CV_DOXYGEN= \ CV_EXPORTS= \ CV_EXPORTS_W= \ @@ -255,6 +255,12 @@ PREDEFINED = __cplusplus=1 \ CV_DEFAULT(x)=" = x" \ CV_NEON=1 \ CV_SSE2=1 \ + CV_SIMD128=1 \ + CV_SIMD256=1 \ + CV_SIMD512=1 \ + CV_SIMD128_64F=1 \ + CV_SIMD256_64F=1 \ + CV_SIMD512_64F=1 \ CV__DEBUG_NS_BEGIN= \ CV__DEBUG_NS_END= \ CV_DEPRECATED_EXTERNAL= \ diff --git a/doc/js_tutorials/js_assets/js_dnn_example_helper.js b/doc/js_tutorials/js_assets/js_dnn_example_helper.js new file mode 100644 index 0000000000..06baa6760b --- /dev/null +++ b/doc/js_tutorials/js_assets/js_dnn_example_helper.js @@ -0,0 +1,119 @@ +getBlobFromImage = function(inputSize, mean, std, swapRB, image) { + let mat; + if (typeof(image) === 'string') { + mat = cv.imread(image); + } else { + mat = image; + } + + let matC3 = new cv.Mat(mat.matSize[0], mat.matSize[1], cv.CV_8UC3); + cv.cvtColor(mat, matC3, cv.COLOR_RGBA2BGR); + let input = cv.blobFromImage(matC3, std, new cv.Size(inputSize[0], inputSize[1]), + new cv.Scalar(mean[0], mean[1], mean[2]), swapRB); + + matC3.delete(); + return input; +} + +loadLables = async function(labelsUrl) { + let response = await fetch(labelsUrl); + let label = await response.text(); + label = label.split('\n'); + return label; +} + +loadModel = async function(e) { + return new Promise((resolve) => { + let file = e.target.files[0]; + let path = file.name; + let reader = new FileReader(); + reader.readAsArrayBuffer(file); + reader.onload = function(ev) { + if (reader.readyState === 2) { + let buffer = reader.result; + let data = new Uint8Array(buffer); + cv.FS_createDataFile('/', path, data, true, false, false); + resolve(path); + } + } + }); +} + +getTopClasses = function(probs, labels, topK = 3) { + probs = Array.from(probs); + let indexes = probs.map((prob, index) => [prob, index]); + let sorted = indexes.sort((a, b) => { + if (a[0] === b[0]) {return 0;} + return a[0] < b[0] ? 
-1 : 1; + }); + sorted.reverse(); + let classes = []; + for (let i = 0; i < topK; ++i) { + let prob = sorted[i][0]; + let index = sorted[i][1]; + let c = { + label: labels[index], + prob: (prob * 100).toFixed(2) + } + classes.push(c); + } + return classes; +} + +loadImageToCanvas = function(e, canvasId) { + let files = e.target.files; + let imgUrl = URL.createObjectURL(files[0]); + let canvas = document.getElementById(canvasId); + let ctx = canvas.getContext('2d'); + let img = new Image(); + img.crossOrigin = 'anonymous'; + img.src = imgUrl; + img.onload = function() { + ctx.drawImage(img, 0, 0, canvas.width, canvas.height); + }; +} + +drawInfoTable = async function(jsonUrl, divId) { + let response = await fetch(jsonUrl); + let json = await response.json(); + + let appendix = document.getElementById(divId); + for (key of Object.keys(json)) { + let h3 = document.createElement('h3'); + h3.textContent = key + " model"; + appendix.appendChild(h3); + + let table = document.createElement('table'); + let head_tr = document.createElement('tr'); + for (head of Object.keys(json[key][0])) { + let th = document.createElement('th'); + th.textContent = head; + th.style.border = "1px solid black"; + head_tr.appendChild(th); + } + table.appendChild(head_tr) + + for (model of json[key]) { + let tr = document.createElement('tr'); + for (params of Object.keys(model)) { + let td = document.createElement('td'); + td.style.border = "1px solid black"; + if (params !== "modelUrl" && params !== "configUrl" && params !== "labelsUrl") { + td.textContent = model[params]; + tr.appendChild(td); + } else { + let a = document.createElement('a'); + let link = document.createTextNode('link'); + a.append(link); + a.href = model[params]; + td.appendChild(a); + tr.appendChild(td); + } + } + table.appendChild(tr); + } + table.style.width = "800px"; + table.style.borderCollapse = "collapse"; + appendix.appendChild(table); + } +} diff --git a/doc/js_tutorials/js_assets/js_image_classification.html b/doc/js_tutorials/js_assets/js_image_classification.html new file mode 100644 index 0000000000..656f2720b6 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification.html @@ -0,0 +1,263 @@ + + + + + + Image Classification Example + + + + +

[js_image_classification.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Image Classification Example

This tutorial shows you how to write an image classification example with OpenCV.js.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Try it button to see the result. You can choose any other image.

Inputs: canvasInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The main loop, which reads the image from the canvas and runs inference once (see the sketch below).
3. Load labels from a txt file and process them into an array.
4. Get a blob from the image as input for the net, and standardize it with the mean and std.
5. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
6. The post-processing, including softmax if needed, and getting the top classes from the output vector.

Model Info: see js_image_classification_model_info.json below.
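The page's inline snippets did not survive extraction; functionally they perform a single dnn pass over the canvas image using the helpers from js_dnn_example_helper.js above (loadLables, getBlobFromImage, getTopClasses). A minimal sketch, not the page's exact code: the classifyCanvas wrapper and the 224x224 input size are assumptions, while the mean/std/swapRB values and the needSoftmax flag come from the model info JSON below.

@code{.js}
// Minimal sketch of the classification main loop (assumed wrapper, not page code).
async function classifyCanvas(modelPath, configPath, labelsUrl) {
    const inputSize = [224, 224];      // assumed; not listed in the model info JSON
    const mean = [104, 117, 123];      // GoogLeNet row of the model info JSON
    const std = 1;
    const swapRB = false;

    const labels = await loadLables(labelsUrl);   // helper above: one label per line
    const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');

    let net = cv.readNet(configPath, modelPath);  // framework guessed from extensions
    net.setInput(input);
    const result = net.forward();                 // 1 x N class scores

    // if the model info row says needSoftmax=true, apply softmax to the scores first
    const classes = getTopClasses(result.data32F, labels);  // top-3 {label, prob}
    console.log(classes);

    input.delete(); result.delete(); net.delete();
}
@endcode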
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_image_classification_model_info.json b/doc/js_tutorials/js_assets/js_image_classification_model_info.json new file mode 100644 index 0000000000..67553ec2d3 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification_model_info.json @@ -0,0 +1,65 @@ +{ + "caffe": [ + { + "model": "alexnet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel", + "configUrl": "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt" + }, + { + "model": "densenet", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "needSoftmax": "true", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "https://drive.google.com/open?id=0B7ubpZO7HnlCcHlfNmJkU2VPelE", + "configUrl": "https://raw.githubusercontent.com/shicai/DenseNet-Caffe/master/DenseNet_121.prototxt" + }, + { + "model": "googlenet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel", + "configUrl": "https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt" + }, + { + "model": "squeezenet", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "https://raw.githubusercontent.com/forresti/SqueezeNet/master/SqueezeNet_v1.0/squeezenet_v1.0.caffemodel", + "configUrl": "https://raw.githubusercontent.com/forresti/SqueezeNet/master/SqueezeNet_v1.0/deploy.prototxt" + }, + { + "model": "VGG", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/classification_classes_ILSVRC2012.txt", + "modelUrl": "http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel", + "configUrl": "https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/f02f8769e64494bcd3d7e97d5d747ac275825721/VGG_ILSVRC_19_layers_deploy.prototxt" + } + ], + "tensorflow": [ + { + "model": "inception", + "mean": "123, 117, 104", + "std": "1", + "swapRB": "true", + "needSoftmax": "false", + "labelsUrl": "https://raw.githubusercontent.com/petewarden/tf_ios_makefile_example/master/data/imagenet_comp_graph_label_strings.txt", + "modelUrl": "https://raw.githubusercontent.com/petewarden/tf_ios_makefile_example/master/data/tensorflow_inception_graph.pb" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_image_classification_with_camera.html b/doc/js_tutorials/js_assets/js_image_classification_with_camera.html new file mode 100644 index 0000000000..9a2473cf2b --- /dev/null +++ b/doc/js_tutorials/js_assets/js_image_classification_with_camera.html @@ -0,0 +1,281 @@ + + + + + + Image Classification Example with Camera + + + + +

[js_image_classification_with_camera.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Image Classification Example with Camera

This tutorial shows you how to write an image classification example with camera.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Start/Stop button to start or stop the camera capture.

Inputs: videoInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The function that captures video from the camera, and the main loop that runs inference once per frame (see the sketch below).
3. Load labels from a txt file and process them into an array.
4. Get a blob from the image as input for the net, and standardize it with the mean and std.
5. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
6. The post-processing, including softmax if needed, and getting the top classes from the output vector.

Model Info: see js_image_classification_model_info.json above.
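The camera variant differs from the still-image page mainly in step 2: inference runs inside a scheduled capture loop. A minimal sketch, assuming net, labels and the blob parameters are already prepared as in the still-image example and that a playing <video> element has the id videoInput:

@code{.js}
// Minimal sketch of the capture loop (not the page's exact code).
const video = document.getElementById('videoInput');   // playing webcam stream
let cap = new cv.VideoCapture(video);
let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);

const FPS = 30;
function processVideo() {
    const begin = Date.now();
    cap.read(frame);                                   // grab one RGBA frame
    const input = getBlobFromImage(inputSize, mean, std, swapRB, frame);
    net.setInput(input);
    const result = net.forward();
    const classes = getTopClasses(result.data32F, labels);
    input.delete(); result.delete();
    // keep roughly constant FPS by subtracting the processing time
    const delay = 1000 / FPS - (Date.now() - begin);
    setTimeout(processVideo, Math.max(0, delay));
}
setTimeout(processVideo, 0);
@endcode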
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_intelligent_scissors.html b/doc/js_tutorials/js_assets/js_intelligent_scissors.html new file mode 100644 index 0000000000..1782dc6f03 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_intelligent_scissors.html @@ -0,0 +1,127 @@ + + + + +Intelligent Scissors Example + + + +

[js_intelligent_scissors.html: new demo page; markup and inline code snippets not recoverable. Page text:]

Intelligent Scissors Example

Click the Start button to launch the code below.
Then click on the image to pick a source point. After that you can hover the mouse pointer over the canvas to specify a target point candidate.
You can change the code in the <textarea> to investigate more. You can choose another image (you need to Stop first).

Inputs: canvasInput

(A sketch of the tool's use follows below.)
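The page's stripped snippets wire the mouse events to the scissors tool. A minimal sketch of that wiring; the underscore-joined OpenCV.js class name and the sample click/hover coordinates are assumptions of this sketch, while the method names follow the C++ @ref cv::segmentation::IntelligentScissorsMB API:

@code{.js}
// Minimal IntelligentScissors sketch (assumed binding name, not the page's code).
let src = cv.imread('canvasInput');
let tool = new cv.segmentation_IntelligentScissorsMB();
tool.setEdgeFeatureCannyParameters(32, 100);   // Canny hysteresis thresholds
tool.setGradientMagnitudeMaxLimit(200);
tool.applyImage(src);                          // pre-compute features once per image

// on mouse click: pick the source point and build the optimal-path map
let sourcePoint = new cv.Point(100, 100);      // hypothetical click position
tool.buildMap(sourcePoint);

// on mouse move: extract the contour to the current target point candidate
let targetPoint = new cv.Point(200, 150);      // hypothetical hover position
let contour = new cv.Mat();
tool.getContour(targetPoint, contour);

let contours = new cv.MatVector();
contours.push_back(contour);
cv.polylines(src, contours, false, new cv.Scalar(0, 255, 0, 255));
cv.imshow('canvasInput', src);                 // redraw with the current path
src.delete(); contour.delete(); contours.delete();
@endcode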
+ + + + + + + + + diff --git a/doc/js_tutorials/js_assets/js_object_detection.html b/doc/js_tutorials/js_assets/js_object_detection.html new file mode 100644 index 0000000000..53f1e48639 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection.html @@ -0,0 +1,387 @@ + + + + + + Object Detection Example + + + + +

[js_object_detection.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Object Detection Example

This tutorial shows you how to write an object detection example with OpenCV.js.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Try it button to see the result. You can choose any other image.

Inputs: canvasInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The main loop, which reads the image from the canvas and runs inference once.
3. Load labels from a txt file and process them into an array.
4. Get a blob from the image as input for the net, and standardize it with the mean and std.
5. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
6. The post-processing, including getting the boxes from the output and drawing them into the image (see the sketch below).

Model Info: see js_object_detection_model_info.json below.
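For the "SSD" output type named in the model info JSON below, the stripped post-processing snippet walks a 1x1xNx7 detection blob. A minimal decoding sketch (a hypothetical helper, not the page's exact code):

@code{.js}
// Minimal SSD-style decoding sketch. Each detection row is a 7-tuple:
// [batchId, classId, confidence, left, top, right, bottom], coordinates in [0, 1].
// width/height/threshold are hypothetical caller-supplied values.
function decodeSSD(result, width, height, threshold = 0.5) {
    const data = result.data32F;
    const boxes = [];
    for (let i = 0; i < data.length; i += 7) {
        const confidence = data[i + 2];
        if (confidence < threshold) continue;   // drop low-confidence detections
        boxes.push({
            classId: data[i + 1],
            confidence: confidence,
            left: data[i + 3] * width,
            top: data[i + 4] * height,
            right: data[i + 5] * width,
            bottom: data[i + 6] * height,
        });
    }
    return boxes;  // draw each with cv.rectangle and label with labels[classId]
}
@endcode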
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_object_detection_model_info.json b/doc/js_tutorials/js_assets/js_object_detection_model_info.json new file mode 100644 index 0000000000..c0d14be714 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection_model_info.json @@ -0,0 +1,39 @@ +{ + "caffe": [ + { + "model": "mobilenet_SSD", + "inputSize": "300, 300", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "outType": "SSD", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt", + "modelUrl": "https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/mobilenet_iter_73000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/chuanqi305/MobileNet-SSD/master/deploy.prototxt" + }, + { + "model": "VGG_SSD", + "inputSize": "300, 300", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "outType": "SSD", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_pascal_voc.txt", + "modelUrl": "https://drive.google.com/uc?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA&export=download", + "configUrl": "https://drive.google.com/uc?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA&export=download" + } + ], + "darknet": [ + { + "model": "yolov2_tiny", + "inputSize": "416, 416", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "outType": "YOLO", + "labelsUrl": "https://raw.githubusercontent.com/opencv/opencv/master/samples/data/dnn/object_detection_classes_yolov3.txt", + "modelUrl": "https://pjreddie.com/media/files/yolov2-tiny.weights", + "configUrl": "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov2-tiny.cfg" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_object_detection_with_camera.html b/doc/js_tutorials/js_assets/js_object_detection_with_camera.html new file mode 100644 index 0000000000..41bb609708 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_object_detection_with_camera.html @@ -0,0 +1,402 @@ + + + + + + Object Detection Example with Camera + + + + +

[js_object_detection_with_camera.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Object Detection Example with Camera

This tutorial shows you how to write an object detection example with camera.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Start/Stop button to start or stop the camera capture.

Inputs: videoInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The function that captures video from the camera, and the main loop that runs inference once per frame (see the capture-loop sketch after the classification-with-camera page above).
3. Load labels from a txt file and process them into an array.
4. Get a blob from the image as input for the net, and standardize it with the mean and std.
5. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
6. The post-processing, including getting the boxes from the output and drawing them into the image.

Model Info: see js_object_detection_model_info.json above.
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_pose_estimation.html b/doc/js_tutorials/js_assets/js_pose_estimation.html new file mode 100644 index 0000000000..19c64663d1 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_pose_estimation.html @@ -0,0 +1,327 @@ + + + + + + Pose Estimation Example + + + + +

[js_pose_estimation.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Pose Estimation Example

This tutorial shows you how to write a pose estimation example with OpenCV.js.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Try it button to see the result. You can choose any other image.

Inputs: canvasInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The main loop, which reads the image from the canvas and runs inference once.
3. Get a blob from the image as input for the net, and standardize it with the mean and std.
4. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
5. The pairs of keypoints for the different datasets.
6. The post-processing, including getting the predicted points and drawing lines into the image (see the sketch below).

Model Info: see js_pose_estimation_model_info.json below.
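The stripped post-processing snippet (step 6) reduces each keypoint heatmap to its argmax before drawing the dataset-specific limb pairs. A minimal sketch (a hypothetical helper; the 0.1 score threshold is an assumption):

@code{.js}
// Minimal keypoint extraction sketch (not the page's exact code). OpenPose-style
// nets output 1 x K x H x W heatmaps; width/height are the display canvas size.
function extractKeypoints(result, width, height, scoreThreshold = 0.1) {
    const K = result.matSize[1], H = result.matSize[2], W = result.matSize[3];
    const data = result.data32F;
    const points = [];
    for (let k = 0; k < K; ++k) {
        let best = -1, bestIdx = 0;
        for (let i = 0; i < H * W; ++i) {      // argmax over one heatmap
            const v = data[k * H * W + i];
            if (v > best) { best = v; bestIdx = i; }
        }
        points.push(best > scoreThreshold
            ? { x: (bestIdx % W) * width / W, y: Math.floor(bestIdx / W) * height / H }
            : null);                           // keypoint not found in this image
    }
    return points;  // connect with the dataset-specific pairs (step 5) via cv.line
}
@endcode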
+ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json b/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json new file mode 100644 index 0000000000..922c813f39 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_pose_estimation_model_info.json @@ -0,0 +1,34 @@ +{ + "caffe": [ + { + "model": "body_25", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "BODY_25", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/body_25/pose_iter_584000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/body_25/pose_deploy.prototxt" + }, + { + "model": "coco", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "COCO", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/coco/pose_iter_440000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/coco/pose_deploy_linevec.prototxt" + }, + { + "model": "mpi", + "inputSize": "368, 368", + "mean": "0, 0, 0", + "std": "0.00392", + "swapRB": "false", + "dataset": "MPI", + "modelUrl": "http://posefs1.perception.cs.cmu.edu/OpenPose/models/pose/mpi/pose_iter_160000.caffemodel", + "configUrl": "https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/models/pose/mpi/pose_deploy_linevec.prototxt" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_semantic_segmentation.html b/doc/js_tutorials/js_assets/js_semantic_segmentation.html new file mode 100644 index 0000000000..6fc27dbd19 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_semantic_segmentation.html @@ -0,0 +1,243 @@ + + + + + + Semantic Segmentation Example + + + + +

[js_semantic_segmentation.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Semantic Segmentation Example

This tutorial shows you how to write a semantic segmentation example with OpenCV.js.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Try it button to see the result. You can choose any other image.

Inputs: canvasInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The main loop, which reads the image from the canvas and runs inference once.
3. Get a blob from the image as input for the net, and standardize it with the mean and std.
4. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
5. The post-processing, including generating colors for the different classes and an argmax to get the class for each pixel (see the sketch below).

Model Info: see js_semantic_segmentation_model_info.json below.
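The post-processing in step 5 is a per-pixel argmax over the class planes mapped through a generated palette. A minimal sketch (a hypothetical helper; colors is an assumed array of one [r, g, b] triple per class):

@code{.js}
// Minimal per-pixel argmax colouring sketch (not the page's exact code).
// result is a 1 x C x H x W blob of class scores.
function colorizeSegmentation(result, colors) {
    const C = result.matSize[1], H = result.matSize[2], W = result.matSize[3];
    const data = result.data32F;
    let output = new cv.Mat(H, W, cv.CV_8UC3);
    for (let y = 0; y < H; ++y) {
        for (let x = 0; x < W; ++x) {
            let cls = 0, best = data[y * W + x];        // score of class 0
            for (let c = 1; c < C; ++c) {               // argmax over channels
                const v = data[(c * H + y) * W + x];
                if (v > best) { best = v; cls = c; }
            }
            output.ucharPtr(y, x).set(colors[cls]);     // write the class colour
        }
    }
    return output;  // show with cv.imshow on an output canvas
}
@endcode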
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json b/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json new file mode 100644 index 0000000000..ef0016af1d --- /dev/null +++ b/doc/js_tutorials/js_assets/js_semantic_segmentation_model_info.json @@ -0,0 +1,12 @@ +{ + "tensorflow": [ + { + "model": "deeplabv3", + "inputSize": "513, 513", + "mean": "127.5, 127.5, 127.5", + "std": "0.007843", + "swapRB": "false", + "modelUrl": "https://drive.google.com/uc?id=1v-hfGenaE9tiGOzo5qdgMNG_gqQ5-Xn4&export=download" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_style_transfer.html b/doc/js_tutorials/js_assets/js_style_transfer.html new file mode 100644 index 0000000000..91422e1344 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_style_transfer.html @@ -0,0 +1,228 @@ + + + + + + Style Transfer Example + + + + +

[js_style_transfer.html: new tutorial page; markup and inline code snippets not recoverable. Page text:]

Style Transfer Example

This tutorial shows you how to write a style transfer example with OpenCV.js.
To try the example, click the modelFile button (and the configFile button if needed) to upload the inference model.
You can find the model URLs and parameters in the model info section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the Try it button to see the result. You can choose any other image.

Inputs: canvasInput, modelFile, configFile

Help function
1. The parameters for model inference, which you can modify to investigate more models.
2. The main loop, which reads the image from the canvas and runs inference once.
3. Get a blob from the image as input for the net, and standardize it with the mean and std.
4. Fetch the model file and save it to the Emscripten file system once the input button is clicked.
5. The post-processing, including scaling and reordering (see the sketch below).

Model Info: see js_style_transfer_model_info.json below.
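The post-processing in step 5 inverts the preprocessing: the training mean is added back, values are clamped to [0, 255], and the planar CHW blob is reordered to an HWC image. A minimal sketch (a hypothetical helper, not the page's exact code):

@code{.js}
// Minimal style-transfer post-processing sketch. result is a 1 x 3 x H x W blob;
// mean is the same triple used to build the input blob (model info JSON below).
function blobToImage(result, mean) {
    const H = result.matSize[2], W = result.matSize[3];
    const data = result.data32F;
    let output = new cv.Mat(H, W, cv.CV_8UC3);
    for (let y = 0; y < H; ++y) {
        for (let x = 0; x < W; ++x) {
            const pixel = [0, 1, 2].map(c =>       // undo mean subtraction, clamp
                Math.min(255, Math.max(0, data[(c * H + y) * W + x] + mean[c])));
            output.ucharPtr(y, x).set(pixel);
        }
    }
    return output;  // e.g. cv.imshow('canvasOutput', output); the id is an assumption
}
@endcode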
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_style_transfer_model_info.json b/doc/js_tutorials/js_assets/js_style_transfer_model_info.json new file mode 100644 index 0000000000..9cc66018a0 --- /dev/null +++ b/doc/js_tutorials/js_assets/js_style_transfer_model_info.json @@ -0,0 +1,76 @@ +{ + "torch": [ + { + "model": "candy.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/candy.t7" + }, + { + "model": "composition_vii.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/composition_vii.t7" + }, + { + "model": "feathers.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/feathers.t7" + }, + { + "model": "la_muse.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/la_muse.t7" + }, + { + "model": "mosaic.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/mosaic.t7" + }, + { + "model": "starry_night.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/starry_night.t7" + }, + { + "model": "the_scream.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/the_scream.t7" + }, + { + "model": "the_wave.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//eccv16/the_wave.t7" + }, + { + "model": "udnie.t7", + "inputSize": "224, 224", + "mean": "104, 117, 123", + "std": "1", + "swapRB": "false", + "modelUrl": "https://cs.stanford.edu/people/jcjohns/fast-neural-style/models//instance_norm/udnie.t7" + } + ] +} \ No newline at end of file diff --git a/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html b/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html index ad2bb54c48..b9f6871ec0 100644 --- a/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html +++ b/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html @@ -74,7 +74,8 @@ let utils = new Utils('errorMessage'); utils.loadCode('codeSnippet', 'codeEditor'); utils.loadImageToCanvas('lena.jpg', 'imageCanvasInput'); utils.loadImageToCanvas('lenaFace.png', 'templateCanvasInput'); -utils.addFileInputHandler('fileInput', 'canvasInput'); +utils.addFileInputHandler('fileInput', 'imageCanvasInput'); +utils.addFileInputHandler('templateFileInput', 'templateCanvasInput'); let tryIt = document.getElementById('tryIt'); tryIt.addEventListener('click', () => { diff --git a/doc/js_tutorials/js_assets/utils.js b/doc/js_tutorials/js_assets/utils.js index 4d5deb0b51..65f6d1782d 100644 --- a/doc/js_tutorials/js_assets/utils.js +++ b/doc/js_tutorials/js_assets/utils.js @@ -7,7 +7,7 @@ function Utils(errorOutputId) { // 
eslint-disable-line no-unused-vars let script = document.createElement('script'); script.setAttribute('async', ''); script.setAttribute('type', 'text/javascript'); - script.addEventListener('load', () => { + script.addEventListener('load', async () => { if (cv.getBuildInformation) { console.log(cv.getBuildInformation()); @@ -16,9 +16,15 @@ function Utils(errorOutputId) { // eslint-disable-line no-unused-vars else { // WASM - cv['onRuntimeInitialized']=()=>{ + if (cv instanceof Promise) { + cv = await cv; console.log(cv.getBuildInformation()); onloadCallback(); + } else { + cv['onRuntimeInitialized']=()=>{ + console.log(cv.getBuildInformation()); + onloadCallback(); + } } } }); diff --git a/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown new file mode 100644 index 0000000000..1a94f8d14a --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification.markdown @@ -0,0 +1,13 @@ +Image Classification Example {#tutorial_js_image_classification} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for image classification. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown new file mode 100644 index 0000000000..bdf11161fc --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_image_classification/js_image_classification_with_camera.markdown @@ -0,0 +1,15 @@ +Image Classification Example with Camera {#tutorial_js_image_classification_with_camera} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for image classification example with camera. + +@note If you don't know how to capture video from camera, please review @ref tutorial_js_video_display. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown new file mode 100644 index 0000000000..980b45c236 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection.markdown @@ -0,0 +1,13 @@ +Object Detection Example {#tutorial_js_object_detection} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for object detection. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown new file mode 100644 index 0000000000..e6e8f6f957 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_object_detection/js_object_detection_with_camera.markdown @@ -0,0 +1,13 @@ +Object Detection Example with Camera{#tutorial_js_object_detection_with_camera} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for object detection with camera. 
+ +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown b/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown new file mode 100644 index 0000000000..b090ff2cfb --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_pose_estimation/js_pose_estimation.markdown @@ -0,0 +1,13 @@ +Pose Estimation Example {#tutorial_js_pose_estimation} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for pose estimation. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown b/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown new file mode 100644 index 0000000000..50177fb549 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_semantic_segmentation/js_semantic_segmentation.markdown @@ -0,0 +1,13 @@ +Semantic Segmentation Example {#tutorial_js_semantic_segmentation} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for semantic segmentation. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown b/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown new file mode 100644 index 0000000000..7c1799ac6a --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_style_transfer/js_style_transfer.markdown @@ -0,0 +1,13 @@ +Style Transfer Example {#tutorial_js_style_transfer} +======================================= + +Goal +---- + +- In this tutorial you will learn how to use OpenCV.js dnn module for style transfer. + +\htmlonly + +\endhtmlonly \ No newline at end of file diff --git a/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown b/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown new file mode 100644 index 0000000000..e008dc81d1 --- /dev/null +++ b/doc/js_tutorials/js_dnn/js_table_of_contents_dnn.markdown @@ -0,0 +1,30 @@ +Deep Neural Networks (dnn module) {#tutorial_js_table_of_contents_dnn} +============ + +- @subpage tutorial_js_image_classification + + Image classification example + +- @subpage tutorial_js_image_classification_with_camera + + Image classification example with camera + +- @subpage tutorial_js_object_detection + + Object detection example + +- @subpage tutorial_js_object_detection_with_camera + + Object detection example with camera + +- @subpage tutorial_js_semantic_segmentation + + Semantic segmentation example + +- @subpage tutorial_js_style_transfer + + Style transfer example + +- @subpage tutorial_js_pose_estimation + + Pose estimation example diff --git a/doc/js_tutorials/js_imgproc/js_intelligent_scissors/js_intelligent_scissors.markdown b/doc/js_tutorials/js_imgproc/js_intelligent_scissors/js_intelligent_scissors.markdown new file mode 100644 index 0000000000..1a3ca4c484 --- /dev/null +++ b/doc/js_tutorials/js_imgproc/js_intelligent_scissors/js_intelligent_scissors.markdown @@ -0,0 +1,16 @@ +Intelligent Scissors Demo {#tutorial_js_intelligent_scissors} +========================= + +Goal +---- + +- Here you can check how to use IntelligentScissors tool for image segmentation task. 
+- Available methods and parameters: @ref cv::segmentation::IntelligentScissorsMB + +@note The feature is integrated into [CVAT](https://github.com/openvinotoolkit/cvat) annotation tool and you can try it online on https://cvat.org + +\htmlonly + +\endhtmlonly diff --git a/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown b/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown index 3bb809be71..b06eb95639 100644 --- a/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown +++ b/doc/js_tutorials/js_imgproc/js_table_of_contents_imgproc.markdown @@ -77,3 +77,7 @@ Image Processing {#tutorial_js_table_of_contents_imgproc} - @subpage tutorial_js_imgproc_camera Learn image processing for video capture. + +- @subpage tutorial_js_intelligent_scissors + + Learn how to use IntelligentScissors tool for image segmentation task. diff --git a/doc/js_tutorials/js_setup/js_intro/js_intro.markdown b/doc/js_tutorials/js_setup/js_intro/js_intro.markdown index 416aa3ded5..01a123c5f4 100644 --- a/doc/js_tutorials/js_setup/js_intro/js_intro.markdown +++ b/doc/js_tutorials/js_setup/js_intro/js_intro.markdown @@ -13,7 +13,7 @@ OpenCV.js: OpenCV for the JavaScript programmer Web is the most ubiquitous open computing platform. With HTML5 standards implemented in every browser, web applications are able to render online video with HTML5 video tags, capture webcam video via WebRTC API, and access each pixel of a video frame via canvas API. With abundance of available multimedia content, web developers are in need of a wide array of image and vision processing algorithms in JavaScript to build innovative applications. This requirement is even more essential for emerging applications on the web, such as Web Virtual Reality (WebVR) and Augmented Reality (WebAR). All of these use cases demand efficient implementations of computation-intensive vision kernels on web. -[Emscripten](http://kripken.github.io/emscripten-site) is an LLVM-to-JavaScript compiler. It takes LLVM bitcode - which can be generated from C/C++ using clang, and compiles that into asm.js or WebAssembly that can execute directly inside the web browsers. . Asm.js is a highly optimizable, low-level subset of JavaScript. Asm.js enables ahead-of-time compilation and optimization in JavaScript engine that provide near-to-native execution speed. WebAssembly is a new portable, size- and load-time-efficient binary format suitable for compilation to the web. WebAssembly aims to execute at native speed. WebAssembly is currently being designed as an open standard by W3C. +[Emscripten](https://emscripten.org/) is an LLVM-to-JavaScript compiler. It takes LLVM bitcode - which can be generated from C/C++ using clang, and compiles that into asm.js or WebAssembly that can execute directly inside the web browsers. . Asm.js is a highly optimizable, low-level subset of JavaScript. Asm.js enables ahead-of-time compilation and optimization in JavaScript engine that provide near-to-native execution speed. WebAssembly is a new portable, size- and load-time-efficient binary format suitable for compilation to the web. WebAssembly aims to execute at native speed. WebAssembly is currently being designed as an open standard by W3C. OpenCV.js is a JavaScript binding for selected subset of OpenCV functions for the web platform. It allows emerging web applications with multimedia processing to benefit from the wide variety of vision functions available in OpenCV. 
OpenCV.js leverages Emscripten to compile OpenCV functions into asm.js or WebAssembly targets, and provides a JavaScript APIs for web application to access them. The future versions of the library will take advantage of acceleration APIs that are available on the Web such as SIMD and multi-threaded execution. @@ -42,4 +42,4 @@ Below is the list of contributors of OpenCV.js bindings and tutorials. - Gang Song (GSoC student, Shanghai Jiao Tong University) - Wenyao Gan (Student intern, Shanghai Jiao Tong University) - Mohammad Reza Haghighat (Project initiator & sponsor, Intel Corporation) -- Ningxin Hu (Students' supervisor, Intel Corporation) \ No newline at end of file +- Ningxin Hu (Students' supervisor, Intel Corporation) diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown index 435f06fe02..ad14185a35 100644 --- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown +++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown @@ -7,12 +7,12 @@ You don't have to build your own copy if you simply want to start using it. Refe Installing Emscripten ----------------------------- -[Emscripten](https://github.com/kripken/emscripten) is an LLVM-to-JavaScript compiler. We will use Emscripten to build OpenCV.js. +[Emscripten](https://github.com/emscripten-core/emscripten) is an LLVM-to-JavaScript compiler. We will use Emscripten to build OpenCV.js. @note While this describes installation of required tools from scratch, there's a section below also describing an alternative procedure to perform the same build using docker containers which is often easier. -To Install Emscripten, follow instructions of [Emscripten SDK](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html). +To Install Emscripten, follow instructions of [Emscripten SDK](https://emscripten.org/docs/getting_started/downloads.html). For example: @code{.bash} @@ -21,24 +21,29 @@ For example: ./emsdk activate latest @endcode -@note -To compile to [WebAssembly](http://webassembly.org), you need to install and activate [Binaryen](https://github.com/WebAssembly/binaryen) with the `emsdk` command. Please refer to [Developer's Guide](http://webassembly.org/getting-started/developers-guide/) for more details. -After install, ensure the `EMSCRIPTEN` environment is setup correctly. +After install, ensure the `EMSDK` environment is setup correctly. For example: @code{.bash} source ./emsdk_env.sh -echo ${EMSCRIPTEN} +echo ${EMSDK} @endcode -The version 1.39.16 of emscripten is verified for latest WebAssembly. Please check the version of emscripten to use the newest features of WebAssembly. +Modern versions of Emscripten requires to use `emcmake` / `emmake` launchers: + +@code{.bash} +emcmake sh -c 'echo ${EMSCRIPTEN}' +@endcode + + +The version 2.0.10 of emscripten is verified for latest WebAssembly. Please check the version of Emscripten to use the newest features of WebAssembly. 
For example: @code{.bash} ./emsdk update -./emsdk install 1.39.16 -./emsdk activate 1.39.16 +./emsdk install 2.0.10 +./emsdk activate 2.0.10 @endcode Obtaining OpenCV Source Code -------------------------------------- @@ -71,8 +76,7 @@ Building OpenCV.js from Source For example, to build in `build_js` directory: @code{.bash} - cd opencv - python ./platforms/js/build_js.py build_js + emcmake python ./opencv/platforms/js/build_js.py build_js @endcode @note @@ -82,14 +86,14 @@ Building OpenCV.js from Source For example, to build wasm version in `build_wasm` directory: @code{.bash} - python ./platforms/js/build_js.py build_wasm --build_wasm + emcmake python ./opencv/platforms/js/build_js.py build_wasm --build_wasm @endcode -# [Optional] To build the OpenCV.js loader, append `--build_loader`. For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_loader + emcmake python ./opencv/platforms/js/build_js.py build_js --build_loader @endcode @note @@ -114,7 +118,7 @@ Building OpenCV.js from Source For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_doc + emcmake python ./opencv/platforms/js/build_js.py build_js --build_doc @endcode @note @@ -124,9 +128,17 @@ Building OpenCV.js from Source For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_test + emcmake python ./opencv/platforms/js/build_js.py build_js --build_test @endcode +-# [optional] To enable OpenCV contrib modules append `--cmake_option="-DOPENCV_EXTRA_MODULES_PATH=/path/to/opencv_contrib/modules/"` + + For example: + @code{.bash} + python ./platforms/js/build_js.py build_js --cmake_option="-DOPENCV_EXTRA_MODULES_PATH=opencv_contrib/modules" + @endcode + + Running OpenCV.js Tests --------------------------------------- @@ -186,7 +205,7 @@ node tests.js For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_wasm --threads + emcmake python ./opencv/platforms/js/build_js.py build_js --build_wasm --threads @endcode The default number of threads is the number of logical cores on your device. You can use `cv.parallel_pthreads_set_threads_num(number)` to set the number of threads yourself and use `cv.parallel_pthreads_get_threads_num()` to get the current number of threads. @@ -198,7 +217,7 @@ node tests.js For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_wasm --simd + emcmake python ./opencv/platforms/js/build_js.py build_js --build_wasm --simd @endcode The simd optimization is experimental as wasm simd is still in development. @@ -222,7 +241,7 @@ node tests.js For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_wasm --simd --build_wasm_intrin_test + emcmake python ./opencv/platforms/js/build_js.py build_js --build_wasm --simd --build_wasm_intrin_test @endcode For wasm intrinsics tests, you can use the following function to test all the cases: @@ -250,7 +269,7 @@ node tests.js For example: @code{.bash} - python ./platforms/js/build_js.py build_js --build_perf + emcmake python ./opencv/platforms/js/build_js.py build_js --build_perf @endcode To run performance tests, launch a local web server in the \/bin folder. For example, node http-server, which serves on `localhost:8080` (a Python-based alternative is sketched below).
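If Node.js is not available, any static web server can serve the test and performance pages. A minimal sketch using Python's built-in `http.server` module, where the `build_js/bin` directory and port 8080 are placeholders for your own setup:

@code{.py}
# Serve the OpenCV.js build output so the browser test/performance pages load.
# 'build_js/bin' and port 8080 are assumptions, not fixed OpenCV locations.
import functools
from http.server import HTTPServer, SimpleHTTPRequestHandler

handler = functools.partial(SimpleHTTPRequestHandler, directory="build_js/bin")
print("Serving on http://localhost:8080")
HTTPServer(("localhost", 8080), handler).serve_forever()
@endcode

Then point the browser at the served test page (e.g. `tests.html` in the build output directory).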
@@ -271,25 +290,31 @@ Building OpenCV.js with Docker Alternatively, the same build can be accomplished using [docker](https://www.docker.com/) containers, which is often easier and more reliable, particularly on non-Linux systems. You only need to install [docker](https://www.docker.com/) on your system and use a popular container that provides a clean, well-tested environment for emscripten builds like this, which already has the latest versions of all the necessary tools installed. -So, make sure [docker](https://www.docker.com/) is installed in your system and running. The following shell script should work in linux and MacOS: +So, make sure [docker](https://www.docker.com/) is installed on your system and running. The following shell script should work on Linux and macOS: @code{.bash} git clone https://github.com/opencv/opencv.git cd opencv -docker run --rm --workdir /code -v "$PWD":/code "trzeci/emscripten:latest" python ./platforms/js/build_js.py build +docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) emscripten/emsdk emcmake python3 ./platforms/js/build_js.py build_js @endcode In Windows use the following PowerShell command: @code{.bash} -docker run --rm --workdir /code -v "$(get-location):/code" "trzeci/emscripten:latest" python ./platforms/js/build_js.py build +docker run --rm --workdir /src -v "$(get-location):/src" "emscripten/emsdk" emcmake python3 ./platforms/js/build_js.py build_js @endcode @warning -The example uses latest version of emscripten. If the build fails you should try a version that is known to work fine which is `1.38.32` using the following command: +The example uses the latest version of Emscripten. If the build fails, you should try a version that is known to work fine, `2.0.10`, using the following command: @code{.bash} -docker run --rm --workdir /code -v "$PWD":/code "trzeci/emscripten:sdk-tag-1.38.32-64bit" python ./platforms/js/build_js.py build +docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) emscripten/emsdk:2.0.10 emcmake python3 ./platforms/js/build_js.py build_js +@endcode + +In Windows use the following PowerShell command: + +@code{.bash} +docker run --rm --workdir /src -v "$(get-location):/src" "emscripten/emsdk:2.0.10" emcmake python3 ./platforms/js/build_js.py build_js @endcode ### Building the documentation with Docker @@ -297,10 +322,11 @@ docker run --rm --workdir /code -v "$PWD":/code "trzeci/emscripten:sdk-tag-1.38. To build the documentation `doxygen` needs to be installed. Create a file named `Dockerfile` with the following content: ``` -FROM trzeci/emscripten:sdk-tag-1.38.32-64bit +FROM emscripten/emsdk:2.0.10 -RUN apt-get update -y -RUN apt-get install -y doxygen +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends doxygen \ + && rm -rf /var/lib/apt/lists/* ``` Then we build the docker image and name it `opencv-js-doc` with the following command (that needs to be run only once): @@ -312,5 +338,5 @@ docker build . 
-t opencv-js-doc Now run the build command again, this time using the new image and passing `--build_doc`: @code{.bash} -docker run --rm --workdir /code -v "$PWD":/code "opencv-js-doc" python ./platforms/js/build_js.py build --build_doc +docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) "opencv-js-doc" emcmake python3 ./platforms/js/build_js.py build_js --build_doc @endcode diff --git a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown index 5f9f338f2d..5a8c3b87fa 100644 --- a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown +++ b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown @@ -4,7 +4,7 @@ Using OpenCV.js {#tutorial_js_usage} Steps ----- -In this tutorial, you will learn how to include and start to use `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentations at "https://docs.opencv.org/{VERISON_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build Opencv.js. +In this tutorial, you will learn how to include and start using `opencv.js` inside a web page. You can get a copy of `opencv.js` from `opencv-{VERSION_NUMBER}-docs.zip` in each [release](https://github.com/opencv/opencv/releases), or simply download the prebuilt script from the online documentation at "https://docs.opencv.org/{VERSION_NUMBER}/opencv.js" (For example, [https://docs.opencv.org/3.4.0/opencv.js](https://docs.opencv.org/3.4.0/opencv.js). Use `master` if you want the latest build). You can also build your own copy by following the tutorial on Build OpenCV.js. ### Create a web page @@ -82,7 +82,7 @@ In this tutorial, we just show a cv.Mat on screen. To show a cv.Mat, you need a You can use cv.imshow to show cv.Mat on the canvas. @code{.js} -cv.imshow(mat, "outputCanvas"); +cv.imshow("outputCanvas", mat); @endcode Putting all of the steps together, the final index.html is shown below. @@ -129,7 +129,7 @@ function onOpenCvReady() { @endcode -@note You have to call delete method of cv.Mat to free memory allocated in Emscripten's heap. Please refer to [Memory management of Emscripten](https://kripken.github.io/emscripten-site/docs/porting/connecting_cpp_and_javascript/embind.html#memory-management) for details. +@note You have to call the delete method of cv.Mat to free memory allocated in Emscripten's heap. Please refer to [Memory management of Emscripten](https://emscripten.org/docs/porting/connecting_cpp_and_javascript/embind.html#memory-management) for details. Try it ------ @@ -137,4 +137,4 @@ Try it -\endhtmlonly \ No newline at end of file +\endhtmlonly diff --git a/doc/js_tutorials/js_tutorials.markdown b/doc/js_tutorials/js_tutorials.markdown index c8a8f92a31..73e69daa98 100644 --- a/doc/js_tutorials/js_tutorials.markdown +++ b/doc/js_tutorials/js_tutorials.markdown @@ -26,3 +26,7 @@ OpenCV.js Tutorials {#tutorial_js_root} In this section you will learn object detection techniques like face detection etc. 
+ +- @subpage tutorial_js_table_of_contents_dnn + + These tutorials show how to use dnn module in JavaScript diff --git a/doc/opencv.bib b/doc/opencv.bib index 54396d6a10..d44b0f5293 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -110,6 +110,29 @@ year = {2010}, url = {http://ingmec.ual.es/~jlblanco/papers/jlblanco2010geometry3D_techrep.pdf} } +@inproceedings{Bolelli2017, + title = {{Two More Strategies to Speed Up Connected Components Labeling Algorithms}}, + author = {Bolelli, Federico and Cancilla, Michele and Grana, Costantino}, + year = 2017, + booktitle = {Image Analysis and Processing - ICIAP 2017}, + publisher = {Springer}, + volume = 10485, + pages = {48--58}, + doi = {10.1007/978-3-319-68548-9_5}, + isbn = {978-3-319-68547-2} +} +@article{Bolelli2019, + title = {{Spaghetti Labeling: Directed Acyclic Graphs for Block-Based Connected Components Labeling}}, + author = {Bolelli, Federico and Allegretti, Stefano and Baraldi, Lorenzo and Grana, Costantino}, + year = 2019, + journal = {IEEE Transactions on Image Processing}, + publisher = {IEEE}, + volume = 29, + number = 1, + pages = {1999--2012}, + doi = {10.1109/TIP.2019.2946979}, + issn = {1057-7149} +} @article{Borgefors86, author = {Borgefors, Gunilla}, title = {Distance transformations in digital images}, @@ -420,6 +443,16 @@ volume = {51}, pages = {378-384} } +@article{Grana2010, + title = {{Optimized Block-Based Connected Components Labeling With Decision Trees}}, + author = {Grana, Costantino and Borghesani, Daniele and Cucchiara, Rita}, + year = 2010, + journal = {IEEE Transactions on Image Processing}, + volume = 19, + number = 6, + pages = {1596--1609}, + doi = {10.1109/TIP.2010.2044963} +} @article{taubin1991, abstract = {The author addresses the problem of parametric representation and estimation of complex planar curves in 2-D surfaces in 3-D, and nonplanar space curves in 3-D. Curves and surfaces can be defined either parametrically or implicitly, with the latter representation used here. A planar curve is the set of zeros of a smooth function of two variables x-y, a surface is the set of zeros of a smooth function of three variables x-y-z, and a space curve is the intersection of two surfaces, which are the set of zeros of two linearly independent smooth functions of three variables x-y-z For example, the surface of a complex object in 3-D can be represented as a subset of a single implicit surface, with similar results for planar and space curves. It is shown how this unified representation can be used for object recognition, object position estimation, and segmentation of objects into meaningful subobjects, that is, the detection of `interest regions' that are more complex than high curvature regions and, hence, more useful as features for object recognition}, author = {Taubin, Gabriel}, @@ -768,6 +801,13 @@ pages = {432--441}, publisher = {Springer} } +@INPROCEEDINGS{Mortensen95intelligentscissors, + author = {Eric N. Mortensen and William A. 
Barrett}, + title = {Intelligent Scissors for Image Composition}, + booktitle = {Computer Graphics (SIGGRAPH Proceedings)}, + year = {1995}, + pages = {191--198} +} @inproceedings{Muja2009, author = {Muja, Marius and Lowe, David G}, title = {Fast Approximate Nearest Neighbors with Automatic Algorithm Configuration}, @@ -1261,3 +1301,26 @@ pages={281--305}, year={1987} } +@inproceedings{liao2020real, + author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, + title={Real-time Scene Text Detection with Differentiable Binarization}, + booktitle={Proc. AAAI}, + year={2020} +} +@article{shi2016end, + title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition}, + author={Shi, Baoguang and Bai, Xiang and Yao, Cong}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + volume={39}, + number={11}, + pages={2298--2304}, + year={2016}, + publisher={IEEE} +} +@inproceedings{zhou2017east, + title={EAST: An Efficient and Accurate Scene Text Detector}, + author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={5551--5560}, + year={2017} +} diff --git a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown index 656f5423c5..dee4df774a 100644 --- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown +++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown @@ -20,10 +20,10 @@ scale invariant. ![image](images/sift_scale_invariant.jpg) -So, in 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale +In 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale Invariant Feature Transform (SIFT) in his paper, **Distinctive Image Features from Scale-Invariant Keypoints**, which extracts keypoints and computes their descriptors. *(This paper is easy to understand -and considered to be best material available on SIFT. So this explanation is just a short summary of +and considered to be the best material available on SIFT. This explanation is just a short summary of this paper)*. There are mainly four steps involved in the SIFT algorithm. We will see them one-by-one. @@ -102,16 +102,17 @@ reasons. In that case, ratio of closest-distance to second-closest distance is t greater than 0.8, they are rejected. It eliminates around 90% of false matches while discarding only 5% of correct matches, as per the paper. -So this is a summary of SIFT algorithm. For more details and understanding, reading the original -paper is highly recommended. Remember one thing, this algorithm is patented. So this algorithm is -included in [the opencv contrib repo](https://github.com/opencv/opencv_contrib) +This is a summary of the SIFT algorithm. For more details and a deeper understanding, reading the original +paper is highly recommended. SIFT in OpenCV -------------- -So now let's see SIFT functionalities available in OpenCV. Let's start with keypoint detection and -draw them. First we have to construct a SIFT object. We can pass different parameters to it which -are optional and they are well explained in docs. +Now let's see SIFT functionalities available in OpenCV. 
Note that these were previously only +available in [the opencv contrib repo](https://github.com/opencv/opencv_contrib), but the patent +expired in the year 2020. So they are now included in the main repo. Let's start with keypoint +detection and draw them. First we have to construct a SIFT object. We can pass different +parameters to it which are optional and they are well explained in docs. @code{.py} import numpy as np import cv2 as cv diff --git a/doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown b/doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown index bb31bab107..63fde0a130 100644 --- a/doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown +++ b/doc/py_tutorials/py_imgproc/py_pyramids/py_pyramids.markdown @@ -88,27 +88,27 @@ B = cv.imread('orange.jpg') # generate Gaussian pyramid for A G = A.copy() gpA = [G] -for i in xrange(6): +for i in range(6): G = cv.pyrDown(G) gpA.append(G) # generate Gaussian pyramid for B G = B.copy() gpB = [G] -for i in xrange(6): +for i in range(6): G = cv.pyrDown(G) gpB.append(G) # generate Laplacian Pyramid for A lpA = [gpA[5]] -for i in xrange(5,0,-1): +for i in range(5,0,-1): GE = cv.pyrUp(gpA[i]) L = cv.subtract(gpA[i-1],GE) lpA.append(L) # generate Laplacian Pyramid for B lpB = [gpB[5]] -for i in xrange(5,0,-1): +for i in range(5,0,-1): GE = cv.pyrUp(gpB[i]) L = cv.subtract(gpB[i-1],GE) lpB.append(L) @@ -122,7 +122,7 @@ for la,lb in zip(lpA,lpB): # now reconstruct ls_ = LS[0] -for i in xrange(1,6): +for i in range(1,6): ls_ = cv.pyrUp(ls_) ls_ = cv.add(ls_, LS[i]) diff --git a/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown b/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown index 0540098850..f52e9c5db6 100644 --- a/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown +++ b/doc/py_tutorials/py_imgproc/py_thresholding/py_thresholding.markdown @@ -47,7 +47,7 @@ ret,thresh5 = cv.threshold(img,127,255,cv.THRESH_TOZERO_INV) titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV'] images = [img, thresh1, thresh2, thresh3, thresh4, thresh5] -for i in xrange(6): +for i in range(6): plt.subplot(2,3,i+1),plt.imshow(images[i],'gray',vmin=0,vmax=255) plt.title(titles[i]) plt.xticks([]),plt.yticks([]) @@ -98,7 +98,7 @@ titles = ['Original Image', 'Global Thresholding (v = 127)', 'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding'] images = [img, th1, th2, th3] -for i in xrange(4): +for i in range(4): plt.subplot(2,2,i+1),plt.imshow(images[i],'gray') plt.title(titles[i]) plt.xticks([]),plt.yticks([]) @@ -153,7 +153,7 @@ titles = ['Original Noisy Image','Histogram','Global Thresholding (v=127)', 'Original Noisy Image','Histogram',"Otsu's Thresholding", 'Gaussian filtered Image','Histogram',"Otsu's Thresholding"] -for i in xrange(3): +for i in range(3): plt.subplot(3,3,i*3+1),plt.imshow(images[i*3],'gray') plt.title(titles[i*3]), plt.xticks([]), plt.yticks([]) plt.subplot(3,3,i*3+2),plt.hist(images[i*3].ravel(),256) @@ -196,7 +196,7 @@ bins = np.arange(256) fn_min = np.inf thresh = -1 -for i in xrange(1,256): +for i in range(1,256): p1,p2 = np.hsplit(hist_norm,[i]) # probabilities q1,q2 = Q[i],Q[255]-Q[i] # cum sum of classes if q1 < 1.e-6 or q2 < 1.e-6: diff --git a/doc/py_tutorials/py_imgproc/py_transforms/py_fourier_transform/py_fourier_transform.markdown b/doc/py_tutorials/py_imgproc/py_transforms/py_fourier_transform/py_fourier_transform.markdown index 44b08d53ab..6c4533a1b0 100644 --- 
a/doc/py_tutorials/py_imgproc/py_transforms/py_fourier_transform/py_fourier_transform.markdown +++ b/doc/py_tutorials/py_imgproc/py_transforms/py_fourier_transform/py_fourier_transform.markdown @@ -268,7 +268,7 @@ fft_filters = [np.fft.fft2(x) for x in filters] fft_shift = [np.fft.fftshift(y) for y in fft_filters] mag_spectrum = [np.log(np.abs(z)+1) for z in fft_shift] -for i in xrange(6): +for i in range(6): plt.subplot(2,3,i+1),plt.imshow(mag_spectrum[i],cmap = 'gray') plt.title(filter_name[i]), plt.xticks([]), plt.yticks([]) diff --git a/doc/py_tutorials/py_photo/py_non_local_means/py_non_local_means.markdown b/doc/py_tutorials/py_photo/py_non_local_means/py_non_local_means.markdown index 3f56a4841b..94e57d4d6e 100644 --- a/doc/py_tutorials/py_photo/py_non_local_means/py_non_local_means.markdown +++ b/doc/py_tutorials/py_photo/py_non_local_means/py_non_local_means.markdown @@ -108,7 +108,7 @@ from matplotlib import pyplot as plt cap = cv.VideoCapture('vtest.avi') # create a list of first 5 frames -img = [cap.read()[1] for i in xrange(5)] +img = [cap.read()[1] for i in range(5)] # convert all to grayscale gray = [cv.cvtColor(i, cv.COLOR_BGR2GRAY) for i in img] diff --git a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown index 487ba72ee7..b013ef014e 100644 --- a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown +++ b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown @@ -83,4 +83,4 @@ Additional Resources 2. [NumPy Quickstart tutorial](https://numpy.org/devdocs/user/quickstart.html) 3. [NumPy Reference](https://numpy.org/devdocs/reference/index.html#reference) 4. [OpenCV Documentation](http://docs.opencv.org/) -5. [OpenCV Forum](http://answers.opencv.org/questions/) +5. [OpenCV Forum](https://forum.opencv.org/) diff --git a/doc/py_tutorials/py_setup/py_setup_in_ubuntu/py_setup_in_ubuntu.markdown b/doc/py_tutorials/py_setup/py_setup_in_ubuntu/py_setup_in_ubuntu.markdown index f88ffe6793..8b99c5df92 100644 --- a/doc/py_tutorials/py_setup/py_setup_in_ubuntu/py_setup_in_ubuntu.markdown +++ b/doc/py_tutorials/py_setup/py_setup_in_ubuntu/py_setup_in_ubuntu.markdown @@ -22,10 +22,10 @@ Installing OpenCV-Python from Pre-built Binaries This method serves best when using just for programming and developing OpenCV applications. -Install package [python-opencv](https://packages.ubuntu.com/trusty/python-opencv) with following command in terminal (as root user). +Install package [python3-opencv](https://packages.ubuntu.com/focal/python3-opencv) with following command in terminal (as root user). ``` -$ sudo apt-get install python-opencv +$ sudo apt-get install python3-opencv ``` Open Python IDLE (or IPython) and type following codes in Python terminal. diff --git a/doc/tools/scan_tutorials.py b/doc/tools/scan_tutorials.py new file mode 100644 index 0000000000..0b924a4626 --- /dev/null +++ b/doc/tools/scan_tutorials.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +from pathlib import Path +import re + +# Tasks +# 1. Find all tutorials +# 2. Generate tree (@subpage) +# 3. 
Check prev/next nodes + +class Tutorial(object): + def __init__(self, path): + self.path = path + self.title = None # doxygen title + self.children = [] # ordered titles + self.prev = None + self.next = None + with open(path, "rt") as f: + self.parse(f) + + def parse(self, f): + rx_title = re.compile(r"\{#(\w+)\}") + rx_subpage = re.compile(r"@subpage\s+(\w+)") + rx_prev = re.compile(r"@prev_tutorial\{(\w+)\}") + rx_next = re.compile(r"@next_tutorial\{(\w+)\}") + for line in f: + if self.title is None: + m = rx_title.search(line) + if m: + self.title = m.group(1) + continue + if self.prev is None: + m = rx_prev.search(line) + if m: + self.prev = m.group(1) + continue + if self.next is None: + m = rx_next.search(line) + if m: + self.next = m.group(1) + continue + m = rx_subpage.search(line) + if m: + self.children.append(m.group(1)) + continue + + def verify_prev_next(self, storage): + res = True + + if self.title is None: + print("[W] No title") + res = False + + prev = None + for one in self.children: + c = storage[one] + if c.prev is not None and c.prev != prev: + print("[W] Wrong prev_tutorial: expected {} / actual {}".format(c.prev, prev)) + res = False + prev = c.title + + next = None + for one in reversed(self.children): + c = storage[one] + if c.next is not None and c.next != next: + print("[W] Wrong next_tutorial: expected {} / actual {}".format(c.next, next)) + res = False + next = c.title + + if len(self.children) == 0 and self.prev is None and self.next is None: + print("[W] No prev and next tutorials") + res = False + + return res + +if __name__ == "__main__": + + p = Path('tutorials') + print("Looking for tutorials in: '{}'".format(p)) + + all_tutorials = dict() + for f in p.glob('**/*'): + if f.suffix.lower() in ('.markdown', '.md'): + t = Tutorial(f) + all_tutorials[t.title] = t + + res = 0 + print("Found: {}".format(len(all_tutorials))) + print("------") + for title, t in all_tutorials.items(): + if not t.verify_prev_next(all_tutorials): + print("[E] Verification failed: {}".format(t.path)) + print("------") + res = 1 + + exit(res) diff --git a/doc/tutorials/app/_old/table_of_content_highgui.markdown b/doc/tutorials/app/_old/table_of_content_highgui.markdown new file mode 100644 index 0000000000..3a1705ecd5 --- /dev/null +++ b/doc/tutorials/app/_old/table_of_content_highgui.markdown @@ -0,0 +1,4 @@ +High Level GUI and Media (highgui module) {#tutorial_table_of_content_highgui} +========================================= + +Content has been moved to this page: @ref tutorial_table_of_content_app diff --git a/doc/tutorials/app/_old/table_of_content_imgcodecs.markdown b/doc/tutorials/app/_old/table_of_content_imgcodecs.markdown new file mode 100644 index 0000000000..a49bbe5cce --- /dev/null +++ b/doc/tutorials/app/_old/table_of_content_imgcodecs.markdown @@ -0,0 +1,4 @@ +Image Input and Output (imgcodecs module) {#tutorial_table_of_content_imgcodecs} +========================================= + +Content has been moved to this page: @ref tutorial_table_of_content_app diff --git a/doc/tutorials/app/_old/table_of_content_videoio.markdown b/doc/tutorials/app/_old/table_of_content_videoio.markdown new file mode 100644 index 0000000000..f2b3ccf81c --- /dev/null +++ b/doc/tutorials/app/_old/table_of_content_videoio.markdown @@ -0,0 +1,4 @@ +Video Input and Output (videoio module) {#tutorial_table_of_content_videoio} +========================================= + +Content has been moved to this page: @ref tutorial_table_of_content_app diff --git 
a/doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Result_0.jpg b/doc/tutorials/app/images/Adding_Trackbars_Tutorial_Result_0.jpg similarity index 100% rename from doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Result_0.jpg rename to doc/tutorials/app/images/Adding_Trackbars_Tutorial_Result_0.jpg diff --git a/doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Result_1.jpg b/doc/tutorials/app/images/Adding_Trackbars_Tutorial_Result_1.jpg similarity index 100% rename from doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Result_1.jpg rename to doc/tutorials/app/images/Adding_Trackbars_Tutorial_Result_1.jpg diff --git a/doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Trackbar.png b/doc/tutorials/app/images/Adding_Trackbars_Tutorial_Trackbar.png similarity index 100% rename from doc/tutorials/highgui/trackbar/images/Adding_Trackbars_Tutorial_Trackbar.png rename to doc/tutorials/app/images/Adding_Trackbars_Tutorial_Trackbar.png diff --git a/doc/tutorials/videoio/orbbec-astra/images/astra_color.jpg b/doc/tutorials/app/images/astra_color.jpg similarity index 100% rename from doc/tutorials/videoio/orbbec-astra/images/astra_color.jpg rename to doc/tutorials/app/images/astra_color.jpg diff --git a/doc/tutorials/videoio/orbbec-astra/images/astra_depth.png b/doc/tutorials/app/images/astra_depth.png similarity index 100% rename from doc/tutorials/videoio/orbbec-astra/images/astra_depth.png rename to doc/tutorials/app/images/astra_depth.png diff --git a/doc/tutorials/imgcodecs/raster-gdal/images/gdal_flood-zone.jpg b/doc/tutorials/app/images/gdal_flood-zone.jpg similarity index 100% rename from doc/tutorials/imgcodecs/raster-gdal/images/gdal_flood-zone.jpg rename to doc/tutorials/app/images/gdal_flood-zone.jpg diff --git a/doc/tutorials/imgcodecs/raster-gdal/images/gdal_heat-map.jpg b/doc/tutorials/app/images/gdal_heat-map.jpg similarity index 100% rename from doc/tutorials/imgcodecs/raster-gdal/images/gdal_heat-map.jpg rename to doc/tutorials/app/images/gdal_heat-map.jpg diff --git a/doc/tutorials/imgcodecs/raster-gdal/images/gdal_output.jpg b/doc/tutorials/app/images/gdal_output.jpg similarity index 100% rename from doc/tutorials/imgcodecs/raster-gdal/images/gdal_output.jpg rename to doc/tutorials/app/images/gdal_output.jpg diff --git a/doc/tutorials/videoio/video-input-psnr-ssim/images/outputVideoInput.png b/doc/tutorials/app/images/outputVideoInput.png similarity index 100% rename from doc/tutorials/videoio/video-input-psnr-ssim/images/outputVideoInput.png rename to doc/tutorials/app/images/outputVideoInput.png diff --git a/doc/tutorials/videoio/video-write/images/resultOutputWideoWrite.png b/doc/tutorials/app/images/resultOutputWideoWrite.png similarity index 100% rename from doc/tutorials/videoio/video-write/images/resultOutputWideoWrite.png rename to doc/tutorials/app/images/resultOutputWideoWrite.png diff --git a/doc/tutorials/videoio/video-write/images/videoCompressSelect.png b/doc/tutorials/app/images/videoCompressSelect.png similarity index 100% rename from doc/tutorials/videoio/video-write/images/videoCompressSelect.png rename to doc/tutorials/app/images/videoCompressSelect.png diff --git a/doc/tutorials/videoio/video-write/images/videoFileStructure.png b/doc/tutorials/app/images/videoFileStructure.png similarity index 100% rename from doc/tutorials/videoio/video-write/images/videoFileStructure.png rename to doc/tutorials/app/images/videoFileStructure.png diff --git a/doc/tutorials/videoio/intelperc.markdown 
b/doc/tutorials/app/intelperc.markdown similarity index 99% rename from doc/tutorials/videoio/intelperc.markdown rename to doc/tutorials/app/intelperc.markdown index 6a6a5e5c9a..5c036a63c2 100644 --- a/doc/tutorials/videoio/intelperc.markdown +++ b/doc/tutorials/app/intelperc.markdown @@ -1,6 +1,8 @@ Using Creative Senz3D and other Intel RealSense SDK compatible depth sensors {#tutorial_intelperc} ======================================================================================= +@tableofcontents + @prev_tutorial{tutorial_orbbec_astra} **Note**: This tutorial is partially obsolete since PerC SDK has been replaced with RealSense SDK diff --git a/doc/tutorials/videoio/kinect_openni.markdown b/doc/tutorials/app/kinect_openni.markdown similarity index 99% rename from doc/tutorials/videoio/kinect_openni.markdown rename to doc/tutorials/app/kinect_openni.markdown index aadaec5e44..e235a97755 100644 --- a/doc/tutorials/videoio/kinect_openni.markdown +++ b/doc/tutorials/app/kinect_openni.markdown @@ -1,6 +1,8 @@ Using Kinect and other OpenNI compatible depth sensors {#tutorial_kinect_openni} ====================================================== +@tableofcontents + @prev_tutorial{tutorial_video_write} @next_tutorial{tutorial_orbbec_astra} diff --git a/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown b/doc/tutorials/app/orbbec_astra.markdown similarity index 64% rename from doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown rename to doc/tutorials/app/orbbec_astra.markdown index 664e4f6dfe..273c3c3536 100644 --- a/doc/tutorials/videoio/orbbec-astra/orbbec_astra.markdown +++ b/doc/tutorials/app/orbbec_astra.markdown @@ -1,6 +1,8 @@ Using Orbbec Astra 3D cameras {#tutorial_orbbec_astra} ====================================================== +@tableofcontents + @prev_tutorial{tutorial_kinect_openni} @next_tutorial{tutorial_intelperc} @@ -9,12 +11,12 @@ Using Orbbec Astra 3D cameras {#tutorial_orbbec_astra} This tutorial is devoted to the Astra Series of Orbbec 3D cameras (https://orbbec3d.com/product-astra-pro/). These cameras have a depth sensor in addition to a common color sensor. The depth sensors can be read using -the OpenNI interface with @ref cv::VideoCapture class. The video stream is provided through the regular camera -interface. +the open source OpenNI API with @ref cv::VideoCapture class. The video stream is provided through the regular +camera interface. ### Installation Instructions -In order to use a depth sensor with OpenCV you should do the following steps: +In order to use the Astra camera's depth sensor with OpenCV, you should do the following steps: -# Download the latest version of Orbbec OpenNI SDK (from here ). Unzip the archive, choose the build according to your operating system and follow installation @@ -70,24 +72,32 @@ In order to use a depth sensor with OpenCV you should do the following steps: ### Code -To get both depth and color frames, two @ref cv::VideoCapture objects should be created: +The Astra Pro camera has two sensors -- a depth sensor and a color sensor. The depth sensor +can be read using the OpenNI interface with @ref cv::VideoCapture class. The video stream is +not available through OpenNI API and is only provided via the regular camera interface. +So, to get both depth and color frames, two @ref cv::VideoCapture objects should be created: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Open streams -The first object will use the regular Video4Linux2 interface to access the color sensor. 
The second one -is using OpenNI2 API to retrieve depth data. +The first object will use the OpenNI2 API to retrieve depth data. The second one uses the +Video4Linux2 interface to access the color sensor. Note that the example above assumes that +the Astra camera is the first camera in the system. If you have more than one camera connected, +you may need to explicitly set the proper camera number. -Before using the created VideoCapture objects you may want to setup stream parameters by setting -objects' properties. The most important parameters are frame width, frame height and fps: +Before using the created VideoCapture objects you may want to set up stream parameters by setting +objects' properties. The most important parameters are frame width, frame height and fps. +For this example, we’ll configure width and height of both streams to VGA resolution, which is +the maximum resolution available for both sensors, and we’d like both stream parameters to be the +same for easier color-to-depth data registration: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Setup streams -For setting and getting some property of sensor data generators use @ref cv::VideoCapture::set and +For setting and retrieving some property of sensor data generators use @ref cv::VideoCapture::set and @ref cv::VideoCapture::get methods respectively, e.g. : @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Get properties -The following properties of cameras available through OpenNI interfaces are supported for the depth +The following properties of cameras available through OpenNI interface are supported for the depth generator: - @ref cv::CAP_PROP_FRAME_WIDTH -- Frame width in pixels. @@ -106,15 +116,16 @@ generator: - @ref cv::CAP_PROP_OPENNI_FRAME_MAX_DEPTH -- A maximum supported depth of the camera in mm. - @ref cv::CAP_PROP_OPENNI_BASELINE -- Baseline value in mm. -After the VideoCapture objects are set up you can start reading frames from them. +After the VideoCapture objects have been set up, you can start reading frames from them. @note OpenCV's VideoCapture provides synchronous API, so you have to grab frames in a new thread to avoid one stream blocking while another stream is being read. VideoCapture is not a thread-safe class, so you need to be careful to avoid any possible deadlocks or data races. -Example implementation that gets frames from each sensor in a new thread and stores them -in a list along with their timestamps: +As there are two video sources that should be read simultaneously, it’s necessary to create two +threads to avoid blocking. Example implementation that gets frames from each sensor in a new thread +and stores them in a list along with their timestamps: @snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Read streams @@ -130,17 +141,25 @@ VideoCapture can retrieve the following data: -# data given from the color sensor is a regular BGR image (CV_8UC3). -When new data is available a reading thread notifies the main thread. A frame is stored in the -ordered list -- the first frame is the latest one: +When new data are available, each reading thread notifies the main thread using a condition variable. +A frame is stored in the ordered list -- the first frame in the list is the earliest captured, +the last frame is the latest captured. As depth and color frames are read from independent sources +two video streams may become out of sync even when both streams are set up for the same frame rate. 
+A post-synchronization procedure can be applied to the streams to combine depth and color frames into +pairs. The sample code below demonstrates this procedure: -@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Show color frame +@snippetlineno samples/cpp/tutorial_code/videoio/orbbec_astra/orbbec_astra.cpp Pair frames -Depth frames can be picked the same way from the `depthFrames` list. +In the code snippet above the execution is blocked until there are some frames in both frame lists. +When there are new frames, their timestamps are being checked -- if they differ more than a half of +the frame period then one of the frames is dropped. If timestamps are close enough, then two frames +are paired. Now, we have two frames: one containing color information and another one -- depth information. +In the example above retrieved frames are simply shown with cv::imshow function, but you can insert +any other processing code here. -After that, you'll have two frames: one containing color information and another one -- depth -information. In the sample images below you can see the color frame and the depth frame showing -the same scene. Looking at the color frame it's hard to distinguish plant leaves from leaves painted -on a wall, but the depth data makes it easy. +In the sample images below you can see the color frame and the depth frame representing the same scene. +Looking at the color frame it's hard to distinguish plant leaves from leaves painted on a wall, +but the depth data makes it easy. ![Color frame](images/astra_color.jpg) ![Depth frame](images/astra_depth.png) diff --git a/doc/tutorials/imgcodecs/raster-gdal/raster_io_gdal.markdown b/doc/tutorials/app/raster_io_gdal.markdown similarity index 95% rename from doc/tutorials/imgcodecs/raster-gdal/raster_io_gdal.markdown rename to doc/tutorials/app/raster_io_gdal.markdown index 432caa69e0..73574cdccd 100644 --- a/doc/tutorials/imgcodecs/raster-gdal/raster_io_gdal.markdown +++ b/doc/tutorials/app/raster_io_gdal.markdown @@ -1,6 +1,16 @@ Reading Geospatial Raster files with GDAL {#tutorial_raster_io_gdal} ========================================= +@tableofcontents + +@prev_tutorial{tutorial_trackbar} +@next_tutorial{tutorial_video_input_psnr_ssim} + +| | | +| -: | :- | +| Original author | Marvin Smith | +| Compatibility | OpenCV >= 3.0 | + Geospatial raster data is a heavily used product in Geographic Information Systems and Photogrammetry. Raster data typically can represent imagery and Digital Elevation Models (DEM). The standard library for loading GIS imagery is the Geographic Data Abstraction Library [(GDAL)](http://www.gdal.org). 
In this diff --git a/doc/tutorials/app/table_of_content_app.markdown b/doc/tutorials/app/table_of_content_app.markdown new file mode 100644 index 0000000000..8e05dfaf07 --- /dev/null +++ b/doc/tutorials/app/table_of_content_app.markdown @@ -0,0 +1,10 @@ +Application utils (highgui, imgcodecs, videoio modules) {#tutorial_table_of_content_app} +======================================================= + +- @subpage tutorial_trackbar +- @subpage tutorial_raster_io_gdal +- @subpage tutorial_video_input_psnr_ssim +- @subpage tutorial_video_write +- @subpage tutorial_kinect_openni +- @subpage tutorial_orbbec_astra +- @subpage tutorial_intelperc diff --git a/doc/tutorials/highgui/trackbar/trackbar.markdown b/doc/tutorials/app/trackbar.markdown similarity index 96% rename from doc/tutorials/highgui/trackbar/trackbar.markdown rename to doc/tutorials/app/trackbar.markdown index d6700d6387..2b88114a6b 100644 --- a/doc/tutorials/highgui/trackbar/trackbar.markdown +++ b/doc/tutorials/app/trackbar.markdown @@ -1,6 +1,16 @@ Adding a Trackbar to our applications! {#tutorial_trackbar} ====================================== +@tableofcontents + +@next_tutorial{tutorial_raster_io_gdal} + +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + + - In the previous tutorials (about @ref tutorial_adding_images and the @ref tutorial_basic_linear_transform) you might have noted that we needed to give some **input** to our programs, such as \f$\alpha\f$ and \f$\beta\f$. We accomplished that by entering this data using the Terminal. diff --git a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown b/doc/tutorials/app/video_input_psnr_ssim.markdown similarity index 98% rename from doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown rename to doc/tutorials/app/video_input_psnr_ssim.markdown index 76cfa3751d..e212c4e46d 100644 --- a/doc/tutorials/videoio/video-input-psnr-ssim/video_input_psnr_ssim.markdown +++ b/doc/tutorials/app/video_input_psnr_ssim.markdown @@ -1,8 +1,16 @@ Video Input with OpenCV and similarity measurement {#tutorial_video_input_psnr_ssim} ================================================== +@tableofcontents + +@prev_tutorial{tutorial_raster_io_gdal} @next_tutorial{tutorial_video_write} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/videoio/video-write/video_write.markdown b/doc/tutorials/app/video_write.markdown similarity index 97% rename from doc/tutorials/videoio/video-write/video_write.markdown rename to doc/tutorials/app/video_write.markdown index 0100f8cfc4..d655e24b89 100644 --- a/doc/tutorials/videoio/video-write/video_write.markdown +++ b/doc/tutorials/app/video_write.markdown @@ -1,9 +1,16 @@ Creating a video with OpenCV {#tutorial_video_write} ============================ +@tableofcontents + @prev_tutorial{tutorial_video_input_psnr_ssim} @next_tutorial{tutorial_kinect_openni} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- @@ -59,7 +66,7 @@ extension, its first version. A direct limitation of this is that you cannot sav larger than 2 GB. Furthermore, you can only create and expand a single video track inside the container. No audio or other track editing support here. Nevertheless, any video codec present on your system might work. 
If you encounter some of these limitations you will need to look into more -specialized video writing libraries such as *FFMpeg* or codecs as *HuffYUV*, *CorePNG* and *LCL*. As +specialized video writing libraries such as *FFmpeg* or codecs such as *HuffYUV*, *CorePNG* and *LCL*. As an alternative, create the video track with OpenCV and expand it with sound tracks or convert it to other formats by using video manipulation programs such as *VirtualDub* or *AviSynth*. @@ -109,7 +116,7 @@ const string NAME = source.substr(0, pAt) + argv[2][0] + ".avi"; // Form the n @code{.cpp} CV_FOURCC('P','I','M','1') // this is an MPEG1 codec from the characters to integer @endcode - If you pass for this argument minus one than a window will pop up at runtime that contains all + If you pass for this argument minus one then a window will pop up at runtime that contains all the codecs installed on your system and asks you to select the one to use: ![](images/videoCompressSelect.png) diff --git a/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown b/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown index 90298124c7..00e1e9668f 100644 --- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown +++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.markdown @@ -1,9 +1,16 @@ Camera calibration With OpenCV {#tutorial_camera_calibration} ============================== +@tableofcontents + @prev_tutorial{tutorial_camera_calibration_square_chess} @next_tutorial{tutorial_real_time_pose} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 4.0 | + Cameras have been around for a long, long time. However, with the introduction of the cheap *pinhole* cameras in the late 20th century, they became a common occurrence in our everyday life. diff --git a/doc/tutorials/calib3d/camera_calibration_pattern/camera_calibration_pattern.markdown b/doc/tutorials/calib3d/camera_calibration_pattern/camera_calibration_pattern.markdown index d6df8a8b5e..c87f9f95f8 100644 --- a/doc/tutorials/calib3d/camera_calibration_pattern/camera_calibration_pattern.markdown +++ b/doc/tutorials/calib3d/camera_calibration_pattern/camera_calibration_pattern.markdown @@ -1,8 +1,15 @@ Create calibration pattern {#tutorial_camera_calibration_pattern} ========================================= +@tableofcontents + @next_tutorial{tutorial_camera_calibration_square_chess} +| | | +| -: | :- | +| Original author | Laurent Berger | +| Compatibility | OpenCV >= 3.0 | + The goal of this tutorial is to learn how to create a calibration pattern. diff --git a/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.markdown b/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.markdown index 51b0a5eac7..b278bb87ac 100644 --- a/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.markdown +++ b/doc/tutorials/calib3d/camera_calibration_square_chess/camera_calibration_square_chess.markdown @@ -1,9 +1,16 @@ Camera calibration with square chessboard {#tutorial_camera_calibration_square_chess} ========================================= +@tableofcontents + @prev_tutorial{tutorial_camera_calibration_pattern} @next_tutorial{tutorial_camera_calibration} +| | | +| -: | :- | +| Original author | Victor Eruhimov | +| Compatibility | OpenCV >= 4.0 | + The goal of this tutorial is to learn how to calibrate a camera given a set of chessboard images (a minimal code sketch follows below). 
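The chessboard-based calibration these tutorials describe boils down to detecting the board's inner corners in several views and passing the 3D-2D correspondences to @ref cv::calibrateCamera. A minimal Python sketch, assuming a board with 9x6 inner corners and input images matching `calib_*.jpg` (both are placeholders for your own data):

@code{.py}
import glob
import numpy as np
import cv2 as cv

pattern = (9, 6)  # inner corners per row and column (assumption)

# 3D reference coordinates of the corners on the z=0 board plane
objp = np.zeros((pattern[0] * pattern[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:pattern[0], 0:pattern[1]].T.reshape(-1, 2)

obj_points, img_points = [], []
for name in glob.glob('calib_*.jpg'):
    gray = cv.cvtColor(cv.imread(name), cv.COLOR_BGR2GRAY)
    found, corners = cv.findChessboardCorners(gray, pattern)
    if found:
        obj_points.append(objp)
        img_points.append(corners)

# gray.shape[::-1] is the (width, height) of the last processed image
rms, camera_matrix, dist_coeffs, rvecs, tvecs = cv.calibrateCamera(
    obj_points, img_points, gray.shape[::-1], None, None)
print('RMS re-projection error:', rms)
@endcode

A low RMS re-projection error (well under one pixel) is the usual sanity check; refining the detected corners with cv.cornerSubPix before calibration typically improves it.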
diff --git a/doc/tutorials/calib3d/images/camera_calibration.png b/doc/tutorials/calib3d/images/camera_calibration.png deleted file mode 100644 index b010459c9d..0000000000 Binary files a/doc/tutorials/calib3d/images/camera_calibration.png and /dev/null differ diff --git a/doc/tutorials/calib3d/images/camera_calibration_square_chess.jpg b/doc/tutorials/calib3d/images/camera_calibration_square_chess.jpg deleted file mode 100644 index 1fcab0f83c..0000000000 Binary files a/doc/tutorials/calib3d/images/camera_calibration_square_chess.jpg and /dev/null differ diff --git a/doc/tutorials/calib3d/images/real_time_pose_estimation.jpg b/doc/tutorials/calib3d/images/real_time_pose_estimation.jpg deleted file mode 100644 index dcd24cc791..0000000000 Binary files a/doc/tutorials/calib3d/images/real_time_pose_estimation.jpg and /dev/null differ diff --git a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown index 36e19e0754..3c4f0b0c83 100644 --- a/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown +++ b/doc/tutorials/calib3d/interactive_calibration/interactive_calibration.markdown @@ -1,8 +1,15 @@ Interactive camera calibration application {#tutorial_interactive_calibration} ============================== +@tableofcontents + @prev_tutorial{tutorial_real_time_pose} +| | | +| -: | :- | +| Original author | Vladislav Sovrasov | +| Compatibility | OpenCV >= 3.1 | + According to the classical calibration technique, the user must collect all data first and then run the @ref cv::calibrateCamera function to obtain camera parameters. If the average re-projection error is huge or if the estimated parameters seem to be wrong, the process of diff --git a/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown b/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown index 9888d29230..58419f8618 100644 --- a/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown +++ b/doc/tutorials/calib3d/real_time_pose/real_time_pose.markdown @@ -1,9 +1,16 @@ Real Time pose estimation of a textured object {#tutorial_real_time_pose} ============================================== +@tableofcontents + @prev_tutorial{tutorial_camera_calibration} @next_tutorial{tutorial_interactive_calibration} +| | | +| -: | :- | +| Original author | Edgar Riba | +| Compatibility | OpenCV >= 3.0 | + Nowadays, augmented reality is one of the top research topics in the computer vision and robotics fields. The most elemental problem in augmented reality is the estimation of the camera pose with respect to an diff --git a/doc/tutorials/calib3d/table_of_content_calib3d.markdown b/doc/tutorials/calib3d/table_of_content_calib3d.markdown index 3861d448b7..5fc6e591e9 100644 --- a/doc/tutorials/calib3d/table_of_content_calib3d.markdown +++ b/doc/tutorials/calib3d/table_of_content_calib3d.markdown @@ -1,58 +1,8 @@ Camera calibration and 3D reconstruction (calib3d module) {#tutorial_table_of_content_calib3d} ========================================================== -Although we get most of our images in a 2D format they do come from a 3D world. Here you will learn how to find out 3D world information from 2D images. - - @subpage tutorial_camera_calibration_pattern - - *Languages:* Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Laurent Berger - - You will learn how to create some calibration pattern. 
- - @subpage tutorial_camera_calibration_square_chess - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Victor Eruhimov - - You will use some chessboard images to calibrate your camera. - - @subpage tutorial_camera_calibration - - *Languages:* C++ - - *Compatibility:* \> OpenCV 4.0 - - *Author:* Bernát Gábor - - Camera calibration by using either the chessboard, circle or the asymmetrical circle - pattern. Get the images either from a camera attached, a video file or from an image - collection. - - @subpage tutorial_real_time_pose - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Edgar Riba - - Real time pose estimation of a textured object using ORB features, FlannBased matcher, PnP - approach plus Ransac and Linear Kalman Filter to reject possible bad poses. - - @subpage tutorial_interactive_calibration - - *Compatibility:* \> OpenCV 3.1 - - *Author:* Vladislav Sovrasov - - Camera calibration by using either the chessboard, chAruco, asymmetrical circle or dual asymmetrical circle - pattern. Calibration process is continuous, so you can see results after each new pattern shot. - As an output you get average reprojection error, intrinsic camera parameters, distortion coefficients and - confidence intervals for all of evaluated variables. diff --git a/doc/tutorials/core/adding_images/adding_images.markdown b/doc/tutorials/core/adding_images/adding_images.markdown index c8776325a3..3cec9f1734 100644 --- a/doc/tutorials/core/adding_images/adding_images.markdown +++ b/doc/tutorials/core/adding_images/adding_images.markdown @@ -1,9 +1,17 @@ Adding (blending) two images using OpenCV {#tutorial_adding_images} ========================================= +@tableofcontents + @prev_tutorial{tutorial_mat_operations} @next_tutorial{tutorial_basic_linear_transform} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + +We will learn how to blend two images! Goal ---- diff --git a/doc/tutorials/core/basic_linear_transform/basic_linear_transform.markdown b/doc/tutorials/core/basic_linear_transform/basic_linear_transform.markdown index 1eac760a4c..75bd655272 100644 --- a/doc/tutorials/core/basic_linear_transform/basic_linear_transform.markdown +++ b/doc/tutorials/core/basic_linear_transform/basic_linear_transform.markdown @@ -1,9 +1,16 @@ Changing the contrast and brightness of an image! 
{#tutorial_basic_linear_transform} ================================================= +@tableofcontents + @prev_tutorial{tutorial_adding_images} @next_tutorial{tutorial_discrete_fourier_transform} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/discrete_fourier_transform/discrete_fourier_transform.markdown b/doc/tutorials/core/discrete_fourier_transform/discrete_fourier_transform.markdown index 53ef27258d..1701babf4f 100644 --- a/doc/tutorials/core/discrete_fourier_transform/discrete_fourier_transform.markdown +++ b/doc/tutorials/core/discrete_fourier_transform/discrete_fourier_transform.markdown @@ -1,9 +1,16 @@ Discrete Fourier Transform {#tutorial_discrete_fourier_transform} ========================== +@tableofcontents + @prev_tutorial{tutorial_basic_linear_transform} @next_tutorial{tutorial_file_input_output_with_xml_yml} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown index b87ec79ff7..da060cf27d 100644 --- a/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown +++ b/doc/tutorials/core/file_input_output_with_xml_yml/file_input_output_with_xml_yml.markdown @@ -1,9 +1,16 @@ File Input and Output using XML and YAML files {#tutorial_file_input_output_with_xml_yml} ============================================== +@tableofcontents + @prev_tutorial{tutorial_discrete_fourier_transform} @next_tutorial{tutorial_how_to_use_OpenCV_parallel_for_} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown b/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown index c5028d6a3a..d19936ecbe 100644 --- a/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown +++ b/doc/tutorials/core/how_to_scan_images/how_to_scan_images.markdown @@ -1,9 +1,16 @@ How to scan images, lookup tables and time measurement with OpenCV {#tutorial_how_to_scan_images} ================================================================== +@tableofcontents + @prev_tutorial{tutorial_mat_the_basic_image_container} @next_tutorial{tutorial_mat_mask_operations} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown index 80cc6c68fe..92f73b77e8 100644 --- a/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown +++ b/doc/tutorials/core/how_to_use_OpenCV_parallel_for_/how_to_use_OpenCV_parallel_for_.markdown @@ -1,8 +1,14 @@ How to use the OpenCV parallel_for_ to parallelize your code {#tutorial_how_to_use_OpenCV_parallel_for_} ================================================================== +@tableofcontents + @prev_tutorial{tutorial_file_input_output_with_xml_yml} +| | | +| -: | :- | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/images/Adding_Images_Tutorial_Result_0.jpg b/doc/tutorials/core/images/Adding_Images_Tutorial_Result_0.jpg deleted file mode 100644 index 940b54c82f..0000000000 Binary files 
a/doc/tutorials/core/images/Adding_Images_Tutorial_Result_0.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/Basic_Linear_Transform_Tutorial_Result_0.jpg b/doc/tutorials/core/images/Basic_Linear_Transform_Tutorial_Result_0.jpg deleted file mode 100644 index eccf37aa20..0000000000 Binary files a/doc/tutorials/core/images/Basic_Linear_Transform_Tutorial_Result_0.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/Drawing_1_Tutorial_Result_0.jpg b/doc/tutorials/core/images/Drawing_1_Tutorial_Result_0.jpg deleted file mode 100644 index 05e8f01232..0000000000 Binary files a/doc/tutorials/core/images/Drawing_1_Tutorial_Result_0.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/Drawing_2_Tutorial_Result_7.jpg b/doc/tutorials/core/images/Drawing_2_Tutorial_Result_7.jpg deleted file mode 100644 index d650c18427..0000000000 Binary files a/doc/tutorials/core/images/Drawing_2_Tutorial_Result_7.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/Morphology_1_Tutorial_Cover.jpg b/doc/tutorials/core/images/Morphology_1_Tutorial_Cover.jpg deleted file mode 100644 index 71509ba5b8..0000000000 Binary files a/doc/tutorials/core/images/Morphology_1_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/Smoothing_Tutorial_Cover.jpg b/doc/tutorials/core/images/Smoothing_Tutorial_Cover.jpg deleted file mode 100644 index c11f2ed024..0000000000 Binary files a/doc/tutorials/core/images/Smoothing_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/discrete_fourier_transform.png b/doc/tutorials/core/images/discrete_fourier_transform.png deleted file mode 100644 index 07bd1119f4..0000000000 Binary files a/doc/tutorials/core/images/discrete_fourier_transform.png and /dev/null differ diff --git a/doc/tutorials/core/images/file_input_output_with_xml_yml.png b/doc/tutorials/core/images/file_input_output_with_xml_yml.png deleted file mode 100644 index 24ae4fdd23..0000000000 Binary files a/doc/tutorials/core/images/file_input_output_with_xml_yml.png and /dev/null differ diff --git a/doc/tutorials/core/images/howToScanImages.jpg b/doc/tutorials/core/images/howToScanImages.jpg deleted file mode 100644 index 4e0fa26d0d..0000000000 Binary files a/doc/tutorials/core/images/howToScanImages.jpg and /dev/null differ diff --git a/doc/tutorials/core/images/interopOpenCV1.png b/doc/tutorials/core/images/interopOpenCV1.png deleted file mode 100644 index 040f50a003..0000000000 Binary files a/doc/tutorials/core/images/interopOpenCV1.png and /dev/null differ diff --git a/doc/tutorials/core/images/matMaskFilter2DOp.png b/doc/tutorials/core/images/matMaskFilter2DOp.png deleted file mode 100644 index 6795921608..0000000000 Binary files a/doc/tutorials/core/images/matMaskFilter2DOp.png and /dev/null differ diff --git a/doc/tutorials/core/images/matTheBasicImageStructure.jpg b/doc/tutorials/core/images/matTheBasicImageStructure.jpg deleted file mode 100644 index ab6704a3c9..0000000000 Binary files a/doc/tutorials/core/images/matTheBasicImageStructure.jpg and /dev/null differ diff --git a/doc/tutorials/core/mat-mask-operations/mat_mask_operations.markdown b/doc/tutorials/core/mat-mask-operations/mat_mask_operations.markdown index fedb123ae6..43c71d7159 100644 --- a/doc/tutorials/core/mat-mask-operations/mat_mask_operations.markdown +++ b/doc/tutorials/core/mat-mask-operations/mat_mask_operations.markdown @@ -1,9 +1,16 @@ Mask operations on matrices {#tutorial_mat_mask_operations} =========================== +@tableofcontents + 
@prev_tutorial{tutorial_how_to_scan_images} @next_tutorial{tutorial_mat_operations} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Mask operations on matrices are quite simple. The idea is that we recalculate each pixel's value in an image according to a mask matrix (also known as kernel). This mask holds values that will adjust how much influence neighboring pixels (and the current pixel) have on the new pixel value. From a diff --git a/doc/tutorials/core/mat_operations.markdown b/doc/tutorials/core/mat_operations.markdown index 991d01367b..331a847551 100644 --- a/doc/tutorials/core/mat_operations.markdown +++ b/doc/tutorials/core/mat_operations.markdown @@ -1,9 +1,15 @@ Operations with images {#tutorial_mat_operations} ====================== +@tableofcontents + @prev_tutorial{tutorial_mat_mask_operations} @next_tutorial{tutorial_adding_images} +| | | +| -: | :- | +| Compatibility | OpenCV >= 3.0 | + Input/Output ------------ diff --git a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown index 573e112d61..4f6f2b8a88 100644 --- a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown +++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown @@ -1,8 +1,15 @@ Mat - The Basic Image Container {#tutorial_mat_the_basic_image_container} =============================== +@tableofcontents + @next_tutorial{tutorial_how_to_scan_images} +| | | +| -: | :- | +| Original author | Bernát Gábor | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/core/table_of_content_core.markdown b/doc/tutorials/core/table_of_content_core.markdown index c607d4c02c..4cd77fcdfc 100644 --- a/doc/tutorials/core/table_of_content_core.markdown +++ b/doc/tutorials/core/table_of_content_core.markdown @@ -1,97 +1,12 @@ The Core Functionality (core module) {#tutorial_table_of_content_core} ===================================== -Here you will learn the about the basic building blocks of the library. A must read and know for -understanding how to manipulate the images on a pixel level. - - @subpage tutorial_mat_the_basic_image_container - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You will learn how to store images in the memory and how to print out their content to the - console. - - @subpage tutorial_how_to_scan_images - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You'll find out how to scan images (go through each of the image pixels) with OpenCV. - Bonus: time measurement with OpenCV. - - - @subpage tutorial_mat_mask_operations - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You'll find out how to scan images with neighbor access and use the @ref cv::filter2D - function to apply kernel filters on images. - - @subpage tutorial_mat_operations - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - Reading/writing images from file, accessing pixels, primitive operations, visualizing images. - - @subpage tutorial_adding_images - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - We will learn how to blend two images! 
- - @subpage tutorial_basic_linear_transform - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - We will learn how to change our image appearance! - - @subpage tutorial_discrete_fourier_transform - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You will see how and why use the Discrete Fourier transformation with OpenCV. - - - @subpage tutorial_file_input_output_with_xml_yml - - *Languages:* C++, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You will see how to use the @ref cv::FileStorage data structure of OpenCV to write and read - data to XML or YAML file format. - - @subpage tutorial_how_to_use_OpenCV_parallel_for_ - - *Languages:* C++ - - *Compatibility:* \>= OpenCV 2.4.3 - - You will see how to use the OpenCV parallel_for_ to easily parallelize your code. diff --git a/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown b/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown index 43c86acaf0..48a55992c6 100644 --- a/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown +++ b/doc/tutorials/dnn/dnn_OCR/dnn_OCR.markdown @@ -1,6 +1,14 @@ # How to run custom OCR model {#tutorial_dnn_OCR} +@tableofcontents + @prev_tutorial{tutorial_dnn_custom_layers} +@next_tutorial{tutorial_dnn_text_spotting} + +| | | +| -: | :- | +| Original author | Zihao Mu | +| Compatibility | OpenCV >= 4.3 | ## Introduction @@ -43,4 +51,4 @@ The input of text recognition model is the output of the text detection model, w DenseNet_CTC has the smallest parameters and best FPS, and it is suitable for edge devices, which are very sensitive to the cost of calculation. If you have limited computing resources and want to achieve better accuracy, VGG_CTC is a good choice. -CRNN_VGG_BiLSTM_CTC is suitable for scenarios that require high recognition accuracy. \ No newline at end of file +CRNN_VGG_BiLSTM_CTC is suitable for scenarios that require high recognition accuracy. diff --git a/doc/tutorials/dnn/dnn_android/dnn_android.markdown b/doc/tutorials/dnn/dnn_android/dnn_android.markdown index 04520245da..4eb1ff238e 100644 --- a/doc/tutorials/dnn/dnn_android/dnn_android.markdown +++ b/doc/tutorials/dnn/dnn_android/dnn_android.markdown @@ -1,8 +1,15 @@ # How to run deep networks on Android device {#tutorial_dnn_android} +@tableofcontents + @prev_tutorial{tutorial_dnn_halide_scheduling} @next_tutorial{tutorial_dnn_yolo} +| | | +| -: | :- | +| Original author | Dmitry Kurtaev | +| Compatibility | OpenCV >= 3.3 | + ## Introduction In this tutorial you'll know how to run deep learning networks on Android device using OpenCV deep learning module. diff --git a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md index feed5aaf76..07c3fb4a7f 100644 --- a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md +++ b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md @@ -1,8 +1,15 @@ # Custom deep learning layers support {#tutorial_dnn_custom_layers} +@tableofcontents + @prev_tutorial{tutorial_dnn_javascript} @next_tutorial{tutorial_dnn_OCR} +| | | +| -: | :- | +| Original author | Dmitry Kurtaev | +| Compatibility | OpenCV >= 3.4.1 | + ## Introduction Deep learning is a fast growing area. The new approaches to build neural networks usually introduce new types of layers. 
They could be modifications of existing diff --git a/doc/tutorials/dnn/dnn_googlenet/dnn_googlenet.markdown b/doc/tutorials/dnn/dnn_googlenet/dnn_googlenet.markdown index f6040dce1c..a886e9e089 100644 --- a/doc/tutorials/dnn/dnn_googlenet/dnn_googlenet.markdown +++ b/doc/tutorials/dnn/dnn_googlenet/dnn_googlenet.markdown @@ -1,8 +1,15 @@ Load Caffe framework models {#tutorial_dnn_googlenet} =========================== +@tableofcontents + @next_tutorial{tutorial_dnn_halide} +| | | +| -: | :- | +| Original author | Vitaliy Lyudvichenko | +| Compatibility | OpenCV >= 3.3 | + Introduction ------------ diff --git a/doc/tutorials/dnn/dnn_halide/dnn_halide.markdown b/doc/tutorials/dnn/dnn_halide/dnn_halide.markdown index 0500d25150..84ab50c193 100644 --- a/doc/tutorials/dnn/dnn_halide/dnn_halide.markdown +++ b/doc/tutorials/dnn/dnn_halide/dnn_halide.markdown @@ -1,8 +1,15 @@ # How to enable Halide backend for improve efficiency {#tutorial_dnn_halide} +@tableofcontents + @prev_tutorial{tutorial_dnn_googlenet} @next_tutorial{tutorial_dnn_halide_scheduling} +| | | +| -: | :- | +| Original author | Dmitry Kurtaev | +| Compatibility | OpenCV >= 3.3 | + ## Introduction This tutorial guidelines how to run your models in OpenCV deep learning module using Halide language backend. Halide is an open-source project that let us diff --git a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown index b825da7922..e4a6f1fecc 100644 --- a/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown +++ b/doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown @@ -1,8 +1,15 @@ # How to schedule your network for Halide backend {#tutorial_dnn_halide_scheduling} +@tableofcontents + @prev_tutorial{tutorial_dnn_halide} @next_tutorial{tutorial_dnn_android} +| | | +| -: | :- | +| Original author | Dmitry Kurtaev | +| Compatibility | OpenCV >= 3.3 | + ## Introduction Halide code is the same for every device we use. But for achieving the satisfied efficiency we should schedule computations properly. In this tutorial we describe diff --git a/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown b/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown index 9ad632fbc8..49b6f33adb 100644 --- a/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown +++ b/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown @@ -1,8 +1,15 @@ # How to run deep networks in browser {#tutorial_dnn_javascript} +@tableofcontents + @prev_tutorial{tutorial_dnn_yolo} @next_tutorial{tutorial_dnn_custom_layers} +| | | +| -: | :- | +| Original author | Dmitry Kurtaev | +| Compatibility | OpenCV >= 3.3.1 | + ## Introduction This tutorial will show us how to run deep learning models using OpenCV.js right in a browser. 
Tutorial refers a sample of face detection and face recognition diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/opencv_resnet50_test_res_c.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/opencv_resnet50_test_res_c.jpg new file mode 100644 index 0000000000..4d1ba30378 Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/opencv_resnet50_test_res_c.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/pytorch_resnet50_opencv_test_res.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/pytorch_resnet50_opencv_test_res.jpg new file mode 100644 index 0000000000..7bee270616 Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/pytorch_resnet50_opencv_test_res.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/squirrel_cls.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/squirrel_cls.jpg new file mode 100644 index 0000000000..289b13bbd3 Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/squirrel_cls.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/tf_mobilenet_opencv_test_res.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/tf_mobilenet_opencv_test_res.jpg new file mode 100644 index 0000000000..cc18156760 Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_classification/images/tf_mobilenet_opencv_test_res.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_c_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_c_tutorial.md new file mode 100644 index 0000000000..1807caf0b4 --- /dev/null +++ b/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_c_tutorial.md @@ -0,0 +1,220 @@ +# Conversion of PyTorch Classification Models and Launch with OpenCV C++ {#pytorch_cls_c_tutorial_dnn_conversion} + +@prev_tutorial{pytorch_cls_tutorial_dnn_conversion} + +| | | +| -: | :- | +| Original author | Anastasia Murzova | +| Compatibility | OpenCV >= 4.5 | + +## Goals +In this tutorial you will learn how to: +* convert PyTorch classification models into ONNX format +* run the converted PyTorch model with the OpenCV C/C++ API +* provide model inference + +We will explore the above-listed points by the example of the ResNet-50 architecture. + +## Introduction +Let's briefly review the key concepts involved in the pipeline of PyTorch model transition with OpenCV API. The initial step in the conversion of PyTorch models into cv::dnn::Net +is transferring the model into [ONNX](https://onnx.ai/about.html) format. ONNX aims at the interchangeability of neural networks between various frameworks. There is a built-in function in PyTorch for ONNX conversion: [``torch.onnx.export``](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). +Then the obtained ``.onnx`` model is passed into cv::dnn::readNetFromONNX or cv::dnn::readNet. + +## Requirements +To be able to experiment with the below code you will need to install a set of libraries. We will use a virtual environment with python3.7+ for this: + +```console +virtualenv -p /usr/bin/python3.7 <env_dir> +source <env_dir>/bin/activate +``` + +For OpenCV-Python building from source, follow the corresponding instructions from the @ref tutorial_py_table_of_contents_setup.
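+As a quick sanity check (an optional step, not part of the original sample), you can verify that the activated environment uses the expected interpreter and already sees an OpenCV build:
+
+```console
+python -c "import sys; print(sys.version)"
+python -c "import cv2; print(cv2.__version__)"
+```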
+ +Before you start the installation of the libraries, you can customize the [requirements.txt](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/requirements.txt), excluding or including (for example, ``opencv-python``) some dependencies. +The below line initiates requirements installation into the previously activated virtual environment: + +```console +pip install -r requirements.txt +``` + +## Practice +In this part we are going to cover the following points: +1. create a classification model conversion pipeline +2. provide the inference, process prediction results + +### Model Conversion Pipeline +The code in this subchapter is located in the ``samples/dnn/dnn_model_runner`` module and can be executed with the line: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_resnet50_onnx +``` + +The following code contains the description of the below-listed steps: +1. instantiate PyTorch model +2. convert PyTorch model into ``.onnx`` + +```python +# initialize PyTorch ResNet-50 model +original_model = models.resnet50(pretrained=True) + +# get the path to the PyTorch model converted into ONNX +full_model_path = get_pytorch_onnx_model(original_model) +print("PyTorch ResNet-50 model was successfully converted: ", full_model_path) +``` + +The ``get_pytorch_onnx_model(original_model)`` function is based on the ``torch.onnx.export(...)`` call: + +```python +# define the directory to save the converted model in +onnx_model_path = "models" +# define the name of the converted model +onnx_model_name = "resnet50.onnx" + +# create the directory for the converted model +os.makedirs(onnx_model_path, exist_ok=True) + +# get full path to the converted model +full_model_path = os.path.join(onnx_model_path, onnx_model_name) + +# generate model input +generated_input = Variable( + torch.randn(1, 3, 224, 224) +) + +# model export into ONNX format +torch.onnx.export( + original_model, + generated_input, + full_model_path, + verbose=True, + input_names=["input"], + output_names=["output"], + opset_version=11 +) +``` + +After the successful execution of the above code we will get the following output: + +```console +PyTorch ResNet-50 model was successfully converted: models/resnet50.onnx +``` + +The ``dnn_model_runner`` module proposed in ``samples/dnn`` allows us to reproduce the above conversion steps for the following PyTorch classification models: +* alexnet +* vgg11 +* vgg13 +* vgg16 +* vgg19 +* resnet18 +* resnet34 +* resnet50 +* resnet101 +* resnet152 +* squeezenet1_0 +* squeezenet1_1 +* resnext50_32x4d +* resnext101_32x8d +* wide_resnet50_2 +* wide_resnet101_2 + +To obtain the converted model, the following line should be executed: + +``` +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name <model_name> --evaluate False +``` + +For the ResNet-50 case the below line should be run: + +``` +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name resnet50 --evaluate False +``` + +The default root directory for the converted model storage is defined in the ``CommonConfig`` configuration class: + +```python +@dataclass +class CommonConfig: + output_data_root_dir: str = "dnn_model_runner/dnn_conversion" +``` + +Thus, the converted ResNet-50 will be saved in ``dnn_model_runner/dnn_conversion/models``. + +### Inference Pipeline +Now we can use ```models/resnet50.onnx``` for the inference pipeline using OpenCV C/C++ API.
The implemented pipeline can be found in [samples/dnn/classification.cpp](https://github.com/opencv/opencv/blob/master/samples/dnn/classification.cpp). +After the build of samples (``BUILD_EXAMPLES`` flag value should be ``ON``), the appropriate ``example_dnn_classification`` executable file will be provided. + +To provide model inference we will use the below [squirrel photo](https://www.pexels.com/photo/brown-squirrel-eating-1564292) (under [CC0](https://www.pexels.com/terms-of-service/) license) corresponding to ImageNet class ID 335: +```console +fox squirrel, eastern fox squirrel, Sciurus niger +``` + +![Classification model input image](images/squirrel_cls.jpg) + +For the label decoding of the obtained prediction, we also need the ``imagenet_classes.txt`` file, which contains the full list of the ImageNet classes. + +In this tutorial we will run the inference process for the converted PyTorch ResNet-50 model from the build (``samples/build``) directory: + +``` +./dnn/example_dnn_classification --model=../dnn/models/resnet50.onnx --input=../data/squirrel_cls.jpg --width=224 --height=224 --rgb=true --scale="0.003921569" --mean="123.675 116.28 103.53" --std="0.229 0.224 0.225" --crop=true --initial_width=256 --initial_height=256 --classes=../data/dnn/classification_classes_ILSVRC2012.txt +``` + +Let's explore ``classification.cpp`` key points step by step: + +* read the model with cv::dnn::readNet and initialize the network: + +```cpp +Net net = readNet(model, config, framework); +``` + +The ``model`` parameter value is taken from the ``--model`` key. In our case, it is ``resnet50.onnx``. + +* preprocess input image: + +```cpp +if (rszWidth != 0 && rszHeight != 0) +{ + resize(frame, frame, Size(rszWidth, rszHeight)); +} + +// Create a 4D blob from a frame +blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, crop); + +// Check std values. +if (std.val[0] != 0.0 && std.val[1] != 0.0 && std.val[2] != 0.0) +{ + // Divide blob by std. + divide(blob, std, blob); +} +``` + +In this step we use the cv::dnn::blobFromImage function to prepare the model input. +We set ``Size(rszWidth, rszHeight)`` with ``--initial_width=256 --initial_height=256`` for the initial image resize as described in [PyTorch ResNet inference pipeline](https://pytorch.org/hub/pytorch_vision_resnet/). + +It should be noted that in cv::dnn::blobFromImage the mean value is subtracted first, and only then are the pixel values multiplied by the scale. +Thus, we use ``--mean="123.675 116.28 103.53"``, which is equivalent to ``[0.485, 0.456, 0.406]`` multiplied by ``255.0`` to reproduce the original image preprocessing order for PyTorch classification models: + +```python +img /= 255.0 +img -= [0.485, 0.456, 0.406] +img /= [0.229, 0.224, 0.225] +``` + +* make forward pass: + +```cpp +net.setInput(blob); +Mat prob = net.forward(); +``` + +* process the prediction: + +```cpp +Point classIdPoint; +double confidence; +minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint); +int classId = classIdPoint.x; +``` + +Here we choose the most likely object class.
The ``classId`` result for our case is 335 - fox squirrel, eastern fox squirrel, Sciurus niger: + +![ResNet50 OpenCV C++ inference output](images/opencv_resnet50_test_res_c.jpg) diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_tutorial.md new file mode 100644 index 0000000000..409d2f5a49 --- /dev/null +++ b/doc/tutorials/dnn/dnn_pytorch_tf_classification/pytorch_cls_model_conversion_tutorial.md @@ -0,0 +1,362 @@ +# Conversion of PyTorch Classification Models and Launch with OpenCV Python {#pytorch_cls_tutorial_dnn_conversion} + +@prev_tutorial{tutorial_dnn_OCR} +@next_tutorial{pytorch_cls_c_tutorial_dnn_conversion} + +| | | +| -: | :- | +| Original author | Anastasia Murzova | +| Compatibility | OpenCV >= 4.5 | + +## Goals +In this tutorial you will learn how to: +* convert PyTorch classification models into ONNX format +* run converted PyTorch model with OpenCV Python API +* obtain an evaluation of the PyTorch and OpenCV DNN models + +We will explore the above-listed points by the example of the ResNet-50 architecture. + +## Introduction +Let's briefly review the key concepts involved in the pipeline of PyTorch model transition with OpenCV API. The initial step in the conversion of PyTorch models into cv.dnn.Net +is transferring the model into [ONNX](https://onnx.ai/about.html) format. ONNX aims at the interchangeability of neural networks between various frameworks. There is a built-in function in PyTorch for ONNX conversion: [``torch.onnx.export``](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). +Then the obtained ``.onnx`` model is passed into cv.dnn.readNetFromONNX. + +## Requirements +To be able to experiment with the below code you will need to install a set of libraries. We will use a virtual environment with python3.7+ for this: + +```console +virtualenv -p /usr/bin/python3.7 <env_dir> +source <env_dir>/bin/activate +``` + +For OpenCV-Python building from source, follow the corresponding instructions from the @ref tutorial_py_table_of_contents_setup. + +Before you start the installation of the libraries, you can customize the [requirements.txt](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/requirements.txt), excluding or including (for example, ``opencv-python``) some dependencies. +The below line initiates requirements installation into the previously activated virtual environment: + +```console +pip install -r requirements.txt +``` + +## Practice +In this part we are going to cover the following points: +1. create a classification model conversion pipeline and provide the inference +2. evaluate and test classification models + +If you only want to run the evaluation or test model pipelines, the "Model Conversion Pipeline" part can be skipped. + +### Model Conversion Pipeline +The code in this subchapter is located in the ``dnn_model_runner`` module and can be executed with the line: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_resnet50 +``` + +The following code contains the description of the below-listed steps: +1. instantiate PyTorch model +2. convert PyTorch model into ``.onnx`` +3. read the transferred network with OpenCV API +4. prepare input data +5. 
provide inference + +```python +# initialize PyTorch ResNet-50 model +original_model = models.resnet50(pretrained=True) + +# get the path to the PyTorch model converted into ONNX +full_model_path = get_pytorch_onnx_model(original_model) + +# read converted .onnx model with OpenCV API +opencv_net = cv2.dnn.readNetFromONNX(full_model_path) +print("OpenCV model was successfully read. Layer IDs: \n", opencv_net.getLayerNames()) + +# get preprocessed image +input_img = get_preprocessed_img("../data/squirrel_cls.jpg") + +# get ImageNet labels +imagenet_labels = get_imagenet_labels("../data/dnn/classification_classes_ILSVRC2012.txt") + +# obtain OpenCV DNN predictions +get_opencv_dnn_prediction(opencv_net, input_img, imagenet_labels) + +# obtain original PyTorch ResNet-50 predictions +get_pytorch_dnn_prediction(original_model, input_img, imagenet_labels) +``` + +To provide model inference we will use the below [squirrel photo](https://www.pexels.com/photo/brown-squirrel-eating-1564292) (under [CC0](https://www.pexels.com/terms-of-service/) license) corresponding to ImageNet class ID 335: +```console +fox squirrel, eastern fox squirrel, Sciurus niger +``` + +![Classification model input image](images/squirrel_cls.jpg) + +For the label decoding of the obtained prediction, we also need the ``imagenet_classes.txt`` file, which contains the full list of the ImageNet classes. + +Let's go deeper into each step by the example of the pretrained PyTorch ResNet-50: +* instantiate PyTorch ResNet-50 model: + +```python +# initialize PyTorch ResNet-50 model +original_model = models.resnet50(pretrained=True) +``` + +* convert PyTorch model into ONNX: + +```python +# define the directory to save the converted model in +onnx_model_path = "models" +# define the name of the converted model +onnx_model_name = "resnet50.onnx" + +# create the directory for the converted model +os.makedirs(onnx_model_path, exist_ok=True) + +# get full path to the converted model +full_model_path = os.path.join(onnx_model_path, onnx_model_name) + +# generate model input +generated_input = Variable( + torch.randn(1, 3, 224, 224) +) + +# model export into ONNX format +torch.onnx.export( + original_model, + generated_input, + full_model_path, + verbose=True, + input_names=["input"], + output_names=["output"], + opset_version=11 +) +``` + +After the successful execution of the above code, we will get ``models/resnet50.onnx``. + +* read the transferred network with cv.dnn.readNetFromONNX, passing the ONNX model obtained in the previous step into it: + +```python +# read converted .onnx model with OpenCV API +opencv_net = cv2.dnn.readNetFromONNX(full_model_path) +``` + +* prepare input data: + +```python +# read the image +input_img = cv2.imread(img_path, cv2.IMREAD_COLOR) +input_img = input_img.astype(np.float32) + +input_img = cv2.resize(input_img, (256, 256)) + +# define preprocess parameters +mean = np.array([0.485, 0.456, 0.406]) * 255.0 +scale = 1 / 255.0 +std = [0.229, 0.224, 0.225] + +# prepare input blob to fit the model input: +# 1. subtract mean +# 2. scale to set pixel values from 0 to 1 +input_blob = cv2.dnn.blobFromImage( + image=input_img, + scalefactor=scale, + size=(224, 224), # img target size + mean=mean, + swapRB=True, # BGR -> RGB + crop=True # center crop +) +# 3. divide by std +input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1) +``` + +In this step we read the image and prepare the model input with the cv.dnn.blobFromImage function, which returns a 4-dimensional blob.
+It should be noted that in cv.dnn.blobFromImage the mean value is subtracted first, and only then are the pixel values multiplied by the scale. Thus, ``mean`` is multiplied by ``255.0`` to reproduce the original image preprocessing order: + +```python +img /= 255.0 +img -= [0.485, 0.456, 0.406] +img /= [0.229, 0.224, 0.225] +``` + +* OpenCV cv.dnn.Net inference: + +```python +# set OpenCV DNN input +opencv_net.setInput(preproc_img) + +# OpenCV DNN inference +out = opencv_net.forward() +print("OpenCV DNN prediction: \n") +print("* shape: ", out.shape) + +# get the predicted class ID +imagenet_class_id = np.argmax(out) + +# get confidence +confidence = out[0][imagenet_class_id] +print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id])) +print("* confidence: {:.4f}".format(confidence)) +``` + +After the above code execution we will get the following output: + +```console +OpenCV DNN prediction: +* shape: (1, 1000) +* class ID: 335, label: fox squirrel, eastern fox squirrel, Sciurus niger +* confidence: 14.8308 +``` + +* PyTorch ResNet-50 model inference: + +```python +original_net.eval() +preproc_img = torch.FloatTensor(preproc_img) + +# inference +out = original_net(preproc_img) +print("\nPyTorch model prediction: \n") +print("* shape: ", out.shape) + +# get the predicted class ID +imagenet_class_id = torch.argmax(out, axis=1).item() +print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id])) + +# get confidence +confidence = out[0][imagenet_class_id] +print("* confidence: {:.4f}".format(confidence.item())) +``` + +After launching the above code we will get the following output: + +```console +PyTorch model prediction: +* shape: torch.Size([1, 1000]) +* class ID: 335, label: fox squirrel, eastern fox squirrel, Sciurus niger +* confidence: 14.8308 +``` + +The inference results of the original ResNet-50 model and cv.dnn.Net are equal. For the extended evaluation of the models we can use the ``py_to_py_cls`` script of the ``dnn_model_runner`` module. This module part will be described in the next subchapter. + +### Evaluation of the Models + +The ``dnn_model_runner`` module proposed in ``samples/dnn`` allows running the full evaluation pipeline on the ImageNet dataset and test execution for the following PyTorch classification models: +* alexnet +* vgg11 +* vgg13 +* vgg16 +* vgg19 +* resnet18 +* resnet34 +* resnet50 +* resnet101 +* resnet152 +* squeezenet1_0 +* squeezenet1_1 +* resnext50_32x4d +* resnext101_32x8d +* wide_resnet50_2 +* wide_resnet101_2 + +This list can also be extended with further appropriate evaluation pipeline configuration. + +#### Evaluation Mode + +The below line runs the module in evaluation mode: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name <model_name> +``` + +The classification model chosen from the list will be read into an OpenCV cv.dnn.Net object. Evaluation results of PyTorch and OpenCV models (accuracy, inference time, L1) will be written into the log file. Inference time values will also be depicted in a chart to generalize the obtained model information.
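+For intuition, the L1 metric mentioned above can be reproduced manually. A minimal sketch, assuming both predictions were converted to NumPy arrays (``l1_diff`` is an illustrative helper, not the module's actual API):
+
+```python
+import numpy as np
+
+def l1_diff(opencv_out, pytorch_out):
+    # mean absolute difference between the two (1, 1000) score arrays;
+    # convert the torch output first with out.detach().numpy()
+    return np.mean(np.abs(opencv_out - pytorch_out))
+```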
+ +Necessary evaluation configurations are defined in the [test_config.py](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) and can be modified to match the actual data locations: + +```python +@dataclass +class TestClsConfig: + batch_size: int = 50 + frame_size: int = 224 + img_root_dir: str = "./ILSVRC2012_img_val" + # location of image-class matching + img_cls_file: str = "./val.txt" + bgr_to_rgb: bool = True +``` + +To initiate the evaluation of the PyTorch ResNet-50, run the following line: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name resnet50 +``` + +After script launch, the log file with evaluation data will be generated in ``dnn_model_runner/dnn_conversion/logs``: + +```console +The model PyTorch resnet50 was successfully obtained and converted to OpenCV DNN resnet50 +===== Running evaluation of the model with the following params: + * val data location: ./ILSVRC2012_img_val + * log file location: dnn_model_runner/dnn_conversion/logs/PyTorch_resnet50_log.txt +``` + +#### Test Mode + +The below line runs the module in test mode, namely it provides the steps for the model inference: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name <model_name> --test True --default_img_preprocess <True/False> --evaluate False +``` + +Here the ``default_img_preprocess`` key defines whether you'd like to parametrize the model test process with some particular values or use the default values, for example, ``scale``, ``mean`` or ``std``. + +The test configuration is represented in the [test_config.py](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) ``TestClsModuleConfig`` class: + +```python +@dataclass +class TestClsModuleConfig: + cls_test_data_dir: str = "../data" + test_module_name: str = "classification" + test_module_path: str = "classification.py" + input_img: str = os.path.join(cls_test_data_dir, "squirrel_cls.jpg") + model: str = "" + + frame_height: str = str(TestClsConfig.frame_size) + frame_width: str = str(TestClsConfig.frame_size) + scale: str = "1.0" + mean: List[str] = field(default_factory=lambda: ["0.0", "0.0", "0.0"]) + std: List[str] = field(default_factory=list) + crop: str = "False" + rgb: str = "True" + rsz_height: str = "" + rsz_width: str = "" + classes: str = os.path.join(cls_test_data_dir, "dnn", "classification_classes_ILSVRC2012.txt") +``` + +The default image preprocessing options are defined in [default_preprocess_config.py](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/default_preprocess_config.py). For instance: + +```python +BASE_IMG_SCALE_FACTOR = 1 / 255.0 +PYTORCH_RSZ_HEIGHT = 256 +PYTORCH_RSZ_WIDTH = 256 + +pytorch_resize_input_blob = { + "mean": ["123.675", "116.28", "103.53"], + "scale": str(BASE_IMG_SCALE_FACTOR), + "std": ["0.229", "0.224", "0.225"], + "crop": "True", + "rgb": "True", + "rsz_height": str(PYTORCH_RSZ_HEIGHT), + "rsz_width": str(PYTORCH_RSZ_WIDTH) +} +``` + +The basis of the model testing is represented in [samples/dnn/classification.py](https://github.com/opencv/opencv/blob/master/samples/dnn/classification.py). ``classification.py`` can be executed autonomously with the converted model provided in ``--input`` and the populated parameters for cv.dnn.blobFromImage.
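+If you prefer to skip the sample's argument parsing, the same standalone check can be reproduced in a few lines, reusing only the calls already shown above (the paths here are assumptions; this is a sketch, not the sample itself):
+
+```python
+import cv2
+import numpy as np
+
+net = cv2.dnn.readNetFromONNX("models/resnet50.onnx")
+
+# PyTorch-style preprocessing: resize to 256, center crop to 224,
+# scale to [0, 1], subtract the ImageNet mean, divide by std
+img = cv2.imread("../data/squirrel_cls.jpg").astype(np.float32)
+img = cv2.resize(img, (256, 256))
+blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (224, 224),
+                             np.array([0.485, 0.456, 0.406]) * 255.0,
+                             swapRB=True, crop=True)
+blob[0] /= np.asarray([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)
+
+net.setInput(blob)
+out = net.forward()
+print("class ID:", np.argmax(out))  # expected: 335 for the squirrel photo
+```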
+ +To reproduce the OpenCV steps described in "Model Conversion Pipeline" from scratch with ``dnn_model_runner``, execute the line below: + +```console +python -m dnn_model_runner.dnn_conversion.pytorch.classification.py_to_py_cls --model_name resnet50 --test True --default_img_preprocess True --evaluate False +``` + +The network prediction is depicted in the top left corner of the output window: + +![ResNet50 OpenCV inference output](images/pytorch_resnet50_opencv_test_res.jpg) diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_classification/tf_cls_model_conversion_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_classification/tf_cls_model_conversion_tutorial.md new file mode 100644 index 0000000000..c2da541029 --- /dev/null +++ b/doc/tutorials/dnn/dnn_pytorch_tf_classification/tf_cls_model_conversion_tutorial.md @@ -0,0 +1,360 @@ +# Conversion of TensorFlow Classification Models and Launch with OpenCV Python {#tf_cls_tutorial_dnn_conversion} + +| | | +| -: | :- | +| Original author | Anastasia Murzova | +| Compatibility | OpenCV >= 4.5 | + +## Goals +In this tutorial you will learn how to: +* obtain frozen graphs of TensorFlow (TF) classification models +* run converted TensorFlow model with OpenCV Python API +* obtain an evaluation of the TensorFlow and OpenCV DNN models + +We will explore the above-listed points by the example of the MobileNet architecture. + +## Introduction +Let's briefly review the key concepts involved in the pipeline of TensorFlow model transition with OpenCV API. The initial step in the conversion of TensorFlow models into cv.dnn.Net +is obtaining the frozen TF model graph. A frozen graph defines the combination of the model graph structure with the kept values of the required variables, for example, weights. Usually the frozen graph is saved in [protobuf](https://en.wikipedia.org/wiki/Protocol_Buffers) (```.pb```) files. +Once the model ``.pb`` file has been generated, it can be read with the cv.dnn.readNetFromTensorflow function. + +## Requirements +To be able to experiment with the below code you will need to install a set of libraries. We will use a virtual environment with python3.7+ for this: + +```console +virtualenv -p /usr/bin/python3.7 <env_dir> +source <env_dir>/bin/activate +``` + +For OpenCV-Python building from source, follow the corresponding instructions from the @ref tutorial_py_table_of_contents_setup. + +Before you start the installation of the libraries, you can customize the [requirements.txt](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/requirements.txt), excluding or including (for example, ``opencv-python``) some dependencies. +The below line initiates requirements installation into the previously activated virtual environment: + +```console +pip install -r requirements.txt +``` + +## Practice +In this part we are going to cover the following points: +1. create a TF classification model conversion pipeline and provide the inference +2. evaluate and test TF classification models + +If you only want to run the evaluation or test model pipelines, the "Model Conversion Pipeline" tutorial part can be skipped. + +### Model Conversion Pipeline +The code in this subchapter is located in the ``dnn_model_runner`` module and can be executed with the line: + +```console +python -m dnn_model_runner.dnn_conversion.tf.classification.py_to_py_mobilenet +``` + +The following code contains the description of the below-listed steps: +1. instantiate TF model +2. create TF frozen graph +3. read TF frozen graph with OpenCV API +4. prepare input data +5. 
provide inference + +```python +# initialize TF MobileNet model +original_tf_model = MobileNet( + include_top=True, + weights="imagenet" +) + +# get TF frozen graph path +full_pb_path = get_tf_model_proto(original_tf_model) + +# read frozen graph with OpenCV API +opencv_net = cv2.dnn.readNetFromTensorflow(full_pb_path) +print("OpenCV model was successfully read. Model layers: \n", opencv_net.getLayerNames()) + +# get preprocessed image +input_img = get_preprocessed_img("../data/squirrel_cls.jpg") + +# get ImageNet labels +imagenet_labels = get_imagenet_labels("../data/dnn/classification_classes_ILSVRC2012.txt") + +# obtain OpenCV DNN predictions +get_opencv_dnn_prediction(opencv_net, input_img, imagenet_labels) + +# obtain TF model predictions +get_tf_dnn_prediction(original_tf_model, input_img, imagenet_labels) +``` + +To provide model inference we will use the below [squirrel photo](https://www.pexels.com/photo/brown-squirrel-eating-1564292) (under [CC0](https://www.pexels.com/terms-of-service/) license) corresponding to ImageNet class ID 335: +```console +fox squirrel, eastern fox squirrel, Sciurus niger +``` + +![Classification model input image](images/squirrel_cls.jpg) + +For the label decoding of the obtained prediction, we also need the ``imagenet_classes.txt`` file, which contains the full list of the ImageNet classes. + +Let's go deeper into each step by the example of the pretrained TF MobileNet: +* instantiate TF model: + +```python +# initialize TF MobileNet model +original_tf_model = MobileNet( + include_top=True, + weights="imagenet" +) +``` + +* create TF frozen graph: + +```python +# define the directory for the .pb model +pb_model_path = "models" + +# define the name of the .pb model +pb_model_name = "mobilenet.pb" + +# create the directory for the model +os.makedirs(pb_model_path, exist_ok=True) + +# get model TF graph +tf_model_graph = tf.function(lambda x: tf_model(x)) + +# get concrete function +tf_model_graph = tf_model_graph.get_concrete_function( + tf.TensorSpec(tf_model.inputs[0].shape, tf_model.inputs[0].dtype)) + +# obtain frozen concrete function +frozen_tf_func = convert_variables_to_constants_v2(tf_model_graph) +# get frozen graph +frozen_tf_func.graph.as_graph_def() + +# save full tf model +tf.io.write_graph(graph_or_graph_def=frozen_tf_func.graph, + logdir=pb_model_path, + name=pb_model_name, + as_text=False) +``` + +After the successful execution of the above code, we will get a frozen graph in ``models/mobilenet.pb``. + +* read the TF frozen graph with cv.dnn.readNetFromTensorflow, passing the ``mobilenet.pb`` obtained in the previous step into it: + +```python +# get TF frozen graph path +full_pb_path = get_tf_model_proto(original_tf_model) +``` + +* prepare input data with the cv2.dnn.blobFromImage function: + +```python +# read the image +input_img = cv2.imread(img_path, cv2.IMREAD_COLOR) +input_img = input_img.astype(np.float32) + +# define preprocess parameters +mean = np.array([1.0, 1.0, 1.0]) * 127.5 +scale = 1 / 127.5 + +# prepare input blob to fit the model input: +# 1. subtract mean +# 2. scale to set pixel values from -1 to 1 +input_blob = cv2.dnn.blobFromImage( + image=input_img, + scalefactor=scale, + size=(224, 224), # img target size + mean=mean, + swapRB=True, # BGR -> RGB + crop=True # center crop +) +print("Input blob shape: {}\n".format(input_blob.shape)) +``` + +Please pay attention to the preprocessing order in the cv2.dnn.blobFromImage function.
The mean value is subtracted first, and only then are the pixel values multiplied by the defined scale. +Therefore, to reproduce the image preprocessing pipeline from the TF [``mobilenet.preprocess_input``](https://github.com/tensorflow/tensorflow/blob/02032fb477e9417197132648ec81e75beee9063a/tensorflow/python/keras/applications/mobilenet.py#L443-L445) function, we multiply ``mean`` by ``127.5``. + +As a result, a 4-dimensional ``input_blob`` is obtained: + + ``Input blob shape: (1, 3, 224, 224)`` + +* provide OpenCV cv.dnn.Net inference: + +```python +# set OpenCV DNN input +opencv_net.setInput(preproc_img) + +# OpenCV DNN inference +out = opencv_net.forward() +print("OpenCV DNN prediction: \n") +print("* shape: ", out.shape) + +# get the predicted class ID +imagenet_class_id = np.argmax(out) + +# get confidence +confidence = out[0][imagenet_class_id] +print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id])) +print("* confidence: {:.4f}\n".format(confidence)) +``` + +After the above code execution we will get the following output: + +```console +OpenCV DNN prediction: +* shape: (1, 1000) +* class ID: 335, label: fox squirrel, eastern fox squirrel, Sciurus niger +* confidence: 0.9525 +``` + +* provide TF MobileNet inference: + +```python +# inference +preproc_img = preproc_img.transpose(0, 2, 3, 1) +print("TF input blob shape: {}\n".format(preproc_img.shape)) + +out = original_net(preproc_img) + +print("\nTensorFlow model prediction: \n") +print("* shape: ", out.shape) + +# get the predicted class ID +imagenet_class_id = np.argmax(out) +print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id])) + +# get confidence +confidence = out[0][imagenet_class_id] +print("* confidence: {:.4f}".format(confidence)) +``` + +To fit the TF model input, ``input_blob`` was transposed: + +```console +TF input blob shape: (1, 224, 224, 3) +``` + +The TF inference results are the following: + +```console +TensorFlow model prediction: +* shape: (1, 1000) +* class ID: 335, label: fox squirrel, eastern fox squirrel, Sciurus niger +* confidence: 0.9525 +``` + +As can be seen from the experiments, the OpenCV and TF inference results are equal. + +### Evaluation of the Models + +The ``dnn_model_runner`` module proposed in ``samples/dnn`` allows running the full evaluation pipeline on the ImageNet dataset and test execution for the following TensorFlow classification models: +* vgg16 +* vgg19 +* resnet50 +* resnet101 +* resnet152 +* densenet121 +* densenet169 +* densenet201 +* inceptionresnetv2 +* inceptionv3 +* mobilenet +* mobilenetv2 +* nasnetlarge +* nasnetmobile +* xception + +This list can also be extended with further appropriate evaluation pipeline configuration. + +#### Evaluation Mode + +The below line runs the module in evaluation mode: + +```console +python -m dnn_model_runner.dnn_conversion.tf.classification.py_to_py_cls --model_name <model_name> +``` + +The classification model chosen from the list will be read into an OpenCV ``cv.dnn_Net`` object. Evaluation results of TF and OpenCV models (accuracy, inference time, L1) will be written into the log file. Inference time values will also be depicted in a chart to generalize the obtained model information.
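+As a rough illustration of how the inference time can be measured (a simple sketch; the module's actual timing code may differ):
+
+```python
+import time
+
+# opencv_net and input_blob come from the conversion steps above
+start = time.time()
+opencv_net.setInput(input_blob)
+out = opencv_net.forward()
+print("OpenCV DNN forward pass took {:.4f} s".format(time.time() - start))
+```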
+ +Necessary evaluation configurations are defined in the [test_config.py](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) and can be modified to match the actual data locations: + +```python +@dataclass +class TestClsConfig: + batch_size: int = 50 + frame_size: int = 224 + img_root_dir: str = "./ILSVRC2012_img_val" + # location of image-class matching + img_cls_file: str = "./val.txt" + bgr_to_rgb: bool = True +``` + +The values from ``TestClsConfig`` can be customized in accordance with the chosen model. + +To initiate the evaluation of the TensorFlow MobileNet, run the following line: + +```console +python -m dnn_model_runner.dnn_conversion.tf.classification.py_to_py_cls --model_name mobilenet +``` + +After script launch, the log file with evaluation data will be generated in ``dnn_model_runner/dnn_conversion/logs``: + +```console +===== Running evaluation of the model with the following params: + * val data location: ./ILSVRC2012_img_val + * log file location: dnn_model_runner/dnn_conversion/logs/TF_mobilenet_log.txt +``` + +#### Test Mode + +The below line runs the module in test mode, namely it provides the steps for the model inference: + +```console +python -m dnn_model_runner.dnn_conversion.tf.classification.py_to_py_cls --model_name <model_name> --test True --default_img_preprocess <True/False> --evaluate False +``` + +Here the ``default_img_preprocess`` key defines whether you'd like to parametrize the model test process with some particular values or use the default values, for example, ``scale``, ``mean`` or ``std``. + +The test configuration is represented in the [test_config.py](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) ``TestClsModuleConfig`` class: + +```python +@dataclass +class TestClsModuleConfig: + cls_test_data_dir: str = "../data" + test_module_name: str = "classification" + test_module_path: str = "classification.py" + input_img: str = os.path.join(cls_test_data_dir, "squirrel_cls.jpg") + model: str = "" + + frame_height: str = str(TestClsConfig.frame_size) + frame_width: str = str(TestClsConfig.frame_size) + scale: str = "1.0" + mean: List[str] = field(default_factory=lambda: ["0.0", "0.0", "0.0"]) + std: List[str] = field(default_factory=list) + crop: str = "False" + rgb: str = "True" + rsz_height: str = "" + rsz_width: str = "" + classes: str = os.path.join(cls_test_data_dir, "dnn", "classification_classes_ILSVRC2012.txt") +``` + +The default image preprocessing options are defined in ``default_preprocess_config.py``. For instance, for MobileNet: + +```python +tf_input_blob = { + "mean": ["127.5", "127.5", "127.5"], + "scale": str(1 / 127.5), + "std": [], + "crop": "True", + "rgb": "True" +} +``` + +The basis of the model testing is represented in [samples/dnn/classification.py](https://github.com/opencv/opencv/blob/master/samples/dnn/classification.py). ``classification.py`` can be executed autonomously with the converted model provided in ``--input`` and the populated parameters for cv.dnn.blobFromImage.
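+The string values from ``tf_input_blob`` map directly onto cv.dnn.blobFromImage parameters. A minimal sketch of that correspondence (illustrative only, reusing the preprocessing already shown above):
+
+```python
+import cv2
+import numpy as np
+
+# mirror the tf_input_blob dictionary values
+img = cv2.imread("../data/squirrel_cls.jpg").astype(np.float32)
+blob = cv2.dnn.blobFromImage(
+    image=img,
+    scalefactor=1 / 127.5,         # "scale"
+    size=(224, 224),
+    mean=(127.5, 127.5, 127.5),    # "mean"
+    swapRB=True,                   # "rgb"
+    crop=True                      # "crop"
+)
+```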
+ +To reproduce the OpenCV steps described in "Model Conversion Pipeline" from scratch with ``dnn_model_runner``, execute the line below: + +```console +python -m dnn_model_runner.dnn_conversion.tf.classification.py_to_py_cls --model_name mobilenet --test True --default_img_preprocess True --evaluate False +``` + +The network prediction is depicted in the top left corner of the output window: + +![TF MobileNet OpenCV inference output](images/tf_mobilenet_opencv_test_res.jpg) diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/opencv_bus_res.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/opencv_bus_res.jpg new file mode 100644 index 0000000000..8bdc602068 Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/opencv_bus_res.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/pexels_double_decker_bus.jpg b/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/pexels_double_decker_bus.jpg new file mode 100644 index 0000000000..aca8be09eb Binary files /dev/null and b/doc/tutorials/dnn/dnn_pytorch_tf_detection/images/pexels_double_decker_bus.jpg differ diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_detection/tf_det_model_conversion_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_detection/tf_det_model_conversion_tutorial.md new file mode 100644 index 0000000000..04388cbaf4 --- /dev/null +++ b/doc/tutorials/dnn/dnn_pytorch_tf_detection/tf_det_model_conversion_tutorial.md @@ -0,0 +1,140 @@ +# Conversion of TensorFlow Detection Models and Launch with OpenCV Python {#tf_det_tutorial_dnn_conversion} + +| | | +| -: | :- | +| Original author | Anastasia Murzova | +| Compatibility | OpenCV >= 4.5 | + +## Goals +In this tutorial you will learn how to: +* obtain frozen graphs of TensorFlow (TF) detection models +* run converted TensorFlow model with OpenCV Python API + +We will explore the above-listed points by the example of SSD MobileNetV1. + +## Introduction +Let's briefly review the key concepts involved in the pipeline of TensorFlow model transition with OpenCV API. The initial step in the conversion of TensorFlow models into cv.dnn.Net +is obtaining the frozen TF model graph. A frozen graph defines the combination of the model graph structure with the kept values of the required variables, for example, weights. The frozen graph is saved in [protobuf](https://en.wikipedia.org/wiki/Protocol_Buffers) (```.pb```) files. +There are special functions for reading ``.pb`` graphs in OpenCV: cv.dnn.readNetFromTensorflow and cv.dnn.readNet. + +## Requirements +To be able to experiment with the below code you will need to install a set of libraries. We will use a virtual environment with python3.7+ for this: + +```console +virtualenv -p /usr/bin/python3.7 <env_dir> +source <env_dir>/bin/activate +``` + +For OpenCV-Python building from source, follow the corresponding instructions from the @ref tutorial_py_table_of_contents_setup. + +Before you start the installation of the libraries, you can customize the [requirements.txt](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/requirements.txt), excluding or including (for example, ``opencv-python``) some dependencies. +The below line initiates requirements installation into the previously activated virtual environment: + +```console +pip install -r requirements.txt +``` + +## Practice +In this part we are going to cover the following points: +1. prepare a TF detection model for conversion +2. 
provide the inference, process prediction results + +### Model Preparation +The code in this subchapter is located in the ``samples/dnn/dnn_model_runner`` module and can be executed with the below line: + +```console +python -m dnn_model_runner.dnn_conversion.tf.detection.py_to_py_ssd_mobilenet +``` + +The following code contains the steps of the TF SSD MobileNetV1 model retrieval: + +```python + tf_model_name = 'ssd_mobilenet_v1_coco_2017_11_17' + graph_extraction_dir = "./" + frozen_graph_path = extract_tf_frozen_graph(tf_model_name, graph_extraction_dir) + print("Frozen graph path for {}: {}".format(tf_model_name, frozen_graph_path)) +``` + +In the ``extract_tf_frozen_graph`` function we extract the ``frozen_inference_graph.pb`` provided in the model archive for further processing: + +```python +# define model archive name +tf_model_tar = model_name + '.tar.gz' +# define link to retrieve model archive +model_link = DETECTION_MODELS_URL + tf_model_tar + +tf_frozen_graph_name = 'frozen_inference_graph' + +try: + urllib.request.urlretrieve(model_link, tf_model_tar) +except Exception: + print("TF {} was not retrieved: {}".format(model_name, model_link)) + return + +print("TF {} was retrieved.".format(model_name)) + +tf_model_tar = tarfile.open(tf_model_tar) +frozen_graph_path = "" + +for model_tar_elem in tf_model_tar.getmembers(): + if tf_frozen_graph_name in os.path.basename(model_tar_elem.name): + tf_model_tar.extract(model_tar_elem, extracted_model_path) + frozen_graph_path = os.path.join(extracted_model_path, model_tar_elem.name) + break +tf_model_tar.close() +``` + +After the successful execution of the above code we will get the following output: + +```console +TF ssd_mobilenet_v1_coco_2017_11_17 was retrieved. +Frozen graph path for ssd_mobilenet_v1_coco_2017_11_17: ./ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb +``` + +To provide model inference we will use the below [double-decker bus photo](https://www.pexels.com/photo/bus-and-car-on-one-way-street-3626589/) (under [Pexels](https://www.pexels.com/license/) license): + +![Double-decker bus](images/pexels_double_decker_bus.jpg) + +To initiate the test process we need to provide an appropriate model configuration. We will use [``ssd_mobilenet_v1_coco.config``](https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/ssd_mobilenet_v1_coco.config) from [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection#tensorflow-object-detection-api). +The TensorFlow Object Detection API framework contains helpful mechanisms for object detection model manipulations. + +We will use this configuration to provide a text graph representation. To generate the ``.pbtxt`` file we will use the corresponding [``samples/dnn/tf_text_graph_ssd.py``](https://github.com/opencv/opencv/blob/master/samples/dnn/tf_text_graph_ssd.py) script: + +```console +python tf_text_graph_ssd.py --input ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb --config ssd_mobilenet_v1_coco_2017_11_17/ssd_mobilenet_v1_coco.config --output ssd_mobilenet_v1_coco_2017_11_17.pbtxt +``` + +After successful execution, ``ssd_mobilenet_v1_coco_2017_11_17.pbtxt`` will be created. + +Before we run ``object_detection.py``, let's have a look at the default values for the SSD MobileNetV1 test process configuration.
They are located in [``models.yml``](https://github.com/opencv/opencv/blob/master/samples/dnn/models.yml): + +```yml +ssd_tf: + model: "ssd_mobilenet_v1_coco_2017_11_17.pb" + config: "ssd_mobilenet_v1_coco_2017_11_17.pbtxt" + mean: [0, 0, 0] + scale: 1.0 + width: 300 + height: 300 + rgb: true + classes: "object_detection_classes_coco.txt" + sample: "object_detection" +``` + +To run the model with these values, we need to provide the frozen graph model ``ssd_mobilenet_v1_coco_2017_11_17.pb`` and the text graph ``ssd_mobilenet_v1_coco_2017_11_17.pbtxt``: + +```console +python object_detection.py ssd_tf --input ../data/pexels_double_decker_bus.jpg +``` + +This line is equivalent to: + +```console +python object_detection.py --model ssd_mobilenet_v1_coco_2017_11_17.pb --config ssd_mobilenet_v1_coco_2017_11_17.pbtxt --input ../data/pexels_double_decker_bus.jpg --width 300 --height 300 --classes ../data/dnn/object_detection_classes_coco.txt +``` + +The result is: + +![OpenCV SSD bus result](images/opencv_bus_res.jpg) + +There are several helpful parameters which can also be customized to correct the results: the threshold (``--thr``) and non-maximum suppression (``--nms``) values. diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/pytorch_sem_segm_model_conversion_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/pytorch_sem_segm_model_conversion_tutorial.md new file mode 100644 index 0000000000..368007ee22 --- /dev/null +++ b/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/pytorch_sem_segm_model_conversion_tutorial.md @@ -0,0 +1,332 @@ +# Conversion of PyTorch Segmentation Models and Launch with OpenCV {#pytorch_segm_tutorial_dnn_conversion} + +## Goals +In this tutorial you will learn how to: +* convert PyTorch segmentation models +* run converted PyTorch model with OpenCV +* obtain an evaluation of the PyTorch and OpenCV DNN models + +We will explore the above-listed points by the example of the FCN ResNet-50 architecture. + +## Introduction +The key points involved in the transition pipeline of the [PyTorch classification](https://link_to_cls_tutorial) and segmentation models with OpenCV API are the same. The first step is transferring the model into [ONNX](https://onnx.ai/about.html) format with the PyTorch built-in function [``torch.onnx.export``](https://pytorch.org/docs/stable/onnx.html#torch.onnx.export). +Then the obtained ``.onnx`` model is passed into cv.dnn.readNetFromONNX, which returns a cv.dnn.Net object ready for DNN manipulations. + +## Practice +In this part we are going to cover the following points: +1. create a segmentation model conversion pipeline and provide the inference +2. evaluate and test segmentation models + +If you only want to run the evaluation or test model pipelines, the "Model Conversion Pipeline" part can be skipped. + +### Model Conversion Pipeline +The code in this subchapter is located in the ``dnn_model_runner`` module and can be executed with the line: + +``` +python -m dnn_model_runner.dnn_conversion.pytorch.segmentation.py_to_py_fcnresnet50 +``` + +The following code contains the description of the below-listed steps: +1. instantiate PyTorch model +2. convert PyTorch model into ``.onnx`` +3. read the transferred network with OpenCV API +4. prepare input data +5. provide inference +6. get colored masks from predictions +7. 
visualize results + +```python +# initialize PyTorch FCN ResNet-50 model +original_model = models.segmentation.fcn_resnet50(pretrained=True) + +# get the path to the PyTorch model converted into ONNX +full_model_path = get_pytorch_onnx_model(original_model) + +# read converted .onnx model with OpenCV API +opencv_net = cv2.dnn.readNetFromONNX(full_model_path) +print("OpenCV model was successfully read. Layer IDs: \n", opencv_net.getLayerNames()) + +# get preprocessed image +img, input_img = get_processed_imgs("test_data/sem_segm/2007_000033.jpg") + +# obtain OpenCV DNN predictions +opencv_prediction = get_opencv_dnn_prediction(opencv_net, input_img) + +# obtain original PyTorch FCN ResNet-50 predictions +pytorch_prediction = get_pytorch_dnn_prediction(original_model, input_img) + +pascal_voc_classes, pascal_voc_colors = read_colors_info("test_data/sem_segm/pascal-classes.txt") + +# obtain colored segmentation masks +opencv_colored_mask = get_colored_mask(img.shape, opencv_prediction, pascal_voc_colors) +pytorch_colored_mask = get_colored_mask(img.shape, pytorch_prediction, pascal_voc_colors) + +# obtain palette of PASCAL VOC colors +color_legend = get_legend(pascal_voc_classes, pascal_voc_colors) + +cv2.imshow('PyTorch Colored Mask', pytorch_colored_mask) +cv2.imshow('OpenCV DNN Colored Mask', opencv_colored_mask) +cv2.imshow('Color Legend', color_legend) + +cv2.waitKey(0) +``` + +To provide the model inference we will use the below picture from the [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) validation dataset: + +![PASCAL VOC img](images/2007_000033.jpg) + +The target segmented result is: + +![PASCAL VOC ground truth](images/2007_000033.png) + +For decoding the PASCAL VOC colors and mapping them to the predicted masks, we also need the ``pascal-classes.txt`` file, which contains the full list of the PASCAL VOC classes and the corresponding colors. + +Let's go deeper into each code step by the example of the pretrained PyTorch FCN ResNet-50: +* instantiate PyTorch FCN ResNet-50 model: + +```python +# initialize PyTorch FCN ResNet-50 model +original_model = models.segmentation.fcn_resnet50(pretrained=True) +``` + +* convert PyTorch model into ONNX format: + +```python +# define the directory to save the converted model in +onnx_model_path = "models" +# define the name of the converted model +onnx_model_name = "fcnresnet50.onnx" + +# create the directory for the converted model +os.makedirs(onnx_model_path, exist_ok=True) + +# get full path to the converted model +full_model_path = os.path.join(onnx_model_path, onnx_model_name) + +# generate model input to build the graph +generated_input = Variable( + torch.randn(1, 3, 500, 500) +) + +# model export into ONNX format +torch.onnx.export( + original_model, + generated_input, + full_model_path, + verbose=True, + input_names=["input"], + output_names=["output"], + opset_version=11 +) +``` + +The code from this step does not differ from the classification conversion case. Thus, after the successful execution of the above code, we will get ``models/fcnresnet50.onnx``.
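+Optionally, the exported graph can be validated with the ``onnx`` package before it is handed to OpenCV (an extra sanity check, not part of the original sample):
+
+```python
+import onnx
+
+# load and structurally validate the exported graph;
+# check_model raises an exception if the model is malformed
+onnx_model = onnx.load("models/fcnresnet50.onnx")
+onnx.checker.check_model(onnx_model)
+print("fcnresnet50.onnx passed the ONNX checker")
+```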
+
+* read the transferred network with cv.dnn.readNetFromONNX, passing the ONNX model obtained in the previous step into it:
+
+```python
+# read converted .onnx model with OpenCV API
+opencv_net = cv2.dnn.readNetFromONNX(full_model_path)
+```
+
+* prepare input data:
+
+```python
+# read the image
+input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
+input_img = input_img.astype(np.float32)
+
+# target image sizes
+img_height = input_img.shape[0]
+img_width = input_img.shape[1]
+
+# define preprocess parameters
+mean = np.array([0.485, 0.456, 0.406]) * 255.0
+scale = 1 / 255.0
+std = [0.229, 0.224, 0.225]
+
+# prepare input blob to fit the model input:
+# 1. subtract mean
+# 2. scale to set pixel values from 0 to 1
+input_blob = cv2.dnn.blobFromImage(
+    image=input_img,
+    scalefactor=scale,
+    size=(img_width, img_height),  # img target size
+    mean=mean,
+    swapRB=True,  # BGR -> RGB
+    crop=False  # no center crop
+)
+# 3. divide by std
+input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
+```
+
+In this step we read the image and prepare the model input with the cv2.dnn.blobFromImage function, which returns a 4-dimensional blob.
+Note that ``cv2.dnn.blobFromImage`` first subtracts the mean value and only then scales the pixel values. Thus, ``mean`` is multiplied by ``255.0`` to reproduce the original image preprocessing order:
+
+```python
+img /= 255.0
+img -= [0.485, 0.456, 0.406]
+img /= [0.229, 0.224, 0.225]
+```
+
+* OpenCV ``cv.dnn_Net`` inference:
+
+```python
+# set OpenCV DNN input
+opencv_net.setInput(preproc_img)
+
+# OpenCV DNN inference
+out = opencv_net.forward()
+print("OpenCV DNN segmentation prediction: \n")
+print("* shape: ", out.shape)
+
+# get IDs of predicted classes
+out_predictions = np.argmax(out[0], axis=0)
+```
+
+Executing the above code produces the following output:
+
+```
+OpenCV DNN segmentation prediction:
+* shape:  (1, 21, 500, 500)
+```
+
+Each of the 21 prediction channels, where 21 is the number of PASCAL VOC classes, contains probabilities indicating how likely each pixel corresponds to that class.
+
+* PyTorch FCN ResNet-50 model inference:
+
+```python
+original_net.eval()
+preproc_img = torch.FloatTensor(preproc_img)
+
+with torch.no_grad():
+    # obtaining unnormalized probabilities for each class
+    out = original_net(preproc_img)['out']
+
+print("\nPyTorch segmentation model prediction: \n")
+print("* shape: ", out.shape)
+
+# get IDs of predicted classes
+out_predictions = out[0].argmax(dim=0)
+```
+
+Running the above code produces the following output:
+
+```
+PyTorch segmentation model prediction:
+* shape:  torch.Size([1, 21, 366, 500])
+```
+
+The PyTorch prediction also contains probabilities for each class.
+
+* get colored masks from predictions:
+
+```python
+# convert mask values into PASCAL VOC colors
+processed_mask = np.stack([colors[color_id] for color_id in segm_mask.flatten()])
+
+# reshape mask into 3-channel image
+processed_mask = processed_mask.reshape(mask_height, mask_width, 3)
+processed_mask = cv2.resize(processed_mask, (img_width, img_height), interpolation=cv2.INTER_NEAREST).astype(
+    np.uint8)
+
+# convert colored mask from BGR to RGB for compatibility with PASCAL VOC colors
+processed_mask = cv2.cvtColor(processed_mask, cv2.COLOR_BGR2RGB)
+```
+
+In this step we map the predicted class IDs from the segmentation mask to the appropriate PASCAL VOC colors.
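+
+To make the color-mapping step more tangible, here is a toy, self-contained illustration of the same ``np.stack`` indexing on a 2x2 mask with a three-color palette (the palette values happen to be the first three PASCAL VOC colors; the snippet is illustrative and not part of the sample code):
+
+```python
+import numpy as np
+
+# three-entry palette: background, aeroplane, bicycle
+colors = np.array([[0, 0, 0], [128, 0, 0], [0, 128, 0]], dtype=np.uint8)
+# a 2x2 "mask" of predicted class IDs
+segm_mask = np.array([[0, 1], [2, 1]])
+
+# look up the color of every mask cell, then restore the 2D layout
+processed_mask = np.stack([colors[color_id] for color_id in segm_mask.flatten()])
+processed_mask = processed_mask.reshape(2, 2, 3)
+
+print(processed_mask[0, 1])  # [128   0   0] -> the class 1 color
+```
+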
+Let's have a look at the results:
+
+![OpenCV Colored Mask](images/legend_opencv_color_mask.png)
+
+For extended evaluation of the models, we can use the ``py_to_py_segm`` script of the ``dnn_model_runner`` module. This part of the module is described in the next subchapter.
+
+### Evaluation of the Models
+
+The ``dnn_model_runner`` module provided in ``dnn/samples`` allows running the full evaluation pipeline on the PASCAL VOC dataset and test execution for the following PyTorch segmentation models:
+* FCN ResNet-50
+* FCN ResNet-101
+
+This list can also be extended with an appropriate evaluation pipeline configuration.
+
+#### Evaluation Mode
+
+The line below runs the module in evaluation mode:
+
+```
+python -m dnn_model_runner.dnn_conversion.pytorch.segmentation.py_to_py_segm --model_name
+```
+
+The segmentation model chosen from the list will be read into an OpenCV ``cv.dnn_Net`` object. Evaluation results of the PyTorch and OpenCV models (pixel accuracy, mean IoU, inference time) will be written into the log file. Inference time values will also be depicted in a chart to summarize the obtained model information.
+
+The necessary evaluation configurations are defined in [``test_config.py``](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py):
+
+```python
+@dataclass
+class TestSegmConfig:
+    frame_size: int = 500
+    img_root_dir: str = "./VOC2012"
+    img_dir: str = os.path.join(img_root_dir, "JPEGImages/")
+    img_segm_gt_dir: str = os.path.join(img_root_dir, "SegmentationClass/")
+    # reduced val: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/data/pascal/seg11valid.txt
+    segm_val_file: str = os.path.join(img_root_dir, "ImageSets/Segmentation/seg11valid.txt")
+    colour_file_cls: str = os.path.join(img_root_dir, "ImageSets/Segmentation/pascal-classes.txt")
+```
+
+These values can be modified in accordance with the chosen model pipeline.
+
+To initiate the evaluation of the PyTorch FCN ResNet-50, run the following line:
+
+```
+python -m dnn_model_runner.dnn_conversion.pytorch.segmentation.py_to_py_segm --model_name fcnresnet50
+```
+
+#### Test Mode
+
+The line below runs the module in test mode, which walks through the model inference steps:
+
+```
+python -m dnn_model_runner.dnn_conversion.pytorch.segmentation.py_to_py_segm --model_name --test True --default_img_preprocess --evaluate False
+```
+
+Here the ``default_img_preprocess`` key defines whether you'd like to parametrize the model test process with particular values or use the default values of, for example, ``scale``, ``mean`` or ``std``.
+
+The test configuration is defined in the [``test_config.py``](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) ``TestSegmModuleConfig`` class:
+
+```python
+@dataclass
+class TestSegmModuleConfig:
+    segm_test_data_dir: str = "test_data/sem_segm"
+    test_module_name: str = "segmentation"
+    test_module_path: str = "segmentation.py"
+    input_img: str = os.path.join(segm_test_data_dir, "2007_000033.jpg")
+    model: str = ""
+
+    frame_height: str = str(TestSegmConfig.frame_size)
+    frame_width: str = str(TestSegmConfig.frame_size)
+    scale: float = 1.0
+    mean: List[float] = field(default_factory=lambda: [0.0, 0.0, 0.0])
+    std: List[float] = field(default_factory=list)
+    crop: bool = False
+    rgb: bool = True
+    classes: str = os.path.join(segm_test_data_dir, "pascal-classes.txt")
+```
+
+The default image preprocessing options are defined in ``default_preprocess_config.py``:
+
+```python
+pytorch_segm_input_blob = {
+    "mean": ["123.675", "116.28", "103.53"],
+    "scale": str(1 / 255.0),
+    "std": ["0.229", "0.224", "0.225"],
+    "crop": "False",
+    "rgb": "True"
+}
+```
+
+The basis of the model testing is ``samples/dnn/segmentation.py``. ``segmentation.py`` can be executed autonomously with the converted model provided in ``--input`` and the parameters for ``cv2.dnn.blobFromImage`` populated.
+
+To reproduce the OpenCV steps described in "Model Conversion Pipeline" from scratch with ``dnn_model_runner``, execute the line below:
+
+```
+python -m dnn_model_runner.dnn_conversion.pytorch.segmentation.py_to_py_segm --model_name fcnresnet50 --test True --default_img_preprocess True --evaluate False
+```
diff --git a/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/tf_sem_segm_model_conversion_tutorial.md b/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/tf_sem_segm_model_conversion_tutorial.md
new file mode 100644
index 0000000000..bcf9749e2e
--- /dev/null
+++ b/doc/tutorials/dnn/dnn_pytorch_tf_segmentation/tf_sem_segm_model_conversion_tutorial.md
@@ -0,0 +1,406 @@
+# Conversion of TensorFlow Segmentation Models and Launch with OpenCV {#tf_segm_tutorial_dnn_conversion}
+
+## Goals
+In this tutorial you will learn how to:
+* convert TensorFlow (TF) segmentation models
+* run the converted TensorFlow model with OpenCV
+* obtain an evaluation of the TensorFlow and OpenCV DNN models
+
+We will explore the above-listed points using the DeepLab architecture as an example.
+
+## Introduction
+The key concepts involved in the transition pipeline of the [TensorFlow classification](https://link_to_cls_tutorial) and segmentation models with the OpenCV API are almost the same, except for the graph optimization phase. The initial step in the conversion of TensorFlow models into cv.dnn.Net
+is obtaining the frozen TF model graph. A frozen graph combines the model graph structure with the kept values of the required variables, for example, the weights. The frozen graph is usually saved in [protobuf](https://en.wikipedia.org/wiki/Protocol_Buffers) (``.pb``) files.
+To read the generated segmentation model ``.pb`` file with cv.dnn.readNetFromTensorflow, the graph needs to be modified with the TF [graph transform tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms).
+
+## Practice
+In this part we are going to cover the following points:
+1. create a TF segmentation model conversion pipeline and provide the inference
+2. evaluate and test TF segmentation models
+
+If you only want to run the evaluation or test pipelines, the "Model Conversion Pipeline" tutorial part can be skipped.
+
+### Model Conversion Pipeline
+The code in this subchapter is located in the ``dnn_model_runner`` module and can be executed with the line:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.py_to_py_deeplab
+```
+
+TensorFlow segmentation models can be found in the [TensorFlow Research Models](https://github.com/tensorflow/models/tree/master/research/#tensorflow-research-models) section, which contains implementations of models based on published research papers.
+We will retrieve the archive with the pre-trained TF DeepLabV3 from the link below:
+
+```
+http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz
+```
+
+The full pipeline for obtaining the frozen graph is described in ``deeplab_retrievement.py``:
+
+```python
+def get_deeplab_frozen_graph():
+    # define model path to download
+    models_url = 'http://download.tensorflow.org/models/'
+    mobilenetv2_voctrainval = 'deeplabv3_mnv2_pascal_trainval_2018_01_29.tar.gz'
+
+    # construct model link to download
+    model_link = models_url + mobilenetv2_voctrainval
+
+    try:
+        urllib.request.urlretrieve(model_link, mobilenetv2_voctrainval)
+    except Exception:
+        print("TF DeepLabV3 was not retrieved: {}".format(model_link))
+        return
+
+    tf_model_tar = tarfile.open(mobilenetv2_voctrainval)
+
+    # iterate over the obtained model archive
+    for model_tar_elem in tf_model_tar.getmembers():
+        # check whether the model archive contains the frozen graph
+        if TF_FROZEN_GRAPH_NAME in os.path.basename(model_tar_elem.name):
+            # extract the frozen graph
+            tf_model_tar.extract(model_tar_elem, FROZEN_GRAPH_PATH)
+
+    tf_model_tar.close()
+```
+
+After running this script:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.deeplab_retrievement
+```
+
+we will get ``frozen_inference_graph.pb`` in ``deeplab/deeplabv3_mnv2_pascal_trainval``.
+
+Before loading the network with OpenCV, the extracted ``frozen_inference_graph.pb`` needs to be optimized.
+To optimize the graph we use TF ``TransformGraph`` with default parameters:
+
+```python
+DEFAULT_OPT_GRAPH_NAME = "optimized_frozen_inference_graph.pb"
+DEFAULT_INPUTS = "sub_7"
+DEFAULT_OUTPUTS = "ResizeBilinear_3"
+DEFAULT_TRANSFORMS = "remove_nodes(op=Identity)" \
+                     " merge_duplicate_nodes" \
+                     " strip_unused_nodes" \
+                     " fold_constants(ignore_errors=true)" \
+                     " fold_batch_norms" \
+                     " fold_old_batch_norms"
+
+
+def optimize_tf_graph(
+        in_graph,
+        out_graph=DEFAULT_OPT_GRAPH_NAME,
+        inputs=DEFAULT_INPUTS,
+        outputs=DEFAULT_OUTPUTS,
+        transforms=DEFAULT_TRANSFORMS,
+        is_manual=True,
+        was_optimized=True
+):
+    # ...
+
+    tf_opt_graph = TransformGraph(
+        tf_graph,
+        inputs,
+        outputs,
+        transforms
+    )
+```
+
+To run the graph optimization process, execute the line:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.tf_graph_optimizer --in_graph deeplab/deeplabv3_mnv2_pascal_trainval/frozen_inference_graph.pb
+```
+
+As a result, the ``deeplab/deeplabv3_mnv2_pascal_trainval`` directory will contain ``optimized_frozen_inference_graph.pb``.
+
+After we have obtained the model graphs, let's go through the steps listed below:
+1. read TF ``frozen_inference_graph.pb`` graph
+2. read optimized TF frozen graph with OpenCV API
+3. prepare input data
+4. provide inference
+5. get colored masks from predictions
+6. visualize results
+
+```python
+# get TF model graph from the obtained frozen graph
+deeplab_graph = read_deeplab_frozen_graph(deeplab_frozen_graph_path)
+
+# read DeepLab frozen graph with OpenCV API
+opencv_net = cv2.dnn.readNetFromTensorflow(opt_deeplab_frozen_graph_path)
+print("OpenCV model was successfully read. Model layers: \n", opencv_net.getLayerNames())
+
+# get processed image
+original_img_shape, tf_input_blob, opencv_input_img = get_processed_imgs("test_data/sem_segm/2007_000033.jpg")
+
+# obtain OpenCV DNN predictions
+opencv_prediction = get_opencv_dnn_prediction(opencv_net, opencv_input_img)
+
+# obtain TF model predictions
+tf_prediction = get_tf_dnn_prediction(deeplab_graph, tf_input_blob)
+
+# get PASCAL VOC classes and colors
+pascal_voc_classes, pascal_voc_colors = read_colors_info("test_data/sem_segm/pascal-classes.txt")
+
+# obtain colored segmentation masks
+opencv_colored_mask = get_colored_mask(original_img_shape, opencv_prediction, pascal_voc_colors)
+tf_colored_mask = get_tf_colored_mask(original_img_shape, tf_prediction, pascal_voc_colors)
+
+# obtain palette of PASCAL VOC colors
+color_legend = get_legend(pascal_voc_classes, pascal_voc_colors)
+
+cv2.imshow('TensorFlow Colored Mask', tf_colored_mask)
+cv2.imshow('OpenCV DNN Colored Mask', opencv_colored_mask)
+
+cv2.imshow('Color Legend', color_legend)
+```
+
+For the model inference we will use the picture below from the [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) validation dataset:
+
+![PASCAL VOC img](images/2007_000033.jpg)
+
+The target segmented result is:
+
+![PASCAL VOC ground truth](images/2007_000033.png)
+
+To decode the PASCAL VOC colors and map them onto the predicted masks, we also need the ``pascal-classes.txt`` file, which contains the full list of the PASCAL VOC classes and their corresponding colors.
+
+Let's go through each step using the pretrained TF DeepLabV3 MobileNetV2 as an example:
+
+* read TF ``frozen_inference_graph.pb`` graph:
+
+```python
+# init deeplab model graph
+model_graph = tf.Graph()
+
+# obtain the graph definition from the frozen graph file
+with tf.io.gfile.GFile(frozen_graph_path, 'rb') as graph_file:
+    tf_model_graph = GraphDef()
+    tf_model_graph.ParseFromString(graph_file.read())
+
+with model_graph.as_default():
+    tf.import_graph_def(tf_model_graph, name='')
+```
+
+* read optimized TF frozen graph with OpenCV API:
+
+```python
+# read DeepLab frozen graph with OpenCV API
+opencv_net = cv2.dnn.readNetFromTensorflow(opt_deeplab_frozen_graph_path)
+```
+
+* prepare input data with the cv2.dnn.blobFromImage function:
+
+```python
+# read the image
+input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
+input_img = input_img.astype(np.float32)
+
+# preprocess image for TF model input
+tf_preproc_img = cv2.resize(input_img, (513, 513))
+tf_preproc_img = cv2.cvtColor(tf_preproc_img, cv2.COLOR_BGR2RGB)
+
+# define preprocess parameters for OpenCV DNN
+mean = np.array([1.0, 1.0, 1.0]) * 127.5
+scale = 1 / 127.5
+
+# prepare input blob to fit the model input:
+# 1. subtract mean
+# 2. scale pixel values to the [-1, 1] range
+input_blob = cv2.dnn.blobFromImage(
+    image=input_img,
+    scalefactor=scale,
+    size=(513, 513),  # img target size
+    mean=mean,
+    swapRB=True,  # BGR -> RGB
+    crop=False  # no center crop
+)
+```
+
+Please pay attention to the preprocessing order in the ``cv2.dnn.blobFromImage`` function: first the mean value is subtracted and only then are the pixel values multiplied by the defined scale.
+Therefore, to reproduce the TF image preprocessing pipeline, we multiply ``mean`` by ``127.5``.
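+
+As a quick, self-contained sanity check of this claim (purely illustrative, not part of the sample code), we can compare the resulting blob against a manual TF-style normalization:
+
+```python
+import cv2
+import numpy as np
+
+# random stand-in for a BGR image
+img = np.random.randint(0, 256, (513, 513, 3)).astype(np.float32)
+
+blob = cv2.dnn.blobFromImage(
+    image=img,
+    scalefactor=1 / 127.5,
+    size=(513, 513),
+    mean=np.array([1.0, 1.0, 1.0]) * 127.5,
+    swapRB=True,
+    crop=False
+)
+
+# manual reference: swap BGR -> RGB, then map pixels into [-1, 1]
+reference = (img[:, :, ::-1] - 127.5) / 127.5
+
+# the blob is NCHW; bring it back to HWC for the comparison
+restored = blob[0].transpose(1, 2, 0)
+print(np.allclose(restored, reference, atol=1e-4))  # True
+```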
+
+Another important point is the image preprocessing for TF DeepLab. To pass the image into the TF model we only need to construct an appropriate shape; the rest of the image preprocessing is described in [feature_extractor.py](https://github.com/tensorflow/models/blob/master/research/deeplab/core/feature_extractor.py) and will be invoked automatically.
+
+* provide OpenCV ``cv.dnn_Net`` inference:
+
+```python
+# set OpenCV DNN input
+opencv_net.setInput(preproc_img)
+
+# OpenCV DNN inference
+out = opencv_net.forward()
+print("OpenCV DNN segmentation prediction: \n")
+print("* shape: ", out.shape)
+
+# get IDs of predicted classes
+out_predictions = np.argmax(out[0], axis=0)
+```
+
+Executing the above code produces the following output:
+
+```
+OpenCV DNN segmentation prediction:
+* shape:  (1, 21, 513, 513)
+```
+
+Each of the 21 prediction channels, where 21 is the number of PASCAL VOC classes, contains probabilities indicating how likely each pixel corresponds to that class.
+
+* provide TF model inference:
+
+```python
+preproc_img = np.expand_dims(preproc_img, 0)
+
+# init TF session
+tf_session = Session(graph=model_graph)
+
+input_tensor_name = "ImageTensor:0"
+output_tensor_name = "SemanticPredictions:0"
+
+# run inference; the batch dimension was already added by np.expand_dims
+out = tf_session.run(
+    output_tensor_name,
+    feed_dict={input_tensor_name: preproc_img}
+)
+
+print("TF segmentation model prediction: \n")
+print("* shape: ", out.shape)
+```
+
+The TF inference results are the following:
+
+```
+TF segmentation model prediction:
+* shape:  (1, 513, 513)
+```
+
+The TensorFlow prediction contains the indexes of the corresponding PASCAL VOC classes.
+
+* transform OpenCV prediction into colored mask:
+
+```python
+mask_height = segm_mask.shape[0]
+mask_width = segm_mask.shape[1]
+
+img_height = original_img_shape[0]
+img_width = original_img_shape[1]
+
+# convert mask values into PASCAL VOC colors
+processed_mask = np.stack([colors[color_id] for color_id in segm_mask.flatten()])
+
+# reshape mask into 3-channel image
+processed_mask = processed_mask.reshape(mask_height, mask_width, 3)
+processed_mask = cv2.resize(processed_mask, (img_width, img_height), interpolation=cv2.INTER_NEAREST).astype(
+    np.uint8)
+
+# convert colored mask from BGR to RGB
+processed_mask = cv2.cvtColor(processed_mask, cv2.COLOR_BGR2RGB)
+```
+
+In this step we map the predicted class IDs from the segmentation mask to the appropriate PASCAL VOC colors. Let's have a look at the results:
+
+![Color Legend](images/colors_legend.png)
+
+![OpenCV Colored Mask](images/deeplab_opencv_colored_mask.png)
+
+* transform TF prediction into colored mask:
+
+```python
+colors = np.array(colors)
+processed_mask = colors[segm_mask[0]]
+
+img_height = original_img_shape[0]
+img_width = original_img_shape[1]
+
+processed_mask = cv2.resize(processed_mask, (img_width, img_height), interpolation=cv2.INTER_NEAREST).astype(
+    np.uint8)
+
+# convert colored mask from BGR to RGB for compatibility with PASCAL VOC colors
+processed_mask = cv2.cvtColor(processed_mask, cv2.COLOR_BGR2RGB)
+```
+
+The result is:
+
+![TF Colored Mask](images/deeplab_tf_colored_mask.png)
+
+As a result, we get two equal segmentation masks.
+
+### Evaluation of the Models
+
+The ``dnn_model_runner`` module provided in ``dnn/samples`` allows running the full evaluation pipeline on the PASCAL VOC dataset and test execution for the DeepLab MobileNet model.
+
+#### Evaluation Mode
+
+The line below runs the module in evaluation mode:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.py_to_py_segm
+```
+
+The model will be read into an OpenCV ``cv.dnn_Net`` object. Evaluation results of the TF and OpenCV models (pixel accuracy, mean IoU, inference time) will be written into the log file. Inference time values will also be depicted in a chart to summarize the obtained model information.
+
+The necessary evaluation configurations are defined in [``test_config.py``](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py):
+
+```python
+@dataclass
+class TestSegmConfig:
+    frame_size: int = 500
+    img_root_dir: str = "./VOC2012"
+    img_dir: str = os.path.join(img_root_dir, "JPEGImages/")
+    img_segm_gt_dir: str = os.path.join(img_root_dir, "SegmentationClass/")
+    # reduced val: https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/data/pascal/seg11valid.txt
+    segm_val_file: str = os.path.join(img_root_dir, "ImageSets/Segmentation/seg11valid.txt")
+    colour_file_cls: str = os.path.join(img_root_dir, "ImageSets/Segmentation/pascal-classes.txt")
+```
+
+These values can be modified in accordance with the chosen model pipeline.
+
+#### Test Mode
+
+The line below runs the module in test mode, which walks through the model inference steps:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.py_to_py_segm --test True --default_img_preprocess --evaluate False
+```
+
+Here the ``default_img_preprocess`` key defines whether you'd like to parametrize the model test process with particular values or use the default values of, for example, ``scale``, ``mean`` or ``std``.
+
+The test configuration is defined in the [``test_config.py``](https://github.com/opencv/opencv/tree/master/samples/dnn/dnn_model_runner/dnn_conversion/common/test/configs/test_config.py) ``TestSegmModuleConfig`` class:
+
+```python
+@dataclass
+class TestSegmModuleConfig:
+    segm_test_data_dir: str = "test_data/sem_segm"
+    test_module_name: str = "segmentation"
+    test_module_path: str = "segmentation.py"
+    input_img: str = os.path.join(segm_test_data_dir, "2007_000033.jpg")
+    model: str = ""
+
+    frame_height: str = str(TestSegmConfig.frame_size)
+    frame_width: str = str(TestSegmConfig.frame_size)
+    scale: float = 1.0
+    mean: List[float] = field(default_factory=lambda: [0.0, 0.0, 0.0])
+    std: List[float] = field(default_factory=list)
+    crop: bool = False
+    rgb: bool = True
+    classes: str = os.path.join(segm_test_data_dir, "pascal-classes.txt")
+```
+
+The default image preprocessing options are defined in ``default_preprocess_config.py``:
+
+```python
+tf_segm_input_blob = {
+    "scale": str(1 / 127.5),
+    "mean": ["127.5", "127.5", "127.5"],
+    "std": [],
+    "crop": "False",
+    "rgb": "True"
+}
+```
+
+The basis of the model testing is ``samples/dnn/segmentation.py``. ``segmentation.py`` can be executed autonomously with the converted model provided in ``--input`` and the parameters for ``cv2.dnn.blobFromImage`` populated.
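+
+For orientation, the defaults above translate roughly into the following preprocessing call (an illustrative sketch only; the exact wiring lives in the sample scripts, and the file name here is just the test image used earlier):
+
+```python
+import cv2
+import numpy as np
+
+img = cv2.imread("test_data/sem_segm/2007_000033.jpg").astype(np.float32)
+
+blob = cv2.dnn.blobFromImage(
+    image=img,
+    scalefactor=1 / 127.5,       # "scale"
+    size=(500, 500),             # frame_width x frame_height
+    mean=(127.5, 127.5, 127.5),  # "mean"
+    swapRB=True,                 # "rgb"
+    crop=False                   # "crop"
+)
+# "std" is empty for the TF pipeline, so no extra per-channel division is applied
+```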
+
+To reproduce the OpenCV steps described in "Model Conversion Pipeline" from scratch with ``dnn_model_runner``, execute the line below:
+
+```
+python -m dnn_model_runner.dnn_conversion.tf.segmentation.py_to_py_segm --test True --default_img_preprocess True --evaluate False
+```
diff --git a/doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg b/doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg
new file mode 100644
index 0000000000..b154dfc4ec
Binary files /dev/null and b/doc/tutorials/dnn/dnn_text_spotting/detect_test1.jpg differ
diff --git a/doc/tutorials/dnn/dnn_text_spotting/detect_test2.jpg b/doc/tutorials/dnn/dnn_text_spotting/detect_test2.jpg
new file mode 100644
index 0000000000..a46dcc03a1
Binary files /dev/null and b/doc/tutorials/dnn/dnn_text_spotting/detect_test2.jpg differ
diff --git a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown
new file mode 100644
index 0000000000..5f28b6ce7a
--- /dev/null
+++ b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown
@@ -0,0 +1,324 @@
+# High Level API: TextDetectionModel and TextRecognitionModel {#tutorial_dnn_text_spotting}
+
+@tableofcontents
+
+@prev_tutorial{tutorial_dnn_OCR}
+@next_tutorial{pytorch_cls_tutorial_dnn_conversion}
+
+| | |
+| -: | :- |
+| Original author | Wenqing Zhang |
+| Compatibility | OpenCV >= 4.5 |
+
+## Introduction
+In this tutorial, we will introduce the APIs for TextRecognitionModel and TextDetectionModel in detail.
+
+---
+#### TextRecognitionModel:
+
+In the current version, @ref cv::dnn::TextRecognitionModel only supports CNN+RNN+CTC based algorithms,
+and the greedy decoding method for CTC is provided.
+For more information, please refer to the [original paper](https://arxiv.org/abs/1507.05717).
+
+Before recognition, you should `setVocabulary` and `setDecodeType`.
+- "CTC-greedy", the output of the text recognition model should be a probability matrix.
+  The shape should be `(T, B, Dim)`, where
+  - `T` is the sequence length
+  - `B` is the batch size (only `B=1` is supported in inference)
+  - and `Dim` is the length of the vocabulary +1 ('Blank' of CTC is at index 0 of Dim).
+
+@ref cv::dnn::TextRecognitionModel::recognize() is the main function for text recognition.
+- The input image should be a cropped text image or an image with `roiRects`
+- Other decoding methods may be supported in the future
+
+---
+
+#### TextDetectionModel:
+
+@ref cv::dnn::TextDetectionModel API provides these methods for text detection:
+- cv::dnn::TextDetectionModel::detect() returns the results in std::vector<std::vector<Point>> (4-point quadrangles)
+- cv::dnn::TextDetectionModel::detectTextRectangles() returns the results in std::vector<cv::RotatedRect> (RBOX-like)
+
+In the current version, @ref cv::dnn::TextDetectionModel supports these algorithms:
+- use @ref cv::dnn::TextDetectionModel_DB with "DB" models
+- and use @ref cv::dnn::TextDetectionModel_EAST with "EAST" models
+
+The pretrained models provided below are variants of DB (w/o deformable convolution),
+and their performance is reported in Table 1 of the [paper](https://arxiv.org/abs/1911.08947).
+For more information, please refer to the [official code](https://github.com/MhLiao/DB).
+
+---
+
+You can train your own model with more data, and convert it into ONNX format.
+We encourage you to add new algorithms to these APIs.
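+
+If you train your own recognizer, the export step can look like the sketch below. The module here is a hypothetical stand-in; only the 100x32 input size and the ``(T, B, Dim)`` output layout described above are taken from this tutorial, everything else is an assumption:
+
+```python
+import torch
+import torch.nn as nn
+
+# hypothetical stand-in for a trained CRNN-style recognizer;
+# replace it with your own model before exporting
+class TinyRecognizer(nn.Module):
+    def __init__(self, dim=37):  # 36 characters + 1 CTC 'Blank'
+        super().__init__()
+        self.conv = nn.Conv2d(1, dim, kernel_size=(32, 4), stride=(1, 4))
+
+    def forward(self, x):                        # x: (B, 1, 32, 100)
+        feat = self.conv(x)                      # (B, dim, 1, T)
+        return feat.squeeze(2).permute(2, 0, 1)  # (T, B, dim)
+
+model = TinyRecognizer().eval()
+
+# recognition models in this tutorial consume 100x32 text-line crops
+dummy_input = torch.randn(1, 1, 32, 100)
+
+torch.onnx.export(
+    model,
+    dummy_input,
+    "my_recognizer.onnx",
+    input_names=["input"],
+    output_names=["output"],  # probability matrix of shape (T, B, Dim)
+    opset_version=11
+)
+```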
+
+
+## Pretrained Models
+
+#### TextRecognitionModel:
+
+```
+crnn.onnx:
+url: https://drive.google.com/uc?export=dowload&id=1ooaLR-rkTl8jdpGy1DoQs0-X0lQsB6Fj
+sha: 270d92c9ccb670ada2459a25977e8deeaf8380d3
+alphabet_36.txt: https://drive.google.com/uc?export=dowload&id=1oPOYx5rQRp8L6XQciUwmwhMCfX0KyO4b
+parameter setting: -rgb=0;
+description: The classification number of this model is 36 (0~9 + a~z).
+             The training dataset is MJSynth.
+
+crnn_cs.onnx:
+url: https://drive.google.com/uc?export=dowload&id=12diBsVJrS9ZEl6BNUiRp9s0xPALBS7kt
+sha: a641e9c57a5147546f7a2dbea4fd322b47197cd5
+alphabet_94.txt: https://drive.google.com/uc?export=dowload&id=1oKXxXKusquimp7XY1mFvj9nwLzldVgBR
+parameter setting: -rgb=1;
+description: The classification number of this model is 94 (0~9 + a~z + A~Z + punctuations).
+             The training datasets are MJsynth and SynthText.
+
+crnn_cs_CN.onnx:
+url: https://drive.google.com/uc?export=dowload&id=1is4eYEUKH7HR7Gl37Sw4WPXx6Ir8oQEG
+sha: 3940942b85761c7f240494cf662dcbf05dc00d14
+alphabet_3944.txt: https://drive.google.com/uc?export=dowload&id=18IZUUdNzJ44heWTndDO6NNfIpJMmN-ul
+parameter setting: -rgb=1;
+description: The classification number of this model is 3944 (0~9 + a~z + A~Z + Chinese characters + special characters).
+             The training dataset is ReCTS (https://rrc.cvc.uab.es/?ch=12).
+```
+
+More models can be found [here](https://drive.google.com/drive/folders/1cTbQ3nuZG-EKWak6emD_s8_hHXWz7lAr?usp=sharing),
+which are taken from [clovaai](https://github.com/clovaai/deep-text-recognition-benchmark).
+You can train more models with [CRNN](https://github.com/meijieru/crnn.pytorch), and convert them with `torch.onnx.export`.
+
+#### TextDetectionModel:
+
+```
+- DB_IC15_resnet50.onnx:
+url: https://drive.google.com/uc?export=dowload&id=17_ABp79PlFt9yPCxSaarVc_DKTmrSGGf
+sha: bef233c28947ef6ec8c663d20a2b326302421fa3
+recommended parameter setting: -inputHeight=736, -inputWidth=1280;
+description: This model is trained on ICDAR2015, so it can only detect English text instances.
+
+- DB_IC15_resnet18.onnx:
+url: https://drive.google.com/uc?export=dowload&id=1sZszH3pEt8hliyBlTmB-iulxHP1dCQWV
+sha: 19543ce09b2efd35f49705c235cc46d0e22df30b
+recommended parameter setting: -inputHeight=736, -inputWidth=1280;
+description: This model is trained on ICDAR2015, so it can only detect English text instances.
+
+- DB_TD500_resnet50.onnx:
+url: https://drive.google.com/uc?export=dowload&id=19YWhArrNccaoSza0CfkXlA8im4-lAGsR
+sha: 1b4dd21a6baa5e3523156776970895bd3db6960a
+recommended parameter setting: -inputHeight=736, -inputWidth=736;
+description: This model is trained on MSRA-TD500, so it can detect both English and Chinese text instances.
+
+- DB_TD500_resnet18.onnx:
+url: https://drive.google.com/uc?export=dowload&id=1vY_KsDZZZb_svd5RT6pjyI8BS1nPbBSX
+sha: 8a3700bdc13e00336a815fc7afff5dcc1ce08546
+recommended parameter setting: -inputHeight=736, -inputWidth=736;
+description: This model is trained on MSRA-TD500, so it can detect both English and Chinese text instances.
+```
+
+We will release more models of DB [here](https://drive.google.com/drive/folders/1qzNCHfUJOS0NEUOIKn69eCtxdlNPpWbq?usp=sharing) in the future.
+
+```
+- EAST:
+Download link: https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1
+This model is based on https://github.com/argman/EAST
+```
+
+## Images for Testing
+
+```
+Text Recognition:
+url: https://drive.google.com/uc?export=dowload&id=1nMcEy68zDNpIlqAn6xCk_kYcUTIeSOtN
+sha: 89205612ce8dd2251effa16609342b69bff67ca3
+
+Text Detection:
+url: https://drive.google.com/uc?export=dowload&id=149tAhIcvfCYeyufRoZ9tmc2mZDKE_XrF
+sha: ced3c03fb7f8d9608169a913acf7e7b93e07109b
+```
+
+## Example for Text Recognition
+
+Step1. Loading images and models with a vocabulary
+
+```cpp
+    // Load a cropped text line image
+    // you can find cropped images for testing in "Images for Testing"
+    int rgb = IMREAD_COLOR; // This should be changed according to the model input requirement.
+    Mat image = imread("path/to/text_rec_test.png", rgb);
+
+    // Load model weights
+    TextRecognitionModel model("path/to/crnn_cs.onnx");
+
+    // The decoding method
+    // more methods will be supported in the future
+    model.setDecodeType("CTC-greedy");
+
+    // Load vocabulary
+    // vocabulary should be changed according to the text recognition model
+    std::ifstream vocFile;
+    vocFile.open("path/to/alphabet_94.txt");
+    CV_Assert(vocFile.is_open());
+    String vocLine;
+    std::vector<String> vocabulary;
+    while (std::getline(vocFile, vocLine)) {
+        vocabulary.push_back(vocLine);
+    }
+    model.setVocabulary(vocabulary);
+```
+
+Step2. Setting Parameters
+
+```cpp
+    // Normalization parameters
+    double scale = 1.0 / 127.5;
+    Scalar mean = Scalar(127.5, 127.5, 127.5);
+
+    // The input shape
+    Size inputSize = Size(100, 32);
+
+    model.setInputParams(scale, inputSize, mean);
+```
+Step3. Inference
+```cpp
+    std::string recognitionResult = model.recognize(image);
+    std::cout << "'" << recognitionResult << "'" << std::endl;
+```
+
+Input image:
+
+![Picture example](text_rec_test.png)
+
+Output:
+```
+'welcome'
+```
+
+
+## Example for Text Detection
+
+Step1. Loading images and models
+```cpp
+    // Load an image
+    // you can find some images for testing in "Images for Testing"
+    Mat frame = imread("/path/to/text_det_test.png");
+```
+
+Step2.a Setting Parameters (DB)
+```cpp
+    // Load model weights
+    TextDetectionModel_DB model("/path/to/DB_TD500_resnet50.onnx");
+
+    // Post-processing parameters
+    float binThresh = 0.3;
+    float polyThresh = 0.5;
+    uint maxCandidates = 200;
+    double unclipRatio = 2.0;
+    model.setBinaryThreshold(binThresh)
+         .setPolygonThreshold(polyThresh)
+         .setMaxCandidates(maxCandidates)
+         .setUnclipRatio(unclipRatio)
+    ;
+
+    // Normalization parameters
+    double scale = 1.0 / 255.0;
+    Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793);
+
+    // The input shape
+    Size inputSize = Size(736, 736);
+
+    model.setInputParams(scale, inputSize, mean);
+```
+
+Step2.b Setting Parameters (EAST)
+```cpp
+    TextDetectionModel_EAST model("EAST.pb");
+
+    float confThreshold = 0.5;
+    float nmsThreshold = 0.4;
+    model.setConfidenceThreshold(confThreshold)
+         .setNMSThreshold(nmsThreshold)
+    ;
+
+    double detScale = 1.0;
+    Size detInputSize = Size(320, 320);
+    Scalar detMean = Scalar(123.68, 116.78, 103.94);
+    bool swapRB = true;
+    model.setInputParams(detScale, detInputSize, detMean, swapRB);
+```
+
+
+Step3. Inference
+```cpp
+    std::vector<std::vector<Point>> detResults;
+    model.detect(frame, detResults);
+
+    // Visualization
+    polylines(frame, detResults, true, Scalar(0, 255, 0), 2);
+    imshow("Text Detection", frame);
+    waitKey();
+```
+
+Output:
+
+![Picture example](text_det_test_results.jpg)
+
+## Example for Text Spotting
+
+After following the steps above, it is easy to get the detection results of an input image.
+Then, you can apply a transformation and crop the text images for recognition.
+For more information, please refer to the **Detailed Sample**.
+```cpp
+    // Transform and Crop
+    Mat cropped;
+    fourPointsTransform(recInput, vertices, cropped);
+
+    String recResult = recognizer.recognize(cropped);
+```
+
+Output Examples:
+
+![Picture example](detect_test1.jpg)
+
+![Picture example](detect_test2.jpg)
+
+## Source Code
+The [source code](https://github.com/opencv/opencv/blob/master/modules/dnn/src/model.cpp)
+of these APIs can be found in the DNN module.
+
+## Detailed Sample
+For more information, please refer to:
+- [samples/dnn/scene_text_recognition.cpp](https://github.com/opencv/opencv/blob/master/samples/dnn/scene_text_recognition.cpp)
+- [samples/dnn/scene_text_detection.cpp](https://github.com/opencv/opencv/blob/master/samples/dnn/scene_text_detection.cpp)
+- [samples/dnn/text_detection.cpp](https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp)
+- [samples/dnn/scene_text_spotting.cpp](https://github.com/opencv/opencv/blob/master/samples/dnn/scene_text_spotting.cpp)
+
+#### Test with an image
+Examples:
+```bash
+example_dnn_scene_text_recognition -mp=path/to/crnn_cs.onnx -i=path/to/an/image -rgb=1 -vp=/path/to/alphabet_94.txt
+example_dnn_scene_text_detection -mp=path/to/DB_TD500_resnet50.onnx -i=path/to/an/image -ih=736 -iw=736
+example_dnn_scene_text_spotting -dmp=path/to/DB_IC15_resnet50.onnx -rmp=path/to/crnn_cs.onnx -i=path/to/an/image -iw=1280 -ih=736 -rgb=1 -vp=/path/to/alphabet_94.txt
+example_dnn_text_detection -dmp=path/to/EAST.pb -rmp=path/to/crnn_cs.onnx -i=path/to/an/image -rgb=1 -vp=path/to/alphabet_94.txt
+```
+
+#### Test on public datasets
+Text Recognition:
+
+The download link for the testing images can be found in **Images for Testing**.
+
+Examples:
+```bash
+example_dnn_scene_text_recognition -mp=path/to/crnn.onnx -e=true -edp=path/to/evaluation_data_rec -vp=/path/to/alphabet_36.txt -rgb=0
+example_dnn_scene_text_recognition -mp=path/to/crnn_cs.onnx -e=true -edp=path/to/evaluation_data_rec -vp=/path/to/alphabet_94.txt -rgb=1
+```
+
+Text Detection:
+
+The download links for the testing images can be found in **Images for Testing**.
+
+Examples:
+```bash
+example_dnn_scene_text_detection -mp=path/to/DB_TD500_resnet50.onnx -e=true -edp=path/to/evaluation_data_det/TD500 -ih=736 -iw=736
+example_dnn_scene_text_detection -mp=path/to/DB_IC15_resnet50.onnx -e=true -edp=path/to/evaluation_data_det/IC15 -ih=736 -iw=1280
+```
diff --git a/doc/tutorials/dnn/dnn_text_spotting/text_det_test_results.jpg b/doc/tutorials/dnn/dnn_text_spotting/text_det_test_results.jpg
new file mode 100644
index 0000000000..173840f729
Binary files /dev/null and b/doc/tutorials/dnn/dnn_text_spotting/text_det_test_results.jpg differ
diff --git a/doc/tutorials/dnn/dnn_text_spotting/text_rec_test.png b/doc/tutorials/dnn/dnn_text_spotting/text_rec_test.png
new file mode 100644
index 0000000000..c3226376e4
Binary files /dev/null and b/doc/tutorials/dnn/dnn_text_spotting/text_rec_test.png differ
diff --git a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
index 
1552d4e654..0973396db5 100644 --- a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown +++ b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown @@ -1,9 +1,16 @@ YOLO DNNs {#tutorial_dnn_yolo} =============================== +@tableofcontents + @prev_tutorial{tutorial_dnn_android} @next_tutorial{tutorial_dnn_javascript} +| | | +| -: | :- | +| Original author | Alessandro de Oliveira Faria | +| Compatibility | OpenCV >= 3.3.1 | + Introduction ------------ diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown index 0a66d04ee4..0d5e43ee11 100644 --- a/doc/tutorials/dnn/table_of_content_dnn.markdown +++ b/doc/tutorials/dnn/table_of_content_dnn.markdown @@ -2,81 +2,23 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn} ===================================== - @subpage tutorial_dnn_googlenet - - *Languages:* C++ - - *Compatibility:* \> OpenCV 3.3 - - *Author:* Vitaliy Lyudvichenko - - In this tutorial you will learn how to use opencv_dnn module for image classification by using GoogLeNet trained network from Caffe model zoo. - - @subpage tutorial_dnn_halide - - *Languages:* Halide - - *Compatibility:* \> OpenCV 3.3 - - *Author:* Dmitry Kurtaev - - This tutorial guidelines how to run your models in OpenCV deep learning module using Halide language backend. - - @subpage tutorial_dnn_halide_scheduling - - *Languages:* Halide - - *Compatibility:* \> OpenCV 3.3 - - *Author:* Dmitry Kurtaev - - In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module. - - @subpage tutorial_dnn_android - - *Languages:* Java - - *Compatibility:* \> OpenCV 3.3 - - *Author:* Dmitry Kurtaev - - This tutorial will show you how to run deep learning model using OpenCV on Android device. - - @subpage tutorial_dnn_yolo - - *Languages:* C++, Python - - *Compatibility:* \> OpenCV 3.3.1 - - *Author:* Alessandro de Oliveira Faria - - In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image. - - @subpage tutorial_dnn_javascript - - *Languages:* JavaScript - - *Compatibility:* \> OpenCV 3.3.1 - - *Author:* Dmitry Kurtaev - - In this tutorial we'll run deep learning models in browser using OpenCV.js. - - @subpage tutorial_dnn_custom_layers - - *Languages:* C++, Python - - *Compatibility:* \> OpenCV 3.4.1 - - *Author:* Dmitry Kurtaev - - How to define custom layers to import networks. - - @subpage tutorial_dnn_OCR +- @subpage tutorial_dnn_text_spotting - *Languages:* C++ +#### PyTorch models with OpenCV +In this section you will find the guides, which describe how to run classification, segmentation and detection PyTorch DNN models with OpenCV. +- @subpage pytorch_cls_tutorial_dnn_conversion +- @subpage pytorch_cls_c_tutorial_dnn_conversion +- @subpage pytorch_segm_tutorial_dnn_conversion - *Compatibility:* \> OpenCV 4.3 - - *Author:* Zihao Mu - - In this tutorial you will learn how to use opencv_dnn module using custom OCR models. \ No newline at end of file +#### TensorFlow models with OpenCV +In this section you will find the guides, which describe how to run classification, segmentation and detection TensorFlow DNN models with OpenCV. 
+- @subpage tf_cls_tutorial_dnn_conversion +- @subpage tf_det_tutorial_dnn_conversion +- @subpage tf_segm_tutorial_dnn_conversion diff --git a/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown b/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown index 0635dfc4f6..9cb920ff1b 100644 --- a/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown +++ b/doc/tutorials/features2d/akaze_matching/akaze_matching.markdown @@ -1,9 +1,16 @@ AKAZE local features matching {#tutorial_akaze_matching} ============================= +@tableofcontents + @prev_tutorial{tutorial_detection_of_planar_objects} @next_tutorial{tutorial_akaze_tracking} +| | | +| -: | :- | +| Original author | Fedor Morozov | +| Compatibility | OpenCV >= 3.0 | + Introduction ------------ diff --git a/doc/tutorials/features2d/akaze_tracking/akaze_tracking.markdown b/doc/tutorials/features2d/akaze_tracking/akaze_tracking.markdown index 58071ffd42..dd23957d5d 100644 --- a/doc/tutorials/features2d/akaze_tracking/akaze_tracking.markdown +++ b/doc/tutorials/features2d/akaze_tracking/akaze_tracking.markdown @@ -1,9 +1,16 @@ AKAZE and ORB planar tracking {#tutorial_akaze_tracking} ============================= +@tableofcontents + @prev_tutorial{tutorial_akaze_matching} @next_tutorial{tutorial_homography} +| | | +| -: | :- | +| Original author | Fedor Morozov | +| Compatibility | OpenCV >= 3.0 | + Introduction ------------ diff --git a/doc/tutorials/features2d/detection_of_planar_objects/detection_of_planar_objects.markdown b/doc/tutorials/features2d/detection_of_planar_objects/detection_of_planar_objects.markdown index 9febdb7acd..2d19e3387e 100644 --- a/doc/tutorials/features2d/detection_of_planar_objects/detection_of_planar_objects.markdown +++ b/doc/tutorials/features2d/detection_of_planar_objects/detection_of_planar_objects.markdown @@ -1,9 +1,15 @@ Detection of planar objects {#tutorial_detection_of_planar_objects} =========================== +@tableofcontents + @prev_tutorial{tutorial_feature_homography} @next_tutorial{tutorial_akaze_matching} +| | | +| -: | :- | +| Original author | Victor Eruhimov | +| Compatibility | OpenCV >= 3.0 | The goal of this tutorial is to learn how to use *features2d* and *calib3d* modules for detecting known planar objects in scenes. 
diff --git a/doc/tutorials/features2d/feature_description/feature_description.markdown b/doc/tutorials/features2d/feature_description/feature_description.markdown index 70a30096f5..1b77b6a624 100644 --- a/doc/tutorials/features2d/feature_description/feature_description.markdown +++ b/doc/tutorials/features2d/feature_description/feature_description.markdown @@ -1,9 +1,16 @@ Feature Description {#tutorial_feature_description} =================== +@tableofcontents + @prev_tutorial{tutorial_feature_detection} @next_tutorial{tutorial_feature_flann_matcher} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/feature_detection/feature_detection.markdown b/doc/tutorials/features2d/feature_detection/feature_detection.markdown index a22ef90520..0d52877318 100644 --- a/doc/tutorials/features2d/feature_detection/feature_detection.markdown +++ b/doc/tutorials/features2d/feature_detection/feature_detection.markdown @@ -1,9 +1,16 @@ Feature Detection {#tutorial_feature_detection} ================= +@tableofcontents + @prev_tutorial{tutorial_corner_subpixels} @next_tutorial{tutorial_feature_description} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown index 2e5f12c922..1416604aa3 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.markdown @@ -1,9 +1,16 @@ Feature Matching with FLANN {#tutorial_feature_flann_matcher} =========================== +@tableofcontents + @prev_tutorial{tutorial_feature_description} @next_tutorial{tutorial_feature_homography} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/feature_homography/feature_homography.markdown b/doc/tutorials/features2d/feature_homography/feature_homography.markdown index b2d23435eb..4b75c7f485 100644 --- a/doc/tutorials/features2d/feature_homography/feature_homography.markdown +++ b/doc/tutorials/features2d/feature_homography/feature_homography.markdown @@ -1,9 +1,16 @@ Features2D + Homography to find a known object {#tutorial_feature_homography} ============================================== +@tableofcontents + @prev_tutorial{tutorial_feature_flann_matcher} @next_tutorial{tutorial_detection_of_planar_objects} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/homography/homography.markdown b/doc/tutorials/features2d/homography/homography.markdown index 960511dd3d..10690ddf8d 100644 --- a/doc/tutorials/features2d/homography/homography.markdown +++ b/doc/tutorials/features2d/homography/homography.markdown @@ -1,8 +1,14 @@ Basic concepts of the homography explained with code {#tutorial_homography} ==================================================== +@tableofcontents + @prev_tutorial{tutorial_akaze_tracking} +| | | +| -: | :- | +| Compatibility | OpenCV >= 3.0 | + @tableofcontents Introduction {#tutorial_homography_Introduction} diff --git a/doc/tutorials/features2d/images/AKAZE_Match_Tutorial_Cover.png b/doc/tutorials/features2d/images/AKAZE_Match_Tutorial_Cover.png deleted file mode 100644 index fdf2007ba2..0000000000 Binary files 
a/doc/tutorials/features2d/images/AKAZE_Match_Tutorial_Cover.png and /dev/null differ diff --git a/doc/tutorials/features2d/images/AKAZE_Tracking_Tutorial_Cover.png b/doc/tutorials/features2d/images/AKAZE_Tracking_Tutorial_Cover.png deleted file mode 100644 index bb3272c96b..0000000000 Binary files a/doc/tutorials/features2d/images/AKAZE_Tracking_Tutorial_Cover.png and /dev/null differ diff --git a/doc/tutorials/features2d/images/Feature_Description_Tutorial_Cover.jpg b/doc/tutorials/features2d/images/Feature_Description_Tutorial_Cover.jpg deleted file mode 100644 index 975caa62ef..0000000000 Binary files a/doc/tutorials/features2d/images/Feature_Description_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/Feature_Detection_Tutorial_Cover.jpg b/doc/tutorials/features2d/images/Feature_Detection_Tutorial_Cover.jpg deleted file mode 100644 index cca9a2b438..0000000000 Binary files a/doc/tutorials/features2d/images/Feature_Detection_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/Feature_Flann_Matcher_Tutorial_Cover.jpg b/doc/tutorials/features2d/images/Feature_Flann_Matcher_Tutorial_Cover.jpg deleted file mode 100644 index e3f66fa0d1..0000000000 Binary files a/doc/tutorials/features2d/images/Feature_Flann_Matcher_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/Feature_Homography_Tutorial_Cover.jpg b/doc/tutorials/features2d/images/Feature_Homography_Tutorial_Cover.jpg deleted file mode 100644 index d509cd9eb7..0000000000 Binary files a/doc/tutorials/features2d/images/Feature_Homography_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/detection_of_planar_objects.png b/doc/tutorials/features2d/images/detection_of_planar_objects.png deleted file mode 100644 index 92de70cfdc..0000000000 Binary files a/doc/tutorials/features2d/images/detection_of_planar_objects.png and /dev/null differ diff --git a/doc/tutorials/features2d/images/trackingmotion/Corner_Subpixeles_Cover.jpg b/doc/tutorials/features2d/images/trackingmotion/Corner_Subpixeles_Cover.jpg deleted file mode 100644 index 61ec8d1d89..0000000000 Binary files a/doc/tutorials/features2d/images/trackingmotion/Corner_Subpixeles_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/trackingmotion/Generic_Corner_Detector_Cover.jpg b/doc/tutorials/features2d/images/trackingmotion/Generic_Corner_Detector_Cover.jpg deleted file mode 100644 index 89fc7bef7c..0000000000 Binary files a/doc/tutorials/features2d/images/trackingmotion/Generic_Corner_Detector_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/trackingmotion/Harris_Detector_Cover.jpg b/doc/tutorials/features2d/images/trackingmotion/Harris_Detector_Cover.jpg deleted file mode 100644 index bc4d816e32..0000000000 Binary files a/doc/tutorials/features2d/images/trackingmotion/Harris_Detector_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/images/trackingmotion/Shi_Tomasi_Detector_Cover.jpg b/doc/tutorials/features2d/images/trackingmotion/Shi_Tomasi_Detector_Cover.jpg deleted file mode 100644 index e0ee608d9c..0000000000 Binary files a/doc/tutorials/features2d/images/trackingmotion/Shi_Tomasi_Detector_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/features2d/table_of_content_features2d.markdown b/doc/tutorials/features2d/table_of_content_features2d.markdown index f42e2a571c..29c99018fc 100644 --- a/doc/tutorials/features2d/table_of_content_features2d.markdown +++ 
b/doc/tutorials/features2d/table_of_content_features2d.markdown @@ -1,128 +1,15 @@ 2D Features framework (feature2d module) {#tutorial_table_of_content_features2d} ========================================= -Learn about how to use the feature points detectors, descriptors and matching framework found inside -OpenCV. - - @subpage tutorial_harris_detector - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Why is it a good idea to track corners? We learn how to use the Harris method to detect - corners. - - @subpage tutorial_good_features_to_track - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we use an improved method to detect corners more accurately. - - @subpage tutorial_generic_corner_detector - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Here you will learn how to use OpenCV functions to make your personalized corner detector! - - *Languages:* C++, Java, Python - - @subpage tutorial_corner_subpixels - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Is pixel resolution enough? Here we learn a simple method to improve our corner location accuracy. - - @subpage tutorial_feature_detection - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - In this tutorial, you will use *features2d* to detect interest points. - - @subpage tutorial_feature_description - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - In this tutorial, you will use *features2d* to calculate feature vectors. - - @subpage tutorial_feature_flann_matcher - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - In this tutorial, you will use the FLANN library to make a fast matching. - - @subpage tutorial_feature_homography - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - In this tutorial, you will use *features2d* and *calib3d* to detect an object in a scene. - - @subpage tutorial_detection_of_planar_objects - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Victor Eruhimov - - You will use *features2d* and *calib3d* modules for detecting known planar objects in - scenes. - - @subpage tutorial_akaze_matching - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 3.0 - - *Author:* Fedor Morozov - - Using *AKAZE* local features to find correspondence between two images. - - @subpage tutorial_akaze_tracking - - *Languages:* C++ - - *Compatibility:* \> OpenCV 3.0 - - *Author:* Fedor Morozov - - Using *AKAZE* and *ORB* for planar object tracking. - - @subpage tutorial_homography - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 3.0 - - This tutorial will explain the basic concepts of the homography with some - demonstration codes. 
diff --git a/doc/tutorials/features2d/trackingmotion/corner_subpixels/corner_subpixels.markdown b/doc/tutorials/features2d/trackingmotion/corner_subpixels/corner_subpixels.markdown index a9316b732d..e43fc3b344 100644 --- a/doc/tutorials/features2d/trackingmotion/corner_subpixels/corner_subpixels.markdown +++ b/doc/tutorials/features2d/trackingmotion/corner_subpixels/corner_subpixels.markdown @@ -1,9 +1,16 @@ Detecting corners location in subpixels {#tutorial_corner_subpixels} ======================================= +@tableofcontents + @prev_tutorial{tutorial_generic_corner_detector} @next_tutorial{tutorial_feature_detection} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.markdown b/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.markdown index 6082b9b91d..def95717b4 100644 --- a/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.markdown +++ b/doc/tutorials/features2d/trackingmotion/generic_corner_detector/generic_corner_detector.markdown @@ -1,9 +1,15 @@ Creating your own corner detector {#tutorial_generic_corner_detector} ================================= +@tableofcontents + @prev_tutorial{tutorial_good_features_to_track} @next_tutorial{tutorial_corner_subpixels} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | Goal ---- diff --git a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.markdown b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.markdown index 7e8cf1157e..19023c24b5 100644 --- a/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.markdown +++ b/doc/tutorials/features2d/trackingmotion/good_features_to_track/good_features_to_track.markdown @@ -1,9 +1,16 @@ Shi-Tomasi corner detector {#tutorial_good_features_to_track} ========================== +@tableofcontents + @prev_tutorial{tutorial_harris_detector} @next_tutorial{tutorial_generic_corner_detector} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.markdown b/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.markdown index f0b32683ce..1ed9cd51ac 100644 --- a/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.markdown +++ b/doc/tutorials/features2d/trackingmotion/harris_detector/harris_detector.markdown @@ -1,8 +1,15 @@ Harris corner detector {#tutorial_harris_detector} ====================== +@tableofcontents + @next_tutorial{tutorial_good_features_to_track} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown index 2912c6fba5..fa25c2b0b3 100644 --- a/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown +++ b/doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown @@ -1,5 +1,8 @@ # Porting anisotropic image segmentation on G-API {#tutorial_gapi_anisotropic_segmentation} +@prev_tutorial{tutorial_gapi_interactive_face_detection} +@next_tutorial{tutorial_gapi_face_beautification} + [TOC] 
# Introduction {#gapi_anisotropic_intro} diff --git a/doc/tutorials/gapi/face_beautification/face_beautification.markdown b/doc/tutorials/gapi/face_beautification/face_beautification.markdown index 9e56db0a54..1ceb416c99 100644 --- a/doc/tutorials/gapi/face_beautification/face_beautification.markdown +++ b/doc/tutorials/gapi/face_beautification/face_beautification.markdown @@ -1,5 +1,7 @@ # Implementing a face beautification algorithm with G-API {#tutorial_gapi_face_beautification} +@prev_tutorial{tutorial_gapi_anisotropic_segmentation} + [TOC] # Introduction {#gapi_fb_intro} diff --git a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown index e5ca466da7..6f8b03bb61 100644 --- a/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown +++ b/doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown @@ -1,5 +1,7 @@ # Face analytics pipeline with G-API {#tutorial_gapi_interactive_face_detection} +@next_tutorial{tutorial_gapi_anisotropic_segmentation} + [TOC] # Overview {#gapi_ifd_intro} diff --git a/doc/tutorials/gpu/gpu-basics-similarity/gpu_basics_similarity.markdown b/doc/tutorials/gpu/gpu-basics-similarity/gpu_basics_similarity.markdown index 74840efbc2..60e136fc6d 100644 --- a/doc/tutorials/gpu/gpu-basics-similarity/gpu_basics_similarity.markdown +++ b/doc/tutorials/gpu/gpu-basics-similarity/gpu_basics_similarity.markdown @@ -1,6 +1,9 @@ @cond CUDA_MODULES Similarity check (PNSR and SSIM) on the GPU {#tutorial_gpu_basics_similarity} =========================================== + +@tableofcontents + @todo update this tutorial @next_tutorial{tutorial_gpu_thrust_interop} diff --git a/doc/tutorials/gpu/gpu-thrust-interop/gpu_thrust_interop.markdown b/doc/tutorials/gpu/gpu-thrust-interop/gpu_thrust_interop.markdown index c930f883a3..b5d79ab0bc 100644 --- a/doc/tutorials/gpu/gpu-thrust-interop/gpu_thrust_interop.markdown +++ b/doc/tutorials/gpu/gpu-thrust-interop/gpu_thrust_interop.markdown @@ -2,6 +2,8 @@ Using a cv::cuda::GpuMat with thrust {#tutorial_gpu_thrust_interop} =========================================== +@tableofcontents + @prev_tutorial{tutorial_gpu_basics_similarity} Goal diff --git a/doc/tutorials/highgui/images/Adding_Trackbars_Tutorial_Cover.jpg b/doc/tutorials/highgui/images/Adding_Trackbars_Tutorial_Cover.jpg deleted file mode 100644 index e914cab1d5..0000000000 Binary files a/doc/tutorials/highgui/images/Adding_Trackbars_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/highgui/table_of_content_highgui.markdown b/doc/tutorials/highgui/table_of_content_highgui.markdown deleted file mode 100644 index fb5a343664..0000000000 --- a/doc/tutorials/highgui/table_of_content_highgui.markdown +++ /dev/null @@ -1,14 +0,0 @@ -High Level GUI and Media (highgui module) {#tutorial_table_of_content_highgui} -========================================= - -This section contains tutorials about how to use the built-in graphical user interface of the library. 
- -- @subpage tutorial_trackbar - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - We will learn how to add a Trackbar to our applications diff --git a/doc/tutorials/images/calib3d.jpg b/doc/tutorials/images/calib3d.jpg deleted file mode 100644 index c5226a9a1d..0000000000 Binary files a/doc/tutorials/images/calib3d.jpg and /dev/null differ diff --git a/doc/tutorials/images/core.jpg b/doc/tutorials/images/core.jpg deleted file mode 100644 index 6fe819bd52..0000000000 Binary files a/doc/tutorials/images/core.jpg and /dev/null differ diff --git a/doc/tutorials/images/feature2D.jpg b/doc/tutorials/images/feature2D.jpg deleted file mode 100644 index 6744de0610..0000000000 Binary files a/doc/tutorials/images/feature2D.jpg and /dev/null differ diff --git a/doc/tutorials/images/general.jpg b/doc/tutorials/images/general.jpg deleted file mode 100644 index 95829d9c64..0000000000 Binary files a/doc/tutorials/images/general.jpg and /dev/null differ diff --git a/doc/tutorials/images/gpu.jpg b/doc/tutorials/images/gpu.jpg deleted file mode 100644 index 4cc053895c..0000000000 Binary files a/doc/tutorials/images/gpu.jpg and /dev/null differ diff --git a/doc/tutorials/images/highgui.jpg b/doc/tutorials/images/highgui.jpg deleted file mode 100644 index ada65fcb03..0000000000 Binary files a/doc/tutorials/images/highgui.jpg and /dev/null differ diff --git a/doc/tutorials/images/imgproc.jpg b/doc/tutorials/images/imgproc.jpg deleted file mode 100644 index ad7dafb0b7..0000000000 Binary files a/doc/tutorials/images/imgproc.jpg and /dev/null differ diff --git a/doc/tutorials/images/introduction.jpg b/doc/tutorials/images/introduction.jpg deleted file mode 100644 index 19a9284785..0000000000 Binary files a/doc/tutorials/images/introduction.jpg and /dev/null differ diff --git a/doc/tutorials/images/ml.jpg b/doc/tutorials/images/ml.jpg deleted file mode 100644 index 40acfcfbfd..0000000000 Binary files a/doc/tutorials/images/ml.jpg and /dev/null differ diff --git a/doc/tutorials/images/objdetect.jpg b/doc/tutorials/images/objdetect.jpg deleted file mode 100644 index c811f348f5..0000000000 Binary files a/doc/tutorials/images/objdetect.jpg and /dev/null differ diff --git a/doc/tutorials/images/opencv_ios.png b/doc/tutorials/images/opencv_ios.png deleted file mode 100644 index ce2031d7c0..0000000000 Binary files a/doc/tutorials/images/opencv_ios.png and /dev/null differ diff --git a/doc/tutorials/images/photo.png b/doc/tutorials/images/photo.png deleted file mode 100644 index f701ffacf1..0000000000 Binary files a/doc/tutorials/images/photo.png and /dev/null differ diff --git a/doc/tutorials/images/retina.jpg b/doc/tutorials/images/retina.jpg deleted file mode 100644 index 2d2465070f..0000000000 Binary files a/doc/tutorials/images/retina.jpg and /dev/null differ diff --git a/doc/tutorials/images/video.jpg b/doc/tutorials/images/video.jpg deleted file mode 100644 index dd5d0c4ed2..0000000000 Binary files a/doc/tutorials/images/video.jpg and /dev/null differ diff --git a/doc/tutorials/images/viz.jpg b/doc/tutorials/images/viz.jpg deleted file mode 100644 index 7ac8f3ed8d..0000000000 Binary files a/doc/tutorials/images/viz.jpg and /dev/null differ diff --git a/doc/tutorials/imgcodecs/images/gdal-io.jpg b/doc/tutorials/imgcodecs/images/gdal-io.jpg deleted file mode 100644 index b2974ed2fb..0000000000 Binary files a/doc/tutorials/imgcodecs/images/gdal-io.jpg and /dev/null differ diff --git a/doc/tutorials/imgcodecs/table_of_content_highgui.markdown 
b/doc/tutorials/imgcodecs/table_of_content_highgui.markdown deleted file mode 100644 index b63b7b00ce..0000000000 --- a/doc/tutorials/imgcodecs/table_of_content_highgui.markdown +++ /dev/null @@ -1,14 +0,0 @@ -Image Input and Output (imgcodecs module) {#tutorial_table_of_content_imgcodecs} -========================================= - -This section contains tutorials about how to read/save your image files. - -- @subpage tutorial_raster_io_gdal - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Marvin Smith - - Read common GIS Raster and DEM files to display and manipulate geographic data. diff --git a/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown index 49fd621909..d480a19621 100644 --- a/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown +++ b/doc/tutorials/imgproc/anisotropic_image_segmentation/anisotropic_image_segmentation.markdown @@ -1,9 +1,16 @@ Anisotropic image segmentation by a gradient structure tensor {#tutorial_anisotropic_image_segmentation_by_a_gst} ========================== +@tableofcontents + @prev_tutorial{tutorial_motion_deblur_filter} @next_tutorial{tutorial_periodic_noise_removing_filter} +| | | +| -: | :- | +| Original author | Karpushin Vladislav | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/basic_geometric_drawing/basic_geometric_drawing.markdown b/doc/tutorials/imgproc/basic_geometric_drawing/basic_geometric_drawing.markdown index 77c44219f9..289000cbe2 100644 --- a/doc/tutorials/imgproc/basic_geometric_drawing/basic_geometric_drawing.markdown +++ b/doc/tutorials/imgproc/basic_geometric_drawing/basic_geometric_drawing.markdown @@ -1,8 +1,15 @@ Basic Drawing {#tutorial_basic_geometric_drawing} ============= +@tableofcontents + @next_tutorial{tutorial_random_generator_and_text} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goals ----- diff --git a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown index 42f8c7c38f..99179f7691 100644 --- a/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown +++ b/doc/tutorials/imgproc/erosion_dilatation/erosion_dilatation.markdown @@ -1,9 +1,16 @@ Eroding and Dilating {#tutorial_erosion_dilatation} ==================== +@tableofcontents + @prev_tutorial{tutorial_gausian_median_blur_bilateral_filter} @next_tutorial{tutorial_opening_closing_hats} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.markdown b/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.markdown index a03f95b6e4..beb09641c1 100644 --- a/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.markdown +++ b/doc/tutorials/imgproc/gausian_median_blur_bilateral_filter/gausian_median_blur_bilateral_filter.markdown @@ -1,9 +1,16 @@ Smoothing Images {#tutorial_gausian_median_blur_bilateral_filter} ================ +@tableofcontents + @prev_tutorial{tutorial_random_generator_and_text} @next_tutorial{tutorial_erosion_dilatation} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git 
a/doc/tutorials/imgproc/histograms/back_projection/back_projection.markdown b/doc/tutorials/imgproc/histograms/back_projection/back_projection.markdown index 61baca9bf1..4bc5f35012 100644 --- a/doc/tutorials/imgproc/histograms/back_projection/back_projection.markdown +++ b/doc/tutorials/imgproc/histograms/back_projection/back_projection.markdown @@ -1,9 +1,16 @@ Back Projection {#tutorial_back_projection} =============== +@tableofcontents + @prev_tutorial{tutorial_histogram_comparison} @next_tutorial{tutorial_template_matching} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.markdown b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.markdown index 0623ba12f2..5f16931269 100644 --- a/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.markdown +++ b/doc/tutorials/imgproc/histograms/histogram_calculation/histogram_calculation.markdown @@ -1,9 +1,16 @@ Histogram Calculation {#tutorial_histogram_calculation} ===================== +@tableofcontents + @prev_tutorial{tutorial_histogram_equalization} @next_tutorial{tutorial_histogram_comparison} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.markdown b/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.markdown index 8b7bf78377..cc38cba07a 100644 --- a/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.markdown +++ b/doc/tutorials/imgproc/histograms/histogram_comparison/histogram_comparison.markdown @@ -1,9 +1,16 @@ Histogram Comparison {#tutorial_histogram_comparison} ==================== +@tableofcontents + @prev_tutorial{tutorial_histogram_calculation} @next_tutorial{tutorial_back_projection} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.markdown b/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.markdown index 271c6d1347..e80032ba7c 100644 --- a/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.markdown +++ b/doc/tutorials/imgproc/histograms/histogram_equalization/histogram_equalization.markdown @@ -1,9 +1,16 @@ Histogram Equalization {#tutorial_histogram_equalization} ====================== +@tableofcontents + @prev_tutorial{tutorial_warp_affine} @next_tutorial{tutorial_histogram_calculation} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown index 5cc39e3b17..f03f761dd1 100644 --- a/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown +++ b/doc/tutorials/imgproc/histograms/template_matching/template_matching.markdown @@ -1,9 +1,16 @@ Template Matching {#tutorial_template_matching} ================= +@tableofcontents + @prev_tutorial{tutorial_back_projection} @next_tutorial{tutorial_find_contours} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/hitOrMiss/hitOrMiss.markdown b/doc/tutorials/imgproc/hitOrMiss/hitOrMiss.markdown index 
c55f09296f..887b3765c0 100644 --- a/doc/tutorials/imgproc/hitOrMiss/hitOrMiss.markdown +++ b/doc/tutorials/imgproc/hitOrMiss/hitOrMiss.markdown @@ -1,9 +1,16 @@ Hit-or-Miss {#tutorial_hitOrMiss} ================================= +@tableofcontents + @prev_tutorial{tutorial_opening_closing_hats} @next_tutorial{tutorial_morph_lines_detection} +| | | +| -: | :- | +| Original author | Lorena García | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/images/Morphology_1_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Morphology_1_Tutorial_Cover.jpg deleted file mode 100644 index 67da3a5ac0..0000000000 Binary files a/doc/tutorials/imgproc/images/Morphology_1_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/Morphology_2_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Morphology_2_Tutorial_Cover.jpg deleted file mode 100644 index b3a1c55659..0000000000 Binary files a/doc/tutorials/imgproc/images/Morphology_2_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/Morphology_3_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Morphology_3_Tutorial_Cover.jpg deleted file mode 100644 index 1eddc17554..0000000000 Binary files a/doc/tutorials/imgproc/images/Morphology_3_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/Pyramids_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Pyramids_Tutorial_Cover.jpg deleted file mode 100644 index 0851cab278..0000000000 Binary files a/doc/tutorials/imgproc/images/Pyramids_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/Smoothing_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Smoothing_Tutorial_Cover.jpg deleted file mode 100644 index 67656ab4b3..0000000000 Binary files a/doc/tutorials/imgproc/images/Smoothing_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/Threshold_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/Threshold_Tutorial_Cover.jpg deleted file mode 100644 index 6b115d88f5..0000000000 Binary files a/doc/tutorials/imgproc/images/Threshold_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/histograms/Back_Projection_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/histograms/Back_Projection_Tutorial_Cover.jpg deleted file mode 100644 index 013bdf6f2f..0000000000 Binary files a/doc/tutorials/imgproc/images/histograms/Back_Projection_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/histograms/Histogram_Calculation_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/histograms/Histogram_Calculation_Tutorial_Cover.jpg deleted file mode 100644 index 32d09e37e8..0000000000 Binary files a/doc/tutorials/imgproc/images/histograms/Histogram_Calculation_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/histograms/Histogram_Comparison_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/histograms/Histogram_Comparison_Tutorial_Cover.jpg deleted file mode 100644 index 7538a7203d..0000000000 Binary files a/doc/tutorials/imgproc/images/histograms/Histogram_Comparison_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/histograms/Histogram_Equalization_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/histograms/Histogram_Equalization_Tutorial_Cover.jpg deleted file mode 100644 index fbc5866835..0000000000 Binary files a/doc/tutorials/imgproc/images/histograms/Histogram_Equalization_Tutorial_Cover.jpg and /dev/null differ diff --git 
a/doc/tutorials/imgproc/images/histograms/Template_Matching_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/histograms/Template_Matching_Tutorial_Cover.jpg deleted file mode 100644 index e84f52119a..0000000000 Binary files a/doc/tutorials/imgproc/images/histograms/Template_Matching_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Canny_Detector_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Canny_Detector_Tutorial_Cover.jpg deleted file mode 100644 index bcd9ff9ace..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Canny_Detector_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/CopyMakeBorder_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/CopyMakeBorder_Tutorial_Cover.jpg deleted file mode 100644 index f241ff2238..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/CopyMakeBorder_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Distance_Transformation_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Distance_Transformation_Tutorial_Cover.jpg deleted file mode 100644 index 8effc42a04..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Distance_Transformation_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Filter_2D_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Filter_2D_Tutorial_Cover.jpg deleted file mode 100644 index c2f5809108..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Filter_2D_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Hough_Circle_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Hough_Circle_Tutorial_Cover.jpg deleted file mode 100644 index 175180ae81..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Hough_Circle_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Hough_Lines_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Hough_Lines_Tutorial_Cover.jpg deleted file mode 100644 index 4211ee2629..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Hough_Lines_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Laplace_Operator_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Laplace_Operator_Tutorial_Cover.jpg deleted file mode 100644 index 14373f25b1..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Laplace_Operator_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Remap_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Remap_Tutorial_Cover.jpg deleted file mode 100644 index bfb55dbdac..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Remap_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Sobel_Derivatives_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Sobel_Derivatives_Tutorial_Cover.jpg deleted file mode 100644 index fbe17c8978..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Sobel_Derivatives_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/imgtrans/Warp_Affine_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/imgtrans/Warp_Affine_Tutorial_Cover.jpg deleted file mode 100644 index 5655789bd6..0000000000 Binary files a/doc/tutorials/imgproc/images/imgtrans/Warp_Affine_Tutorial_Cover.jpg and /dev/null differ diff --git 
a/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rects_Circles_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rects_Circles_Tutorial_Cover.jpg deleted file mode 100644 index be2ae57d40..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rects_Circles_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rotated_Ellipses_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rotated_Ellipses_Tutorial_Cover.jpg deleted file mode 100644 index b7330592b5..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Bounding_Rotated_Ellipses_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/shapedescriptors/Find_Contours_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Find_Contours_Tutorial_Cover.jpg deleted file mode 100644 index 82888a1ba9..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Find_Contours_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/shapedescriptors/Hull_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Hull_Tutorial_Cover.jpg deleted file mode 100644 index a7a1b6ebeb..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Hull_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/shapedescriptors/Moments_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Moments_Tutorial_Cover.jpg deleted file mode 100644 index 1e865eb371..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Moments_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/images/shapedescriptors/Point_Polygon_Test_Tutorial_Cover.jpg b/doc/tutorials/imgproc/images/shapedescriptors/Point_Polygon_Test_Tutorial_Cover.jpg deleted file mode 100644 index 9980df8421..0000000000 Binary files a/doc/tutorials/imgproc/images/shapedescriptors/Point_Polygon_Test_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.markdown b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.markdown index 01bf6f862d..d585c77520 100644 --- a/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.markdown +++ b/doc/tutorials/imgproc/imgtrans/canny_detector/canny_detector.markdown @@ -1,9 +1,16 @@ Canny Edge Detector {#tutorial_canny_detector} =================== +@tableofcontents + @prev_tutorial{tutorial_laplace_operator} @next_tutorial{tutorial_hough_lines} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.markdown b/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.markdown index 8a4bbc0702..4acc06064f 100644 --- a/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.markdown +++ b/doc/tutorials/imgproc/imgtrans/copyMakeBorder/copyMakeBorder.markdown @@ -1,9 +1,16 @@ Adding borders to your images {#tutorial_copyMakeBorder} ============================= +@tableofcontents + @prev_tutorial{tutorial_filter_2d} @next_tutorial{tutorial_sobel_derivatives} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown b/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown index 
a5afffdbb1..28b81f4e6e 100644 --- a/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown +++ b/doc/tutorials/imgproc/imgtrans/distance_transformation/distance_transform.markdown @@ -1,9 +1,16 @@ Image Segmentation with Distance Transform and Watershed Algorithm {#tutorial_distance_transform} ============= +@tableofcontents + @prev_tutorial{tutorial_point_polygon_test} @next_tutorial{tutorial_out_of_focus_deblur_filter} +| | | +| -: | :- | +| Original author | Theodore Tsesmelis | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.markdown b/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.markdown index 454f745177..efe3fdf9ae 100644 --- a/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.markdown +++ b/doc/tutorials/imgproc/imgtrans/filter_2d/filter_2d.markdown @@ -1,9 +1,16 @@ Making your own linear filters! {#tutorial_filter_2d} =============================== +@tableofcontents + @prev_tutorial{tutorial_threshold_inRange} @next_tutorial{tutorial_copyMakeBorder} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.markdown b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.markdown index fe2f88be15..6b2f215901 100644 --- a/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.markdown +++ b/doc/tutorials/imgproc/imgtrans/hough_circle/hough_circle.markdown @@ -1,9 +1,16 @@ Hough Circle Transform {#tutorial_hough_circle} ====================== +@tableofcontents + @prev_tutorial{tutorial_hough_lines} @next_tutorial{tutorial_remap} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown index 8b24d87a2d..5edff16879 100644 --- a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown +++ b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown @@ -1,9 +1,16 @@ Hough Line Transform {#tutorial_hough_lines} ==================== +@tableofcontents + @prev_tutorial{tutorial_canny_detector} @next_tutorial{tutorial_hough_circle} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- @@ -217,7 +224,7 @@ First you apply the transform: - *theta*: The resolution of the parameter \f$\theta\f$ in radians. We use **1 degree** (CV_PI/180) - *threshold*: The minimum number of intersections to "*detect*" a line - - *minLinLength*: The minimum number of points that can form a line. Lines with less than + - *minLineLength*: The minimum number of points that can form a line. Lines with less than this number of points are disregarded. - *maxLineGap*: The maximum gap between two points to be considered in the same line. 
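For readers cross-checking the renamed parameter above, *minLineLength* is the sixth argument of **HoughLinesP()**. A minimal sketch of the call under assumed inputs — the file name and all tuning values here are illustrative, not taken from the tutorial:

@code{.cpp}
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <vector>

int main()
{
    cv::Mat src = cv::imread("building.jpg", cv::IMREAD_GRAYSCALE); // illustrative input
    if (src.empty()) return -1;

    cv::Mat edges;
    cv::Canny(src, edges, 50, 200, 3); // the probabilistic Hough transform expects an edge map

    std::vector<cv::Vec4i> lines;
    // rho = 1 pixel, theta = 1 degree, threshold = 50 votes,
    // minLineLength = 50 px, maxLineGap = 10 px (all illustrative)
    cv::HoughLinesP(edges, lines, 1, CV_PI / 180, 50, 50, 10);

    cv::Mat display;
    cv::cvtColor(edges, display, cv::COLOR_GRAY2BGR);
    for (const cv::Vec4i& l : lines)
        cv::line(display, cv::Point(l[0], l[1]), cv::Point(l[2], l[3]),
                 cv::Scalar(0, 0, 255), 2, cv::LINE_AA);

    cv::imshow("Detected lines", display);
    cv::waitKey();
    return 0;
}
@endcode

Candidate segments shorter than *minLineLength*, or broken by gaps wider than *maxLineGap*, are discarded or split, which is why the two values are usually tuned together.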
diff --git a/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.markdown b/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.markdown index 63aed356b2..27b4aa98db 100644 --- a/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.markdown +++ b/doc/tutorials/imgproc/imgtrans/laplace_operator/laplace_operator.markdown @@ -1,9 +1,16 @@ Laplace Operator {#tutorial_laplace_operator} ================ +@tableofcontents + @prev_tutorial{tutorial_sobel_derivatives} @next_tutorial{tutorial_canny_detector} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/remap/remap.markdown b/doc/tutorials/imgproc/imgtrans/remap/remap.markdown index 58c79c6039..3c3d95c4cc 100644 --- a/doc/tutorials/imgproc/imgtrans/remap/remap.markdown +++ b/doc/tutorials/imgproc/imgtrans/remap/remap.markdown @@ -1,9 +1,16 @@ Remapping {#tutorial_remap} ========= +@tableofcontents + @prev_tutorial{tutorial_hough_circle} @next_tutorial{tutorial_warp_affine} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.markdown b/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.markdown index f8725d2a12..4183476524 100644 --- a/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.markdown +++ b/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.markdown @@ -1,9 +1,16 @@ Sobel Derivatives {#tutorial_sobel_derivatives} ================= +@tableofcontents + @prev_tutorial{tutorial_copyMakeBorder} @next_tutorial{tutorial_laplace_operator} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown index b5023ad03e..22d5298f18 100644 --- a/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown +++ b/doc/tutorials/imgproc/imgtrans/warp_affine/warp_affine.markdown @@ -1,9 +1,16 @@ Affine Transformations {#tutorial_warp_affine} ====================== +@tableofcontents + @prev_tutorial{tutorial_remap} @next_tutorial{tutorial_histogram_equalization} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md b/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md index ce9e81e211..74b117f849 100644 --- a/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md +++ b/doc/tutorials/imgproc/morph_lines_detection/morph_lines_detection.md @@ -1,9 +1,16 @@ Extract horizontal and vertical lines by using morphological operations {#tutorial_morph_lines_detection} ============= +@tableofcontents + @prev_tutorial{tutorial_hitOrMiss} @next_tutorial{tutorial_pyramids} +| | | +| -: | :- | +| Original author | Theodore Tsesmelis | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown b/doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown index 704e0ef275..fd286deda5 100644 --- a/doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown +++ b/doc/tutorials/imgproc/motion_deblur_filter/motion_deblur_filter.markdown @@ -1,9 +1,16 @@ Motion Deblur Filter {#tutorial_motion_deblur_filter} ========================== 
+@tableofcontents + @prev_tutorial{tutorial_out_of_focus_deblur_filter} @next_tutorial{tutorial_anisotropic_image_segmentation_by_a_gst} +| | | +| -: | :- | +| Original author | Karpushin Vladislav | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.markdown b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.markdown index e918c65ce7..8f991176d9 100644 --- a/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.markdown +++ b/doc/tutorials/imgproc/opening_closing_hats/opening_closing_hats.markdown @@ -1,9 +1,16 @@ More Morphology Transformations {#tutorial_opening_closing_hats} =============================== +@tableofcontents + @prev_tutorial{tutorial_erosion_dilatation} @next_tutorial{tutorial_hitOrMiss} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown index 800286d9a8..13db710b32 100644 --- a/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown +++ b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown @@ -1,9 +1,16 @@ Out-of-focus Deblur Filter {#tutorial_out_of_focus_deblur_filter} ========================== +@tableofcontents + @prev_tutorial{tutorial_distance_transform} @next_tutorial{tutorial_motion_deblur_filter} +| | | +| -: | :- | +| Original author | Karpushin Vladislav | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown b/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown index 3c36a1e9c4..dff204a2f2 100644 --- a/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown +++ b/doc/tutorials/imgproc/periodic_noise_removing_filter/periodic_noise_removing_filter.markdown @@ -1,8 +1,15 @@ Periodic Noise Removing Filter {#tutorial_periodic_noise_removing_filter} ========================== +@tableofcontents + @prev_tutorial{tutorial_anisotropic_image_segmentation_by_a_gst} +| | | +| -: | :- | +| Original author | Karpushin Vladislav | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/pyramids/pyramids.markdown b/doc/tutorials/imgproc/pyramids/pyramids.markdown index c11a80527f..82ef81e369 100644 --- a/doc/tutorials/imgproc/pyramids/pyramids.markdown +++ b/doc/tutorials/imgproc/pyramids/pyramids.markdown @@ -1,9 +1,16 @@ Image Pyramids {#tutorial_pyramids} ============== +@tableofcontents + @prev_tutorial{tutorial_morph_lines_detection} @next_tutorial{tutorial_threshold} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- @@ -163,7 +170,7 @@ Our program exits if the user presses **ESC**. Besides, it has two options: We use the function **pyrDown()** with three arguments (similarly to **pyrUp()**): - *src*: The current and destination image (to be shown on screen, supposedly half the input image) - - *Size( tmp.cols/2, tmp.rows/2 )* : The destination size. Since we are upsampling, + - *Size( tmp.cols/2, tmp.rows/2 )* : The destination size. Since we are downsampling, **pyrDown()** expects half the size the input image (in this case *src*). 
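To make the corrected sentence concrete: **pyrDown()** produces an image half the width and height of its input, while **pyrUp()** doubles both dimensions. A minimal sketch — the input file name is only an illustrative assumption:

@code{.cpp}
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

int main()
{
    cv::Mat src = cv::imread("chicky_512.png"); // illustrative input
    if (src.empty()) return -1;

    cv::Mat half, restored;
    // Downsampling: the destination is half the size of the input
    cv::pyrDown(src, half, cv::Size(src.cols / 2, src.rows / 2));
    // Upsampling: the destination is twice the size of the input
    cv::pyrUp(half, restored, cv::Size(half.cols * 2, half.rows * 2));

    cv::imshow("Half size", half);
    cv::imshow("Restored size", restored);
    cv::waitKey();
    return 0;
}
@endcode

Note that chaining **pyrDown()** and then **pyrUp()** does not recover the original detail: the Gaussian smoothing in each step discards the higher spatial frequencies.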
@add_toggle_cpp diff --git a/doc/tutorials/imgproc/random_generator_and_text/random_generator_and_text.markdown b/doc/tutorials/imgproc/random_generator_and_text/random_generator_and_text.markdown index f588bbc44d..4100d91622 100644 --- a/doc/tutorials/imgproc/random_generator_and_text/random_generator_and_text.markdown +++ b/doc/tutorials/imgproc/random_generator_and_text/random_generator_and_text.markdown @@ -1,9 +1,16 @@ Random generator and text with OpenCV {#tutorial_random_generator_and_text} ===================================== +@tableofcontents + @prev_tutorial{tutorial_basic_geometric_drawing} @next_tutorial{tutorial_gausian_median_blur_bilateral_filter} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goals ----- diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown index d6194dfd3f..2c6d59b579 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown @@ -1,9 +1,16 @@ Creating Bounding boxes and circles for contours {#tutorial_bounding_rects_circles} ================================================ +@tableofcontents + @prev_tutorial{tutorial_hull} @next_tutorial{tutorial_bounding_rotated_ellipses} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown index a4c29b2fde..c15d73e291 100644 --- a/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/bounding_rotated_ellipses/bounding_rotated_ellipses.markdown @@ -1,9 +1,16 @@ Creating Bounding rotated boxes and ellipses for contours {#tutorial_bounding_rotated_ellipses} ========================================================= +@tableofcontents + @prev_tutorial{tutorial_bounding_rects_circles} @next_tutorial{tutorial_moments} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown index b8aa6d898f..dc112b9402 100644 --- a/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/find_contours/find_contours.markdown @@ -1,9 +1,16 @@ Finding contours in your image {#tutorial_find_contours} ============================== +@tableofcontents + @prev_tutorial{tutorial_template_matching} @next_tutorial{tutorial_hull} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown b/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown index e40934e6e2..36763fd81d 100644 --- a/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/hull/hull.markdown @@ -1,9 +1,16 @@ Convex Hull {#tutorial_hull} =========== +@tableofcontents + @prev_tutorial{tutorial_find_contours} @next_tutorial{tutorial_bounding_rects_circles} +| | | +| -: | :- | +| 
Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown b/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown index 683568ab0c..4e47242cd1 100644 --- a/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/moments/moments.markdown @@ -1,9 +1,16 @@ Image Moments {#tutorial_moments} ============= +@tableofcontents + @prev_tutorial{tutorial_bounding_rotated_ellipses} @next_tutorial{tutorial_point_polygon_test} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown index 2e02fb8815..6f4c6d7ebd 100644 --- a/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown +++ b/doc/tutorials/imgproc/shapedescriptors/point_polygon_test/point_polygon_test.markdown @@ -1,9 +1,16 @@ Point Polygon Test {#tutorial_point_polygon_test} ================== +@tableofcontents + @prev_tutorial{tutorial_moments} @next_tutorial{tutorial_distance_transform} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/table_of_content_imgproc.markdown b/doc/tutorials/imgproc/table_of_content_imgproc.markdown index b0a8b8260b..edffd706bd 100644 --- a/doc/tutorials/imgproc/table_of_content_imgproc.markdown +++ b/doc/tutorials/imgproc/table_of_content_imgproc.markdown @@ -1,298 +1,52 @@ Image Processing (imgproc module) {#tutorial_table_of_content_imgproc} ================================= -In this section you will learn about the image processing (manipulation) functions inside OpenCV. - +Basic +----- - @subpage tutorial_basic_geometric_drawing - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - We will learn how to draw simple geometry with OpenCV! - - @subpage tutorial_random_generator_and_text - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - We will draw some *fancy-looking* stuff using OpenCV! - - @subpage tutorial_gausian_median_blur_bilateral_filter - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Let's take a look at some basic linear filters! - - @subpage tutorial_erosion_dilatation - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - Author: Ana Huamán - - Let's *change* the shape of objects! - - @subpage tutorial_opening_closing_hats - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Here we investigate different morphology operators - - @subpage tutorial_hitOrMiss - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.4 - - *Author:* Lorena García - - Learn how to find patterns in binary images using the Hit-or-Miss operation - - @subpage tutorial_morph_lines_detection - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Theodore Tsesmelis - - Here we will show how we can use different morphological operators to extract horizontal and vertical lines - - @subpage tutorial_pyramids - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - What if I need a bigger/smaller image? 
- - @subpage tutorial_threshold - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - After so much processing, it is time to decide which pixels stay - - @subpage tutorial_threshold_inRange - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Rishiraj Surti - - Thresholding operations using inRange function. - +Transformations +--------------- - @subpage tutorial_filter_2d - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn to design our own filters by using OpenCV functions - - @subpage tutorial_copyMakeBorder - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to pad our images - - @subpage tutorial_sobel_derivatives - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to calculate gradients and use them to detect edges - - @subpage tutorial_laplace_operator - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn about the *Laplace* operator and how to detect edges with it - - @subpage tutorial_canny_detector - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn a sophisticated alternative to detect edges - - @subpage tutorial_hough_lines - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to detect lines - - @subpage tutorial_hough_circle - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to detect circles - - @subpage tutorial_remap - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to manipulate pixels locations - - @subpage tutorial_warp_affine - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to rotate, translate and scale our images - +Histograms +---------- - @subpage tutorial_histogram_equalization - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to improve the contrast in our images - - @subpage tutorial_histogram_calculation - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to create and generate histograms - - @subpage tutorial_histogram_comparison - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn to calculate metrics between histograms - - @subpage tutorial_back_projection - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to use histograms to find similar objects in images - - @subpage tutorial_template_matching - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to match templates in an image - -- @subpage tutorial_table_of_contents_contours - - Learn how to find contours in images and investigate their properties and features. 
+Contours +-------- +- @subpage tutorial_find_contours +- @subpage tutorial_hull +- @subpage tutorial_bounding_rects_circles +- @subpage tutorial_bounding_rotated_ellipses +- @subpage tutorial_moments +- @subpage tutorial_point_polygon_test +Others +------ - @subpage tutorial_distance_transform - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Theodore Tsesmelis - - Where we learn to segment objects using Laplacian filtering, the Distance Transformation and the Watershed algorithm. - - @subpage tutorial_out_of_focus_deblur_filter - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Karpushin Vladislav - - You will learn how to recover an out-of-focus image by Wiener filter. - - @subpage tutorial_motion_deblur_filter - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Karpushin Vladislav - - You will learn how to recover an image with motion blur distortion using a Wiener filter. - - @subpage tutorial_anisotropic_image_segmentation_by_a_gst - - *Languages:* C++, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Karpushin Vladislav - - You will learn how to segment an anisotropic image with a single local orientation by a gradient structure tensor. - - @subpage tutorial_periodic_noise_removing_filter - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Karpushin Vladislav - - You will learn how to remove periodic noise in the Fourier domain. diff --git a/doc/tutorials/imgproc/table_of_contents_contours.markdown b/doc/tutorials/imgproc/table_of_contents_contours.markdown index 3e8bba3a62..cc2f133bfd 100644 --- a/doc/tutorials/imgproc/table_of_contents_contours.markdown +++ b/doc/tutorials/imgproc/table_of_contents_contours.markdown @@ -1,62 +1,4 @@ Contours in OpenCV {#tutorial_table_of_contents_contours} ================== -- @subpage tutorial_find_contours - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to find contours of objects in our image - -- @subpage tutorial_hull - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to get hull contours and draw them - -- @subpage tutorial_bounding_rects_circles - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to obtain bounding boxes and circles for our contours - -- @subpage tutorial_bounding_rotated_ellipses - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to obtain rotated bounding boxes and ellipses for our contours - -- @subpage tutorial_moments - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn to calculate the moments of an image - -- @subpage tutorial_point_polygon_test - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Where we learn how to calculate distances from the image to contours +Content has been moved to this page: @ref tutorial_table_of_content_imgproc diff --git a/doc/tutorials/imgproc/threshold/threshold.markdown b/doc/tutorials/imgproc/threshold/threshold.markdown index a452d14042..f7458d192c 100644 --- a/doc/tutorials/imgproc/threshold/threshold.markdown +++ b/doc/tutorials/imgproc/threshold/threshold.markdown @@ -1,9 +1,16 @@ Basic Thresholding Operations {#tutorial_threshold} ============================= +@tableofcontents + @prev_tutorial{tutorial_pyramids} 
@next_tutorial{tutorial_threshold_inRange} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/imgproc/threshold_inRange/threshold_inRange.markdown b/doc/tutorials/imgproc/threshold_inRange/threshold_inRange.markdown index 0995b9758c..f05ebe477a 100644 --- a/doc/tutorials/imgproc/threshold_inRange/threshold_inRange.markdown +++ b/doc/tutorials/imgproc/threshold_inRange/threshold_inRange.markdown @@ -1,9 +1,16 @@ Thresholding Operations using inRange {#tutorial_threshold_inRange} ===================================== +@tableofcontents + @prev_tutorial{tutorial_threshold} @next_tutorial{tutorial_filter_2d} +| | | +| -: | :- | +| Original author | Lorena García | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown b/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown index bfc5842441..938dd613ed 100644 --- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown +++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown @@ -28,7 +28,7 @@ If you need help with anything of the above, you may refer to our @ref tutorial_ If you encounter any error after thoroughly following these steps, feel free to contact us via [OpenCV4Android](https://groups.google.com/group/android-opencv/) discussion group or OpenCV [Q&A -forum](http://answers.opencv.org). We'll do our best to help you out. +forum](https://forum.opencv.org). We'll do our best to help you out. General info ------------ diff --git a/doc/tutorials/introduction/android_binary_package/android_dev_intro.markdown b/doc/tutorials/introduction/android_binary_package/android_dev_intro.markdown index 47dc07be43..584f9a2f6c 100644 --- a/doc/tutorials/introduction/android_binary_package/android_dev_intro.markdown +++ b/doc/tutorials/introduction/android_binary_package/android_dev_intro.markdown @@ -19,7 +19,7 @@ working environment quickly. It was written with Windows 7 in mind, though it wo If you encounter any error after thoroughly following these steps, feel free to contact us via [OpenCV4Android](https://groups.google.com/group/android-opencv/) discussion group or OpenCV [Q&A -forum](http://answers.opencv.org). We'll do our best to help you out. +forum](https://forum.opencv.org). We'll do our best to help you out. Preface ------- diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown index 92dd77af67..5acdbc41ed 100644 --- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown +++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown @@ -32,7 +32,7 @@ may consult our @ref tutorial_O4A_SDK tutorial. If you encounter any error after thoroughly following these steps, feel free to contact us via [OpenCV4Android](https://groups.google.com/group/android-opencv/) discussion group or OpenCV [Q&A -forum](http://answers.opencv.org) . We'll do our best to help you out. +forum](https://forum.opencv.org) . We'll do our best to help you out.
Using OpenCV Library Within Your Android Project ------------------------------------------------ diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 1cabf23200..1d4f426c8f 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -1,6 +1,9 @@ OpenCV configuration options reference {#tutorial_config_reference} ====================================== +@prev_tutorial{tutorial_general_install} +@next_tutorial{tutorial_linux_install} + @tableofcontents # Introduction {#tutorial_config_reference_intro} @@ -411,9 +414,9 @@ Some of OpenCV algorithms can use multithreading to accelerate processing. OpenC | pthreads | `WITH_PTHREADS_PF` | _ON_ | Unix-like | Default backend based on [pthreads](https://en.wikipedia.org/wiki/POSIX_Threads) library is available on Linux, Android and other Unix-like platforms. Thread pool is implemented in OpenCV and can be controlled with environment variables `OPENCV_THREAD_POOL_*`. Please check sources in _modules/core/src/parallel_impl.cpp_ file for details. | | Concurrency | N/A | _ON_ | Windows | [Concurrency runtime](https://docs.microsoft.com/en-us/cpp/parallel/concrt/concurrency-runtime) is available on Windows and will be turned _ON_ on supported platforms unless other backend is enabled. | | GCD | N/A | _ON_ | Apple | [Grand Central Dispatch](https://en.wikipedia.org/wiki/Grand_Central_Dispatch) is available on Apple platforms and will be turned _ON_ automatically unless other backend is enabled. Uses global system thread pool. | -| TBB | `WITH_TBB` | Multiple | _OFF_ | [Threading Building Blocks](https://en.wikipedia.org/wiki/Threading_Building_Blocks) is a cross-platform library for parallel programming. | -| OpenMP | `WITH_OPENMP` | Multiple | _OFF_ | [OpenMP](https://en.wikipedia.org/wiki/OpenMP) API relies on compiler support. | -| HPX | `WITH_HPX` | Multiple | _OFF_ | [High Performance ParallelX](https://en.wikipedia.org/wiki/HPX) is an experimental backend which is more suitable for multiprocessor environments. | +| TBB | `WITH_TBB` | _OFF_ | Multiple | [Threading Building Blocks](https://en.wikipedia.org/wiki/Threading_Building_Blocks) is a cross-platform library for parallel programming. | +| OpenMP | `WITH_OPENMP` | _OFF_ | Multiple | [OpenMP](https://en.wikipedia.org/wiki/OpenMP) API relies on compiler support. | +| HPX | `WITH_HPX` | _OFF_ | Multiple | [High Performance ParallelX](https://en.wikipedia.org/wiki/HPX) is an experimental backend which is more suitable for multiprocessor environments. | @note OpenCV can download and build TBB library from GitHub, this functionality can be enabled with the `BUILD_TBB` option. 
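Whichever backend the `WITH_*` options in the table above select, it sits behind the same runtime API, so application code does not change between TBB, OpenMP, and pthreads builds. A minimal sketch of exercising that API — the thread cap and the loop body are illustrative only:

@code{.cpp}
#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main()
{
    std::cout << "Worker threads available: " << cv::getNumThreads() << std::endl;
    cv::setNumThreads(4); // cap the pool of whichever backend was compiled in

    std::vector<int> results(1000);
    cv::parallel_for_(cv::Range(0, (int)results.size()), [&](const cv::Range& range)
    {
        // Each chunk of the range may run on a different worker thread
        for (int i = range.start; i < range.end; i++)
            results[i] = i * i;
    });
    std::cout << "results[999] = " << results[999] << std::endl;
    return 0;
}
@endcode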
diff --git a/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.markdown b/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.markdown index 91353b2990..058b5c92f2 100644 --- a/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.markdown +++ b/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.markdown @@ -1,7 +1,7 @@ Cross compilation for ARM based Linux systems {#tutorial_arm_crosscompile_with_cmake} ============================================= -@prev_tutorial{tutorial_ios_install} +@prev_tutorial{tutorial_macos_install} @next_tutorial{tutorial_building_tegra_cuda} | | | diff --git a/doc/tutorials/introduction/documenting_opencv/documentation_tutorial.markdown b/doc/tutorials/introduction/documenting_opencv/documentation_tutorial.markdown index 2cce88c856..3654f939c0 100644 --- a/doc/tutorials/introduction/documenting_opencv/documentation_tutorial.markdown +++ b/doc/tutorials/introduction/documenting_opencv/documentation_tutorial.markdown @@ -667,20 +667,9 @@ Write the tutorial {#tutorial_documentation_steps_tutorial} 6. Add newly created tutorial to the corresponding table of contents. Just find "table_of_content_*.markdown" file with the needed table and place new record in it similar to existing ones. - @verbatim -- @subpage tutorial_windows_visual_studio_image_watch - _Languages:_ C++, Java, Python - - _Compatibility:_ \>= OpenCV 2.4 - - _Author:_ Wolf Kienzle - - You will learn how to visualize OpenCV matrices and images within Visual Studio 2012. - @endverbatim - As you can see it is just a list item with special _subpage_ command which marks your page as a - child and places it into the existing pages hierarchy. Add compatibility information, - authors list and short description. Also note the list item indent, empty lines between + It is simply a list item with a special _subpage_ command which marks your page as a + child and places it into the existing pages hierarchy. Also note the list item indent, empty lines between paragraphs and special _italic_ markers. 7. Generate doxygen documentation and verify results. diff --git a/doc/tutorials/introduction/general_install/general_install.markdown b/doc/tutorials/introduction/general_install/general_install.markdown index 2fa3a17223..e8c93f430e 100644 --- a/doc/tutorials/introduction/general_install/general_install.markdown +++ b/doc/tutorials/introduction/general_install/general_install.markdown @@ -1,6 +1,8 @@ OpenCV installation overview {#tutorial_general_install} ============================ +@next_tutorial{tutorial_config_reference} + @tableofcontents There are two ways of installing OpenCV on your machine: download prebuilt version for your platform or compile from sources.
diff --git a/doc/tutorials/introduction/images/Display_Image_Tutorial_Result.jpg b/doc/tutorials/introduction/images/Display_Image_Tutorial_Result.jpg deleted file mode 100644 index 16400698f0..0000000000 Binary files a/doc/tutorials/introduction/images/Display_Image_Tutorial_Result.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/images/Java_logo.png b/doc/tutorials/introduction/images/Java_logo.png deleted file mode 100644 index 2114751896..0000000000 Binary files a/doc/tutorials/introduction/images/Java_logo.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/android_logo.png b/doc/tutorials/introduction/images/android_logo.png deleted file mode 100644 index 69bccd74d2..0000000000 Binary files a/doc/tutorials/introduction/images/android_logo.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/clojure-logo.png b/doc/tutorials/introduction/images/clojure-logo.png deleted file mode 100644 index f8a29b965c..0000000000 Binary files a/doc/tutorials/introduction/images/clojure-logo.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/eclipse-logo.png b/doc/tutorials/introduction/images/eclipse-logo.png deleted file mode 100644 index 64ec01c253..0000000000 Binary files a/doc/tutorials/introduction/images/eclipse-logo.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/eclipse_cpp_logo.jpeg b/doc/tutorials/introduction/images/eclipse_cpp_logo.jpeg deleted file mode 100644 index e63e26b1b4..0000000000 Binary files a/doc/tutorials/introduction/images/eclipse_cpp_logo.jpeg and /dev/null differ diff --git a/doc/tutorials/introduction/images/gccegg-65.jpg b/doc/tutorials/introduction/images/gccegg-65.jpg deleted file mode 100644 index e3e44d1f6c..0000000000 Binary files a/doc/tutorials/introduction/images/gccegg-65.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/images/how_to_write_a_tutorial.png b/doc/tutorials/introduction/images/how_to_write_a_tutorial.png deleted file mode 100644 index ae40fc3d32..0000000000 Binary files a/doc/tutorials/introduction/images/how_to_write_a_tutorial.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/lena.png b/doc/tutorials/introduction/images/lena.png deleted file mode 100644 index 68342fae53..0000000000 Binary files a/doc/tutorials/introduction/images/lena.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/opencv_ios.png b/doc/tutorials/introduction/images/opencv_ios.png deleted file mode 100644 index ce2031d7c0..0000000000 Binary files a/doc/tutorials/introduction/images/opencv_ios.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/ubuntu-logo.jpg b/doc/tutorials/introduction/images/ubuntu-logo.jpg deleted file mode 100644 index a34243496c..0000000000 Binary files a/doc/tutorials/introduction/images/ubuntu-logo.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/images/visual-studio-2010-logo.jpg b/doc/tutorials/introduction/images/visual-studio-2010-logo.jpg deleted file mode 100644 index 8b053695c4..0000000000 Binary files a/doc/tutorials/introduction/images/visual-studio-2010-logo.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/images/visual_studio_image_watch.png b/doc/tutorials/introduction/images/visual_studio_image_watch.png deleted file mode 100644 index e693344df8..0000000000 Binary files a/doc/tutorials/introduction/images/visual_studio_image_watch.png and /dev/null differ diff --git a/doc/tutorials/introduction/images/windows_logo.jpg 
b/doc/tutorials/introduction/images/windows_logo.jpg deleted file mode 100644 index e35a8a86ae..0000000000 Binary files a/doc/tutorials/introduction/images/windows_logo.jpg and /dev/null differ diff --git a/doc/tutorials/introduction/macos_install/macos_install.markdown b/doc/tutorials/introduction/macos_install/macos_install.markdown index ec708101a0..dadce9304c 100644 --- a/doc/tutorials/introduction/macos_install/macos_install.markdown +++ b/doc/tutorials/introduction/macos_install/macos_install.markdown @@ -2,7 +2,7 @@ Installation in MacOS {#tutorial_macos_install} ===================== @prev_tutorial{tutorial_android_ocl_intro} -@next_tutorial{tutorial_ios_install} +@next_tutorial{tutorial_arm_crosscompile_with_cmake} | | | | -: | :- | diff --git a/doc/tutorials/introduction/table_of_content_introduction.markdown b/doc/tutorials/introduction/table_of_content_introduction.markdown index 2eb95e7b72..d1f2aa3ca3 100644 --- a/doc/tutorials/introduction/table_of_content_introduction.markdown +++ b/doc/tutorials/introduction/table_of_content_introduction.markdown @@ -25,9 +25,9 @@ Introduction to OpenCV {#tutorial_table_of_content_introduction} ##### Other platforms - @subpage tutorial_macos_install -- @subpage tutorial_ios_install - @subpage tutorial_arm_crosscompile_with_cmake - @subpage tutorial_building_tegra_cuda +- @ref tutorial_ios_install ##### Usage basics - @subpage tutorial_display_image - We will learn how to load an image from file and display it using OpenCV diff --git a/doc/tutorials/introduction/windows_install/windows_install.markdown b/doc/tutorials/introduction/windows_install/windows_install.markdown index 0eff40dd13..56fe64998c 100644 --- a/doc/tutorials/introduction/windows_install/windows_install.markdown +++ b/doc/tutorials/introduction/windows_install/windows_install.markdown @@ -14,7 +14,7 @@ This tutorial can contain obsolete information. The description here was tested on Windows 7 SP1. Nevertheless, it should also work on any other relatively modern version of Windows OS. If you encounter errors after following the steps described -below, feel free to contact us via our [OpenCV Q&A forum](http://answers.opencv.org). We'll do our +below, feel free to contact us via our [OpenCV Q&A forum](https://forum.opencv.org). We'll do our best to help you out. @note To use the OpenCV library you have two options: @ref tutorial_windows_install_prebuilt or @@ -55,12 +55,12 @@ cd /c/lib @code{.bash} #!/bin/bash -e myRepo=$(pwd) -CMAKE_CONFIG_GENERATOR="Visual Studio 14 2015 Win64" +CMAKE_GENERATOR_OPTIONS=-G"Visual Studio 16 2019" +#CMAKE_GENERATOR_OPTIONS=-G"Visual Studio 15 2017 Win64" +#CMAKE_GENERATOR_OPTIONS=(-G"Visual Studio 16 2019" -A x64) # CMake 3.14+ is required if [ ! -d "$myRepo/opencv" ]; then echo "cloning opencv" git clone https://github.com/opencv/opencv.git - mkdir -p Build/opencv - mkdir -p Install/opencv else cd opencv git pull --rebase @@ -69,16 +69,17 @@ fi if [ ! -d "$myRepo/opencv_contrib" ]; then echo "cloning opencv_contrib" git clone https://github.com/opencv/opencv_contrib.git - mkdir -p Build/opencv_contrib else cd opencv_contrib git pull --rebase cd .. 
fi RepoSource=opencv -pushd Build/$RepoSource -CMAKE_OPTIONS='-DBUILD_PERF_TESTS:BOOL=OFF -DBUILD_TESTS:BOOL=OFF -DBUILD_DOCS:BOOL=OFF -DWITH_CUDA:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF -DINSTALL_CREATE_DISTRIB=ON' -cmake -G"$CMAKE_CONFIG_GENERATOR" $CMAKE_OPTIONS -DOPENCV_EXTRA_MODULES_PATH="$myRepo"/opencv_contrib/modules -DCMAKE_INSTALL_PREFIX="$myRepo"/install/"$RepoSource" "$myRepo/$RepoSource" +mkdir -p build_opencv +pushd build_opencv +CMAKE_OPTIONS=(-DBUILD_PERF_TESTS:BOOL=OFF -DBUILD_TESTS:BOOL=OFF -DBUILD_DOCS:BOOL=OFF -DWITH_CUDA:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF -DINSTALL_CREATE_DISTRIB=ON) +set -x +cmake "${CMAKE_GENERATOR_OPTIONS[@]}" "${CMAKE_OPTIONS[@]}" -DOPENCV_EXTRA_MODULES_PATH="$myRepo"/opencv_contrib/modules -DCMAKE_INSTALL_PREFIX="$myRepo/install/$RepoSource" "$myRepo/$RepoSource" echo "************************* $Source_DIR -->debug" cmake --build . --config debug echo "************************* $Source_DIR -->release" @@ -89,15 +90,15 @@ popd @endcode In this script I suppose you use VS 2015 in 64 bits @code{.bash} -CMAKE_CONFIG_GENERATOR="Visual Studio 14 2015 Win64" +CMAKE_GENERATOR_OPTIONS=-G"Visual Studio 14 2015 Win64" @endcode - and opencv will be installed in c:/lib/install + and opencv will be installed in c:/lib/install/opencv @code{.bash} --DCMAKE_INSTALL_PREFIX="$myRepo"/install/"$RepoSource" "$myRepo/$RepoSource" +-DCMAKE_INSTALL_PREFIX="$myRepo/install/$RepoSource" @endcode with no Perf tests, no tests, no doc, no CUDA and no example @code{.bash} -CMAKE_OPTIONS='-DBUILD_PERF_TESTS:BOOL=OFF -DBUILD_TESTS:BOOL=OFF -DBUILD_DOCS:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF' +CMAKE_OPTIONS=(-DBUILD_PERF_TESTS:BOOL=OFF -DBUILD_TESTS:BOOL=OFF -DBUILD_DOCS:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF) @endcode -# In git command line enter following command : @code{.bash} @@ -351,7 +352,7 @@ libraries). If you do not need the support for some of these, you can just freel To test your build just go into the `Build/bin/Debug` or `Build/bin/Release` directory and start a couple of applications like the *contours.exe*. If they run, you are done. Otherwise, - something definitely went awfully wrong. In this case you should contact us at our [Q&A forum](http://answers.opencv.org/). + something definitely went awfully wrong. In this case you should contact us at our [Q&A forum](https://forum.opencv.org/). 
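A hand-written equivalent of that *contours.exe* smoke test takes only a few lines; a minimal sketch, assuming a hypothetical input image `test.png`:

@code{.cpp}
// Build self-test: load an image, binarize it, count external contours.
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <vector>

int main()
{
    // "test.png" is a hypothetical input file.
    cv::Mat img = cv::imread("test.png", cv::IMREAD_GRAYSCALE);
    if (img.empty()) { std::cerr << "cannot read test.png" << std::endl; return 1; }

    cv::Mat bin;
    cv::threshold(img, bin, 128, 255, cv::THRESH_BINARY);

    std::vector<std::vector<cv::Point> > contours;
    cv::findContours(bin, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
    std::cout << "found " << contours.size() << " contours" << std::endl;
    return 0;
}
@endcode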
If everything is okay, the *contours.exe* output should resemble the following image (if built with Qt support): diff --git a/doc/tutorials/ios/hello/hello.markdown b/doc/tutorials/ios/hello/hello.markdown index fc6992cc70..87d39fb8fb 100644 --- a/doc/tutorials/ios/hello/hello.markdown +++ b/doc/tutorials/ios/hello/hello.markdown @@ -1,8 +1,16 @@ OpenCV iOS Hello {#tutorial_hello} ================ +@tableofcontents + +@prev_tutorial{tutorial_ios_install} @next_tutorial{tutorial_image_manipulation} +| | | +| -: | :- | +| Original author | Charu Hans | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown index 61590d8f77..f01aa6e4f8 100644 --- a/doc/tutorials/ios/image_manipulation/image_manipulation.markdown +++ b/doc/tutorials/ios/image_manipulation/image_manipulation.markdown @@ -1,9 +1,16 @@ OpenCV iOS - Image Processing {#tutorial_image_manipulation} ============================= +@tableofcontents + @prev_tutorial{tutorial_hello} @next_tutorial{tutorial_video_processing} +| | | +| -: | :- | +| Original author | Charu Hans | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/ios/images/facedetect.jpg b/doc/tutorials/ios/images/facedetect.jpg deleted file mode 100644 index 788b7d8262..0000000000 Binary files a/doc/tutorials/ios/images/facedetect.jpg and /dev/null differ diff --git a/doc/tutorials/ios/images/image_effects.png b/doc/tutorials/ios/images/image_effects.png deleted file mode 100644 index 25edb668f9..0000000000 Binary files a/doc/tutorials/ios/images/image_effects.png and /dev/null differ diff --git a/doc/tutorials/ios/images/intro.png b/doc/tutorials/ios/images/intro.png deleted file mode 100644 index 5f2dc1aa4c..0000000000 Binary files a/doc/tutorials/ios/images/intro.png and /dev/null differ diff --git a/doc/tutorials/introduction/ios_install/ios_install.markdown b/doc/tutorials/ios/ios_install/ios_install.markdown similarity index 96% rename from doc/tutorials/introduction/ios_install/ios_install.markdown rename to doc/tutorials/ios/ios_install/ios_install.markdown index cbe3902602..4353808e14 100644 --- a/doc/tutorials/introduction/ios_install/ios_install.markdown +++ b/doc/tutorials/ios/ios_install/ios_install.markdown @@ -1,8 +1,9 @@ Installation in iOS {#tutorial_ios_install} =================== -@prev_tutorial{tutorial_macos_install} -@next_tutorial{tutorial_arm_crosscompile_with_cmake} +@tableofcontents + +@next_tutorial{tutorial_hello} | | | | -: | :- | diff --git a/doc/tutorials/ios/table_of_content_ios.markdown b/doc/tutorials/ios/table_of_content_ios.markdown index 4031c6c80b..99cfea5306 100644 --- a/doc/tutorials/ios/table_of_content_ios.markdown +++ b/doc/tutorials/ios/table_of_content_ios.markdown @@ -1,32 +1,6 @@ OpenCV iOS {#tutorial_table_of_content_ios} ========== - +- @subpage tutorial_ios_install - @subpage tutorial_hello - - *Languages:* Objective-C++ - - *Compatibility:* \> OpenCV 2.4.3 - - *Author:* Charu Hans - - You will learn how to link OpenCV with iOS and write a basic application. - - @subpage tutorial_image_manipulation - - *Languages:* Objective-C++ - - *Compatibility:* \> OpenCV 2.4.3 - - *Author:* Charu Hans - - You will learn how to do simple image manipulation using OpenCV in iOS. 
- - @subpage tutorial_video_processing - - *Languages:* Objective-C++ - - *Compatibility:* \> OpenCV 2.4.3 - - *Author:* Eduard Feicho - - You will learn how to capture and process video from camera using OpenCV in iOS. diff --git a/doc/tutorials/ios/video_processing/video_processing.markdown b/doc/tutorials/ios/video_processing/video_processing.markdown index 04bdd14e89..e388d8262c 100644 --- a/doc/tutorials/ios/video_processing/video_processing.markdown +++ b/doc/tutorials/ios/video_processing/video_processing.markdown @@ -1,8 +1,15 @@ OpenCV iOS - Video Processing {#tutorial_video_processing} ============================= +@tableofcontents + @prev_tutorial{tutorial_image_manipulation} +| | | +| -: | :- | +| Original author | Eduard Feicho | +| Compatibility | OpenCV >= 3.0 | + This tutorial explains how to process video frames using the iPhone's camera and OpenCV. diff --git a/doc/tutorials/ml/images/introduction_to_pca_cover.png b/doc/tutorials/ml/images/introduction_to_pca_cover.png deleted file mode 100644 index ce230029ec..0000000000 Binary files a/doc/tutorials/ml/images/introduction_to_pca_cover.png and /dev/null differ diff --git a/doc/tutorials/ml/images/introduction_to_svm.png b/doc/tutorials/ml/images/introduction_to_svm.png deleted file mode 100644 index f2d63751fc..0000000000 Binary files a/doc/tutorials/ml/images/introduction_to_svm.png and /dev/null differ diff --git a/doc/tutorials/ml/images/non_linear_svms.png b/doc/tutorials/ml/images/non_linear_svms.png deleted file mode 100644 index bd185d4c74..0000000000 Binary files a/doc/tutorials/ml/images/non_linear_svms.png and /dev/null differ diff --git a/doc/tutorials/ml/table_of_content_ml.markdown b/doc/tutorials/ml/table_of_content_ml.markdown deleted file mode 100644 index b4064777a2..0000000000 --- a/doc/tutorials/ml/table_of_content_ml.markdown +++ /dev/null @@ -1,36 +0,0 @@ -Machine Learning (ml module) {#tutorial_table_of_content_ml} -============================ - -Use the powerful machine learning classes for statistical classification, regression and clustering -of data. - -- @subpage tutorial_introduction_to_svm - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Fernando Iglesias García - - Learn what a Support Vector Machine is. - -- @subpage tutorial_non_linear_svms - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Fernando Iglesias García - - Here you will learn how to define the optimization problem for SVMs when it is not possible to - separate linearly the training data. - -- @subpage tutorial_introduction_to_pca - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Theodore Tsesmelis - - Learn what a Principal Component Analysis (PCA) is. diff --git a/doc/tutorials/objdetect/images/Cascade_Classifier_Tutorial_Cover.jpg b/doc/tutorials/objdetect/images/Cascade_Classifier_Tutorial_Cover.jpg deleted file mode 100644 index cfa5de67e5..0000000000 Binary files a/doc/tutorials/objdetect/images/Cascade_Classifier_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/objdetect/table_of_content_objdetect.markdown b/doc/tutorials/objdetect/table_of_content_objdetect.markdown deleted file mode 100644 index 0b019d88a5..0000000000 --- a/doc/tutorials/objdetect/table_of_content_objdetect.markdown +++ /dev/null @@ -1,18 +0,0 @@ -Object Detection (objdetect module) {#tutorial_table_of_content_objdetect} -=================================== - -Ever wondered how your digital camera detects peoples and faces? 
Look here to find out! - -- @subpage tutorial_cascade_classifier - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Ana Huamán - - Here we learn how to use *objdetect* to find objects in our images or videos - -- @subpage tutorial_traincascade - - This tutorial describes _opencv_traincascade_ application and its parameters. diff --git a/doc/tutorials/others/_old/table_of_content_ml.markdown b/doc/tutorials/others/_old/table_of_content_ml.markdown new file mode 100644 index 0000000000..5999b0208a --- /dev/null +++ b/doc/tutorials/others/_old/table_of_content_ml.markdown @@ -0,0 +1,4 @@ +Machine Learning (ml module) {#tutorial_table_of_content_ml} +============================ + +Content has been moved to this page: @ref tutorial_table_of_content_other diff --git a/doc/tutorials/others/_old/table_of_content_objdetect.markdown b/doc/tutorials/others/_old/table_of_content_objdetect.markdown new file mode 100644 index 0000000000..0aa69fcd8d --- /dev/null +++ b/doc/tutorials/others/_old/table_of_content_objdetect.markdown @@ -0,0 +1,4 @@ +Object Detection (objdetect module) {#tutorial_table_of_content_objdetect} +=================================== + +Content has been moved to this page: @ref tutorial_table_of_content_other diff --git a/doc/tutorials/others/_old/table_of_content_photo.markdown b/doc/tutorials/others/_old/table_of_content_photo.markdown new file mode 100644 index 0000000000..14a10a9c70 --- /dev/null +++ b/doc/tutorials/others/_old/table_of_content_photo.markdown @@ -0,0 +1,4 @@ +Computational photography (photo module) {#tutorial_table_of_content_photo} +======================================== + +Content has been moved to this page: @ref tutorial_table_of_content_other diff --git a/doc/tutorials/others/_old/table_of_content_stitching.markdown b/doc/tutorials/others/_old/table_of_content_stitching.markdown new file mode 100644 index 0000000000..e8f91ba659 --- /dev/null +++ b/doc/tutorials/others/_old/table_of_content_stitching.markdown @@ -0,0 +1,4 @@ +Images stitching (stitching module) {#tutorial_table_of_content_stitching} +=================================== + +Content has been moved to this page: @ref tutorial_table_of_content_other diff --git a/doc/tutorials/others/_old/table_of_content_video.markdown b/doc/tutorials/others/_old/table_of_content_video.markdown new file mode 100644 index 0000000000..fae3e6ca79 --- /dev/null +++ b/doc/tutorials/others/_old/table_of_content_video.markdown @@ -0,0 +1,4 @@ +Video analysis (video module) {#tutorial_table_of_content_video} +============================= + +Content has been moved to this page: @ref tutorial_table_of_content_other diff --git a/doc/tutorials/video/background_subtraction/background_subtraction.markdown b/doc/tutorials/others/background_subtraction.markdown similarity index 97% rename from doc/tutorials/video/background_subtraction/background_subtraction.markdown rename to doc/tutorials/others/background_subtraction.markdown index 420286960d..5d07f1dae9 100644 --- a/doc/tutorials/video/background_subtraction/background_subtraction.markdown +++ b/doc/tutorials/others/background_subtraction.markdown @@ -1,8 +1,16 @@ How to Use Background Subtraction Methods {#tutorial_background_subtraction} ========================================= +@tableofcontents + +@prev_tutorial{tutorial_stitcher} @next_tutorial{tutorial_meanshift} +| | | +| -: | :- | +| Original author | Domenico Daniele Bloisi | +| Compatibility | OpenCV >= 3.0 | + - Background subtraction (BS) is a common and widely used 
technique for generating a foreground mask (namely, a binary image containing the pixels belonging to moving objects in the scene) by using static cameras. diff --git a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown b/doc/tutorials/others/cascade_classifier.markdown similarity index 98% rename from doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown rename to doc/tutorials/others/cascade_classifier.markdown index be942bdbdd..149bac5ab5 100644 --- a/doc/tutorials/objdetect/cascade_classifier/cascade_classifier.markdown +++ b/doc/tutorials/others/cascade_classifier.markdown @@ -1,8 +1,16 @@ Cascade Classifier {#tutorial_cascade_classifier} ================== +@tableofcontents + +@prev_tutorial{tutorial_optical_flow} @next_tutorial{tutorial_traincascade} +| | | +| -: | :- | +| Original author | Ana Huamán | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown b/doc/tutorials/others/hdr_imaging.markdown similarity index 98% rename from doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown rename to doc/tutorials/others/hdr_imaging.markdown index 0bc15fd9b2..a9f1276618 100644 --- a/doc/tutorials/photo/hdr_imaging/hdr_imaging.markdown +++ b/doc/tutorials/others/hdr_imaging.markdown @@ -1,6 +1,15 @@ High Dynamic Range Imaging {#tutorial_hdr_imaging} ========================== +@tableofcontents + +@next_tutorial{tutorial_stitcher} + +| | | +| -: | :- | +| Original author | Fedor Morozov | +| Compatibility | OpenCV >= 3.0 | + Introduction ------------ diff --git a/doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_Scheme.png b/doc/tutorials/others/images/Background_Subtraction_Tutorial_Scheme.png similarity index 100% rename from doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_Scheme.png rename to doc/tutorials/others/images/Background_Subtraction_Tutorial_Scheme.png diff --git a/doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_frame.jpg b/doc/tutorials/others/images/Background_Subtraction_Tutorial_frame.jpg similarity index 100% rename from doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_frame.jpg rename to doc/tutorials/others/images/Background_Subtraction_Tutorial_frame.jpg diff --git a/doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_result_KNN.jpg b/doc/tutorials/others/images/Background_Subtraction_Tutorial_result_KNN.jpg similarity index 100% rename from doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_result_KNN.jpg rename to doc/tutorials/others/images/Background_Subtraction_Tutorial_result_KNN.jpg diff --git a/doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_result_MOG2.jpg b/doc/tutorials/others/images/Background_Subtraction_Tutorial_result_MOG2.jpg similarity index 100% rename from doc/tutorials/video/background_subtraction/images/Background_Subtraction_Tutorial_result_MOG2.jpg rename to doc/tutorials/others/images/Background_Subtraction_Tutorial_result_MOG2.jpg diff --git a/doc/tutorials/objdetect/cascade_classifier/images/Cascade_Classifier_Tutorial_Result_Haar.jpg b/doc/tutorials/others/images/Cascade_Classifier_Tutorial_Result_Haar.jpg similarity index 100% rename from doc/tutorials/objdetect/cascade_classifier/images/Cascade_Classifier_Tutorial_Result_Haar.jpg rename to doc/tutorials/others/images/Cascade_Classifier_Tutorial_Result_Haar.jpg diff --git 
a/doc/tutorials/objdetect/cascade_classifier/images/Cascade_Classifier_Tutorial_Result_LBP.jpg b/doc/tutorials/others/images/Cascade_Classifier_Tutorial_Result_LBP.jpg similarity index 100% rename from doc/tutorials/objdetect/cascade_classifier/images/Cascade_Classifier_Tutorial_Result_LBP.jpg rename to doc/tutorials/others/images/Cascade_Classifier_Tutorial_Result_LBP.jpg diff --git a/doc/tutorials/stitching/stitcher/images/affinepano.jpg b/doc/tutorials/others/images/affinepano.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/affinepano.jpg rename to doc/tutorials/others/images/affinepano.jpg diff --git a/doc/tutorials/stitching/stitcher/images/boat.jpg b/doc/tutorials/others/images/boat.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/boat.jpg rename to doc/tutorials/others/images/boat.jpg diff --git a/doc/tutorials/stitching/stitcher/images/budapest.jpg b/doc/tutorials/others/images/budapest.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/budapest.jpg rename to doc/tutorials/others/images/budapest.jpg diff --git a/doc/tutorials/stitching/stitcher/images/compressedPlaneA2B1.jpg b/doc/tutorials/others/images/compressedPlaneA2B1.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/compressedPlaneA2B1.jpg rename to doc/tutorials/others/images/compressedPlaneA2B1.jpg diff --git a/doc/tutorials/stitching/stitcher/images/fisheye.jpg b/doc/tutorials/others/images/fisheye.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/fisheye.jpg rename to doc/tutorials/others/images/fisheye.jpg diff --git a/doc/tutorials/photo/hdr_imaging/images/fusion.png b/doc/tutorials/others/images/fusion.png similarity index 100% rename from doc/tutorials/photo/hdr_imaging/images/fusion.png rename to doc/tutorials/others/images/fusion.png diff --git a/doc/tutorials/stitching/stitcher/images/gvedit.jpg b/doc/tutorials/others/images/gvedit.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/gvedit.jpg rename to doc/tutorials/others/images/gvedit.jpg diff --git a/doc/tutorials/objdetect/cascade_classifier/images/haar.png b/doc/tutorials/others/images/haar.png similarity index 100% rename from doc/tutorials/objdetect/cascade_classifier/images/haar.png rename to doc/tutorials/others/images/haar.png diff --git a/doc/tutorials/objdetect/cascade_classifier/images/haar_features.jpg b/doc/tutorials/others/images/haar_features.jpg similarity index 100% rename from doc/tutorials/objdetect/cascade_classifier/images/haar_features.jpg rename to doc/tutorials/others/images/haar_features.jpg diff --git a/doc/tutorials/photo/hdr_imaging/images/ldr.png b/doc/tutorials/others/images/ldr.png similarity index 100% rename from doc/tutorials/photo/hdr_imaging/images/ldr.png rename to doc/tutorials/others/images/ldr.png diff --git a/doc/tutorials/photo/hdr_imaging/images/memorial.png b/doc/tutorials/others/images/memorial.png similarity index 100% rename from doc/tutorials/photo/hdr_imaging/images/memorial.png rename to doc/tutorials/others/images/memorial.png diff --git a/doc/tutorials/stitching/stitcher/images/newspaper.jpg b/doc/tutorials/others/images/newspaper.jpg similarity index 100% rename from doc/tutorials/stitching/stitcher/images/newspaper.jpg rename to doc/tutorials/others/images/newspaper.jpg diff --git a/doc/tutorials/ml/introduction_to_svm/images/optimal-hyperplane.png b/doc/tutorials/others/images/optimal-hyperplane.png similarity index 100% rename from 
doc/tutorials/ml/introduction_to_svm/images/optimal-hyperplane.png rename to doc/tutorials/others/images/optimal-hyperplane.png diff --git a/doc/tutorials/ml/introduction_to_pca/images/pca_eigen.png b/doc/tutorials/others/images/pca_eigen.png similarity index 100% rename from doc/tutorials/ml/introduction_to_pca/images/pca_eigen.png rename to doc/tutorials/others/images/pca_eigen.png diff --git a/doc/tutorials/ml/introduction_to_pca/images/pca_line.png b/doc/tutorials/others/images/pca_line.png similarity index 100% rename from doc/tutorials/ml/introduction_to_pca/images/pca_line.png rename to doc/tutorials/others/images/pca_line.png diff --git a/doc/tutorials/ml/introduction_to_pca/images/pca_output.png b/doc/tutorials/others/images/pca_output.png similarity index 100% rename from doc/tutorials/ml/introduction_to_pca/images/pca_output.png rename to doc/tutorials/others/images/pca_output.png diff --git a/doc/tutorials/ml/introduction_to_pca/images/pca_test1.jpg b/doc/tutorials/others/images/pca_test1.jpg similarity index 100% rename from doc/tutorials/ml/introduction_to_pca/images/pca_test1.jpg rename to doc/tutorials/others/images/pca_test1.jpg diff --git a/doc/tutorials/ml/non_linear_svms/images/sample-errors-dist.png b/doc/tutorials/others/images/sample-errors-dist.png similarity index 100% rename from doc/tutorials/ml/non_linear_svms/images/sample-errors-dist.png rename to doc/tutorials/others/images/sample-errors-dist.png diff --git a/doc/tutorials/ml/introduction_to_svm/images/separating-lines.png b/doc/tutorials/others/images/separating-lines.png similarity index 100% rename from doc/tutorials/ml/introduction_to_svm/images/separating-lines.png rename to doc/tutorials/others/images/separating-lines.png diff --git a/doc/tutorials/ml/introduction_to_svm/images/svm_intro_result.png b/doc/tutorials/others/images/svm_intro_result.png similarity index 100% rename from doc/tutorials/ml/introduction_to_svm/images/svm_intro_result.png rename to doc/tutorials/others/images/svm_intro_result.png diff --git a/doc/tutorials/ml/non_linear_svms/images/svm_non_linear_result.png b/doc/tutorials/others/images/svm_non_linear_result.png similarity index 100% rename from doc/tutorials/ml/non_linear_svms/images/svm_non_linear_result.png rename to doc/tutorials/others/images/svm_non_linear_result.png diff --git a/doc/tutorials/objdetect/images/visualisation_single_stage.png b/doc/tutorials/others/images/visualisation_single_stage.png similarity index 100% rename from doc/tutorials/objdetect/images/visualisation_single_stage.png rename to doc/tutorials/others/images/visualisation_single_stage.png diff --git a/doc/tutorials/objdetect/images/visualisation_video.png b/doc/tutorials/others/images/visualisation_video.png similarity index 100% rename from doc/tutorials/objdetect/images/visualisation_video.png rename to doc/tutorials/others/images/visualisation_video.png diff --git a/doc/tutorials/ml/introduction_to_pca/introduction_to_pca.markdown b/doc/tutorials/others/introduction_to_pca.markdown similarity index 98% rename from doc/tutorials/ml/introduction_to_pca/introduction_to_pca.markdown rename to doc/tutorials/others/introduction_to_pca.markdown index c1c6c53a99..490024ac29 100644 --- a/doc/tutorials/ml/introduction_to_pca/introduction_to_pca.markdown +++ b/doc/tutorials/others/introduction_to_pca.markdown @@ -1,8 +1,15 @@ Introduction to Principal Component Analysis (PCA) {#tutorial_introduction_to_pca} ======================================= +@tableofcontents + @prev_tutorial{tutorial_non_linear_svms} 
+| | | +| -: | :- | +| Original author | Theodore Tsesmelis | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.markdown b/doc/tutorials/others/introduction_to_svm.markdown similarity index 98% rename from doc/tutorials/ml/introduction_to_svm/introduction_to_svm.markdown rename to doc/tutorials/others/introduction_to_svm.markdown index 1340061228..557093c2ae 100644 --- a/doc/tutorials/ml/introduction_to_svm/introduction_to_svm.markdown +++ b/doc/tutorials/others/introduction_to_svm.markdown @@ -1,8 +1,16 @@ Introduction to Support Vector Machines {#tutorial_introduction_to_svm} ======================================= +@tableofcontents + +@prev_tutorial{tutorial_traincascade} @next_tutorial{tutorial_non_linear_svms} +| | | +| -: | :- | +| Original author | Fernando Iglesias García | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/video/meanshift/meanshift.markdown b/doc/tutorials/others/meanshift.markdown similarity index 99% rename from doc/tutorials/video/meanshift/meanshift.markdown rename to doc/tutorials/others/meanshift.markdown index 6472570ead..7ca9ecad30 100644 --- a/doc/tutorials/video/meanshift/meanshift.markdown +++ b/doc/tutorials/others/meanshift.markdown @@ -1,6 +1,8 @@ Meanshift and Camshift {#tutorial_meanshift} ====================== +@tableofcontents + @prev_tutorial{tutorial_background_subtraction} @next_tutorial{tutorial_optical_flow} diff --git a/doc/tutorials/ml/non_linear_svms/non_linear_svms.markdown b/doc/tutorials/others/non_linear_svms.markdown similarity index 99% rename from doc/tutorials/ml/non_linear_svms/non_linear_svms.markdown rename to doc/tutorials/others/non_linear_svms.markdown index 025ae0cda1..f70449fe1d 100644 --- a/doc/tutorials/ml/non_linear_svms/non_linear_svms.markdown +++ b/doc/tutorials/others/non_linear_svms.markdown @@ -1,9 +1,16 @@ Support Vector Machines for Non-Linearly Separable Data {#tutorial_non_linear_svms} ======================================================= +@tableofcontents + @prev_tutorial{tutorial_introduction_to_svm} @next_tutorial{tutorial_introduction_to_pca} +| | | +| -: | :- | +| Original author | Fernando Iglesias García | +| Compatibility | OpenCV >= 3.0 | + Goal ---- diff --git a/doc/tutorials/video/optical_flow/optical_flow.markdown b/doc/tutorials/others/optical_flow.markdown similarity index 99% rename from doc/tutorials/video/optical_flow/optical_flow.markdown rename to doc/tutorials/others/optical_flow.markdown index bcf88f7af1..07456d7ea9 100644 --- a/doc/tutorials/video/optical_flow/optical_flow.markdown +++ b/doc/tutorials/others/optical_flow.markdown @@ -1,7 +1,10 @@ Optical Flow {#tutorial_optical_flow} ============ +@tableofcontents + @prev_tutorial{tutorial_meanshift} +@next_tutorial{tutorial_cascade_classifier} Goal ---- diff --git a/doc/tutorials/stitching/stitcher/stitcher.markdown b/doc/tutorials/others/stitcher.markdown similarity index 97% rename from doc/tutorials/stitching/stitcher/stitcher.markdown rename to doc/tutorials/others/stitcher.markdown index 3670065bbe..e636d83f30 100644 --- a/doc/tutorials/stitching/stitcher/stitcher.markdown +++ b/doc/tutorials/others/stitcher.markdown @@ -1,6 +1,16 @@ High level stitching API (Stitcher class) {#tutorial_stitcher} ========================================= +@tableofcontents + +@prev_tutorial{tutorial_hdr_imaging} +@next_tutorial{tutorial_background_subtraction} + +| | | +| -: | :- | +| Original author | Jiri Horner | +| Compatibility | OpenCV >= 3.2 | + Goal 
---- diff --git a/doc/tutorials/others/table_of_content_other.markdown b/doc/tutorials/others/table_of_content_other.markdown new file mode 100644 index 0000000000..a004df63e2 --- /dev/null +++ b/doc/tutorials/others/table_of_content_other.markdown @@ -0,0 +1,13 @@ +Other tutorials (ml, objdetect, photo, stitching, video) {#tutorial_table_of_content_other} +======================================================== + +- photo. @subpage tutorial_hdr_imaging +- stitching. @subpage tutorial_stitcher +- video. @subpage tutorial_background_subtraction +- video. @subpage tutorial_meanshift +- video. @subpage tutorial_optical_flow +- objdetect. @subpage tutorial_cascade_classifier +- objdetect. @subpage tutorial_traincascade +- ml. @subpage tutorial_introduction_to_svm +- ml. @subpage tutorial_non_linear_svms +- ml. @subpage tutorial_introduction_to_pca diff --git a/doc/tutorials/objdetect/traincascade.markdown b/doc/tutorials/others/traincascade.markdown similarity index 99% rename from doc/tutorials/objdetect/traincascade.markdown rename to doc/tutorials/others/traincascade.markdown index 042aaccdc9..0d95bd003a 100644 --- a/doc/tutorials/objdetect/traincascade.markdown +++ b/doc/tutorials/others/traincascade.markdown @@ -1,7 +1,10 @@ Cascade Classifier Training {#tutorial_traincascade} =========================== +@tableofcontents + @prev_tutorial{tutorial_cascade_classifier} +@next_tutorial{tutorial_introduction_to_svm} Introduction ------------ diff --git a/doc/tutorials/photo/images/hdr.png b/doc/tutorials/photo/images/hdr.png deleted file mode 100644 index 9d3782055c..0000000000 Binary files a/doc/tutorials/photo/images/hdr.png and /dev/null differ diff --git a/doc/tutorials/photo/table_of_content_photo.markdown b/doc/tutorials/photo/table_of_content_photo.markdown deleted file mode 100644 index 357c36996e..0000000000 --- a/doc/tutorials/photo/table_of_content_photo.markdown +++ /dev/null @@ -1,14 +0,0 @@ -Computational photography (photo module) {#tutorial_table_of_content_photo} -======================================== - -Use OpenCV for advanced photo processing. - -- @subpage tutorial_hdr_imaging - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 3.0 - - *Author:* Fedor Morozov - - Learn how to create and process high dynamic range images. diff --git a/doc/tutorials/stitching/table_of_content_stitching.markdown b/doc/tutorials/stitching/table_of_content_stitching.markdown deleted file mode 100644 index d5972f4343..0000000000 --- a/doc/tutorials/stitching/table_of_content_stitching.markdown +++ /dev/null @@ -1,17 +0,0 @@ -Images stitching (stitching module) {#tutorial_table_of_content_stitching} -=================================== - -Sometimes a single image can't capture it all. Here you will learn how to join -more images together to create a large pano. Doesn't matter if you want to -create a photo panorama or you want to stitch scans. - -- @subpage tutorial_stitcher - - *Languages:* C++ - - *Compatibility:* \>= OpenCV 3.2 - - *Author:* Jiri Horner - - You will use high level stitching api to create a photo panorama. You will - learn about Stitcher class and its configurations. 
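For reference, the high-level stitching API that the relocated tutorial covers reduces to a handful of calls; a minimal sketch with hypothetical input file names:

@code{.cpp}
// Minimal panorama with the high-level Stitcher API.
#include <opencv2/imgcodecs.hpp>
#include <opencv2/stitching.hpp>
#include <vector>

int main()
{
    // Input file names are hypothetical; any overlapping pair of photos works.
    std::vector<cv::Mat> imgs = { cv::imread("left.jpg"), cv::imread("right.jpg") };

    cv::Ptr<cv::Stitcher> stitcher = cv::Stitcher::create(cv::Stitcher::PANORAMA);
    cv::Mat pano;
    if (stitcher->stitch(imgs, pano) != cv::Stitcher::OK)
        return 1;  // not enough overlap or features, camera estimation failed, etc.
    cv::imwrite("pano.jpg", pano);
    return 0;
}
@endcode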
diff --git a/doc/tutorials/tutorials.markdown b/doc/tutorials/tutorials.markdown index 5a35077df9..59aefc2b1f 100644 --- a/doc/tutorials/tutorials.markdown +++ b/doc/tutorials/tutorials.markdown @@ -4,18 +4,12 @@ OpenCV Tutorials {#tutorial_root} - @subpage tutorial_table_of_content_introduction - build and install OpenCV on your computer - @subpage tutorial_table_of_content_core - basic building blocks of the library - @subpage tutorial_table_of_content_imgproc - image processing functions -- @subpage tutorial_table_of_content_highgui - built-in graphical user interface -- @subpage tutorial_table_of_content_imgcodecs - read and write images from/to files using _imgcodecs_ module -- @subpage tutorial_table_of_content_videoio - read and write videos using _videio_ module +- @subpage tutorial_table_of_content_app - application utils (GUI, image/video input/output) - @subpage tutorial_table_of_content_calib3d - extract 3D world information from 2D images - @subpage tutorial_table_of_content_features2d - feature detectors, descriptors and matching framework -- @subpage tutorial_table_of_content_video - algorithms for video streams: motion detection, object and feature tracking, etc. -- @subpage tutorial_table_of_content_objdetect - detect objects using conventional CV methods - @subpage tutorial_table_of_content_dnn - infer neural networks using built-in _dnn_ module -- @subpage tutorial_table_of_content_ml - machine learning algorithms for statistical classification, regression and data clustering - @subpage tutorial_table_of_content_gapi - graph-based approach to computer vision algorithms building -- @subpage tutorial_table_of_content_photo - advanced photo processing -- @subpage tutorial_table_of_content_stitching - create panoramas and more using _stitching_ module +- @subpage tutorial_table_of_content_other - other modules (ml, objdetect, stitching, video, photo) - @subpage tutorial_table_of_content_ios - running OpenCV on an iDevice @cond CUDA_MODULES - @subpage tutorial_table_of_content_gpu - utilizing power of video card to run CV algorithms diff --git a/doc/tutorials/video/images/Background_Subtraction_Tutorial_Cover.jpg b/doc/tutorials/video/images/Background_Subtraction_Tutorial_Cover.jpg deleted file mode 100644 index d5c84a3722..0000000000 Binary files a/doc/tutorials/video/images/Background_Subtraction_Tutorial_Cover.jpg and /dev/null differ diff --git a/doc/tutorials/video/table_of_content_video.markdown b/doc/tutorials/video/table_of_content_video.markdown deleted file mode 100644 index 1a80f716da..0000000000 --- a/doc/tutorials/video/table_of_content_video.markdown +++ /dev/null @@ -1,28 +0,0 @@ -Video analysis (video module) {#tutorial_table_of_content_video} -============================= - -Look here in order to find use on your video stream algorithms like: motion extraction, feature -tracking and foreground extractions. - -- @subpage tutorial_background_subtraction - - *Languages:* C++, Java, Python - - *Compatibility:* \> OpenCV 2.4.6 - - *Author:* Domenico Daniele Bloisi - - We will learn how to extract foreground masks from both videos and sequences of images and - to show them. - -- @subpage tutorial_meanshift - - *Languages:* C++, Java, Python - - Learn how to use the Meanshift and Camshift algorithms to track objects in videos. - -- @subpage tutorial_optical_flow - - *Languages:* C++, Java, Python - - We will learn how to use optical flow methods to track sparse features or to create a dense representation. 
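The background-subtraction tutorial folded into the new "other" section keeps the same core loop; a minimal sketch, assuming a hypothetical input video `vtest.avi`:

@code{.cpp}
// Foreground masks with the MOG2 background subtractor.
#include <opencv2/video.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>

int main()
{
    // "vtest.avi" is a hypothetical input video.
    cv::VideoCapture cap("vtest.avi");
    cv::Ptr<cv::BackgroundSubtractor> pBackSub = cv::createBackgroundSubtractorMOG2();

    cv::Mat frame, fgMask;
    while (cap.read(frame))
    {
        pBackSub->apply(frame, fgMask);    // update the model, get the foreground mask
        cv::imshow("FG Mask", fgMask);
        if (cv::waitKey(30) == 27) break;  // Esc quits
    }
    return 0;
}
@endcode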
diff --git a/doc/tutorials/videoio/images/video-input-psnr-ssim.png b/doc/tutorials/videoio/images/video-input-psnr-ssim.png deleted file mode 100644 index de8c2835a6..0000000000 Binary files a/doc/tutorials/videoio/images/video-input-psnr-ssim.png and /dev/null differ diff --git a/doc/tutorials/videoio/images/video-write.png b/doc/tutorials/videoio/images/video-write.png deleted file mode 100644 index 9413d80836..0000000000 Binary files a/doc/tutorials/videoio/images/video-write.png and /dev/null differ diff --git a/doc/tutorials/videoio/table_of_content_videoio.markdown b/doc/tutorials/videoio/table_of_content_videoio.markdown deleted file mode 100644 index 393a0fc236..0000000000 --- a/doc/tutorials/videoio/table_of_content_videoio.markdown +++ /dev/null @@ -1,35 +0,0 @@ -Video Input and Output (videoio module) {#tutorial_table_of_content_videoio} -========================================= - -This section contains tutorials about how to read/save your video files. - -- @subpage tutorial_video_input_psnr_ssim - - *Languages:* C++, Python - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - - You will learn how to read video streams, and how to calculate similarity values such as PSNR - or SSIM. - -- @subpage tutorial_video_write - - *Languages:* C++ - - *Compatibility:* \> OpenCV 2.0 - - *Author:* Bernát Gábor - -- @subpage tutorial_kinect_openni - - *Languages:* C++ - -- @subpage tutorial_orbbec_astra - - *Languages:* C++ - -- @subpage tutorial_intelperc - - *Languages:* C++ diff --git a/modules/3d/include/opencv2/3d.hpp b/modules/3d/include/opencv2/3d.hpp index 7591a354fc..6984b705a2 100644 --- a/modules/3d/include/opencv2/3d.hpp +++ b/modules/3d/include/opencv2/3d.hpp @@ -51,7 +51,7 @@ respectively) by the same factor. The joint rotation-translation matrix \f$[R|t]\f$ is the matrix product of a projective transformation and a homogeneous transformation. The 3-by-4 projective transformation maps 3D points -represented in camera coordinates to 2D poins in the image plane and represented in normalized +represented in camera coordinates to 2D points in the image plane and represented in normalized camera coordinates \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$: \f[Z_c \begin{bmatrix} @@ -538,15 +538,15 @@ or vector\<Point2f\> . a vector\<Point2f\> . @param method Method used to compute a homography matrix. The following methods are possible: - **0** - a regular method using all the points, i.e., the least squares method -- **RANSAC** - RANSAC-based robust method -- **LMEDS** - Least-Median robust method -- **RHO** - PROSAC-based robust method +- @ref RANSAC - RANSAC-based robust method +- @ref LMEDS - Least-Median robust method +- @ref RHO - PROSAC-based robust method @param ransacReprojThreshold Maximum allowed reprojection error to treat a point pair as an inlier (used in the RANSAC and RHO methods only). That is, if \f[\| \texttt{dstPoints} _i - \texttt{convertPointsHomogeneous} ( \texttt{H} * \texttt{srcPoints} _i) \|_2 > \texttt{ransacReprojThreshold}\f] then the point \f$i\f$ is considered as an outlier. If srcPoints and dstPoints are measured in pixels, it usually makes sense to set this parameter somewhere in the range of 1 to 10. -@param mask Optional output mask set by a robust method ( RANSAC or LMEDS ). Note that the input +@param mask Optional output mask set by a robust method ( RANSAC or LMeDS ). Note that the input mask values are ignored. @param maxIters The maximum number of RANSAC iterations. @param confidence Confidence level, between 0 and 1.
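The robust-method flags and inlier mask documented in this hunk are typically used as follows; a minimal sketch with made-up correspondences (the header name follows OpenCV 4.x; this branch declares the function in `opencv2/3d.hpp`):

@code{.cpp}
// Robust homography with RANSAC plus the inlier mask described above.
#include <opencv2/calib3d.hpp>  // opencv2/3d.hpp in this branch
#include <vector>

int main()
{
    // Made-up correspondences: dst is src scaled by 10 and shifted by (10,10).
    std::vector<cv::Point2f> src = { {0,0}, {1,0}, {1,1}, {0,1}, {0.5f,0.5f} };
    std::vector<cv::Point2f> dst = { {10,10}, {20,10}, {20,20}, {10,20}, {15,15} };

    std::vector<uchar> inlierMask;  // 1 = inlier, 0 = outlier, as documented above
    cv::Mat H = cv::findHomography(src, dst, cv::RANSAC,
                                   3.0 /* ransacReprojThreshold, pixels */, inlierMask);
    return H.empty() ? 1 : 0;
}
@endcode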
@@ -781,37 +781,37 @@ the model coordinate system to the camera coordinate system. the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param flags Method for solving a PnP problem: -- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In +- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In this case the function finds such a pose that minimizes reprojection error, that is the sum of squared distances between the observed projections imagePoints and the projected (using @ref projectPoints ) objectPoints . -- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). In this case the function requires exactly four object and image points. -- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis +- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). In this case the function requires exactly four object and image points. -- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the +- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n +- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n Method is based on the paper of J. Hesch and S. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n +- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated focal length. -- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. -- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. 
It requires 4 coplanar object points defined in the following order: - point 0: [-squareLength / 2, squareLength / 2, 0] - point 1: [ squareLength / 2, squareLength / 2, 0] - point 2: [ squareLength / 2, -squareLength / 2, 0] - point 3: [-squareLength / 2, -squareLength / 2, 0] -- **SOLVEPNP_SQPNP** Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the +- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points. @@ -921,23 +921,23 @@ a 3D point expressed in the world frame into the camera frame: - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints = np.ascontiguousarray(D[:,:2]).reshape((N,1,2)) - - The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are + - The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are unstable and sometimes give completely wrong results. If you pass one of these two - flags, **SOLVEPNP_EPNP** method will be used instead. - - The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P** + flags, @ref SOLVEPNP_EPNP method will be used instead. + - The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error). - - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points + - With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the global solution to converge. - - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. - - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + - With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar. + - With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation. Number of input points must be 4. Object points must be defined in the following order: - point 0: [-squareLength / 2, squareLength / 2, 0] - point 1: [ squareLength / 2, squareLength / 2, 0] - point 2: [ squareLength / 2, -squareLength / 2, 0] - point 3: [-squareLength / 2, -squareLength / 2, 0] - - With **SOLVEPNP_SQPNP** input points must be >= 3 + - With @ref SOLVEPNP_SQPNP input points must be >= 3 */ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, @@ -1015,9 +1015,9 @@ assumed. the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions. @param tvecs Output translation vectors. @param flags Method for solving a P3P problem: -- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). 
-- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke and S. Roumeliotis. +- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke and S. Roumeliotis. "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). The function estimates the object pose given 3 object points, their corresponding image @@ -1119,39 +1119,39 @@ the model coordinate system to the camera coordinate system. the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. @param flags Method for solving a PnP problem: -- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In +- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In this case the function finds such a pose that minimizes reprojection error, that is the sum of squared distances between the observed projections imagePoints and the projected (using projectPoints ) objectPoints . -- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang "Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). In this case the function requires exactly four object and image points. -- **SOLVEPNP_AP3P** Method is based on the paper of T. Ke, S. Roumeliotis +- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis "An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). In this case the function requires exactly four object and image points. -- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the +- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n +- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n +- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ assuming that both have the same value. Then the cameraMatrix is updated with the estimated focal length. -- **SOLVEPNP_IPPE** Method is based on the paper of T. Collins and A. Bartoli. +- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. -- **SOLVEPNP_IPPE_SQUARE** Method is based on the paper of Toby Collins and Adrien Bartoli. +- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. "Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. 
It requires 4 coplanar object points defined in the following order: - point 0: [-squareLength / 2, squareLength / 2, 0] - point 1: [ squareLength / 2, squareLength / 2, 0] - point 2: [ squareLength / 2, -squareLength / 2, 0] - point 3: [-squareLength / 2, -squareLength / 2, 0] -@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE and useExtrinsicGuess is set to true. -@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is SOLVEPNP_ITERATIVE +@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE and useExtrinsicGuess is set to true. @param reprojectionError Optional vector of reprojection error, that is the RMS error (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points @@ -1263,17 +1263,17 @@ a 3D point expressed in the world frame into the camera frame: - Thus, given some data D = np.array(...) where D.shape = (N,M), in order to use a subset of it as, e.g., imagePoints, one must effectively copy it into a new array: imagePoints = np.ascontiguousarray(D[:,:2]).reshape((N,1,2)) - - The methods **SOLVEPNP_DLS** and **SOLVEPNP_UPNP** cannot be used as the current implementations are + - The methods @ref SOLVEPNP_DLS and @ref SOLVEPNP_UPNP cannot be used as the current implementations are unstable and sometimes give completely wrong results. If you pass one of these two - flags, **SOLVEPNP_EPNP** method will be used instead. - - The minimum number of points is 4 in the general case. In the case of **SOLVEPNP_P3P** and **SOLVEPNP_AP3P** + flags, @ref SOLVEPNP_EPNP method will be used instead. + - The minimum number of points is 4 in the general case. In the case of @ref SOLVEPNP_P3P and @ref SOLVEPNP_AP3P methods, it is required to use exactly 4 points (the first 3 points are used to estimate all the solutions of the P3P problem, the last one is used to retain the best solution that minimizes the reprojection error). - - With **SOLVEPNP_ITERATIVE** method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points + - With @ref SOLVEPNP_ITERATIVE method and `useExtrinsicGuess=true`, the minimum number of points is 3 (3 points are sufficient to compute a pose but there are up to 4 solutions). The initial solution should be close to the global solution to converge. - - With **SOLVEPNP_IPPE** input points must be >= 4 and object points must be coplanar. - - With **SOLVEPNP_IPPE_SQUARE** this is a special case suitable for marker pose estimation. + - With @ref SOLVEPNP_IPPE input points must be >= 4 and object points must be coplanar. + - With @ref SOLVEPNP_IPPE_SQUARE this is a special case suitable for marker pose estimation. Number of input points must be 4. Object points must be defined in the following order: - point 0: [-squareLength / 2, squareLength / 2, 0] - point 1: [ squareLength / 2, squareLength / 2, 0] @@ -1427,8 +1427,8 @@ same camera intrinsic matrix. If this assumption does not hold for your use case to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When passing these coordinates, pass the identity matrix for this parameter. @param method Method for computing an essential matrix. -- **RANSAC** for the RANSAC algorithm. -- **LMEDS** for the LMedS algorithm. 
+- @ref RANSAC for the RANSAC algorithm. +- @ref LMEDS for the LMedS algorithm. @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of confidence (probability) that the estimated matrix is correct. @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar @@ -1437,6 +1437,7 @@ final fundamental matrix. It can be set to something like 1-3, depending on the point localization, image resolution, and the image noise. @param mask Output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. +@param maxIters The maximum number of robust method iterations. This function estimates essential matrix based on the five-point algorithm solver in @cite Nister03 . @cite SteweniusCFS is also a related. The epipolar geometry is described by the following equation: @@ -1447,10 +1448,22 @@ where \f$E\f$ is an essential matrix, \f$p_1\f$ and \f$p_2\f$ are corresponding second images, respectively. The result of this function may be passed further to decomposeEssentialMat or recoverPose to recover the relative pose between cameras. */ -CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, - InputArray cameraMatrix, int method = RANSAC, - double prob = 0.999, double threshold = 1.0, - OutputArray mask = noArray() ); +CV_EXPORTS_W +Mat findEssentialMat( + InputArray points1, InputArray points2, + InputArray cameraMatrix, int method = RANSAC, + double prob = 0.999, double threshold = 1.0, + int maxIters = 1000, OutputArray mask = noArray() +); + +/** @overload */ +CV_EXPORTS +Mat findEssentialMat( + InputArray points1, InputArray points2, + InputArray cameraMatrix, int method, + double prob, double threshold, + OutputArray mask +); // TODO remove from OpenCV 5.0 /** @overload @param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should @@ -1460,8 +1473,8 @@ be floating-point (single or double precision). are feature points from cameras with same focal length and principal point. @param pp principal point of the camera. @param method Method for computing a fundamental matrix. -- **RANSAC** for the RANSAC algorithm. -- **LMEDS** for the LMedS algorithm. +- @ref RANSAC for the RANSAC algorithm. +- @ref LMEDS for the LMedS algorithm. @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar line in pixels, beyond which the point is considered an outlier and is not used for computing the final fundamental matrix. It can be set to something like 1-3, depending on the accuracy of the @@ -1470,6 +1483,7 @@ point localization, image resolution, and the image noise. confidence (probability) that the estimated matrix is correct. @param mask Output array of N elements, every element of which is set to 0 for outliers and to 1 for the other points. The array is computed only in the RANSAC and LMedS methods. +@param maxIters The maximum number of robust method iterations. 
This function differs from the one above that it computes camera intrinsic matrix from focal length and principal point: @@ -1481,10 +1495,23 @@ f & 0 & x_{pp} \\ 0 & 0 & 1 \end{bmatrix}\f] */ -CV_EXPORTS_W Mat findEssentialMat( InputArray points1, InputArray points2, - double focal = 1.0, Point2d pp = Point2d(0, 0), - int method = RANSAC, double prob = 0.999, - double threshold = 1.0, OutputArray mask = noArray() ); +CV_EXPORTS_W +Mat findEssentialMat( + InputArray points1, InputArray points2, + double focal = 1.0, Point2d pp = Point2d(0, 0), + int method = RANSAC, double prob = 0.999, + double threshold = 1.0, int maxIters = 1000, + OutputArray mask = noArray() +); + +/** @overload */ +CV_EXPORTS +Mat findEssentialMat( + InputArray points1, InputArray points2, + double focal, Point2d pp, + int method, double prob, + double threshold, OutputArray mask +); // TODO remove from OpenCV 5.0 /** @brief Calculates an essential matrix from the corresponding points in two images from potentially two different cameras. @@ -1510,8 +1537,8 @@ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6[, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are assumed. @param method Method for computing an essential matrix. -- **RANSAC** for the RANSAC algorithm. -- **LMEDS** for the LMedS algorithm. +- @ref RANSAC for the RANSAC algorithm. +- @ref LMEDS for the LMedS algorithm. @param prob Parameter used for the RANSAC or LMedS methods only. It specifies a desirable level of confidence (probability) that the estimated matrix is correct. @param threshold Parameter used for RANSAC. It is the maximum distance from a point to an epipolar @@ -1900,8 +1927,8 @@ b_2\\ @param to Second input 2D point set containing \f$(x,y)\f$. @param inliers Output vector indicating which points are inliers (1-inlier, 0-outlier). @param method Robust method used to compute transformation. The following methods are possible: -- cv::RANSAC - RANSAC-based robust method -- cv::LMEDS - Least-Median robust method +- @ref RANSAC - RANSAC-based robust method +- @ref LMEDS - Least-Median robust method RANSAC is the default method. @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as an inlier. Applies only to RANSAC. @@ -1950,8 +1977,8 @@ two 2D point sets. @param to Second input 2D point set. @param inliers Output vector indicating which points are inliers. @param method Robust method used to compute transformation. The following methods are possible: -- cv::RANSAC - RANSAC-based robust method -- cv::LMEDS - Least-Median robust method +- @ref RANSAC - RANSAC-based robust method +- @ref LMEDS - Least-Median robust method RANSAC is the default method. @param ransacReprojThreshold Maximum reprojection error in the RANSAC algorithm to consider a point as an inlier. Applies only to RANSAC. 
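For the robust-method parameters documented above, a minimal sketch (assuming `from`/`to` already hold matched 2D point sets; names and values are illustrative):

    std::vector<cv::Point2f> from, to;  // matched point sets (assumed filled)
    std::vector<uchar> inliers;         // filled with 1 for inliers, 0 for outliers
    cv::Mat A = cv::estimateAffinePartial2D(from, to, inliers, cv::RANSAC,
                                            3.0 /* ransacReprojThreshold, in pixels */);
    // A is a 2x3 matrix (empty on failure); cv::RANSAC / cv::LMEDS select the robust method
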
diff --git a/modules/3d/src/five-point.cpp b/modules/3d/src/five-point.cpp index c7339d8f7c..084f791f26 100644 --- a/modules/3d/src/five-point.cpp +++ b/modules/3d/src/five-point.cpp @@ -401,7 +401,8 @@ protected: // Input should be a vector of n 2D points or a Nx2 matrix Mat findEssentialMat( InputArray _points1, InputArray _points2, InputArray _cameraMatrix, - int method, double prob, double threshold, OutputArray _mask) + int method, double prob, double threshold, + int maxIters, OutputArray _mask) { CV_INSTRUMENT_REGION(); @@ -444,20 +445,36 @@ Mat findEssentialMat( InputArray _points1, InputArray _points2, InputArray _came Mat E; if( method == RANSAC ) - createRANSACPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, threshold, prob)->run(points1, points2, E, _mask); + createRANSACPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, threshold, prob, maxIters)->run(points1, points2, E, _mask); else - createLMeDSPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, prob)->run(points1, points2, E, _mask); + createLMeDSPointSetRegistrator(makePtr<EMEstimatorCallback>(), 5, prob, maxIters)->run(points1, points2, E, _mask); return E; } +Mat findEssentialMat( InputArray _points1, InputArray _points2, InputArray _cameraMatrix, + int method, double prob, double threshold, + OutputArray _mask) +{ + return findEssentialMat(_points1, _points2, _cameraMatrix, method, prob, threshold, 1000, _mask); +} + +Mat findEssentialMat( InputArray _points1, InputArray _points2, double focal, Point2d pp, + int method, double prob, double threshold, int maxIters, OutputArray _mask) +{ + CV_INSTRUMENT_REGION(); + + Mat cameraMatrix = (Mat_<double>(3,3) << focal, 0, pp.x, 0, focal, pp.y, 0, 0, 1); + return findEssentialMat(_points1, _points2, cameraMatrix, method, prob, threshold, maxIters, _mask); +} + Mat findEssentialMat( InputArray _points1, InputArray _points2, double focal, Point2d pp, int method, double prob, double threshold, OutputArray _mask) { CV_INSTRUMENT_REGION(); Mat cameraMatrix = (Mat_<double>(3,3) << focal, 0, pp.x, 0, focal, pp.y, 0, 0, 1); - return findEssentialMat(_points1, _points2, cameraMatrix, method, prob, threshold, _mask); + return findEssentialMat(_points1, _points2, cameraMatrix, method, prob, threshold, 1000, _mask); } Mat findEssentialMat( InputArray _points1, InputArray _points2, diff --git a/modules/3d/src/fundam.cpp b/modules/3d/src/fundam.cpp index 853c304c81..921db14e34 100644 --- a/modules/3d/src/fundam.cpp +++ b/modules/3d/src/fundam.cpp @@ -878,7 +878,7 @@ Mat findFundamentalMat( InputArray _points1, InputArray _points2, if( (method & ~3) == FM_RANSAC && npoints >= 15 ) result = createRANSACPointSetRegistrator(cb, 7, ransacReprojThreshold, confidence, maxIters)->run(m1, m2, F, _mask); else - result = createLMeDSPointSetRegistrator(cb, 7, confidence)->run(m1, m2, F, _mask); + result = createLMeDSPointSetRegistrator(cb, 7, confidence, maxIters)->run(m1, m2, F, _mask); } if( result <= 0 ) diff --git a/modules/3d/src/polynom_solver.cpp b/modules/3d/src/polynom_solver.cpp index 5eb04b7bf4..201d09c439 100644 --- a/modules/3d/src/polynom_solver.cpp +++ b/modules/3d/src/polynom_solver.cpp @@ -71,7 +71,8 @@ int solve_deg3(double a, double b, double c, double d, return 3; } else { - x0 = pow(2 * R, 1 / 3.0) - b_a_3; + double cube_root = cv::cubeRoot(2 * R); + x0 = cube_root - b_a_3; return 1; } } @@ -88,8 +89,15 @@ int solve_deg3(double a, double b, double c, double d, } // D > 0, only one real root - double AD = pow(fabs(R) + sqrt(D), 1.0 / 3.0) * (R > 0 ? 1 : (R < 0 ? -1 : 0)); - double BD = (AD == 0) ?
0 : -Q / AD; + double AD = 0.; + double BD = 0.; + double R_abs = fabs(R); + if (R_abs > DBL_EPSILON) + { + AD = cv::cubeRoot(R_abs + sqrt(D)); + AD = (R >= 0) ? AD : -AD; + BD = -Q / AD; + } // Calculate the only real root x0 = AD + BD - b_a_3; diff --git a/modules/3d/src/solvepnp.cpp b/modules/3d/src/solvepnp.cpp index 01e8e9ac57..03fb6f88c0 100644 --- a/modules/3d/src/solvepnp.cpp +++ b/modules/3d/src/solvepnp.cpp @@ -334,18 +334,42 @@ bool solvePnPRansac(InputArray _opoints, InputArray _ipoints, opoints_inliers.resize(npoints1); ipoints_inliers.resize(npoints1); - result = solvePnP(opoints_inliers, ipoints_inliers, cameraMatrix, - distCoeffs, rvec, tvec, useExtrinsicGuess, - (flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P) ? SOLVEPNP_EPNP : flags) ? 1 : -1; + try + { + result = solvePnP(opoints_inliers, ipoints_inliers, cameraMatrix, + distCoeffs, rvec, tvec, useExtrinsicGuess, + (flags == SOLVEPNP_P3P || flags == SOLVEPNP_AP3P) ? SOLVEPNP_EPNP : flags) ? 1 : -1; + } + catch (const cv::Exception& e) + { + if (flags == SOLVEPNP_ITERATIVE && + npoints1 == 5 && + e.what() && + std::string(e.what()).find("DLT algorithm needs at least 6 points") != std::string::npos + ) + { + CV_LOG_INFO(NULL, "solvePnPRansac(): solvePnP stage to compute the final pose using points " + "in the consensus set raised DLT 6 points exception, use result from MSS (Minimal Sample Sets) stage instead."); + rvec = _local_model.col(0); // output rotation vector + tvec = _local_model.col(1); // output translation vector + result = 1; + } + else + { + // raise other exceptions + throw; + } + } - if( result <= 0 ) + if (result <= 0) { _rvec.assign(_local_model.col(0)); // output rotation vector _tvec.assign(_local_model.col(1)); // output translation vector - if( _inliers.needed() ) + if (_inliers.needed()) _inliers.release(); + CV_LOG_DEBUG(NULL, "solvePnPRansac(): solvePnP stage to compute the final pose using points in the consensus set failed. 
Return false"); return false; } else diff --git a/modules/3d/src/sqpnp.hpp b/modules/3d/src/sqpnp.hpp index f8136324c9..97c10e34e7 100644 --- a/modules/3d/src/sqpnp.hpp +++ b/modules/3d/src/sqpnp.hpp @@ -72,6 +72,7 @@ private: cv::Matx r_hat; cv::Matx t; double sq_error; + SQPSolution() : sq_error(0) {} }; /* diff --git a/modules/3d/src/usac/estimator.cpp b/modules/3d/src/usac/estimator.cpp index 91abe30512..75bc3cf5dd 100644 --- a/modules/3d/src/usac/estimator.cpp +++ b/modules/3d/src/usac/estimator.cpp @@ -236,13 +236,18 @@ public: CV_DbgAssert(points); } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters(const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const m = (double *) model.data; m11=static_cast(m[0]); m12=static_cast(m[1]); m13=static_cast(m[2]); m21=static_cast(m[3]); m22=static_cast(m[4]); m23=static_cast(m[5]); m31=static_cast(m[6]); m32=static_cast(m[7]); m33=static_cast(m[8]); const Mat model_inv = model.inv(); + CV_CheckTypeEQ(model_inv.depth(), CV_64F, ""); const auto * const minv = (double *) model_inv.data; minv11=(float)minv[0]; minv12=(float)minv[1]; minv13=(float)minv[2]; minv21=(float)minv[3]; minv22=(float)minv[4]; minv23=(float)minv[5]; @@ -299,7 +304,11 @@ public: CV_DbgAssert(points); } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters(const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const m = (double *) model.data; m11=static_cast(m[0]); m12=static_cast(m[1]); m13=static_cast(m[2]); m21=static_cast(m[3]); m22=static_cast(m[4]); m23=static_cast(m[5]); @@ -349,7 +358,11 @@ public: CV_DbgAssert(points); } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters(const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const m = (double *) model.data; m11=static_cast(m[0]); m12=static_cast(m[1]); m13=static_cast(m[2]); m21=static_cast(m[3]); m22=static_cast(m[4]); m23=static_cast(m[5]); @@ -416,7 +429,11 @@ public: CV_DbgAssert(points); } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters(const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const m = (double *) model.data; m11=static_cast(m[0]); m12=static_cast(m[1]); m13=static_cast(m[2]); m21=static_cast(m[3]); m22=static_cast(m[4]); m23=static_cast(m[5]); @@ -476,7 +493,11 @@ public: } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters (const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const p = (double *) model.data; p11 = (float)p[0]; p12 = (float)p[1]; p13 = (float)p[2]; p14 = (float)p[3]; p21 = (float)p[4]; p22 = (float)p[5]; p23 = (float)p[6]; p24 = (float)p[7]; @@ -535,7 +556,11 @@ public: CV_DbgAssert(points); } - inline void setModelParameters (const Mat &model) override { + inline void setModelParameters(const Mat& model) override + { + CV_Assert(!model.empty()); + CV_CheckTypeEQ(model.depth(), CV_64F, ""); + const auto * const m = (double *) model.data; m11 = (float)m[0]; m12 = (float)m[1]; m13 = (float)m[2]; m21 = (float)m[3]; m22 = (float)m[4]; m23 = (float)m[5]; diff --git a/modules/3d/src/usac/quality.cpp b/modules/3d/src/usac/quality.cpp index 
f29a5cfbb6..835306c4f4 100644 --- a/modules/3d/src/usac/quality.cpp +++ b/modules/3d/src/usac/quality.cpp @@ -421,7 +421,11 @@ public: * @current_hypothesis: current RANSAC iteration * Return: true if model is good, false - otherwise. */ - inline bool isModelGood (const Mat &model) override { + inline bool isModelGood(const Mat& model) override + { + if (model.empty()) + return false; + // update error object with current model err->setModelParameters(model); diff --git a/modules/3d/test/test_solvepnp_ransac.cpp b/modules/3d/test/test_solvepnp_ransac.cpp index fb0e2965e6..43b90dff92 100644 --- a/modules/3d/test/test_solvepnp_ransac.cpp +++ b/modules/3d/test/test_solvepnp_ransac.cpp @@ -837,6 +837,43 @@ TEST(Calib3d_SolvePnPRansac, double_support) EXPECT_LE(cvtest::norm(t, Mat_<double>(tF), NORM_INF), 1e-3); } +TEST(Calib3d_SolvePnPRansac, bad_input_points_19253) +{ + // with this specific data + // when computing the final pose using points in the consensus set with SOLVEPNP_ITERATIVE and solvePnP() + // an exception is thrown from solvePnP because there are 5 non-coplanar 3D points and the DLT algorithm needs at least 6 non-coplanar 3D points + // with PR #19253 we choose to return true, with the pose estimated from the MSS stage instead of throwing the exception + + float pts2d_[] = { + -5.38358629e-01f, -5.09638414e-02f, + -5.07192254e-01f, -2.20743284e-01f, + -5.43107152e-01f, -4.90474701e-02f, + -5.54325163e-01f, -1.86715424e-01f, + -5.59334219e-01f, -4.01909500e-02f, + -5.43504596e-01f, -4.61776406e-02f + }; + Mat pts2d(6, 2, CV_32FC1, pts2d_); + + float pts3d_[] = { + -3.01153604e-02f, -1.55665115e-01f, 4.50000018e-01f, + 4.27827090e-01f, 4.28645730e-01f, 1.08600008e+00f, + -3.14165242e-02f, -1.52656138e-01f, 4.50000018e-01f, + -1.46217480e-01f, 5.57961613e-02f, 7.17000008e-01f, + -4.89348806e-02f, -1.38795510e-01f, 4.47000027e-01f, + -3.13065052e-02f, -1.52636901e-01f, 4.51000035e-01f + }; + Mat pts3d(6, 3, CV_32FC1, pts3d_); + + Mat camera_mat = Mat::eye(3, 3, CV_64FC1); + Mat rvec, tvec; + vector<int> inliers; + + // solvePnPRansac will return true with 5 inliers, which means the result is from MSS stage.
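+ // (for reference, the positional arguments below are: useExtrinsicGuess=false,
+ // iterationsCount=100, reprojectionError=4.f/460.f, confidence=0.99)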
+ bool result = solvePnPRansac(pts3d, pts2d, camera_mat, noArray(), rvec, tvec, false, 100, 4.f / 460.f, 0.99, inliers); + EXPECT_EQ(inliers.size(), size_t(5)); + EXPECT_TRUE(result); +} + TEST(Calib3d_SolvePnP, input_type) { Matx33d intrinsics(5.4794130238156129e+002, 0., 2.9835545700043139e+002, 0., diff --git a/modules/3d/test/test_usac.cpp b/modules/3d/test/test_usac.cpp index 0b5cfde182..fb5641bd1e 100644 --- a/modules/3d/test/test_usac.cpp +++ b/modules/3d/test/test_usac.cpp @@ -4,7 +4,8 @@ #include "test_precomp.hpp" -namespace opencv_test { +namespace opencv_test { namespace { + enum TestSolver { Homogr, Fundam, Essen, PnP, Affine}; /* * rng -- reference to random generator @@ -264,7 +265,40 @@ TEST(usac_Fundamental, accuracy) { int(max_iters), mask); checkInliersMask(TestSolver::Fundam, inl_size, thr, pts1, pts2, F, mask); } - }} + } +} + +TEST(usac_Fundamental, regression_19639) +{ + double x_[] = { + 941, 890, + 596, 940, + 898, 941, + 894, 933, + 586, 938, + 902, 933, + 887, 935 + }; + Mat x(7, 1, CV_64FC2, x_); + + double y_[] = { + 1416, 806, + 1157, 852, + 1380, 855, + 1378, 843, + 1145, 849, + 1378, 843, + 1378, 843 + }; + Mat y(7, 1, CV_64FC2, y_); + + //std::cout << x << std::endl; + //std::cout << y << std::endl; + + Mat m = cv::findFundamentalMat(x, y, USAC_MAGSAC, 3, 0.99); + EXPECT_TRUE(m.empty()); +} + TEST(usac_Essential, accuracy) { std::vector gt_inliers; @@ -405,4 +439,5 @@ TEST(usac_testUsacParams, accuracy) { checkInliersMask(TestSolver::Homogr, inl_size, usac_params.threshold, pts1, pts2, model, mask); } -} + +}} // namespace diff --git a/modules/calib/include/opencv2/calib.hpp b/modules/calib/include/opencv2/calib.hpp index 945db3e87b..efcdd5d9e1 100644 --- a/modules/calib/include/opencv2/calib.hpp +++ b/modules/calib/include/opencv2/calib.hpp @@ -52,7 +52,7 @@ respectively) by the same factor. The joint rotation-translation matrix \f$[R|t]\f$ is the matrix product of a projective transformation and a homogeneous transformation. The 3-by-4 projective transformation maps 3D points -represented in camera coordinates to 2D poins in the image plane and represented in normalized +represented in camera coordinates to 2D points in the image plane and represented in normalized camera coordinates \f$x' = X_c / Z_c\f$ and \f$y' = Y_c / Z_c\f$: \f[Z_c \begin{bmatrix} @@ -484,13 +484,13 @@ CV_EXPORTS_W Mat initCameraMatrix2D( InputArrayOfArrays objectPoints, ( patternSize = cv::Size(points_per_row,points_per_colum) = cv::Size(columns,rows) ). @param corners Output array of detected corners. @param flags Various operation flags that can be zero or a combination of the following values: -- **CALIB_CB_ADAPTIVE_THRESH** Use adaptive thresholding to convert the image to black +- @ref CALIB_CB_ADAPTIVE_THRESH Use adaptive thresholding to convert the image to black and white, rather than a fixed threshold level (computed from the average image brightness). -- **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before +- @ref CALIB_CB_NORMALIZE_IMAGE Normalize the image gamma with equalizeHist before applying fixed or adaptive thresholding. -- **CALIB_CB_FILTER_QUADS** Use additional criteria (like contour area, perimeter, +- @ref CALIB_CB_FILTER_QUADS Use additional criteria (like contour area, perimeter, square-like shape) to filter out false quads extracted at the contour retrieval stage. 
-- **CALIB_CB_FAST_CHECK** Run a fast check on the image that looks for chessboard corners, +- @ref CALIB_CB_FAST_CHECK Run a fast check on the image that looks for chessboard corners, and shortcut the call if none is found. This can drastically speed up the call in the degenerate condition when no chessboard is observed. @@ -542,11 +542,11 @@ CV_EXPORTS_W bool checkChessboard(InputArray img, Size size); ( patternSize = cv::Size(points_per_row,points_per_column) = cv::Size(columns,rows) ). @param corners Output array of detected corners. @param flags Various operation flags that can be zero or a combination of the following values: -- **CALIB_CB_NORMALIZE_IMAGE** Normalize the image gamma with equalizeHist before detection. -- **CALIB_CB_EXHAUSTIVE** Run an exhaustive search to improve detection rate. -- **CALIB_CB_ACCURACY** Up sample input image to improve sub-pixel accuracy due to aliasing effects. -- **CALIB_CB_LARGER** The detected pattern is allowed to be larger than patternSize (see description). -- **CALIB_CB_MARKER** The detected pattern must have a marker (see description). +- @ref CALIB_CB_NORMALIZE_IMAGE Normalize the image gamma with equalizeHist before detection. +- @ref CALIB_CB_EXHAUSTIVE Run an exhaustive search to improve detection rate. +- @ref CALIB_CB_ACCURACY Up sample input image to improve sub-pixel accuracy due to aliasing effects. +- @ref CALIB_CB_LARGER The detected pattern is allowed to be larger than patternSize (see description). +- @ref CALIB_CB_MARKER The detected pattern must have a marker (see description). This should be used if an accurate camera calibration is required. @param meta Optional output array of detected corners (CV_8UC1 and size = cv::Size(columns,rows)). Each entry stands for one corner of the pattern and can have one of the following values: @@ -565,7 +565,7 @@ Calibration" demonstrating that the returned sub-pixel positions are more accurate than the one returned by cornerSubPix allowing a precise camera calibration for demanding applications. -In the case, the flags **CALIB_CB_LARGER** or **CALIB_CB_MARKER** are given, +In case the flags @ref CALIB_CB_LARGER or @ref CALIB_CB_MARKER are given, the result can be recovered from the optional meta array. Both flags are helpful to use calibration patterns exceeding the field of view of the camera. These oversized patterns allow more accurate calibrations as corners can be @@ -682,11 +682,12 @@ typedef CirclesGridFinderParameters CirclesGridFinderParameters2; ( patternSize = Size(points_per_row, points_per_column) ). @param centers output array of detected centers. @param flags various operation flags that can be one of the following values: -- **CALIB_CB_SYMMETRIC_GRID** uses symmetric pattern of circles. -- **CALIB_CB_ASYMMETRIC_GRID** uses asymmetric pattern of circles. -- **CALIB_CB_CLUSTERING** uses a special algorithm for grid detection. It is more robust to +- @ref CALIB_CB_SYMMETRIC_GRID uses symmetric pattern of circles. +- @ref CALIB_CB_ASYMMETRIC_GRID uses asymmetric pattern of circles. +- @ref CALIB_CB_CLUSTERING uses a special algorithm for grid detection. It is more robust to perspective distortions but much more sensitive to background clutter. @param blobDetector feature detector that finds blobs like dark circles on light background. + If `blobDetector` is NULL then `image` represents Point2f array of candidates. @param parameters struct for finding circles in a grid pattern. The function attempts to determine whether the input image contains a grid of circles.
If it is, the @@ -697,7 +698,7 @@ row). Otherwise, if the function fails to find all the corners or reorder them, Sample usage of detecting and drawing the centers of circles: : @code Size patternsize(7,7); //number of centers - Mat gray = ....; //source image + Mat gray = ...; //source image vector centers; //this will be filled by the detected centers bool patternfound = findCirclesGrid(gray, patternsize, centers); @@ -736,8 +737,8 @@ respectively. In the old interface all the vectors of object points from differe concatenated together. @param imageSize Size of the image used only to initialize the camera intrinsic matrix. @param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix -\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS -and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be +\f$\cameramatrix{A}\f$ . If @ref CALIB_USE_INTRINSIC_GUESS +and/or @ref CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be initialized before calling the function. @param distCoeffs Input/output vector of distortion coefficients \f$\distcoeffs\f$. @@ -760,40 +761,40 @@ parameters. Order of deviations values: \f$(R_0, T_0, \dotsc , R_{M - 1}, T_{M - the number of pattern views. \f$R_i, T_i\f$ are concatenated 1x3 vectors. @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view. @param flags Different flags that may be zero or a combination of the following values: -- **CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of +- @ref CALIB_USE_INTRINSIC_GUESS cameraMatrix contains valid initial values of fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image center ( imageSize is used), and focal distances are computed in a least-squares fashion. Note, that if intrinsic parameters are known, there is no need to use this function just to estimate extrinsic parameters. Use solvePnP instead. -- **CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global +- @ref CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global optimization. It stays at the center or at a different location specified when -CALIB_USE_INTRINSIC_GUESS is set too. -- **CALIB_FIX_ASPECT_RATIO** The functions consider only fy as a free parameter. The + @ref CALIB_USE_INTRINSIC_GUESS is set too. +- @ref CALIB_FIX_ASPECT_RATIO The functions consider only fy as a free parameter. The ratio fx/fy stays the same as in the input cameraMatrix . When -CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are + @ref CALIB_USE_INTRINSIC_GUESS is not set, the actual input values of fx and fy are ignored, only their ratio is computed and used further. -- **CALIB_ZERO_TANGENT_DIST** Tangential distortion coefficients \f$(p_1, p_2)\f$ are set +- @ref CALIB_ZERO_TANGENT_DIST Tangential distortion coefficients \f$(p_1, p_2)\f$ are set to zeros and stay zero. -- **CALIB_FIX_K1,...,CALIB_FIX_K6** The corresponding radial distortion -coefficient is not changed during the optimization. If CALIB_USE_INTRINSIC_GUESS is +- @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 The corresponding radial distortion +coefficient is not changed during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CALIB_RATIONAL_MODEL** Coefficients k4, k5, and k6 are enabled. 
To provide the +- @ref CALIB_RATIONAL_MODEL Coefficients k4, k5, and k6 are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +- @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the thin prism model and return 12 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during -the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +- @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during +the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the +- @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the tilted sensor model and return 14 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during -the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +- @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during +the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. @param criteria Termination criteria for the iterative optimization algorithm. @@ -805,7 +806,7 @@ points and their corresponding 2D projections in each view must be specified. Th by using an object with known geometry and easily detectable feature points. Such an object is called a calibration rig or calibration pattern, and OpenCV has built-in support for a chessboard as a calibration rig (see @ref findChessboardCorners). Currently, initialization of intrinsic -parameters (when CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration +parameters (when @ref CALIB_USE_INTRINSIC_GUESS is not set) is only implemented for planar calibration patterns (where Z-coordinates of the object points must be all zeros). 3D calibration rigs can also be used as long as initial cameraMatrix is provided. @@ -988,39 +989,39 @@ second camera coordinate system. @param F Output fundamental matrix. @param perViewErrors Output vector of the RMS re-projection error estimated for each pattern view. @param flags Different flags that may be zero or a combination of the following values: -- **CALIB_FIX_INTRINSIC** Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F +- @ref CALIB_FIX_INTRINSIC Fix cameraMatrix? and distCoeffs? so that only R, T, E, and F matrices are estimated. -- **CALIB_USE_INTRINSIC_GUESS** Optimize some or all of the intrinsic parameters +- @ref CALIB_USE_INTRINSIC_GUESS Optimize some or all of the intrinsic parameters according to the specified flags. 
Initial values are provided by the user. -- **CALIB_USE_EXTRINSIC_GUESS** R and T contain valid initial values that are optimized further. +- @ref CALIB_USE_EXTRINSIC_GUESS R and T contain valid initial values that are optimized further. Otherwise R and T are initialized to the median value of the pattern views (each dimension separately). -- **CALIB_FIX_PRINCIPAL_POINT** Fix the principal points during the optimization. -- **CALIB_FIX_FOCAL_LENGTH** Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ . -- **CALIB_FIX_ASPECT_RATIO** Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ +- @ref CALIB_FIX_PRINCIPAL_POINT Fix the principal points during the optimization. +- @ref CALIB_FIX_FOCAL_LENGTH Fix \f$f^{(j)}_x\f$ and \f$f^{(j)}_y\f$ . +- @ref CALIB_FIX_ASPECT_RATIO Optimize \f$f^{(j)}_y\f$ . Fix the ratio \f$f^{(j)}_x/f^{(j)}_y\f$ . -- **CALIB_SAME_FOCAL_LENGTH** Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . -- **CALIB_ZERO_TANGENT_DIST** Set tangential distortion coefficients for each camera to +- @ref CALIB_SAME_FOCAL_LENGTH Enforce \f$f^{(0)}_x=f^{(1)}_x\f$ and \f$f^{(0)}_y=f^{(1)}_y\f$ . +- @ref CALIB_ZERO_TANGENT_DIST Set tangential distortion coefficients for each camera to zeros and fix there. -- **CALIB_FIX_K1,...,CALIB_FIX_K6** Do not change the corresponding radial -distortion coefficient during the optimization. If CALIB_USE_INTRINSIC_GUESS is set, +- @ref CALIB_FIX_K1,..., @ref CALIB_FIX_K6 Do not change the corresponding radial +distortion coefficient during the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CALIB_RATIONAL_MODEL** Enable coefficients k4, k5, and k6. To provide the backward +- @ref CALIB_RATIONAL_MODEL Enable coefficients k4, k5, and k6. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the rational model and return 8 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_THIN_PRISM_MODEL** Coefficients s1, s2, s3 and s4 are enabled. To provide the +- @ref CALIB_THIN_PRISM_MODEL Coefficients s1, s2, s3 and s4 are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the thin prism model and return 12 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_FIX_S1_S2_S3_S4** The thin prism distortion coefficients are not changed during -the optimization. If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +- @ref CALIB_FIX_S1_S2_S3_S4 The thin prism distortion coefficients are not changed during +the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. -- **CALIB_TILTED_MODEL** Coefficients tauX and tauY are enabled. To provide the +- @ref CALIB_TILTED_MODEL Coefficients tauX and tauY are enabled. To provide the backward compatibility, this extra flag should be explicitly specified to make the calibration function use the tilted sensor model and return 14 coefficients. If the flag is not set, the function computes and returns only 5 distortion coefficients. -- **CALIB_FIX_TAUX_TAUY** The coefficients of the tilted sensor model are not changed during -the optimization. 
If CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the +- @ref CALIB_FIX_TAUX_TAUY The coefficients of the tilted sensor model are not changed during +the optimization. If @ref CALIB_USE_INTRINSIC_GUESS is set, the coefficient from the supplied distCoeffs matrix is used. Otherwise, it is set to 0. @param criteria Termination criteria for the iterative optimization algorithm. @@ -1068,10 +1069,10 @@ Besides the stereo-related information, the function can also perform a full cal the two cameras. However, due to the high dimensionality of the parameter space and noise in the input data, the function can diverge from the correct solution. If the intrinsic parameters can be estimated with high accuracy for each of the cameras individually (for example, using -calibrateCamera ), you are recommended to do so and then pass CALIB_FIX_INTRINSIC flag to the +calibrateCamera ), you are recommended to do so and then pass @ref CALIB_FIX_INTRINSIC flag to the function along with the computed intrinsic parameters. Otherwise, if all the parameters are estimated at once, it makes sense to restrict some parameters, for example, pass -CALIB_SAME_FOCAL_LENGTH and CALIB_ZERO_TANGENT_DIST flags, which is usually a + @ref CALIB_SAME_FOCAL_LENGTH and @ref CALIB_ZERO_TANGENT_DIST flags, which is usually a reasonable assumption. Similarly to calibrateCamera, the function minimizes the total re-projection error for all the @@ -1409,7 +1410,9 @@ enum{ CALIB_FIX_K3 = 1 << 6, CALIB_FIX_K4 = 1 << 7, CALIB_FIX_INTRINSIC = 1 << 8, - CALIB_FIX_PRINCIPAL_POINT = 1 << 9 + CALIB_FIX_PRINCIPAL_POINT = 1 << 9, + CALIB_ZERO_DISPARITY = 1 << 10, + CALIB_FIX_FOCAL_LENGTH = 1 << 11 }; /** @brief Projects points using fisheye model @@ -1542,7 +1545,7 @@ objectPoints[i].size() for each i. @param image_size Size of the image used only to initialize the camera intrinsic matrix. @param K Output 3x3 floating-point camera intrinsic matrix \f$\cameramatrix{A}\f$ . If -fisheye::CALIB_USE_INTRINSIC_GUESS/ is specified, some or all of fx, fy, cx, cy must be +@ref fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be initialized before calling the function. @param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$. @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view. @@ -1552,17 +1555,19 @@ space (in which object points are specified) to the world coordinate space, that position of the calibration pattern in the k-th pattern view (k=0.. *M* -1). @param tvecs Output vector of translation vectors estimated for each pattern view. @param flags Different flags that may be zero or a combination of the following values: -- **fisheye::CALIB_USE_INTRINSIC_GUESS** cameraMatrix contains valid initial values of +- @ref fisheye::CALIB_USE_INTRINSIC_GUESS cameraMatrix contains valid initial values of fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image center ( imageSize is used), and focal distances are computed in a least-squares fashion. -- **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration +- @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC Extrinsic will be recomputed after each iteration of intrinsic optimization. -- **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number. -- **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero. 
-- **fisheye::CALIB_FIX_K1..fisheye::CALIB_FIX_K4** Selected distortion coefficients +- @ref fisheye::CALIB_CHECK_COND The functions will check validity of condition number. +- @ref fisheye::CALIB_FIX_SKEW Skew coefficient (alpha) is set to zero and stay zero. +- @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients are set to zeros and stay zero. -- **fisheye::CALIB_FIX_PRINCIPAL_POINT** The principal point is not changed during the global -optimization. It stays at the center or at a different location specified when CALIB_USE_INTRINSIC_GUESS is set too. +- @ref fisheye::CALIB_FIX_PRINCIPAL_POINT The principal point is not changed during the global +optimization. It stays at the center or at a different location specified when @ref fisheye::CALIB_USE_INTRINSIC_GUESS is set too. +- @ref fisheye::CALIB_FIX_FOCAL_LENGTH The focal length is not changed during the global +optimization. It is the \f$max(width,height)/\pi\f$ or the provided \f$f_x\f$, \f$f_y\f$ when @ref fisheye::CALIB_USE_INTRINSIC_GUESS is set too. @param criteria Termination criteria for the iterative optimization algorithm. */ CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, const Size& image_size, @@ -1586,7 +1591,7 @@ camera. @param P2 Output 3x4 projection matrix in the new (rectified) coordinate systems for the second camera. @param Q Output \f$4 \times 4\f$ disparity-to-depth mapping matrix (see reprojectImageTo3D ). -@param flags Operation flags that may be zero or CALIB_ZERO_DISPARITY . If the flag is set, +@param flags Operation flags that may be zero or @ref fisheye::CALIB_ZERO_DISPARITY . If the flag is set, the function makes the principal points of each camera have the same pixel coordinates in the rectified views. And if the flag is not set, the function may still shift the images in the horizontal or vertical direction (depending on the orientation of epipolar lines) to maximize the @@ -1612,7 +1617,7 @@ observed by the first camera. observed by the second camera. @param K1 Input/output first camera intrinsic matrix: \f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If -any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified, +any of @ref fisheye::CALIB_USE_INTRINSIC_GUESS , @ref fisheye::CALIB_FIX_INTRINSIC are specified, some or all of the matrix components must be initialized. @param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements. @param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 . @@ -1622,16 +1627,16 @@ similar to D1 . @param R Output rotation matrix between the 1st and the 2nd camera coordinate systems. @param T Output translation vector between the coordinate systems of the cameras. @param flags Different flags that may be zero or a combination of the following values: -- **fisheye::CALIB_FIX_INTRINSIC** Fix K1, K2? and D1, D2? so that only R, T matrices +- @ref fisheye::CALIB_FIX_INTRINSIC Fix K1, K2? and D1, D2? so that only R, T matrices are estimated. -- **fisheye::CALIB_USE_INTRINSIC_GUESS** K1, K2 contains valid initial values of +- @ref fisheye::CALIB_USE_INTRINSIC_GUESS K1, K2 contains valid initial values of fx, fy, cx, cy that are optimized further. Otherwise, (cx, cy) is initially set to the image center (imageSize is used), and focal distances are computed in a least-squares fashion. 
-- **fisheye::CALIB_RECOMPUTE_EXTRINSIC** Extrinsic will be recomputed after each iteration +- @ref fisheye::CALIB_RECOMPUTE_EXTRINSIC Extrinsic will be recomputed after each iteration of intrinsic optimization. -- **fisheye::CALIB_CHECK_COND** The functions will check validity of condition number. -- **fisheye::CALIB_FIX_SKEW** Skew coefficient (alpha) is set to zero and stay zero. -- **fisheye::CALIB_FIX_K1..4** Selected distortion coefficients are set to zeros and stay +- @ref fisheye::CALIB_CHECK_COND The functions will check validity of condition number. +- @ref fisheye::CALIB_FIX_SKEW Skew coefficient (alpha) is set to zero and stay zero. +- @ref fisheye::CALIB_FIX_K1,..., @ref fisheye::CALIB_FIX_K4 Selected distortion coefficients are set to zeros and stay zero. @param criteria Termination criteria for the iterative optimization algorithm. */ diff --git a/modules/calib/src/calibinit.cpp b/modules/calib/src/calibinit.cpp index b776a1b617..ac4fc23d03 100644 --- a/modules/calib/src/calibinit.cpp +++ b/modules/calib/src/calibinit.cpp @@ -1837,7 +1837,7 @@ void ChessBoardDetector::generateQuads(const Mat& image_, int flags) if (boardIdx != parentIdx && (boardIdx < 0 || contour_child_counter[boardIdx] < contour_child_counter[parentIdx])) boardIdx = parentIdx; - contour_quads.push_back(QuadCountour(pt, parentIdx)); + contour_quads.emplace_back(pt, parentIdx); } size_t total = contour_quads.size(); @@ -2173,13 +2173,6 @@ void drawChessboardCorners( InputOutputArray image, Size patternSize, } } -static int quiet_error(int /*status*/, const char* /*func_name*/, - const char* /*err_msg*/, const char* /*file_name*/, - int /*line*/, void* /*userdata*/) -{ - return 0; -} - bool findCirclesGrid( InputArray _image, Size patternSize, OutputArray _centers, int flags, const Ptr &blobDetector, const CirclesGridFinderParameters& parameters_) @@ -2192,15 +2185,22 @@ bool findCirclesGrid( InputArray _image, Size patternSize, bool isSymmetricGrid = (flags & CALIB_CB_SYMMETRIC_GRID ) ? 
true : false; CV_Assert(isAsymmetricGrid ^ isSymmetricGrid); - Mat image = _image.getMat(); std::vector<Point2f> centers; - std::vector<KeyPoint> keypoints; - blobDetector->detect(image, keypoints); std::vector<Point2f> points; - for (size_t i = 0; i < keypoints.size(); i++) + if (blobDetector) { - points.push_back (keypoints[i].pt); + std::vector<KeyPoint> keypoints; + blobDetector->detect(_image, keypoints); + for (size_t i = 0; i < keypoints.size(); i++) + { + points.push_back(keypoints[i].pt); + } + } + else + { + CV_CheckTypeEQ(_image.type(), CV_32FC2, "blobDetector must be provided or image must contain Point2f array (std::vector<Point2f>) with candidates"); + _image.copyTo(points); } if(flags & CALIB_CB_ASYMMETRIC_GRID) @@ -2216,64 +2216,59 @@ bool findCirclesGrid( InputArray _image, Size patternSize, return !centers.empty(); } + bool isValid = false; const int attempts = 2; const size_t minHomographyPoints = 4; Mat H; for (int i = 0; i < attempts; i++) { - centers.clear(); - CirclesGridFinder boxFinder(patternSize, points, parameters); - bool isFound = false; -#define BE_QUIET 1 -#if BE_QUIET - void* oldCbkData; - ErrorCallback oldCbk = redirectError(quiet_error, 0, &oldCbkData); // FIXIT not thread safe -#endif - try - { - isFound = boxFinder.findHoles(); - } - catch (const Exception &) - { - - } -#if BE_QUIET - redirectError(oldCbk, oldCbkData); -#endif - if (isFound) - { - switch(parameters.gridType) + centers.clear(); + CirclesGridFinder boxFinder(patternSize, points, parameters); + try { - case CirclesGridFinderParameters::SYMMETRIC_GRID: - boxFinder.getHoles(centers); - break; - case CirclesGridFinderParameters::ASYMMETRIC_GRID: - boxFinder.getAsymmetricHoles(centers); - break; - default: - CV_Error(Error::StsBadArg, "Unknown pattern type"); + bool isFound = boxFinder.findHoles(); + if (isFound) + { + switch(parameters.gridType) + { + case CirclesGridFinderParameters::SYMMETRIC_GRID: + boxFinder.getHoles(centers); + break; + case CirclesGridFinderParameters::ASYMMETRIC_GRID: + boxFinder.getAsymmetricHoles(centers); + break; + default: + CV_Error(Error::StsBadArg, "Unknown pattern type"); + } + + isValid = true; + break; // done, return result + } + } + catch (const cv::Exception& e) + { + CV_UNUSED(e); + CV_LOG_DEBUG(NULL, "findCirclesGrid2: attempt=" << i << ": " << e.what()); + // nothing, next attempt } - if (i != 0) + boxFinder.getHoles(centers); + if (i != attempts - 1) { - Mat orgPointsMat; - transform(centers, orgPointsMat, H.inv()); - convertPointsFromHomogeneous(orgPointsMat, centers); + if (centers.size() < minHomographyPoints) + break; + H = CirclesGridFinder::rectifyGrid(boxFinder.getDetectedGridSize(), centers, points, points); } - Mat(centers).copyTo(_centers); - return true; - } + } - boxFinder.getHoles(centers); - if (i != attempts - 1) - { - if (centers.size() < minHomographyPoints) - break; - H = CirclesGridFinder::rectifyGrid(boxFinder.getDetectedGridSize(), centers, points, points); - } + if (!centers.empty() && !H.empty()) // undone rectification + { + Mat orgPointsMat; + transform(centers, orgPointsMat, H.inv()); + convertPointsFromHomogeneous(orgPointsMat, centers); } Mat(centers).copyTo(_centers); - return false; + return isValid; } bool findCirclesGrid(InputArray _image, Size patternSize, diff --git a/modules/calib/src/checkchessboard.cpp b/modules/calib/src/checkchessboard.cpp index 987790eedb..47995297c2 100644 --- a/modules/calib/src/checkchessboard.cpp +++ b/modules/calib/src/checkchessboard.cpp @@ -76,7 +76,7 @@ static void icvGetQuadrangleHypotheses(const std::vector<std::vector< cv::Point > >& contours, - quads.push_back(std::pair<float, int>(box_size, class_id)); +
quads.emplace_back(box_size, class_id); } } diff --git a/modules/calib/src/circlesgrid.cpp b/modules/calib/src/circlesgrid.cpp index d60069d1c1..90c123e140 100644 --- a/modules/calib/src/circlesgrid.cpp +++ b/modules/calib/src/circlesgrid.cpp @@ -384,15 +384,15 @@ void CirclesGridClusterFinder::rectifyPatternPoints(const std::vector trueIndices; - trueIndices.push_back(Point(0, 0)); - trueIndices.push_back(Point(patternSize.width - 1, 0)); + trueIndices.emplace_back(0, 0); + trueIndices.emplace_back(patternSize.width - 1, 0); if(isAsymmetricGrid) { - trueIndices.push_back(Point(patternSize.width - 1, 1)); - trueIndices.push_back(Point(patternSize.width - 1, patternSize.height - 2)); + trueIndices.emplace_back(patternSize.width - 1, 1); + trueIndices.emplace_back(patternSize.width - 1, patternSize.height - 2); } - trueIndices.push_back(Point(patternSize.width - 1, patternSize.height - 1)); - trueIndices.push_back(Point(0, patternSize.height - 1)); + trueIndices.emplace_back(patternSize.width - 1, patternSize.height - 1); + trueIndices.emplace_back(0, patternSize.height - 1); std::vector idealPoints; for(size_t idx=0; idx (id, Vertex())); + vertices.emplace(id, Vertex()); } void Graph::addEdge(size_t id1, size_t id2) @@ -889,10 +889,9 @@ Mat CirclesGridFinder::rectifyGrid(Size detectedGridSize, const std::vector //all 8 segments with one end in a corner std::vector corner; - corner.push_back(Segment(keypoints[points[1][0]], keypoints[points[0][0]])); - corner.push_back(Segment(keypoints[points[0][0]], keypoints[points[0][1]])); + corner.emplace_back(keypoints[points[1][0]], keypoints[points[0][0]]); + corner.emplace_back(keypoints[points[0][0]], keypoints[points[0][1]]); segments.push_back(corner); - cornerIndices.push_back(Point(0, 0)); - firstSteps.push_back(Point(1, 0)); - secondSteps.push_back(Point(0, 1)); + cornerIndices.emplace_back(0, 0); + firstSteps.emplace_back(1, 0); + secondSteps.emplace_back(0, 1); corner.clear(); - corner.push_back(Segment(keypoints[points[0][w - 2]], keypoints[points[0][w - 1]])); - corner.push_back(Segment(keypoints[points[0][w - 1]], keypoints[points[1][w - 1]])); + corner.emplace_back(keypoints[points[0][w - 2]], keypoints[points[0][w - 1]]); + corner.emplace_back(keypoints[points[0][w - 1]], keypoints[points[1][w - 1]]); segments.push_back(corner); - cornerIndices.push_back(Point(w - 1, 0)); - firstSteps.push_back(Point(0, 1)); - secondSteps.push_back(Point(-1, 0)); + cornerIndices.emplace_back(w - 1, 0); + firstSteps.emplace_back(0, 1); + secondSteps.emplace_back(-1, 0); corner.clear(); - corner.push_back(Segment(keypoints[points[h - 2][w - 1]], keypoints[points[h - 1][w - 1]])); - corner.push_back(Segment(keypoints[points[h - 1][w - 1]], keypoints[points[h - 1][w - 2]])); + corner.emplace_back(keypoints[points[h - 2][w - 1]], keypoints[points[h - 1][w - 1]]); + corner.emplace_back(keypoints[points[h - 1][w - 1]], keypoints[points[h - 1][w - 2]]); segments.push_back(corner); - cornerIndices.push_back(Point(w - 1, h - 1)); - firstSteps.push_back(Point(-1, 0)); - secondSteps.push_back(Point(0, -1)); + cornerIndices.emplace_back(w - 1, h - 1); + firstSteps.emplace_back(-1, 0); + secondSteps.emplace_back(0, -1); corner.clear(); - corner.push_back(Segment(keypoints[points[h - 1][1]], keypoints[points[h - 1][0]])); - corner.push_back(Segment(keypoints[points[h - 1][0]], keypoints[points[h - 2][0]])); - cornerIndices.push_back(Point(0, h - 1)); - firstSteps.push_back(Point(0, -1)); - secondSteps.push_back(Point(1, 0)); + corner.emplace_back(keypoints[points[h - 
1][1]], keypoints[points[h - 1][0]]); + corner.emplace_back(keypoints[points[h - 1][0]], keypoints[points[h - 2][0]]); + cornerIndices.emplace_back(0, h - 1); + firstSteps.emplace_back(0, -1); + secondSteps.emplace_back(1, 0); segments.push_back(corner); corner.clear(); @@ -1616,7 +1615,7 @@ size_t CirclesGridFinder::getFirstCorner(std::vector &largeCornerIndices, int cornerIdx = 0; bool waitOutsider = true; - for(;;) + for (size_t i = 0; i < cornersCount * 2; ++i) { if (waitOutsider) { @@ -1626,13 +1625,13 @@ size_t CirclesGridFinder::getFirstCorner(std::vector &largeCornerIndices, else { if (isInsider[(cornerIdx + 1) % cornersCount]) - break; + return cornerIdx; } cornerIdx = (cornerIdx + 1) % cornersCount; } - return cornerIdx; + CV_Error(Error::StsNoConv, "isInsider array has the same values"); } } diff --git a/modules/calib/src/fisheye.cpp b/modules/calib/src/fisheye.cpp index 5874b7faa7..2d79b8fa94 100644 --- a/modules/calib/src/fisheye.cpp +++ b/modules/calib/src/fisheye.cpp @@ -756,8 +756,8 @@ double cv::fisheye::calibrate(InputArrayOfArrays objectPoints, InputArrayOfArray IntrinsicParams currentParam; IntrinsicParams errors; - finalParam.isEstimate[0] = 1; - finalParam.isEstimate[1] = 1; + finalParam.isEstimate[0] = flags & CALIB_FIX_FOCAL_LENGTH ? 0 : 1; + finalParam.isEstimate[1] = flags & CALIB_FIX_FOCAL_LENGTH ? 0 : 1; finalParam.isEstimate[2] = flags & CALIB_FIX_PRINCIPAL_POINT ? 0 : 1; finalParam.isEstimate[3] = flags & CALIB_FIX_PRINCIPAL_POINT ? 0 : 1; finalParam.isEstimate[4] = flags & CALIB_FIX_SKEW ? 0 : 1; diff --git a/modules/calib/test/test_chesscorners.cpp b/modules/calib/test/test_chesscorners.cpp index 3d730f6bfd..b4d0628c87 100644 --- a/modules/calib/test/test_chesscorners.cpp +++ b/modules/calib/test/test_chesscorners.cpp @@ -656,5 +656,99 @@ TEST(Calib3d_CirclesPatternDetectorWithClustering, accuracy) ASSERT_LE(error, precise_success_error_level); } +TEST(Calib3d_AsymmetricCirclesPatternDetector, regression_18713) +{ + float pts_[][2] = { + { 166.5, 107 }, { 146, 236 }, { 147, 92 }, { 184, 162 }, { 150, 185.5 }, + { 215, 105 }, { 270.5, 186 }, { 159, 142 }, { 6, 205.5 }, { 32, 148.5 }, + { 126, 163.5 }, { 181, 208.5 }, { 240.5, 62 }, { 84.5, 76.5 }, { 190, 120.5 }, + { 10, 189 }, { 266, 104 }, { 307.5, 207.5 }, { 97, 184 }, { 116.5, 210 }, + { 114, 139 }, { 84.5, 233 }, { 269.5, 139 }, { 136, 126.5 }, { 120, 107.5 }, + { 129.5, 65.5 }, { 212.5, 140.5 }, { 204.5, 60.5 }, { 207.5, 241 }, { 61.5, 94.5 }, + { 186.5, 61.5 }, { 220, 63 }, { 239, 120.5 }, { 212, 186 }, { 284, 87.5 }, + { 62, 114.5 }, { 283, 61.5 }, { 238.5, 88.5 }, { 243, 159 }, { 245, 208 }, + { 298.5, 158.5 }, { 57, 129 }, { 156.5, 63.5 }, { 192, 90.5 }, { 281, 235.5 }, + { 172, 62.5 }, { 291.5, 119.5 }, { 90, 127 }, { 68.5, 166.5 }, { 108.5, 83.5 }, + { 22, 176 } + }; + Mat candidates(51, 1, CV_32FC2, (void*)pts_); + Size patternSize(4, 9); + + std::vector< Point2f > result; + bool res = false; + + // issue reports about hangs + EXPECT_NO_THROW(res = findCirclesGrid(candidates, patternSize, result, CALIB_CB_ASYMMETRIC_GRID, Ptr()/*blobDetector=NULL*/)); + EXPECT_FALSE(res); + + if (cvtest::debugLevel > 0) + { + std::cout << Mat(candidates) << std::endl; + std::cout << Mat(result) << std::endl; + Mat img(Size(400, 300), CV_8UC3, Scalar::all(0)); + + std::vector< Point2f > centers; + candidates.copyTo(centers); + + for (size_t i = 0; i < centers.size(); i++) + { + const Point2f& pt = centers[i]; + //printf("{ %g, %g }, \n", pt.x, pt.y); + circle(img, pt, 5, Scalar(0, 255, 0)); + } + for (size_t i = 
0; i < result.size(); i++) + { + const Point2f& pt = result[i]; + circle(img, pt, 10, Scalar(0, 0, 255)); + } + imwrite("test_18713.png", img); + if (cvtest::debugLevel >= 10) + { + imshow("result", img); + waitKey(); + } + } +} + +TEST(Calib3d_AsymmetricCirclesPatternDetector, regression_19498) +{ + float pts_[121][2] = { + { 84.7462f, 404.504f }, { 49.1586f, 404.092f }, { 12.3362f, 403.434f }, { 102.542f, 386.214f }, { 67.6042f, 385.475f }, + { 31.4982f, 384.569f }, { 141.231f, 377.856f }, { 332.834f, 370.745f }, { 85.7663f, 367.261f }, { 50.346f, 366.051f }, + { 13.7726f, 364.663f }, { 371.746f, 362.011f }, { 68.8543f, 347.883f }, { 32.9334f, 346.263f }, { 331.926f, 343.291f }, + { 351.535f, 338.112f }, { 51.7951f, 328.247f }, { 15.4613f, 326.095f }, { 311.719f, 319.578f }, { 330.947f, 313.708f }, + { 256.706f, 307.584f }, { 34.6834f, 308.167f }, { 291.085f, 295.429f }, { 17.4316f, 287.824f }, { 252.928f, 277.92f }, + { 270.19f, 270.93f }, { 288.473f, 263.484f }, { 216.401f, 260.94f }, { 232.195f, 253.656f }, { 266.757f, 237.708f }, + { 211.323f, 229.005f }, { 227.592f, 220.498f }, { 154.749f, 188.52f }, { 222.52f, 184.906f }, { 133.85f, 163.968f }, + { 200.024f, 158.05f }, { 147.485f, 153.643f }, { 161.967f, 142.633f }, { 177.396f, 131.059f }, { 125.909f, 128.116f }, + { 139.817f, 116.333f }, { 91.8639f, 114.454f }, { 104.343f, 102.542f }, { 117.635f, 89.9116f }, { 70.9465f, 89.4619f }, + { 82.8524f, 76.7862f }, { 131.738f, 76.4741f }, { 95.5012f, 63.3351f }, { 109.034f, 49.0424f }, { 314.886f, 374.711f }, + { 351.735f, 366.489f }, { 279.113f, 357.05f }, { 313.371f, 348.131f }, { 260.123f, 335.271f }, { 276.346f, 330.325f }, + { 293.588f, 325.133f }, { 240.86f, 313.143f }, { 273.436f, 301.667f }, { 206.762f, 296.574f }, { 309.877f, 288.796f }, + { 187.46f, 274.319f }, { 201.521f, 267.804f }, { 248.973f, 245.918f }, { 181.644f, 244.655f }, { 196.025f, 237.045f }, + { 148.41f, 229.131f }, { 161.604f, 221.215f }, { 175.455f, 212.873f }, { 244.748f, 211.459f }, { 128.661f, 206.109f }, + { 190.217f, 204.108f }, { 141.346f, 197.568f }, { 205.876f, 194.781f }, { 168.937f, 178.948f }, { 121.006f, 173.714f }, + { 183.998f, 168.806f }, { 88.9095f, 159.731f }, { 100.559f, 149.867f }, { 58.553f, 146.47f }, { 112.849f, 139.302f }, + { 80.0968f, 125.74f }, { 39.24f, 123.671f }, { 154.582f, 103.85f }, { 59.7699f, 101.49f }, { 266.334f, 385.387f }, + { 234.053f, 368.718f }, { 263.347f, 361.184f }, { 244.763f, 339.958f }, { 198.16f, 328.214f }, { 211.675f, 323.407f }, + { 225.905f, 318.426f }, { 192.98f, 302.119f }, { 221.267f, 290.693f }, { 161.437f, 286.46f }, { 236.656f, 284.476f }, + { 168.023f, 251.799f }, { 105.385f, 221.988f }, { 116.724f, 214.25f }, { 97.2959f, 191.81f }, { 108.89f, 183.05f }, + { 77.9896f, 169.242f }, { 48.6763f, 156.088f }, { 68.9635f, 136.415f }, { 29.8484f, 133.886f }, { 49.1966f, 112.826f }, + { 113.059f, 29.003f }, { 251.698f, 388.562f }, { 281.689f, 381.929f }, { 297.875f, 378.518f }, { 248.376f, 365.025f }, + { 295.791f, 352.763f }, { 216.176f, 348.586f }, { 230.143f, 344.443f }, { 179.89f, 307.457f }, { 174.083f, 280.51f }, + { 142.867f, 265.085f }, { 155.127f, 258.692f }, { 124.187f, 243.661f }, { 136.01f, 236.553f }, { 86.4651f, 200.13f }, + { 67.5711f, 178.221f } + }; + + Mat candidates(121, 1, CV_32FC2, (void*)pts_); + Size patternSize(13, 8); + + std::vector< Point2f > result; + bool res = false; + + EXPECT_NO_THROW(res = findCirclesGrid(candidates, patternSize, result, CALIB_CB_SYMMETRIC_GRID, Ptr()/*blobDetector=NULL*/)); + EXPECT_FALSE(res); +} + }} // 
namespace /* End of file. */ diff --git a/modules/calib/test/test_cornerssubpix.cpp b/modules/calib/test/test_cornerssubpix.cpp index 05b75c5cbc..b70cc1e988 100644 --- a/modules/calib/test/test_cornerssubpix.cpp +++ b/modules/calib/test/test_cornerssubpix.cpp @@ -153,9 +153,8 @@ void CV_ChessboardSubpixelTest::run( int ) vector<Point2f> test_corners; bool result = findChessboardCorners(chessboard_image, pattern_size, test_corners, 15); - if(!result) + if (!result && cvtest::debugLevel > 0) { -#if 0 ts->printf(cvtest::TS::LOG, "Warning: chessboard was not detected! Writing image to test.png\n"); ts->printf(cvtest::TS::LOG, "Size = %d, %d\n", pattern_size.width, pattern_size.height); ts->printf(cvtest::TS::LOG, "Intrinsic params: fx = %f, fy = %f, cx = %f, cy = %f\n", @@ -167,7 +166,9 @@ void CV_ChessboardSubpixelTest::run( int ) distortion_coeffs_.at<double>(0, 4)); imwrite("test.png", chessboard_image); -#endif + } + if (!result) + { continue; } diff --git a/modules/calib/test/test_fisheye.cpp b/modules/calib/test/test_fisheye.cpp index 5acc5cafaa..310804d233 100644 --- a/modules/calib/test/test_fisheye.cpp +++ b/modules/calib/test/test_fisheye.cpp @@ -345,7 +345,7 @@ TEST_F(fisheyeTest, Calibration) std::vector<std::vector<cv::Point2d> > imagePoints(n_images); std::vector<std::vector<cv::Point3d> > objectPoints(n_images); - const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); + const std::string folder = combine(datasets_repository_path, "calib-3_stereo_from_JY"); cv::FileStorage fs_left(combine(folder, "left.xml"), cv::FileStorage::READ); CV_Assert(fs_left.isOpened()); for(int i = 0; i < n_images; ++i) @@ -373,6 +373,53 @@ TEST_F(fisheyeTest, Calibration) EXPECT_MAT_NEAR(theD, this->D, 1e-10); } +TEST_F(fisheyeTest, CalibrationWithFixedFocalLength) +{ + const int n_images = 34; + + std::vector<std::vector<cv::Point2d> > imagePoints(n_images); + std::vector<std::vector<cv::Point3d> > objectPoints(n_images); + + const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); + cv::FileStorage fs_left(combine(folder, "left.xml"), cv::FileStorage::READ); + CV_Assert(fs_left.isOpened()); + for(int i = 0; i < n_images; ++i) + fs_left[cv::format("image_%d", i )] >> imagePoints[i]; + fs_left.release(); + + cv::FileStorage fs_object(combine(folder, "object.xml"), cv::FileStorage::READ); + CV_Assert(fs_object.isOpened()); + for(int i = 0; i < n_images; ++i) + fs_object[cv::format("image_%d", i )] >> objectPoints[i]; + fs_object.release(); + + int flag = 0; + flag |= cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC; + flag |= cv::fisheye::CALIB_CHECK_COND; + flag |= cv::fisheye::CALIB_FIX_SKEW; + flag |= cv::fisheye::CALIB_FIX_FOCAL_LENGTH; + flag |= cv::fisheye::CALIB_USE_INTRINSIC_GUESS; + + cv::Matx33d theK = this->K; + const cv::Matx33d newK( + 558.478088, 0.000000, 620.458461, + 0.000000, 560.506767, 381.939362, + 0.000000, 0.000000, 1.000000); + + cv::Vec4d theD; + const cv::Vec4d newD(-0.001461, -0.003298, 0.006057, -0.003742); + + cv::fisheye::calibrate(objectPoints, imagePoints, imageSize, theK, theD, + cv::noArray(), cv::noArray(), flag, cv::TermCriteria(3, 20, 1e-6)); + + // ensure that CALIB_FIX_FOCAL_LENGTH works and focal length has not changed + EXPECT_EQ(theK(0,0), K(0,0)); + EXPECT_EQ(theK(1,1), K(1,1)); + + EXPECT_MAT_NEAR(theK, newK, 1e-6); + EXPECT_MAT_NEAR(theD, newD, 1e-6); +} + TEST_F(fisheyeTest, Homography) { const int n_images = 1; @@ -380,7 +427,7 @@ TEST_F(fisheyeTest, Homography) std::vector<std::vector<cv::Point2d> > imagePoints(n_images); std::vector<std::vector<cv::Point3d> > objectPoints(n_images); - const std::string folder =combine(datasets_repository_path,
"calib-3_stereo_from_JY"); + const std::string folder = combine(datasets_repository_path, "calib-3_stereo_from_JY"); cv::FileStorage fs_left(combine(folder, "left.xml"), cv::FileStorage::READ); CV_Assert(fs_left.isOpened()); for(int i = 0; i < n_images; ++i) @@ -492,7 +539,13 @@ TEST_F(fisheyeTest, EstimateUncertainties) TEST_F(fisheyeTest, stereoRectify) { - const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); + // For consistency purposes + CV_StaticAssert( + static_cast(cv::CALIB_ZERO_DISPARITY) == static_cast(cv::fisheye::CALIB_ZERO_DISPARITY), + "For the purpose of continuity the following should be true: cv::CALIB_ZERO_DISPARITY == cv::fisheye::CALIB_ZERO_DISPARITY" + ); + + const std::string folder = combine(datasets_repository_path, "calib-3_stereo_from_JY"); cv::Size calibration_size = this->imageSize, requested_size = calibration_size; cv::Matx33d K1 = this->K, K2 = K1; @@ -504,7 +557,7 @@ TEST_F(fisheyeTest, stereoRectify) double balance = 0.0, fov_scale = 1.1; cv::Mat R1, R2, P1, P2, Q; cv::fisheye::stereoRectify(K1, D1, K2, D2, calibration_size, theR, theT, R1, R2, P1, P2, Q, - cv::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale); + cv::fisheye::CALIB_ZERO_DISPARITY, requested_size, balance, fov_scale); // Collected with these CMake flags: -DWITH_IPP=OFF -DCV_ENABLE_INTRINSICS=OFF -DCV_DISABLE_OPTIMIZATION=ON -DCMAKE_BUILD_TYPE=Debug cv::Matx33d R1_ref( @@ -551,7 +604,10 @@ TEST_F(fisheyeTest, stereoRectify) << "Q =" << std::endl << Q << std::endl; } -#if 1 // Debug code + if (cvtest::debugLevel == 0) + return; + // DEBUG code is below + cv::Mat lmapx, lmapy, rmapx, rmapy; //rewrite for fisheye cv::fisheye::initUndistortRectifyMap(K1, D1, R1, P1, requested_size, CV_32F, lmapx, lmapy); @@ -584,14 +640,13 @@ TEST_F(fisheyeTest, stereoRectify) cv::imwrite(cv::format("fisheye_rectification_AB_%03d.png", i), rectification); } -#endif } TEST_F(fisheyeTest, stereoCalibrate) { const int n_images = 34; - const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); + const std::string folder = combine(datasets_repository_path, "calib-3_stereo_from_JY"); std::vector > leftPoints(n_images); std::vector > rightPoints(n_images); @@ -658,7 +713,7 @@ TEST_F(fisheyeTest, stereoCalibrateFixIntrinsic) { const int n_images = 34; - const std::string folder =combine(datasets_repository_path, "calib-3_stereo_from_JY"); + const std::string folder = combine(datasets_repository_path, "calib-3_stereo_from_JY"); std::vector > leftPoints(n_images); std::vector > rightPoints(n_images); @@ -814,6 +869,7 @@ const cv::Matx33d fisheyeTest::K(558.478087865323, 0, 620.45851536 const cv::Vec4d fisheyeTest::D(-0.0014613319981768, -0.00329861110580401, 0.00605760088590183, -0.00374209380722371); + const cv::Matx33d fisheyeTest::R ( 9.9756700084424932e-01, 6.9698277640183867e-02, 1.4929569991321144e-03, -6.9711825162322980e-02, 9.9748249845531767e-01, 1.2997180766418455e-02, -5.8331736398316541e-04,-1.3069635393884985e-02, 9.9991441852366736e-01); diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 73e7f1d7bb..b2797ab31f 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -17,6 +17,18 @@ ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE ocv_add_dispatched_file_force_all(test_intrin256 TEST AVX2 AVX512_SKX) ocv_add_dispatched_file_force_all(test_intrin512 TEST AVX512_SKX) + +set(PARALLEL_ENABLE_PLUGINS_DEFAULT ON) +if(EMSCRIPTEN OR IOS OR WINRT) + 
set(PARALLEL_ENABLE_PLUGINS_DEFAULT OFF) +endif() +# parallel backends configuration +set(PARALLEL_ENABLE_PLUGINS "${PARALLEL_ENABLE_PLUGINS_DEFAULT}" CACHE BOOL "Allow building parallel plugin support") +# TODO building plugins with OpenCV is not supported yet +#set(PARALLEL_PLUGIN_LIST "" CACHE STRING "List of parallel backends to be compiled as plugins (tbb, openmp or special value 'all')") +#string(REPLACE "," ";" PARALLEL_PLUGIN_LIST "${PARALLEL_PLUGIN_LIST}") # support comma-separated list (,) too + + ocv_add_module(core OPTIONAL opencv_cudev WRAP java objc python js) @@ -58,10 +70,15 @@ file(GLOB_RECURSE module_opencl_hdrs source_group("Include\\Cuda Headers" FILES ${lib_cuda_hdrs}) source_group("Include\\Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail}) +file(GLOB_RECURSE core_parallel_hdrs + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/parallel/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/parallel/*.h") +ocv_source_group("Include" DIRBASE "${CMAKE_CURRENT_LIST_DIR}/include" FILES ${core_parallel_hdrs}) + source_group("Src" FILES "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version_string.inc") ocv_glob_module_sources(SOURCES "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version_string.inc" - HEADERS ${module_opencl_hdrs} ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail}) + HEADERS ${core_parallel_hdrs} ${module_opencl_hdrs} ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail}) ocv_module_include_directories(${the_module} ${ZLIB_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS}) if(ANDROID AND HAVE_CPUFEATURES) @@ -80,24 +97,46 @@ endif() if(HAVE_MEMALIGN) ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_MEMALIGN=1") endif() +if(HAVE_WIN32_ALIGNED_MALLOC) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.cpp "HAVE_WIN32_ALIGNED_MALLOC=1") +endif() if(HAVE_VA_INTEL_OLD_HEADER) ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp" "HAVE_VA_INTEL_OLD_HEADER") endif() +if(OPENCV_LIBVA_LINK) + ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/va_intel.cpp" "OPENCV_LIBVA_LINK=1") +endif() option(OPENCV_ENABLE_ALLOCATOR_STATS "Enable Allocator metrics" ON) if(NOT OPENCV_ENABLE_ALLOCATOR_STATS) add_definitions(-DOPENCV_DISABLE_ALLOCATOR_STATS=1) -else() +elseif(HAVE_CXX11 OR DEFINED OPENCV_ALLOCATOR_STATS_COUNTER_TYPE) if(NOT DEFINED OPENCV_ALLOCATOR_STATS_COUNTER_TYPE) if(HAVE_ATOMIC_LONG_LONG AND OPENCV_ENABLE_ATOMIC_LONG_LONG) - set(OPENCV_ALLOCATOR_STATS_COUNTER_TYPE "long long") + if(MINGW) + # command-line generation issue due to space in value, int/int64_t should be used instead + # https://github.com/opencv/opencv/issues/16990 + message(STATUS "Consider adding OPENCV_ALLOCATOR_STATS_COUNTER_TYPE=int/int64_t according to your build configuration") + else() + set(OPENCV_ALLOCATOR_STATS_COUNTER_TYPE "long long") + endif() else() set(OPENCV_ALLOCATOR_STATS_COUNTER_TYPE "int") endif() endif() - message(STATUS "Allocator metrics storage type: '${OPENCV_ALLOCATOR_STATS_COUNTER_TYPE}'") - add_definitions("-DOPENCV_ALLOCATOR_STATS_COUNTER_TYPE=${OPENCV_ALLOCATOR_STATS_COUNTER_TYPE}") + if(DEFINED OPENCV_ALLOCATOR_STATS_COUNTER_TYPE) + message(STATUS "Allocator metrics storage type: '${OPENCV_ALLOCATOR_STATS_COUNTER_TYPE}'") + add_definitions("-DOPENCV_ALLOCATOR_STATS_COUNTER_TYPE=${OPENCV_ALLOCATOR_STATS_COUNTER_TYPE}") + endif() +endif() + + +if(PARALLEL_ENABLE_PLUGINS) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/parallel/parallel.cpp 
"PARALLEL_ENABLE_PLUGINS=1") + if(OPENCV_DEBUG_POSTFIX) + ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/parallel/parallel.cpp" "DEBUG_POSTFIX=${OPENCV_DEBUG_POSTFIX}") + endif() endif() @@ -110,6 +149,10 @@ ocv_target_link_libraries(${the_module} PRIVATE "${OPENCV_HAL_LINKER_LIBS}" ) +if(OPENCV_CORE_EXCLUDE_C_API) + ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1") +endif() + if(HAVE_HPX) ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}") endif() diff --git a/modules/core/cmake/parallel/detect_openmp.cmake b/modules/core/cmake/parallel/detect_openmp.cmake new file mode 100644 index 0000000000..39c050c78d --- /dev/null +++ b/modules/core/cmake/parallel/detect_openmp.cmake @@ -0,0 +1,13 @@ +if(CMAKE_VERSION VERSION_LESS "3.9") + message(STATUS "OpenMP detection requires CMake 3.9+") # OpenMP::OpenMP_CXX target +endif() + +find_package(OpenMP) +if(OpenMP_FOUND) + if(TARGET OpenMP::OpenMP_CXX) + set(HAVE_OPENMP 1) + ocv_add_external_target(openmp "" "OpenMP::OpenMP_CXX" "HAVE_OPENMP=1") + else() + message(WARNING "OpenMP: missing OpenMP::OpenMP_CXX target") + endif() +endif() diff --git a/modules/core/cmake/parallel/detect_tbb.cmake b/modules/core/cmake/parallel/detect_tbb.cmake new file mode 100644 index 0000000000..93059f8f67 --- /dev/null +++ b/modules/core/cmake/parallel/detect_tbb.cmake @@ -0,0 +1,5 @@ +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake") + +if(HAVE_TBB) + ocv_add_external_target(tbb "" "tbb" "HAVE_TBB=1") +endif() diff --git a/modules/core/cmake/parallel/init.cmake b/modules/core/cmake/parallel/init.cmake new file mode 100644 index 0000000000..c1bbe99f18 --- /dev/null +++ b/modules/core/cmake/parallel/init.cmake @@ -0,0 +1,8 @@ +macro(ocv_add_core_parallel_backend backend_id cond_var) + if(${cond_var}) + include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") + endif() +endmacro() + +ocv_add_core_parallel_backend("tbb" WITH_TBB) +ocv_add_core_parallel_backend("openmp" WITH_OPENMP) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 50af505968..48023844a9 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -50,7 +50,6 @@ #endif #include "opencv2/core/cvdef.h" -#include "opencv2/core/version.hpp" #include "opencv2/core/base.hpp" #include "opencv2/core/cvstd.hpp" #include "opencv2/core/traits.hpp" @@ -97,6 +96,10 @@ @} @defgroup core_lowlevel_api Low-level API for external libraries / plugins @} + @defgroup core_parallel Parallel Processing + @{ + @defgroup core_parallel_backend Parallel backends API + @} @} */ diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index a3a3e51e04..21a61a4e53 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -538,6 +538,16 @@ _AccTp normInf(const _Tp* a, const _Tp* b, int n) */ CV_EXPORTS_W float cubeRoot(float val); +/** @overload + +cubeRoot with argument of `double` type calls `std::cbrt(double)` +*/ +static inline +double cubeRoot(double val) +{ + return std::cbrt(val); +} + /** @brief Calculates the angle of a 2D vector in degrees. The function fastAtan2 calculates the full-range angle of an input 2D vector. 
The angle is measured diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index f693dc8c65..cf8bcdd622 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -7,6 +7,9 @@ #include <opencv2/core/async.hpp> #include <opencv2/core/detail/async_promise.hpp> +#include <opencv2/core/utils/logger.hpp> + +#include <stdexcept> namespace cv { namespace utils { //! @addtogroup core_utils //! @{ @@ -58,6 +61,67 @@ String dumpCString(const char* argument) return cv::format("String: %s", argument); } +CV_WRAP static inline +String dumpString(const String& argument) +{ + return cv::format("String: %s", argument.c_str()); +} + +CV_WRAP static inline +String testOverloadResolution(int value, const Point& point = Point(42, 24)) +{ + return format("overload (int=%d, point=(x=%d, y=%d))", value, point.x, + point.y); +} + +CV_WRAP static inline +String testOverloadResolution(const Rect& rect) +{ + return format("overload (rect=(x=%d, y=%d, w=%d, h=%d))", rect.x, rect.y, + rect.width, rect.height); +} + +CV_WRAP static inline +String dumpRect(const Rect& argument) +{ + return format("rect: (x=%d, y=%d, w=%d, h=%d)", argument.x, argument.y, + argument.width, argument.height); +} + +CV_WRAP static inline +String dumpTermCriteria(const TermCriteria& argument) +{ + return format("term_criteria: (type=%d, max_count=%d, epsilon=%lf)", + argument.type, argument.maxCount, argument.epsilon); +} + +CV_WRAP static inline +String dumpRotatedRect(const RotatedRect& argument) +{ + return format("rotated_rect: (c_x=%f, c_y=%f, w=%f, h=%f, a=%f)", + argument.center.x, argument.center.y, argument.size.width, + argument.size.height, argument.angle); +} + +CV_WRAP static inline +String dumpRange(const Range& argument) +{ + if (argument == Range::all()) + { + return "range: all"; + } + else + { + return format("range: (s=%d, e=%d)", argument.start, argument.end); + } +} + +CV_WRAP static inline +void testRaiseGeneralException() +{ + throw std::runtime_error("exception text"); +} + CV_WRAP static inline AsyncArray testAsyncArray(InputArray argument) { @@ -81,7 +145,30 @@ AsyncArray testAsyncException() return p.getArrayResult(); } -//! @} -}} // namespace +namespace fs { + CV_EXPORTS_W cv::String getCacheDirectoryForDownloads(); +} // namespace fs + +//! @} // core_utils +} // namespace cv::utils + +//! @cond IGNORED + +CV_WRAP static inline +int setLogLevel(int level) +{ + // NB: Binding generators don't work with enums properly yet, so we define a separate overload here + return cv::utils::logging::setLogLevel((cv::utils::logging::LogLevel)level); +} + +CV_WRAP static inline +int getLogLevel() +{ + return cv::utils::logging::getLogLevel(); +} + +//! @endcond IGNORED + +} // namespaces cv / utils #endif // OPENCV_CORE_BINDINGS_UTILS_HPP diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 5fa09682e3..716b8bf2a8 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -340,6 +340,209 @@ public: Allocator* allocator; }; +struct CV_EXPORTS_W GpuData +{ + explicit GpuData(size_t _size); + ~GpuData(); + + GpuData(const GpuData&) = delete; + GpuData& operator=(const GpuData&) = delete; + + GpuData(GpuData&&) = delete; + GpuData& operator=(GpuData&&) = delete; + + uchar* data; + size_t size; +}; + +class CV_EXPORTS_W GpuMatND +{ +public: + using SizeArray = std::vector<int>; + using StepArray = std::vector<size_t>; + using IndexArray = std::vector<int>; + + //! destructor + ~GpuMatND(); + + //! 
default constructor + GpuMatND(); + + /** @overload + @param size Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + */ + GpuMatND(SizeArray size, int type); + + /** @overload + @param size Array of integers specifying an n-dimensional array shape. + @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or + CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices. + @param data Pointer to the user data. Matrix constructors that take data and step parameters do not + allocate matrix data. Instead, they just initialize the matrix header that points to the specified + data, which means that no data is copied. This operation is very efficient and can be used to + process external data using OpenCV functions. The external data is not automatically deallocated, so + you should take care of it. + @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always + set to the element size). If not specified, the matrix is assumed to be continuous. + */ + GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray()); + + /** @brief Allocates GPU memory. + Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that + GPU memory under the specific condition: it must be of the same size and type, not externally allocated, + the GPU memory is continuous (i.e., isContinuous() is true), and is not a sub-matrix of another GpuMatND + (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by + this method is always continuous and is not a sub-region of another GpuMatND. + */ + void create(SizeArray size, int type); + + void release(); + + void swap(GpuMatND& m) noexcept; + + /** @brief Creates a full copy of the array and the underlying data. + The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e. + the original step is not taken into account. So, the array copy is a continuous array + occupying total()\*elemSize() bytes. + */ + GpuMatND clone() const; + + /** @overload + This overload is non-blocking, so it may return even if the copy operation is not finished. + */ + GpuMatND clone(Stream& stream) const; + + /** @brief Extracts a sub-matrix. + The operator makes a new header for the specified sub-array of \*this. + The operator is an O(1) operation, that is, no matrix data is copied. + @param ranges Array of selected ranges along each dimension. + */ + GpuMatND operator()(const std::vector<Range>& ranges) const; + + /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix. + @note The returned GpuMat is constructed with the constructor for user-allocated data. + That is, it does not perform reference counting. + @note This function does not increment this GpuMatND's reference counter. + */ + GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const; + + /** @overload + Creates a GpuMat header if this GpuMatND is effectively 2D. + @note The returned GpuMat is constructed with the constructor for user-allocated data. + That is, it does not perform reference counting. + @note This function does not increment this GpuMatND's reference counter. + */ + GpuMat createGpuMatHeader() const; + + /** @brief Extracts a 2D plane part of an n-dim matrix. 
+ It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this + GpuMatND to the returned GpuMat. + @note This operator does not increment this GpuMatND's reference counter. + */ + GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const; + + /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D. + It differs from createGpuMatHeader() in that it clones a part of this GpuMatND. + @note This operator does not increment this GpuMatND's reference counter. + */ + operator GpuMat() const; + + GpuMatND(const GpuMatND&) = default; + GpuMatND& operator=(const GpuMatND&) = default; + +#if defined(__GNUC__) && __GNUC__ < 5 + // error: function '...' defaulted on its first declaration with an exception-specification + // that differs from the implicit declaration '...' + + GpuMatND(GpuMatND&&) = default; + GpuMatND& operator=(GpuMatND&&) = default; +#else + GpuMatND(GpuMatND&&) noexcept = default; + GpuMatND& operator=(GpuMatND&&) noexcept = default; +#endif + + void upload(InputArray src); + void upload(InputArray src, Stream& stream); + void download(OutputArray dst) const; + void download(OutputArray dst, Stream& stream) const; + + //! returns true iff the GpuMatND data is continuous + //! (i.e. when there are no gaps between successive rows) + bool isContinuous() const; + + //! returns true if the matrix is a sub-matrix of another matrix + bool isSubmatrix() const; + + //! returns element size in bytes + size_t elemSize() const; + + //! returns the size of element channel in bytes + size_t elemSize1() const; + + //! returns true if data is null + bool empty() const; + + //! returns true if not empty and points to external (user-allocated) gpu memory + bool external() const; + + //! returns pointer to the first byte of the GPU memory + uchar* getDevicePtr() const; + + //! returns the total number of array elements + size_t total() const; + + //! returns the size of underlying memory in bytes + size_t totalMemSize() const; + + //! returns element type + int type() const; + +private: + //! internal use + void setFields(SizeArray size, int type, StepArray step = StepArray()); + +public: + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + + //! matrix dimensionality + int dims; + + //! shape of this array + SizeArray size; + + /*! step values + Their semantics is identical to the semantics of step for Mat. + */ + StepArray step; + +private: + /*! internal use + If this GpuMatND holds external memory, this is empty. + */ + std::shared_ptr<GpuData> data_; + + /*! internal use + If this GpuMatND manages memory with reference counting, this value is + always equal to data_->data. If this GpuMatND holds external memory, + data_ is empty and data points to the external memory. + */ + uchar* data; + + /*! internal use + If this GpuMatND is a sub-matrix of a larger matrix, this value is the + difference of the first byte between the sub-matrix and the whole matrix. + */ + size_t offset; +}; +
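+ +// Editor's note: a minimal usage sketch (illustrative only, not part of the original +// patch); it relies solely on the GpuMatND API declared above. +// cv::cuda::GpuMatND arr({2, 480, 640}, CV_32FC1); // continuous 3D GPU allocation +// cv::Mat host(std::vector<int>{2, 480, 640}, CV_32FC1, cv::Scalar::all(0)); +// arr.upload(host); // host -> device copy +// cv::cuda::GpuMat plane = arr.createGpuMatHeader({0}, cv::Range::all(), cv::Range::all()); // 2D view, no copy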
 /** @brief Creates a continuous matrix. @param rows Row count. @@ -656,6 +859,18 @@ public: //! creates a new asynchronous stream with custom allocator CV_WRAP Stream(const Ptr<GpuMat::Allocator>& allocator); + /** @brief creates a new Stream using the cudaFlags argument to determine the behaviors of the stream + + @note The cudaFlags parameter is passed to the underlying api cudaStreamCreateWithFlags() and + supports the same parameter values. + @code + // creates an OpenCV cuda::Stream that manages an asynchronous, non-blocking, + // non-default CUDA stream + cv::cuda::Stream cvStream(cudaStreamNonBlocking); + @endcode + */ + CV_WRAP Stream(const size_t cudaFlags); + /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false. */ CV_WRAP bool queryIfComplete() const; diff --git a/modules/core/include/opencv2/core/cuda.inl.hpp b/modules/core/include/opencv2/core/cuda.inl.hpp index 30fc0aee22..3f2a0c7240 100644 --- a/modules/core/include/opencv2/core/cuda.inl.hpp +++ b/modules/core/include/opencv2/core/cuda.inl.hpp @@ -383,6 +383,92 @@ void swap(GpuMat& a, GpuMat& b) a.swap(b); } +//=================================================================================== +// GpuMatND +//=================================================================================== + +inline +GpuMatND::GpuMatND() : + flags(0), dims(0), data(nullptr), offset(0) +{ +} + +inline +GpuMatND::GpuMatND(SizeArray _size, int _type) : + flags(0), dims(0), data(nullptr), offset(0) +{ + create(std::move(_size), _type); +} + +inline +void GpuMatND::swap(GpuMatND& m) noexcept +{ + std::swap(*this, m); +} + +inline +bool GpuMatND::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +inline +bool GpuMatND::isSubmatrix() const +{ + return (flags & Mat::SUBMATRIX_FLAG) != 0; +} + +inline +size_t GpuMatND::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +inline +size_t GpuMatND::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +inline +bool GpuMatND::empty() const +{ + return data == nullptr; +} + +inline +bool GpuMatND::external() const +{ + return !empty() && data_.use_count() == 0; +} + +inline +uchar* GpuMatND::getDevicePtr() const +{ + return data + offset; +} + +inline +size_t GpuMatND::total() const +{ + size_t p = 1; + for(auto s : size) + p *= s; + return p; +} + +inline +size_t GpuMatND::totalMemSize() const +{ + return size[0] * step[0]; +} + +inline +int GpuMatND::type() const +{ + return CV_MAT_TYPE(flags); +} + //=================================================================================== // HostMem //=================================================================================== diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index ef2b31ac18..fe15e51e4e 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -170,6 +170,7 @@ #if defined CV_CPU_COMPILE_RVV # define CV_RVV 1 +# include <riscv_vector.h> #endif #endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 6af58b6205..6a55995fc9 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -45,6 +45,8 @@ #ifndef OPENCV_CORE_CVDEF_H #define OPENCV_CORE_CVDEF_H +#include "opencv2/core/version.hpp" + //! @addtogroup core_utils //! @{ @@ -388,7 +390,9 @@ typedef union Cv64suf } Cv64suf; +#ifndef OPENCV_ABI_COMPATIBILITY #define OPENCV_ABI_COMPATIBILITY 400 +#endif #ifdef __OPENCV_BUILD # define DISABLE_OPENCV_3_COMPATIBILITY diff --git a/modules/core/include/opencv2/core/dualquaternion.hpp b/modules/core/include/opencv2/core/dualquaternion.hpp new file mode 100644 index 0000000000..1f644e9dc8 --- /dev/null +++ b/modules/core/include/opencv2/core/dualquaternion.hpp @@ -0,0 +1,979 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved. +// Third party copyrights are property of their respective owners. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: Liangqian Kong +// Longbu Wang +#ifndef OPENCV_CORE_DUALQUATERNION_HPP +#define OPENCV_CORE_DUALQUATERNION_HPP + +#include <opencv2/core/quaternion.hpp> +#include <opencv2/core/affine.hpp> + +namespace cv{ +//! @addtogroup core +//! @{ + +template <typename _Tp> class DualQuat; +template <typename _Tp> std::ostream& operator<<(std::ostream&, const DualQuat<_Tp>&); + +/** + * Dual quaternions were introduced to describe rotation together with translation while ordinary + * quaternions can only describe rotation. They can be used for shortest path pose interpolation, + * local pose optimization or volumetric deformation. More details can be found + * - https://en.wikipedia.org/wiki/Dual_quaternion + * - ["A beginners guide to dual-quaternions: what they are, how they work, and how to use them for 3D character hierarchies", Ben Kenwright, 2012](https://borodust.org/public/shared/beginner_dual_quats.pdf) + * - ["Dual Quaternions", Yan-Bin Jia, 2013](http://web.cs.iastate.edu/~cs577/handouts/dual-quaternion.pdf) + * - ["Geometric Skinning with Approximate Dual Quaternion Blending", Kavan, 2008](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric) + * - http://rodolphe-vaillant.fr/?e=29 + * + * A unit dual quaternion can be classically represented as: + * \f[ + * \begin{equation} + * \begin{split} + * \sigma &= \left(r+\frac{\epsilon}{2}tr\right)\\ + * &= [w, x, y, z, w\_, x\_, y\_, z\_] + * \end{split} + * \end{equation} + * \f] + * where \f$r, t\f$ represent the rotation (ordinary unit quaternion) and translation (pure ordinary quaternion) respectively. + * + * A general dual quaternion, which consists of two quaternions, is usually represented in the form: + * \f[ + * \sigma = p + \epsilon q + * \f] + * where the introduced dual unit \f$\epsilon\f$ satisfies \f$\epsilon^2 = \epsilon^3 =...=0\f$, and \f$p, q\f$ are quaternions. + * + * Alternatively, dual quaternions can also be interpreted as four components which are all [dual numbers](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric): + * \f[ + * \sigma = \hat{q}_w + \hat{q}_xi + \hat{q}_yj + \hat{q}_zk + * \f] + * If we set \f$\hat{q}_x, \hat{q}_y\f$ and \f$\hat{q}_z\f$ equal to 0, a dual quaternion is transformed to a dual number; see normalize(). + * + * If you want to create a dual quaternion, you can use: + * + * ``` + * using namespace cv; + * double angle = CV_PI; + * + * // create from eight numbers + * DualQuatd dq1(1, 2, 3, 4, 5, 6, 7, 8); //p = [1,2,3,4]. 
q=[5,6,7,8] + * + * // create from Vec + * Vec<double, 8> v{1,2,3,4,5,6,7,8}; + * DualQuatd dq_v{v}; + * + * // create from two quaternions + * Quatd p(1, 2, 3, 4); + * Quatd q(5, 6, 7, 8); + * DualQuatd dq2 = DualQuatd::createFromQuat(p, q); + * + * // create from an angle, an axis and a translation + * Vec3d axis{0, 0, 1}; + * Vec3d trans{3, 4, 5}; + * DualQuatd dq3 = DualQuatd::createFromAngleAxisTrans(angle, axis, trans); + * + * // If you already have an instance of class Affine3, then you can use + * Affine3d R = dq3.toAffine3(); + * DualQuatd dq4 = DualQuatd::createFromAffine3(R); + * + * // or create directly by affine transformation matrix Rt + * // see createFromMat() in detail for the form of Rt + * Matx44d Rt = dq3.toMat(); + * DualQuatd dq5 = DualQuatd::createFromMat(Rt); + * + * // Any rotation + translation movement can + * // be expressed as a rotation + translation around the same line in space (expressed by Plucker + * // coords), and here's a way to represent it this way. + * Vec3d axis{1, 1, 1}; // axis will be normalized in createFromPitch + * Vec3d trans{3, 4, 5}; + * axis = axis / std::sqrt(axis.dot(axis)); // The formula for computing moment that I use below requires a normalized axis + * Vec3d moment = 1.0 / 2 * (trans.cross(axis) + axis.cross(trans.cross(axis)) * + * std::cos(angle / 2) / std::sin(angle / 2)); + * double d = trans.dot(axis); + * DualQuatd dq6 = DualQuatd::createFromPitch(angle, d, axis, moment); + * ``` + * + * A point \f$v=(x, y, z)\f$ in form of dual quaternion is \f$[1+\epsilon v]=[1,0,0,0,0,x,y,z]\f$. + * The transformation of a point \f$v_1\f$ to another point \f$v_2\f$ under the dual quaternion \f$\sigma\f$ is + * \f[ + * 1 + \epsilon v_2 = \sigma * (1 + \epsilon v_1) * \sigma^{\star} + * \f] + * where \f$\sigma^{\star}=p^*-\epsilon q^*.\f$ + * + * A line in the \f$Pl\ddot{u}cker\f$ coordinates \f$(\hat{l}, m)\f$ is defined by the dual quaternion \f$l=\hat{l}+\epsilon m\f$. + * To transform a line, \f[l_2 = \sigma * l_1 * \sigma^*,\f] where \f$\sigma=r+\frac{\epsilon}{2}rt\f$ and + * \f$\sigma^*=p^*+\epsilon q^*\f$. + * + * To extract the Vec<double, 8> or Vec<float, 8>, see toVec(); + * + * To extract the affine transformation matrix, see toMat(); + * + * To extract the instance of Affine3, see toAffine3(); + * + * If two quaternions \f$q_0, q_1\f$ need to be interpolated, you can use sclerp() + * ``` + * DualQuatd::sclerp(q0, q1, t) + * ``` + * or dqblend(). + * ``` + * DualQuatd::dqblend(q0, q1, t) + * ``` + * With more than two dual quaternions to be blended, you can use generalized linear dual quaternion blending + * with the corresponding weights, i.e. gdqblend(). + * + */ +template <typename _Tp> +class CV_EXPORTS DualQuat{ + static_assert(std::is_floating_point<_Tp>::value, "Dual quaternion only makes sense with type of float or double"); + using value_type = _Tp; + +public: + static constexpr _Tp CV_DUAL_QUAT_EPS = (_Tp)1.e-6; + + DualQuat(); + + /** + * @brief create from eight same type numbers. + */ + DualQuat(const _Tp w, const _Tp x, const _Tp y, const _Tp z, const _Tp w_, const _Tp x_, const _Tp y_, const _Tp z_); + + /** + * @brief create from a double or float vector. + */ + DualQuat(const Vec<_Tp, 8> &q); + + _Tp w, x, y, z, w_, x_, y_, z_; + + /** + * @brief create Dual Quaternion from two same type quaternions p and q. 
+ * A Dual Quaternion \f$\sigma\f$ has the form: + * \f[\sigma = p + \epsilon q\f] + * where p and q are defined as follows: + * \f[\begin{equation} + * \begin{split} + * p &= w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\\ + * q &= w\_ + x\_\boldsymbol{i} + y\_\boldsymbol{j} + z\_\boldsymbol{k}. + * \end{split} + * \end{equation} + * \f] + * The p and q are the real part and dual part respectively. + * @param realPart a quaternion, real part of dual quaternion. + * @param dualPart a quaternion, dual part of dual quaternion. + * @sa Quat + */ + static DualQuat<_Tp> createFromQuat(const Quat<_Tp> &realPart, const Quat<_Tp> &dualPart); + + /** + * @brief create a dual quaternion from a rotation angle \f$\theta\f$, a rotation axis + * \f$\boldsymbol{u}\f$ and a translation \f$\boldsymbol{t}\f$. + * It generates a dual quaternion \f$\sigma\f$ in the form of + * \f[\begin{equation} + * \begin{split} + * \sigma &= r + \frac{\epsilon}{2}\boldsymbol{t}r \\ + * &= [\cos(\frac{\theta}{2}), \boldsymbol{u}\sin(\frac{\theta}{2})] + * + \frac{\epsilon}{2}[0, \boldsymbol{t}][[\cos(\frac{\theta}{2}), + * \boldsymbol{u}\sin(\frac{\theta}{2})]]\\ + * &= \cos(\frac{\theta}{2}) + \boldsymbol{u}\sin(\frac{\theta}{2}) + * + \frac{\epsilon}{2}(-(\boldsymbol{t} \cdot \boldsymbol{u})\sin(\frac{\theta}{2}) + * + \boldsymbol{t}\cos(\frac{\theta}{2}) + \boldsymbol{u} \times \boldsymbol{t} \sin(\frac{\theta}{2})). + * \end{split} + * \end{equation}\f] + * @param angle rotation angle. + * @param axis rotation axis. + * @param translation a vector of length 3. + * @note Axis will be normalized in this function. The translation is applied + * after the rotation. Use @ref createFromQuat(r, r * t / 2) to create a dual quaternion + * whose translation is applied before rotation. + * @sa Quat + */ + static DualQuat<_Tp> createFromAngleAxisTrans(const _Tp angle, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &translation); + + /** + * @brief create a dual quaternion from an affine transformation matrix \f$M\f$. + * A dual quaternion consists of a rotation \f$r=[a,b,c,d]\f$ and a translation \f$t=[\Delta x,\Delta y,\Delta z]\f$. The + * affine transformation matrix \f$M\f$ has the form + * \f[ + * \begin{bmatrix} + * 1-2(e_2^2 +e_3^2) &2(e_1e_2-e_0e_3) &2(e_0e_2+e_1e_3) &\Delta x\\ + * 2(e_0e_3+e_1e_2) &1-2(e_1^2+e_3^2) &2(e_2e_3-e_0e_1) &\Delta y\\ + * 2(e_1e_3-e_0e_2) &2(e_0e_1+e_2e_3) &1-2(e_1^2-e_2^2) &\Delta z\\ + * 0&0&0&1 + * \end{bmatrix} + * \f] + * If A is a matrix consisting of n points to be transformed, this could be achieved by + * \f[ + * new\_A = M * A + * \f] + * where A has the form + * \f[ + * \begin{bmatrix} + * x_0& x_1& x_2&...&x_n\\ + * y_0& y_1& y_2&...&y_n\\ + * z_0& z_1& z_2&...&z_n\\ + * 1&1&1&...&1 + * \end{bmatrix} + * \f] + * where the same subscript represents the same point. The size of A should be \f$[4,n]\f$, + * and new_A has the same size. + * @param _R 4x4 matrix that represents rotation and translation. + * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create + * a dual quaternion whose translation is applied before rotation. + */ + static DualQuat<_Tp> createFromMat(InputArray _R); + + /** + * @brief create dual quaternion from an affine matrix. 
For the definition of the affine matrix, see createFromMat(). + */ + static DualQuat<_Tp> createFromAffine3(const Affine3<_Tp> &R); + + /** + * @brief A dual quaternion is a vector in the form of + * \f[ + * \begin{equation} + * \begin{split} + * \sigma &=\boldsymbol{p} + \epsilon \boldsymbol{q}\\ + * &= \cos\hat{\frac{\theta}{2}}+\overline{\hat{l}}\sin\frac{\hat{\theta}}{2} + * \end{split} + * \end{equation} + * \f] + * where \f$\hat{\theta}\f$ is the dual angle and \f$\overline{\hat{l}}\f$ is the dual axis: + * \f[ + * \hat{\theta}=\theta + \epsilon d,\\ + * \overline{\hat{l}}= \hat{l} +\epsilon m. + * \f] + * In this representation, \f$\theta\f$ is the rotation angle, \f$(\hat{l},m)\f$ is the screw axis, and d is the translation distance along the axis. + * + * @param angle rotation angle. + * @param d translation along the rotation axis. + * @param axis rotation axis represented by quaternion with w = 0. + * @param moment the moment of line, and it should be orthogonal to axis. + * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create + * a dual quaternion whose translation is applied before rotation. + */ + static DualQuat<_Tp> createFromPitch(const _Tp angle, const _Tp d, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &moment); + + /** + * @brief return a quaternion which represents the real part of the dual quaternion. + * The definition of real part is in createFromQuat(). + * @sa createFromQuat, getDualPart + */ + Quat<_Tp> getRealPart() const; + + /** + * @brief return a quaternion which represents the dual part of the dual quaternion. + * The definition of dual part is in createFromQuat(). + * @sa createFromQuat, getRealPart + */ + Quat<_Tp> getDualPart() const; + + /** + * @brief return the conjugate of a dual quaternion. + * \f[ + * \begin{equation} + * \begin{split} + * \sigma^* &= (p + \epsilon q)^* + * &= (p^* + \epsilon q^*) + * \end{split} + * \end{equation} + * \f] + * @param dq a dual quaternion. + */ + template <typename T> + friend DualQuat<T> conjugate(const DualQuat<T> &dq); + + /** + * @brief return the conjugate of a dual quaternion. + * \f[ + * \begin{equation} + * \begin{split} + * \sigma^* &= (p + \epsilon q)^* + * &= (p^* + \epsilon q^*) + * \end{split} + * \end{equation} + * \f] + */ + DualQuat<_Tp> conjugate() const; + + /** + * @brief return the rotation in quaternion form. + */ + Quat<_Tp> getRotation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the translation vector. + * The rotation \f$r\f$ in this dual quaternion \f$\sigma\f$ is applied before translation \f$t\f$. + * The dual quaternion \f$\sigma\f$ is defined as + * \f[\begin{equation} + * \begin{split} + * \sigma &= p + \epsilon q \\ + * &= r + \frac{\epsilon}{2}{t}r. + * \end{split} + * \end{equation}\f] + * Thus, the translation can be obtained as follows + * \f[t = 2qp^*.\f] + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + * @note This dual quaternion's translation is applied after the rotation. + */ + Vec<_Tp, 3> getTranslation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; +
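+ + // Editor's illustrative sketch (not part of the original patch): round-trip a pose + // through the accessors above; only the API declared in this class is used. + // DualQuatd dq = DualQuatd::createFromAngleAxisTrans(CV_PI / 2, Vec3d{0, 0, 1}, Vec3d{1, 0, 0}); + // Quatd r = dq.getRotation(QUAT_ASSUME_UNIT); // unit rotation part + // Vec3d t = dq.getTranslation(QUAT_ASSUME_UNIT); // recovers {1, 0, 0}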
+ /** + * @brief return the norm \f$||\sigma||\f$ of dual quaternion \f$\sigma = p + \epsilon q\f$. + * \f[ + * \begin{equation} + * \begin{split} + * ||\sigma|| &= \sqrt{\sigma * \sigma^*} \\ + * &= ||p|| + \epsilon \frac{p \cdot q}{||p||}. + * \end{split} + * \end{equation} + * \f] + * Generally speaking, the norm of a non-unit dual + * quaternion is a dual number. For convenience, we return it in the form of a dual quaternion, i.e. + * \f[ ||\sigma|| = [||p||, 0, 0, 0, \frac{p \cdot q}{||p||}, 0, 0, 0].\f] + * + * @note The dual number is returned as a dual quaternion. + */ + DualQuat<_Tp> norm() const; + + /** + * @brief return a normalized dual quaternion. + * A dual quaternion can be expressed as + * \f[ + * \begin{equation} + * \begin{split} + * \sigma &= p + \epsilon q\\ + * &=||\sigma||\left(r+\frac{1}{2}tr\right) + * \end{split} + * \end{equation} + * \f] + * where \f$r, t\f$ represent the rotation (ordinary quaternion) and translation (pure ordinary quaternion) respectively, + * and \f$||\sigma||\f$ is the norm of the dual quaternion (a dual number). + * A dual quaternion is unit if and only if + * \f[ + * ||p||=1, p \cdot q=0 + * \f] + * where \f$\cdot\f$ means dot product. + * The process of normalization is + * \f[ + * \sigma_{u}=\frac{\sigma}{||\sigma||} + * \f] + * Next, we simply prove that \f$\sigma_u\f$ is a unit dual quaternion: + * \f[ + * \renewcommand{\Im}{\operatorname{Im}} + * \begin{equation} + * \begin{split} + * \sigma_{u}=\frac{\sigma}{||\sigma||}&=\frac{p + \epsilon q}{||p||+\epsilon\frac{p\cdot q}{||p||}}\\ + * &=\frac{p}{||p||}+\epsilon\left(\frac{q}{||p||}-p\frac{p\cdot q}{||p||^3}\right)\\ + * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\left(qp^{*}-p\cdot q\right)\frac{p}{||p||}\\ + * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\Im(qp^*)\frac{p}{||p||}.\\ + * \end{split} + * \end{equation} + * \f] + * As expected, the real part is a rotation and dual part is a pure quaternion. + */ + DualQuat<_Tp> normalize() const; + + /** + * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion and p is not zero, + * the inverse dual quaternion is + * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f] + * or equivalently, + * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f] + * @param dq a dual quaternion. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + template <typename T> + friend DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit); + + /** + * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion and p is not zero, + * the inverse dual quaternion is + * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f] + * or equivalently, + * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f] + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + DualQuat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the dot product of two dual quaternions. + * @param p other dual quaternion. + */ + _Tp dot(DualQuat<_Tp> p) const; +
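+ + // Editor's illustrative check (not part of the original patch): a normalized dual + // quaternion composed with its inverse yields the identity dual quaternion. + // DualQuatd q{1, 2, 3, 4, 5, 6, 7, 8}; + // DualQuatd u = q.normalize(); + // DualQuatd id = u * u.inv(QUAT_ASSUME_UNIT); // ~[1, 0, 0, 0, 0, 0, 0, 0]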
+ /** + * @brief return the value of \f$p^t\f$ where p is a dual quaternion. + * This could be calculated as: + * \f[ + * p^t = \exp(t\ln p) + * \f] + * @param dq a dual quaternion. + * @param t index of power function. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + template <typename T> + friend DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit); + + /** + * @brief return the value of \f$p^t\f$ where p is a dual quaternion. + * This could be calculated as: + * \f[ + * p^t = \exp(t\ln p) + * \f] + * + * @param t index of power function. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + DualQuat<_Tp> power(const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of \f$p^q\f$ where p and q are dual quaternions. + * This could be calculated as: + * \f[ + * p^q = \exp(q\ln p) + * \f] + * @param p a dual quaternion. + * @param q a dual quaternion. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion p is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + template <typename T> + friend DualQuat<T> power(const DualQuat<T>& p, const DualQuat<T>& q, QuatAssumeType assumeUnit); + + /** + * @brief return the value of \f$p^q\f$ where p and q are dual quaternions. + * This could be calculated as: + * \f[ + * p^q = \exp(q\ln p) + * \f] + * + * @param q a dual quaternion + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + DualQuat<_Tp> power(const DualQuat<_Tp>& q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief return the value of the exponential function + * @param dq a dual quaternion. + */ + template <typename T> + friend DualQuat<T> exp(const DualQuat<T> &dq); + + /** + * @brief return the value of the exponential function + */ + DualQuat<_Tp> exp() const; + + /** + * @brief return the value of the logarithm function + * + * @param dq a dual quaternion. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + template <typename T> + friend DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit); + + /** + * @brief return the value of the logarithm function + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + */ + DualQuat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief Transform this dual quaternion to a vector. + */ + Vec<_Tp, 8> toVec() const; + + /** + * @brief Transform this dual quaternion to an affine transformation matrix; + * for the form of the matrix, see createFromMat(). + */ + Matx<_Tp, 4, 4> toMat(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief Transform this dual quaternion to an instance of Affine3. + */ + Affine3<_Tp> toAffine3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const; + + /** + * @brief The screw linear interpolation (ScLERP) extends spherical linear interpolation to dual quaternions. + * If \f$\sigma_1\f$ and \f$\sigma_2\f$ are two dual quaternions representing the initial and final poses, + * the ScLERP function can be defined as: + * \f[ + * ScLERP(t;\sigma_1,\sigma_2) = \sigma_1 * (\sigma_1^{-1} * \sigma_2)^t, t\in[0,1] + * \f] + * + * @param q1 a dual quaternion representing an initial pose. + * @param q2 a dual quaternion representing a final pose. + * @param t interpolation parameter + * @param directChange if true, it always returns the shortest path. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. 
+ * + * For example + * ``` + * double angle1 = CV_PI / 2; + * Vec3d axis{0, 0, 1}; + * Vec3d t(0, 0, 3); + * DualQuatd initial = DualQuatd::createFromAngleAxisTrans(angle1, axis, t); + * double angle2 = CV_PI; + * DualQuatd final = DualQuatd::createFromAngleAxisTrans(angle2, axis, t); + * DualQuatd inter = DualQuatd::sclerp(initial, final, 0.5); + * ``` + */ + static DualQuat<_Tp> sclerp(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t, + bool directChange=true, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + /** + * @brief The method of Dual Quaternion linear Blending (DQB) computes a transformation between dual quaternions + * \f$q_1\f$ and \f$q_2\f$ and can be defined as: + * \f[ + * DQB(t;{\boldsymbol{q}}_1,{\boldsymbol{q}}_2)= + * \frac{(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2}{||(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2||}. + * \f] + * where \f$q_1\f$ and \f$q_2\f$ are unit dual quaternions representing the input transformations. + * If you want to use DQB that works for more than two rigid transformations, see @ref gdqblend. + * + * @param q1 a unit dual quaternion representing the input transformations. + * @param q2 a unit dual quaternion representing the input transformations. + * @param t parameter \f$t\in[0,1]\f$. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion is assumed to be a unit dual quaternion + * and this function will save some computations. + * + * @sa gdqblend + */ + static DualQuat<_Tp> dqblend(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t, + QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); +
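+ + // Editor's illustrative sketch (not part of the original patch): blending two unit + // dual quaternions with dqblend, reusing initial/final from the sclerp example above. + // DualQuatd mid = DualQuatd::dqblend(initial, final, 0.5, QUAT_ASSUME_UNIT); // equal-weight blend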
+ /** + * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations. + * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights + * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply + * \f[ + * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n} + * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}. + * \f] + * @param dualquat vector of dual quaternions + * @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should + * satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions are assumed to be unit dual quaternions + * and this function will save some computations. + * @note the type of weights' element should be the same as the data type of dual quaternion inside the dualquat. + */ + template <int cn> + static DualQuat<_Tp> gdqblend(const Vec<DualQuat<_Tp>, cn> &dualquat, InputArray weights, + QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations. + * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights + * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply + * \f[ + * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n} + * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}. + * \f] + * @param dualquat The dual quaternions which have 8 channels and 1 row or 1 col. + * @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should + * satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$. + * @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions are assumed to be unit dual quaternions + * and this function will save some computations. + * @note the type of weights' element should be the same as the data type of dual quaternion inside the dualquat. + */ + static DualQuat<_Tp> gdqblend(InputArray dualquat, InputArray weights, + QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); + + /** + * @brief Return opposite dual quaternion \f$-p\f$ + * which satisfies \f$p + (-p) = 0.\f$ + * + * For example + * ``` + * DualQuatd q{1, 2, 3, 4, 5, 6, 7, 8}; + * std::cout << -q << std::endl; // [-1, -2, -3, -4, -5, -6, -7, -8] + * ``` + */ + DualQuat<_Tp> operator-() const; + + /** + * @brief return true if two dual quaternions p and q are nearly equal, i.e. when the absolute + * value of each \f$p_i - q_i\f$ is less than CV_DUAL_QUAT_EPS. + */ + bool operator==(const DualQuat<_Tp>&) const; + + /** + * @brief Subtraction operator of two dual quaternions p and q. + * It returns a new dual quaternion in which each element is the sum of \f$p_i\f$ and \f$-q_i\f$. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * std::cout << p - q << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4] + * ``` + */ + DualQuat<_Tp> operator-(const DualQuat<_Tp>&) const; + + /** + * @brief Subtraction assignment operator of two dual quaternions p and q. + * It subtracts the right operand from the left operand and assigns the result to the left operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * p -= q; // equivalent to p = p - q + * std::cout << p << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4] + * + * ``` + */ + DualQuat<_Tp>& operator-=(const DualQuat<_Tp>&); + + /** + * @brief Addition operator of two dual quaternions p and q. + * It returns a new dual quaternion in which each element is the sum of \f$p_i\f$ and \f$q_i\f$. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * std::cout << p + q << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20] + * ``` + */ + DualQuat<_Tp> operator+(const DualQuat<_Tp>&) const; + + /** + * @brief Addition assignment operator of two dual quaternions p and q. + * It adds the right operand to the left operand and assigns the result to the left operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * p += q; // equivalent to p = p + q + * std::cout << p << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20] + * + * ``` + */ + DualQuat<_Tp>& operator+=(const DualQuat<_Tp>&); + + /** + * @brief Multiplication assignment operator of two dual quaternions. + * It multiplies the right operand with the left operand and assigns the result to the left operand. + * + * Rule of dual quaternion multiplication: + * The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [A, B][C, D]\\ + * &=[AC, AD + BC] + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * p *= q; + * std::cout << p << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120] + * ``` + */ + DualQuat<_Tp>& operator*=(const DualQuat<_Tp>&); +
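+ + // Editor's worked check of the pair rule above (not part of the original patch): with + // p = [A, B] and q = [C, D], the real part of p * q is A*C and the dual part is A*D + B*C. + // DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}, q{5, 6, 7, 8, 9, 10, 11, 12}; + // Quatd ac = p.getRealPart() * q.getRealPart(); // (-60, 12, 30, 24) + // Quatd adbc = p.getRealPart() * q.getDualPart() + p.getDualPart() * q.getRealPart(); // (-216, 80, 124, 120) + // p * q == DualQuatd::createFromQuat(ac, adbc) // holds by the rule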
+ /** + * @brief Multiplication assignment operator of a dual quaternion and a scalar. + * It multiplies the right operand with the left operand and assigns the result to the left operand. + * + * Rule of dual quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\ + * &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double s = 2.0; + * p *= s; + * std::cout << p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + DualQuat<_Tp> operator*=(const _Tp s); + + + /** + * @brief Multiplication operator of two dual quaternions p and q. + * Multiplies values on either side of the operator. + * + * Rule of dual quaternion multiplication: + * The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus + * \f[ + * \begin{equation} + * \begin{split} + * p * q &= [A, B][C, D]\\ + * &=[AC, AD + BC] + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * std::cout << p * q << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120] + * ``` + */ + DualQuat<_Tp> operator*(const DualQuat<_Tp>&) const; + + /** + * @brief Division operator of a dual quaternion and a scalar. + * It divides the left operand by the right operand. + * + * Rule of dual quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\ + * &=[w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double s = 2.0; + * std::cout << p / s << std::endl; //[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4] + * ``` + * @note the type of the scalar should be equal to the element type of this dual quaternion. + */ + DualQuat<_Tp> operator/(const _Tp s) const; + + /** + * @brief Division operator of two dual quaternions p and q. + * Divides left hand operand by right hand operand. + * + * Rule of dual quaternion division with a dual quaternion: + * \f[ + * \begin{equation} + * \begin{split} + * p / q &= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * std::cout << p / q << std::endl; // equivalent to p * q.inv() + * ``` + */ + DualQuat<_Tp> operator/(const DualQuat<_Tp>&) const; + + /** + * @brief Division assignment operator of two dual quaternions p and q. + * It divides the left operand by the right operand and assigns the result to the left operand. 
+ * + * Rule of dual quaternion division with a dual quaternion: + * \f[ + * \begin{equation} + * \begin{split} + * p / q &= p * q.inv()\\ + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12}; + * p /= q; // equivalent to p = p * q.inv() + * std::cout << p << std::endl; + * ``` + */ + DualQuat<_Tp>& operator/=(const DualQuat<_Tp>&); + + /** + * @brief Division assignment operator of a dual quaternion and a scalar. + * It divides the left operand by the right operand and assigns the result to the left operand. + * + * Rule of dual quaternion division with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\ + * &=[w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double s = 2.0; + * p /= s; // equivalent to p = p / s + * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + DualQuat<_Tp>& operator/=(const _Tp s); + + /** + * @brief Addition operator of a scalar and a dual quaternion. + * Adds the right hand operand to the left hand operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double scalar = 2.0; + * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + template <typename T> + friend DualQuat<T> cv::operator+(const T s, const DualQuat<T>&); + + /** + * @brief Addition operator of a dual quaternion and a scalar. + * Adds the right hand operand to the left hand operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double scalar = 2.0; + * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + template <typename T> + friend DualQuat<T> cv::operator+(const DualQuat<T>&, const T s); + + /** + * @brief Multiplication operator of a scalar and a dual quaternion. + * Multiplies values on either side of the operator. + * + * Rule of dual quaternion multiplication with a scalar: + * \f[ + * \begin{equation} + * \begin{split} + * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\ + * &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s]. + * \end{split} + * \end{equation} + * \f] + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double s = 2.0; + * std::cout << s * p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + template <typename T> + friend DualQuat<T> cv::operator*(const T s, const DualQuat<T>&); + + /** + * @brief Subtraction operator of a dual quaternion and a scalar. + * Subtracts the right hand operand from the left hand operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double scalar = 2.0; + * std::cout << p - scalar << std::endl; //[-1, 2, 3, 4, 5, 6, 7, 8] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + template <typename T> + friend DualQuat<T> cv::operator-(const DualQuat<T>&, const T s); + + /** + * @brief Subtraction operator of a scalar and a dual quaternion. + * Subtracts the right hand operand from the left hand operand. + * + * For example + * ``` + * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8}; + * double scalar = 2.0; + * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4, -5, -6, -7, -8] + * ``` + * @note the type of the scalar should be equal to the element type of the dual quaternion. + */ + template <typename T> + friend DualQuat<T> cv::operator-(const T s, const DualQuat<T>&); + + /** + * @brief Multiplication operator of a dual quaternion and a scalar. + * Multiplies values on either side of the operator. 
+
+ /**
+ * @brief Multiplication operator of a dual quaternion and a scalar.
+ * It multiplies each component of the dual quaternion by the scalar.
+ *
+ * Rule of dual quaternion multiplication with a scalar:
+ * \f[
+ * \begin{equation}
+ * \begin{split}
+ * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+ * &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
+ * \end{split}
+ * \end{equation}
+ * \f]
+ *
+ * For example
+ * ```
+ * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+ * double s = 2.0;
+ * std::cout << p * s << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+ * ```
+ * @note the scalar type should be the same as the element type of the dual quaternion.
+ */
+ template <typename T>
+ friend DualQuat<T> cv::operator*(const DualQuat<T>&, const T s);
+
+ template <typename S>
+ friend std::ostream& cv::operator<<(std::ostream&, const DualQuat<S>&);
+
+};
+
+using DualQuatd = DualQuat<double>;
+using DualQuatf = DualQuat<float>;
+
+//! @} core
+}//namespace
+
+#include "dualquaternion.inl.hpp"
+
+#endif /* OPENCV_CORE_DUALQUATERNION_HPP */
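Before the implementation file, a short sketch of the typical workflow (an editor's illustration, not part of the patch): encode a rotation plus a translation as a unit dual quaternion and read the pose back.

```cpp
// Sketch: a rigid transform as a unit dual quaternion.
#include <iostream>
#include <opencv2/core/dualquaternion.hpp>

int main()
{
    const double angle = CV_PI / 2;  // rotation of 90 degrees...
    const cv::Vec3d axis{0, 0, 1};   // ...around the z axis,
    const cv::Vec3d t{1, 2, 3};      // combined with this translation

    cv::DualQuatd dq = cv::DualQuatd::createFromAngleAxisTrans(angle, axis, t);

    // The result is unit by construction, so QUAT_ASSUME_UNIT is safe here.
    std::cout << dq.getRotation(cv::QUAT_ASSUME_UNIT) << std::endl;
    std::cout << dq.getTranslation(cv::QUAT_ASSUME_UNIT) << std::endl; // [1, 2, 3]
    std::cout << dq.toMat(cv::QUAT_ASSUME_UNIT) << std::endl;          // 4x4 homogeneous matrix
}
```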
diff --git a/modules/core/include/opencv2/core/dualquaternion.inl.hpp b/modules/core/include/opencv2/core/dualquaternion.inl.hpp
new file mode 100644
index 0000000000..4aec961dd2
--- /dev/null
+++ b/modules/core/include/opencv2/core/dualquaternion.inl.hpp
@@ -0,0 +1,487 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                       License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong
+//         Longbu Wang
+
+#ifndef OPENCV_CORE_DUALQUATERNION_INL_HPP
+#define OPENCV_CORE_DUALQUATERNION_INL_HPP
+
+#ifndef OPENCV_CORE_DUALQUATERNION_HPP
+#error This is not a standalone header. Include dualquaternion.hpp instead.
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////
+//Implementation
+namespace cv {
+
+template <typename T>
+DualQuat<T>::DualQuat():w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0){};
+
+template <typename T>
+DualQuat<T>::DualQuat(const T vw, const T vx, const T vy, const T vz, const T _w, const T _x, const T _y, const T _z):
+                      w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z){};
+
+template <typename T>
+DualQuat<T>::DualQuat(const Vec<T, 8> &q):w(q[0]), x(q[1]), y(q[2]), z(q[3]),
+                                          w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7]){};
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromQuat(const Quat<T> &realPart, const Quat<T> &dualPart)
+{
+    T w = realPart.w;
+    T x = realPart.x;
+    T y = realPart.y;
+    T z = realPart.z;
+    T w_ = dualPart.w;
+    T x_ = dualPart.x;
+    T y_ = dualPart.y;
+    T z_ = dualPart.z;
+    return DualQuat<T>(w, x, y, z, w_, x_, y_, z_);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAngleAxisTrans(const T angle, const Vec<T, 3> &axis, const Vec<T, 3> &trans)
+{
+    Quat<T> r = Quat<T>::createFromAngleAxis(angle, axis);
+    Quat<T> t{0, trans[0], trans[1], trans[2]};
+    return createFromQuat(r, t * r / 2);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromMat(InputArray _R)
+{
+    CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
+    if (_R.size() != Size(4, 4))
+    {
+        CV_Error(Error::StsBadArg, "The input matrix must have 4 columns and 4 rows");
+    }
+    Mat R = _R.getMat();
+    Quat<T> r = Quat<T>::createFromRotMat(R.colRange(0, 3).rowRange(0, 3));
+    Quat<T> trans(0, R.at<T>(0, 3), R.at<T>(1, 3), R.at<T>(2, 3));
+    return createFromQuat(r, trans * r / 2);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAffine3(const Affine3<T> &R)
+{
+    return createFromMat(R.matrix);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromPitch(const T angle, const T d, const Vec<T, 3> &axis, const Vec<T, 3> &moment)
+{
+    T half_angle = angle / 2, half_d = d / 2;
+    Quat<T> qaxis = Quat<T>(0, axis[0], axis[1], axis[2]).normalize();
+    Quat<T> qmoment = Quat<T>(0, moment[0], moment[1], moment[2]);
+    qmoment -= qaxis * axis.dot(moment);
+    Quat<T> dual = -half_d * std::sin(half_angle) + std::sin(half_angle) * qmoment +
+                   half_d * std::cos(half_angle) * qaxis;
+    return createFromQuat(Quat<T>::createFromAngleAxis(angle, axis), dual);
+}
+
+template <typename T>
+inline bool DualQuat<T>::operator==(const DualQuat<T> &q) const
+{
+    return (abs(w - q.w) < CV_DUAL_QUAT_EPS && abs(x - q.x) < CV_DUAL_QUAT_EPS &&
+            abs(y - q.y) < CV_DUAL_QUAT_EPS && abs(z - q.z) < CV_DUAL_QUAT_EPS &&
+            abs(w_ - q.w_) < CV_DUAL_QUAT_EPS && abs(x_ - q.x_) < CV_DUAL_QUAT_EPS &&
+            abs(y_ - q.y_) < CV_DUAL_QUAT_EPS && abs(z_ - q.z_) < CV_DUAL_QUAT_EPS);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getRealPart() const
+{
+    return Quat<T>(w, x, y, z);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getDualPart() const
+{
+    return Quat<T>(w_, x_, y_, z_);
+}
+
+template <typename T>
+inline DualQuat<T> conjugate(const DualQuat<T> &dq)
+{
+    return dq.conjugate();
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::conjugate() const
+{
+    return DualQuat<T>(w, -x, -y, -z, w_, -x_, -y_, -z_);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::norm() const
+{
+    Quat<T> real = getRealPart();
+    T realNorm = real.norm();
+    Quat<T> dual = getDualPart();
+    if (realNorm < CV_DUAL_QUAT_EPS){
+        return DualQuat<T>(0, 0, 0, 0, 0, 0, 0, 0);
+    }
+    return DualQuat<T>(realNorm, 0, 0, 0, real.dot(dual) / realNorm, 0, 0, 0);
+}
+
+template <typename T>
+inline Quat<T> DualQuat<T>::getRotation(QuatAssumeType assumeUnit) const
+{
+    if (assumeUnit)
+    {
+        return getRealPart();
+    }
+    return getRealPart().normalize();
+}
+
+template <typename T>
+inline Vec<T, 3> DualQuat<T>::getTranslation(QuatAssumeType assumeUnit) const
+{
+    Quat<T> trans = 2.0 * (getDualPart() * getRealPart().inv(assumeUnit));
+    return Vec<T, 3>{trans[1], trans[2], trans[3]};
+}
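norm() above returns a dual number packed into dual-quaternion form; a sketch of what it and normalize() guarantee (an editor's illustration, not part of the patch):

```cpp
// Sketch: norm() is the dual number [||p||, p.dot(q)/||p||] of real part p and
// dual part q; normalize() makes p unit and orthogonal to q.
#include <iostream>
#include <opencv2/core/dualquaternion.hpp>

int main()
{
    cv::DualQuatd dq{1, 2, 3, 4, 5, 6, 7, 8};
    cv::Quatd p = dq.getRealPart(), q = dq.getDualPart();

    std::cout << dq.norm() << std::endl;                              // [||p||, 0, 0, 0, p.dot(q)/||p||, 0, 0, 0]
    std::cout << p.norm() << " " << p.dot(q) / p.norm() << std::endl; // the same two values

    cv::DualQuatd u = dq.normalize();
    std::cout << u.getRealPart().norm() << std::endl;               // 1
    std::cout << u.getRealPart().dot(u.getDualPart()) << std::endl; // ~0
}
```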
+
+template <typename T>
+DualQuat<T> DualQuat<T>::normalize() const
+{
+    Quat<T> p = getRealPart();
+    Quat<T> q = getDualPart();
+    T p_norm = p.norm();
+    if (p_norm < CV_DUAL_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot normalize this dual quaternion: the norm is too small.");
+    }
+    Quat<T> p_nr = p / p_norm;
+    Quat<T> q_nr = q / p_norm;
+    return createFromQuat(p_nr, q_nr - p_nr * p_nr.dot(q_nr));
+}
+
+template <typename T>
+inline T DualQuat<T>::dot(DualQuat<T> q) const
+{
+    return q.w * w + q.x * x + q.y * y + q.z * z + q.w_ * w_ + q.x_ * x_ + q.y_ * y_ + q.z_ * z_;
+}
+
+template <typename T>
+inline DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.inv(assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::inv(QuatAssumeType assumeUnit) const
+{
+    Quat<T> real = getRealPart();
+    Quat<T> dual = getDualPart();
+    return createFromQuat(real.inv(assumeUnit), -real.inv(assumeUnit) * dual * real.inv(assumeUnit));
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w - q.w, x - q.x, y - q.y, z - q.z, w_ - q.w_, x_ - q.x_, y_ - q.y_, z_ - q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-() const
+{
+    return DualQuat<T>(-w, -x, -y, -z, -w_, -x_, -y_, -z_);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator+(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w + q.w, x + q.x, y + q.y, z + q.z, w_ + q.w_, x_ + q.x_, y_ + q.y_, z_ + q.z_);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator+=(const DualQuat<T> &q)
+{
+    *this = *this + q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator*(const DualQuat<T> &q) const
+{
+    Quat<T> A = getRealPart();
+    Quat<T> B = getDualPart();
+    Quat<T> C = q.getRealPart();
+    Quat<T> D = q.getDualPart();
+    return DualQuat<T>::createFromQuat(A * C, A * D + B * C);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator*=(const DualQuat<T> &q)
+{
+    *this = *this * q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> operator+(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator+(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(a + q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator-(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(q.w - a, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator-=(const DualQuat<T> &q)
+{
+    *this = *this - q;
+    return *this;
+}
+
+template <typename T>
+inline DualQuat<T> operator-(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(a - q.w, -q.x, -q.y, -q.z, -q.w_, -q.x_, -q.y_, -q.z_);
+}
+
+template <typename T>
+inline DualQuat<T> operator*(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+template <typename T>
+inline DualQuat<T> operator*(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a, q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const T a) const
+{
+    return DualQuat<T>(w / a, x / a, y / a, z / a, w_ / a, x_ / a, y_ / a, z_ / a);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const DualQuat<T> &q) const
+{
+    return *this * q.inv();
+}
+
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator/=(const DualQuat<T> &q)
+{
+    *this = *this / q;
+    return *this;
+}
+
+template <typename T>
+std::ostream & operator<<(std::ostream &os, const DualQuat<T> &q)
+{
+    os << "DualQuat " << Vec<T, 8>{q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_};
+    return os;
+}
+
+template <typename T>
+inline DualQuat<T> exp(const DualQuat<T> &dq)
+{
+    return dq.exp();
+}
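The exponential and logarithm implemented around here are mutual inverses, which is exactly what power() builds on (q^t = exp(t log q)); a round-trip sketch (an editor's illustration, not part of the patch):

```cpp
// Sketch: log/exp round trip on a unit dual quaternion, and a half screw motion.
#include <iostream>
#include <opencv2/core/dualquaternion.hpp>

int main()
{
    cv::DualQuatd dq = cv::DualQuatd::createFromAngleAxisTrans(
        CV_PI / 3, cv::Vec3d{0, 0, 1}, cv::Vec3d{1, 2, 3});

    // Both lines print the same eight components.
    std::cout << dq << "\n" << dq.log(cv::QUAT_ASSUME_UNIT).exp() << std::endl;

    // Half of the rigid motion along its screw axis:
    std::cout << dq.power(0.5, cv::QUAT_ASSUME_UNIT) << std::endl;
}
```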
+
+namespace detail {
+
+template <typename _Tp>
+Matx<_Tp, 4, 4> jacob_exp(const Quat<_Tp> &q)
+{
+    _Tp nv = std::sqrt(q.x * q.x + q.y * q.y + q.z * q.z);
+    _Tp sinc_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? 1 - nv * nv / 6 : std::sin(nv) / nv;
+    _Tp csiii_nv = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS ? -(_Tp)1.0 / 3 : (std::cos(nv) - sinc_nv) / nv / nv;
+    Matx<_Tp, 4, 4> J_exp_quat {
+        std::cos(nv), -sinc_nv * q.x, -sinc_nv * q.y, -sinc_nv * q.z,
+        sinc_nv * q.x, csiii_nv * q.x * q.x + sinc_nv, csiii_nv * q.x * q.y, csiii_nv * q.x * q.z,
+        sinc_nv * q.y, csiii_nv * q.y * q.x, csiii_nv * q.y * q.y + sinc_nv, csiii_nv * q.y * q.z,
+        sinc_nv * q.z, csiii_nv * q.z * q.x, csiii_nv * q.z * q.y, csiii_nv * q.z * q.z + sinc_nv
+    };
+    return std::exp(q.w) * J_exp_quat;
+}
+
+} // namespace detail
+
+template <typename T>
+DualQuat<T> DualQuat<T>::exp() const
+{
+    Quat<T> real = getRealPart();
+    return createFromQuat(real.exp(), Quat<T>(detail::jacob_exp(real) * getDualPart().toVec()));
+}
+
+template <typename T>
+DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.log(assumeUnit);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::log(QuatAssumeType assumeUnit) const
+{
+    Quat<T> plog = getRealPart().log(assumeUnit);
+    Matx<T, 4, 4> jacob = detail::jacob_exp(plog);
+    return createFromQuat(plog, Quat<T>(jacob.inv() * getDualPart().toVec()));
+}
+
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return dq.power(t, assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const T t, QuatAssumeType assumeUnit) const
+{
+    return (t * log(assumeUnit)).exp();
+}
+
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &p, const DualQuat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    return p.power(q, assumeUnit);
+}
+
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const DualQuat<T> &q, QuatAssumeType assumeUnit) const
+{
+    return (q * log(assumeUnit)).exp();
+}
+
+template <typename T>
+inline Vec<T, 8> DualQuat<T>::toVec() const
+{
+    return Vec<T, 8>(w, x, y, z, w_, x_, y_, z_);
+}
+
+template <typename T>
+Affine3<T> DualQuat<T>::toAffine3(QuatAssumeType assumeUnit) const
+{
+    return Affine3<T>(toMat(assumeUnit));
+}
+
+template <typename T>
+Matx<T, 4, 4> DualQuat<T>::toMat(QuatAssumeType assumeUnit) const
+{
+    Matx<T, 4, 4> rot44 = getRotation(assumeUnit).toRotMat4x4();
+    Vec<T, 3> translation = getTranslation(assumeUnit);
+    rot44(0, 3) = translation[0];
+    rot44(1, 3) = translation[1];
+    rot44(2, 3) = translation[2];
+    return rot44;
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::sclerp(const DualQuat<T> &q0, const DualQuat<T> &q1, const T t, bool directChange, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> v0(q0), v1(q1);
+    if (!assumeUnit)
+    {
+        v0 = v0.normalize();
+        v1 = v1.normalize();
+    }
+    Quat<T> v0Real = v0.getRealPart();
+    Quat<T> v1Real = v1.getRealPart();
+    if (directChange && v1Real.dot(v0Real) < 0)
+    {
+        v0 = -v0;
+    }
+    DualQuat<T> v0inv1 = v0.inv() * v1;
+    return v0 * v0inv1.power(t, QUAT_ASSUME_UNIT);
+}
+
+template <typename T>
+DualQuat<T> DualQuat<T>::dqblend(const DualQuat<T> &q1, const DualQuat<T> &q2, const T t, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> v1(q1), v2(q2);
+    if (!assumeUnit)
+    {
+        v1 = v1.normalize();
+        v2 = v2.normalize();
+    }
+    if (v1.getRotation(assumeUnit).dot(v2.getRotation(assumeUnit)) < 0)
+    {
+        return ((1 - t) * v1 - t * v2).normalize();
+    }
+    return ((1 - t) * v1 + t * v2).normalize();
+}
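sclerp() follows the screw path at constant speed while dqblend() renormalizes a weighted sum; a small comparison sketch (an editor's illustration, not part of the patch):

```cpp
// Sketch: interpolating between two rigid poses.
#include <iostream>
#include <opencv2/core/dualquaternion.hpp>

int main()
{
    cv::DualQuatd q0 = cv::DualQuatd::createFromAngleAxisTrans(
        0.0, cv::Vec3d{0, 0, 1}, cv::Vec3d{0, 0, 0});
    cv::DualQuatd q1 = cv::DualQuatd::createFromAngleAxisTrans(
        CV_PI / 2, cv::Vec3d{0, 0, 1}, cv::Vec3d{4, 0, 0});

    for (double t = 0; t <= 1.0; t += 0.25) // screw linear interpolation
    {
        cv::DualQuatd m = cv::DualQuatd::sclerp(q0, q1, t, true, cv::QUAT_ASSUME_UNIT);
        std::cout << t << ": " << m.getTranslation(cv::QUAT_ASSUME_UNIT) << std::endl;
    }

    // Dual quaternion blending of the same two poses with equal weights:
    std::cout << cv::DualQuatd::dqblend(q0, q1, 0.5, cv::QUAT_ASSUME_UNIT) << std::endl;
}
```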
+
+template <typename T>
+DualQuat<T> DualQuat<T>::gdqblend(InputArray _dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    CV_CheckTypeEQ(_weight.type(), cv::traits::Type<T>::value, "");
+    CV_CheckTypeEQ(_dualquat.type(), CV_MAKETYPE(CV_MAT_DEPTH(cv::traits::Type<T>::value), 8), "");
+    Size dq_s = _dualquat.size();
+    if (dq_s != _weight.size() || (dq_s.height != 1 && dq_s.width != 1))
+    {
+        CV_Error(Error::StsBadArg, "The size of weight must be the same as dualquat, both of them should be (1, n) or (n, 1)");
+    }
+    Mat dualquat = _dualquat.getMat(), weight = _weight.getMat();
+    const int cn = std::max(dq_s.width, dq_s.height);
+    if (!assumeUnit)
+    {
+        for (int i = 0; i < cn; ++i)
+        {
+            dualquat.at<Vec<T, 8>>(i) = DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.normalize().toVec();
+        }
+    }
+    Vec<T, 8> dq_blend = dualquat.at<Vec<T, 8>>(0) * weight.at<T>(0);
+    Quat<T> q0 = DualQuat<T> {dualquat.at<Vec<T, 8>>(0)}.getRotation(assumeUnit);
+    for (int i = 1; i < cn; ++i)
+    {
+        T k = q0.dot(DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.getRotation(assumeUnit)) < 0 ? -1: 1;
+        dq_blend = dq_blend + dualquat.at<Vec<T, 8>>(i) * k * weight.at<T>(i);
+    }
+    return DualQuat<T>{dq_blend}.normalize();
+}
+
+template <typename T>
+template <int cn>
+DualQuat<T> DualQuat<T>::gdqblend(const Vec<DualQuat<T>, cn> &_dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    Vec<DualQuat<T>, cn> dualquat(_dualquat);
+    if (cn == 0)
+    {
+        return DualQuat<T>(1, 0, 0, 0, 0, 0, 0, 0);
+    }
+    Mat dualquat_mat(cn, 1, CV_64FC(8));
+    for (int i = 0; i < cn ; ++i)
+    {
+        dualquat_mat.at<Vec<T, 8>>(i) = dualquat[i].toVec();
+    }
+    return gdqblend(dualquat_mat, _weight, assumeUnit);
+}
+
+} //namespace cv
+
+#endif /*OPENCV_CORE_DUALQUATERNION_INL_HPP*/
diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp
index 0f53cf5c1b..eb4fbe213b 100644
--- a/modules/core/include/opencv2/core/fast_math.hpp
+++ b/modules/core/include/opencv2/core/fast_math.hpp
@@ -76,6 +76,9 @@
 #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
     && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
     #include <altivec.h>
+    #undef vector
+    #undef bool
+    #undef pixel
 #endif
 
 #if defined(CV_INLINE_ROUND_FLT)
diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp
index 0ffcb49cea..6f5b8e1788 100644
--- a/modules/core/include/opencv2/core/hal/intrin.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin.hpp
@@ -104,7 +104,7 @@ template<typename _Tp> struct V_TypeTraits
 {
 };
 
-#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -114,7 +114,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef w_type_ w_type; \
     typedef q_type_ q_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
@@ -131,7 +130,7 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -140,7 +139,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef uint_type_ uint_type; \
     typedef w_type_ w_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
@@ -157,16 +155,16 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
-CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
-CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
-CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4); -CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4); -CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4); -CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2); -CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2); -CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2); +CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int); +CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64); +CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double); #ifndef CV_DOXYGEN @@ -202,7 +200,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; # undef CV_RVV #endif -#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD) && !defined(CV_FORCE_SIMD128_CPP) +#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP) #define CV__SIMD_FORWARD 128 #include "opencv2/core/hal/intrin_forward.hpp" #endif @@ -314,54 +312,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN //================================================================================================== -#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ - inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \ - inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \ - inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \ - inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \ - inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \ - inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \ - inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \ - inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \ - inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \ - inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); } - -#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ - inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); } - -#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ - inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); } - -#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \ - inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); } - -#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \ - 
CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) - -#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \ - CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix) - template struct V_RegTraits { }; @@ -421,6 +371,7 @@ template struct V_RegTraits CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void); CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16); #endif +//! @endcond #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512) #define CV__SIMD_NAMESPACE simd512 @@ -429,21 +380,33 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD_64F CV_SIMD512_64F #define CV_SIMD_FP16 CV_SIMD512_FP16 #define CV_SIMD_WIDTH 64 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values typedef v_uint8x64 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values typedef v_int8x64 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values typedef v_uint16x32 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values typedef v_int16x32 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values typedef v_uint32x16 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values typedef v_int32x16 v_int32; + //! @brief Maximum available vector register capacity 64-bit unsigned integer values typedef v_uint64x8 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values typedef v_int64x8 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) typedef v_float32x16 v_float32; - CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512) -#if CV_SIMD512_64F + #if CV_SIMD512_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) typedef v_float64x8 v_float64; - CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load) -#endif - inline void vx_cleanup() { v512_cleanup(); } + #endif +//! 
@} + + #define VXPREFIX(func) v512##func } // namespace using namespace CV__SIMD_NAMESPACE; #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256) @@ -453,21 +416,33 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD_64F CV_SIMD256_64F #define CV_SIMD_FP16 CV_SIMD256_FP16 #define CV_SIMD_WIDTH 32 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values typedef v_uint8x32 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values typedef v_int8x32 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values typedef v_uint16x16 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values typedef v_int16x16 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values typedef v_uint32x8 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values typedef v_int32x8 v_int32; + //! @brief Maximum available vector register capacity 64-bit unsigned integer values typedef v_uint64x4 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values typedef v_int64x4 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) typedef v_float32x8 v_float32; - CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256) #if CV_SIMD256_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) typedef v_float64x4 v_float64; - CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load) #endif - inline void vx_cleanup() { v256_cleanup(); } +//! @} + + #define VXPREFIX(func) v256##func } // namespace using namespace CV__SIMD_NAMESPACE; #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128) @@ -480,25 +455,228 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD CV_SIMD128 #define CV_SIMD_64F CV_SIMD128_64F #define CV_SIMD_WIDTH 16 +//! @addtogroup core_hal_intrin +//! @{ + //! @brief Maximum available vector register capacity 8-bit unsigned integer values typedef v_uint8x16 v_uint8; + //! @brief Maximum available vector register capacity 8-bit signed integer values typedef v_int8x16 v_int8; + //! @brief Maximum available vector register capacity 16-bit unsigned integer values typedef v_uint16x8 v_uint16; + //! @brief Maximum available vector register capacity 16-bit signed integer values typedef v_int16x8 v_int16; + //! @brief Maximum available vector register capacity 32-bit unsigned integer values typedef v_uint32x4 v_uint32; + //! @brief Maximum available vector register capacity 32-bit signed integer values typedef v_int32x4 v_int32; + //! @brief Maximum available vector register capacity 64-bit unsigned integer values typedef v_uint64x2 v_uint64; + //! @brief Maximum available vector register capacity 64-bit signed integer values typedef v_int64x2 v_int64; + //! @brief Maximum available vector register capacity 32-bit floating point values (single precision) typedef v_float32x4 v_float32; - CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v) #if CV_SIMD128_64F + //! @brief Maximum available vector register capacity 64-bit floating point values (double precision) typedef v_float64x2 v_float64; - CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load) #endif - inline void vx_cleanup() { v_cleanup(); } +//! 
@} + + #define VXPREFIX(func) v##func } // namespace using namespace CV__SIMD_NAMESPACE; #endif +namespace CV__SIMD_NAMESPACE { +//! @addtogroup core_hal_intrin +//! @{ + //! @name Wide init with value + //! @{ + //! @brief Create maximum available capacity vector with elements set to a specific value + inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); } + inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); } + inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); } + inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); } + inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); } + inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); } + inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); } + inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); } + inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); } +#if CV_SIMD_64F + inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); } +#endif + //! @} + + //! @name Wide init with zero + //! @{ + //! @brief Create maximum available capacity vector with elements set to zero + inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); } + inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); } + inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); } + inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); } + inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); } + inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); } + inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); } + inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); } + inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); } +#if CV_SIMD_64F + inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); } +#endif + //! @} + + //! @name Wide load from memory + //! @{ + //! @brief Load maximum available capacity register contents from memory + inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); } + inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); } + inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); } + inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); } +#endif + //! @} + + //! @name Wide load from memory(aligned) + //! @{ + //! 
@brief Load maximum available capacity register contents from memory(aligned) + inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); } + inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); } +#endif + //! @} + + //! @name Wide load lower half from memory + //! @{ + //! @brief Load lower half of maximum available capacity register from memory + inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); } + inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); } +#if CV_SIMD_64F + inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); } +#endif + //! @} + + //! @name Wide load halfs from memory + //! @{ + //! @brief Load maximum available capacity register contents from two memory blocks + inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } + inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } +#if CV_SIMD_64F + inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); } +#endif + //! @} + + //! @name Wide LUT of elements + //! @{ + //! 
@brief Load maximum available capacity register contents with array elements by provided indexes + inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } + inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } +#if CV_SIMD_64F + inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); } +#endif + //! @} + + //! @name Wide LUT of element pairs + //! @{ + //! @brief Load maximum available capacity register contents with array element pairs by provided indexes + inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } + inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } +#if CV_SIMD_64F + inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); } +#endif + //! @} + + //! @name Wide LUT of element quads + //! @{ + //! @brief Load maximum available capacity register contents with array element quads by provided indexes + inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); } + //! @} + + //! @name Wide load with double expansion + //! @{ + //! 
@brief Load maximum available capacity register contents from memory with double expand
+    inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    //! @}
+
+    //! @name Wide load with quad expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with quad expand
+    inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    //! @}
+
+    /** @brief SIMD processing state cleanup call */
+    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
+
+
+//! @cond IGNORED
+
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
+
+//! @endcond
+
+
+//! @}
+    #undef VXPREFIX
+} // namespace
+
+//! @cond IGNORED
 #ifndef CV_SIMD_64F
 #define CV_SIMD_64F 0
 #endif
diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
index 859bfd72dc..46222140e6 100644
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@@ -52,10 +52,21 @@
 //! @cond IGNORED
 
 #define CV_SIMD128_CPP 1
-#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN)
+#if defined(CV_FORCE_SIMD128_CPP)
 #define CV_SIMD128 1
 #define CV_SIMD128_64F 1
 #endif
+#if defined(CV_DOXYGEN)
+#define CV_SIMD128 1
+#define CV_SIMD128_64F 1
+#define CV_SIMD256 1
+#define CV_SIMD256_64F 1
+#define CV_SIMD512 1
+#define CV_SIMD512_64F 1
+#else
+#define CV_SIMD256 0 // Explicitly disable SIMD256 and SIMD512 support for scalar intrinsic implementation
+#define CV_SIMD512 0 // to avoid warnings during compilation
+#endif
 //! @endcond
 
 namespace cv
@@ -68,17 +79,33 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 
 /** @addtogroup core_hal_intrin
 
 "Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
-different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86
-architectures and __NEON__ on ARM architectures, both allow working with 128 bit registers
-containing packed values of different types. In case when there is no SIMD extension available
-during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as
-expected although it could be slower.
+different platforms. Currently a few different SIMD extensions on different architectures are supported.
+Support for 128 bit registers of various types is implemented for a wide range of architectures,
+including x86(__SSE/SSE2/SSE4.2__), ARM(__NEON__), PowerPC(__VSX__), MIPS(__MSA__).
+256 bit long registers are supported on x86(__AVX2__) and 512 bit long registers are supported on x86(__AVX512__).
+In case no SIMD extension is available during compilation, the fallback C++ implementation of the intrinsics
+will be chosen and the code will work as expected, although it could be slower.
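The width-agnostic vx_ wrappers introduced above make the common load/compute/store pattern portable across the 128/256/512-bit backends; a sketch (an editor's illustration, not part of the patch; `v_float32::nlanes` gives the lane count of the widest available register):

```cpp
// Sketch: scale a float buffer with whatever register width the build provides.
#include <opencv2/core/hal/intrin.hpp>

void scale(const float* src, float* dst, int n, float factor)
{
#if CV_SIMD
    const cv::v_float32 vf = cv::vx_setall_f32(factor);
    int i = 0;
    for (; i + cv::v_float32::nlanes <= n; i += cv::v_float32::nlanes)
        cv::v_store(dst + i, cv::vx_load(src + i) * vf);
    cv::vx_cleanup();   // SIMD state cleanup call
    for (; i < n; ++i)  // scalar tail
        dst[i] = src[i] * factor;
#else
    for (int i = 0; i < n; ++i)
        dst[i] = src[i] * factor;
#endif
}
```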
 
 ### Types
 
-There are several types representing 128-bit register as a vector of packed values, each type is
+There are several types of vector registers holding packed values, each type is
 implemented as a structure based on a one SIMD register.
 
+- cv::v_uint8 and cv::v_int8: 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16 and cv::v_int16: 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32 and cv::v_int32: 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64 and cv::v_int64: 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32: 32-bit floating point values (signed) - float
+- cv::v_float64: 64-bit floating point values (signed) - double
+
+The exact bit length (and lane count) of the listed types is deduced at compile time and depends on the
+architecture SIMD capabilities chosen as available during compilation of the library. All the types contain
+an __nlanes__ enumeration to query the exact lane count of the type.
+
+When the exact bit length of the type is important, it is possible to use specific fixed length register types.
+
+There are several types representing 128-bit registers.
+
 - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
 - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
 - cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
@@ -86,28 +113,96 @@ implemented as a structure based on a one SIMD register.
 - cv::v_float32x4: four 32-bit floating point values (signed) - float
 - cv::v_float64x2: two 64-bit floating point values (signed) - double
 
+There are several types representing 256-bit registers.
+
+- cv::v_uint8x32 and cv::v_int8x32: thirty two 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x16 and cv::v_int16x16: sixteen 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x8 and cv::v_int32x8: eight 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x4 and cv::v_int64x4: four 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x8: eight 32-bit floating point values (signed) - float
+- cv::v_float64x4: four 64-bit floating point values (signed) - double
+
 @note
-cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to
-check the CV_SIMD128_64F preprocessor definition:
+256 bit registers are at the moment implemented for the AVX2 SIMD extension only; if you want to use this type directly,
+don't forget to check the CV_SIMD256 preprocessor definition:
 @code
-#if CV_SIMD128_64F
+#if CV_SIMD256
 //...
 #endif
 @endcode
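A sketch of that guard in use (an editor's illustration, not part of the patch): the fixed 256-bit type is touched only when the AVX2 backend actually provides it.

```cpp
// Sketch: sum eight floats with a fixed 256-bit register when available.
#include <opencv2/core/hal/intrin.hpp>

float sum8(const float* ptr) // ptr must point to at least 8 floats
{
#if CV_SIMD256
    cv::v_float32x8 r = cv::v256_load(ptr);
    return cv::v_reduce_sum(r);
#else
    float s = 0.f;
    for (int i = 0; i < 8; ++i)
        s += ptr[i];
    return s;
#endif
}
```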
+
+There are several types representing 512-bit registers.
+
+- cv::v_uint8x64 and cv::v_int8x64: sixty four 8-bit integer values (unsigned/signed) - char
+- cv::v_uint16x32 and cv::v_int16x32: thirty two 16-bit integer values (unsigned/signed) - short
+- cv::v_uint32x16 and cv::v_int32x16: sixteen 32-bit integer values (unsigned/signed) - int
+- cv::v_uint64x8 and cv::v_int64x8: eight 64-bit integer values (unsigned/signed) - int64
+- cv::v_float32x16: sixteen 32-bit floating point values (signed) - float
+- cv::v_float64x8: eight 64-bit floating point values (signed) - double
+@note
+512 bit registers are at the moment implemented for the AVX512 SIMD extension only; if you want to use this type directly,
+don't forget to check the CV_SIMD512 preprocessor definition.
+
+@note
+cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to
+check the CV_SIMD128_64F preprocessor definition.
+
 ### Load and store operations
 
 These operations allow to set contents of the register explicitly or by loading it from some memory
 block and to save contents of the register to memory block.
 
+There are variable size register load operations that provide a result of the maximum available size,
+depending on the platform capabilities chosen at compile time:
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+- Other create methods:
+vx_setall_s8, vx_setall_u8, ...,
+vx_setzero_u8, vx_setzero_s8, ...
+- Memory load operations:
+vx_load, vx_load_aligned, vx_load_low, vx_load_halves,
+- Memory operations with expansion of values:
+vx_load_expand, vx_load_expand_q
+
+Also there are fixed size register load/store operations.
+
+For 128 bit registers
 - Constructors:
 @ref v_reg::v_reg(const _Tp *ptr) "from memory",
 @ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
 - Other create methods:
 @ref v_setall_s8, @ref v_setall_u8, ...,
 @ref v_setzero_u8, @ref v_setzero_s8, ...
-- Memory operations:
+- Memory load operations:
 @ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
+- Memory operations with expansion of values:
+@ref v_load_expand, @ref v_load_expand_q
+
+For 256 bit registers (check the CV_SIMD256 preprocessor definition)
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) "from four values", ...
+- Other create methods:
+@ref v256_setall_s8, @ref v256_setall_u8, ...,
+@ref v256_setzero_u8, @ref v256_setzero_s8, ...
+- Memory load operations:
+@ref v256_load, @ref v256_load_aligned, @ref v256_load_low, @ref v256_load_halves,
+- Memory operations with expansion of values:
+@ref v256_load_expand, @ref v256_load_expand_q
+
+For 512 bit registers (check the CV_SIMD512 preprocessor definition)
+- Constructors:
+@ref v_reg::v_reg(const _Tp *ptr) "from memory",
+@ref v_reg::v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, _Tp s4, _Tp s5, _Tp s6, _Tp s7) "from eight values", ...
+- Other create methods:
+@ref v512_setall_s8, @ref v512_setall_u8, ...,
+@ref v512_setzero_u8, @ref v512_setzero_s8, ...
+- Memory load operations:
+@ref v512_load, @ref v512_load_aligned, @ref v512_load_low, @ref v512_load_halves,
+- Memory operations with expansion of values:
+@ref v512_load_expand, @ref v512_load_expand_q
+
+Store to memory operations are similar across different platform capabilities:
 @ref v_store, @ref v_store_aligned,
 @ref v_store_high, @ref v_store_low
 
@@ -116,7 +211,7 @@ block and to save contents of the register to memory block.
 
 These operations allow to reorder or recombine elements in one or multiple vectors.
 
 - Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
-- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand, @ref v_expand_low, @ref v_expand_high
+- Expand: @ref v_expand, @ref v_expand_low, @ref v_expand_high
 - Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
 @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
 - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
@@ -153,7 +248,7 @@ Element-wise binary and unary operations.
 @ref operator >=(const v_reg &a, const v_reg &b) ">=",
 @ref operator <(const v_reg &a, const v_reg &b) "<",
 @ref operator <=(const v_reg &a, const v_reg &b) "<=",
-@ref operator==(const v_reg &a, const v_reg &b) "==",
+@ref operator ==(const v_reg &a, const v_reg &b) "==",
 @ref operator !=(const v_reg &a, const v_reg &b) "!="
 
 - min/max: @ref v_min, @ref v_max
@@ -190,7 +285,7 @@ shows the applicability of different operations to the types.
 
 Regular integers:
 
-| Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
+| Operations\\Types | uint 8 | int 8 | uint 16 | int 16 | uint 32 | int 32 |
 |-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
 |load, store        | x | x | x | x | x | x |
 |interleave         | x | x | x | x | x | x |
@@ -230,7 +325,7 @@ Regular integers:
 
 Big integers:
 
-| Operations\\Types | uint 64x2 | int 64x2 |
+| Operations\\Types | uint 64 | int 64 |
 |-------------------|:-:|:-:|
 |load, store        | x | x |
 |add, sub           | x | x |
@@ -244,7 +339,7 @@ Big integers:
 
 Floating point:
 
-| Operations\\Types | float 32x4 | float 64x2 |
+| Operations\\Types | float 32 | float 64 |
 |-------------------|:-:|:-:|
 |load, store        | x | x |
 |interleave         | x |   |
@@ -410,6 +505,67 @@ typedef v_reg<uint64, 2> v_uint64x2;
 /** @brief Two 64-bit signed integer values */
 typedef v_reg<int64, 2> v_int64x2;
 
+#if CV_SIMD256
+/** @brief Thirty two 8-bit unsigned integer values */
+typedef v_reg<uchar, 32> v_uint8x32;
+/** @brief Thirty two 8-bit signed integer values */
+typedef v_reg<schar, 32> v_int8x32;
+/** @brief Sixteen 16-bit unsigned integer values */
+typedef v_reg<ushort, 16> v_uint16x16;
+/** @brief Sixteen 16-bit signed integer values */
+typedef v_reg<short, 16> v_int16x16;
+/** @brief Eight 32-bit unsigned integer values */
+typedef v_reg<unsigned, 8> v_uint32x8;
+/** @brief Eight 32-bit signed integer values */
+typedef v_reg<int, 8> v_int32x8;
+/** @brief Eight 32-bit floating point values (single precision) */
+typedef v_reg<float, 8> v_float32x8;
+/** @brief Four 64-bit floating point values (double precision) */
+typedef v_reg<double, 4> v_float64x4;
+/** @brief Four 64-bit unsigned integer values */
+typedef v_reg<uint64, 4> v_uint64x4;
+/** @brief Four 64-bit signed integer values */
+typedef v_reg<int64, 4> v_int64x4;
+#endif
+
+#if CV_SIMD512
+/** @brief Sixty four 8-bit unsigned integer values */
+typedef v_reg<uchar, 64> v_uint8x64;
+/** @brief Sixty four 8-bit signed integer values */
+typedef v_reg<schar, 64> v_int8x64;
+/** @brief Thirty two 16-bit unsigned integer values */
+typedef v_reg<ushort, 32> v_uint16x32;
+/** @brief Thirty two 16-bit signed integer values */
+typedef v_reg<short, 32> v_int16x32;
+/** @brief Sixteen 32-bit unsigned integer values */
+typedef v_reg<unsigned, 16> v_uint32x16;
+/** @brief Sixteen 32-bit signed integer values */
+typedef v_reg<int, 16> v_int32x16;
+/** @brief Sixteen 32-bit floating point values (single precision) */
+typedef v_reg<float, 16> v_float32x16;
+/** @brief Eight 64-bit floating point values (double precision) */
+typedef v_reg<double, 8> v_float64x8;
+/** @brief Eight 64-bit unsigned integer values */
+typedef v_reg<uint64, 8> v_uint64x8;
+/** @brief Eight 64-bit signed integer values */
+typedef v_reg<int64, 8> v_int64x8;
+#endif
+
+enum {
+    simd128_width = 16,
+#if CV_SIMD256
+    simd256_width = 32,
+#endif
+#if CV_SIMD512
+    simd512_width = 64,
+    simdmax_width = simd512_width
+#elif CV_SIMD256
+    simdmax_width = simd256_width
+#else
+    simdmax_width = simd128_width
+#endif
+};
+
 /** @brief Add values
 
 For all types. */
@@ -559,27 +715,6 @@ template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a)
     return c; \
 }
 
-//! @brief Helper macro
-//! @ingroup core_hal_intrin_impl
-#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \
-inline v_reg<int, 4> func(const v_reg<float, 4>& a) \
-{ \
-    v_reg<int, 4> c; \
-    for( int i = 0; i < 4; i++ ) \
-        c.s[i] = cfunc(a.s[i]); \
-    return c; \
-} \
-inline v_reg<int, 4> func(const v_reg<double, 2>& a) \
-{ \
-    v_reg<int, 4> c; \
-    for( int i = 0; i < 2; i++ ) \
-    { \
-        c.s[i] = cfunc(a.s[i]); \
-        c.s[i + 2] = 0; \
-    } \
-    return c; \
-}
-
 /** @brief Square root of elements
 
 Only for floating point types.*/
@@ -598,26 +733,6 @@ Only for floating point types.*/
 OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, typename V_TypeTraits<_Tp>::abs_type)
 
-/** @brief Round elements
-
-Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound)
-
-/** @brief Floor elements
-
-Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor)
-
-/** @brief Ceil elements
-
-Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil)
-
-/** @brief Truncate elements
-
-Only for floating point types.*/
-OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int)
-
 //! @brief Helper macro
 //! @ingroup core_hal_intrin_impl
 #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
@@ -855,9 +970,9 @@ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp,
 
 /** @overload
 For 32-bit floating point values */
-inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
+template<int n> inline v_reg<float, n> v_absdiff(const v_reg<float, n>& a, const v_reg<float, n>& b)
 {
-    v_float32x4 c;
+    v_reg<float, n> c;
     for( int i = 0; i < c.nlanes; i++ )
         c.s[i] = _absdiff(a.s[i], b.s[i]);
     return c;
@@ -866,9 +981,9 @@ inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
 
 /** @overload
 For 64-bit floating point values */
-inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
+template<int n> inline v_reg<double, n> v_absdiff(const v_reg<double, n>& a, const v_reg<double, n>& b)
 {
-    v_float64x2 c;
+    v_reg<double, n> c;
     for( int i = 0; i < c.nlanes; i++ )
         c.s[i] = _absdiff(a.s[i], b.s[i]);
     return c;
@@ -1238,14 +1353,17 @@ template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_redu
     result[3] = d[0] + d[1] + d[2] + d[3]
 @endcode
 */
-inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
-                                 const v_float32x4& c, const v_float32x4& d)
+template<int n> inline v_reg<float, n> v_reduce_sum4(const v_reg<float, n>& a, const v_reg<float, n>& b,
+                                                     const v_reg<float, n>& c, const v_reg<float, n>& d)
 {
-    v_float32x4 r;
-    r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3];
-    r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3];
-    r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3];
-    r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3];
+    v_reg<float, n> r;
+    for(int i = 0; i < (n/4); i++)
+    {
+        r.s[i*4 + 0] = a.s[i*4 + 0] + a.s[i*4 + 1] + a.s[i*4 + 2] + a.s[i*4 + 3];
+        r.s[i*4 + 1] = b.s[i*4 + 0] + b.s[i*4 + 1] + b.s[i*4 + 2] + b.s[i*4 + 3];
+        r.s[i*4 + 2] = c.s[i*4 + 0] + c.s[i*4 + 1] + c.s[i*4 + 2] + c.s[i*4 + 3];
+        r.s[i*4 + 3] = d.s[i*4 + 0] + d.s[i*4 + 1] + d.s[i*4 + 2] + d.s[i*4 + 3];
+    }
     return r;
 }
 
@@ -1459,30 +1577,116 @@ template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const
 
@note Returned type will be detected from passed pointer type, for example uchar ==> 
cv::v_uint8x16, int ==> cv::v_int32x4, etc. +@note Use vx_load version to get maximum available register length result + @note Alignment requirement: if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). */ template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load(const _Tp* ptr) { #if CV_STRONG_ALIGNMENT CV_Assert(isAligned(ptr)); #endif - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); + return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr); } +#if CV_SIMD256 +/** @brief Load 256-bit length register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x32, int ==> cv::v_int32x8, etc. + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load version to get maximum available register length result + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). + */ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr); +} +#endif + +#if CV_SIMD512 +/** @brief Load 512-bit length register contents from memory + +@param ptr pointer to memory block with data +@return register object + +@note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x64, int ==> cv::v_int32x16, etc. + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load version to get maximum available register length result + +@note Alignment requirement: +if CV_STRONG_ALIGNMENT=1 then passed pointer must be aligned (`sizeof(lane type)` should be enough). +Do not cast pointer types without runtime check for pointer alignment (like `uchar*` => `int*`). + */ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr); +} +#endif + /** @brief Load register contents from memory (aligned) similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary in case of SIMD128, 32-byte - SIMD256, etc) - */ + +@note Use vx_load_aligned version to get maximum available register length result +*/ template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_aligned(const _Tp* ptr) { - CV_Assert(isAligned::nlanes128>)>(ptr)); - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd128_width / sizeof(_Tp)>(ptr); } +#if CV_SIMD256 +/** @brief Load register contents from memory (aligned) + +similar to cv::v256_load, but source memory block should be aligned (to 32-byte boundary in case of SIMD256, 64-byte - SIMD512, etc) + +@note Check CV_SIMD256 preprocessor definition prior to use. 
+Use vx_load_aligned version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd256_width / sizeof(_Tp)>(ptr); +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from memory (aligned) + +similar to cv::v512_load, but source memory block should be aligned (to 64-byte boundary in case of SIMD512, etc) + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_aligned version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_aligned(const _Tp* ptr) +{ + CV_Assert(isAligned)>(ptr)); + return v_reg<_Tp, simd512_width / sizeof(_Tp)>(ptr); +} +#endif + /** @brief Load 64-bits of data to lower part (high part is undefined). @param ptr memory block containing data for first half (0..n/2) @@ -1491,14 +1695,16 @@ inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) int lo[2] = { 1, 2 }; v_int32x4 r = v_load_low(lo); @endcode - */ + +@note Use vx_load_low version to get maximum available register length result +*/ template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) +inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_low(const _Tp* ptr) { #if CV_STRONG_ALIGNMENT CV_Assert(isAligned(ptr)); #endif - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; for( int i = 0; i < c.nlanes/2; i++ ) { c.s[i] = ptr[i]; @@ -1506,6 +1712,62 @@ inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) return c; } +#if CV_SIMD256 +/** @brief Load 128-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[4] = { 1, 2, 3, 4 }; +v_int32x8 r = v256_load_low(lo); +@endcode + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_low version to get maximum available register length result +*/ +template +inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_low(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + v_reg<_Tp, simd256_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes / 2; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load 256-bits of data to lower part (high part is undefined). + +@param ptr memory block containing data for first half (0..n/2) + +@code{.cpp} +int lo[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; +v_int32x16 r = v512_load_low(lo); +@endcode + +@note Check CV_SIMD512 preprocessor definition prior to use. 
+Use vx_load_low version to get maximum available register length result
+*/
+template<typename _Tp>
+inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_low(const _Tp* ptr)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
+    for (int i = 0; i < c.nlanes / 2; i++)
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+#endif
+
 /** @brief Load register contents from two memory blocks

 @param loptr memory block containing data for first half (0..n/2)
@@ -1515,15 +1777,17 @@ inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
 int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
 v_int32x4 r = v_load_halves(lo, hi);
 @endcode
- */
+
+@note Use vx_load_halves version to get maximum available register length result
+*/
 template<typename _Tp>
-inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
+inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
 {
 #if CV_STRONG_ALIGNMENT
     CV_Assert(isAligned<sizeof(_Tp)>(loptr));
     CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
 #endif
-    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
+    v_reg<_Tp, simd128_width / sizeof(_Tp)> c;
     for( int i = 0; i < c.nlanes/2; i++ )
     {
         c.s[i] = loptr[i];
@@ -1532,6 +1796,68 @@ inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr,
     return c;
 }

+#if CV_SIMD256
+/** @brief Load register contents from two memory blocks
+
+@param loptr memory block containing data for first half (0..n/2)
+@param hiptr memory block containing data for second half (n/2..n)
+
+@code{.cpp}
+int lo[4] = { 1, 2, 3, 4 }, hi[4] = { 5, 6, 7, 8 };
+v_int32x8 r = v256_load_halves(lo, hi);
+@endcode
+
+@note Check CV_SIMD256 preprocessor definition prior to use.
+Use vx_load_halves version to get maximum available register length result
+*/
+template<typename _Tp>
+inline v_reg<_Tp, simd256_width / sizeof(_Tp)> v256_load_halves(const _Tp* loptr, const _Tp* hiptr)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
+    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
+#endif
+    v_reg<_Tp, simd256_width / sizeof(_Tp)> c;
+    for (int i = 0; i < c.nlanes / 2; i++)
+    {
+        c.s[i] = loptr[i];
+        c.s[i + c.nlanes / 2] = hiptr[i];
+    }
+    return c;
+}
+#endif
+
+#if CV_SIMD512
+/** @brief Load register contents from two memory blocks
+
+@param loptr memory block containing data for first half (0..n/2)
+@param hiptr memory block containing data for second half (n/2..n)
+
+@code{.cpp}
+int lo[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }, hi[8] = { 9, 10, 11, 12, 13, 14, 15, 16 };
+v_int32x16 r = v512_load_halves(lo, hi);
+@endcode
+
+@note Check CV_SIMD512 preprocessor definition prior to use.
+Use vx_load_halves version to get maximum available register length result
+*/
+template<typename _Tp>
+inline v_reg<_Tp, simd512_width / sizeof(_Tp)> v512_load_halves(const _Tp* loptr, const _Tp* hiptr)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
+    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
+#endif
+    v_reg<_Tp, simd512_width / sizeof(_Tp)> c;
+    for (int i = 0; i < c.nlanes / 2; i++)
+    {
+        c.s[i] = loptr[i];
+        c.s[i + c.nlanes / 2] = hiptr[i];
+    }
+    return c;
+}
+#endif
+
 /** @brief Load register contents from memory with double expand

 Same as cv::v_load, but result pack type will be 2x wider than memory type.

 @code{.cpp}
 short buf[4] = {1, 2, 3, 4}; // type is int16
 v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
 @endcode
-For 8-, 16-, 32-bit integer source types. */
+For 8-, 16-, 32-bit integer source types.
+
+@note Use vx_load_expand version to get maximum available register length result
+*/
 template<typename _Tp>
-inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
 v_load_expand(const _Tp* ptr)
 {
 #if CV_STRONG_ALIGNMENT
     CV_Assert(isAligned<sizeof(_Tp)>(ptr));
 #endif
     typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
+    v_reg<w_type, simd128_width / sizeof(w_type)> c;
     for( int i = 0; i < c.nlanes; i++ )
     {
         c.s[i] = ptr[i];
@@ -1557,23 +1886,88 @@ v_load_expand(const _Tp* ptr)
     return c;
 }
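An editor's usage sketch for the widening load just above (not part of the patch; the buffer
contents are assumptions):
@code{.cpp}
uchar src[8] = {1, 2, 3, 4, 5, 250, 251, 252};
v_uint16x8 w = v_load_expand(src); // every uchar widens to a ushort lane, so 250..252 are kept exactly
@endcode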
+#if CV_SIMD256
+/** @brief Load register contents from memory with double expand
+
+Same as cv::v256_load, but result pack type will be 2x wider than memory type.
+
+@code{.cpp}
+short buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int16
+v_int32x8 r = v256_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is int32
+@endcode
+For 8-, 16-, 32-bit integer source types.
+
+@note Check CV_SIMD256 preprocessor definition prior to use.
+Use vx_load_expand version to get maximum available register length result
+*/
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
+v256_load_expand(const _Tp* ptr)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, simd256_width / sizeof(w_type)> c;
+    for (int i = 0; i < c.nlanes; i++)
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+#endif
+
+#if CV_SIMD512
+/** @brief Load register contents from memory with double expand
+
+Same as cv::v512_load, but result pack type will be 2x wider than memory type.
+
+@code{.cpp}
+short buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type is int16
+v_int32x16 r = v512_load_expand(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - type is int32
+@endcode
+For 8-, 16-, 32-bit integer source types.
+
+@note Check CV_SIMD512 preprocessor definition prior to use.
+Use vx_load_expand version to get maximum available register length result
+*/
+template<typename _Tp>
+inline v_reg<typename V_TypeTraits<_Tp>::w_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::w_type)>
+v512_load_expand(const _Tp* ptr)
+{
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
+#endif
+    typedef typename V_TypeTraits<_Tp>::w_type w_type;
+    v_reg<w_type, simd512_width / sizeof(w_type)> c;
+    for (int i = 0; i < c.nlanes; i++)
+    {
+        c.s[i] = ptr[i];
+    }
+    return c;
+}
+#endif
+
 /** @brief Load register contents from memory with quad expand

 Same as cv::v_load_expand, but result type is 4 times wider than source.
 @code{.cpp}
 char buf[4] = {1, 2, 3, 4}; // type is int8
-v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
+v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32
 @endcode
-For 8-bit integer source types. */
+For 8-bit integer source types.
+
+@note Use vx_load_expand_q version to get maximum available register length result
+*/
 template<typename _Tp>
-inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_TypeTraits<_Tp>::nlanes128 / 4>
+inline v_reg<typename V_TypeTraits<_Tp>::q_type, simd128_width / sizeof(typename V_TypeTraits<_Tp>::q_type)>
 v_load_expand_q(const _Tp* ptr)
 {
 #if CV_STRONG_ALIGNMENT
     CV_Assert(isAligned<sizeof(_Tp)>(ptr));
 #endif
     typedef typename V_TypeTraits<_Tp>::q_type q_type;
-    v_reg<q_type, V_TypeTraits<q_type>::nlanes128> c;
+    v_reg<q_type, simd128_width / sizeof(q_type)> c;
     for( int i = 0; i < c.nlanes; i++ )
     {
         c.s[i] = ptr[i];
@@ -1581,6 +1975,66 @@ v_load_expand_q(const _Tp* ptr)
     return c;
 }

+#if CV_SIMD256
+/** @brief Load register contents from memory with quad expand
+
+Same as cv::v256_load_expand, but result type is 4 times wider than source.
+@code{.cpp} +char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // type is int8 +v_int32x8 r = v256_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8} - type is int32 +@endcode +For 8-bit integer source types. + +@note Check CV_SIMD256 preprocessor definition prior to use. +Use vx_load_expand_q version to get maximum available register length result +*/ +template +inline v_reg::q_type, simd256_width / sizeof(typename V_TypeTraits<_Tp>::q_type)> +v256_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + +#if CV_SIMD512 +/** @brief Load register contents from memory with quad expand + +Same as cv::v512_load_expand, but result type is 4 times wider than source. +@code{.cpp} +char buf[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // type is int8 +v_int32x16 r = v512_load_expand_q(buf); // r = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} - type is int32 +@endcode +For 8-bit integer source types. + +@note Check CV_SIMD512 preprocessor definition prior to use. +Use vx_load_expand_q version to get maximum available register length result +*/ +template +inline v_reg::q_type, simd512_width / sizeof(typename V_TypeTraits<_Tp>::q_type)> +v512_load_expand_q(const _Tp* ptr) +{ +#if CV_STRONG_ALIGNMENT + CV_Assert(isAligned(ptr)); +#endif + typedef typename V_TypeTraits<_Tp>::q_type q_type; + v_reg c; + for (int i = 0; i < c.nlanes; i++) + { + c.s[i] = ptr[i]; + } + return c; +} +#endif + /** @brief Load and deinterleave (2 channels) Load data from memory deinterleave and store to 2 registers. @@ -1965,9 +2419,11 @@ inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) return v_reg<_Tp, n>::all(a.s[i]); } -/** @brief Round +/** @brief Round elements -Rounds each value. Input type is float vector ==> output type is int vector.*/ +Rounds each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_round(const v_reg& a) { v_reg c; @@ -1988,9 +2444,11 @@ template inline v_reg v_round(const v_reg& a, const return c; } -/** @brief Floor +/** @brief Floor elements -Floor each value. Input type is float vector ==> output type is int vector.*/ +Floor each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_floor(const v_reg& a) { v_reg c; @@ -1999,9 +2457,11 @@ template inline v_reg v_floor(const v_reg& a) return c; } -/** @brief Ceil +/** @brief Ceil elements -Ceil each value. Input type is float vector ==> output type is int vector.*/ +Ceil each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. +*/ template inline v_reg v_ceil(const v_reg& a) { v_reg c; @@ -2010,9 +2470,11 @@ template inline v_reg v_ceil(const v_reg& a) return c; } -/** @brief Trunc +/** @brief Truncate elements -Truncate each value. Input type is float vector ==> output type is int vector.*/ +Truncate each value. Input type is float vector ==> output type is int vector. +@note Only for floating point types. 
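+
+For instance (an editor's illustrative sketch):
+@code{.cpp}
+v_float32x4 a(1.9f, -1.9f, 2.5f, -2.5f);
+v_int32x4 t = v_trunc(a); // {1, -1, 2, -2}: fractional parts are discarded, rounding toward zero
+@endcode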
+*/ template inline v_reg v_trunc(const v_reg& a) { v_reg c; @@ -2036,7 +2498,7 @@ template inline v_reg v_round(const v_reg& a) /** @overload */ template inline v_reg v_floor(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { c.s[i] = cvFloor(a.s[i]); @@ -2048,7 +2510,7 @@ template inline v_reg v_floor(const v_reg& a) /** @overload */ template inline v_reg v_ceil(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { c.s[i] = cvCeil(a.s[i]); @@ -2060,10 +2522,10 @@ template inline v_reg v_ceil(const v_reg& a) /** @overload */ template inline v_reg v_trunc(const v_reg& a) { - v_reg c; + v_reg c; for( int i = 0; i < n; i++ ) { - c.s[i] = cvCeil(a.s[i]); + c.s[i] = (int)(a.s[i]); c.s[i+n] = 0; } return c; @@ -2071,7 +2533,7 @@ template inline v_reg v_trunc(const v_reg& a) /** @brief Convert to float -Supported input type is cv::v_int32x4. */ +Supported input type is cv::v_int32. */ template inline v_reg v_cvt_f32(const v_reg& a) { v_reg c; @@ -2080,6 +2542,9 @@ template inline v_reg v_cvt_f32(const v_reg& a) return c; } +/** @brief Convert lower half to float + +Supported input type is cv::v_float64. */ template inline v_reg v_cvt_f32(const v_reg& a) { v_reg c; @@ -2091,6 +2556,9 @@ template inline v_reg v_cvt_f32(const v_reg& a) return c; } +/** @brief Convert to float + +Supported input type is cv::v_float64. */ template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) { v_reg c; @@ -2102,72 +2570,55 @@ template inline v_reg v_cvt_f32(const v_reg& a, co return c; } -/** @brief Convert to double +/** @brief Convert lower half to double -Supported input type is cv::v_int32x4. */ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +Supported input type is cv::v_int32. */ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < (n/2); i++ ) c.s[i] = (double)a.s[i]; return c; } /** @brief Convert to double high part of vector -Supported input type is cv::v_int32x4. */ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +Supported input type is cv::v_int32. */ +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; return c; } -/** @brief Convert to double +/** @brief Convert lower half to double -Supported input type is cv::v_float32x4. */ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +Supported input type is cv::v_float32. */ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < (n/2); i++ ) c.s[i] = (double)a.s[i]; return c; } /** @brief Convert to double high part of vector -Supported input type is cv::v_float32x4. */ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) +Supported input type is cv::v_float32. */ +template CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; + v_reg c; + for( int i = 0; i < (n/2); i++ ) + c.s[i] = (double)a.s[i + (n/2)]; return c; } /** @brief Convert to double -Supported input type is cv::v_int64x2. */ -CV_INLINE v_reg v_cvt_f64(const v_reg& a) +Supported input type is cv::v_int64. 
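+
+For instance (an editor's illustrative sketch):
+@code{.cpp}
+v_int64x2 a(1, -2);
+v_float64x2 d = v_cvt_f64(a); // {1.0, -2.0}; all lanes fit, which is why the int64 _high variant is dropped
+@endcode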
*/ +template CV_INLINE v_reg v_cvt_f64(const v_reg& a) { - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -/** @brief Convert to double high part of vector - -Supported input type is cv::v_int64x2. */ -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; v_reg c; for( int i = 0; i < n; i++ ) c.s[i] = (double)a.s[i]; @@ -2175,24 +2626,24 @@ CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) } -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut(const _Tp* tab, const int* idx) { - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) c.s[i] = tab[idx[i]]; return c; } -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_pairs(const _Tp* tab, const int* idx) { - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) c.s[i] = tab[idx[i / 2] + i % 2]; return c; } -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) +template inline v_reg<_Tp, simd128_width / sizeof(_Tp)> v_lut_quads(const _Tp* tab, const int* idx) { - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) + v_reg<_Tp, simd128_width / sizeof(_Tp)> c; + for (int i = 0; i < c.nlanes; i++) c.s[i] = tab[idx[i / 4] + i % 4]; return c; } @@ -2221,36 +2672,15 @@ template inline v_reg v_lut(const float* tab, const v_reg inline v_reg v_lut(const double* tab, const v_reg& idx) +template inline v_reg v_lut(const double* tab, const v_reg& idx) { - v_reg c; - for( int i = 0; i < n; i++ ) + v_reg c; + for( int i = 0; i < n/2; i++ ) c.s[i] = tab[idx.s[i]]; return c; } -inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - - template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, v_reg& x, v_reg& y) { @@ -2330,146 +2760,205 @@ b2 {A3 B3 C3 D3} b3 {A4 B4 C4 D4} @endcode */ -template -inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) +template +inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, + const v_reg<_Tp, n>& a2, const v_reg<_Tp, n>& a3, + v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1, + v_reg<_Tp, n>& b2, v_reg<_Tp, n>& b3 ) { - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); + for (int i = 0; i < n / 4; i++) + { + b0.s[0 + i*4] = a0.s[0 + i*4]; b0.s[1 + i*4] = a1.s[0 + i*4]; + b0.s[2 + i*4] = a2.s[0 + i*4]; b0.s[3 + i*4] = a3.s[0 + i*4]; + b1.s[0 + i*4] = a0.s[1 + i*4]; b1.s[1 + i*4] 
= a1.s[1 + i*4]; + b1.s[2 + i*4] = a2.s[1 + i*4]; b1.s[3 + i*4] = a3.s[1 + i*4]; + b2.s[0 + i*4] = a0.s[2 + i*4]; b2.s[1 + i*4] = a1.s[2 + i*4]; + b2.s[2 + i*4] = a2.s[2 + i*4]; b2.s[3 + i*4] = a3.s[2 + i*4]; + b3.s[0 + i*4] = a0.s[3 + i*4]; b3.s[1 + i*4] = a1.s[3 + i*4]; + b3.s[2 + i*4] = a2.s[3 + i*4]; b3.s[3 + i*4] = a3.s[3 + i*4]; + } } //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } +#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \ +inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } //! @name Init with zero //! @{ //! @brief Create new vector with zero elements -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, v, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, v, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, v, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, v, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, v, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, v, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, v, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, v, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, v, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, v, s64) + +#if CV_SIMD256 +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x32, v256, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x32, v256, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x16, v256, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x16, v256, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x8, v256, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x8, v256, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x8, v256, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x4, v256, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x4, v256, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x4, v256, s64) +#endif + +#if CV_SIMD512 +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x64, v512, u8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x64, v512, s8) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x32, v512, u16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x32, v512, s16) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x16, v512, u32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x16, v512, s32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x16, v512, f32) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x8, v512, f64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x8, v512, u64) +OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64) +#endif //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } +#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \ +inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } //! @name Init with value //! @{ //! 
@brief Create new vector with elements set to a specific value -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, v, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, v, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, v, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, v, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, v, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, v, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, v, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, v, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, v, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, v, s64) + +#if CV_SIMD256 +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x32, uchar, v256, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x32, schar, v256, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x16, ushort, v256, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x16, short, v256, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x8, unsigned, v256, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x8, int, v256, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x8, float, v256, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x4, double, v256, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x4, uint64, v256, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x4, int64, v256, s64) +#endif + +#if CV_SIMD512 +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x64, uchar, v512, u8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x64, schar, v512, s8) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x32, ushort, v512, u16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x32, short, v512, s16) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x16, unsigned, v512, u32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x16, int, v512, s32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x16, float, v512, f32) +OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x8, double, v512, f64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x8, uint64, v512, u64) +OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x8, int64, v512, s64) +#endif //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ +#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tp, suffix) \ +template inline v_reg<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)> \ v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } +{ return a.template reinterpret_as<_Tp, n0*sizeof(_Tp0)/sizeof(_Tp)>(); } //! @name Reinterpret //! @{ //! @brief Convert vector to different type without modifying underlying data. 
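//!
//! For example (an editor's illustrative sketch):
//! @code{.cpp}
//! v_float32x4 a = v_setall_f32(1.0f);
//! v_uint32x4 b = v_reinterpret_as_u32(a); // every lane now holds the bit pattern 0x3F800000
//! @endcode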
-OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) +OPENCV_HAL_IMPL_C_REINTERPRET(uchar, u8) +OPENCV_HAL_IMPL_C_REINTERPRET(schar, s8) +OPENCV_HAL_IMPL_C_REINTERPRET(ushort, u16) +OPENCV_HAL_IMPL_C_REINTERPRET(short, s16) +OPENCV_HAL_IMPL_C_REINTERPRET(unsigned, u32) +OPENCV_HAL_IMPL_C_REINTERPRET(int, s32) +OPENCV_HAL_IMPL_C_REINTERPRET(float, f32) +OPENCV_HAL_IMPL_C_REINTERPRET(double, f64) +OPENCV_HAL_IMPL_C_REINTERPRET(uint64, u64) +OPENCV_HAL_IMPL_C_REINTERPRET(int64, s64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } +#define OPENCV_HAL_IMPL_C_SHIFTL(_Tp) \ +template inline v_reg<_Tp, n> v_shl(const v_reg<_Tp, n>& a) \ +{ return a << shift; } //! @name Left shift //! @{ //! @brief Shift left -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) +OPENCV_HAL_IMPL_C_SHIFTL(ushort) +OPENCV_HAL_IMPL_C_SHIFTL(short) +OPENCV_HAL_IMPL_C_SHIFTL(unsigned) +OPENCV_HAL_IMPL_C_SHIFTL(int) +OPENCV_HAL_IMPL_C_SHIFTL(uint64) +OPENCV_HAL_IMPL_C_SHIFTL(int64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } +#define OPENCV_HAL_IMPL_C_SHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_shr(const v_reg<_Tp, n>& a) \ +{ return a >> shift; } //! @name Right shift //! @{ //! @brief Shift right -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) +OPENCV_HAL_IMPL_C_SHIFTR(ushort) +OPENCV_HAL_IMPL_C_SHIFTR(short) +OPENCV_HAL_IMPL_C_SHIFTR(unsigned) +OPENCV_HAL_IMPL_C_SHIFTR(int) +OPENCV_HAL_IMPL_C_SHIFTR(uint64) +OPENCV_HAL_IMPL_C_SHIFTR(int64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tp) \ +template inline v_reg<_Tp, n> v_rshr(const v_reg<_Tp, n>& a) \ { \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + v_reg<_Tp, n> c; \ + for( int i = 0; i < n; i++ ) \ + c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ return c; \ } //! @name Rounding shift //! @{ //! 
@brief Rounding shift right -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) +OPENCV_HAL_IMPL_C_RSHIFTR(ushort) +OPENCV_HAL_IMPL_C_RSHIFTR(short) +OPENCV_HAL_IMPL_C_RSHIFTR(unsigned) +OPENCV_HAL_IMPL_C_RSHIFTR(int) +OPENCV_HAL_IMPL_C_RSHIFTR(uint64) +OPENCV_HAL_IMPL_C_RSHIFTR(int64) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +#define OPENCV_HAL_IMPL_C_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ { \ c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ + c.s[i+n] = cast<_Tpn>(b.s[i]); \ } \ return c; \ } @@ -2485,26 +2974,26 @@ inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tp, _Tpn, pack_suffix, cast) \ +template inline v_reg<_Tpn, 2*n> v_rshr_##pack_suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ { \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + v_reg<_Tpn, 2*n> c; \ + for( int i = 0; i < n; i++ ) \ { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ + c.s[i+n] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ } \ return c; \ } @@ -2520,22 +3009,22 @@ template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpve //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. 
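//!
//! For example (an editor's illustrative sketch of the rounding behaviour):
//! @code{.cpp}
//! v_int16x8 a = v_setall_s16(257), b = v_setall_s16(-257);
//! v_int8x16 p = v_rshr_pack<2>(a, b); // (257 + 2) >> 2 == 64 and (-257 + 2) >> 2 == -64
//! @endcode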
-OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! @ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ { \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ + for( int i = 0; i < n; i++ ) \ ptr[i] = cast<_Tpn>(a.s[i]); \ } @@ -2550,23 +3039,23 @@ inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_PACK_STORE(int, ushort, pack_u, saturate_cast) //! @} //! @brief Helper macro //! 
@ingroup core_hal_intrin_impl -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tp, _Tpn, pack_suffix, cast) \ +template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const v_reg<_Tp, n>& a) \ { \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ + for( int i = 0; i < n; i++ ) \ + ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (shift - 1))) >> shift); \ } //! @name Pack and store with rounding shift @@ -2580,14 +3069,14 @@ template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec //! - pack_u: for 16- and 32-bit signed integer input types //! //! @note All variants except 64-bit use saturation. -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(ushort, uchar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, schar, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(unsigned, ushort, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, short, pack, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(uint64, unsigned, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int64, int, pack, static_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(short, uchar, pack_u, saturate_cast) +OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(int, ushort, pack_u, saturate_cast) //! @} //! @cond IGNORED @@ -2622,9 +3111,9 @@ b {0xFFFF 0 0xFFFF 0 0 0xFFFF 0 0xFFFF} } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); return mask; } @@ -2645,12 +3134,12 @@ d {0 0xFFFF.. 
0 0xFFFF..} } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); + _pack_b(mask.s + 2*n, c, d); return mask; } @@ -2674,15 +3163,16 @@ h {0 0xFFFF..} 0xFF 0 0xFF 0 0 0xFF 0 0xFF } @endcode */ -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) +template inline v_reg v_pack_b(const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d, + const v_reg& e, const v_reg& f, + const v_reg& g, const v_reg& h) { - v_uint8x16 mask; + v_reg mask; _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); + _pack_b(mask.s + 2*n, c, d); + _pack_b(mask.s + 4*n, e, f); + _pack_b(mask.s + 6*n, g, h); return mask; } //! @} @@ -2697,71 +3187,109 @@ Scheme: {D0 D1 D2 D3} x |V3| ==================== {R0 R1 R2 R3}, where: -R0 = A0V0 + A1V1 + A2V2 + A3V3, -R1 = B0V0 + B1V1 + B2V2 + B3V3 +R0 = A0V0 + B0V1 + C0V2 + D0V3, +R1 = A1V0 + B1V1 + C1V2 + D1V3 ... @endcode */ -inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) +template +inline v_reg v_matmul(const v_reg& v, + const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); + v_reg res; + for (int i = 0; i < n / 4; i++) + { + res.s[0 + i*4] = v.s[0 + i*4] * a.s[0 + i*4] + v.s[1 + i*4] * b.s[0 + i*4] + v.s[2 + i*4] * c.s[0 + i*4] + v.s[3 + i*4] * d.s[0 + i*4]; + res.s[1 + i*4] = v.s[0 + i*4] * a.s[1 + i*4] + v.s[1 + i*4] * b.s[1 + i*4] + v.s[2 + i*4] * c.s[1 + i*4] + v.s[3 + i*4] * d.s[1 + i*4]; + res.s[2 + i*4] = v.s[0 + i*4] * a.s[2 + i*4] + v.s[1 + i*4] * b.s[2 + i*4] + v.s[2 + i*4] * c.s[2 + i*4] + v.s[3 + i*4] * d.s[2 + i*4]; + res.s[3 + i*4] = v.s[0 + i*4] * a.s[3 + i*4] + v.s[1 + i*4] * b.s[3 + i*4] + v.s[2 + i*4] * c.s[3 + i*4] + v.s[3 + i*4] * d.s[3 + i*4]; + } + return res; } /** @brief Matrix multiplication and add Scheme: @code -{A0 A1 A2 } |V0| |D0| -{B0 B1 B2 } |V1| |D1| -{C0 C1 C2 } x |V2| + |D2| -==================== +{A0 A1 A2 A3} |V0| |D0| +{B0 B1 B2 B3} |V1| |D1| +{C0 C1 C2 C3} x |V2| + |D2| +==================== |D3| {R0 R1 R2 R3}, where: -R0 = A0V0 + A1V1 + A2V2 + D0, -R1 = B0V0 + B1V1 + B2V2 + D1 +R0 = A0V0 + B0V1 + C0V2 + D0, +R1 = A1V0 + B1V1 + C1V2 + D1 ... 
@endcode */ -inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) +template +inline v_reg v_matmuladd(const v_reg& v, + const v_reg& a, const v_reg& b, + const v_reg& c, const v_reg& d) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); + v_reg res; + for (int i = 0; i < n / 4; i++) + { + res.s[0 + i * 4] = v.s[0 + i * 4] * a.s[0 + i * 4] + v.s[1 + i * 4] * b.s[0 + i * 4] + v.s[2 + i * 4] * c.s[0 + i * 4] + d.s[0 + i * 4]; + res.s[1 + i * 4] = v.s[0 + i * 4] * a.s[1 + i * 4] + v.s[1 + i * 4] * b.s[1 + i * 4] + v.s[2 + i * 4] * c.s[1 + i * 4] + d.s[1 + i * 4]; + res.s[2 + i * 4] = v.s[0 + i * 4] * a.s[2 + i * 4] + v.s[1 + i * 4] * b.s[2 + i * 4] + v.s[2 + i * 4] * c.s[2 + i * 4] + d.s[2 + i * 4]; + res.s[3 + i * 4] = v.s[0 + i * 4] * a.s[3 + i * 4] + v.s[1 + i * 4] * b.s[3 + i * 4] + v.s[2 + i * 4] * c.s[3 + i * 4] + d.s[3 + i * 4]; + } + return res; } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) +template inline v_reg v_dotprod_expand(const v_reg& a, const v_reg& b) { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +template inline v_reg v_dotprod_expand(const v_reg& a, const v_reg& b, + const v_reg& c) { return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +template inline v_reg v_dotprod_expand_fast(const v_reg& a, const v_reg& b) { return v_dotprod_expand(a, b); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +template inline v_reg v_dotprod_expand_fast(const v_reg& a, const v_reg& b, + const v_reg& c) { return v_dotprod_expand(a, b, c); } ////// FP16 support /////// -inline v_reg::nlanes128> +inline v_reg v_load_expand(const float16_t* ptr) { - v_reg::nlanes128> v; + v_reg v; for( int i = 0; i < v.nlanes; i++ ) { v.s[i] = ptr[i]; } return v; } +#if CV_SIMD256 +inline v_reg +v256_load_expand(const float16_t* ptr) +{ + v_reg v; + for (int i = 0; i < v.nlanes; i++) + { + v.s[i] = ptr[i]; + } + return v; +} +#endif +#if CV_SIMD512 +inline v_reg +v512_load_expand(const float16_t* ptr) +{ + v_reg v; + for (int i = 0; i < v.nlanes; i++) + { + v.s[i] = ptr[i]; + } + return v; +} +#endif -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) +template inline void +v_pack_store(float16_t* ptr, const v_reg& v) { for( int i = 0; i < v.nlanes; i++ ) { @@ -2770,6 +3298,12 @@ v_pack_store(float16_t* ptr, const v_reg::nlanes128>& } inline void v_cleanup() {} +#if CV_SIMD256 +inline void v256_cleanup() {} +#endif +#if CV_SIMD512 +inline void v512_cleanup() {} +#endif //! 
@}

@@ -2778,4 +3312,9 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
 #endif
 }

+#if !defined(CV_DOXYGEN)
+#undef CV_SIMD256
+#undef CV_SIMD512
+#endif
+
 #endif
diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
index 280691b448..785648575a 100644
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -62,6 +62,22 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 #define CV_SIMD128_64F 0
 #endif

+// The following macro checks if the code is being compiled for the
+// AArch64 execution state of Armv8, to enable the 128-bit
+// intrinsics. The macro `__ARM_64BIT_STATE` is the one recommended by
+// the Arm C Language Extension (ACLE) specifications [1] to check the
+// availability of 128-bit intrinsics, and it is supported by clang
+// and gcc. The macro `_M_ARM64` is the equivalent one for Microsoft
+// Visual Studio [2].
+//
+// [1] https://developer.arm.com/documentation/101028/0012/13--Advanced-SIMD--Neon--intrinsics
+// [2] https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
+#if defined(__ARM_64BIT_STATE) || defined(_M_ARM64)
+#define CV_NEON_AARCH64 1
+#else
+#define CV_NEON_AARCH64 0
+#endif
+
 // TODO
 #define CV_NEON_DOT 0

@@ -726,41 +742,61 @@ inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b,
 // 16 >> 32
 inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
 {
+#if CV_NEON_AARCH64
+    int32x4_t p = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
+    return v_int32x4(vmlal_high_s16(p, a.val, b.val));
+#else
     int16x4_t a0 = vget_low_s16(a.val);
     int16x4_t a1 = vget_high_s16(a.val);
     int16x4_t b0 = vget_low_s16(b.val);
     int16x4_t b1 = vget_high_s16(b.val);
     int32x4_t p = vmull_s16(a0, b0);
     return v_int32x4(vmlal_s16(p, a1, b1));
+#endif
 }
 inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
 {
+#if CV_NEON_AARCH64
+    int32x4_t p = vmlal_s16(c.val, vget_low_s16(a.val), vget_low_s16(b.val));
+    return v_int32x4(vmlal_high_s16(p, a.val, b.val));
+#else
     int16x4_t a0 = vget_low_s16(a.val);
     int16x4_t a1 = vget_high_s16(a.val);
     int16x4_t b0 = vget_low_s16(b.val);
     int16x4_t b1 = vget_high_s16(b.val);
     int32x4_t p = vmlal_s16(c.val, a0, b0);
     return v_int32x4(vmlal_s16(p, a1, b1));
+#endif
 }

 // 32 >> 64
 inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
 {
+#if CV_NEON_AARCH64
+    int64x2_t p = vmull_s32(vget_low_s32(a.val), vget_low_s32(b.val));
+    return v_int64x2(vmlal_high_s32(p, a.val, b.val));
+#else
     int32x2_t a0 = vget_low_s32(a.val);
     int32x2_t a1 = vget_high_s32(a.val);
     int32x2_t b0 = vget_low_s32(b.val);
     int32x2_t b1 = vget_high_s32(b.val);
     int64x2_t p = vmull_s32(a0, b0);
     return v_int64x2(vmlal_s32(p, a1, b1));
+#endif
 }
 inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
 {
+#if CV_NEON_AARCH64
+    int64x2_t p = vmlal_s32(c.val, vget_low_s32(a.val), vget_low_s32(b.val));
+    return v_int64x2(vmlal_high_s32(p, a.val, b.val));
+#else
     int32x2_t a0 = vget_low_s32(a.val);
     int32x2_t a1 = vget_high_s32(a.val);
     int32x2_t b0 = vget_low_s32(b.val);
     int32x2_t b1 = vget_high_s32(b.val);
     int64x2_t p = vmlal_s32(c.val, a0, b0);
     return v_int64x2(vmlal_s32(p, a1, b1));
+#endif
 }

 // 8 >> 32
@@ -1292,7 +1328,7 @@ inline int64 v_reduce_sum(const v_int64x2& a)
 #if CV_SIMD128_64F
 inline double v_reduce_sum(const v_float64x2& a)
 {
-    return vgetq_lane_f64(a.val, 0) + vgetq_lane_f64(a.val, 1);
+    return vaddvq_f64(a.val);
 }
 #endif

@@ -1503,6
+1539,26 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32) OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64) #endif +#if CV_NEON_AARCH64 +#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ +inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ +{ \ + b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \ + b1.val = vmovl_high_##suffix(a.val); \ +} \ +inline _Tpwvec v_expand_low(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_##suffix(vget_low_##suffix(a.val))); \ +} \ +inline _Tpwvec v_expand_high(const _Tpvec& a) \ +{ \ + return _Tpwvec(vmovl_high_##suffix(a.val)); \ +} \ +inline _Tpwvec v_load_expand(const _Tp* ptr) \ +{ \ + return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ +} +#else #define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ { \ @@ -1521,6 +1577,7 @@ inline _Tpwvec v_load_expand(const _Tp* ptr) \ { \ return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \ } +#endif OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8) OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index eca787c7fd..cb2140df58 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -2,309 +2,2316 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. +// The original implementation has been contributed by Yin Zhang. +// Copyright (C) 2020, Institute of Software, Chinese Academy of Sciences. + #ifndef OPENCV_HAL_INTRIN_RVV_HPP #define OPENCV_HAL_INTRIN_RVV_HPP -#include -#include #include -#include "opencv2/core/saturate.hpp" - -#define CV_SIMD128_CPP 1 -#if defined(CV_FORCE_SIMD128_CPP) || defined(CV_DOXYGEN) -#define CV_SIMD128 1 -#define CV_SIMD128_64F 1 -#endif namespace cv { -#ifndef CV_DOXYGEN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN -#endif +#define CV_SIMD128 1 +#define CV_SIMD128_64F 1 -template struct v_reg +//////////// Unsupported native intrinsics in C++ //////////// + +struct vuint8mf2_t { - typedef _Tp lane_type; - enum { nlanes = n }; - - explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } - - v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7) + uchar val[8] = {0}; + vuint8mf2_t() {} + vuint8mf2_t(const uchar* ptr) { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7, - _Tp s8, _Tp s9, _Tp s10, _Tp s11, - _Tp s12, _Tp s13, _Tp s14, _Tp s15) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; - s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; - } - - v_reg() {} - - v_reg(const v_reg<_Tp, n> & r) - { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - } - _Tp get0() const { return s[0]; } - - _Tp get(const int i) const { return s[i]; } - v_reg<_Tp, n> high() const - { - v_reg<_Tp, n> c; - int i; - for( i = 0; i < n/2; i++ ) + for (int i = 0; i < 8; ++i) { - c.s[i] = s[i+(n/2)]; - c.s[i+(n/2)] = 0; + val[i] = ptr[i]; } - return c; } - - static v_reg<_Tp, n> zero() +}; +struct 
vint8mf2_t +{ + schar val[8] = {0}; + vint8mf2_t() {} + vint8mf2_t(const schar* ptr) { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = (_Tp)0; - return c; + for (int i = 0; i < 8; ++i) + { + val[i] = ptr[i]; + } } - - static v_reg<_Tp, n> all(_Tp s) +}; +struct vuint16mf2_t +{ + ushort val[4] = {0}; + vuint16mf2_t() {} + vuint16mf2_t(const ushort* ptr) { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = s; - return c; + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - template v_reg<_Tp2, n2> reinterpret_as() const +}; +struct vint16mf2_t +{ + short val[4] = {0}; + vint16mf2_t() {} + vint16mf2_t(const short* ptr) { - size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); - v_reg<_Tp2, n2> c; - std::memcpy(&c.s[0], &s[0], bytes); - return c; + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - v_reg& operator=(const v_reg<_Tp, n> & r) +}; +struct vuint32mf2_t +{ + unsigned val[2] = {0}; + vuint32mf2_t() {} + vuint32mf2_t(const unsigned* ptr) { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - return *this; + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vint32mf2_t +{ + int val[2] = {0}; + vint32mf2_t() {} + vint32mf2_t(const int* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vfloat32mf2_t +{ + float val[2] = {0}; + vfloat32mf2_t() {} + vfloat32mf2_t(const float* ptr) + { + val[0] = ptr[0]; + val[1] = ptr[1]; + } +}; +struct vuint64mf2_t +{ + uint64 val[1] = {0}; + vuint64mf2_t() {} + vuint64mf2_t(const uint64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vint64mf2_t +{ + int64 val[1] = {0}; + vint64mf2_t() {} + vint64mf2_t(const int64* ptr) + { + val[0] = ptr[0]; + } +}; +struct vfloat64mf2_t +{ + double val[1] = {0}; + vfloat64mf2_t() {} + vfloat64mf2_t(const double* ptr) + { + val[0] = ptr[0]; + } +}; +struct vuint8mf4_t +{ + uchar val[4] = {0}; + vuint8mf4_t() {} + vuint8mf4_t(const uchar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } + } +}; +struct vint8mf4_t +{ + schar val[4] = {0}; + vint8mf4_t() {} + vint8mf4_t(const schar* ptr) + { + for (int i = 0; i < 4; ++i) + { + val[i] = ptr[i]; + } } - - _Tp s[n]; }; -typedef v_reg v_uint8x16; -typedef v_reg v_int8x16; -typedef v_reg v_uint16x8; -typedef v_reg v_int16x8; -typedef v_reg v_uint32x4; -typedef v_reg v_int32x4; -typedef v_reg v_float32x4; -typedef v_reg v_float64x2; -typedef v_reg v_uint64x2; -typedef v_reg v_int64x2; - -template CV_INLINE v_reg<_Tp, n> operator+(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator+=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator-(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator-=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator*(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator*=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator/(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator/=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - - -template CV_INLINE v_reg<_Tp, n> operator&(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator&=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> 
operator^(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); -template CV_INLINE v_reg<_Tp, n>& operator^=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); - -template CV_INLINE v_reg<_Tp, n> operator~(const v_reg<_Tp, n>& a); - - -#ifndef CV_DOXYGEN - -#define CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, ...) \ -__CV_EXPAND(macro_name(uchar, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(schar, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(ushort, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(short, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(unsigned, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(int, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(uint64, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(int64, __VA_ARGS__)) \ - -#define CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, ...) \ -__CV_EXPAND(macro_name(float, __VA_ARGS__)) \ -__CV_EXPAND(macro_name(double, __VA_ARGS__)) \ - -#define CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(macro_name, ...) \ -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(macro_name, __VA_ARGS__) \ -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(macro_name, __VA_ARGS__) \ - -#define CV__HAL_INTRIN_IMPL_BIN_OP_(_Tp, bin_op) \ -template inline \ -v_reg<_Tp, n> operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \ +inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr) \ { \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return c; \ + return _Tpvec(ptr); \ } \ -template inline \ -v_reg<_Tp, n>& operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v) \ { \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return a; \ + for (int i = 0; i < n; ++i) \ + { \ + ptr[i] = v.val[i]; \ + } \ } -#define CV__HAL_INTRIN_IMPL_BIN_OP(bin_op) CV__HAL_INTRIN_EXPAND_WITH_ALL_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, bin_op) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint8mf2_t, uint8_t, u8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint8mf2_t, int8_t, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint16mf2_t, uint16_t, u16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint16mf2_t, int16_t, i16, 16, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint32mf2_t, uint32_t, u32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint32mf2_t, int32_t, i32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat32mf2_t, float32_t, f32, 32, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vuint64mf2_t, uint64_t, u64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vint64mf2_t, int64_t, i64, 64, 1) +OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1) -CV__HAL_INTRIN_IMPL_BIN_OP(+) -CV__HAL_INTRIN_IMPL_BIN_OP(-) -CV__HAL_INTRIN_IMPL_BIN_OP(*) -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIN_OP_, /) -#define CV__HAL_INTRIN_IMPL_BIT_OP_(_Tp, bit_op) \ -template CV_INLINE \ -v_reg<_Tp, n> operator bit_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +#define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \ +inline _Tpwvec wcvt (_Tpvec v) \ { \ - v_reg<_Tp, n> c; \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return c; \ -} \ -template CV_INLINE \ -v_reg<_Tp, n>& operator bit_op##= (v_reg<_Tp, n>& a, const 
v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return a; \ + _wTp tmp[n]; \ + for (int i = 0; i < n; ++i) \ + { \ + tmp[i] = (_wTp)v.val[i]; \ + } \ + vsetvlmax_e##width##m1(); \ + return vle##width##_v_##suffix##m1(tmp); \ } -#define CV__HAL_INTRIN_IMPL_BIT_OP(bit_op) \ -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) \ -CV__HAL_INTRIN_EXPAND_WITH_FP_TYPES(CV__HAL_INTRIN_IMPL_BIT_OP_, bit_op) /* TODO: FIXIT remove this after masks refactoring */ +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint16m1_t, vint8mf2_t, short, vwcvt_x_x_v_i16m1, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint32m1_t, vuint16mf2_t, unsigned, vwcvtu_x_x_v_u32m1, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2) +inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base) +{ + return vuint8mf4_t(base); +} +inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base) +{ + return vint8mf4_t(base); +} -CV__HAL_INTRIN_IMPL_BIT_OP(&) -CV__HAL_INTRIN_IMPL_BIT_OP(|) -CV__HAL_INTRIN_IMPL_BIT_OP(^) +inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src) +{ + ushort tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (ushort)src.val[i]; + } + return vle16_v_u16mf2(tmp); +} +inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src) +{ + short tmp[4]; + for (int i = 0; i < 4; ++i) + { + tmp[i] = (short)src.val[i]; + } + return vle16_v_i16mf2(tmp); +} -#define CV__HAL_INTRIN_IMPL_BITWISE_NOT_(_Tp, dummy) \ -template CV_INLINE \ -v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); \ - return c; \ -} \ +//////////// Types //////////// -CV__HAL_INTRIN_EXPAND_WITH_INTEGER_TYPES(CV__HAL_INTRIN_IMPL_BITWISE_NOT_, ~) +struct v_uint8x16 +{ + typedef uchar lane_type; + enum { nlanes = 16 }; + v_uint8x16() {} + explicit v_uint8x16(vuint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_u8m1(val, v); + } + v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, + uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) + { + uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint8m1_t() const + { + vsetvlmax_e8m1(); + return vle8_v_u8m1(val); + } + uchar get0() const + { + return val[0]; + } + + uchar val[16]; +}; + +struct v_int8x16 +{ + typedef schar lane_type; + enum { nlanes = 16 }; + + v_int8x16() {} + explicit v_int8x16(vint8m1_t v) + { + vsetvlmax_e8m1(); + vse8_v_i8m1(val, v); + } + v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, + schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) + { + schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint8m1_t() const + { 
+ vsetvlmax_e8m1(); + return vle8_v_i8m1(val); + } + schar get0() const + { + return val[0]; + } + + schar val[16]; +}; + +struct v_uint16x8 +{ + typedef ushort lane_type; + enum { nlanes = 8 }; + + v_uint16x8() {} + explicit v_uint16x8(vuint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_u16m1(val, v); + } + v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) + { + ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint16m1_t() const + { + vsetvlmax_e16m1(); + return vle16_v_u16m1(val); + } + ushort get0() const + { + return val[0]; + } + + ushort val[8]; +}; + +struct v_int16x8 +{ + typedef short lane_type; + enum { nlanes = 8 }; + + v_int16x8() {} + explicit v_int16x8(vint16m1_t v) + { + vsetvlmax_e16m1(); + vse16_v_i16m1(val, v); + } + v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) + { + short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint16m1_t() const + { + vsetvlmax_e16m1(); + return vle16_v_i16m1(val); + } + short get0() const + { + return val[0]; + } + + short val[8]; +}; + +struct v_uint32x4 +{ + typedef unsigned lane_type; + enum { nlanes = 4 }; + + v_uint32x4() {} + explicit v_uint32x4(vuint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_u32m1(val, v); + } + v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) + { + unsigned v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_u32m1(val); + } + unsigned get0() const + { + return val[0]; + } + + unsigned val[4]; +}; + +struct v_int32x4 +{ + typedef int lane_type; + enum { nlanes = 4 }; + + v_int32x4() {} + explicit v_int32x4(vint32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_i32m1(val, v); + } + v_int32x4(int v0, int v1, int v2, int v3) + { + int v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_i32m1(val); + } + int get0() const + { + return val[0]; + } + int val[4]; +}; + +struct v_float32x4 +{ + typedef float lane_type; + enum { nlanes = 4 }; + + v_float32x4() {} + explicit v_float32x4(vfloat32m1_t v) + { + vsetvlmax_e32m1(); + vse32_v_f32m1(val, v); + } + v_float32x4(float v0, float v1, float v2, float v3) + { + float v[] = {v0, v1, v2, v3}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat32m1_t() const + { + vsetvlmax_e32m1(); + return vle32_v_f32m1(val); + } + float get0() const + { + return val[0]; + } + float val[4]; +}; + +struct v_uint64x2 +{ + typedef uint64 lane_type; + enum { nlanes = 2 }; + + v_uint64x2() {} + explicit v_uint64x2(vuint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_u64m1(val, v); + } + v_uint64x2(uint64 v0, uint64 v1) + { + uint64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vuint64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_u64m1(val); + } + uint64 get0() const + { + return val[0]; + } + + uint64 val[2]; +}; + +struct v_int64x2 +{ + typedef int64 lane_type; + enum { nlanes = 2 }; + + v_int64x2() {} + explicit v_int64x2(vint64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_i64m1(val, v); + } + v_int64x2(int64 v0, int64 v1) + { + int64 v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vint64m1_t() const + { + vsetvlmax_e64m1(); + 
return vle64_v_i64m1(val); + } + int64 get0() const + { + return val[0]; + } + + int64 val[2]; +}; + +#if CV_SIMD128_64F +struct v_float64x2 +{ + typedef double lane_type; + enum { nlanes = 2 }; + + v_float64x2() {} + explicit v_float64x2(vfloat64m1_t v) + { + vsetvlmax_e64m1(); + vse64_v_f64m1(val, v); + } + v_float64x2(double v0, double v1) + { + double v[] = {v0, v1}; + for (int i = 0; i < nlanes; ++i) + { + val[i] = v[i]; + } + } + operator vfloat64m1_t() const + { + vsetvlmax_e64m1(); + return vle64_v_f64m1(val); + } + double get0() const + { + return val[0]; + } + + double val[2]; +}; #endif -#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ -template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp2, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ -} +//////////// Initial //////////// -#define OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(func, cfunc) \ -inline v_reg func(const v_reg& a) \ +#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, width, suffix1, suffix2) \ +inline v_##_Tpvec v_setzero_##suffix1() \ { \ - v_reg c; \ - for( int i = 0; i < 4; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vzero_##suffix2##m1()); \ } \ -inline v_reg func(const v_reg& a) \ +inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ { \ - v_reg c; \ - for( int i = 0; i < 2; i++ ) \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, 8, u8, u8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, 8, s8, i8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, 16, u16, u16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, 16, s16, i16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, 32, u32, u32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, 32, s32, i32) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, 64, u64, u64) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, 64, s64, i64) + +#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, width, suffix) \ +inline v_##_Tpv v_setzero_##suffix() \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vzero_##suffix##m1()); \ +} \ +inline v_##_Tpv v_setall_##suffix(_Tp v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(v)); \ +} + +OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, 64, f64) +#endif + +//////////// Reinterpret //////////// + +#define OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(_Tpvec, suffix) \ +inline v_##_Tpvec v_reinterpret_as_##suffix(const v_##_Tpvec& v) { return v; } + +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint8x16, u8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int8x16, s8) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint16x8, u16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int16x8, s16) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint32x4, u32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int32x4, s32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float32x4, f32) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(uint64x2, u64) +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64) +#endif + +#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \ +inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ +{ \ + vsetvlmax_e##width2##m1(); \ + return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val)); \ +} \ +inline v_##_Tpvec2 
v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \ +{ \ + vsetvlmax_e##width1##m1(); \ + return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val)); \ +} + +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, u32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, s16, u8, i16, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, vint8m1_t, u16, s8, u16, i8, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32) 
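// --- Usage sketch (editorial, not part of the patch) -------------------------
// The reinterpret casts defined here are emulated by a store/reload round trip
// at the new element width, so they are bit-exact views of the same 128 bits.
// A minimal sketch of what that enables, assuming the usual umbrella header
// and cv namespace; the helper name v_abs_bits is an illustrative assumption:
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

// Absolute value of four floats by clearing the IEEE-754 sign bit,
// round-tripping the register through its u32 view.
inline v_float32x4 v_abs_bits(const v_float32x4& x)
{
    v_uint32x4 bits = v_reinterpret_as_u32(x);  // same 128 bits, viewed as u32 lanes
    bits = bits & v_setall_u32(0x7fffffffu);    // clear the sign bit in every lane
    return v_reinterpret_as_f32(bits);          // reinterpret back to float lanes
}
// -----------------------------------------------------------------------------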
+OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64) +#endif + +////////////// Extract ////////////// + +#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, suffix, width, vmv) \ +template \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, s), b, _Tpvec::nlanes - s)); \ +} \ +template inline _Tp v_extract_n(_Tpvec v) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), v, i))); \ +} + + +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8x16, uchar, u8, 8, vmv_x_s_u8m1_u8) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8x16, schar, i8, 8, vmv_x_s_i8m1_i8) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16x8, ushort, u16, 16, vmv_x_s_u16m1_u16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16x8, short, i16, 16, vmv_x_s_i16m1_i16) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32x4, uint, u32, 32, vmv_x_s_u32m1_u32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32x4, int, i32, 32, vmv_x_s_i32m1_i32) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64x2, uint64, u64, 64, vmv_x_s_u64m1_u64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64x2, int64, i64, 64, vmv_x_s_i64m1_i64) +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32x4, float, f32, 32, vfmv_f_s_f32m1_f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64x2, double, f64, 64, vfmv_f_s_f64m1_f64) +#endif + +////////////// Load/Store ////////////// + +#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, width, suffix) \ +inline _Tpvec v_load(const _Tp* ptr) \ +{ \ + vsetvlmax_e8m1(); \ + return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr)); \ +} \ +inline _Tpvec v_load_aligned(const _Tp* ptr) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ +} \ +inline _Tpvec v_load_low(const _Tp* ptr) \ +{ \ + vsetvl_e##width##m1(hvl); \ + _Tpvec 
res = _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ + vsetvlmax_e##width##m1(); \ + return res; \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e8m1(); \ + vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val)); \ +} \ +inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ +{ \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(ptr, a); \ +} \ +inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ +{ \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ + for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ { \ - c.s[i] = cfunc(a.s[i]); \ - c.s[i + 2] = 0; \ + ptr[i] = tmp_ptr[i]; \ } \ - return c; \ -} - -OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) - -OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) - -OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, - typename V_TypeTraits<_Tp>::abs_type) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_round, cvRound) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_floor, cvFloor) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_ceil, cvCeil) - -OPENCV_HAL_IMPL_MATH_FUNC_FLOAT(v_trunc, int) - -#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ -template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ +} \ +inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i], b.s[i]); \ - return c; \ + _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ + vsetvlmax_e##width##m1(); \ + vse##width##_v_##suffix##m1(tmp_ptr, a); \ + for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ + { \ + ptr[i] = tmp_ptr[i+_Tpvec::nlanes/2]; \ + } \ } -#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ -template inline _Tp func(const v_reg<_Tp, n>& a) \ +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 8, u8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 8, i8) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 16, u16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 16, i16) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 32, u32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 32, i32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 64, u64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 64, i64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 64, f64) +#endif + +inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], + ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v_reinterpret_as_u8(v_load_halves((schar*)ptr0, (schar*)ptr1)); } + +inline 
v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return v_reinterpret_as_u16(v_load_halves((short*)ptr0, (short*)ptr1)); } + +inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + ptr0[0], ptr0[1], ptr1[0], ptr1[1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return v_reinterpret_as_u32(v_load_halves((int*)ptr0, (int*)ptr1)); } + +inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) +{ + int64 CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return v_reinterpret_as_u64(v_load_halves((int64*)ptr0, (int64*)ptr1)); } + +#if CV_SIMD128_64F +inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + ptr0[0], ptr1[0] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} +#endif + + +////////////// Lookup table access //////////////////// + +inline v_int8x16 v_lut(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[ 0]], + tab[idx[ 1]], + tab[idx[ 2]], + tab[idx[ 3]], + tab[idx[ 4]], + tab[idx[ 5]], + tab[idx[ 6]], + tab[idx[ 7]], + tab[idx[ 8]], + tab[idx[ 9]], + tab[idx[10]], + tab[idx[11]], + tab[idx[12]], + tab[idx[13]], + tab[idx[14]], + tab[idx[15]] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[4]], + tab[idx[4] + 1], + tab[idx[5]], + tab[idx[5] + 1], + tab[idx[6]], + tab[idx[6] + 1], + tab[idx[7]], + tab[idx[7] + 1] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) +{ + schar CV_DECL_ALIGNED(32) elems[16] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[2] + 2], + tab[idx[2] + 3], + tab[idx[3]], + tab[idx[3] + 1], + tab[idx[3] + 2], + tab[idx[3] + 3] + }; + vsetvlmax_e8m1(); + return v_int8x16(vle8_v_i8m1(elems)); +} +inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } +inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } +inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_quads((schar*)tab, idx)); } + +inline v_int16x8 v_lut(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]], + 
tab[idx[4]], + tab[idx[5]], + tab[idx[6]], + tab[idx[7]] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[2]], + tab[idx[2] + 1], + tab[idx[3]], + tab[idx[3] + 1] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_int16x8 v_lut_quads(const short* tab, const int* idx) +{ + short CV_DECL_ALIGNED(32) elems[8] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[0] + 2], + tab[idx[0] + 3], + tab[idx[1]], + tab[idx[1] + 1], + tab[idx[1] + 2], + tab[idx[1] + 3] + }; + vsetvlmax_e16m1(); + return v_int16x8(vle16_v_i16m1(elems)); +} +inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } +inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } +inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_quads((short*)tab, idx)); } + +inline v_int32x4 v_lut(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} +inline v_int32x4 v_lut_quads(const int* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(tab + idx[0])); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } +inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_pairs((int*)tab, idx)); } +inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut_quads((int*)tab, idx)); } + +inline v_int64x2 v_lut(const int64_t* tab, const int* idx) +{ + int64_t CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(elems)); +} +inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_int64x2(vle64_v_i64m1(tab + idx[0])); +} +inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } +inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } + +inline v_float32x4 v_lut(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[1]], + tab[idx[2]], + tab[idx[3]] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[idx[0]], + tab[idx[0] + 1], + tab[idx[1]], + tab[idx[1] + 1] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} +inline v_float32x4 v_lut_quads(const float* tab, const int* idx) +{ + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(tab + idx[0])); +} + +inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) +{ + int CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + 
tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_int32x4(vle32_v_i32m1(elems)); +} + +inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) +{ + unsigned CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_uint32x4(vle32_v_u32m1(elems)); +} + +inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)], + tab[v_extract_n<2>(idxvec)], + tab[v_extract_n<3>(idxvec)] + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) +{ + int CV_DECL_ALIGNED(32) idx[4]; + v_store_aligned(idx, idxvec); + + x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); + y = v_float32x4(tab[idx[0]+1], tab[idx[1]+1], tab[idx[2]+1], tab[idx[3]+1]); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_lut(const double* tab, const int* idx) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[idx[0]], + tab[idx[1]] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) +{ + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(tab + idx[0])); +} + +inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) +{ + double CV_DECL_ALIGNED(32) elems[2] = + { + tab[v_extract_n<0>(idxvec)], + tab[v_extract_n<1>(idxvec)] + }; + vsetvlmax_e64m1(); + return v_float64x2(vle64_v_f64m1(elems)); +} + +inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) +{ + int CV_DECL_ALIGNED(32) idx[4] = {0}; + v_store_aligned(idx, idxvec); + + x = v_float64x2(tab[idx[0]], tab[idx[1]]); + y = v_float64x2(tab[idx[0]+1], tab[idx[1]+1]); +} +#endif + +////////////// Pack boolean //////////////////// + +inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) +{ + ushort CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 8, b); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, + const v_uint32x4& c, const v_uint32x4& d) +{ + unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 4, b); + v_store(ptr + 8, c); + v_store(ptr + 12, d); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr), 0), 0)); +} + +inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, + const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, + const v_uint64x2& g, const v_uint64x2& h) +{ + uint64 CV_DECL_ALIGNED(32) ptr[16] = {0}; + v_store(ptr, a); + v_store(ptr + 2, b); + v_store(ptr + 4, c); + v_store(ptr + 6, d); + v_store(ptr + 8, e); + v_store(ptr + 10, f); + v_store(ptr + 12, g); + v_store(ptr + 14, h); + vsetvlmax_e8m1(); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr), 0), 0), 0)); +} + +////////////// Arithmetics ////////////// +#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, width) \ +inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ { \ - _Tp c = a.s[0]; \ - for( int i = 1; i < n; i++ ) \ - c = cfunc(c, a.s[i]); \ - return c; \ + 
vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} \ +inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + a = _Tpvec(intrin(a, b)); \ + return a; \ } -OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 64) +#endif -OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) +////////////// Bitwise logic ////////////// -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) +#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, width) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, width) \ +inline _Tpvec operator ~ (const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vnot_v_##suffix##m1(a)); \ +} + +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 64) + +#define 
OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \ +inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ +} \ +inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ +{ \ + vsetvlmax_e32m1(); \ + a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(&, vand_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(|, vor_vv_i32m1) +OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1) + +inline v_float32x4 operator ~ (const v_float32x4& a) +{ + vsetvlmax_e32m1(); + return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a)))); +} + +#if CV_SIMD128_64F +#define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \ +inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ +} \ +inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ +{ \ + vsetvlmax_e64m1(); \ + a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ + return a; \ +} + +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(&, vand_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(|, vor_vv_i64m1) +OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1) + +inline v_float64x2 operator ~ (const v_float64x2& a) +{ + vsetvlmax_e64m1(); + return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a)))); +} +#endif + +////////////// Bitwise shifts ////////////// + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ +} + +#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +inline _Tpvec operator << (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +inline _Tpvec operator >> (const _Tpvec& a, int n) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shl(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ +} \ +template inline _Tpvec v_shr(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ +} + +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 32) 
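// --- Usage sketch (editorial, not part of the patch) -------------------------
// The shift macros above pick vsrl (logical, zero fill) for unsigned lanes and
// vsra (arithmetic, sign fill) for signed lanes; v_shl/v_shr take the shift
// amount as a template parameter. A self-contained demo, assuming the umbrella
// header; the function name shift_demo is an illustrative assumption:
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

inline void shift_demo()
{
    v_int16x8  s  = v_setall_s16((short)-8);
    v_int16x8  sa = s >> 2;                  // arithmetic shift: every lane becomes -2
    v_uint16x8 u  = v_setall_u16((ushort)0x8000);
    v_uint16x8 ul = u >> 15;                 // logical shift: every lane becomes 1
    v_int16x8  tw = v_shl<1>(s);             // compile-time amount: every lane becomes -16
    (void)sa; (void)ul; (void)tw;            // silence unused-variable warnings
}
// -----------------------------------------------------------------------------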
+OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 64) + + +////////////// Comparison ////////////// + +#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ +} + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, width) + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, width) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, width) + + +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64) +#endif + +inline v_float32x4 v_not_nan(const v_float32x4& a) +{ return a == a; } + +#if CV_SIMD128_64F +inline v_float64x2 v_not_nan(const v_float64x2& a) +{ return a == a; } +#endif + +////////////// Min/Max ////////////// + +#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, width) \ +inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(intrin(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, 
vminu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 32) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 64) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 64) +#endif + +////////////// Arithmetics wrap ////////////// + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 16) + +////////////// Reduce ////////////// + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, wwidth, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + vsetvlmax_e##wwidth##m1(); \ + _nwTpvec zero = vzero_##wsuffix##m1(); \ + _nwTpvec res = vzero_##wsuffix##m1(); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 32, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 32, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 64, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 64, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 32, fredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 64, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, 
i64, i64, 64, redsum) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 64, fredsum) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, width, red) \ +inline scalartype v_reduce_##func(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a)); \ + return scalartype(res.get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 8, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 8, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 16, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 16, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 32, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 32, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 32, fredmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 8, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 8, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 16, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 16, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 32, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 32, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 32, fredmax) + + +inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, + const v_float32x4& c, const v_float32x4& d) +{ + float CV_DECL_ALIGNED(32) elems[4] = + { + v_reduce_sum(a), + v_reduce_sum(b), + v_reduce_sum(c), + v_reduce_sum(d) + }; + vsetvlmax_e32m1(); + return v_float32x4(vle32_v_f32m1(elems)); +} + +////////////// Square-Root ////////////// + +inline v_float32x4 v_sqrt(const v_float32x4& x) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfsqrt_v_f32m1(x)); +} + +inline v_float32x4 v_invsqrt(const v_float32x4& x) +{ + v_float32x4 one = v_setall_f32(1.0f); + return one / v_sqrt(x); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_sqrt(const v_float64x2& x) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfsqrt_v_f64m1(x)); +} + +inline v_float64x2 v_invsqrt(const v_float64x2& x) +{ + v_float64x2 one = v_setall_f64(1.0f); + return one / v_sqrt(x); +} +#endif + +inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); + return v_sqrt(x); +} + +inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); +} +#endif + +////////////// Multiply-Add ////////////// + +inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + vsetvlmax_e32m1(); + return v_float32x4(vfmacc_vv_f32m1(c, a, b)); +} +inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + vsetvlmax_e32m1(); + return v_int32x4(vmacc_vv_i32m1(c, a, b)); +} + +inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) +{ + return v_fma(a, b, c); +} + 
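// --- Usage sketch (editorial, not part of the patch) -------------------------
// v_fma(a, b, c) maps to a single fused vfmacc and computes a*b + c per lane;
// on this backend v_muladd simply forwards to v_fma. Example: evaluating the
// quadratic a*x^2 + b*x + c with two fused ops via Horner's rule (the helper
// name horner2 is an illustrative assumption):
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

inline v_float32x4 horner2(const v_float32x4& x, float a, float b, float c)
{
    v_float32x4 t = v_fma(v_setall_f32(a), x, v_setall_f32(b)); // t = a*x + b
    return v_fma(t, x, v_setall_f32(c));                        // (a*x + b)*x + c
}
// -----------------------------------------------------------------------------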
+inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) +{ + return v_fma(a, b, c); +} + +#if CV_SIMD128_64F +inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + vsetvlmax_e64m1(); + return v_float64x2(vfmacc_vv_f64m1(c, a, b)); +} + +inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) +{ + return v_fma(a, b, c); +} +#endif + +////////////// Check all/any ////////////// + +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, width) \ +inline bool v_check_all(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a), shift)); \ + return (v.val[0] | v.val[1]) == 0; \ +} \ +inline bool v_check_any(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift)); \ + return (v.val[0] | v.val[1]) != 0; \ +} + +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 32) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 64) + + +inline bool v_check_all(const v_int8x16& a) +{ return v_check_all(v_reinterpret_as_u8(a)); } +inline bool v_check_any(const v_int8x16& a) +{ return v_check_any(v_reinterpret_as_u8(a)); } + +inline bool v_check_all(const v_int16x8& a) +{ return v_check_all(v_reinterpret_as_u16(a)); } +inline bool v_check_any(const v_int16x8& a) +{ return v_check_any(v_reinterpret_as_u16(a)); } + +inline bool v_check_all(const v_int32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_int32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_float32x4& a) +{ return v_check_all(v_reinterpret_as_u32(a)); } +inline bool v_check_any(const v_float32x4& a) +{ return v_check_any(v_reinterpret_as_u32(a)); } + +inline bool v_check_all(const v_int64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_int64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } + +#if CV_SIMD128_64F +inline bool v_check_all(const v_float64x2& a) +{ return v_check_all(v_reinterpret_as_u64(a)); } +inline bool v_check_any(const v_float64x2& a) +{ return v_check_any(v_reinterpret_as_u64(a)); } +#endif + +////////////// abs ////////////// + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF(_Tpvec, abs) \ +inline _Tpvec v_##abs(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_max(a, b) - v_min(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint8x16, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint16x8, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_uint32x4, absdiff) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float32x4, absdiff) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) +#endif +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) +OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) + +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width) \ +inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b)), 0)); \ +} + +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 
32) + +#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ +inline _Tprvec v_abs(const _Tpvec& a) \ +{ \ + return v_absdiff(a, v_setzero_##suffix()); \ +} + +OPENCV_HAL_IMPL_RVV_ABS(v_uint8x16, v_int8x16, s8) +OPENCV_HAL_IMPL_RVV_ABS(v_uint16x8, v_int16x8, s16) +OPENCV_HAL_IMPL_RVV_ABS(v_uint32x4, v_int32x4, s32) +OPENCV_HAL_IMPL_RVV_ABS(v_float32x4, v_float32x4, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_ABS(v_float64x2, v_float64x2, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SAD(_Tpvec, scalartype) \ +inline scalartype v_reduce_sad(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return v_reduce_sum(v_absdiff(a, b)); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int8x16, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int16x8, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_int32x4, unsigned) +OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32x4, float) + +////////////// Select ////////////// + +#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, width) \ +inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(merge(ne(mask, 0), b, a)); \ +} + +OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 64) +#endif + +////////////// Rotate shift ////////////// + +#define OPENCV_HAL_IMPL_RVV_ROTATE_OP(_Tpvec, suffix, width) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n), b, _Tpvec::nlanes - n)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + vsetvlmax_e##width##m1(); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), b, _Tpvec::nlanes - n), a, n)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } + + +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint8x16, u8, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int8x16, i8, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint16x8, u16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int16x8, i16, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint32x4, u32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int32x4, i32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float32x4, f32, 32) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint64x2, u64, 64) +OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int64x2, i64, 64) 
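// --- Usage sketch (editorial, not part of the patch) -------------------------
// The rotate ops are built from vslideup/vslidedown and shift lanes across the
// whole register: the one-vector forms fill vacated lanes with zeros, while the
// two-vector forms funnel lanes in from the second operand. Example: a 3-tap
// sliding-window sum across the boundary of two consecutive registers (the
// helper name sum3 is an illustrative assumption):
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

inline v_int32x4 sum3(const v_int32x4& cur, const v_int32x4& next)
{
    v_int32x4 s1 = v_rotate_right<1>(cur, next); // lanes {cur1, cur2, cur3, next0}
    v_int32x4 s2 = v_rotate_right<2>(cur, next); // lanes {cur2, cur3, next0, next1}
    return cur + s1 + s2;                        // out[i] = in[i] + in[i+1] + in[i+2]
}
// -----------------------------------------------------------------------------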
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float64x2, f64, 64)
+#endif
+
+////////////// Convert to float //////////////
+
+inline v_float32x4 v_cvt_f32(const v_int32x4& a)
+{
+    vsetvlmax_e32m1();
+    return v_float32x4(vfcvt_f_x_v_f32m1(a));
+}
+
+#if CV_SIMD128_64F
+inline v_float32x4 v_cvt_f32(const v_float64x2& a)
+{
+    double arr[4] = {a.val[0], a.val[1], 0, 0};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    vsetvlmax_e32m1();
+    return v_float32x4(vfncvt_f_f_w_f32m1(tmp));
+}
+
+inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
+{
+    double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    vsetvlmax_e32m1();
+    return v_float32x4(vfncvt_f_f_w_f32m1(tmp));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int32x4& a)
+{
+    double CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a));
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        ptr[0], ptr[1]
+    };
+    vsetvlmax_e64m1();
+    return v_float64x2(vle64_v_f64m1(elems));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
+{
+    double CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a));
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        ptr[2], ptr[3]
+    };
+    vsetvlmax_e64m1();
+    return v_float64x2(vle64_v_f64m1(elems));
+}
+
+inline v_float64x2 v_cvt_f64(const v_float32x4& a)
+{
+    double CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a));
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        ptr[0], ptr[1]
+    };
+    vsetvlmax_e64m1();
+    return v_float64x2(vle64_v_f64m1(elems));
+}
+
+inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
+{
+    double CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a));
+    double CV_DECL_ALIGNED(32) elems[2] =
+    {
+        ptr[2], ptr[3]
+    };
+    vsetvlmax_e64m1();
+    return v_float64x2(vle64_v_f64m1(elems));
+}
+
+inline v_float64x2 v_cvt_f64(const v_int64x2& a)
+{
+    vsetvlmax_e64m1();
+    return v_float64x2(vfcvt_f_x_v_f64m1(a));
+}
+#endif
+
+////////////// Broadcast //////////////
+
+#define OPENCV_HAL_IMPL_RVV_BROADCAST(_Tpvec, suffix) \
+template<int i> inline _Tpvec v_broadcast_element(_Tpvec v) \
+{ \
+    return v_setall_##suffix(v_extract_n<i>(v)); \
+}
+
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint8x16, u8)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_int8x16, s8)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint16x8, u16)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_int16x8, s16)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint32x4, u32)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_int32x4, s32)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_uint64x2, u64)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_int64x2, s64)
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_float32x4, f32)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_RVV_BROADCAST(v_float64x2, f64)
+#endif
+
+////////////// Transpose4x4 //////////////
+
+#define OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(_Tpvec, _Tp, suffix) \
+inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
+                           const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
+                           v_##_Tpvec& b0, v_##_Tpvec& b1, \
+                           v_##_Tpvec& b2, v_##_Tpvec& b3) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) elems0[4] = \
+    { \
+        v_extract_n<0>(a0), \
+        v_extract_n<0>(a1), \
+        v_extract_n<0>(a2), \
+        v_extract_n<0>(a3) \
+    }; \
+    b0 = v_load(elems0); \
+    _Tp CV_DECL_ALIGNED(32) elems1[4] = \
+    { \
+        v_extract_n<1>(a0), \
+        v_extract_n<1>(a1), \
+        v_extract_n<1>(a2), \
+        v_extract_n<1>(a3) \
+    }; \
+    b1 = v_load(elems1); \
+    _Tp CV_DECL_ALIGNED(32) elems2[4] = \
+    { \
+        v_extract_n<2>(a0), \
+        v_extract_n<2>(a1), \
+        v_extract_n<2>(a2), \
+        v_extract_n<2>(a3) \
+    }; \
+    b2 = v_load(elems2); \
+    _Tp CV_DECL_ALIGNED(32) elems3[4] = \
+    { \
+        v_extract_n<3>(a0), \
+        v_extract_n<3>(a1), \
+        v_extract_n<3>(a2), \
+        v_extract_n<3>(a3) \
+    }; \
+    b3 = v_load(elems3); \
+}
+
+OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(int32x4, int, i32)
+OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32)
+
+////////////// Reverse //////////////
+
+#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, width, suffix) \
+inline _Tpvec v_reverse(const _Tpvec& a) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptra[_Tpvec::nlanes] = {0}; \
+    v_store(ptra, a); \
+    for (int i = 0; i < _Tpvec::nlanes; i++) \
+    { \
+        ptr[i] = ptra[_Tpvec::nlanes-i-1]; \
+    } \
+    return v_load(ptr); \
+}
+
+OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, 8, u8)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, 8, i8)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, 16, u16)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, 16, i16)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, 32, u32)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, 32, i32)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, 32, f32)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, 64, u64)
+OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, 64, i64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, 64, f64)
+#endif
+
+//////////// Value reordering ////////////
+
+#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt) \
+inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \
+    v_store_low(lptr, a); \
+    v_store_high(hptr, a); \
+    b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \
+    b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \
+} \
+inline _Tpwvec v_expand_low(const _Tpvec& a) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \
+    v_store_low(lptr, a); \
+    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \
+} \
+inline _Tpwvec v_expand_high(const _Tpvec& a) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \
+    v_store_high(hptr, a); \
+    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \
+} \
+inline _Tpwvec v_load_expand(const _Tp* ptr) \
+{ \
+    return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr))); \
+}
+
+OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1)
+OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1)
+OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1)
+OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1)
+OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1)
+OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1)
+
+inline v_uint32x4 v_load_expand_q(const uchar* ptr)
+{
+    vsetvlmax_e32m1();
+    return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr))));
+}
+
+inline v_int32x4 v_load_expand_q(const schar* ptr)
+{
+    vsetvlmax_e32m1();
+    return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr))));
+}
+
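+
+// A small usage sketch of the widening loads above (the buffer name is
+// illustrative):
+//
+//   uchar buf[16];
+//   v_uint8x16 a = v_load(buf);
+//   v_uint16x8 lo, hi;
+//   v_expand(a, lo, hi);                  // lo = lanes 0..7, hi = lanes 8..15
+//   v_uint16x8 w = v_load_expand(buf);    // load 8 bytes, widen 8 -> 16 bit
+//   v_uint32x4 q = v_load_expand_q(buf);  // load 4 bytes, widen 8 -> 32 bit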
+
+#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr) \
+inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, b); \
+    vsetvlmax_e##width##m2(); \
+    return _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0)); \
+} \
+inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
+    vsetvlmax_e##width##m2(); \
+    v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0))); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, b); \
+    vsetvlmax_e##width##m2(); \
+    return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n)); \
+} \
+template<int n> inline \
+void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
+    vsetvlmax_e##width##m2(); \
+    v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n))); \
+}
+
+OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1)
+OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1)
+OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1)
+OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1)
+OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1)
+OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1)
+
+
+#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast) \
+inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, b); \
+    vsetvlmax_e##width##m2(); \
+    return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0)); \
+} \
+inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
+    vsetvlmax_e##width##m2(); \
+    v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0))); \
+} \
+template<int n> inline \
+_Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, b); \
+    vsetvlmax_e##width##m2(); \
+    return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n)); \
+} \
+template<int n> inline \
+void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \
+{ \
+    _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \
+    v_store(arr, a); \
+    v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \
+    vsetvlmax_e##width##m2(); \
+    v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n))); \
+}
+
+OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2)
+OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2)
+
+
+#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, width, suffix) \
+inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& 
b0, v_##_Tpvec& b1) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptra1[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb0[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb1[v_##_Tpvec::nlanes] = {0}; \ + v_store(ptra0, a0); \ + v_store(ptra1, a1); \ + int i; \ + for( i = 0; i < v_##_Tpvec::nlanes/2; i++ ) \ + { \ + ptrb0[i*2] = ptra0[i]; \ + ptrb0[i*2+1] = ptra1[i]; \ + } \ + for( ; i < v_##_Tpvec::nlanes; i++ ) \ + { \ + ptrb1[i*2-v_##_Tpvec::nlanes] = ptra0[i]; \ + ptrb1[i*2-v_##_Tpvec::nlanes+1] = ptra1[i]; \ + } \ + b0 = v_load(ptrb0); \ + b1 = v_load(ptrb1); \ +} \ +inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_low(ptra, a); \ + v_store_low(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + v_store_high(ptra, a); \ + v_store_high(ptrb, b); \ + return v_load_halves(ptra, ptrb); \ +} \ +inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + c = v_combine_low(a, b); \ + d = v_combine_high(a, b); \ +} + +OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, 8, u8) +OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, 8, i8) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, 16, u16) +OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, 16, i16) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, 32, u32) +OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, 32, i32) +OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, 32, f32) +#if CV_SIMD128_64F +OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, 64, f64) +#endif + + +#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width) \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + int i, i2; \ + for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ + { \ + ptra[i] = ptr[i2]; \ + ptrb[i] = ptr[i2+1]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + int i, i3; \ + for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ + { \ + ptra[i] = ptr[i3]; \ + ptrb[i] = ptr[i3+1]; \ + ptrc[i] = ptr[i3+2]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ +} \ +inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ + v_##_Tpvec& c, v_##_Tpvec& d) \ +{ \ + _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + int i, i4; \ + for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ + { \ + ptra[i] = ptr[i4]; \ + ptrb[i] = ptr[i4+1]; \ + ptrc[i] = ptr[i4+2]; \ + ptrd[i] = ptr[i4+3]; \ + } \ + a = v_load(ptra); \ + b = v_load(ptrb); \ + c = v_load(ptrc); \ + d = v_load(ptrd); \ +} \ +inline void v_store_interleave( _Tp* ptr, const 
v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    int i, i2; \
+    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
+    v_store(ptra, a); \
+    v_store(ptrb, b); \
+    for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \
+    { \
+        ptr[i2] = ptra[i]; \
+        ptr[i2+1] = ptrb[i]; \
+    } \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
+{ \
+    int i, i3; \
+    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
+    v_store(ptra, a); \
+    v_store(ptrb, b); \
+    v_store(ptrc, c); \
+    for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \
+    { \
+        ptr[i3] = ptra[i]; \
+        ptr[i3+1] = ptrb[i]; \
+        ptr[i3+2] = ptrc[i]; \
+    } \
+} \
+inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
+                                const v_##_Tpvec& c, const v_##_Tpvec& d, \
+                                hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
+{ \
+    int i, i4; \
+    _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \
+    v_store(ptra, a); \
+    v_store(ptrb, b); \
+    v_store(ptrc, c); \
+    v_store(ptrd, d); \
+    for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \
+    { \
+        ptr[i4] = ptra[i]; \
+        ptr[i4+1] = ptrb[i]; \
+        ptr[i4+2] = ptrc[i]; \
+        ptr[i4+3] = ptrd[i]; \
+    } \
+} \
+inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \
+    v_store(ptrvec, vec); \
+    for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \
+    { \
+        ptr[4*i  ] = ptrvec[4*i  ]; \
+        ptr[4*i+1] = ptrvec[4*i+2]; \
+        ptr[4*i+2] = ptrvec[4*i+1]; \
+        ptr[4*i+3] = ptrvec[4*i+3]; \
+    } \
+    return v_load(ptr); \
+} \
+inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \
+    v_store(ptrvec, vec); \
+    for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \
+    { \
+        ptr[8*i  ] = ptrvec[8*i  ]; \
+        ptr[8*i+1] = ptrvec[8*i+4]; \
+        ptr[8*i+2] = ptrvec[8*i+1]; \
+        ptr[8*i+3] = ptrvec[8*i+5]; \
+        ptr[8*i+4] = ptrvec[8*i+2]; \
+        ptr[8*i+5] = ptrvec[8*i+6]; \
+        ptr[8*i+6] = ptrvec[8*i+3]; \
+        ptr[8*i+7] = ptrvec[8*i+7]; \
+    } \
+    return v_load(ptr); \
+}
+
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar, u8, 8)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar, i8, 8)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort, u16, 16)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short, i16, 16)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned, u32, 32)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int, i32, 32)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float, f32, 32)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64, u64, 64)
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64, i64, 64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double, f64, 64)
+#endif
+
+//////////// PopCount ////////////
 
 static const unsigned char popCountTable[] =
 {
@@ -325,1354 +2332,571 @@ static const unsigned char popCountTable[] =
     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
 };
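+
+// The 256-entry table above gives the bit count of every byte value, so a
+// vector popcount is just one table lookup per byte. The same technique in
+// scalar form (an illustrative sketch):
+//
+//   unsigned popcount_u32(unsigned x)
+//   {
+//       return popCountTable[x & 0xff] + popCountTable[(x >> 8) & 0xff] +
+//              popCountTable[(x >> 16) & 0xff] + popCountTable[x >> 24];
+//   }
+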
-template<typename _Tp, int n>
-inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
-    for (int i = 0; i < n*(int)sizeof(_Tp); i++)
-        b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
-    return b;
-}
-
-template<typename _Tp, int n>
-inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                      v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
-{
-    for( int i = 0; i < n; i++ )
-    {
-        minval.s[i] = std::min(a.s[i], b.s[i]);
-        maxval.s[i] = std::max(a.s[i], b.s[i]);
-    }
-}
-
-#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
-template<typename _Tp, int n> \
-inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
-{ \
-    typedef typename V_TypeTraits<_Tp>::int_type itype; \
-    v_reg<_Tp, n> c; \
-    for( int i = 0; i < n; i++ ) \
-        c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
-    return c; \
-}
+#define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \
+inline _rTpvec v_popcount(const _Tpvec& a) \
+{ \
+    uchar CV_DECL_ALIGNED(32) ptra[16] = {0}; \
+    v_store(ptra, v_reinterpret_as_u8(a)); \
+    _rTp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
+    v_store(ptr, v_setzero_##suffix()); \
+    for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \
+        ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \
+    return v_load(ptr); \
+}
-OPENCV_HAL_IMPL_CMP_OP(<)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_uint8x16, uchar, uchar, u8)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint8x16, v_int8x16, uchar, schar, u8)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_uint16x8, ushort, ushort, u16)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint16x8, v_int16x8, ushort, short, u16)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_uint32x4, unsigned, unsigned, u32)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint32x4, v_int32x4, unsigned, int, u32)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_uint64x2, uint64, uint64, u64)
+OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64)
-OPENCV_HAL_IMPL_CMP_OP(>)
+//////////// SignMask ////////////
-OPENCV_HAL_IMPL_CMP_OP(<=)
-
-OPENCV_HAL_IMPL_CMP_OP(>=)
-
-OPENCV_HAL_IMPL_CMP_OP(==)
-
-OPENCV_HAL_IMPL_CMP_OP(!=)
-
-template<int n>
-inline v_reg<float, n> v_not_nan(const v_reg<float, n>& a)
-{
-    typedef typename V_TypeTraits<float>::int_type itype;
-    v_reg<float, n> c;
-    for (int i = 0; i < n; i++)
-        c.s[i] = V_TypeTraits<float>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
-    return c;
-}
-template<int n>
-inline v_reg<double, n> v_not_nan(const v_reg<double, n>& a)
-{
-    typedef typename V_TypeTraits<double>::int_type itype;
-    v_reg<double, n> c;
-    for (int i = 0; i < n; i++)
-        c.s[i] = V_TypeTraits<double>::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i]));
-    return c;
-}
-
-#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \
-template<typename _Tp, int n> \
-inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
-{ \
-    typedef _Tp2 rtype; \
-    v_reg<rtype, n> c; \
-    for( int i = 0; i < n; i++ ) \
-        c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
-    return c; \
-}
+#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, width, shift) \
+inline int v_signmask(const _Tpvec& a) \
+{ \
+    int mask = 0; \
+    vsetvlmax_e##width##m1(); \
+    _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift)); \
+    for( int i = 0; i < _Tpvec::nlanes; i++ ) \
+        mask |= (int)(tmp.val[i]) << i; \
+    return mask; \
+}
-OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp)
+OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 8, 7)
+OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 16, 15)
+OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 32, 31)
+OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 64, 63)
-OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp)
+inline int v_signmask(const v_int8x16& a)
+{ return v_signmask(v_reinterpret_as_u8(a)); }
+inline int v_signmask(const v_int16x8& a)
+{ return v_signmask(v_reinterpret_as_u16(a)); }
+inline int v_signmask(const v_int32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_float32x4& a)
+{ return v_signmask(v_reinterpret_as_u32(a)); }
+inline int v_signmask(const v_int64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#if CV_SIMD128_64F
+inline int v_signmask(const v_float64x2& a)
+{ return v_signmask(v_reinterpret_as_u64(a)); }
+#endif
-OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp)
-template<typename T> inline T _absdiff(T a, T b)
-{
-    return a > b ? a - b : b - a;
-}
+//////////// Scan forward ////////////
-template<typename _Tp, int n>
-inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
-{
-    typedef typename V_TypeTraits<_Tp>::abs_type rtype;
-    v_reg<rtype, n> c;
-    const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
-    for( int i = 0; i < n; i++ )
-    {
-        rtype ua = a.s[i] ^ mask;
-        rtype ub = b.s[i] ^ mask;
-        c.s[i] = _absdiff(ua, ub);
-    }
-    return c;
-}
-
-inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
-{
-    v_float32x4 c;
-    for( int i = 0; i < c.nlanes; i++ )
-        c.s[i] = _absdiff(a.s[i], b.s[i]);
-    return c;
-}
-
-inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
-{
-    v_float64x2 c;
-    for( int i = 0; i < c.nlanes; i++ )
-        c.s[i] = _absdiff(a.s[i], b.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++)
-        c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i]));
-    return c;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
-{
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = 1.f/std::sqrt(a.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
-    return c;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                           const v_reg<_Tp, n>& c)
-{
-    v_reg<_Tp, n> d;
-    for( int i = 0; i < n; i++ )
-        d.s[i] = a.s[i]*b.s[i] + c.s[i];
-    return d;
-}
-
-template<typename _Tp, int n>
-inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                              const v_reg<_Tp, n>& c)
-{
-    return v_fma(a, b, c);
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<w_type, n/2> c;
-    for( int i = 0; i < (n/2); i++ )
-        c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
-    return c;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-          const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
-{
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<w_type, n/2> s;
-    for( int i = 0; i < (n/2); i++ )
-        s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i];
-    return s;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{ return v_dotprod(a, b); }
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_dotprod_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-               const v_reg<typename V_TypeTraits<_Tp>::w_type, n / 2>& c)
-{ return v_dotprod(a, b, c); }
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
-v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    typedef typename V_TypeTraits<_Tp>::q_type q_type;
-    v_reg<q_type, n/4> s;
-    for( int i = 0; i < (n/4); i++ )
-        s.s[i] = (q_type)a.s[i*4    ]*b.s[i*4    ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
-                 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3];
-    return s;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
-v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                 const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
-{
-    typedef typename V_TypeTraits<_Tp>::q_type q_type;
-    v_reg<q_type, n/4> s;
-    for( int i = 0; i < (n/4); i++ )
-        s.s[i] = (q_type)a.s[i*4    ]*b.s[i*4    ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] +
-                 (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i];
-    return s;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
-v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{ return v_dotprod_expand(a, b); }
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::q_type, n/4>
-v_dotprod_expand_fast(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                      const v_reg<typename V_TypeTraits<_Tp>::q_type, n / 4>& c)
-{ return v_dotprod_expand(a, b, c); }
-
-template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
-                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
-                                                       v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
-{
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    for( int i = 0; i < (n/2); i++ )
-    {
-        c.s[i] = (w_type)a.s[i]*b.s[i];
-        d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
-    }
-}
-
-template<typename _Tp, int n> inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<_Tp, n> c;
-    for (int i = 0; i < n; i++)
-        c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8);
-    return c;
-}
-
-template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
-                                                 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
-{
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    for( int i = 0; i < (n/2); i++ )
-    {
-        c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
-    }
-}
-
-#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
-template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
-{ \
-    v_reg<_Tp, n> c; \
-    for( int i = 0; i < n; i++ ) \
-        c.s[i] = (_Tp)(a.s[i] shift_op imm); \
-    return c; \
-}
+#define OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(_Tpvec, _Tp, suffix) \
+inline int v_scan_forward(const _Tpvec& a) \
+{ \
+    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
+    v_store(ptr, v_reinterpret_as_##suffix(a)); \
+    for (int i = 0; i < _Tpvec::nlanes; i++) \
+        if(int(ptr[i]) < 0) \
+            return i; \
+    return 0; \
+}
-OPENCV_HAL_IMPL_SHIFT_OP(<< )
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint8x16, uchar, u8)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int8x16, schar, s8)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint16x8, ushort, u16)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int16x8, short, s16)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint32x4, unsigned, u32)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float32x4, float, f32)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_uint64x2, uint64, u64)
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_int64x2, int64, s64)
+#if CV_SIMD128_64F
+OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64)
+#endif
-OPENCV_HAL_IMPL_SHIFT_OP(>> )
+//////////// Pack triplets ////////////
-#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \
-template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \
+#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, _Tp) \
+inline _Tpvec v_pack_triplets(const _Tpvec& vec) \
 { \
-    v_reg<_Tp, n> b; \
-    for (int i = 0; i < n; i++) \
+    _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \
+    _Tp CV_DECL_ALIGNED(32) ptrvec[_Tpvec::nlanes] = {0}; \
+    v_store(ptrvec, vec); \
+    for (int i = 0; i < _Tpvec::nlanes/4; i++) \
     { \
-        int sIndex = i opA imm; \
-        if (0 <= sIndex && sIndex < n) \
-        { \
-            b.s[i] = a.s[sIndex]; \
-        } \
-        else \
-        { \
-            b.s[i] = 0; \
-        } \
+        ptr[3*i  ] = ptrvec[4*i  ]; \
+        ptr[3*i+1] = ptrvec[4*i+1]; \
+        ptr[3*i+2] = ptrvec[4*i+2]; \
     } \
-    return b; \
-} \
-template<int imm, typename _Tp, int n> inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
-{ \
-    v_reg<_Tp, n> c; \
-    for (int i = 0; i < n; i++) \
-    { \
-        int aIndex = i opA imm; \
-        int bIndex = i opA imm opB n; \
-        if (0 <= bIndex && bIndex < n) \
-        { \
-            c.s[i] = b.s[bIndex]; \
-        } \
-        else if (0 <= aIndex && aIndex < n) \
-        { \
-            c.s[i] = a.s[aIndex]; \
-        } \
-        else \
-        { \
-            c.s[i] = 0; \
-        } \
-    } \
-    return c; \
-}
+    return v_load(ptr); \
+}
-OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8x16, uchar)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8x16, schar)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16x8, ushort)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16x8, short)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32x4, unsigned)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32x4, int)
+OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float)
-OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -)
-template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
-{
-    typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
-    for( int i = 1; i < n; i++ )
-        c += a.s[i];
-    return c;
-}
-
-inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
-                                 const v_float32x4& c, const v_float32x4& d)
-{
-    v_float32x4 r;
-    r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3];
-    r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3];
-    r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3];
-    r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3];
-    return r;
-}
-
-template<typename _Tp, int n> inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]);
-    for (int i = 1; i < n; i++)
-        c += _absdiff(a.s[i], b.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
-{
-    int mask = 0;
-    for( int i = 0; i < n; i++ )
-        mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
-    return mask;
-}
-
-template<typename _Tp, int n> inline int v_scan_forward(const v_reg<_Tp, n>& a)
-{
-    for (int i = 0; i < n; i++)
-        if(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0)
-            return i;
-    return 0;
-}
-
-template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
-{
-    for( int i = 0; i < n; i++ )
-        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
-            return false;
-    return true;
-}
+////// FP16 support ///////
+
+#if CV_FP16
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr)));
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v));
+}
+#else
+inline v_float32x4 v_load_expand(const float16_t* ptr)
+{
+    const int N = 4;
+    float buf[N];
+    for( int i = 0; i < N; i++ ) buf[i] = (float)ptr[i];
+    return v_load(buf);
+}
+
+inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
+{
+    const int N = 4;
+    float buf[N];
+    v_store(buf, v);
+    for( int i = 0; i < N; i++ ) ptr[i] = float16_t(buf[i]);
+}
+#endif
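+
+// Usage sketch for the FP16 bridge above (buffer names are illustrative);
+// both branches expose the same interface, so callers never depend on CV_FP16:
+//
+//   float16_t src[4], dst[4];
+//   v_float32x4 v = v_load_expand(src);   // 4 x fp16 -> 4 x fp32
+//   v = v * v;                            // any fp32 arithmetic
+//   v_pack_store(dst, v);                 // 4 x fp32 -> 4 x fp16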
-
-template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
-{
-    for( int i = 0; i < n; i++ )
-        if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
-            return true;
-    return false;
-}
-
-template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
-                                                            const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
-{
-    typedef V_TypeTraits<_Tp> Traits;
-    typedef typename Traits::int_type int_type;
-    v_reg<_Tp, n> c;
-    for( int i = 0; i < n; i++ )
-    {
-        int_type m = Traits::reinterpret_int(mask.s[i]);
-        CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc
-        c.s[i] = m ? a.s[i] : b.s[i];
-    }
-    return c;
-}
-
-template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
-                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
-                            v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
-{
-    for( int i = 0; i < (n/2); i++ )
-    {
-        b0.s[i] = a.s[i];
-        b1.s[i] = a.s[i+(n/2)];
-    }
-}
-
-template<typename _Tp, int n>
-inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_expand_low(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
-    for( int i = 0; i < (n/2); i++ )
-        b.s[i] = a.s[i];
-    return b;
-}
-
-template<typename _Tp, int n>
-inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
-v_expand_high(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> b;
-    for( int i = 0; i < (n/2); i++ )
-        b.s[i] = a.s[i+(n/2)];
-    return b;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
-    v_reinterpret_as_int(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
-    v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
-{
-    v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
-    for( int i = 0; i < n; i++ )
-        c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
-    return c;
-}
-
-template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
-                                                 v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
-{
-    int i;
-    for( i = 0; i < n/2; i++ )
-    {
-        b0.s[i*2] = a0.s[i];
-        b0.s[i*2+1] = a1.s[i];
-    }
-    for( ; i < n; i++ )
-    {
-        b1.s[i*2-n] = a0.s[i];
-        b1.s[i*2-n+1] = a1.s[i];
-    }
-}
-
-template<typename _Tp>
-inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr)
-{
-#if CV_STRONG_ALIGNMENT
-    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
-#endif
-    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
-}
-
-template<typename _Tp>
-inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr)
-{
-    CV_Assert(isAligned<sizeof(v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>)>(ptr));
-    return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr);
-}
-
-template<typename _Tp>
-inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr)
-{
-#if CV_STRONG_ALIGNMENT
-    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
-#endif
-    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
-    for( int i = 0; i < c.nlanes/2; i++ )
-    {
-        c.s[i] = ptr[i];
-    }
-    return c;
-}
+
+////////////// Rounding //////////////
+
+inline v_int32x4 v_round(const v_float32x4& a)
+{
+    vsetvlmax_e32m1();
+    return v_int32x4(vfcvt_x_f_v_i32m1(a));
+}
+
+inline v_int32x4 v_floor(const v_float32x4& a)
+{
+    v_float32x4 ZP5 = v_setall_f32(0.5f);
+    v_float32x4 t = a - ZP5;
+    vsetvlmax_e32m1();
+    return v_int32x4(vfcvt_x_f_v_i32m1(t));
+}
+
+inline v_int32x4 v_ceil(const v_float32x4& a)
+{
+    v_float32x4 ZP5 = v_setall_f32(0.5f);
+    v_float32x4 t = a + ZP5;
+    vsetvlmax_e32m1();
+    return v_int32x4(vfcvt_x_f_v_i32m1(t));
+}
+
+inline v_int32x4 v_trunc(const v_float32x4& a)
+{
+    vsetvlmax_e32m1();
+    return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a));
+}
+#if CV_SIMD128_64F
+inline v_int32x4 v_round(const v_float64x2& a)
+{
+    double arr[4] = {a.val[0], a.val[1], 0, 0};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
+}
+
+inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
+{
+    double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
+}
+
+inline v_int32x4 v_floor(const v_float64x2& a)
+{
+    double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
+}
+
+inline v_int32x4 v_ceil(const v_float64x2& a)
+{
+    double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    return v_int32x4(vfncvt_x_f_w_i32m1(tmp));
+}
+
+inline v_int32x4 v_trunc(const v_float64x2& a)
+{
+    double arr[4] = {a.val[0], a.val[1], 0, 0};
+    vsetvlmax_e64m2();
+    vfloat64m2_t tmp = vle64_v_f64m2(arr);
+    return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp));
+}
+#endif
-
-template<typename _Tp>
-inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
-{
-#if CV_STRONG_ALIGNMENT
-    CV_Assert(isAligned<sizeof(_Tp)>(loptr));
-    CV_Assert(isAligned<sizeof(_Tp)>(hiptr));
-#endif
-    v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c;
-    for( int i = 0; i < c.nlanes/2; i++ )
-    {
-        c.s[i] = loptr[i];
-        c.s[i+c.nlanes/2] = hiptr[i];
-    }
-    return c;
-}
+
+
+//////// Dot Product ////////
+
+// 16 >> 32
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
+{
+    int CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_int32x4 t1, t2;
+    vsetvlmax_e32m2();
+    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));
+    v_load_deinterleave(ptr, t1, t2);
+    return t1 + t2;
+}
+inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    int CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_int32x4 t1, t2;
+    vsetvlmax_e32m2();
+    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));
+    v_load_deinterleave(ptr, t1, t2);
+    return t1 + t2 + c;
+}
+
+// 32 >> 64
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
+    v_int64x2 t1, t2;
+    vsetvlmax_e64m2();
+    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));
+    v_load_deinterleave(ptr, t1, t2);
+    return t1 + t2;
+}
+inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
+    v_int64x2 t1, t2;
+    vsetvlmax_e64m2();
+    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));
+    v_load_deinterleave(ptr, t1, t2);
+    return t1 + t2 + c;
+}
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b)
+{
+    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
+    v_uint32x4 t1, t2, t3, t4;
+    vsetvlmax_e32m4();
+    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4;
+}
+inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b,
+                                   const v_uint32x4& c)
+{
+    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
+    v_uint32x4 t1, t2, t3, t4;
+    vsetvlmax_e32m4();
+    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4 + c;
+}
+
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b)
+{
+    int CV_DECL_ALIGNED(32) ptr[16] = {0};
+    v_int32x4 t1, t2, t3, t4;
+    vsetvlmax_e32m4();
+    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4;
+}
+inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b,
+                                  const v_int32x4& c)
+{
+    int CV_DECL_ALIGNED(32) ptr[16] = {0};
+    v_int32x4 t1, t2, t3, t4;
+    vsetvlmax_e32m4();
+    vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4 + c;
+}
+
+// 16 >> 64
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b)
+{
+    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_uint64x2 t1, t2, t3, t4;
+    vsetvlmax_e64m4();
+    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4;
+}
+inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c)
+{
+    uint64 CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_uint64x2 t1, t2, t3, t4;
+    vsetvlmax_e64m4();
+    vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4 + c;
+}
+
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_int64x2 t1, t2, t3, t4;
+    vsetvlmax_e64m4();
+    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4;
+}
+inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b,
+                                  const v_int64x2& c)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[8] = {0};
+    v_int64x2 t1, t2, t3, t4;
+    vsetvlmax_e64m4();
+    vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b));
+    v_load_deinterleave(ptr, t1, t2, t3, t4);
+    return t1 + t2 + t3 + t4 + c;
+}
+
+// 32 >> 64f
+#if CV_SIMD128_64F
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b)
+{ return v_cvt_f64(v_dotprod(a, b)); }
+inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b,
+                                    const v_float64x2& c)
+{ return v_dotprod_expand(a, b) + c; }
+#endif
-
-template<typename _Tp>
-inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_TypeTraits<_Tp>::nlanes128 / 2>
-v_load_expand(const _Tp* ptr)
-{
-#if CV_STRONG_ALIGNMENT
-    CV_Assert(isAligned<sizeof(_Tp)>(ptr));
-#endif
-    typedef typename V_TypeTraits<_Tp>::w_type w_type;
-    v_reg<w_type, V_TypeTraits<w_type>::nlanes128> c;
-    for( int i = 0; i < c.nlanes; i++ )
-    {
-        c.s[i] = ptr[i];
-    }
-    return c;
-}
+
+//////// Fast Dot Product ////////
+
+// 16 >> 32
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b)
+{
+    int CV_DECL_ALIGNED(32) ptr[8] = {0};
+    vsetvlmax_e32m2();
+    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));
+    v_int32x4 t1 = v_load(ptr);
+    v_int32x4 t2 = v_load(ptr+4);
+    return t1 + t2;
+}
+inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
+{
+    int CV_DECL_ALIGNED(32) ptr[8] = {0};
+    vsetvlmax_e32m2();
+    vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b));
+    v_int32x4 t1 = v_load(ptr);
+    v_int32x4 t2 = v_load(ptr+4);
+    return t1 + t2 + c;
+}
+
+// 32 >> 64
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));
+    v_int64x2 t1 = v_load(ptr);
+    v_int64x2 t2 = v_load(ptr+2);
+    return t1 + t2;
+}
+inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c)
+{
+    int64 CV_DECL_ALIGNED(32) ptr[4] = {0};
+    vsetvlmax_e64m2();
+    vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b));
+    v_int64x2 t1 = v_load(ptr);
+    v_int64x2 t2 = v_load(ptr+2);
+    return t1 + t2 + c;
+}
+
+
+// 8 >> 32
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b)
+{
+    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
+    vsetvlmax_e32m4();
+    vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b));
+    v_uint32x4 t1 = v_load(ptr);
+    v_uint32x4 t2 = v_load(ptr+4);
+    v_uint32x4 t3 = v_load(ptr+8);
+    v_uint32x4 t4 = v_load(ptr+12);
+    return t1 + t2 + t3 + t4;
+}
+inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c)
+{
+    unsigned CV_DECL_ALIGNED(32) ptr[16] = {0};
+    
vsetvlmax_e32m4(); + vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + v_uint32x4 t1 = v_load(ptr); + v_uint32x4 t2 = v_load(ptr+4); + v_uint32x4 t3 = v_load(ptr+8); + v_uint32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4; +} +inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) +{ + int CV_DECL_ALIGNED(32) ptr[16] = {0}; + vsetvlmax_e32m4(); + vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + v_int32x4 t1 = v_load(ptr); + v_int32x4 t2 = v_load(ptr+4); + v_int32x4 t3 = v_load(ptr+8); + v_int32x4 t4 = v_load(ptr+12); + return t1 + t2 + t3 + t4 + c; +} + +// 16 >> 64 +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) +{ + uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + v_uint64x2 t1 = v_load(ptr); + v_uint64x2 t2 = v_load(ptr+2); + v_uint64x2 t3 = v_load(ptr+4); + v_uint64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4; +} +inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) +{ + int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + vsetvlmax_e64m4(); + vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + v_int64x2 t1 = v_load(ptr); + v_int64x2 t2 = v_load(ptr+2); + v_int64x2 t3 = v_load(ptr+4); + v_int64x2 t4 = v_load(ptr+6); + return t1 + t2 + t3 + t4 + c; +} + +// 32 >> 64f +#if CV_SIMD128_64F +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) +{ return v_cvt_f64(v_dotprod_fast(a, b)); } +inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) +{ return v_dotprod_expand_fast(a, b) + c; } #endif - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} -template -inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> -v_load_expand_q(const _Tp* ptr) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - 
a.s[i] = ptr[i2]; - b.s[i] = ptr[i2+1]; - } -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - a.s[i] = ptr[i3]; - b.s[i] = ptr[i3+1]; - c.s[i] = ptr[i3+2]; - } -} - -template -inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, - v_reg<_Tp, n>& d) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - a.s[i] = ptr[i4]; - b.s[i] = ptr[i4+1]; - c.s[i] = ptr[i4+2]; - d.s[i] = ptr[i4+3]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - ptr[i2] = a.s[i]; - ptr[i2+1] = b.s[i]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - ptr[i3] = a.s[i]; - ptr[i3+1] = b.s[i]; - ptr[i3+2] = c.s[i]; - } -} - -template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - ptr[i4] = a.s[i]; - ptr[i4+1] = b.s[i]; - ptr[i4+2] = c.s[i]; - ptr[i4+3] = d.s[i]; - } -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - v_store(ptr, a); -} - -template -inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) -{ -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned(ptr)); -#endif - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i+(n/2)]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ - CV_Assert(isAligned)>(ptr)); - v_store(ptr, a); -} - -template -inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i]; - c.s[i+(n/2)] = b.s[i]; - } - return c; -} - -template -inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i+(n/2)]; - c.s[i+(n/2)] = b.s[i+(n/2)]; - } - return c; -} - -template -inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - 
v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) -{ - for( int i = 0; i < (n/2); i++ ) - { - low.s[i] = a.s[i]; - low.s[i+(n/2)] = b.s[i]; - high.s[i] = a.s[i+(n/2)]; - high.s[i+(n/2)] = b.s[i+(n/2)]; - } -} - -template -inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = a.s[n-i-1]; - return c; -} - -template -inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> r; - const int shift = n - s; - int i = 0; - for (; i < shift; ++i) - r.s[i] = a.s[i+s]; - for (; i < n; ++i) - r.s[i] = b.s[i-shift]; - return r; -} - -template -inline _Tp v_extract_n(const v_reg<_Tp, n>& v) -{ - CV_DbgAssert(s >= 0 && s < n); - return v.s[s]; -} - -template -inline v_reg<_Tp, n> v_broadcast_element(const v_reg<_Tp, n>& a) -{ - CV_DbgAssert(i >= 0 && i < n); - return v_reg<_Tp, n>::all(a.s[i]); -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvRound(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = cvRound(b.s[i]); - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvFloor(a.s[i]); - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvCeil(a.s[i]); - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (int)(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvFloor(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (float)a.s[i]; - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = (float)b.s[i]; - } - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i + 2]; - return c; -} - -CV_INLINE v_reg v_cvt_f64(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -CV_INLINE v_reg 
v_cvt_f64_high(const v_reg& a) -{ - enum { n = 2 }; - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - - -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i]]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 2] + i % 2]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 4] + i % 4]; - return c; -} - -template inline v_reg v_lut(const int* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) -{ - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const float* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const double* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - - -inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - -inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) -{ - return v_lut(tab, idxvec.s); -} - - -template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[4*i ] = vec.s[4*i ]; - c.s[4*i+1] = vec.s[4*i+2]; - c.s[4*i+2] = vec.s[4*i+1]; - c.s[4*i+3] = vec.s[4*i+3]; - } - return c; -} - -template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/8; i++) - { - c.s[8*i ] = vec.s[8*i ]; - c.s[8*i+1] = vec.s[8*i+4]; - c.s[8*i+2] = vec.s[8*i+1]; - c.s[8*i+3] = vec.s[8*i+5]; - c.s[8*i+4] = vec.s[8*i+2]; - c.s[8*i+5] = vec.s[8*i+6]; - c.s[8*i+6] = vec.s[8*i+3]; - c.s[8*i+7] = vec.s[8*i+7]; - } - return c; -} - -template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[3*i ] = vec.s[4*i ]; - c.s[3*i+1] = vec.s[4*i+1]; - c.s[3*i+2] = vec.s[4*i+2]; - } - return c; -} - -template -inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) -{ - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 
4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); -} - -#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } - -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } - -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ - v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } - -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } - -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } - -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ -{ \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) - -#define 
OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>(a.s[i]); \ -} - -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) 
-OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -template -inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - for (int i = 0; i < n; ++i) - { - mptr[i] = (_Tpm)a.s[i]; - mptr[i + n] = (_Tpm)b.s[i]; - } -} - - - -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - return mask; -} - - -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); - return mask; -} inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& m3) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3); + return v_float32x4(res); } inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) + const v_float32x4& a) { - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); + vsetvlmax_e32m1(); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + return v_float32x4(res) + a; +} + +#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width) \ +inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ +{ \ + _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \ + vsetvlmax_e##width##m2(); \ + vse##width##_v_##suffix##m2(ptr, wmul(a, b)); \ + vsetvlmax_e##width##m1(); \ + c = _Tpwvec(vle##width##_v_##suffix##m1(ptr)); \ + d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes)); \ +} + +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 
32) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64) + + +inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) +{ + vsetvlmax_e16m1(); + return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b), 16)); +} +inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) +{ + vsetvlmax_e16m1(); + return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b), 16)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b) -{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_cvt_f64_high(a) * v_cvt_f64_high(b)); } -inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) -{ return v_fma(v_cvt_f64(a), v_cvt_f64(b), v_fma(v_cvt_f64_high(a), v_cvt_f64_high(b), c)); } +//////// Saturating Multiply //////// -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b) -{ return v_dotprod_expand(a, b); } -inline v_float64x2 v_dotprod_expand_fast(const v_int32x4& a, const v_int32x4& b, const v_float64x2& c) -{ return v_dotprod_expand(a, b, c); } - -////// FP16 support /////// - -inline v_reg::nlanes128> -v_load_expand(const float16_t* ptr) -{ - v_reg::nlanes128> v; - for( int i = 0; i < v.nlanes; i++ ) - { - v.s[i] = ptr[i]; - } - return v; +#define OPENCV_HAL_IMPL_RVV_MUL_SAT(_Tpvec, _wTpvec) \ +inline _Tpvec operator * (const _Tpvec& a, const _Tpvec& b) \ +{ \ + _wTpvec c, d; \ + v_mul_expand(a, b, c, d); \ + return v_pack(c, d); \ +} \ +inline _Tpvec& operator *= (_Tpvec& a, const _Tpvec& b) \ +{ \ + a = a * b; \ + return a; \ } -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) -{ - for( int i = 0; i < v.nlanes; i++ ) - { - ptr[i] = float16_t(v.s[i]); - } -} +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint8x16, v_uint16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int8x16, v_int16x8) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_uint16x8, v_uint32x4) +OPENCV_HAL_IMPL_RVV_MUL_SAT(v_int16x8, v_int32x4) + inline void v_cleanup() {} - -#ifndef CV_DOXYGEN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END -#endif + + } #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index ef928f6a5c..b4178af8b7 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -257,221 +257,20 @@ struct v_float64x2 v128_t val; }; -namespace fallback +namespace { - -template struct v_reg -{ - typedef _Tp lane_type; - enum { nlanes = n }; - - explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } - - v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7, - _Tp s8, _Tp s9, _Tp s10, _Tp s11, - _Tp s12, _Tp s13, _Tp s14, _Tp s15) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; - s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; - } - - v_reg() {} - - v_reg(const v_reg<_Tp, n> & r) - { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - } - - _Tp get0() const { return s[0]; } - - _Tp get(const int i) const { return s[i]; } - v_reg<_Tp, n> high() const - { - v_reg<_Tp, n> c; - int i; - for( i = 0; i < n/2; i++ ) - { - c.s[i] = s[i+(n/2)]; - 
c.s[i+(n/2)] = 0; - } - return c; - } - - static v_reg<_Tp, n> zero() - { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = (_Tp)0; - return c; - } - - static v_reg<_Tp, n> all(_Tp s) - { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = s; - return c; - } - - template v_reg<_Tp2, n2> reinterpret_as() const - { - size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); - v_reg<_Tp2, n2> c; - std::memcpy(&c.s[0], &s[0], bytes); - return c; - } - - v_reg(const cv::v_uint8x16& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int8x16& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint16x8& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int16x8& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_float32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_float64x2& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint64x2& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int64x2& v) { wasm_v128_store(&s, v.val); } - - operator cv::v_uint8x16() const { return cv::v_uint8x16(wasm_v128_load(&s)); } - operator cv::v_int8x16() const { return cv::v_int8x16(wasm_v128_load(&s)); } - operator cv::v_uint16x8() const { return cv::v_uint16x8(wasm_v128_load(&s)); } - operator cv::v_int16x8() const { return cv::v_int16x8(wasm_v128_load(&s)); } - operator cv::v_uint32x4() const { return cv::v_uint32x4(wasm_v128_load(&s)); } - operator cv::v_int32x4() const { return cv::v_int32x4(wasm_v128_load(&s)); } - operator cv::v_float32x4() const { return cv::v_float32x4(wasm_v128_load(&s)); } - operator cv::v_float64x2() const { return cv::v_float64x2(wasm_v128_load(&s)); } - operator cv::v_uint64x2() const { return cv::v_uint64x2(wasm_v128_load(&s)); } - operator cv::v_int64x2() const { return cv::v_int64x2(wasm_v128_load(&s)); } - - _Tp s[n]; -}; - -typedef v_reg v_uint8x16; -typedef v_reg v_int8x16; -typedef v_reg v_uint16x8; -typedef v_reg v_int16x8; -typedef v_reg v_uint32x4; -typedef v_reg v_int32x4; -typedef v_reg v_float32x4; -typedef v_reg v_float64x2; -typedef v_reg v_uint64x2; -typedef v_reg v_int64x2; - -#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ -template inline v_reg<_Tp, n> \ - operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return c; \ -} \ -template inline v_reg<_Tp, n>& \ - operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return a; \ -} - -OPENCV_HAL_IMPL_BIN_OP(+) -OPENCV_HAL_IMPL_BIN_OP(-) -OPENCV_HAL_IMPL_BIN_OP(*) -OPENCV_HAL_IMPL_BIN_OP(/) - -#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ -template inline v_reg<_Tp, n> operator bit_op \ - (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return c; \ -} \ -template inline v_reg<_Tp, n>& operator \ - bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) 
bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return a; \ -} - -OPENCV_HAL_IMPL_BIT_OP(&) -OPENCV_HAL_IMPL_BIT_OP(|) -OPENCV_HAL_IMPL_BIT_OP(^) - -template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); - } - return c; -} - -#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ -template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp2, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, - typename V_TypeTraits<_Tp>::abs_type) -OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) - -#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ -template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i], b.s[i]); \ - return c; \ -} - -#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ -template inline _Tp func(const v_reg<_Tp, n>& a) \ -{ \ - _Tp c = a.s[0]; \ - for( int i = 1; i < n; i++ ) \ - c = cfunc(c, a.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) -OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) +#define OPENCV_HAL_IMPL_REINTERPRET_INT(ft, tt) \ +inline tt reinterpret_int(ft x) { union { ft l; tt i; } v; v.l = x; return v.i; } +OPENCV_HAL_IMPL_REINTERPRET_INT(uchar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(schar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(ushort, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(short, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(unsigned, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(int, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(float, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(uint64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(int64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(double, int64) static const unsigned char popCountTable[] = { @@ -492,1184 +291,7 @@ static const unsigned char popCountTable[] = 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; - -template -inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) -{ - v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); - for (int i = 0; i < (int)(n*sizeof(_Tp)); i++) - b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; - return b; -} - -template -inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) -{ - for( int i = 0; i < n; i++ ) - { - minval.s[i] = std::min(a.s[i], b.s[i]); - maxval.s[i] = std::max(a.s[i], b.s[i]); - } -} - -#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ -template \ -inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op 
b.s[i])); \ - return c; \ -} - -OPENCV_HAL_IMPL_CMP_OP(<) -OPENCV_HAL_IMPL_CMP_OP(>) -OPENCV_HAL_IMPL_CMP_OP(<=) -OPENCV_HAL_IMPL_CMP_OP(>=) -OPENCV_HAL_IMPL_CMP_OP(==) -OPENCV_HAL_IMPL_CMP_OP(!=) - -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} - -#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ -template \ -inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef _Tp2 rtype; \ - v_reg c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) -OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) -OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) - -template inline T _absdiff(T a, T b) -{ - return a > b ? a - b : b - a; -} - -template -inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) -{ - typedef typename V_TypeTraits<_Tp>::abs_type rtype; - v_reg c; - const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); - for( int i = 0; i < n; i++ ) - { - rtype ua = a.s[i] ^ mask; - rtype ub = b.s[i] ^ mask; - c.s[i] = _absdiff(ua, ub); - } - return c; -} - -inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) -{ - v_float32x4 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) -{ - v_float64x2 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++) - c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); - return c; -} - -template -inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = 1.f/std::sqrt(a.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; - return c; -} - -template -inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - v_reg<_Tp, n> d; - for( int i = 0; i < n; i++ ) - d.s[i] = a.s[i]*b.s[i] + c.s[i]; - return d; -} - -template -inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - return v_fma(a, b, c); -} - -template inline v_reg::w_type, n/2> - v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg c; - for( int i = 0; i < (n/2); i++ ) - c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; - return c; -} - -template inline v_reg::w_type, n/2> - v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, 
const v_reg::w_type, n / 2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg s; - for( int i = 0; i < (n/2); i++ ) - s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; - return s; -} - -template inline v_reg::q_type, n/4> - v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; - return s; -} - -template inline v_reg::q_type, n/4> - v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::q_type, n / 4>& c) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; - return s; -} - -template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg::w_type, n/2>& c, - v_reg::w_type, n/2>& d) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i]*b.s[i]; - d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; - } -} - -template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg<_Tp, n> c; - for (int i = 0; i < n; i++) - c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); - return c; -} - -template inline void v_hsum(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; - } -} - -#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ -template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = (_Tp)(a.s[i] shift_op imm); \ - return c; \ -} - -OPENCV_HAL_IMPL_SHIFT_OP(<< ) -OPENCV_HAL_IMPL_SHIFT_OP(>> ) - -#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp, n> b; \ - for (int i = 0; i < n; i++) \ - { \ - int sIndex = i opA imm; \ - if (0 <= sIndex && sIndex < n) \ - { \ - b.s[i] = a.s[sIndex]; \ - } \ - else \ - { \ - b.s[i] = 0; \ - } \ - } \ - return b; \ -} \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for (int i = 0; i < n; i++) \ - { \ - int aIndex = i opA imm; \ - int bIndex = i opA imm opB n; \ - if (0 <= bIndex && bIndex < n) \ - { \ - c.s[i] = b.s[bIndex]; \ - } \ - else if (0 <= aIndex && aIndex < n) \ - { \ - c.s[i] = a.s[aIndex]; \ - } \ - else \ - { \ - c.s[i] = 0; \ - } \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) - -template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) -{ - typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; - for( int i = 1; i < n; i++ ) - c += a.s[i]; - return c; -} - -inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, - const v_float32x4& c, const v_float32x4& d) -{ - v_float32x4 r; - r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; - r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; - r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; - r.s[3] = d.s[0] + 
d.s[1] + d.s[2] + d.s[3]; - return r; -} - -template inline typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); - for (int i = 1; i < n; i++) - c += _absdiff(a.s[i], b.s[i]); - return c; -} - -template inline int v_signmask(const v_reg<_Tp, n>& a) -{ - int mask = 0; - for( int i = 0; i < n; i++ ) - mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; - return mask; -} - -template inline bool v_check_all(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) - return false; - return true; -} - -template inline bool v_check_any(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) - return true; - return false; -} - -template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, - const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef V_TypeTraits<_Tp> Traits; - typedef typename Traits::int_type int_type; - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - { - int_type m = Traits::reinterpret_int(mask.s[i]); - CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc - c.s[i] = m ? a.s[i] : b.s[i]; - } - return c; -} - -template inline void v_expand(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& b0, - v_reg::w_type, n/2>& b1) -{ - for( int i = 0; i < (n/2); i++ ) - { - b0.s[i] = a.s[i]; - b1.s[i] = a.s[i+(n/2)]; - } -} - -template -inline v_reg::w_type, n/2> -v_expand_low(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i]; - return b; -} - -template -inline v_reg::w_type, n/2> -v_expand_high(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i+(n/2)]; - return b; -} - -template inline v_reg::int_type, n> - v_reinterpret_as_int(const v_reg<_Tp, n>& a) -{ - v_reg::int_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); - return c; -} - -template inline v_reg::uint_type, n> - v_reinterpret_as_uint(const v_reg<_Tp, n>& a) -{ - v_reg::uint_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); - return c; -} - -template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, - v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) -{ - int i; - for( i = 0; i < n/2; i++ ) - { - b0.s[i*2] = a0.s[i]; - b0.s[i*2+1] = a1.s[i]; - } - for( ; i < n; i++ ) - { - b1.s[i*2-n] = a0.s[i]; - b1.s[i*2-n+1] = a1.s[i]; - } -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) -{ - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) -{ - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = loptr[i]; - c.s[i+c.nlanes/2] = hiptr[i]; - } - return c; -} - -template -inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> 
-v_load_expand(const _Tp* ptr) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template -inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> -v_load_expand_q(const _Tp* ptr) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b) -{ - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - a.s[i] = ptr[i2]; - b.s[i] = ptr[i2+1]; - } -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) -{ - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - a.s[i] = ptr[i3]; - b.s[i] = ptr[i3+1]; - c.s[i] = ptr[i3+2]; - } -} - -template -inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, - v_reg<_Tp, n>& d) -{ - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - a.s[i] = ptr[i4]; - b.s[i] = ptr[i4+1]; - c.s[i] = ptr[i4+2]; - d.s[i] = ptr[i4+3]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - ptr[i2] = a.s[i]; - ptr[i2+1] = b.s[i]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - ptr[i3] = a.s[i]; - ptr[i3+1] = b.s[i]; - ptr[i3+2] = c.s[i]; - } -} - -template inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - ptr[i4] = a.s[i]; - ptr[i4+1] = b.s[i]; - ptr[i4+2] = c.s[i]; - ptr[i4+3] = d.s[i]; - } -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i+(n/2)]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i]; - c.s[i+(n/2)] = b.s[i]; - } - return c; -} - -template -inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i+(n/2)]; - c.s[i+(n/2)] = b.s[i+(n/2)]; - } - return c; -} - -template -inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& low, 
v_reg<_Tp, n>& high) -{ - for( int i = 0; i < (n/2); i++ ) - { - low.s[i] = a.s[i]; - low.s[i+(n/2)] = b.s[i]; - high.s[i] = a.s[i+(n/2)]; - high.s[i+(n/2)] = b.s[i+(n/2)]; - } -} - -template -inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> r; - const int shift = n - s; - int i = 0; - for (; i < shift; ++i) - r.s[i] = a.s[i+s]; - for (; i < n; ++i) - r.s[i] = b.s[i-shift]; - return r; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvRound(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = cvRound(b.s[i]); - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvFloor(a.s[i]); - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvCeil(a.s[i]); - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (int)(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvFloor(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (int)(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (float)a.s[i]; - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = (float)b.s[i]; - } - return c; -} - -inline v_float64x2 v_cvt_f64(const v_int32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i+2]; - return c; -} - -inline v_float64x2 v_cvt_f64(const v_float32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i+2]; - return c; -} - -inline v_float64x2 v_cvt_f64(const v_int64x2& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i]]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 2] + i % 2]; - return c; -} 
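
For reference, the fallback v_lut family being removed above gathers table elements by index: v_lut reads tab[idx[i]] per lane, and v_lut_pairs reads consecutive pairs starting at each index. A minimal standalone sketch of those semantics (plain C++, not OpenCV code; the data values are illustrative):

// Standalone sketch of the v_lut / v_lut_pairs gather semantics.
#include <cstdio>

int main()
{
    const float tab[8] = {0.f, 10.f, 20.f, 30.f, 40.f, 50.f, 60.f, 70.f};
    const int idx[4] = {6, 0, 2, 4};
    float lut[4], pairs[4];
    for (int i = 0; i < 4; i++)
        lut[i] = tab[idx[i]];               // v_lut: one gathered element per lane
    for (int i = 0; i < 4; i++)
        pairs[i] = tab[idx[i / 2] + i % 2]; // v_lut_pairs: consecutive pairs per index
    std::printf("lut:   %g %g %g %g\n", lut[0], lut[1], lut[2], lut[3]);         // 60 0 20 40
    std::printf("pairs: %g %g %g %g\n", pairs[0], pairs[1], pairs[2], pairs[3]); // 60 70 0 10
    return 0;
}
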
-template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 4] + i % 4]; - return c; -} - -template inline v_reg v_lut(const int* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) -{ - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const float* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const double* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[4*i ] = vec.s[4*i ]; - c.s[4*i+1] = vec.s[4*i+2]; - c.s[4*i+2] = vec.s[4*i+1]; - c.s[4*i+3] = vec.s[4*i+3]; - } - return c; -} - -template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/8; i++) - { - c.s[8*i ] = vec.s[8*i ]; - c.s[8*i+1] = vec.s[8*i+4]; - c.s[8*i+2] = vec.s[8*i+1]; - c.s[8*i+3] = vec.s[8*i+5]; - c.s[8*i+4] = vec.s[8*i+2]; - c.s[8*i+5] = vec.s[8*i+6]; - c.s[8*i+6] = vec.s[8*i+3]; - c.s[8*i+7] = vec.s[8*i+7]; - } - return c; -} - -template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[3*i ] = vec.s[4*i ]; - c.s[3*i+1] = vec.s[4*i+1]; - c.s[3*i+2] = vec.s[4*i+2]; - } - return c; -} - -template -inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) -{ - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); -} - -#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } - -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } - -OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) 
-OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ - v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } - -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } - -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } - -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ -{ \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( 
int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>(a.s[i]); \ -} - -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -template -inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - for (int i = 0; i < n; ++i) - { - mptr[i] = (_Tpm)a.s[i]; - mptr[i + n] = (_Tpm)b.s[i]; - } -} - -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const 
v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); - return mask; -} - -inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) -{ - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); -} - -inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) -{ - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); -} - -inline v_reg::nlanes128> -v_load_expand(const float16_t* ptr) -{ - v_reg::nlanes128> v; - for( int i = 0; i < v.nlanes; i++ ) - { - v.s[i] = ptr[i]; - } - return v; -} - -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) -{ - for( int i = 0; i < v.nlanes; i++ ) - { - ptr[i] = float16_t(v.s[i]); - } -} - -inline void v_cleanup() {} -} // namespace fallback +} // namespace static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); @@ -2644,8 +1266,31 @@ OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_sub_wrap, wasm_i8x16_sub) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_sub_wrap, wasm_i8x16_sub) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub) +#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) >= (1039012) +// details: https://github.com/opencv/opencv/issues/18097 ( https://github.com/emscripten-core/emscripten/issues/12018 ) +// 1.39.12: https://github.com/emscripten-core/emscripten/commit/cd801d0f110facfd694212a3c8b2ed2ffcd630e2 +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + uchar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (uchar)(a_[i] * b_[i]); + return v_uint8x16(wasm_v128_load(a_)); +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + schar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (schar)(a_[i] * b_[i]); + return v_int8x16(wasm_v128_load(a_)); +} +#else OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +#endif OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_mul_wrap, wasm_i16x8_mul) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_mul_wrap, wasm_i16x8_mul) @@ -2905,13 +1550,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - fallback::v_store_low(ptr, a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \ + ptr[i] = a_[i]; \ } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { \ - 
fallback::_Tpvec a_(a); \ - fallback::v_store_high(ptr, a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \ + ptr[i] = a_[i + (_Tpvec::nlanes / 2)]; \ } OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar) @@ -2977,8 +1626,12 @@ OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4) #define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::v_reduce_sum(a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + scalartype c = a_[0]; \ + for (int i = 1; i < _Tpvec::nlanes; i++) \ + c += a_[i]; \ + return c; \ } OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) @@ -3102,8 +1755,11 @@ inline v_uint32x4 v_popcount(const v_uint32x4& a) } inline v_uint64x2 v_popcount(const v_uint64x2& a) { - fallback::v_uint64x2 a_(a); - return fallback::v_popcount(a_); + uint64 a_[2], b_[2] = { 0 }; + wasm_v128_store(a_, a.val); + for (int i = 0; i < 16; i++) + b_[i / 8] += popCountTable[((uint8_t*)a_)[i]]; + return v_uint64x2(wasm_v128_load(b_)); } inline v_uint8x16 v_popcount(const v_int8x16& a) { return v_popcount(v_reinterpret_as_u8(a)); } @@ -3117,8 +1773,12 @@ inline v_uint64x2 v_popcount(const v_int64x2& a) #define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \ inline int v_signmask(const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::v_signmask(a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + int mask = 0; \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + mask |= (reinterpret_int(a_[i]) < 0) << i; \ + return mask; \ } \ inline bool v_check_all(const _Tpvec& a) \ { return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \ @@ -3273,22 +1933,35 @@ inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_trunc(const v_float32x4& a) { return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); } -#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ -inline _Tpnvec func(const _Tpvec& a) \ +#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc) \ +inline v_int32x4 func(const v_float64x2& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::func(a_); \ + double a_[2]; \ + wasm_v128_store(a_, a.val); \ + int c_[4]; \ + c_[0] = cfunc(a_[0]); \ + c_[1] = cfunc(a_[1]); \ + c_[2] = 0; \ + c_[3] = 0; \ + return v_int32x4(wasm_v128_load(c_)); \ } -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int) inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { - fallback::v_float64x2 a_(a), b_(b); - return fallback::v_round(a_, b_); + double a_[2], b_[2]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + int c_[4]; + c_[0] = cvRound(a_[0]); + c_[1] = cvRound(a_[1]); + c_[2] = cvRound(b_[0]); + c_[3] = cvRound(b_[1]); + return v_int32x4(wasm_v128_load(c_)); } #define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \ @@ -3782,14 +2455,27 @@ inline 
v_float32x4 v_cvt_f32(const v_int32x4& a)

 inline v_float32x4 v_cvt_f32(const v_float64x2& a)
 {
-    fallback::v_float64x2 a_(a);
-    return fallback::v_cvt_f32(a_);
+    double a_[2];
+    wasm_v128_store(a_, a.val);
+    float c_[4];
+    c_[0] = (float)(a_[0]);
+    c_[1] = (float)(a_[1]);
+    c_[2] = 0;
+    c_[3] = 0;
+    return v_float32x4(wasm_v128_load(c_));
 }

 inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
 {
-    fallback::v_float64x2 a_(a), b_(b);
-    return fallback::v_cvt_f32(a_, b_);
+    double a_[2], b_[2];
+    wasm_v128_store(a_, a.val);
+    wasm_v128_store(b_, b.val);
+    float c_[4];
+    c_[0] = (float)(a_[0]);
+    c_[1] = (float)(a_[1]);
+    c_[2] = (float)(b_[0]);
+    c_[3] = (float)(b_[1]);
+    return v_float32x4(wasm_v128_load(c_));
 }

 inline v_float64x2 v_cvt_f64(const v_int32x4& a)
@@ -3798,8 +2484,12 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a)
     v128_t p = v128_cvti32x4_i64x2(a.val);
     return v_float64x2(wasm_f64x2_convert_i64x2(p));
 #else
-    fallback::v_int32x4 a_(a);
-    return fallback::v_cvt_f64(a_);
+    int a_[4];
+    wasm_v128_store(a_, a.val);
+    double c_[2];
+    c_[0] = (double)(a_[0]);
+    c_[1] = (double)(a_[1]);
+    return v_float64x2(wasm_v128_load(c_));
 #endif
 }

@@ -3809,21 +2499,33 @@ inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
     v128_t p = v128_cvti32x4_i64x2_high(a.val);
     return v_float64x2(wasm_f64x2_convert_i64x2(p));
 #else
-    fallback::v_int32x4 a_(a);
-    return fallback::v_cvt_f64_high(a_);
+    int a_[4];
+    wasm_v128_store(a_, a.val);
+    double c_[2];
+    c_[0] = (double)(a_[2]);
+    c_[1] = (double)(a_[3]);
+    return v_float64x2(wasm_v128_load(c_));
 #endif
 }

 inline v_float64x2 v_cvt_f64(const v_float32x4& a)
 {
-    fallback::v_float32x4 a_(a);
-    return fallback::v_cvt_f64(a_);
+    float a_[4];
+    wasm_v128_store(a_, a.val);
+    double c_[2];
+    c_[0] = (double)(a_[0]);
+    c_[1] = (double)(a_[1]);
+    return v_float64x2(wasm_v128_load(c_));
 }

 inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
 {
-    fallback::v_float32x4 a_(a);
-    return fallback::v_cvt_f64_high(a_);
+    float a_[4];
+    wasm_v128_store(a_, a.val);
+    double c_[2];
+    c_[0] = (double)(a_[2]);
+    c_[1] = (double)(a_[3]);
+    return v_float64x2(wasm_v128_load(c_));
 }

 inline v_float64x2 v_cvt_f64(const v_int64x2& a)
@@ -3831,8 +2533,12 @@ inline v_float64x2 v_cvt_f64(const v_int64x2& a)
 #ifdef __wasm_unimplemented_simd128__
     return v_float64x2(wasm_f64x2_convert_i64x2(a.val));
 #else
-    fallback::v_int64x2 a_(a);
-    return fallback::v_cvt_f64(a_);
+    int64 a_[2];
+    wasm_v128_store(a_, a.val);
+    double c_[2];
+    c_[0] = (double)(a_[0]);
+    c_[1] = (double)(a_[1]);
+    return v_float64x2(wasm_v128_load(c_));
 #endif
 }

@@ -4049,13 +2755,20 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a)

 inline v_float32x4 v_load_expand(const float16_t* ptr)
 {
-    return fallback::v_load_expand(ptr);
+    float a[4];
+    for (int i = 0; i < 4; i++)
+        a[i] = ptr[i];
+    return v_float32x4(wasm_v128_load(a));
 }

 inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
 {
-    fallback::v_float32x4 v_(v);
-    fallback::v_pack_store(ptr, v_);
+    float v_[4];
+    wasm_v128_store(v_, v.val);
+    ptr[0] = float16_t(v_[0]);
+    ptr[1] = float16_t(v_[1]);
+    ptr[2] = float16_t(v_[2]);
+    ptr[3] = float16_t(v_[3]);
 }

 inline void v_cleanup() {}
diff --git a/modules/core/include/opencv2/core/llapi/llapi.h b/modules/core/include/opencv2/core/llapi/llapi.h
index 805d9ed262..ce322aecf8 100644
--- a/modules/core/include/opencv2/core/llapi/llapi.h
+++ b/modules/core/include/opencv2/core/llapi/llapi.h
@@ -27,6 +27,14 @@ Using this approach OpenCV provides some basic low level functionality for external plugins
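
The hunk below adds CV_PLUGIN_EXPORTS so plugin entry points can be exported with the right linkage on Windows and GCC-compatible compilers. A hypothetical usage sketch (the function name and body are illustrative, not part of the patch; it assumes the cvResult values declared in this header):

#include <opencv2/core/llapi/llapi.h>

// Hypothetical plugin entry point: CV_PLUGIN_EXPORTS gives the symbol
// dllexport linkage on Windows or default visibility under GCC >= 4,
// so the plugin loader can resolve it at runtime.
extern "C" CV_PLUGIN_EXPORTS
cvResult CV_API_CALL example_plugin_probe(int requested_abi)
{
    return requested_abi >= 0 ? CV_ERROR_OK : CV_ERROR_FAIL;
}
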
for exter #define CV_API_CALL #endif +#ifndef CV_PLUGIN_EXPORTS +#if (defined _WIN32 || defined WINCE || defined __CYGWIN__) +# define CV_PLUGIN_EXPORTS __declspec(dllexport) +#elif defined __GNUC__ && __GNUC__ >= 4 +# define CV_PLUGIN_EXPORTS __attribute__ ((visibility ("default"))) +#endif +#endif + typedef enum cvResult { CV_ERROR_FAIL = -1, //!< Some error occurred (TODO Require to fill exception information) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index bc676c1acd..84df297bf9 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -170,7 +170,9 @@ public: STD_VECTOR = 3 << KIND_SHIFT, STD_VECTOR_VECTOR = 4 << KIND_SHIFT, STD_VECTOR_MAT = 5 << KIND_SHIFT, - EXPR = 6 << KIND_SHIFT, //!< removed +#if OPENCV_ABI_COMPATIBILITY < 500 + EXPR = 6 << KIND_SHIFT, //!< removed: https://github.com/opencv/opencv/pull/17046 +#endif OPENGL_BUFFER = 7 << KIND_SHIFT, CUDA_HOST_MEM = 8 << KIND_SHIFT, CUDA_GPU_MAT = 9 << KIND_SHIFT, @@ -178,7 +180,9 @@ public: STD_VECTOR_UMAT =11 << KIND_SHIFT, STD_BOOL_VECTOR =12 << KIND_SHIFT, STD_VECTOR_CUDA_GPU_MAT = 13 << KIND_SHIFT, - STD_ARRAY =14 << KIND_SHIFT, +#if OPENCV_ABI_COMPATIBILITY < 500 + STD_ARRAY =14 << KIND_SHIFT, //!< removed: https://github.com/opencv/opencv/issues/18897 +#endif STD_ARRAY_MAT =15 << KIND_SHIFT }; @@ -572,24 +576,24 @@ CV_ENUM_FLAGS(UMatData::MemoryFlag) struct CV_EXPORTS MatSize { - explicit MatSize(int* _p); - int dims() const; + explicit MatSize(int* _p) CV_NOEXCEPT; + int dims() const CV_NOEXCEPT; Size operator()() const; const int& operator[](int i) const; int& operator[](int i); - operator const int*() const; // TODO OpenCV 4.0: drop this - bool operator == (const MatSize& sz) const; - bool operator != (const MatSize& sz) const; + operator const int*() const CV_NOEXCEPT; // TODO OpenCV 4.0: drop this + bool operator == (const MatSize& sz) const CV_NOEXCEPT; + bool operator != (const MatSize& sz) const CV_NOEXCEPT; int* p; }; struct CV_EXPORTS MatStep { - MatStep(); - explicit MatStep(size_t s); - const size_t& operator[](int i) const; - size_t& operator[](int i); + MatStep() CV_NOEXCEPT; + explicit MatStep(size_t s) CV_NOEXCEPT; + const size_t& operator[](int i) const CV_NOEXCEPT; + size_t& operator[](int i) CV_NOEXCEPT; operator size_t() const; MatStep& operator = (size_t s); @@ -694,11 +698,16 @@ sub-matrices. -# Process "foreign" data using OpenCV (for example, when you implement a DirectShow\* filter or a processing module for gstreamer, and so on). For example: @code - void process_video_frame(const unsigned char* pixels, - int width, int height, int step) + Mat process_video_frame(const unsigned char* pixels, + int width, int height, int step) { - Mat img(height, width, CV_8UC3, pixels, step); - GaussianBlur(img, img, Size(7,7), 1.5, 1.5); + // wrap input buffer + Mat img(height, width, CV_8UC3, (unsigned char*)pixels, step); + + Mat result; + GaussianBlur(img, result, Size(7, 7), 1.5, 1.5); + + return result; } @endcode -# Quickly initialize small matrices and/or get a super-fast element access. @@ -798,7 +807,7 @@ public: The constructed matrix can further be assigned to another matrix or matrix expression or can be allocated with Mat::create . In the former case, the old content is de-referenced. */ - Mat(); + Mat() CV_NOEXCEPT; /** @overload @param rows Number of rows in a 2D array. @@ -2184,7 +2193,7 @@ public: typedef MatConstIterator_<_Tp> const_iterator; //! 
default constructor - Mat_(); + Mat_() CV_NOEXCEPT; //! equivalent to Mat(_rows, _cols, DataType<_Tp>::type) Mat_(int _rows, int _cols); //! constructor that sets each matrix element to specified value @@ -2376,7 +2385,7 @@ class CV_EXPORTS UMat { public: //! default constructor - UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT); + UMat(UMatUsageFlags usageFlags = USAGE_DEFAULT) CV_NOEXCEPT; //! constructs 2D matrix of the specified size and type // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) UMat(int rows, int cols, int type, UMatUsageFlags usageFlags = USAGE_DEFAULT); @@ -2397,20 +2406,11 @@ public: UMat(const UMat& m, const Rect& roi); UMat(const UMat& m, const Range* ranges); UMat(const UMat& m, const std::vector& ranges); + + // FIXIT copyData=false is not implemented, drop this in favor of cv::Mat (OpenCV 5.0) //! builds matrix from std::vector with or without copying the data template explicit UMat(const std::vector<_Tp>& vec, bool copyData=false); - //! builds matrix from cv::Vec; the data is copied by default - template explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true); - //! builds matrix from cv::Matx; the data is copied by default - template explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true); - //! builds matrix from a 2D point - template explicit UMat(const Point_<_Tp>& pt, bool copyData=true); - //! builds matrix from a 3D point - template explicit UMat(const Point3_<_Tp>& pt, bool copyData=true); - //! builds matrix from comma initializer - template explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer); - //! destructor - calls release() ~UMat(); //! assignment operators diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index d6296f8e2e..ff8297ffa4 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -111,7 +111,7 @@ _InputArray::_InputArray(const std::vector<_Tp>& vec) template inline _InputArray::_InputArray(const std::array<_Tp, _Nm>& arr) -{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ, arr.data(), Size(1, _Nm)); } template inline _InputArray::_InputArray(const std::array& arr) @@ -169,7 +169,7 @@ template inline _InputArray _InputArray::rawIn(const std::array<_Tp, _Nm>& arr) { _InputArray v; - v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_READ; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_READ; v.obj = (void*)arr.data(); v.sz = Size(1, _Nm); return v; @@ -191,7 +191,7 @@ inline bool _InputArray::isUMatVector() const { return kind() == _InputArray::S inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR || - kind() == _InputArray::STD_ARRAY; } + (kind() == _InputArray::MATX && (sz.width <= 1 || sz.height <= 1)); } inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; } inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } @@ -210,7 +210,7 @@ _OutputArray::_OutputArray(std::vector<_Tp>& vec) template inline _OutputArray::_OutputArray(std::array<_Tp, _Nm>& arr) -{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } +{ 
init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } template inline _OutputArray::_OutputArray(std::array& arr) @@ -242,7 +242,7 @@ _OutputArray::_OutputArray(const std::vector<_Tp>& vec) template inline _OutputArray::_OutputArray(const std::array<_Tp, _Nm>& arr) -{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE, arr.data(), Size(1, _Nm)); } template inline _OutputArray::_OutputArray(const std::array& arr) @@ -315,7 +315,7 @@ template inline _OutputArray _OutputArray::rawOut(std::array<_Tp, _Nm>& arr) { _OutputArray v; - v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_WRITE; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_WRITE; v.obj = (void*)arr.data(); v.sz = Size(1, _Nm); return v; @@ -336,7 +336,7 @@ _InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec) template inline _InputOutputArray::_InputOutputArray(std::array<_Tp, _Nm>& arr) -{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } template inline _InputOutputArray::_InputOutputArray(std::array& arr) @@ -368,7 +368,7 @@ _InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec) template inline _InputOutputArray::_InputOutputArray(const std::array<_Tp, _Nm>& arr) -{ init(FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } +{ init(FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW, arr.data(), Size(1, _Nm)); } template inline _InputOutputArray::_InputOutputArray(const std::array& arr) @@ -443,7 +443,7 @@ template inline _InputOutputArray _InputOutputArray::rawInOut(std::array<_Tp, _Nm>& arr) { _InputOutputArray v; - v.flags = FIXED_TYPE + FIXED_SIZE + STD_ARRAY + traits::Type<_Tp>::value + ACCESS_RW; + v.flags = FIXED_TYPE + FIXED_SIZE + MATX + traits::Type<_Tp>::value + ACCESS_RW; v.obj = (void*)arr.data(); v.sz = Size(1, _Nm); return v; @@ -1116,11 +1116,11 @@ void Mat::push_back(const std::vector<_Tp>& v) ///////////////////////////// MatSize //////////////////////////// inline -MatSize::MatSize(int* _p) +MatSize::MatSize(int* _p) CV_NOEXCEPT : p(_p) {} inline -int MatSize::dims() const +int MatSize::dims() const CV_NOEXCEPT { return (p - 1)[0]; } @@ -1153,13 +1153,13 @@ int& MatSize::operator[](int i) } inline -MatSize::operator const int*() const +MatSize::operator const int*() const CV_NOEXCEPT { return p; } inline -bool MatSize::operator != (const MatSize& sz) const +bool MatSize::operator != (const MatSize& sz) const CV_NOEXCEPT { return !(*this == sz); } @@ -1169,25 +1169,25 @@ bool MatSize::operator != (const MatSize& sz) const ///////////////////////////// MatStep //////////////////////////// inline -MatStep::MatStep() +MatStep::MatStep() CV_NOEXCEPT { p = buf; p[0] = p[1] = 0; } inline -MatStep::MatStep(size_t s) +MatStep::MatStep(size_t s) CV_NOEXCEPT { p = buf; p[0] = s; p[1] = 0; } inline -const size_t& MatStep::operator[](int i) const +const size_t& MatStep::operator[](int i) const CV_NOEXCEPT { return p[i]; } inline -size_t& MatStep::operator[](int i) +size_t& MatStep::operator[](int i) CV_NOEXCEPT { return p[i]; } @@ -1210,7 +1210,7 @@ inline MatStep& MatStep::operator = (size_t s) 
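Editorial note (not part of the patch): the hunks above replace the dedicated `STD_ARRAY` kind with `MATX`, and `isVector()` now also accepts single-row/single-column `MATX` data, so `std::array` keeps behaving like a vector for InputArray consumers. A minimal sketch of the resulting behavior, assuming only OpenCV core:

```cpp
#include <array>
#include <opencv2/core.hpp>

int main()
{
    std::array<float, 4> a = {1.f, 2.f, 3.f, 4.f};
    cv::_InputArray ia(a);                          // wraps a.data() as Size(1, 4)
    CV_Assert(ia.kind() == cv::_InputArray::MATX);  // formerly STD_ARRAY
    CV_Assert(ia.isVector());                       // 1xN / Nx1 MATX counts as a vector
    CV_Assert(cv::sum(ia)[0] == 10.0);              // regular InputArray consumers still work
    return 0;
}
```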
////////////////////////////// Mat_<_Tp> //////////////////////////// template inline -Mat_<_Tp>::Mat_() +Mat_<_Tp>::Mat_() CV_NOEXCEPT : Mat() { flags = (flags & ~CV_MAT_TYPE_MASK) + traits::Type<_Tp>::value; diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 3315832654..3a76be2353 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -70,10 +70,12 @@ class CV_EXPORTS Image2D; class CV_EXPORTS_W_SIMPLE Device { public: - CV_WRAP Device(); + CV_WRAP Device() CV_NOEXCEPT; explicit Device(void* d); Device(const Device& d); Device& operator = (const Device& d); + Device(Device&& d) CV_NOEXCEPT; + Device& operator = (Device&& d) CV_NOEXCEPT; CV_WRAP ~Device(); void set(void* d); @@ -245,11 +247,13 @@ protected: class CV_EXPORTS Context { public: - Context(); + Context() CV_NOEXCEPT; explicit Context(int dtype); //!< @deprecated ~Context(); Context(const Context& c); Context& operator= (const Context& c); + Context(Context&& c) CV_NOEXCEPT; + Context& operator = (Context&& c) CV_NOEXCEPT; /** @deprecated */ bool create(); @@ -298,10 +302,12 @@ public: class CV_EXPORTS Platform { public: - Platform(); + Platform() CV_NOEXCEPT; ~Platform(); Platform(const Platform& p); Platform& operator = (const Platform& p); + Platform(Platform&& p) CV_NOEXCEPT; + Platform& operator = (Platform&& p) CV_NOEXCEPT; void* ptr() const; @@ -357,11 +363,13 @@ void initializeContextFromHandle(Context& ctx, void* platform, void* context, vo class CV_EXPORTS Queue { public: - Queue(); + Queue() CV_NOEXCEPT; explicit Queue(const Context& c, const Device& d=Device()); ~Queue(); Queue(const Queue& q); Queue& operator = (const Queue& q); + Queue(Queue&& q) CV_NOEXCEPT; + Queue& operator = (Queue&& q) CV_NOEXCEPT; bool create(const Context& c=Context(), const Device& d=Device()); void finish(); @@ -384,7 +392,7 @@ class CV_EXPORTS KernelArg public: enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 }; KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0); - KernelArg(); + KernelArg() CV_NOEXCEPT; static KernelArg Local(size_t localMemSize) { return KernelArg(LOCAL, 0, 1, 1, 0, localMemSize); } @@ -421,13 +429,15 @@ public: class CV_EXPORTS Kernel { public: - Kernel(); + Kernel() CV_NOEXCEPT; Kernel(const char* kname, const Program& prog); Kernel(const char* kname, const ProgramSource& prog, const String& buildopts = String(), String* errmsg=0); ~Kernel(); Kernel(const Kernel& k); Kernel& operator = (const Kernel& k); + Kernel(Kernel&& k) CV_NOEXCEPT; + Kernel& operator = (Kernel&& k) CV_NOEXCEPT; bool empty() const; bool create(const char* kname, const Program& prog); @@ -498,12 +508,13 @@ protected: class CV_EXPORTS Program { public: - Program(); + Program() CV_NOEXCEPT; Program(const ProgramSource& src, const String& buildflags, String& errmsg); Program(const Program& prog); - Program& operator = (const Program& prog); + Program(Program&& prog) CV_NOEXCEPT; + Program& operator = (Program&& prog) CV_NOEXCEPT; ~Program(); bool create(const ProgramSource& src, @@ -544,13 +555,15 @@ class CV_EXPORTS ProgramSource public: typedef uint64 hash_t; // deprecated - ProgramSource(); + ProgramSource() CV_NOEXCEPT; explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash); explicit ProgramSource(const String& prog); // deprecated explicit ProgramSource(const char* prog); // deprecated 
     ~ProgramSource();
     ProgramSource(const ProgramSource& prog);
     ProgramSource& operator = (const ProgramSource& prog);
+    ProgramSource(ProgramSource&& prog) CV_NOEXCEPT;
+    ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT;
 
     const String& source() const; // deprecated
     hash_t hash() const; // deprecated
@@ -614,7 +627,7 @@ protected:
 class CV_EXPORTS PlatformInfo
 {
 public:
-    PlatformInfo();
+    PlatformInfo() CV_NOEXCEPT;
     /**
      * @param id pointer cl_platform_id (cl_platform_id*)
      */
@@ -623,10 +636,17 @@ public:
     PlatformInfo(const PlatformInfo& i);
     PlatformInfo& operator =(const PlatformInfo& i);
+    PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT;
+    PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT;
 
     String name() const;
     String vendor() const;
+
+    /// See CL_PLATFORM_VERSION
     String version() const;
+    int versionMajor() const;
+    int versionMinor() const;
+
     int deviceNumber() const;
     void getDevice(Device& device, int d) const;
@@ -678,7 +698,7 @@ CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const Str
 class CV_EXPORTS Image2D
 {
 public:
-    Image2D();
+    Image2D() CV_NOEXCEPT;
 
     /**
     @param src UMat object from which to get image properties and data
@@ -691,6 +711,8 @@ public:
     ~Image2D();
 
     Image2D & operator = (const Image2D & i);
+    Image2D(Image2D &&) CV_NOEXCEPT;
+    Image2D &operator=(Image2D &&) CV_NOEXCEPT;
 
     /** Indicates if creating an aliased image should succeed.
     Depends on the underlying platform and the dimensions of the UMat.
@@ -743,9 +765,11 @@ public:
     /** Get associated ocl::Context */
     Context& getContext() const;
-    /** Get associated ocl::Device */
+    /** Get the single default associated ocl::Device */
     Device& getDevice() const;
-    /** Get associated ocl::Queue */
+    /** Get the single ocl::Queue that is associated with the ocl::Context and
+     * the single default ocl::Device
+     */
     Queue& getQueue() const;
 
     bool useOpenCL() const;
diff --git a/modules/core/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp b/modules/core/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp
new file mode 100644
index 0000000000..b172cac34d
--- /dev/null
+++ b/modules/core/include/opencv2/core/parallel/backend/parallel_for.openmp.hpp
@@ -0,0 +1,72 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
+#define OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
+
+#include "opencv2/core/parallel/parallel_backend.hpp"
+
+#if !defined(_OPENMP) && !defined(OPENCV_SKIP_OPENMP_PRESENSE_CHECK)
+#error "This file must be compiled with OpenMP enabled"
+#endif
+
+#include <omp.h>
+
+namespace cv { namespace parallel { namespace openmp {
+
+/** OpenMP parallel_for API implementation
+ *
+ * @sa setParallelForBackend
+ * @ingroup core_parallel_backend
+ */
+class ParallelForBackend : public ParallelForAPI
+{
+protected:
+    int numThreads;
+    int numThreadsMax;
+public:
+    ParallelForBackend()
+    {
+        numThreads = 0;
+        numThreadsMax = omp_get_max_threads();
+    }
+
+    virtual ~ParallelForBackend() {}
+
+    virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) CV_OVERRIDE
+    {
+#pragma omp parallel for schedule(dynamic) num_threads(numThreads > 0 ? numThreads : numThreadsMax)
+        for (int i = 0; i < tasks; ++i)
+            body_callback(i, i + 1, callback_data);
+    }
+
+    virtual int getThreadNum() const CV_OVERRIDE
+    {
+        return omp_get_thread_num();
+    }
+
+    virtual int getNumThreads() const CV_OVERRIDE
+    {
+        return numThreads > 0
+               ? numThreads
+               : numThreadsMax;
+    }
+
+    virtual int setNumThreads(int nThreads) CV_OVERRIDE
+    {
+        int oldNumThreads = numThreads;
+        numThreads = nThreads;
+        // nothing needed as numThreads is used in #pragma omp parallel for directly
+        return oldNumThreads;
+    }
+
+    const char* getName() const CV_OVERRIDE
+    {
+        return "openmp";
+    }
+};
+
+}}}  // namespace
+
+#endif  // OPENCV_CORE_PARALLEL_FOR_OPENMP_HPP
diff --git a/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp b/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp
new file mode 100644
index 0000000000..264def5f50
--- /dev/null
+++ b/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp
@@ -0,0 +1,153 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_FOR_TBB_HPP
+#define OPENCV_CORE_PARALLEL_FOR_TBB_HPP
+
+#include "opencv2/core/parallel/parallel_backend.hpp"
+#include <opencv2/core/utils/logger.hpp>
+
+#ifndef TBB_SUPPRESS_DEPRECATED_MESSAGES  // suppress warning
+#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
+#endif
+#include "tbb/tbb.h"
+#if !defined(TBB_INTERFACE_VERSION)
+#error "Unknown/unsupported TBB version"
+#endif
+
+#if TBB_INTERFACE_VERSION >= 8000
+#include "tbb/task_arena.h"
+#endif
+
+namespace cv { namespace parallel { namespace tbb {
+
+using namespace ::tbb;
+
+#if TBB_INTERFACE_VERSION >= 8000
+static tbb::task_arena& getArena()
+{
+    static tbb::task_arena tbbArena(tbb::task_arena::automatic);
+    return tbbArena;
+}
+#else
+static tbb::task_scheduler_init& getScheduler()
+{
+    static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
+    return tbbScheduler;
+}
+#endif
+
+/** TBB parallel_for API implementation
+ *
+ * @sa setParallelForBackend
+ * @ingroup core_parallel_backend
+ */
+class ParallelForBackend : public ParallelForAPI
+{
+protected:
+    int numThreads;
+    int numThreadsMax;
+public:
+    ParallelForBackend()
+    {
+        CV_LOG_INFO(NULL, "Initializing TBB parallel backend: TBB_INTERFACE_VERSION=" << TBB_INTERFACE_VERSION);
+        numThreads = 0;
+#if TBB_INTERFACE_VERSION >= 8000
+        (void)getArena();
+#else
+        (void)getScheduler();
+#endif
+    }
+
+    virtual ~ParallelForBackend() {}
+
+    class CallbackProxy
+    {
+        const FN_parallel_for_body_cb_t& callback;
+        void* const callback_data;
+        const int tasks;
+    public:
+        inline CallbackProxy(int tasks_, FN_parallel_for_body_cb_t& callback_, void* callback_data_)
+            : callback(callback_), callback_data(callback_data_), tasks(tasks_)
+        {
+            // nothing
+        }
+
+        void operator()(const tbb::blocked_range<int>& range) const
+        {
+            this->callback(range.begin(), range.end(), callback_data);
+        }
+
+        void operator()() const
+        {
+            tbb::parallel_for(tbb::blocked_range<int>(0, tasks), *this);
+        }
+    };
+
+    virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) CV_OVERRIDE
+    {
+        CallbackProxy task(tasks, body_callback, callback_data);
+#if TBB_INTERFACE_VERSION >= 8000
+        getArena().execute(task);
+#else
+        task();
+#endif
+    }
+
+    virtual int getThreadNum() const CV_OVERRIDE
+    {
+#if TBB_INTERFACE_VERSION >= 9100
+        return tbb::this_task_arena::current_thread_index();
+#elif TBB_INTERFACE_VERSION >= 8000
+        return tbb::task_arena::current_thread_index();
+#else
+        return 0;
+#endif
+    }
+
+    virtual int getNumThreads() const CV_OVERRIDE
+    {
+#if TBB_INTERFACE_VERSION >= 9100
+        return getArena().max_concurrency();
+#elif TBB_INTERFACE_VERSION >= 8000
+        return numThreads > 0
+               ? numThreads
+               : tbb::task_scheduler_init::default_num_threads();
+#else
+        return getScheduler().is_active()
+               ? numThreads
+               : tbb::task_scheduler_init::default_num_threads();
+#endif
+    }
+
+    virtual int setNumThreads(int nThreads) CV_OVERRIDE
+    {
+        int oldNumThreads = numThreads;
+        numThreads = nThreads;
+
+#if TBB_INTERFACE_VERSION >= 8000
+        auto& tbbArena = getArena();
+        if (tbbArena.is_active())
+            tbbArena.terminate();
+        if (numThreads > 0)
+            tbbArena.initialize(numThreads);
+#else
+        auto& tbbScheduler = getScheduler();
+        if (tbbScheduler.is_active())
+            tbbScheduler.terminate();
+        if (numThreads > 0)
+            tbbScheduler.initialize(numThreads);
+#endif
+        return oldNumThreads;
+    }
+
+    const char* getName() const CV_OVERRIDE
+    {
+        return "tbb";
+    }
+};
+
+}}}  // namespace
+
+#endif  // OPENCV_CORE_PARALLEL_FOR_TBB_HPP
diff --git a/modules/core/include/opencv2/core/parallel/parallel_backend.hpp b/modules/core/include/opencv2/core/parallel/parallel_backend.hpp
new file mode 100644
index 0000000000..c3e8333c1c
--- /dev/null
+++ b/modules/core/include/opencv2/core/parallel/parallel_backend.hpp
@@ -0,0 +1,90 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_PARALLEL_BACKEND_HPP
+#define OPENCV_CORE_PARALLEL_BACKEND_HPP
+
+#include "opencv2/core/cvdef.h"
+#include <memory>
+
+namespace cv { namespace parallel {
+#ifndef CV_API_CALL
+#define CV_API_CALL
+#endif
+
+/** @addtogroup core_parallel_backend
+ * @{
+ * The API below is provided to resolve the problem of CPU resource over-subscription caused by multiple thread pools from different multi-threading frameworks.
+ * This is a common problem when the threading framework OpenCV was compiled with differs from the one used by the user's application.
+ *
+ * Applications can replace the OpenCV `parallel_for()` backend with their own implementation (to reuse the application's thread pool).
+ *
+ *
+ * ### Backend API usage examples
+ *
+ * #### Intel TBB
+ *
+ * - include header with simple implementation of TBB backend:
+ *   @snippet parallel_backend/example-tbb.cpp tbb_include
+ * - execute backend replacement code:
+ *   @snippet parallel_backend/example-tbb.cpp tbb_backend
+ * - configuration of compiler/linker options is the responsibility of the application's build scripts
+ *
+ * #### OpenMP
+ *
+ * - include header with simple implementation of OpenMP backend:
+ *   @snippet parallel_backend/example-openmp.cpp openmp_include
+ * - execute backend replacement code:
+ *   @snippet parallel_backend/example-openmp.cpp openmp_backend
+ * - configuration of compiler/linker options is the responsibility of the application's build scripts
+ *
+ *
+ * ### Plugins support
+ *
+ * Runtime configuration options:
+ * - change backend priority: `OPENCV_PARALLEL_PRIORITY_<backend_name>=9999`
+ * - disable backend: `OPENCV_PARALLEL_PRIORITY_<backend_name>=0`
+ * - specify list of backends with high priority (>100000): `OPENCV_PARALLEL_PRIORITY_LIST=TBB,OPENMP`. Unknown backends are registered as new plugins.
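 *
 * ### Example: switching the backend at runtime (editorial sketch, not part of the original patch)
 *
 * The string-based overload declared below selects a backend by name; the name is
 * resolved against built-in backends and plugins, so `"tbb"` here is an assumption
 * that the TBB backend/plugin is actually available — the call returns false otherwise:
 * ```
 * #include <opencv2/core.hpp>
 * #include <opencv2/core/parallel/parallel_backend.hpp>
 *
 * int main()
 * {
 *     // Not thread-safe: call from main() before other OpenCV processing
 *     // functions run and before any worker threads are created.
 *     if (!cv::parallel::setParallelForBackend("tbb"))
 *     {
 *         // request failed; OpenCV keeps the current built-in backend
 *     }
 *     cv::parallel_for_(cv::Range(0, 1000), [](const cv::Range& r)
 *     {
 *         for (int i = r.start; i < r.end; i++)
 *         {
 *             // process item i
 *         }
 *     });
 *     return 0;
 * }
 * ```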
+ *
+ */
+
+/** Interface for parallel_for backend implementations
+ *
+ * @sa setParallelForBackend
+ */
+class CV_EXPORTS ParallelForAPI
+{
+public:
+    virtual ~ParallelForAPI();
+
+    typedef void (CV_API_CALL *FN_parallel_for_body_cb_t)(int start, int end, void* data);
+
+    virtual void parallel_for(int tasks, FN_parallel_for_body_cb_t body_callback, void* callback_data) = 0;
+
+    virtual int getThreadNum() const = 0;
+
+    virtual int getNumThreads() const = 0;
+
+    virtual int setNumThreads(int nThreads) = 0;
+
+    virtual const char* getName() const = 0;
+};
+
+/** @brief Replace OpenCV parallel_for backend
+ *
+ * An application can replace the OpenCV `parallel_for()` backend with its own implementation.
+ *
+ * @note This call is not thread-safe. Consider calling this function from `main()` before any other OpenCV processing functions (and without any other created threads).
+ */
+CV_EXPORTS void setParallelForBackend(const std::shared_ptr<ParallelForAPI>& api, bool propagateNumThreads = true);
+
+/** @brief Change OpenCV parallel_for backend
+ *
+ * @note This call is not thread-safe. Consider calling this function from `main()` before any other OpenCV processing functions (and without any other created threads).
+ */
+CV_EXPORTS_W bool setParallelForBackend(const std::string& backendName, bool propagateNumThreads = true);
+
+//! @}
+}}  // namespace
+#endif  // OPENCV_CORE_PARALLEL_BACKEND_HPP
diff --git a/modules/core/include/opencv2/core/quaternion.hpp b/modules/core/include/opencv2/core/quaternion.hpp
index c72ee8c37f..8c21501e3f 100644
--- a/modules/core/include/opencv2/core/quaternion.hpp
+++ b/modules/core/include/opencv2/core/quaternion.hpp
@@ -27,6 +27,7 @@
 #define OPENCV_CORE_QUATERNION_HPP
 
 #include <opencv2/core.hpp>
+#include <opencv2/core/utils/logger.hpp>
 #include <iostream>
 namespace cv
 {
@@ -51,6 +52,83 @@ enum QuatAssumeType
     QUAT_ASSUME_UNIT
 };
 
+class QuatEnum
+{
+public:
+    /** @brief Enum of Euler angles type.
+     *
+     * Without considering the possibility of using two different conventions for the definition of the rotation axes,
+     * there exist twelve possible sequences of rotation axes, divided into two groups:
+     * - Proper Euler angles (Z-X-Z, X-Y-X, Y-Z-Y, Z-Y-Z, X-Z-X, Y-X-Y)
+     * - Tait–Bryan angles (X-Y-Z, Y-Z-X, Z-X-Y, X-Z-Y, Z-Y-X, Y-X-Z).
+     *
+     * The three elemental rotations may be [extrinsic](https://en.wikipedia.org/wiki/Euler_angles#Definition_by_extrinsic_rotations)
+     * (rotations about the axes *xyz* of the original coordinate system, which is assumed to remain motionless),
+     * or [intrinsic](https://en.wikipedia.org/wiki/Euler_angles#Definition_by_intrinsic_rotations) (rotations about the axes of the rotating coordinate system *XYZ*, attached to the moving body, which changes its orientation after each elemental rotation).
+     *
+     * Extrinsic and intrinsic rotations are closely related: an intrinsic rotation sequence is equivalent to the extrinsic sequence with the same angles applied about the axes in reverse order (see the example at the end of this description).
+     *
+     * The Euler angles are defined as follows:
+     * - \f$\theta_1 \f$ represents the first rotation angle,
+     * - \f$\theta_2 \f$ represents the second rotation angle,
+     * - \f$\theta_3 \f$ represents the third rotation angle.
+     *
+     * For intrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by: \f[R = X(\theta_1) Y(\theta_2) Z(\theta_3) \f]
+     * For extrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by: \f[R = Z({\theta_3}) Y({\theta_2}) X({\theta_1})\f]
+     * where
+     * \f[X({\theta})={\begin{bmatrix}1&0&0\\0&\cos{\theta}&-\sin{\theta}\\0&\sin{\theta}&\cos{\theta}\end{bmatrix}},
+     * Y({\theta})={\begin{bmatrix}\cos{\theta}&0&\sin{\theta}\\0&1&0\\-\sin{\theta}&0&\cos{\theta}\end{bmatrix}},
+     * Z({\theta})={\begin{bmatrix}\cos{\theta}&-\sin{\theta}&0\\\sin{\theta}&\cos{\theta}&0\\0&0&1\end{bmatrix}}.
+     * \f]
+     *
+     * The function is designed according to this set of conventions:
+     * - [Right handed](https://en.wikipedia.org/wiki/Right_hand_rule) reference frames are adopted, and the [right hand rule](https://en.wikipedia.org/wiki/Right_hand_rule) is used to determine the sign of angles.
+     * - Each matrix is meant to represent an [active rotation](https://en.wikipedia.org/wiki/Active_and_passive_transformation) (the composing and composed matrices
+     *   are supposed to act on the coordinates of vectors defined in the initial fixed reference frame and give as a result the coordinates of a rotated vector defined in the same reference frame).
+     * - For \f$\theta_1\f$ and \f$\theta_3\f$, the valid range is (−π, π].
+     *
+     *   For \f$\theta_2\f$, the valid range is [−π/2, π/2] or [0, π].
+     *
+     * For Tait–Bryan angles, the valid range of \f$\theta_2\f$ is [−π/2, π/2]. When transforming a quaternion to Euler angles, the solution of Euler angles is unique provided that \f$\theta_2 \in (−π/2, π/2)\f$.
+     * If \f$\theta_2 = −π/2\f$ or \f$\theta_2 = π/2\f$, there are infinite solutions. The common name for this situation is gimbal lock.
+     * For proper Euler angles, the valid range of \f$\theta_2\f$ is [0, π]. The solutions of Euler angles are unique provided that \f$\theta_2 \in (0, π)\f$. If \f$\theta_2 = 0\f$ or \f$\theta_2 = π\f$,
+     * there are infinite solutions and gimbal lock will occur.
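+     *
+     * For example (an editorial sketch appended to this description, not part of the
+     * original patch; it uses createFromEulerAngles and toEulerAngles declared later
+     * in this header):
+     * ```
+     * Vec3d angles(0.1, 0.2, 0.3); // theta1, theta2, theta3
+     * // Intrinsic X-Y-Z equals extrinsic Z-Y-X with the angle order reversed:
+     * // both compose to q_X(0.1) * q_Y(0.2) * q_Z(0.3).
+     * Quatd qi = Quatd::createFromEulerAngles(angles, QuatEnum::INT_XYZ);
+     * Quatd qe = Quatd::createFromEulerAngles(Vec3d(0.3, 0.2, 0.1), QuatEnum::EXT_ZYX);
+     * // qi and qe represent the same rotation; away from gimbal lock the
+     * // angles are recovered uniquely:
+     * Vec3d back = qi.toEulerAngles(QuatEnum::INT_XYZ); // back ~= angles
+     * ```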
+ */ + enum EulerAnglesType + { + INT_XYZ, ///< Intrinsic rotations with the Euler angles type X-Y-Z + INT_XZY, ///< Intrinsic rotations with the Euler angles type X-Z-Y + INT_YXZ, ///< Intrinsic rotations with the Euler angles type Y-X-Z + INT_YZX, ///< Intrinsic rotations with the Euler angles type Y-Z-X + INT_ZXY, ///< Intrinsic rotations with the Euler angles type Z-X-Y + INT_ZYX, ///< Intrinsic rotations with the Euler angles type Z-Y-X + INT_XYX, ///< Intrinsic rotations with the Euler angles type X-Y-X + INT_XZX, ///< Intrinsic rotations with the Euler angles type X-Z-X + INT_YXY, ///< Intrinsic rotations with the Euler angles type Y-X-Y + INT_YZY, ///< Intrinsic rotations with the Euler angles type Y-Z-Y + INT_ZXZ, ///< Intrinsic rotations with the Euler angles type Z-X-Z + INT_ZYZ, ///< Intrinsic rotations with the Euler angles type Z-Y-Z + + EXT_XYZ, ///< Extrinsic rotations with the Euler angles type X-Y-Z + EXT_XZY, ///< Extrinsic rotations with the Euler angles type X-Z-Y + EXT_YXZ, ///< Extrinsic rotations with the Euler angles type Y-X-Z + EXT_YZX, ///< Extrinsic rotations with the Euler angles type Y-Z-X + EXT_ZXY, ///< Extrinsic rotations with the Euler angles type Z-X-Y + EXT_ZYX, ///< Extrinsic rotations with the Euler angles type Z-Y-X + EXT_XYX, ///< Extrinsic rotations with the Euler angles type X-Y-X + EXT_XZX, ///< Extrinsic rotations with the Euler angles type X-Z-X + EXT_YXY, ///< Extrinsic rotations with the Euler angles type Y-X-Y + EXT_YZY, ///< Extrinsic rotations with the Euler angles type Y-Z-Y + EXT_ZXZ, ///< Extrinsic rotations with the Euler angles type Z-X-Z + EXT_ZYZ, ///< Extrinsic rotations with the Euler angles type Z-Y-Z + #ifndef CV_DOXYGEN + EULER_ANGLES_MAX_VALUE + #endif + }; + +}; + template class Quat; template std::ostream& operator<<(std::ostream&, const Quat<_Tp>&); @@ -133,9 +211,9 @@ class Quat { static_assert(std::is_floating_point<_Tp>::value, "Quaternion only make sense with type of float or double"); using value_type = _Tp; - public: static constexpr _Tp CV_QUAT_EPS = (_Tp)1.e-6; + static constexpr _Tp CV_QUAT_CONVERT_THRESHOLD = (_Tp)1.e-6; Quat(); @@ -182,6 +260,41 @@ public: */ static Quat<_Tp> createFromRvec(InputArray rvec); + /** + * @brief + * from Euler angles + * + * A quaternion can be generated from Euler angles by combining the quaternion representations of the Euler rotations. + * + * For example, if we use intrinsic rotations in the order of X-Y-Z,\f$\theta_1 \f$ is rotation around the X-axis, \f$\theta_2 \f$ is rotation around the Y-axis, + * \f$\theta_3 \f$ is rotation around the Z-axis. The final quaternion q can be calculated by + * + * \f[ {q} = q_{X, \theta_1} q_{Y, \theta_2} q_{Z, \theta_3}\f] + * where \f$ q_{X, \theta_1} \f$ is created from @ref createFromXRot, \f$ q_{Y, \theta_2} \f$ is created from @ref createFromYRot, + * \f$ q_{Z, \theta_3} \f$ is created from @ref createFromZRot. + * @param angles the Euler angles in a vector of length 3 + * @param eulerAnglesType the convertion Euler angles type + */ + static Quat<_Tp> createFromEulerAngles(const Vec<_Tp, 3> &angles, QuatEnum::EulerAnglesType eulerAnglesType); + + /** + * @brief get a quaternion from a rotation about the Y-axis by \f$\theta\f$ . + * \f[q = \cos(\theta/2)+0 i+ sin(\theta/2) j +0k \f] + */ + static Quat<_Tp> createFromYRot(const _Tp theta); + + /** + * @brief get a quaternion from a rotation about the X-axis by \f$\theta\f$ . 
+     * \f[q = \cos(\theta/2)+\sin(\theta/2) i +0 j +0 k \f]
+     */
+    static Quat<_Tp> createFromXRot(const _Tp theta);
+
+    /**
+     * @brief get a quaternion from a rotation about the Z-axis by \f$\theta\f$.
+     * \f[q = \cos(\theta/2)+0 i +0 j +\sin(\theta/2) k \f]
+     */
+    static Quat<_Tp> createFromZRot(const _Tp theta);
+
     /**
      * @brief a way to get element.
      * @param index over a range [0, 3].
@@ -277,17 +390,18 @@ public:
      * For example
      * ```
      * Quatd q(1,2,3,4);
-     * power(q, 2);
+     * power(q, 2.0);
      *
      * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT;
      * double angle = CV_PI;
      * Vec3d axis{0, 0, 1};
      * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle
-     * power(q1, 2, assumeUnit);//This assumeUnit means q1 is a unit quaternion.
+     * power(q1, 2.0, assumeUnit);//This assumeUnit means q1 is a unit quaternion.
      * ```
+     * @note the type of the index \f$x\f$ should be the same as the value type of the quaternion.
      */
-    template
-    friend Quat power(const Quat &q, _T x, QuatAssumeType assumeUnit);
+    template
+    friend Quat power(const Quat &q, const T x, QuatAssumeType assumeUnit);
 
     /**
      * @brief return the value of power function with index \f$x\f$.
@@ -298,17 +412,16 @@ public:
      * For example
      * ```
      * Quatd q(1,2,3,4);
-     * q.power(2);
+     * q.power(2.0);
      *
     * QuatAssumeType assumeUnit = QUAT_ASSUME_UNIT;
      * double angle = CV_PI;
      * Vec3d axis{0, 0, 1};
      * Quatd q1 = Quatd::createFromAngleAxis(angle, axis); //generate a unit quat by axis and angle
-     * q1.power(2, assumeUnit); //This assumeUnit means q1 is a unit quaternion
+     * q1.power(2.0, assumeUnit); //This assumeUnit means q1 is a unit quaternion
      * ```
      */
-    template
-    Quat<_Tp> power(_T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+    Quat<_Tp> power(const _Tp x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
 
     /**
      * @brief return \f$\sqrt{q}\f$.
@@ -811,8 +924,8 @@ public:
     /**
      * @brief transform a quaternion to a 3x3 rotation matrix.
      * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion is assumed to be a unit quaternion and
-     * this function will save some computations. Otherwise, this function will normalized this
-     * quaternion at first then to do the transformation.
+     * this function will save some computations. Otherwise, this function will normalize this
+     * quaternion at first then do the transformation.
      *
     * @note Matrix A which is to be rotated should have the form
      * \f[\begin{bmatrix}
@@ -845,8 +958,8 @@ public:
     /**
      * @brief transform a quaternion to a 4x4 rotation matrix.
      * @param assumeUnit if QUAT_ASSUME_UNIT, this quaternion is assumed to be a unit quaternion and
-     * this function will save some computations. Otherwise, this function will normalized this
-     * quaternion at first then to do the transformation.
+     * this function will save some computations. Otherwise, this function will normalize this
+     * quaternion at first then do the transformation.
     *
      * The operation is similar to toRotMat3x3
      * except that the points matrix should have the form
      * \f[\begin{bmatrix}
@@ -859,6 +972,7 @@
      *
      * @sa toRotMat3x3
      */
+
     Matx<_Tp, 4, 4> toRotMat4x4(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
 
     /**
@@ -1073,46 +1187,434 @@ public:
                          const Quat<_Tp> &q2, const Quat<_Tp> &q3, const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
-
+
+    /**
+     * @brief Return opposite quaternion \f$-p\f$
+     * which satisfies \f$p + (-p) = 0.\f$
+     *
+     * For example
+     * ```
+     * Quatd q{1, 2, 3, 4};
+     * std::cout << -q << std::endl; // [-1, -2, -3, -4]
+     * ```
+     */
     Quat<_Tp> operator-() const;
 
+    /**
+     * @brief return true if two quaternions p and q are nearly equal, i.e.
+     * when the absolute value of the difference between each \f$p_i\f$ and \f$q_i\f$ is less than CV_QUAT_EPS.
+     */
     bool operator==(const Quat<_Tp>&) const;
 
+    /**
+     * @brief Addition operator of two quaternions p and q.
+     * It returns a new quaternion whose elements are the sums of \f$p_i\f$ and \f$q_i\f$.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p + q << std::endl; //[6, 8, 10, 12]
+     * ```
+     */
     Quat<_Tp> operator+(const Quat<_Tp>&) const;
 
+    /**
+     * @brief Addition assignment operator of two quaternions p and q.
+     * It adds the right operand to the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p += q; // equivalent to p = p + q
+     * std::cout << p << std::endl; //[6, 8, 10, 12]
+     *
+     * ```
+     */
     Quat<_Tp>& operator+=(const Quat<_Tp>&);
 
+    /**
+     * @brief Subtraction operator of two quaternions p and q.
+     * It returns a new quaternion whose elements are the sums of \f$p_i\f$ and \f$-q_i\f$.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p - q << std::endl; //[-4, -4, -4, -4]
+     * ```
+     */
     Quat<_Tp> operator-(const Quat<_Tp>&) const;
 
+    /**
+     * @brief Subtraction assignment operator of two quaternions p and q.
+     * It subtracts the right operand from the left operand and assigns the result to the left operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p -= q; // equivalent to p = p - q
+     * std::cout << p << std::endl; //[-4, -4, -4, -4]
+     *
+     * ```
+     */
     Quat<_Tp>& operator-=(const Quat<_Tp>&);
 
+    /**
+     * @brief Multiplication assignment operator of two quaternions p and q.
+     * It multiplies the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion multiplication:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\
+     * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p *= q; // equivalent to p = p * q
+     * std::cout << p << std::endl; //[-60, 12, 30, 24]
+     * ```
+     */
     Quat<_Tp>& operator*=(const Quat<_Tp>&);
 
-    Quat<_Tp>& operator*=(const _Tp&);
+    /**
+     * @brief Multiplication assignment operator of a quaternion and a scalar.
+     * It multiplies the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &=[w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * p *= s; // equivalent to p = p * s
+     * std::cout << p << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    Quat<_Tp>& operator*=(const _Tp s);
+
+    /**
+     * @brief Multiplication operator of two quaternions p and q.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [p_0, \boldsymbol{u}]*[q_0, \boldsymbol{v}]\\
+     * &=[p_0q_0 - \boldsymbol{u}\cdot \boldsymbol{v}, p_0\boldsymbol{v} + q_0\boldsymbol{u}+ \boldsymbol{u}\times \boldsymbol{v}].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\cdot\f$ means dot product and \f$\times \f$ means cross product.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p * q << std::endl; //[-60, 12, 30, 24]
+     * ```
+     */
     Quat<_Tp> operator*(const Quat<_Tp>&) const;
 
-    Quat<_Tp> operator/(const _Tp&) const;
+    /**
+     * @brief Division operator of a quaternion and a scalar.
+     * It divides the left operand by the right operand.
+     *
+     * Rule of quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z] / s\\
+     * &=[w/s, x/s, y/s, z/s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << p / s << std::endl; //[0.5, 1, 1.5, 2]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    Quat<_Tp> operator/(const _Tp s) const;
+
+    /**
+     * @brief Division operator of two quaternions p and q.
+     * Divides left hand operand by right hand operand.
+     *
+     * Rule of quaternion division with a quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * std::cout << p / q << std::endl; // equivalent to p * q.inv()
+     * ```
+     */
     Quat<_Tp> operator/(const Quat<_Tp>&) const;
 
-    Quat<_Tp>& operator/=(const _Tp&);
+    /**
+     * @brief Division assignment operator of a quaternion and a scalar.
+     * It divides the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z] / s\\
+     * &=[w / s, x / s, y / s, z / s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * p /= s; // equivalent to p = p / s
+     * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    Quat<_Tp>& operator/=(const _Tp s);
+
+    /**
+     * @brief Division assignment operator of two quaternions p and q.
+     * It divides the left operand by the right operand and assigns the result to the left operand.
+     *
+     * Rule of quaternion division with a quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * Quatd q{5, 6, 7, 8};
+     * p /= q; // equivalent to p = p * q.inv()
+     * std::cout << p << std::endl;
+     * ```
+     */
     Quat<_Tp>& operator/=(const Quat<_Tp>&);
 
     _Tp& operator[](std::size_t n);
 
     const _Tp& operator[](std::size_t n) const;
 
-    template
-    friend Quat cv::operator*(const T, const Quat&);
+    /**
+     * @brief Subtraction operator of a scalar and a quaternion.
+     * Subtracts the right hand operand from the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator-(const T s, const Quat&);
 
-    template
-    friend Quat cv::operator*(const Quat&, const T);
+    /**
+     * @brief Subtraction operator of a quaternion and a scalar.
+     * Subtracts the right hand operand from the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << p - scalar << std::endl; //[-1.0, 2, 3, 4]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator-(const Quat&, const T s);
+
+    /**
+     * @brief Addition operator of a scalar and a quaternion.
+     * Adds the right hand operand to the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator+(const T s, const Quat&);
+
+    /**
+     * @brief Addition operator of a quaternion and a scalar.
+     * Adds the right hand operand to the left hand operand.
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double scalar = 2.0;
+     * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator+(const Quat&, const T s);
+
+    /**
+     * @brief Multiplication operator of a scalar and a quaternion.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &=[w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << s * p << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator*(const T s, const Quat&);
+
+    /**
+     * @brief Multiplication operator of a quaternion and a scalar.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z] * s\\
+     * &=[w * s, x * s, y * s, z * s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * Quatd p{1, 2, 3, 4};
+     * double s = 2.0;
+     * std::cout << p * s << std::endl; //[2.0, 4.0, 6.0, 8.0]
+     * ```
+     * @note the type of the scalar should be the same as the value type of the quaternion.
+     */
+    template
+    friend Quat cv::operator*(const Quat&, const T s);
 
     template
     friend std::ostream& cv::operator<<(std::ostream&, const Quat&);
 
+    /**
+     * @brief Transform a quaternion q to Euler angles.
+     *
+     *
+     * When transforming a quaternion \f$q = w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\f$ to Euler angles, the rotation matrix M can be calculated by:
+     * \f[ \begin{aligned} {M} &={\begin{bmatrix}1-2(y^{2}+z^{2})&2(xy-zw)&2(xz+yw)\\2(xy+zw)&1-2(x^{2}+z^{2})&2(yz-xw)\\2(xz-yw)&2(yz+xw)&1-2(x^{2}+y^{2})\end{bmatrix}}\end{aligned}.\f]
+     * On the other hand, the rotation matrix can be obtained from Euler angles.
+     * Using intrinsic rotations with Euler angles type XYZ as an example, where
+     * \f$\theta_1 \f$, \f$\theta_2 \f$, \f$\theta_3 \f$ are the three Euler angles, the rotation matrix R can be calculated by: \f[R = X(\theta_1)Y(\theta_2)Z(\theta_3)
+     * ={\begin{bmatrix}\cos\theta_{2}\cos\theta_{3}&-\cos\theta_{2}\sin\theta_{3}&\sin\theta_{2}\\\cos\theta_{1}\sin\theta_{3}+\cos\theta_{3}\sin\theta_{1}\sin\theta_{2}&\cos\theta_{1}\cos\theta_{3}-\sin\theta_{1}\sin\theta_{2}\sin\theta_{3}&-\cos\theta_{2}\sin\theta_{1}\\\sin\theta_{1}\sin\theta_{3}-\cos\theta_{1}\cos\theta_{3}\sin\theta_{2}&\cos\theta_{3}\sin\theta_{1}+\cos\theta_{1}\sin\theta_{2}\sin\theta_{3}&\cos\theta_{1}\cos\theta_{2}\end{bmatrix}}\f]
+     * Rotation matrices M and R are equal. As long as \f$s_{2} \neq \pm 1\f$, by comparing each element of the two matrices, the solution is \f$\begin{cases} \theta_1 = \arctan2(-m_{23},m_{33})\\\theta_2 = \arcsin(m_{13}) \\\theta_3 = \arctan2(-m_{12},m_{11}) \end{cases}\f$.
+     *
+     * When \f$s_{2}=1\f$ or \f$s_{2}=-1\f$, gimbal lock occurs. The function will prompt "WARNING: Gimbal Lock will occur. Euler angles are non-unique. For intrinsic rotations, we set the third angle to 0, and for extrinsic rotations, we set the first angle to 0.".
+     *
+     * When \f$s_{2}=1\f$,
+     * the rotation matrix R is \f$R = {\begin{bmatrix}0&0&1\\\sin(\theta_1+\theta_3)&\cos(\theta_1+\theta_3)&0\\-\cos(\theta_1+\theta_3)&\sin(\theta_1+\theta_3)&0\end{bmatrix}}\f$.
+     *
+     * The number of solutions is infinite with the condition \f$\begin{cases} \theta_1+\theta_3 = \arctan2(m_{21},m_{22})\\ \theta_2=\pi/2 \end{cases}\ \f$.
+     *
+     * We set \f$\theta_3 = 0\f$, and the solution is \f$\begin{cases} \theta_1=\arctan2(m_{21},m_{22})\\ \theta_2=\pi/2\\ \theta_3=0 \end{cases}\f$.
+     *
+     * When \f$s_{2}=-1\f$,
+     * the rotation matrix R is \f$X_{1}Y_{2}Z_{3}={\begin{bmatrix}0&0&-1\\-\sin(\theta_1-\theta_3)&\cos(\theta_1-\theta_3)&0\\\cos(\theta_1-\theta_3)&\sin(\theta_1-\theta_3)&0\end{bmatrix}}\f$.
+     *
+     * The number of solutions is infinite with the condition \f$\begin{cases} \theta_1-\theta_3 = \arctan2(m_{32},m_{22})\\ \theta_2=-\pi/2 \end{cases}\ \f$.
+     *
+     * We set \f$\theta_3 = 0\f$, and the solution is \f$ \begin{cases}\theta_1=\arctan2(m_{32},m_{22}) \\ \theta_2=-\pi/2\\ \theta_3=0\end{cases}\f$.
+     *
+     * Since \f$\sin \theta \in [-1,1] \f$ and \f$\cos \theta \in [-1,1] \f$, an unnormalized quaternion will cause computational troubles. For this reason, this function will normalize the quaternion at first and @ref QuatAssumeType is not needed.
+     *
+     * When the gimbal lock occurs, we set \f$\theta_3 = 0\f$ for intrinsic rotations or \f$\theta_1 = 0\f$ for extrinsic rotations.
+     *
+     * As a result, for every Euler angles type, we can get the solution as shown in the following table.
+ * EulerAnglesType | Ordinary | \f$\theta_2 = π/2\f$ | \f$\theta_2 = -π/2\f$ + * ------------- | -------------| -------------| ------------- + * INT_XYZ|\f$ \theta_1 = \arctan2(-m_{23},m_{33})\\\theta_2 = \arcsin(m_{13}) \\\theta_3= \arctan2(-m_{12},m_{11}) \f$|\f$ \theta_1=\arctan2(m_{21},m_{22})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{32},m_{22})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * INT_XZY|\f$ \theta_1 = \arctan2(m_{32},m_{22})\\\theta_2 = -\arcsin(m_{12}) \\\theta_3= \arctan2(m_{13},m_{11}) \f$|\f$ \theta_1=\arctan2(m_{31},m_{33})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{23},m_{33})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * INT_YXZ|\f$ \theta_1 = \arctan2(m_{13},m_{33})\\\theta_2 = -\arcsin(m_{23}) \\\theta_3= \arctan2(m_{21},m_{22}) \f$|\f$ \theta_1=\arctan2(m_{12},m_{11})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{12},m_{11})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * INT_YZX|\f$ \theta_1 = \arctan2(-m_{31},m_{11})\\\theta_2 = \arcsin(m_{21}) \\\theta_3= \arctan2(-m_{23},m_{22}) \f$|\f$ \theta_1=\arctan2(m_{13},m_{33})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{13},m_{12})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * INT_ZXY|\f$ \theta_1 = \arctan2(-m_{12},m_{22})\\\theta_2 = \arcsin(m_{32}) \\\theta_3= \arctan2(-m_{31},m_{33}) \f$|\f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * INT_ZYX|\f$ \theta_1 = \arctan2(m_{21},m_{11})\\\theta_2 = \arcsin(-m_{31}) \\\theta_3= \arctan2(m_{32},m_{33}) \f$|\f$ \theta_1=\arctan2(m_{23},m_{22})\\ \theta_2=\pi/2\\ \theta_3=0 \f$|\f$ \theta_1=\arctan2(-m_{12},m_{22})\\ \theta_2=-\pi/2\\ \theta_3=0 \f$ + * EXT_XYZ|\f$ \theta_1 = \arctan2(m_{32},m_{33})\\\theta_2 = \arcsin(-m_{31}) \\\ \theta_3 = \arctan2(m_{21},m_{11})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{23},m_{22}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{12},m_{22}) \f$ + * EXT_XZY|\f$ \theta_1 = \arctan2(-m_{23},m_{22})\\\theta_2 = \arcsin(m_{21}) \\\theta_3= \arctan2(-m_{31},m_{11})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{13},m_{33}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{13},m_{12}) \f$ + * EXT_YXZ|\f$ \theta_1 = \arctan2(-m_{31},m_{33}) \\\theta_2 = \arcsin(m_{32}) \\\theta_3= \arctan2(-m_{12},m_{22})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{21},m_{11}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{21},m_{11}) \f$ + * EXT_YZX|\f$ \theta_1 = \arctan2(m_{13},m_{11})\\\theta_2 = -\arcsin(m_{12}) \\\theta_3= \arctan2(m_{32},m_{22})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{31},m_{33}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{23},m_{33}) \f$ + * EXT_ZXY|\f$ \theta_1 = \arctan2(m_{21},m_{22})\\\theta_2 = -\arcsin(m_{23}) \\\theta_3= \arctan2(m_{13},m_{33})\f$|\f$ \theta_1= 0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{12},m_{11}) \f$|\f$ \theta_1= 0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(-m_{12},m_{11}) \f$ + * EXT_ZYX|\f$ \theta_1 = \arctan2(-m_{12},m_{11})\\\theta_2 = \arcsin(m_{13}) \\\theta_3= \arctan2(-m_{23},m_{33})\f$|\f$ \theta_1=0\\ \theta_2=\pi/2\\ \theta_3=\arctan2(m_{21},m_{22}) \f$|\f$ \theta_1=0\\ \theta_2=-\pi/2\\ \theta_3=\arctan2(m_{32},m_{22}) \f$ + * + * EulerAnglesType | Ordinary | \f$\theta_2 = 0\f$ | \f$\theta_2 = π\f$ + * ------------- | -------------| -------------| ------------- + * INT_XYX| \f$ \theta_1 = \arctan2(m_{21},-m_{31})\\\theta_2 =\arccos(m_{11}) \\\theta_3 = 
\arctan2(m_{12},m_{13}) \f$| \f$ \theta_1=\arctan2(m_{32},m_{33})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{23},m_{22})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * INT_XZX| \f$ \theta_1 = \arctan2(m_{31},m_{21})\\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{13},-m_{12}) \f$| \f$ \theta_1=\arctan2(m_{32},m_{33})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(-m_{32},m_{33})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * INT_YXY| \f$ \theta_1 = \arctan2(m_{12},m_{32})\\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{21},-m_{23}) \f$| \f$ \theta_1=\arctan2(m_{13},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(-m_{31},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * INT_YZY| \f$ \theta_1 = \arctan2(m_{32},-m_{12})\\\theta_2 = \arccos(m_{22}) \\\theta_3 =\arctan2(m_{23},m_{21}) \f$| \f$ \theta_1=\arctan2(m_{13},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{13},-m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * INT_ZXZ| \f$ \theta_1 = \arctan2(-m_{13},m_{23})\\\theta_2 = \arccos(m_{33}) \\\theta_3 =\arctan2(m_{31},m_{32}) \f$| \f$ \theta_1=\arctan2(m_{21},m_{22})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * INT_ZYZ| \f$ \theta_1 = \arctan2(m_{23},m_{13})\\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(m_{32},-m_{31}) \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=0\\ \theta_3=0 \f$| \f$ \theta_1=\arctan2(m_{21},m_{11})\\ \theta_2=\pi\\ \theta_3=0 \f$ + * EXT_XYX| \f$ \theta_1 = \arctan2(m_{12},m_{13}) \\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{21},-m_{31})\f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{32},m_{33}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3= \arctan2(m_{23},m_{22}) \f$ + * EXT_XZX| \f$ \theta_1 = \arctan2(m_{13},-m_{12})\\\theta_2 = \arccos(m_{11}) \\\theta_3 = \arctan2(m_{31},m_{21})\f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{32},m_{33}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(-m_{32},m_{33}) \f$ + * EXT_YXY| \f$ \theta_1 = \arctan2(m_{21},-m_{23})\\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{12},m_{32}) \f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{13},m_{11}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(-m_{31},m_{11}) \f$ + * EXT_YZY| \f$ \theta_1 = \arctan2(m_{23},m_{21}) \\\theta_2 = \arccos(m_{22}) \\\theta_3 = \arctan2(m_{32},-m_{12}) \f$| \f$ \theta_1= 0\\ \theta_2=0\\ \theta_3=\arctan2(m_{13},m_{11}) \f$| \f$ \theta_1=0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{13},-m_{11}) \f$ + * EXT_ZXZ| \f$ \theta_1 = \arctan2(m_{31},m_{32}) \\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(-m_{13},m_{23})\f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{21},m_{22}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{21},m_{11}) \f$ + * EXT_ZYZ| \f$ \theta_1 = \arctan2(m_{32},-m_{31})\\\theta_2 = \arccos(m_{33}) \\\theta_3 = \arctan2(m_{23},m_{13}) \f$| \f$ \theta_1=0\\ \theta_2=0\\ \theta_3=\arctan2(m_{21},m_{11}) \f$| \f$ \theta_1= 0\\ \theta_2=\pi\\ \theta_3=\arctan2(m_{21},m_{11}) \f$ + * + * @param eulerAnglesType the convertion Euler angles type + */ + + Vec<_Tp, 3> toEulerAngles(QuatEnum::EulerAnglesType eulerAnglesType); + _Tp w, x, y, z; }; @@ -1165,8 +1667,8 @@ Quat exp(const Quat &q); template Quat log(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); -template -Quat power(const Quat& q, _T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); +template +Quat power(const Quat& q, const T x, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); template Quat crossProduct(const 
Quat &p, const Quat &q); @@ -1174,11 +1676,11 @@ Quat crossProduct(const Quat &p, const Quat &q); template Quat sqrt(const Quat &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT); -template -Quat operator*(const T, const Quat&); +template +Quat operator*(const T, const Quat&); -template -Quat operator*(const Quat&, const T); +template +Quat operator*(const Quat&, const T); template std::ostream& operator<<(std::ostream&, const Quat&); diff --git a/modules/core/include/opencv2/core/quaternion.inl.hpp b/modules/core/include/opencv2/core/quaternion.inl.hpp index 769f53ed4b..3c2fce10af 100644 --- a/modules/core/include/opencv2/core/quaternion.inl.hpp +++ b/modules/core/include/opencv2/core/quaternion.inl.hpp @@ -148,6 +148,30 @@ inline Quat Quat::operator+(const Quat &q1) const return Quat(w + q1.w, x + q1.x, y + q1.y, z + q1.z); } +template +inline Quat operator+(const T a, const Quat& q) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator+(const Quat& q, const T a) +{ + return Quat(q.w + a, q.x, q.y, q.z); +} + +template +inline Quat operator-(const T a, const Quat& q) +{ + return Quat(a - q.w, -q.x, -q.y, -q.z); +} + +template +inline Quat operator-(const Quat& q, const T a) +{ + return Quat(q.w - a, q.x, q.y, q.z); +} + template inline Quat Quat::operator-(const Quat &q1) const { @@ -183,14 +207,14 @@ inline Quat Quat::operator*(const Quat &q1) const } -template -Quat operator*(const Quat &q1, const S a) +template +Quat operator*(const Quat &q1, const T a) { return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); } -template -Quat operator*(const S a, const Quat &q1) +template +Quat operator*(const T a, const Quat &q1) { return Quat(a * q1.w, a * q1.x, a * q1.y, a * q1.z); } @@ -221,7 +245,7 @@ inline Quat& Quat::operator/=(const Quat &q1) return *this; } template -Quat& Quat::operator*=(const T &q1) +Quat& Quat::operator*=(const T q1) { w *= q1; x *= q1; @@ -231,7 +255,7 @@ Quat& Quat::operator*=(const T &q1) } template -inline Quat& Quat::operator/=(const T &a) +inline Quat& Quat::operator/=(const T a) { const T a_inv = 1.0 / a; w *= a_inv; @@ -242,7 +266,7 @@ inline Quat& Quat::operator/=(const T &a) } template -inline Quat Quat::operator/(const T &a) const +inline Quat Quat::operator/(const T a) const { const T a_inv = 1.0 / a; return Quat(w * a_inv, x * a_inv, y * a_inv, z * a_inv); @@ -353,15 +377,14 @@ Quat Quat::log(QuatAssumeType assumeUnit) const return Quat(std::log(qNorm), v[0] * k, v[1] * k, v[2] *k); } -template -inline Quat power(const Quat &q1, _T alpha, QuatAssumeType assumeUnit) +template +inline Quat power(const Quat &q1, const T alpha, QuatAssumeType assumeUnit) { return q1.power(alpha, assumeUnit); } template -template -inline Quat Quat::power(_T alpha, QuatAssumeType assumeUnit) const +inline Quat Quat::power(const T alpha, QuatAssumeType assumeUnit) const { if (x * x + y * y + z * z > CV_QUAT_EPS) { @@ -843,6 +866,197 @@ Quat Quat::spline(const Quat &q0, const Quat &q1, const Quat &q2, return squad(vec[1], s1, s2, vec[2], t, assumeUnit, QUAT_ASSUME_NOT_UNIT); } +namespace detail { + +template static +Quat createFromAxisRot(int axis, const T theta) +{ + if (axis == 0) + return Quat::createFromXRot(theta); + if (axis == 1) + return Quat::createFromYRot(theta); + if (axis == 2) + return Quat::createFromZRot(theta); + CV_Assert(0); +} + +inline bool isIntAngleType(QuatEnum::EulerAnglesType eulerAnglesType) +{ + return eulerAnglesType < QuatEnum::EXT_XYZ; +} + +inline bool isTaitBryan(QuatEnum::EulerAnglesType eulerAnglesType) +{ + return 
eulerAnglesType/6 == 1 || eulerAnglesType/6 == 3; +} +} // namespace detail + +template +Quat Quat::createFromYRot(const T theta) +{ + return Quat{std::cos(theta * 0.5f), 0, std::sin(theta * 0.5f), 0}; +} + +template +Quat Quat::createFromXRot(const T theta){ + return Quat{std::cos(theta * 0.5f), std::sin(theta * 0.5f), 0, 0}; +} + +template +Quat Quat::createFromZRot(const T theta){ + return Quat{std::cos(theta * 0.5f), 0, 0, std::sin(theta * 0.5f)}; +} + + +template +Quat Quat::createFromEulerAngles(const Vec &angles, QuatEnum::EulerAnglesType eulerAnglesType) { + CV_Assert(eulerAnglesType < QuatEnum::EulerAnglesType::EULER_ANGLES_MAX_VALUE); + static const int rotationAxis[24][3] = { + {0, 1, 2}, ///< Intrinsic rotations with the Euler angles type X-Y-Z + {0, 2, 1}, ///< Intrinsic rotations with the Euler angles type X-Z-Y + {1, 0, 2}, ///< Intrinsic rotations with the Euler angles type Y-X-Z + {1, 2, 0}, ///< Intrinsic rotations with the Euler angles type Y-Z-X + {2, 0, 1}, ///< Intrinsic rotations with the Euler angles type Z-X-Y + {2, 1, 0}, ///< Intrinsic rotations with the Euler angles type Z-Y-X + {0, 1, 0}, ///< Intrinsic rotations with the Euler angles type X-Y-X + {0, 2, 0}, ///< Intrinsic rotations with the Euler angles type X-Z-X + {1, 0, 1}, ///< Intrinsic rotations with the Euler angles type Y-X-Y + {1, 2, 1}, ///< Intrinsic rotations with the Euler angles type Y-Z-Y + {2, 0, 2}, ///< Intrinsic rotations with the Euler angles type Z-X-Z + {2, 1, 2}, ///< Intrinsic rotations with the Euler angles type Z-Y-Z + {0, 1, 2}, ///< Extrinsic rotations with the Euler angles type X-Y-Z + {0, 2, 1}, ///< Extrinsic rotations with the Euler angles type X-Z-Y + {1, 0, 2}, ///< Extrinsic rotations with the Euler angles type Y-X-Z + {1, 2, 0}, ///< Extrinsic rotations with the Euler angles type Y-Z-X + {2, 0, 1}, ///< Extrinsic rotations with the Euler angles type Z-X-Y + {2, 1, 0}, ///< Extrinsic rotations with the Euler angles type Z-Y-X + {0, 1, 0}, ///< Extrinsic rotations with the Euler angles type X-Y-X + {0, 2, 0}, ///< Extrinsic rotations with the Euler angles type X-Z-X + {1, 0, 1}, ///< Extrinsic rotations with the Euler angles type Y-X-Y + {1, 2, 1}, ///< Extrinsic rotations with the Euler angles type Y-Z-Y + {2, 0, 2}, ///< Extrinsic rotations with the Euler angles type Z-X-Z + {2, 1, 2} ///< Extrinsic rotations with the Euler angles type Z-Y-Z + }; + Quat q1 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][0], angles(0)); + Quat q2 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][1], angles(1)); + Quat q3 = detail::createFromAxisRot(rotationAxis[eulerAnglesType][2], angles(2)); + if (detail::isIntAngleType(eulerAnglesType)) + { + return q1 * q2 * q3; + } + else // (!detail::isIntAngleType(eulerAnglesType)) + { + return q3 * q2 * q1; + } +} + +template +Vec Quat::toEulerAngles(QuatEnum::EulerAnglesType eulerAnglesType){ + CV_Assert(eulerAnglesType < QuatEnum::EulerAnglesType::EULER_ANGLES_MAX_VALUE); + Matx33d R = toRotMat3x3(); + enum { + C_ZERO, + C_PI, + C_PI_2, + N_CONSTANTS, + R_0_0 = N_CONSTANTS, R_0_1, R_0_2, + R_1_0, R_1_1, R_1_2, + R_2_0, R_2_1, R_2_2 + }; + static const T constants_[N_CONSTANTS] = { + 0, // C_ZERO + (T)CV_PI, // C_PI + (T)(CV_PI * 0.5) // C_PI_2, -C_PI_2 + }; + static const int rotationR_[24][12] = { + {+R_0_2, +R_1_0, +R_1_1, C_PI_2, +R_2_1, +R_1_1, -C_PI_2, -R_1_2, +R_2_2, +R_0_2, -R_0_1, +R_0_0}, // INT_XYZ + {+R_0_1, -R_1_2, +R_2_2, -C_PI_2, +R_2_0, +R_2_2, C_PI_2, +R_2_1, +R_1_1, -R_0_1, +R_0_2, +R_0_0}, // INT_XZY + {+R_1_2, 
-R_0_1, +R_0_0, -C_PI_2, +R_0_1, +R_0_0, C_PI_2, +R_0_2, +R_2_2, -R_1_2, +R_1_0, +R_1_1}, // INT_YXZ + {+R_1_0, +R_0_2, +R_2_2, C_PI_2, +R_0_2, +R_0_1, -C_PI_2, -R_2_0, +R_0_0, +R_1_0, -R_1_2, +R_1_1}, // INT_YZX + {+R_2_1, +R_1_0, +R_0_0, C_PI_2, +R_1_0, +R_0_0, -C_PI_2, -R_0_1, +R_1_1, +R_2_1, -R_2_0, +R_2_2}, // INT_ZXY + {+R_2_0, -R_0_1, +R_1_1, -C_PI_2, +R_1_2, +R_1_1, C_PI_2, +R_1_0, +R_0_0, -R_2_0, +R_2_1, +R_2_2}, // INT_ZYX + {+R_0_0, +R_2_1, +R_2_2, C_ZERO, +R_1_2, +R_1_1, C_PI, +R_1_0, -R_2_0, +R_0_0, +R_0_1, +R_0_2}, // INT_XYX + {+R_0_0, +R_2_1, +R_2_2, C_ZERO, -R_2_1, +R_2_2, C_PI, +R_2_0, +R_1_0, +R_0_0, +R_0_2, -R_0_1}, // INT_XZX + {+R_1_1, +R_0_2, +R_0_0, C_ZERO, -R_2_0, +R_0_0, C_PI, +R_0_1, +R_2_1, +R_1_1, +R_1_0, -R_1_2}, // INT_YXY + {+R_1_1, +R_0_2, +R_0_0, C_ZERO, +R_0_2, -R_0_0, C_PI, +R_2_1, -R_0_1, +R_1_1, +R_1_2, +R_1_0}, // INT_YZY + {+R_2_2, +R_1_0, +R_1_1, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_0_2, -R_1_2, +R_2_2, +R_2_0, +R_2_1}, // INT_ZXZ + {+R_2_2, +R_1_0, +R_0_0, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_1_2, +R_0_2, +R_2_2, +R_2_1, -R_2_0}, // INT_ZYZ + + {+R_2_0, -C_PI_2, -R_0_1, +R_1_1, C_PI_2, +R_1_2, +R_1_1, +R_2_1, +R_2_2, -R_2_0, +R_1_0, +R_0_0}, // EXT_XYZ + {+R_1_0, C_PI_2, +R_0_2, +R_2_2, -C_PI_2, +R_0_2, +R_0_1, -R_1_2, +R_1_1, +R_1_0, -R_2_0, +R_0_0}, // EXT_XZY + {+R_2_1, C_PI_2, +R_1_0, +R_0_0, -C_PI_2, +R_1_0, +R_0_0, -R_2_0, +R_2_2, +R_2_1, -R_0_1, +R_1_1}, // EXT_YXZ + {+R_0_2, -C_PI_2, -R_1_2, +R_2_2, C_PI_2, +R_2_0, +R_2_2, +R_0_2, +R_0_0, -R_0_1, +R_2_1, +R_1_1}, // EXT_YZX + {+R_1_2, -C_PI_2, -R_0_1, +R_0_0, C_PI_2, +R_0_1, +R_0_0, +R_1_0, +R_1_1, -R_1_2, +R_0_2, +R_2_2}, // EXT_ZXY + {+R_0_2, C_PI_2, +R_1_0, +R_1_1, -C_PI_2, +R_2_1, +R_1_1, -R_0_1, +R_0_0, +R_0_2, -R_1_2, +R_2_2}, // EXT_ZYX + {+R_0_0, C_ZERO, +R_2_1, +R_2_2, C_PI, +R_1_2, +R_1_1, +R_0_1, +R_0_2, +R_0_0, +R_1_0, -R_2_0}, // EXT_XYX + {+R_0_0, C_ZERO, +R_2_1, +R_2_2, C_PI, +R_2_1, +R_2_2, +R_0_2, -R_0_1, +R_0_0, +R_2_0, +R_1_0}, // EXT_XZX + {+R_1_1, C_ZERO, +R_0_2, +R_0_0, C_PI, -R_2_0, +R_0_0, +R_1_0, -R_1_2, +R_1_1, +R_0_1, +R_2_1}, // EXT_YXY + {+R_1_1, C_ZERO, +R_0_2, +R_0_0, C_PI, +R_0_2, -R_0_0, +R_1_2, +R_1_0, +R_1_1, +R_2_1, -R_0_1}, // EXT_YZY + {+R_2_2, C_ZERO, +R_1_0, +R_1_1, C_PI, +R_1_0, +R_0_0, +R_2_0, +R_2_1, +R_2_2, +R_0_2, -R_1_2}, // EXT_ZXZ + {+R_2_2, C_ZERO, +R_1_0, +R_0_0, C_PI, +R_1_0, +R_0_0, +R_2_1, -R_2_0, +R_2_2, +R_1_2, +R_0_2}, // EXT_ZYZ + }; + T rotationR[12]; + for (int i = 0; i < 12; i++) + { + int id = rotationR_[eulerAnglesType][i]; + unsigned idx = std::abs(id); + T value = 0.0f; + if (idx < N_CONSTANTS) + { + value = constants_[idx]; + } + else + { + unsigned r_idx = idx - N_CONSTANTS; + CV_DbgAssert(r_idx < 9); + value = R.val[r_idx]; + } + bool isNegative = id < 0; + if (isNegative) + value = -value; + rotationR[i] = value; + } + Vec angles; + if (detail::isIntAngleType(eulerAnglesType)) + { + if (abs(rotationR[0] - 1) < CV_QUAT_CONVERT_THRESHOLD) + { + CV_LOG_WARNING(NULL,"Gimbal Lock occurs. Euler angles are non-unique, we set the third angle to 0"); + angles = {std::atan2(rotationR[1], rotationR[2]), rotationR[3], 0}; + return angles; + } + else if(abs(rotationR[0] + 1) < CV_QUAT_CONVERT_THRESHOLD) + { + CV_LOG_WARNING(NULL,"Gimbal Lock occurs. 
Euler angles are non-unique, we set the third angle to 0"); + angles = {std::atan2(rotationR[4], rotationR[5]), rotationR[6], 0}; + return angles; + } + } + else // (!detail::isIntAngleType(eulerAnglesType)) + { + if (abs(rotationR[0] - 1) < CV_QUAT_CONVERT_THRESHOLD) + { + CV_LOG_WARNING(NULL,"Gimbal Lock occurs. Euler angles are non-unique, we set the first angle to 0"); + angles = {0, rotationR[1], std::atan2(rotationR[2], rotationR[3])}; + return angles; + } + else if (abs(rotationR[0] + 1) < CV_QUAT_CONVERT_THRESHOLD) + { + CV_LOG_WARNING(NULL,"Gimbal Lock occurs. Euler angles are non-unique, we set the first angle to 0"); + angles = {0, rotationR[4], std::atan2(rotationR[5], rotationR[6])}; + return angles; + } + } + + angles(0) = std::atan2(rotationR[7], rotationR[8]); + if (detail::isTaitBryan(eulerAnglesType)) + angles(1) = std::acos(rotationR[9]); + else + angles(1) = std::asin(rotationR[9]); + angles(2) = std::atan2(rotationR[10], rotationR[11]); + return angles; +} + } // namespace //! @endcond diff --git a/modules/core/include/opencv2/core/simd_intrinsics.hpp b/modules/core/include/opencv2/core/simd_intrinsics.hpp index c50923f0ef..8fe7ee6b60 100644 --- a/modules/core/include/opencv2/core/simd_intrinsics.hpp +++ b/modules/core/include/opencv2/core/simd_intrinsics.hpp @@ -40,7 +40,6 @@ Notes: #endif #include "opencv2/core/cvdef.h" -#include "opencv2/core/version.hpp" #ifdef OPENCV_SIMD_CONFIG_HEADER #include CVAUX_STR(OPENCV_SIMD_CONFIG_HEADER) diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index c52abbbff4..f0368027aa 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -570,6 +570,8 @@ static inline size_t getElemSize(int type) { return (size_t)CV_ELEM_SIZE(type); /////////////////////////////// Parallel Primitives ////////////////////////////////// /** @brief Base class for parallel data processors + +@ingroup core_parallel */ class CV_EXPORTS ParallelLoopBody { @@ -579,17 +581,23 @@ public: }; /** @brief Parallel data processor + +@ingroup core_parallel */ CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.); +//! @ingroup core_parallel class ParallelLoopBodyLambdaWrapper : public ParallelLoopBody { private: std::function<void(const Range&)> m_functor; public: - ParallelLoopBodyLambdaWrapper(std::function<void(const Range&)> functor) : - m_functor(functor) - { } + inline + ParallelLoopBodyLambdaWrapper(std::function<void(const Range&)> functor) + : m_functor(functor) + { + // nothing + } virtual void operator() (const cv::Range& range) const CV_OVERRIDE { @@ -597,11 +605,14 @@ public: } }; -inline void parallel_for_(const Range& range, std::function<void(const Range&)> functor, double nstripes=-1.) +//! @ingroup core_parallel +static inline +void parallel_for_(const Range& range, std::function<void(const Range&)> functor, double nstripes=-1.) 
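// [Editor's sketch -- not part of the patch.] The lambda overload declared
// just above saves callers from subclassing ParallelLoopBody. Minimal usage,
// assuming an existing CV_32F matrix `m`:
//
//   cv::parallel_for_(cv::Range(0, m.rows), [&](const cv::Range& range)
//   {
//       for (int r = range.start; r < range.end; r++)
//       {
//           float* row = m.ptr<float>(r);
//           for (int c = 0; c < m.cols; c++)
//               row[c] = std::sqrt(row[c]); // any independent per-row work
//       }
//   });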
{ parallel_for_(range, ParallelLoopBodyLambdaWrapper(functor), nstripes); } + /////////////////////////////// forEach method of cv::Mat //////////////////////////// template<typename _Tp, typename Functor> inline void Mat::forEach_impl(const Functor& operation) { diff --git a/modules/core/include/opencv2/core/utils/allocator_stats.impl.hpp b/modules/core/include/opencv2/core/utils/allocator_stats.impl.hpp index 61fcf15977..eb5ecde16b 100644 --- a/modules/core/include/opencv2/core/utils/allocator_stats.impl.hpp +++ b/modules/core/include/opencv2/core/utils/allocator_stats.impl.hpp @@ -7,13 +7,11 @@ #include "./allocator_stats.hpp" -#ifdef CV_CXX11 -#include <atomic> -#endif - //#define OPENCV_DISABLE_ALLOCATOR_STATS -namespace cv { namespace utils { +#ifdef CV_CXX11 + +#include <atomic> #ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE #if defined(__GNUC__) && (\ @@ -28,6 +26,16 @@ namespace cv { namespace utils { #define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE long long #endif +#else // CV_CXX11 + +#ifndef OPENCV_ALLOCATOR_STATS_COUNTER_TYPE +#define OPENCV_ALLOCATOR_STATS_COUNTER_TYPE int // CV_XADD supports int only +#endif + +#endif // CV_CXX11 + +namespace cv { namespace utils { + #ifdef CV__ALLOCATOR_STATS_LOG namespace { #endif diff --git a/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp b/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp new file mode 100644 index 0000000000..bc3ae4d08a --- /dev/null +++ b/modules/core/include/opencv2/core/utils/plugin_loader.private.hpp @@ -0,0 +1,163 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_UTILS_PLUGIN_LOADER_HPP +#define OPENCV_UTILS_PLUGIN_LOADER_HPP + +#include "opencv2/core/utils/filesystem.hpp" +#include "opencv2/core/utils/filesystem.private.hpp" + +#if OPENCV_HAVE_FILESYSTEM_SUPPORT + +#if defined(_WIN32) +#include <windows.h> +#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__) +#include <dlfcn.h> +#endif + +namespace cv { namespace plugin { namespace impl { + +#if defined(_WIN32) +typedef HMODULE LibHandle_t; +typedef wchar_t FileSystemChar_t; +typedef std::wstring FileSystemPath_t; + +// TODO wchar_t <=> UTF-8 +static inline +FileSystemPath_t toFileSystemPath(const std::string& p) +{ + FileSystemPath_t result; + result.resize(p.size()); + for (size_t i = 0; i < p.size(); i++) + result[i] = (wchar_t)p[i]; + return result; +} + +// TODO wchar_t <=> UTF-8 +static inline +std::string toPrintablePath(const FileSystemPath_t& p) +{ + std::string result; + result.resize(p.size()); + for (size_t i = 0; i < p.size(); i++) + { + wchar_t ch = p[i]; + if ((int)ch >= ' ' && (int)ch < 128) + result[i] = (char)ch; + else + result[i] = '?'; + } + return result; +} +#else // !_WIN32 +typedef void* LibHandle_t; +typedef char FileSystemChar_t; +typedef std::string FileSystemPath_t; + +static inline FileSystemPath_t toFileSystemPath(const std::string& p) { return p; } +static inline std::string toPrintablePath(const FileSystemPath_t& p) { return p; } +#endif + + +static inline +void* getSymbol_(LibHandle_t h, const char* symbolName) +{ +#if defined(_WIN32) + return (void*)GetProcAddress(h, symbolName); +#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__) + return dlsym(h, symbolName); +#endif +} + +static inline +LibHandle_t libraryLoad_(const 
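// [Editor's sketch -- not part of the patch.] getSymbol_/libraryLoad_/
// libraryRelease_ wrap the platform loaders (LoadLibraryW/GetProcAddress on
// Windows, dlopen/dlsym elsewhere). Hypothetical usage -- the file name and
// symbol name below are made up for illustration:
//
//   FileSystemPath_t path = toFileSystemPath("libexample_plugin.so");
//   LibHandle_t h = libraryLoad_(path);
//   if (h != NULL)
//   {
//       void* initFn = getSymbol_(h, "example_plugin_init"); // hypothetical symbol
//       // ... call through initFn if it is non-NULL, then release the module
//       libraryRelease_(h);
//   }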
FileSystemPath_t& filename) +{ +#if defined(_WIN32) +# ifdef WINRT + return LoadPackagedLibrary(filename.c_str(), 0); +# else + return LoadLibraryW(filename.c_str()); +#endif +#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__) + return dlopen(filename.c_str(), RTLD_NOW); +#endif +} + +static inline +void libraryRelease_(LibHandle_t h) +{ +#if defined(_WIN32) + FreeLibrary(h); +#elif defined(__linux__) || defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__HAIKU__) || defined(__GLIBC__) + dlclose(h); +#endif +} + +static inline +std::string libraryPrefix() +{ +#if defined(_WIN32) + return ""; +#else + return "lib"; +#endif +} +static inline +std::string librarySuffix() +{ +#if defined(_WIN32) + const char* suffix = "" + CVAUX_STR(CV_MAJOR_VERSION) CVAUX_STR(CV_MINOR_VERSION) CVAUX_STR(CV_SUBMINOR_VERSION) + #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) + "_64" + #endif + #if defined(_DEBUG) && defined(DEBUG_POSTFIX) + CVAUX_STR(DEBUG_POSTFIX) + #endif + ".dll"; + return suffix; +#else + return ".so"; +#endif +} + + +//============================ + +class CV_EXPORTS DynamicLib +{ +private: + LibHandle_t handle; + const FileSystemPath_t fname; + bool disableAutoUnloading_; + +public: + DynamicLib(const FileSystemPath_t& filename); + ~DynamicLib(); + /** Do not automatically unload library in destructor */ + inline void disableAutomaticLibraryUnloading() + { + disableAutoUnloading_ = true; + } + inline bool isLoaded() const + { + return handle != NULL; + } + void* getSymbol(const char* symbolName) const; + const std::string getName() const; +private: + void libraryLoad(const FileSystemPath_t& filename); + void libraryRelease(); + +private: + DynamicLib(const DynamicLib &) = delete; + DynamicLib &operator=(const DynamicLib &) = delete; +}; + + +}}} // namespace + +#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT + +#endif // OPENCV_UTILS_PLUGIN_LOADER_HPP diff --git a/modules/core/include/opencv2/core/utils/tls.hpp b/modules/core/include/opencv2/core/utils/tls.hpp index 697a7b0340..124caebc85 100644 --- a/modules/core/include/opencv2/core/utils/tls.hpp +++ b/modules/core/include/opencv2/core/utils/tls.hpp @@ -5,7 +5,9 @@ #ifndef OPENCV_UTILS_TLS_HPP #define OPENCV_UTILS_TLS_HPP -#include <opencv2/core/utility.hpp> +#ifndef OPENCV_CORE_UTILITY_H +#error "tls.hpp must be included after opencv2/core/utility.hpp or opencv2/core.hpp" +#endif namespace cv { diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index 08ae890175..68863ffb36 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -497,13 +497,15 @@ VSX_IMPL_CONV_EVEN_2_4(vec_uint4, vec_double2, vec_ctu, vec_ctuo) VSX_FINLINE(rt) fnm(const rg& a, int only_truncate) \ { \ assert(only_truncate == 0); \ - CV_UNUSED(only_truncate); \ + CV_UNUSED(only_truncate); \ return fn2(a); \ } VSX_IMPL_CONV_2VARIANT(vec_int4, vec_float4, vec_cts, vec_cts) + VSX_IMPL_CONV_2VARIANT(vec_uint4, vec_float4, vec_ctu, vec_ctu) VSX_IMPL_CONV_2VARIANT(vec_float4, vec_int4, vec_ctf, vec_ctf) + VSX_IMPL_CONV_2VARIANT(vec_float4, vec_uint4, vec_ctf, vec_ctf) // define vec_cts for converting double precision to signed doubleword - // which isn't combitable with xlc but its okay since Eigen only use it for gcc + // which isn't compatible with xlc but it's okay since Eigen only uses it for gcc VSX_IMPL_CONV_2VARIANT(vec_dword2, 
vec_double2, vec_cts, vec_ctsl) #endif // Eigen diff --git a/modules/core/misc/java/test/MatTest.java b/modules/core/misc/java/test/MatTest.java index 039aa39929..00e7b7cb32 100644 --- a/modules/core/misc/java/test/MatTest.java +++ b/modules/core/misc/java/test/MatTest.java @@ -455,6 +455,27 @@ public class MatTest extends OpenCVTestCase { bytesNum = sm.get(1, 1, buff11); assertEquals(4, bytesNum); assertTrue(Arrays.equals(new short[] {340, 341, 0, 0}, buff11)); + + Mat m2 = new Mat(new int[]{ 5, 6, 8 }, CvType.CV_16S); + short[] data = new short[(int)m2.total()]; + for (int i = 0; i < data.length; i++ ) { + data[i] = (short)i; + } + m2.put(new int[] {0, 0, 0}, data); + Mat matNonContinuous = m2.submat(new Range[]{new Range(1,4), new Range(2,5), new Range(3,6)}); + Mat matContinuous = matNonContinuous.clone(); + short[] outNonContinuous = new short[(int)matNonContinuous.total()]; + matNonContinuous.get(new int[] { 0, 0, 0 }, outNonContinuous); + short[] outContinuous = new short[(int)matNonContinuous.total()]; + matContinuous.get(new int[] { 0, 0, 0 }, outContinuous); + assertArrayEquals(outNonContinuous, outContinuous); + Mat subMat2 = m2.submat(new Range[]{new Range(1,4), new Range(1,5), new Range(0,8)}); + Mat subMatClone2 = subMat2.clone(); + short[] outNonContinuous2 = new short[(int)subMat2.total()]; + subMat2.get(new int[] { 0, 1, 1 }, outNonContinuous2); + short[] outContinuous2 = new short[(int)subMat2.total()]; + subMatClone2.get(new int[] { 0, 1, 1 }, outContinuous2); + assertArrayEquals(outNonContinuous2, outContinuous2); } public void testGetNativeObjAddr() { diff --git a/modules/core/misc/plugins/parallel_openmp/CMakeLists.txt b/modules/core/misc/plugins/parallel_openmp/CMakeLists.txt new file mode 100644 index 0000000000..024d2046cf --- /dev/null +++ b/modules/core/misc/plugins/parallel_openmp/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.5) +project(opencv_core_parallel_openmp CXX) + +get_filename_component(OpenCV_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../.." ABSOLUTE) +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVPluginStandalone.cmake") + +# scan dependencies +set(WITH_OPENMP ON) +include("${OpenCV_SOURCE_DIR}/modules/core/cmake/parallel/init.cmake") + +message(STATUS "OpenMP: ${OpenMP_CXX_VERSION}") +ocv_create_plugin(core "opencv_core_parallel_openmp" "ocv.3rdparty.openmp" "OPENMP" "src/parallel/parallel_openmp.cpp") diff --git a/modules/core/misc/plugins/parallel_tbb/CMakeLists.txt b/modules/core/misc/plugins/parallel_tbb/CMakeLists.txt new file mode 100644 index 0000000000..c2129c7c2c --- /dev/null +++ b/modules/core/misc/plugins/parallel_tbb/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.5) +project(opencv_core_parallel_tbb CXX) + +get_filename_component(OpenCV_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../.." 
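# [Editor's note -- not part of the patch.] Both plugin CMakeLists follow one
# standalone pattern: resolve the OpenCV source tree, force the backend on
# (WITH_OPENMP / WITH_TBB), include the module's parallel/init.cmake to scan
# dependencies, then emit the plugin via ocv_create_plugin(). A build sketch,
# assuming an OpenCV checkout and a CMake new enough for -S/-B (>= 3.13):
#
#   cmake -S opencv/modules/core/misc/plugins/parallel_tbb -B build_plugin
#   cmake --build build_plugin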
ABSOLUTE) +include("${OpenCV_SOURCE_DIR}/cmake/OpenCVPluginStandalone.cmake") + +# scan dependencies +set(WITH_TBB ON) +include("${OpenCV_SOURCE_DIR}/modules/core/cmake/parallel/init.cmake") + +message(STATUS "TBB: ver ${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR} interface ${TBB_INTERFACE_VERSION}") +ocv_create_plugin(core "opencv_core_parallel_tbb" "ocv.3rdparty.tbb" "TBB" "src/parallel/parallel_tbb.cpp") diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 9f5f6e9e77..0cbfc2d653 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -678,7 +678,12 @@ OCL_PERF_TEST_P(SqrtFixture, Sqrt, ::testing::Combine( OCL_TEST_CYCLE() cv::sqrt(src, dst); - if (CV_MAT_DEPTH(type) >= CV_32F) + // To square root 32 bit floats we use native_sqrt, which has implementation + // defined accuracy. We know intel devices have accurate native_sqrt, but + // otherwise stick to a relaxed sanity check. For types larger than 32 bits + // we can do the accuracy check for all devices as normal. + if (CV_MAT_DEPTH(type) > CV_32F || !ocl::useOpenCL() || + ocl::Device::getDefault().isIntel()) SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE); else SANITY_CHECK(dst, 1); diff --git a/modules/core/perf/perf_allocation.cpp b/modules/core/perf/perf_allocation.cpp new file mode 100755 index 0000000000..2f3bf3eaa7 --- /dev/null +++ b/modules/core/perf/perf_allocation.cpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "perf_precomp.hpp" +#include <array> + +using namespace perf; + +#define ALLOC_MAT_SIZES ::perf::szSmall24, ::perf::szSmall32, ::perf::szSmall64, \ + ::perf::sz5MP, ::perf::sz2K, ::perf::szSmall128, ::perf::szODD, ::perf::szQVGA, \ + ::perf::szVGA, ::perf::szSVGA, ::perf::sz720p, ::perf::sz1080p, ::perf::sz2160p, \ + ::perf::sz4320p, ::perf::sz3MP, ::perf::szXGA, ::perf::szSXGA, ::perf::szWQHD, \ + ::perf::sznHD, ::perf::szqHD + +namespace opencv_test +{ + +typedef perf::TestBaseWithParam<MatDepth> MatDepth_tb; + +PERF_TEST_P(MatDepth_tb, DISABLED_Allocation_Aligned, + testing::Values(CV_8UC1, CV_16SC1, CV_8UC3, CV_8UC4)) +{ + const int matType = GetParam(); + const cv::Mat utility(1, 1, matType); + const size_t elementBytes = utility.elemSize(); + + const std::array<cv::Size, 20> sizes{ALLOC_MAT_SIZES}; + std::array<size_t, 20> bytes; + for (size_t i = 0; i < sizes.size(); ++i) + { + bytes[i] = sizes[i].width * sizes[i].height * elementBytes; + } + + declare.time(60) + .iterations(100); + + TEST_CYCLE() + { + for (int i = 0; i < 100000; ++i) + { + fastFree(fastMalloc(bytes[i % sizes.size()])); + } + } + SANITY_CHECK_NOTHING(); +} + +}; diff --git a/modules/core/src/alloc.cpp b/modules/core/src/alloc.cpp index 98012998fc..a0def9db2e 100644 --- a/modules/core/src/alloc.cpp +++ b/modules/core/src/alloc.cpp @@ -82,7 +82,7 @@ cv::utils::AllocatorStatisticsInterface& getAllocatorStatistics() return allocator_stats; } -#if defined HAVE_POSIX_MEMALIGN || defined HAVE_MEMALIGN +#if defined HAVE_POSIX_MEMALIGN || defined HAVE_MEMALIGN || defined HAVE_WIN32_ALIGNED_MALLOC static bool readMemoryAlignmentParameter() { bool value = true; @@ -100,25 +100,27 @@ static bool readMemoryAlignmentParameter() // TODO add checks for valgrind, ASAN if value == false return value; } + +#if defined _MSC_VER +#pragma warning(suppress:4714) // preventive: const marked as __forceinline not inlined +static __forceinline +#else 
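// [Editor's sketch -- not part of the patch.] isAlignedAllocationEnabled()
// below uses the construct-on-first-use idiom: a function-local static is
// initialized exactly once on first call (thread-safe since C++11), which
// sidesteps the static initialization order problem of namespace-scope
// flags. The general shape:
//
//   bool isFeatureEnabled()
//   {
//       static bool value = readConfiguration(); // runs once, on first use
//       return value;
//   }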
static inline +#endif bool isAlignedAllocationEnabled() { - static bool initialized = false; - static bool useMemalign = true; - if (!initialized) - { - initialized = true; // trick to avoid stuck in acquire (works only if allocations are scope based) - useMemalign = readMemoryAlignmentParameter(); - } + // use construct on first use idiom https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use + // details: https://github.com/opencv/opencv/issues/15691 + static bool useMemalign = readMemoryAlignmentParameter(); return useMemalign; } -// do not use variable directly, details: https://github.com/opencv/opencv/issues/15691 + +// need for this static const is disputed; retaining as it doesn't cause harm static const bool g_force_initialization_memalign_flag #if defined __GNUC__ __attribute__((unused)) #endif = isAlignedAllocationEnabled(); - #endif #ifdef OPENCV_ALLOC_ENABLE_STATISTICS @@ -146,6 +148,14 @@ void* fastMalloc(size_t size) return OutOfMemoryError(size); return ptr; } +#elif defined HAVE_WIN32_ALIGNED_MALLOC + if (isAlignedAllocationEnabled()) + { + void* ptr = _aligned_malloc(size, CV_MALLOC_ALIGN); + if(!ptr) + return OutOfMemoryError(size); + return ptr; + } #endif uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN); if(!udata) @@ -168,6 +178,12 @@ void fastFree(void* ptr) free(ptr); return; } +#elif defined HAVE_WIN32_ALIGNED_MALLOC + if (isAlignedAllocationEnabled()) + { + _aligned_free(ptr); + return; + } #endif if(ptr) { diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 1c95985e9a..27acaf1bbf 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -57,26 +57,6 @@ namespace cv * logical operations * \****************************************************************************************/ -void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) -{ - int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); - size_t esz = CV_ELEM_SIZE(buftype); - BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype); - CV_Assert(cvtFn); - cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); - // unroll the scalar - if( scn < cn ) - { - CV_Assert( scn == 1 ); - size_t esz1 = CV_ELEM_SIZE1(buftype); - for( size_t i = esz1; i < esz; i++ ) - scbuf[i] = scbuf[i - esz1]; - } - for( size_t i = esz; i < blocksize*esz; i++ ) - scbuf[i] = scbuf[i - esz]; -} - - enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4, OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8, OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14, @@ -647,7 +627,8 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) || (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) ) { - if( checkScalar(*psrc1, type2, kind1, kind2) ) + if ((type1 == CV_64F && (sz1.height == 1 || sz1.height == 4)) && + checkScalar(*psrc1, type2, kind1, kind2)) { // src1 is a scalar; swap it with src2 swap(psrc1, psrc2); @@ -1002,9 +983,7 @@ static BinaryFuncC* getRecipTab() return recipTab; } -} - -void cv::multiply(InputArray src1, InputArray src2, +void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1013,7 +992,7 @@ void cv::multiply(InputArray src1, InputArray src2, true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? 
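// [Editor's sketch -- not part of the patch.] With a non-unit scale the
// OpenCL path picks the MUL_SCALE op; either way the per-element result is
// dst(I) = saturate_cast<dtype>(scale * src1(I) * src2(I)). For example:
//
//   cv::Mat a = (cv::Mat_<float>(1, 2) << 2.f, 3.f);
//   cv::Mat b = (cv::Mat_<float>(1, 2) << 4.f, 5.f);
//   cv::Mat c;
//   cv::multiply(a, b, c, 0.5); // c = {4.0f, 7.5f}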
OCL_OP_MUL : OCL_OP_MUL_SCALE); } -void cv::divide(InputArray src1, InputArray src2, +void divide(InputArray src1, InputArray src2, OutputArray dst, double scale, int dtype) { CV_INSTRUMENT_REGION(); @@ -1021,7 +1000,7 @@ void cv::divide(InputArray src1, InputArray src2, arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE); } -void cv::divide(double scale, InputArray src2, +void divide(double scale, InputArray src2, OutputArray dst, int dtype) { CV_INSTRUMENT_REGION(); @@ -1029,13 +1008,17 @@ void cv::divide(double scale, InputArray src2, arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE); } +UMat UMat::mul(InputArray m, double scale) const +{ + UMat dst; + multiply(*this, m, dst, scale); + return dst; +} + /****************************************************************************************\ * addWeighted * \****************************************************************************************/ -namespace cv -{ - static BinaryFuncC* getAddWeightedTab() { static BinaryFuncC addWeightedTab[] = @@ -1849,6 +1832,9 @@ void cv::inRange(InputArray _src, InputArray _lowerb, } } + +#ifndef OPENCV_EXCLUDE_C_API + /****************************************************************************************\ * Earlier API: cvAdd etc. * \****************************************************************************************/ @@ -2008,4 +1994,5 @@ cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op ) cv::compare( src1, value, dst, cmp_op ); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/array.cpp b/modules/core/src/array.cpp index b2f20e41c9..a9ddefef4c 100644 --- a/modules/core/src/array.cpp +++ b/modules/core/src/array.cpp @@ -48,6 +48,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + #define CV_ORIGIN_TL 0 #define CV_ORIGIN_BL 1 @@ -2748,53 +2750,6 @@ void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const { cvReleaseMatND(&obj); } void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const { cvReleaseSparseMat(&obj); } void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const { cvReleaseMemStorage(&obj); } -template<typename T> static inline -void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) -{ - int i = 0; - for(; i < cn; i++) - buf[i] = saturate_cast<T>(s.val[i]); - for(; i < unroll_to; i++) - buf[i] = buf[i-cn]; -} - -void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) -{ - CV_INSTRUMENT_REGION(); - - const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - CV_Assert(cn <= 4); - switch(depth) - { - case CV_8U: - scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to); - break; - case CV_8S: - scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to); - break; - case CV_16U: - scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to); - break; - case CV_16S: - scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to); - break; - case CV_32S: - scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to); - break; - case CV_32F: - scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to); - break; - case CV_64F: - scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to); - break; - case CV_16F: - scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to); - break; - default: - CV_Error(CV_StsUnsupportedFormat,""); - } -} - } // cv:: @@ -2817,4 +2772,5 @@ cvRelease( void** struct_ptr ) } +#endif // OPENCV_EXCLUDE_C_API /* End of file. 
*/ diff --git a/modules/core/src/bindings_utils.cpp b/modules/core/src/bindings_utils.cpp index 050b7247f8..78716c21f6 100644 --- a/modules/core/src/bindings_utils.cpp +++ b/modules/core/src/bindings_utils.cpp @@ -5,6 +5,8 @@ #include "precomp.hpp" #include "opencv2/core/bindings_utils.hpp" #include +#include +#include namespace cv { namespace utils { @@ -208,4 +210,15 @@ CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argume return ss.str(); } +namespace fs { +cv::String getCacheDirectoryForDownloads() +{ +#if OPENCV_HAVE_FILESYSTEM_SUPPORT + return cv::utils::fs::getCacheDirectory("downloads", "OPENCV_DOWNLOADS_CACHE_DIR"); +#else + CV_Error(Error::StsNotImplemented, "File system support is disabled in this OpenCV build!"); +#endif +} +} // namespace fs + }} // namespace diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp index bc8340b687..345b4624cb 100644 --- a/modules/core/src/convert.dispatch.cpp +++ b/modules/core/src/convert.dispatch.cpp @@ -154,7 +154,7 @@ static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int sdepth, int sdepth == CV_32F ? "half" : "float", rowsPerWI, sdepth == CV_32F ? " -D FLOAT_TO_HALF " : ""); - ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt); + ocl::Kernel k(sdepth == CV_32F ? "convertFp16_FP32_to_FP16" : "convertFp16_FP16_to_FP32", ocl::core::halfconvert_oclsrc, build_opt); if (k.empty()) return false; diff --git a/modules/core/src/convert_c.cpp b/modules/core/src/convert_c.cpp index efe4de740a..96beffccc6 100644 --- a/modules/core/src/convert_c.cpp +++ b/modules/core/src/convert_c.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API CV_IMPL void cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 ) @@ -132,3 +133,5 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr, CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() ); cv::normalize( src, dst, a, b, norm_type, dst.type(), mask ); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/convert_scale.dispatch.cpp b/modules/core/src/convert_scale.dispatch.cpp index 83376aa61d..6902ecc24b 100644 --- a/modules/core/src/convert_scale.dispatch.cpp +++ b/modules/core/src/convert_scale.dispatch.cpp @@ -9,7 +9,6 @@ #include "convert_scale.simd.hpp" #include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content - namespace cv { @@ -117,143 +116,4 @@ void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double bet } } -//================================================================================================== - -#ifdef HAVE_OPENCL - -static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, - double scale, double delta ) -{ - UMat src = _src.getUMat(); - - if( _mask.empty() ) - src.convertTo( _dst, dtype, scale, delta ); - else if (src.channels() <= 4) - { - const ocl::Device & dev = ocl::Device::getDefault(); - - int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), - ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), - rowsPerWI = dev.isIntel() ? 
4 : 1; - - float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta); - bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, - haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), - haveDelta = std::fabs(delta) > DBL_EPSILON, - doubleSupport = dev.doubleFPConfig() > 0; - - if (!haveScale && !haveDelta && stype == dtype) - { - _src.copyTo(_dst, _mask); - return true; - } - if (haveZeroScale) - { - _dst.setTo(Scalar(delta), _mask); - return true; - } - - if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) - return false; - - char cvt[2][40]; - String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", - ocl::typeToStr(stype), ocl::typeToStr(dtype), - ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, - rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), - ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), - doubleSupport ? " -D DOUBLE_SUPPORT" : "", - haveScale ? " -D HAVE_SCALE" : "", - haveDelta ? " -D HAVE_DELTA" : "", - ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); - - ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); - if (k.empty()) - return false; - - UMat mask = _mask.getUMat(), dst = _dst.getUMat(); - - ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), - maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), - dstarg = ocl::KernelArg::ReadWrite(dst); - - if (haveScale) - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fscale, fdelta); - else - k.args(srcarg, maskarg, dstarg, fscale); - } - else - { - if (haveDelta) - k.args(srcarg, maskarg, dstarg, fdelta); - else - k.args(srcarg, maskarg, dstarg); - } - - size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; - return k.run(2, globalsize, NULL, false); - } - else - { - UMat temp; - src.convertTo( temp, dtype, scale, delta ); - temp.copyTo( _dst, _mask ); - } - - return true; -} - -#endif - -void normalize(InputArray _src, InputOutputArray _dst, double a, double b, - int norm_type, int rtype, InputArray _mask) -{ - CV_INSTRUMENT_REGION(); - - double scale = 1, shift = 0; - int type = _src.type(), depth = CV_MAT_DEPTH(type); - - if( rtype < 0 ) - rtype = _dst.fixedType() ? _dst.depth() : depth; - - if( norm_type == CV_MINMAX ) - { - double smin = 0, smax = 0; - double dmin = MIN( a, b ), dmax = MAX( a, b ); - minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); - scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); - if( rtype == CV_32F ) - { - scale = (float)scale; - shift = (float)dmin - (float)(smin*scale); - } - else - shift = dmin - smin*scale; - } - else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) - { - scale = norm( _src, norm_type, _mask ); - scale = scale > DBL_EPSILON ? 
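// [Editor's worked example -- not part of the patch.] For the norm-based
// modes the transform is dst = src * (a / ||src||), so the output reaches
// norm `a`; for NORM_MINMAX above, scale = (dmax - dmin) / (smax - smin) and
// shift = dmin - smin * scale, mapping [smin, smax] onto [dmin, dmax].
// E.g. src = {2, 4} with NORM_L2 and a = 1: ||src|| = sqrt(20) ~= 4.472,
// so dst ~= {0.447, 0.894}.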
a/scale : 0.; - shift = 0; - } - else - CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) - - Mat src = _src.getMat(); - if( _mask.empty() ) - src.convertTo( _dst, rtype, scale, shift ); - else - { - Mat temp; - src.convertTo( temp, rtype, scale, shift ); - temp.copyTo( _dst, _mask ); - } -} - } // namespace diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index c1e86c6a9c..a6f06a5c7d 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -53,6 +53,72 @@ namespace cv { +template<typename T> static inline +void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to) +{ + int i = 0; + for(; i < cn; i++) + buf[i] = saturate_cast<T>(s.val[i]); + for(; i < unroll_to; i++) + buf[i] = buf[i-cn]; +} + +void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) +{ + CV_INSTRUMENT_REGION(); + + const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert(cn <= 4); + switch(depth) + { + case CV_8U: + scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to); + break; + case CV_8S: + scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to); + break; + case CV_16U: + scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to); + break; + case CV_16S: + scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to); + break; + case CV_32S: + scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to); + break; + case CV_32F: + scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to); + break; + case CV_64F: + scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to); + break; + case CV_16F: + scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to); + break; + default: + CV_Error(CV_StsUnsupportedFormat,""); + } +} + +void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) +{ + int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); + size_t esz = CV_ELEM_SIZE(buftype); + BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype); + CV_Assert(cvtFn); + cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); + // unroll the scalar + if( scn < cn ) + { + CV_Assert( scn == 1 ); + size_t esz1 = CV_ELEM_SIZE1(buftype); + for( size_t i = esz1; i < esz; i++ ) + scbuf[i] = scbuf[i - esz1]; + } + for( size_t i = esz; i < blocksize*esz; i++ ) + scbuf[i] = scbuf[i - esz]; +} + template<typename T> static void copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size) { @@ -594,490 +660,6 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) return *this; } -#if CV_SIMD128 -template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ - typedef typename V::lane_type T; - int end = (int)(size.width*esz); - int width = (end + 1)/2; - int width_1 = width & -v_uint8x16::nlanes; - int i, j; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned<sizeof(T)>(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - V t0, t1; - - t0 = v_load((T*)((uchar*)src + i)); - t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes)); - t0 = v_reverse(t0); - t1 = v_reverse(t1); - v_store((T*)(dst + j - v_uint8x16::nlanes), t0); - v_store((T*)(dst + i), t1); - } - if (isAligned<sizeof(T)>(src, dst)) - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - T t0, t1; - - t0 = *((T*)((uchar*)src + i)); - t1 = *((T*)((uchar*)src + j - sizeof(T))); - *((T*)(dst + j - sizeof(T))) = t0; - *((T*)(dst + 
i)) = t1; - } - } - else - { - for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) - { - for (int k = 0; k < (int)sizeof(T); k++) - { - uchar t0, t1; - - t0 = *((uchar*)src + i + k); - t1 = *((uchar*)src + j + k - sizeof(T)); - *(dst + j + k - sizeof(T)) = t0; - *(dst + i + k) = t1; - } - } - } - } -} - -template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ - int end = (int)(size.width*esz); - int width = (end + 1)/2; - -#if CV_STRONG_ALIGNMENT - CV_Assert(isAligned<sizeof(T1)>(src, dst)); - CV_Assert(isAligned<sizeof(T2)>(src, dst)); -#endif - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) - { - T1 t0, t1; - T2 t2, t3; - - t0 = *((T1*)((uchar*)src + i)); - t2 = *((T2*)((uchar*)src + i + sizeof(T1))); - t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2))); - t3 = *((T2*)((uchar*)src + j - sizeof(T2))); - *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0; - *((T2*)(dst + j - sizeof(T2))) = t2; - *((T1*)(dst + i)) = t1; - *((T2*)(dst + i + sizeof(T1))) = t3; - } - } -} -#endif - -static void -flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) -{ -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; -#endif - if (esz == 2 * v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes ) - { -#if CV_SIMD256 - v_uint8x32 t0, t1; - - t0 = v256_load((uchar*)src + i); - t1 = v256_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); -#else - v_uint8x16 t0, t1, t2, t3; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + i + v_uint8x16::nlanes); - t2 = v_load((uchar*)src + j); - t3 = v_load((uchar*)src + j + v_uint8x16::nlanes); - v_store(dst + j, t0); - v_store(dst + j + v_uint8x16::nlanes, t1); - v_store(dst + i, t2); - v_store(dst + i + v_uint8x16::nlanes, t3); -#endif - } - } - } - else if (esz == v_uint8x16::nlanes) - { - int end = (int)(size.width*esz); - int width = end/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) - { - v_uint8x16 t0, t1; - - t0 = v_load((uchar*)src + i); - t1 = v_load((uchar*)src + j); - v_store(dst + j, t0); - v_store(dst + i, t1); - } - } - } - else if (esz == 8 -#if CV_STRONG_ALIGNMENT - && isAligned<sizeof(uint64_t)>(alignmentMark) -#endif - ) - { - flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 4 -#if CV_STRONG_ALIGNMENT - && isAligned<sizeof(unsigned)>(alignmentMark) -#endif - ) - { - flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 2 -#if CV_STRONG_ALIGNMENT - && isAligned<sizeof(ushort)>(alignmentMark) -#endif - ) - { - flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 1) - { - flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 24 -#if CV_STRONG_ALIGNMENT - && isAligned<sizeof(uint64_t)>(alignmentMark) -#endif - ) - { - int end = (int)(size.width*esz); - int width = (end + 1)/2; - - for( ; size.height--; src += sstep, dst += dstep ) - { - for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) ) - { - v_uint8x16 t0, t1; - uint64_t t2, t3; - - t0 = v_load((uchar*)src + 
i); - t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes)); - t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t)); - t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t))); - v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0); - *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2; - v_store(dst + i, t1); - *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3; - } - } - } -#if !CV_STRONG_ALIGNMENT - else if (esz == 12) - { - flipHoriz_double<uint64_t, uint>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 6) - { - flipHoriz_double<uint, ushort>(src, sstep, dst, dstep, size, esz); - } - else if (esz == 3) - { - flipHoriz_double<ushort, uchar>(src, sstep, dst, dstep, size, esz); - } -#endif - else -#endif // CV_SIMD - { - int i, j, limit = (int)(((size.width + 1)/2)*esz); - AutoBuffer<int> _tab(size.width*esz); - int* tab = _tab.data(); - - for( i = 0; i < size.width; i++ ) - for( size_t k = 0; k < esz; k++ ) - tab[i*esz + k] = (int)((size.width - i - 1)*esz + k); - - for( ; size.height--; src += sstep, dst += dstep ) - { - for( i = 0; i < limit; i++ ) - { - j = tab[i]; - uchar t0 = src[i], t1 = src[j]; - dst[i] = t1; dst[j] = t0; - } - } - } -} - -static void -flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz ) -{ - const uchar* src1 = src0 + (size.height - 1)*sstep; - uchar* dst1 = dst0 + (size.height - 1)*dstep; - size.width *= (int)esz; - - for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep, - dst0 += dstep, dst1 -= dstep ) - { - int i = 0; -#if CV_SIMD -#if CV_STRONG_ALIGNMENT - if (isAligned<sizeof(int)>(src0, src1, dst0, dst1)) -#endif - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_int32 t0 = vx_load((int*)(src0 + i)); - v_int32 t1 = vx_load((int*)(src1 + i)); - vx_store((int*)(dst0 + i), t1); - vx_store((int*)(dst1 + i), t0); - } - } -#if CV_STRONG_ALIGNMENT - else - { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) - { - v_uint8 t0 = vx_load(src0 + i); - v_uint8 t1 = vx_load(src1 + i); - vx_store(dst0 + i, t1); - vx_store(dst1 + i, t0); - } - } -#endif -#endif - - if (isAligned<sizeof(int)>(src0, src1, dst0, dst1)) - { - for( ; i <= size.width - 16; i += 16 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = t0; - - t0 = ((int*)(src0 + i))[1]; - t1 = ((int*)(src1 + i))[1]; - - ((int*)(dst0 + i))[1] = t1; - ((int*)(dst1 + i))[1] = t0; - - t0 = ((int*)(src0 + i))[2]; - t1 = ((int*)(src1 + i))[2]; - - ((int*)(dst0 + i))[2] = t1; - ((int*)(dst1 + i))[2] = t0; - - t0 = ((int*)(src0 + i))[3]; - t1 = ((int*)(src1 + i))[3]; - - ((int*)(dst0 + i))[3] = t1; - ((int*)(dst1 + i))[3] = t0; - } - - for( ; i <= size.width - 4; i += 4 ) - { - int t0 = ((int*)(src0 + i))[0]; - int t1 = ((int*)(src1 + i))[0]; - - ((int*)(dst0 + i))[0] = t1; - ((int*)(dst1 + i))[0] = t0; - } - } - - for( ; i < size.width; i++ ) - { - uchar t0 = src0[i]; - uchar t1 = src1[i]; - - dst0[i] = t1; - dst1[i] = t0; - } - } -} - -#ifdef HAVE_OPENCL - -enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS }; - -static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode ) -{ - CV_Assert(flipCode >= -1 && flipCode <= 1); - - const ocl::Device & dev = ocl::Device::getDefault(); - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4); - - bool doubleSupport = dev.doubleFPConfig() > 0; - if (!doubleSupport && depth == CV_64F) - kercn = cn; - - if (cn > 4) - 
return false; - - const char * kernelName; - if (flipCode == 0) - kernelName = "arithm_flip_rows", flipType = FLIP_ROWS; - else if (flipCode > 0) - kernelName = "arithm_flip_cols", flipType = FLIP_COLS; - else - kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH; - - int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; - kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn; - - ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, - format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", - kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), - kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); - if (k.empty()) - return false; - - Size size = _src.size(); - _dst.create(size, type); - UMat src = _src.getUMat(), dst = _dst.getUMat(); - - int cols = size.width * cn / kercn, rows = size.height; - cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; - rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows; - - k.args(ocl::KernelArg::ReadOnlyNoSize(src), - ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); - - size_t maxWorkGroupSize = dev.maxWorkGroupSize(); - CV_Assert(maxWorkGroupSize % 4 == 0); - - size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, - localsize[2] = { maxWorkGroupSize / 4, 4 }; - return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); -} - -#endif - -#if defined HAVE_IPP -static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) -{ -#ifdef HAVE_IPP_IW - CV_INSTRUMENT_REGION_IPP(); - - // Details: https://github.com/opencv/opencv/issues/12943 - if (flip_mode <= 0 /* swap rows */ - && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 - && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ - ) - return false; - - IppiAxis ippMode; - if(flip_mode < 0) - ippMode = ippAxsBoth; - else if(flip_mode == 0) - ippMode = ippAxsHorizontal; - else - ippMode = ippAxsVertical; - - try - { - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - - CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); - } - catch(const ::ipp::IwException &) - { - return false; - } - - return true; -#else - CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); - return false; -#endif -} -#endif - - -void flip( InputArray _src, OutputArray _dst, int flip_mode ) -{ - CV_INSTRUMENT_REGION(); - - CV_Assert( _src.dims() <= 2 ); - Size size = _src.size(); - - if (flip_mode < 0) - { - if (size.width == 1) - flip_mode = 0; - if (size.height == 1) - flip_mode = 1; - } - - if ((size.width == 1 && flip_mode > 0) || - (size.height == 1 && flip_mode == 0)) - { - return _src.copyTo(_dst); - } - - CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) - - Mat src = _src.getMat(); - int type = src.type(); - _dst.create( size, type ); - Mat dst = _dst.getMat(); - - CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); - - size_t esz = CV_ELEM_SIZE(type); - - if( flip_mode <= 0 ) - flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - else - flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); - - if( flip_mode < 0 ) - flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); -} - -void rotate(InputArray _src, OutputArray _dst, int rotateMode) -{ - CV_Assert(_src.dims() <= 2); - - switch (rotateMode) - { - case ROTATE_90_CLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 
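// [Editor's sketch -- not part of the patch.] rotate() composes transpose
// and flip: 90 deg CW = transpose + horizontal flip (flipCode 1, here),
// 180 deg = flip around both axes (flipCode -1), 90 deg CCW = transpose +
// vertical flip (flipCode 0). On a 2x2 block, 90 deg CW:
//
//   [a b]  --transpose-->  [a c]  --flip columns-->  [c a]
//   [c d]                  [b d]                     [d b]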
1); - break; - case ROTATE_180: - flip(_src, _dst, -1); - break; - case ROTATE_90_COUNTERCLOCKWISE: - transpose(_src, _dst); - flip(_dst, _dst, 0); - break; - default: - break; - } -} #if defined HAVE_OPENCL && !defined __APPLE__ @@ -1325,13 +907,12 @@ void copyMakeConstBorder_8u( const uchar* src, size_t srcstep, cv::Size srcroi, memcpy( dstInner + srcroi.width, constBuf, right ); } - dst += dststep*top; - for( i = 0; i < top; i++ ) - memcpy(dst + (i - top)*dststep, constBuf, dstroi.width); + memcpy(dst + i * dststep, constBuf, dstroi.width); + dst += (top + srcroi.height) * dststep; for( i = 0; i < bottom; i++ ) - memcpy(dst + (i + srcroi.height)*dststep, constBuf, dstroi.width); + memcpy(dst + i * dststep, constBuf, dstroi.width); } } @@ -1500,6 +1081,9 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, } } + +#ifndef OPENCV_EXCLUDE_C_API + /* dst = src */ CV_IMPL void cvCopy( const void* srcarr, void* dstarr, const void* maskarr ) @@ -1606,4 +1190,5 @@ cvFlip( const CvArr* srcarr, CvArr* dstarr, int flip_mode ) cv::flip( src, dst, flip_mode ); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/count_non_zero.dispatch.cpp b/modules/core/src/count_non_zero.dispatch.cpp index 089359d3e1..aac0c81293 100644 --- a/modules/core/src/count_non_zero.dispatch.cpp +++ b/modules/core/src/count_non_zero.dispatch.cpp @@ -62,11 +62,9 @@ static bool ipp_countNonZero( Mat &src, int &res ) { CV_INSTRUMENT_REGION_IPP(); -#if defined __APPLE__ || (defined _MSC_VER && defined _M_IX86) // see https://github.com/opencv/opencv/issues/17453 - if (src.dims <= 2 && src.step > 520000) + if (src.dims <= 2 && src.step > 520000 && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) return false; -#endif #if IPP_VERSION_X100 < 201801 // Poor performance of SSE42 diff --git a/modules/core/src/cuda/gpu_mat_nd.cu b/modules/core/src/cuda/gpu_mat_nd.cu new file mode 100644 index 0000000000..3f51fd8afa --- /dev/null +++ b/modules/core/src/cuda/gpu_mat_nd.cu @@ -0,0 +1,269 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
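// [Editor's sketch -- not part of the patch.] The new file below implements
// the CUDA side of GpuMatND. A hypothetical host/device round trip, assuming
// a CUDA-enabled build and an available device:
//
//   int sz[] = {2, 3, 4};
//   cv::Mat host(3, sz, CV_32FC1, cv::Scalar(0));
//   cv::cuda::GpuMatND gpu;
//   gpu.upload(host);   // allocates device memory and copies up
//   cv::Mat back;
//   gpu.download(back); // copies back; back ends up equal to host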
+ +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/core/cuda.hpp" +#include "opencv2/cudev.hpp" + +using namespace cv; +using namespace cv::cuda; + +GpuData::GpuData(const size_t _size) + : data(nullptr), size(_size) +{ + CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size)); +} + +GpuData::~GpuData() +{ + CV_CUDEV_SAFE_CALL(cudaFree(data)); +} + +///////////////////////////////////////////////////// +/// create + +void GpuMatND::create(SizeArray _size, int _type) +{ + { + auto elements_nonzero = [](SizeArray& v) + { + return std::all_of(v.begin(), v.end(), + [](unsigned u){ return u > 0; }); + }; + CV_Assert(!_size.empty()); + CV_Assert(elements_nonzero(_size)); + } + + _type &= Mat::TYPE_MASK; + + if (size == _size && type() == _type && !empty() && !external() && isContinuous() && !isSubmatrix()) + return; + + release(); + + setFields(std::move(_size), _type); + + data_ = std::make_shared<GpuData>(totalMemSize()); + data = data_->data; + offset = 0; +} + +///////////////////////////////////////////////////// +/// release + +void GpuMatND::release() +{ + data = nullptr; + data_.reset(); + + flags = dims = offset = 0; + size.clear(); + step.clear(); +} + +///////////////////////////////////////////////////// +/// clone + +static bool next(uchar*& d, const uchar*& s, std::vector<int>& idx, const int dims, const GpuMatND& dst, const GpuMatND& src) +{ + int inc = dims-3; + + while (true) + { + if (idx[inc] == src.size[inc] - 1) + { + if (inc == 0) + { + return false; + } + + idx[inc] = 0; + d -= (dst.size[inc] - 1) * dst.step[inc]; + s -= (src.size[inc] - 1) * src.step[inc]; + inc--; + } + else + { + idx[inc]++; + d += dst.step[inc]; + s += src.step[inc]; + break; + } + } + + return true; +} + +GpuMatND GpuMatND::clone() const +{ + CV_DbgAssert(!empty()); + + GpuMatND ret(size, type()); + + if (isContinuous()) + { + CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice)); + } + else + { + // 1D arrays are always continuous + + if (dims == 2) + { + CV_CUDEV_SAFE_CALL( + cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0], + size[1]*step[1], size[0], cudaMemcpyDeviceToDevice) + ); + } + else + { + std::vector<int> idx(dims-2, 0); + + uchar* d = ret.getDevicePtr(); + const uchar* s = getDevicePtr(); + + // iterate each 2D plane + do + { + CV_CUDEV_SAFE_CALL( + cudaMemcpy2DAsync( + d, ret.step[dims-2], s, step[dims-2], + size[dims-1]*step[dims-1], size[dims-2], cudaMemcpyDeviceToDevice) + ); + } + while (next(d, s, idx, dims, ret, *this)); + + CV_CUDEV_SAFE_CALL(cudaStreamSynchronize(0)); + } + } + + return ret; +} + +GpuMatND GpuMatND::clone(Stream& stream) const +{ + CV_DbgAssert(!empty()); + + GpuMatND ret(size, type()); + + cudaStream_t _stream = StreamAccessor::getStream(stream); + + if (isContinuous()) + { + CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice, _stream)); + } + else + { + // 1D arrays are always continuous + + if (dims == 2) + { + CV_CUDEV_SAFE_CALL( + cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0], + size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream) + ); + } + else + { + std::vector<int> idx(dims-2, 0); + + uchar* d = ret.getDevicePtr(); + const uchar* s = getDevicePtr(); + + // iterate each 2D plane + do + { + CV_CUDEV_SAFE_CALL( + cudaMemcpy2DAsync( + d, ret.step[dims-2], s, step[dims-2], + size[dims-1]*step[dims-1], size[dims-2], 
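// [Editor's note -- not part of the patch.] For non-continuous arrays the
// clone loops above copy one 2D plane per cudaMemcpy2D(Async) call, while
// next() advances a multi-index over the outer dimensions like an odometer.
// For size {2, 3, H, W} the planes are visited as idx = {0,0}, {0,1}, {0,2},
// {1,0}, {1,1}, {1,2} -- 2*3 = 6 copies in total, each moving H rows of
// W * elemSize() bytes.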
cudaMemcpyDeviceToDevice, _stream) + ); + } + while (next(d, s, idx, dims, ret, *this)); + } + } + + return ret; +} + +///////////////////////////////////////////////////// +/// upload + +void GpuMatND::upload(InputArray src) +{ + Mat mat = src.getMat(); + + CV_DbgAssert(!mat.empty()); + + if (!mat.isContinuous()) + mat = mat.clone(); + + SizeArray _size(mat.dims); + std::copy_n(mat.size.p, mat.dims, _size.data()); + + create(std::move(_size), mat.type()); + + CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice)); +} + +void GpuMatND::upload(InputArray src, Stream& stream) +{ + Mat mat = src.getMat(); + + CV_DbgAssert(!mat.empty()); + + if (!mat.isContinuous()) + mat = mat.clone(); + + SizeArray _size(mat.dims); + std::copy_n(mat.size.p, mat.dims, _size.data()); + + create(std::move(_size), mat.type()); + + cudaStream_t _stream = StreamAccessor::getStream(stream); + CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream)); +} + +///////////////////////////////////////////////////// +/// download + +void GpuMatND::download(OutputArray dst) const +{ + CV_DbgAssert(!empty()); + + dst.create(dims, size.data(), type()); + Mat mat = dst.getMat(); + + GpuMatND gmat = *this; + + if (!gmat.isContinuous()) + gmat = gmat.clone(); + + CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost)); +} + +void GpuMatND::download(OutputArray dst, Stream& stream) const +{ + CV_DbgAssert(!empty()); + + dst.create(dims, size.data(), type()); + Mat mat = dst.getMat(); + + GpuMatND gmat = *this; + + if (!gmat.isContinuous()) + gmat = gmat.clone(stream); + + cudaStream_t _stream = StreamAccessor::getStream(stream); + CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost, _stream)); +} + +#endif diff --git a/modules/core/src/cuda_gpu_mat_nd.cpp b/modules/core/src/cuda_gpu_mat_nd.cpp new file mode 100644 index 0000000000..8440f179ea --- /dev/null +++ b/modules/core/src/cuda_gpu_mat_nd.cpp @@ -0,0 +1,180 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
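// [Editor's sketch -- not part of the patch.] The companion file below holds
// the CUDA-independent GpuMatND logic (range views, 2D headers, field setup).
// A hypothetical zero-copy slice:
//
//   cv::cuda::GpuMatND nd({4, 5, 6}, CV_8UC1);          // 4x5x6 device array
//   std::vector<cv::Range> rs = { cv::Range(1, 3),      // keep planes 1..2
//                                 cv::Range::all(),
//                                 cv::Range::all() };
//   cv::cuda::GpuMatND view = nd(rs);                   // shares nd's memory
//   cv::cuda::GpuMat plane = view.createGpuMatHeader(
//       {0}, cv::Range::all(), cv::Range::all());       // 5x6 2D header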
+ +#include "precomp.hpp" + +using namespace cv; +using namespace cv::cuda; + +GpuMatND::~GpuMatND() = default; + +GpuMatND::GpuMatND(SizeArray _size, int _type, void* _data, StepArray _step) : + flags(0), dims(0), data(static_cast<uchar*>(_data)), offset(0) +{ + CV_Assert(_step.empty() || _size.size() == _step.size() + 1); + + setFields(std::move(_size), _type, std::move(_step)); +} + +GpuMatND GpuMatND::operator()(const std::vector<Range>& ranges) const +{ + CV_Assert(dims == (int)ranges.size()); + + for (int i = 0; i < dims; ++i) + { + Range r = ranges[i]; + CV_Assert(r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= size[i])); + } + + GpuMatND ret = *this; + + for (int i = 0; i < dims; ++i) + { + Range r = ranges[i]; + if (r != Range::all() && r != Range(0, ret.size[i])) + { + ret.offset += r.start * ret.step[i]; + ret.size[i] = r.size(); + ret.flags |= Mat::SUBMATRIX_FLAG; + } + } + + ret.flags = cv::updateContinuityFlag(ret.flags, dims, ret.size.data(), ret.step.data()); + + return ret; +} + +GpuMat GpuMatND::createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const +{ + CV_Assert((int)idx.size() == dims - 2); + + std::vector<Range> ranges; + for (int i : idx) + ranges.emplace_back(i, i+1); + ranges.push_back(rowRange); + ranges.push_back(colRange); + + return (*this)(ranges).createGpuMatHeader(); +} + +GpuMat GpuMatND::createGpuMatHeader() const +{ + auto Effectively2D = [](GpuMatND m) + { + for (int i = 0; i < m.dims - 2; ++i) + if (m.size[i] > 1) + return false; + return true; + }; + CV_Assert(Effectively2D(*this)); + + return GpuMat(size[dims-2], size[dims-1], type(), getDevicePtr(), step[dims-2]); +} + +GpuMat GpuMatND::operator()(IndexArray idx, Range rowRange, Range colRange) const +{ + return createGpuMatHeader(idx, rowRange, colRange).clone(); +} + +GpuMatND::operator GpuMat() const +{ + return createGpuMatHeader().clone(); +} + +void GpuMatND::setFields(SizeArray _size, int _type, StepArray _step) +{ + _type &= Mat::TYPE_MASK; + + flags = Mat::MAGIC_VAL + _type; + dims = static_cast<int>(_size.size()); + size = std::move(_size); + + if (_step.empty()) + { + step = StepArray(dims); + + step.back() = elemSize(); + for (int _i = dims - 2; _i >= 0; --_i) + { + const size_t i = _i; + step[i] = step[i+1] * size[i+1]; + } + + flags |= Mat::CONTINUOUS_FLAG; + } + else + { + step = std::move(_step); + step.push_back(elemSize()); + + flags = cv::updateContinuityFlag(flags, dims, size.data(), step.data()); + } + + CV_Assert(size.size() == step.size()); + CV_Assert(step.back() == elemSize()); +} + +#ifndef HAVE_CUDA + +GpuData::GpuData(const size_t _size) + : data(nullptr), size(0) +{ + CV_UNUSED(_size); + throw_no_cuda(); +} + +GpuData::~GpuData() +{ +} + +void GpuMatND::create(SizeArray _size, int _type) +{ + CV_UNUSED(_size); + CV_UNUSED(_type); + throw_no_cuda(); +} + +void GpuMatND::release() +{ + throw_no_cuda(); +} + +GpuMatND GpuMatND::clone() const +{ + throw_no_cuda(); +} + +GpuMatND GpuMatND::clone(Stream& stream) const +{ + CV_UNUSED(stream); + throw_no_cuda(); +} + +void GpuMatND::upload(InputArray src) +{ + CV_UNUSED(src); + throw_no_cuda(); +} + +void GpuMatND::upload(InputArray src, Stream& stream) +{ + CV_UNUSED(src); + CV_UNUSED(stream); + throw_no_cuda(); +} + +void GpuMatND::download(OutputArray dst) const +{ + CV_UNUSED(dst); + throw_no_cuda(); +} + +void GpuMatND::download(OutputArray dst, Stream& stream) const +{ + CV_UNUSED(dst); + CV_UNUSED(stream); + throw_no_cuda(); +} + +#endif diff --git a/modules/core/src/cuda_stream.cpp 
b/modules/core/src/cuda_stream.cpp index 5fb873a369..3680e0720a 100644 --- a/modules/core/src/cuda_stream.cpp +++ b/modules/core/src/cuda_stream.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include <climits> using namespace cv; using namespace cv::cuda; @@ -293,6 +294,7 @@ public: Impl(); Impl(const Ptr<GpuMat::Allocator>& allocator); + Impl(const unsigned int cudaFlags); explicit Impl(cudaStream_t stream); ~Impl(); @@ -312,6 +314,13 @@ cv::cuda::Stream::Impl::Impl(const Ptr<GpuMat::Allocator>& allocator) : stream(0 ownStream = true; } +cv::cuda::Stream::Impl::Impl(const unsigned int cudaFlags) : stream(0), ownStream(false) +{ + cudaSafeCall(cudaStreamCreateWithFlags(&stream, cudaFlags)); + ownStream = true; + allocator = makePtr<StackAllocator>(stream); +} + cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_), ownStream(false) { allocator = makePtr<StackAllocator>(stream); @@ -450,6 +459,16 @@ cv::cuda::Stream::Stream(const Ptr<GpuMat::Allocator>& allocator) #endif } +cv::cuda::Stream::Stream(const size_t cudaFlags) +{ +#ifndef HAVE_CUDA + CV_UNUSED(cudaFlags); + throw_no_cuda(); +#else + impl_ = makePtr<Impl>(cudaFlags & UINT_MAX); +#endif +} + bool cv::cuda::Stream::queryIfComplete() const { #ifndef HAVE_CUDA diff --git a/modules/core/src/datastructs.cpp b/modules/core/src/datastructs.cpp index c00266dd55..1ff6fa9178 100644 --- a/modules/core/src/datastructs.cpp +++ b/modules/core/src/datastructs.cpp @@ -40,6 +40,8 @@ //M*/ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + /* default alignment for dynamic data structures residing in storages. */ #define CV_STRUCT_ALIGN ((int)sizeof(double)) @@ -3585,4 +3587,5 @@ void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr ) } +#endif // OPENCV_EXCLUDE_C_API /* End of file. */ diff --git a/modules/core/src/directx.cpp b/modules/core/src/directx.cpp index f028702d7f..0173f02916 100644 --- a/modules/core/src/directx.cpp +++ b/modules/core/src/directx.cpp @@ -1050,7 +1050,7 @@ bool ocl_convert_nv12_to_bgr( k.args(clImageY, clImageUV, clBuffer, step, cols, rows); - size_t globalsize[] = { (size_t)cols, (size_t)rows }; + size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 }; return k.run(2, globalsize, 0, false); } @@ -1071,7 +1071,7 @@ bool ocl_convert_bgr_to_nv12( k.args(clBuffer, step, cols, rows, clImageY, clImageUV); - size_t globalsize[] = { (size_t)cols, (size_t)rows }; + size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 }; return k.run(2, globalsize, 0, false); } diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index fcdb2a202f..87873666d9 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -4640,6 +4640,9 @@ int cv::getOptimalDFTSize( int size0 ) return optimalDFTSizeTab[b]; } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows ) { @@ -4695,4 +4698,5 @@ cvGetOptimalDFTSize( int size0 ) return cv::getOptimalDFTSize(size0); } +#endif // OPENCV_EXCLUDE_C_API /* End of file. 
*/
diff --git a/modules/core/src/hal_internal.cpp b/modules/core/src/hal_internal.cpp
index a31d1aa672..7ed15bdd8e 100644
--- a/modules/core/src/hal_internal.cpp
+++ b/modules/core/src/hal_internal.cpp
@@ -42,6 +42,7 @@
 //
 //M*/
 
+#include "precomp.hpp"
 #include "hal_internal.hpp"
 
 #ifdef HAVE_LAPACK
diff --git a/modules/core/src/hal_internal.hpp b/modules/core/src/hal_internal.hpp
index 129a710145..c7a0d46de4 100644
--- a/modules/core/src/hal_internal.hpp
+++ b/modules/core/src/hal_internal.hpp
@@ -45,8 +45,6 @@
 #ifndef OPENCV_CORE_HAL_INTERNAL_HPP
 #define OPENCV_CORE_HAL_INTERNAL_HPP
 
-#include "precomp.hpp"
-
 #ifdef HAVE_LAPACK
 
 int lapack_LU32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, int* info);
diff --git a/modules/core/src/intel_gpu_gemm.inl.hpp b/modules/core/src/intel_gpu_gemm.inl.hpp
index 729b43f604..fbd567b949 100644
--- a/modules/core/src/intel_gpu_gemm.inl.hpp
+++ b/modules/core/src/intel_gpu_gemm.inl.hpp
@@ -25,7 +25,6 @@
 #ifdef HAVE_OPENCL
 
 #include <sstream>
-#include "precomp.hpp"
 #include "opencl_kernels_core.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp
index 486b7a5aba..9bca6a8211 100644
--- a/modules/core/src/lapack.cpp
+++ b/modules/core/src/lapack.cpp
@@ -753,8 +753,6 @@ SVBkSb( int m, int n, const double* w, size_t wstep,
            (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
 }
 
-}
-
 /****************************************************************************************\
 *                               Determinant of the matrix                                *
 \****************************************************************************************/
@@ -764,7 +762,7 @@ SVBkSb( int m, int n, const double* w, size_t wstep,
     m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) +  \
     m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
 
-double cv::determinant( InputArray _mat )
+double determinant( InputArray _mat )
 {
     CV_INSTRUMENT_REGION();
 
@@ -842,7 +840,7 @@ double cv::determinant( InputArray _mat )
 #define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
 #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
 
-double cv::invert( InputArray _src, OutputArray _dst, int method )
+double invert( InputArray _src, OutputArray _dst, int method )
 {
     CV_INSTRUMENT_REGION();
 
@@ -1069,13 +1067,19 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
     return result;
 }
 
+UMat UMat::inv(int method) const
+{
+    UMat m;
+    invert(*this, m, method);
+    return m;
+}
 
 /****************************************************************************************\
 *                               Solving a linear system                                  *
 \****************************************************************************************/
 
-bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
+bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
 {
     CV_INSTRUMENT_REGION();
 
@@ -1374,7 +1378,7 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
 
 /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
 
-bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
+bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
 {
     CV_INSTRUMENT_REGION();
 
@@ -1396,7 +1400,7 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
     const bool evecNeeded = _evects.needed();
     const int esOptions = evecNeeded ?
Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly; _evals.create(n, 1, type); - cv::Mat evals = _evals.getMat(); + Mat evals = _evals.getMat(); if ( type == CV_64F ) { Eigen::MatrixXd src_eig, zeros_eig; @@ -1448,9 +1452,6 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects ) #endif } -namespace cv -{ - static void _SVDcompute( InputArray _aarr, OutputArray _w, OutputArray _u, OutputArray _vt, int flags ) { @@ -1598,6 +1599,9 @@ void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL double cvDet( const CvArr* arr ) { @@ -1789,3 +1793,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr, cv::SVD::backSubst(w, u, v, rhs, dst); CV_Assert( dst.data == dst0.data ); } +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index d4f8dc0ba4..f968bec02f 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -1638,6 +1638,9 @@ void patchNaNs( InputOutputArray _a, double _val ) } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr ) { cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); @@ -1660,6 +1663,7 @@ CV_IMPL int cvCheckArr( const CvArr* arr, int flags, return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal ); } +#endif // OPENCV_EXCLUDE_C_API /* Finds real roots of cubic, quadratic or linear equation. @@ -1955,6 +1959,8 @@ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters ) } +#ifndef OPENCV_EXCLUDE_C_API + void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) { cv::Mat _a = cv::cvarrToMat(a); @@ -1964,6 +1970,7 @@ void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated } +#endif // OPENCV_EXCLUDE_C_API // Common constants for dispatched code diff --git a/modules/core/src/mathfuncs_core.dispatch.cpp b/modules/core/src/mathfuncs_core.dispatch.cpp index e48f84ebbe..3c53ab1c38 100644 --- a/modules/core/src/mathfuncs_core.dispatch.cpp +++ b/modules/core/src/mathfuncs_core.dispatch.cpp @@ -7,6 +7,10 @@ #include "mathfuncs_core.simd.hpp" #include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +#define IPP_DISABLE_MAGNITUDE_32F 1 // accuracy: https://github.com/opencv/opencv/issues/19506 + + namespace cv { namespace hal { ///////////////////////////////////// ATAN2 //////////////////////////////////// @@ -44,8 +48,25 @@ void magnitude32f(const float* x, const float* y, float* mag, int len) CV_INSTRUMENT_REGION(); CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len); + +#ifdef HAVE_IPP + bool allowIPP = true; +#ifdef IPP_DISABLE_MAGNITUDE_32F + if (cv::ipp::getIppTopFeatures() & ( +#if IPP_VERSION_X100 >= 201700 + ippCPUID_AVX512F | +#endif + ippCPUID_AVX2) + ) + { + allowIPP = (len & 7) == 0; + } +#endif + // SSE42 performance issues - CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); + CV_IPP_RUN((IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42) && allowIPP, + CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); +#endif CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len), CV_CPU_DISPATCH_MODES_ALL); diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp index 
a9b82aee88..f4bd14b5dd 100644 --- a/modules/core/src/matmul.dispatch.cpp +++ b/modules/core/src/matmul.dispatch.cpp @@ -999,8 +999,79 @@ double Mat::dot(InputArray _mat) const return r; } + +#ifdef HAVE_OPENCL + +static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) +{ + UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); + + int type = src1.type(), depth = CV_MAT_DEPTH(type), + kercn = ocl::predictOptimalVectorWidth(src1, src2); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if ( !doubleSupport && depth == CV_64F ) + return false; + + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + int ddepth = std::max(CV_32F, depth); + + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + char cvt[40]; + ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, + format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " + "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), + ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), + ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), + (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", + _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn)); + if (k.empty()) + return false; + + UMat db(1, dbsize, ddepth); + + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), + src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), + dbarg = ocl::KernelArg::PtrWriteOnly(db); + + k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); + + size_t globalsize = dbsize * wgs; + if (k.run(1, &globalsize, &wgs, true)) + { + res = sum(db.getMat(ACCESS_READ))[0]; + return true; + } + return false; +} + +#endif + +double UMat::dot(InputArray m) const +{ + CV_INSTRUMENT_REGION(); + + CV_Assert(m.sameSize(*this) && m.type() == type()); + +#ifdef HAVE_OPENCL + double r = 0; + CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) +#endif + + return getMat(ACCESS_READ).dot(m); +} + } // namespace cv:: + +#ifndef OPENCV_EXCLUDE_C_API /****************************************************************************************\ * Earlier API * \****************************************************************************************/ @@ -1225,4 +1296,6 @@ cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr, CV_Assert(dst0.data == dst.data); } +#endif // OPENCV_EXCLUDE_C_API + /* End of file. 
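The matmul.dispatch.cpp hunks above give UMat::dot an OpenCL reduction path, and the neighboring lapack.cpp and matrix_operations.cpp changes add UMat::inv and UMat::eye. A minimal sketch of how the three fit together, not part of the patch (illustrative function name; OpenCV transparently falls back to the CPU path when no usable OpenCL device is present):

#include <opencv2/core.hpp>

double umatExample()
{
    cv::UMat a = cv::UMat::eye(3, 3, CV_32F);  // new UMat::eye overload
    cv::UMat b;
    a.convertTo(b, CV_32F, 2.0);               // b = 2*I

    cv::UMat bInv = b.inv();                   // new UMat::inv, calls cv::invert
    return a.dot(bInv);                        // new UMat::dot, OpenCL when possible
}
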
*/ diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 38973ea1a4..c828e2906d 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -1537,7 +1537,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in static void transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD && !defined(__aarch64__) && !defined(_M_ARM64) +#if CV_SIMD if( scn == 3 && dcn == 3 ) { int x = 0; diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 122b383379..61abc2ba8f 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -204,7 +204,7 @@ MatAllocator* Mat::getStdAllocator() //================================================================================================== -bool MatSize::operator==(const MatSize& sz) const +bool MatSize::operator==(const MatSize& sz) const CV_NOEXCEPT { int d = dims(); int dsz = sz.dims(); @@ -337,7 +337,7 @@ void finalizeHdr(Mat& m) //======================================= Mat ====================================================== -Mat::Mat() +Mat::Mat() CV_NOEXCEPT : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows), step(0) {} diff --git a/modules/core/src/matrix_c.cpp b/modules/core/src/matrix_c.cpp index dc935c3eca..e15ea9fdac 100644 --- a/modules/core/src/matrix_c.cpp +++ b/modules/core/src/matrix_c.cpp @@ -1,7 +1,12 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" #include "opencv2/core/mat.hpp" #include "opencv2/core/types_c.h" -#include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API // glue CvMatND cvMatND(const cv::Mat& m) @@ -342,3 +347,5 @@ cvSort( const CvArr* _src, CvArr* _dst, CvArr* _idx, int flags ) CV_Assert( dst0.data == dst.data ); } } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/matrix_iterator.cpp b/modules/core/src/matrix_iterator.cpp index aaa7f4aa01..ce7c191cbe 100644 --- a/modules/core/src/matrix_iterator.cpp +++ b/modules/core/src/matrix_iterator.cpp @@ -2,9 +2,8 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html - -#include "opencv2/core/mat.hpp" #include "precomp.hpp" +#include "opencv2/core/mat.hpp" namespace cv { diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp index ac94ecee7d..83c8aaeb57 100644 --- a/modules/core/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -2,11 +2,10 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html - +#include "precomp.hpp" #include "opencv2/core/mat.hpp" #include "opencv2/core/types_c.h" #include "opencl_kernels_core.hpp" -#include "precomp.hpp" #undef HAVE_IPP #undef CV_IPP_RUN_FAST @@ -227,6 +226,23 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s ) } } + +namespace cv { + +UMat UMat::eye(int rows, int cols, int type) +{ + return UMat::eye(Size(cols, rows), type); +} + +UMat UMat::eye(Size size, int type) +{ + UMat m(size, type); + setIdentity(m); + return m; +} + +} // namespace + //////////////////////////////////////////// trace /////////////////////////////////////////// 
cv::Scalar cv::trace( InputArray _m )
@@ -261,285 +277,6 @@ cv::Scalar cv::trace( InputArray _m )
     return cv::sum(m.diag());
 }
 
-////////////////////////////////////// transpose /////////////////////////////////////////
-
-namespace cv
-{
-
-template<typename T> static void
-transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
-{
-    int i=0, j, m = sz.width, n = sz.height;
-
-    #if CV_ENABLE_UNROLLED
-    for(; i <= m - 4; i += 4 )
-    {
-        T* d0 = (T*)(dst + dstep*i);
-        T* d1 = (T*)(dst + dstep*(i+1));
-        T* d2 = (T*)(dst + dstep*(i+2));
-        T* d3 = (T*)(dst + dstep*(i+3));
-
-        for( j = 0; j <= n - 4; j += 4 )
-        {
-            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
-            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
-            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
-            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
-
-            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
-            d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
-            d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
-            d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
-        }
-
-        for( ; j < n; j++ )
-        {
-            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
-            d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
-        }
-    }
-    #endif
-    for( ; i < m; i++ )
-    {
-        T* d0 = (T*)(dst + dstep*i);
-        j = 0;
-        #if CV_ENABLE_UNROLLED
-        for(; j <= n - 4; j += 4 )
-        {
-            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
-            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
-            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
-            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
-
-            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
-        }
-        #endif
-        for( ; j < n; j++ )
-        {
-            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
-            d0[j] = s0[0];
-        }
-    }
-}
-
-template<typename T> static void
-transposeI_( uchar* data, size_t step, int n )
-{
-    for( int i = 0; i < n; i++ )
-    {
-        T* row = (T*)(data + step*i);
-        uchar* data1 = data + i*sizeof(T);
-        for( int j = i+1; j < n; j++ )
-            std::swap( row[j], *(T*)(data1 + step*j) );
-    }
-}
-
-typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
-typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
-
-#define DEF_TRANSPOSE_FUNC(suffix, type) \
-static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
-{ transpose_<type>(src, sstep, dst, dstep, sz); } \
-\
-static void transposeI_##suffix( uchar* data, size_t step, int n ) \
-{ transposeI_<type>(data, step, n); }
-
-DEF_TRANSPOSE_FUNC(8u, uchar)
-DEF_TRANSPOSE_FUNC(16u, ushort)
-DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
-DEF_TRANSPOSE_FUNC(32s, int)
-DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
-DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
-DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
-DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
-DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
-DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
-
-static TransposeFunc transposeTab[] =
-{
-    0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
-    transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
-    0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8
-};
-
-static TransposeInplaceFunc transposeInplaceTab[] =
-{
-    0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0,
-    transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4,
-    0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8
-};
-
-#ifdef
HAVE_OPENCL - -static bool ocl_transpose( InputArray _src, OutputArray _dst ) -{ - const ocl::Device & dev = ocl::Device::getDefault(); - const int TILE_DIM = 32, BLOCK_ROWS = 8; - int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), - rowsPerWI = dev.isIntel() ? 4 : 1; - - UMat src = _src.getUMat(); - _dst.create(src.cols, src.rows, type); - UMat dst = _dst.getUMat(); - - String kernelName("transpose"); - bool inplace = dst.u == src.u; - - if (inplace) - { - CV_Assert(dst.cols == dst.rows); - kernelName += "_inplace"; - } - else - { - // check required local memory size - size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); - if (required_local_memory > ocl::Device::getDefault().localMemSize()) - return false; - } - - ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, - format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", - ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), - cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : "")); - if (k.empty()) - return false; - - if (inplace) - k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); - else - k.args(ocl::KernelArg::ReadOnly(src), - ocl::KernelArg::WriteOnlyNoSize(dst)); - - size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; - size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; - - if (inplace && dev.isIntel()) - { - localsize[0] = 16; - localsize[1] = dev.maxWorkGroupSize() / localsize[0]; - } - - return k.run(2, globalsize, localsize, false); -} - -#endif - -#ifdef HAVE_IPP -static bool ipp_transpose( Mat &src, Mat &dst ) -{ - CV_INSTRUMENT_REGION_IPP(); - - int type = src.type(); - typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); - typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); - IppiTranspose ippiTranspose = 0; - IppiTransposeI ippiTranspose_I = 0; - - if (dst.data == src.data && dst.cols == dst.rows) - { - CV_SUPPRESS_DEPRECATED_START - ippiTranspose_I = - type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : - type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : - type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : - type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : - type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : - type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : - type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : - type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : - type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : - type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : - type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : - type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : - type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : - type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR : - type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; - CV_SUPPRESS_DEPRECATED_END - } - else - { - ippiTranspose = - type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : - type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : - type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R : - type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : - type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R : - type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R : - type == CV_16SC1 ? 
(IppiTranspose)ippiTranspose_16s_C1R : - type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R : - type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R : - type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R : - type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R : - type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R : - type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R : - type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R : - type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0; - } - - IppiSize roiSize = { src.cols, src.rows }; - if (ippiTranspose != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - else if (ippiTranspose_I != 0) - { - if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0) - return true; - } - return false; -} -#endif - -} - - -void cv::transpose( InputArray _src, OutputArray _dst ) -{ - CV_INSTRUMENT_REGION(); - - int type = _src.type(), esz = CV_ELEM_SIZE(type); - CV_Assert( _src.dims() <= 2 && esz <= 32 ); - - CV_OCL_RUN(_dst.isUMat(), - ocl_transpose(_src, _dst)) - - Mat src = _src.getMat(); - if( src.empty() ) - { - _dst.release(); - return; - } - - _dst.create(src.cols, src.rows, src.type()); - Mat dst = _dst.getMat(); - - // handle the case of single-column/single-row matrices, stored in STL vectors. - if( src.rows != dst.cols || src.cols != dst.rows ) - { - CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) ); - src.copyTo(dst); - return; - } - - CV_IPP_RUN_FAST(ipp_transpose(src, dst)) - - if( dst.data == src.data ) - { - TransposeInplaceFunc func = transposeInplaceTab[esz]; - CV_Assert( func != 0 ); - CV_Assert( dst.cols == dst.rows ); - func( dst.ptr(), dst.step, dst.rows ); - } - else - { - TransposeFunc func = transposeTab[esz]; - CV_Assert( func != 0 ); - func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() ); - } -} - ////////////////////////////////////// completeSymm ///////////////////////////////////////// diff --git a/modules/core/src/matrix_sparse.cpp b/modules/core/src/matrix_sparse.cpp index 05d16d706e..21e7e91151 100644 --- a/modules/core/src/matrix_sparse.cpp +++ b/modules/core/src/matrix_sparse.cpp @@ -2,10 +2,9 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html - +#include "precomp.hpp" #include "opencv2/core/mat.hpp" #include "opencv2/core/types_c.h" -#include "precomp.hpp" namespace cv { diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp new file mode 100644 index 0000000000..727eaf7fee --- /dev/null +++ b/modules/core/src/matrix_transform.cpp @@ -0,0 +1,770 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#include "precomp.hpp"
+#include "opencl_kernels_core.hpp"
+
+namespace cv {
+
+////////////////////////////////////// transpose /////////////////////////////////////////
+
+template<typename T> static void
+transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
+{
+    int i=0, j, m = sz.width, n = sz.height;
+
+    #if CV_ENABLE_UNROLLED
+    for(; i <= m - 4; i += 4 )
+    {
+        T* d0 = (T*)(dst + dstep*i);
+        T* d1 = (T*)(dst + dstep*(i+1));
+        T* d2 = (T*)(dst + dstep*(i+2));
+        T* d3 = (T*)(dst + dstep*(i+3));
+
+        for( j = 0; j <= n - 4; j += 4 )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
+            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
+            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
+            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
+
+            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
+            d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
+            d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
+            d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
+        }
+
+        for( ; j < n; j++ )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
+            d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
+        }
+    }
+    #endif
+    for( ; i < m; i++ )
+    {
+        T* d0 = (T*)(dst + dstep*i);
+        j = 0;
+        #if CV_ENABLE_UNROLLED
+        for(; j <= n - 4; j += 4 )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
+            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
+            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
+            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
+
+            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
+        }
+        #endif
+        for( ; j < n; j++ )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
+            d0[j] = s0[0];
+        }
+    }
+}
+
+template<typename T> static void
+transposeI_( uchar* data, size_t step, int n )
+{
+    for( int i = 0; i < n; i++ )
+    {
+        T* row = (T*)(data + step*i);
+        uchar* data1 = data + i*sizeof(T);
+        for( int j = i+1; j < n; j++ )
+            std::swap( row[j], *(T*)(data1 + step*j) );
+    }
+}
+
+typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
+typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
+
+#define DEF_TRANSPOSE_FUNC(suffix, type) \
+static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
+{ transpose_<type>(src, sstep, dst, dstep, sz); } \
+\
+static void transposeI_##suffix( uchar* data, size_t step, int n ) \
+{ transposeI_<type>(data, step, n); }
+
+DEF_TRANSPOSE_FUNC(8u, uchar)
+DEF_TRANSPOSE_FUNC(16u, ushort)
+DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
+DEF_TRANSPOSE_FUNC(32s, int)
+DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
+DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
+DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
+DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
+DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
+DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
+
+static TransposeFunc transposeTab[] =
+{
+    0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
+    transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
+    0, 0, 0,
0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8 +}; + +#ifdef HAVE_OPENCL + +static bool ocl_transpose( InputArray _src, OutputArray _dst ) +{ + const ocl::Device & dev = ocl::Device::getDefault(); + const int TILE_DIM = 32, BLOCK_ROWS = 8; + int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), + rowsPerWI = dev.isIntel() ? 4 : 1; + + UMat src = _src.getUMat(); + _dst.create(src.cols, src.rows, type); + UMat dst = _dst.getUMat(); + + String kernelName("transpose"); + bool inplace = dst.u == src.u; + + if (inplace) + { + CV_Assert(dst.cols == dst.rows); + kernelName += "_inplace"; + } + else + { + // check required local memory size + size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type); + if (required_local_memory > ocl::Device::getDefault().localMemSize()) + return false; + } + + ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc, + format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s", + ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), + cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : "")); + if (k.empty()) + return false; + + if (inplace) + k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows); + else + k.args(ocl::KernelArg::ReadOnly(src), + ocl::KernelArg::WriteOnlyNoSize(dst)); + + size_t localsize[2] = { TILE_DIM, BLOCK_ROWS }; + size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) }; + + if (inplace && dev.isIntel()) + { + localsize[0] = 16; + localsize[1] = dev.maxWorkGroupSize() / localsize[0]; + } + + return k.run(2, globalsize, localsize, false); +} + +#endif + +#ifdef HAVE_IPP +static bool ipp_transpose( Mat &src, Mat &dst ) +{ + CV_INSTRUMENT_REGION_IPP(); + + int type = src.type(); + typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize); + typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize); + IppiTranspose ippiTranspose = 0; + IppiTransposeI ippiTranspose_I = 0; + + if (dst.data == src.data && dst.cols == dst.rows) + { + CV_SUPPRESS_DEPRECATED_START + ippiTranspose_I = + type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR : + type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR : + type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR : + type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR : + type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR : + type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR : + type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR : + type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR : + type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR : + type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR : + type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR : + type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR : + type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR : + type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR : + type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0; + CV_SUPPRESS_DEPRECATED_END + } + else + { + ippiTranspose = + type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R : + type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R : + type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R : + type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R : + type == CV_16UC3 ? 
(IppiTranspose)ippiTranspose_16u_C3R :
+            type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R :
+            type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R :
+            type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R :
+            type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R :
+            type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R :
+            type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R :
+            type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R :
+            type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R :
+            type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R :
+            type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0;
+    }
+
+    IppiSize roiSize = { src.cols, src.rows };
+    if (ippiTranspose != 0)
+    {
+        if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0)
+            return true;
+    }
+    else if (ippiTranspose_I != 0)
+    {
+        if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0)
+            return true;
+    }
+    return false;
+}
+#endif
+
+
+void transpose( InputArray _src, OutputArray _dst )
+{
+    CV_INSTRUMENT_REGION();
+
+    int type = _src.type(), esz = CV_ELEM_SIZE(type);
+    CV_Assert( _src.dims() <= 2 && esz <= 32 );
+
+    CV_OCL_RUN(_dst.isUMat(),
+               ocl_transpose(_src, _dst))
+
+    Mat src = _src.getMat();
+    if( src.empty() )
+    {
+        _dst.release();
+        return;
+    }
+
+    _dst.create(src.cols, src.rows, src.type());
+    Mat dst = _dst.getMat();
+
+    // handle the case of single-column/single-row matrices, stored in STL vectors.
+    if( src.rows != dst.cols || src.cols != dst.rows )
+    {
+        CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) );
+        src.copyTo(dst);
+        return;
+    }
+
+    CV_IPP_RUN_FAST(ipp_transpose(src, dst))
+
+    if( dst.data == src.data )
+    {
+        TransposeInplaceFunc func = transposeInplaceTab[esz];
+        CV_Assert( func != 0 );
+        CV_Assert( dst.cols == dst.rows );
+        func( dst.ptr(), dst.step, dst.rows );
+    }
+    else
+    {
+        TransposeFunc func = transposeTab[esz];
+        CV_Assert( func != 0 );
+        func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() );
+    }
+}
+
+
+#if CV_SIMD128
+template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+    typedef typename V::lane_type T;
+    int end = (int)(size.width*esz);
+    int width = (end + 1)/2;
+    int width_1 = width & -v_uint8x16::nlanes;
+    int i, j;
+
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T)>(src, dst));
+#endif
+
+    for( ; size.height--; src += sstep, dst += dstep )
+    {
+        for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
+        {
+            V t0, t1;
+
+            t0 = v_load((T*)((uchar*)src + i));
+            t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes));
+            t0 = v_reverse(t0);
+            t1 = v_reverse(t1);
+            v_store((T*)(dst + j - v_uint8x16::nlanes), t0);
+            v_store((T*)(dst + i), t1);
+        }
+        if (isAligned<sizeof(T)>(src, dst))
+        {
+            for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
+            {
+                T t0, t1;
+
+                t0 = *((T*)((uchar*)src + i));
+                t1 = *((T*)((uchar*)src + j - sizeof(T)));
+                *((T*)(dst + j - sizeof(T))) = t0;
+                *((T*)(dst + i)) = t1;
+            }
+        }
+        else
+        {
+            for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
+            {
+                for (int k = 0; k < (int)sizeof(T); k++)
+                {
+                    uchar t0, t1;
+
+                    t0 = *((uchar*)src + i + k);
+                    t1 = *((uchar*)src + j + k - sizeof(T));
+                    *(dst + j + k - sizeof(T)) = t0;
+                    *(dst + i + k) = t1;
+                }
+            }
+        }
+    }
+}
+
+template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+    int end = (int)(size.width*esz);
+    int width = (end + 1)/2;
+
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T1)>(src, dst));
+    CV_Assert(isAligned<sizeof(T2)>(src, dst));
+#endif
+
+    for( ; size.height--; src += sstep, dst += dstep )
+    {
+        for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) )
+        {
+            T1 t0, t1;
+            T2 t2, t3;
+
+            t0 = *((T1*)((uchar*)src + i));
+            t2 = *((T2*)((uchar*)src + i + sizeof(T1)));
+            t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2)));
+            t3 = *((T2*)((uchar*)src + j - sizeof(T2)));
+            *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0;
+            *((T2*)(dst + j - sizeof(T2))) = t2;
+            *((T1*)(dst + i)) = t1;
+            *((T2*)(dst + i + sizeof(T1))) = t3;
+        }
+    }
+}
+#endif
+
+static void
+flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+#if CV_SIMD
+#if CV_STRONG_ALIGNMENT
+    size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
+#endif
+    if (esz == 2 * v_uint8x16::nlanes)
+    {
+        int end = (int)(size.width*esz);
+        int width = end/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes )
+            {
+#if CV_SIMD256
+                v_uint8x32 t0, t1;
+
+                t0 = v256_load((uchar*)src + i);
+                t1 = v256_load((uchar*)src + j);
+                v_store(dst + j, t0);
+                v_store(dst + i, t1);
+#else
+                v_uint8x16 t0, t1, t2, t3;
+
+                t0 = v_load((uchar*)src + i);
+                t1 = v_load((uchar*)src + i + v_uint8x16::nlanes);
+                t2 = v_load((uchar*)src + j);
+                t3 = v_load((uchar*)src + j + v_uint8x16::nlanes);
+                v_store(dst + j, t0);
+                v_store(dst + j + v_uint8x16::nlanes, t1);
+                v_store(dst + i, t2);
+                v_store(dst + i + v_uint8x16::nlanes, t3);
+#endif
+            }
+        }
+    }
+    else if (esz == v_uint8x16::nlanes)
+    {
+        int end = (int)(size.width*esz);
+        int width = end/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
+            {
+                v_uint8x16 t0, t1;
+
+                t0 = v_load((uchar*)src + i);
+                t1 = v_load((uchar*)src + j);
+                v_store(dst + j, t0);
+                v_store(dst + i, t1);
+            }
+        }
+    }
+    else if (esz == 8
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(uint64_t)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 4
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(unsigned)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 2
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(ushort)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 1)
+    {
+        flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 24
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(uint64_t)>(alignmentMark)
+#endif
+            )
+    {
+        int end = (int)(size.width*esz);
+        int width = (end + 1)/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) )
+            {
+                v_uint8x16 t0, t1;
+                uint64_t t2, t3;
+
+                t0 = v_load((uchar*)src + i);
+                t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes));
+                t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t));
+                t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t)));
+                v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0);
+                *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2;
+                v_store(dst + i, t1);
+                *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3;
+            }
+        }
+    }
+#if !CV_STRONG_ALIGNMENT
+    else if (esz == 12)
+    {
+        flipHoriz_double<uint64_t, unsigned>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 6)
+    {
+        flipHoriz_double<unsigned, ushort>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 3)
+    {
+        flipHoriz_double<ushort, uchar>(src, sstep, dst, dstep, size, esz);
+    }
+#endif
+    else
+#endif // CV_SIMD
+    {
+        int i, j, limit = (int)(((size.width + 1)/2)*esz);
+        AutoBuffer<int> _tab(size.width*esz);
+        int* tab = _tab.data();
+
+        for( i = 0; i < size.width; i++ )
+            for( size_t k = 0; k < esz; k++ )
+                tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( i = 0; i < limit; i++ )
+            {
+                j = tab[i];
+                uchar t0 = src[i], t1 = src[j];
+                dst[i] = t1; dst[j] = t0;
+            }
+        }
+    }
+}
+
+static void
+flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
+{
+    const uchar* src1 = src0 + (size.height - 1)*sstep;
+    uchar* dst1 = dst0 + (size.height - 1)*dstep;
+    size.width *= (int)esz;
+
+    for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
+                                                  dst0 += dstep, dst1 -= dstep )
+    {
+        int i = 0;
+#if CV_SIMD
+#if CV_STRONG_ALIGNMENT
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
+#endif
+        {
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_int32 t0 = vx_load((int*)(src0 + i));
+                v_int32 t1 = vx_load((int*)(src1 + i));
+                v_store((int*)(dst0 + i), t1);
+                v_store((int*)(dst1 + i), t0);
+            }
+        }
+#if CV_STRONG_ALIGNMENT
+        else
+        {
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_uint8 t0 = vx_load(src0 + i);
+                v_uint8 t1 = vx_load(src1 + i);
+                v_store(dst0 + i, t1);
+                v_store(dst1 + i, t0);
+            }
+        }
+#endif
+#endif
+
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
+        {
+            for( ; i <= size.width - 16; i += 16 )
+            {
+                int t0 = ((int*)(src0 + i))[0];
+                int t1 = ((int*)(src1 + i))[0];
+
+                ((int*)(dst0 + i))[0] = t1;
+                ((int*)(dst1 + i))[0] = t0;
+
+                t0 = ((int*)(src0 + i))[1];
+                t1 = ((int*)(src1 + i))[1];
+
+                ((int*)(dst0 + i))[1] = t1;
+                ((int*)(dst1 + i))[1] = t0;
+
+                t0 = ((int*)(src0 + i))[2];
+                t1 = ((int*)(src1 + i))[2];
+
+                ((int*)(dst0 + i))[2] = t1;
+                ((int*)(dst1 + i))[2] = t0;
+
+                t0 = ((int*)(src0 + i))[3];
+                t1 = ((int*)(src1 + i))[3];
+
+                ((int*)(dst0 + i))[3] = t1;
+                ((int*)(dst1 + i))[3] = t0;
+            }
+
+            for( ; i <= size.width - 4; i += 4 )
+            {
+                int t0 = ((int*)(src0 + i))[0];
+                int t1 = ((int*)(src1 + i))[0];
+
+                ((int*)(dst0 + i))[0] = t1;
+                ((int*)(dst1 + i))[0] = t0;
+            }
+        }
+
+        for( ; i < size.width; i++ )
+        {
+            uchar t0 = src0[i];
+            uchar t1 = src1[i];
+
+            dst0[i] = t1;
+            dst1[i] = t0;
+        }
+    }
+}
+
+#ifdef HAVE_OPENCL
+
+enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
+
+static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
+{
+    CV_Assert(flipCode >= -1 && flipCode <= 1);
+
+    const ocl::Device & dev = ocl::Device::getDefault();
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
+            flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
+
+    bool doubleSupport = dev.doubleFPConfig() > 0;
+    if (!doubleSupport && depth == CV_64F)
+        kercn = cn;
+
+    if (cn > 4)
+        return false;
+
+    const char * kernelName;
+    if (flipCode == 0)
+        kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
+    else if (flipCode > 0)
+        kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
+    else
+        kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
+
+    int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
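+    // kercn is the number of elements each work-item handles. Column flips of
+    // 3-channel images must move whole pixels, so the granularity stays at cn;
+    // in every other case it is widened to at least one full pixel.
+    kercn = (cn!=3 || flipType == FLIP_ROWS) ?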
std::max(kercn, cn) : cn; + + ocl::Kernel k(kernelName, ocl::core::flip_oclsrc, + format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d", + kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)), + kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn)); + if (k.empty()) + return false; + + Size size = _src.size(); + _dst.create(size, type); + UMat src = _src.getUMat(), dst = _dst.getUMat(); + + int cols = size.width * cn / kercn, rows = size.height; + cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols; + rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows; + + k.args(ocl::KernelArg::ReadOnlyNoSize(src), + ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols); + + size_t maxWorkGroupSize = dev.maxWorkGroupSize(); + CV_Assert(maxWorkGroupSize % 4 == 0); + + size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy }, + localsize[2] = { maxWorkGroupSize / 4, 4 }; + return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false); +} + +#endif + +#if defined HAVE_IPP +static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); + + // Details: https://github.com/opencv/opencv/issues/12943 + if (flip_mode <= 0 /* swap rows */ + && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42 + && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/ + ) + return false; + + IppiAxis ippMode; + if(flip_mode < 0) + ippMode = ippAxsBoth; + else if(flip_mode == 0) + ippMode = ippAxsHorizontal; + else + ippMode = ippAxsVertical; + + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); + } + catch(const ::ipp::IwException &) + { + return false; + } + + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode); + return false; +#endif +} +#endif + + +void flip( InputArray _src, OutputArray _dst, int flip_mode ) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert( _src.dims() <= 2 ); + Size size = _src.size(); + + if (flip_mode < 0) + { + if (size.width == 1) + flip_mode = 0; + if (size.height == 1) + flip_mode = 1; + } + + if ((size.width == 1 && flip_mode > 0) || + (size.height == 1 && flip_mode == 0)) + { + return _src.copyTo(_dst); + } + + CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode)) + + Mat src = _src.getMat(); + int type = src.type(); + _dst.create( size, type ); + Mat dst = _dst.getMat(); + + CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode)); + + size_t esz = CV_ELEM_SIZE(type); + + if( flip_mode <= 0 ) + flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + else + flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz ); + + if( flip_mode < 0 ) + flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz ); +} + +void rotate(InputArray _src, OutputArray _dst, int rotateMode) +{ + CV_Assert(_src.dims() <= 2); + + switch (rotateMode) + { + case ROTATE_90_CLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 1); + break; + case ROTATE_180: + flip(_src, _dst, -1); + break; + case ROTATE_90_COUNTERCLOCKWISE: + transpose(_src, _dst); + flip(_dst, _dst, 0); + break; + default: + break; + } +} + +} // namespace diff --git a/modules/core/src/matrix_wrap.cpp b/modules/core/src/matrix_wrap.cpp index 68a674f6f1..bb61ce2de1 100644 --- a/modules/core/src/matrix_wrap.cpp +++ b/modules/core/src/matrix_wrap.cpp @@ 
-2,9 +2,8 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html
 
-
-#include "opencv2/core/mat.hpp"
 #include "precomp.hpp"
+#include "opencv2/core/mat.hpp"
 
 namespace cv {
 
@@ -33,7 +32,7 @@ Mat _InputArray::getMat_(int i) const
         return m->getMat(accessFlags).row(i);
     }
 
-    if( k == MATX || k == STD_ARRAY )
+    if (k == MATX)
     {
         CV_Assert( i < 0 );
         return Mat(sz, flags, obj);
@@ -173,7 +172,7 @@ void _InputArray::getMatVector(std::vector<Mat>& mv) const
         return;
     }
 
-    if( k == MATX || k == STD_ARRAY )
+    if (k == MATX)
    {
         size_t n = sz.height, esz = CV_ELEM_SIZE(flags);
         mv.resize(n);
@@ -317,6 +316,7 @@ void _InputArray::getUMatVector(std::vector<UMat>& umv) const
 
 cuda::GpuMat _InputArray::getGpuMat() const
 {
+#ifdef HAVE_CUDA
     _InputArray::KindFlag k = kind();
 
     if (k == CUDA_GPU_MAT)
@@ -340,14 +340,22 @@ cuda::GpuMat _InputArray::getGpuMat() const
         return cuda::GpuMat();
 
     CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem");
+#else
+    CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
 }
 void _InputArray::getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const
 {
+#ifdef HAVE_CUDA
     _InputArray::KindFlag k = kind();
     if (k == STD_VECTOR_CUDA_GPU_MAT)
     {
         gpumv = *(std::vector<cuda::GpuMat>*)obj;
     }
+#else
+    CV_UNUSED(gpumv);
+    CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
 }
 ogl::Buffer _InputArray::getOGlBuffer() const
 {
@@ -362,7 +370,10 @@ ogl::Buffer _InputArray::getOGlBuffer() const
 _InputArray::KindFlag _InputArray::kind() const
 {
     KindFlag k = flags & KIND_MASK;
+#if CV_VERSION_MAJOR < 5
     CV_DbgAssert(k != EXPR);
+    CV_DbgAssert(k != STD_ARRAY);
+#endif
     return k;
 }
 
@@ -392,7 +403,7 @@ Size _InputArray::size(int i) const
         return ((const UMat*)obj)->size();
     }
 
-    if( k == MATX || k == STD_ARRAY )
+    if (k == MATX)
    {
         CV_Assert( i < 0 );
         return sz;
@@ -451,11 +462,15 @@ Size _InputArray::size(int i) const
 
     if (k == STD_VECTOR_CUDA_GPU_MAT)
     {
+#ifdef HAVE_CUDA
         const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
         if (i < 0)
             return vv.empty() ? Size() : Size((int)vv.size(), 1);
         CV_Assert(i < (int)vv.size());
         return vv[i].size();
+#else
+        CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
     }
 
     if( k == STD_VECTOR_UMAT )
@@ -612,7 +627,7 @@ int _InputArray::dims(int i) const
         return ((const UMat*)obj)->dims;
     }
 
-    if( k == MATX || k == STD_ARRAY )
+    if (k == MATX)
    {
         CV_Assert( i < 0 );
         return 2;
@@ -746,7 +761,7 @@ int _InputArray::type(int i) const
     if( k == UMAT )
         return ((const UMat*)obj)->type();
 
-    if( k == MATX || k == STD_VECTOR || k == STD_ARRAY || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
+    if( k == MATX || k == STD_VECTOR || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR )
         return CV_MAT_TYPE(flags);
 
     if( k == NONE )
@@ -790,6 +805,7 @@ int _InputArray::type(int i) const
 
     if (k == STD_VECTOR_CUDA_GPU_MAT)
     {
+#ifdef HAVE_CUDA
         const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
         if (vv.empty())
         {
@@ -798,6 +814,9 @@ int _InputArray::type(int i) const
         }
         CV_Assert(i < (int)vv.size());
         return vv[i >= 0 ?
i : 0].type(); +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER ) @@ -832,7 +851,7 @@ bool _InputArray::empty() const if( k == UMAT ) return ((const UMat*)obj)->empty(); - if( k == MATX || k == STD_ARRAY ) + if (k == MATX) return false; if( k == STD_VECTOR ) @@ -901,7 +920,7 @@ bool _InputArray::isContinuous(int i) const if( k == UMAT ) return i < 0 ? ((const UMat*)obj)->isContinuous() : true; - if( k == MATX || k == STD_VECTOR || k == STD_ARRAY || + if( k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR ) return true; @@ -942,7 +961,7 @@ bool _InputArray::isSubmatrix(int i) const if( k == UMAT ) return i < 0 ? ((const UMat*)obj)->isSubmatrix() : false; - if( k == MATX || k == STD_VECTOR || k == STD_ARRAY || + if( k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR ) return false; @@ -987,7 +1006,7 @@ size_t _InputArray::offset(int i) const return ((const UMat*)obj)->offset; } - if( k == MATX || k == STD_VECTOR || k == STD_ARRAY || + if( k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR ) return 0; @@ -1046,7 +1065,7 @@ size_t _InputArray::step(int i) const return ((const UMat*)obj)->step; } - if( k == MATX || k == STD_VECTOR || k == STD_ARRAY || + if( k == MATX || k == STD_VECTOR || k == NONE || k == STD_VECTOR_VECTOR || k == STD_BOOL_VECTOR ) return 0; @@ -1092,7 +1111,7 @@ void _InputArray::copyTo(const _OutputArray& arr) const if( k == NONE ) arr.release(); - else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_ARRAY || k == STD_BOOL_VECTOR ) + else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_BOOL_VECTOR ) { Mat m = getMat(); m.copyTo(arr); @@ -1113,7 +1132,7 @@ void _InputArray::copyTo(const _OutputArray& arr, const _InputArray & mask) cons if( k == NONE ) arr.release(); - else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_ARRAY || k == STD_BOOL_VECTOR ) + else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_BOOL_VECTOR ) { Mat m = getMat(); m.copyTo(arr, mask); @@ -1159,22 +1178,34 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, _Out { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == _sz); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_sz, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_sz.height, _sz.width}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); @@ -1201,22 +1232,34 @@ void 
_OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran { CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::GpuMat*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype); +#ifdef HAVE_OPENGL ((ogl::Buffer*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)"); +#endif } if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 ) { CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows)); CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype); +#ifdef HAVE_CUDA ((cuda::HostMem*)obj)->create(_rows, _cols, mtype); return; +#else + CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)"); +#endif } int sizes[] = {_rows, _cols}; create(2, sizes, mtype, i, allowTransposed, fixedDepthMask); @@ -1301,16 +1344,27 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i, CV_Assert( i < 0 ); int type0 = CV_MAT_TYPE(flags); CV_Assert( mtype == type0 || (CV_MAT_CN(mtype) == 1 && ((1 << type0) & fixedDepthMask) != 0) ); - CV_Assert( d == 2 && ((sizes[0] == sz.height && sizes[1] == sz.width) || - (allowTransposed && sizes[0] == sz.width && sizes[1] == sz.height))); - return; - } - - if( k == STD_ARRAY ) - { - int type0 = CV_MAT_TYPE(flags); - CV_Assert( mtype == type0 || (CV_MAT_CN(mtype) == 1 && ((1 << type0) & fixedDepthMask) != 0) ); - CV_Assert( d == 2 && sz.area() == sizes[0]*sizes[1]); + CV_CheckLE(d, 2, ""); + Size requested_size(d == 2 ? sizes[1] : 1, d >= 1 ? 
sizes[0] : 1);
+        if (sz.width == 1 || sz.height == 1)
+        {
+            // NB: 1D arrays assume allowTransposed=true (see #4159)
+            int total_1d = std::max(sz.width, sz.height);
+            CV_Check(requested_size, std::max(requested_size.width, requested_size.height) == total_1d, "");
+        }
+        else
+        {
+            if (!allowTransposed)
+            {
+                CV_CheckEQ(requested_size, sz, "");
+            }
+            else
+            {
+                CV_Check(requested_size,
+                        (requested_size == sz || (requested_size.height == sz.width && requested_size.width == sz.height)),
+                        "");
+            }
+        }
         return;
     }
 
@@ -1628,20 +1682,32 @@ void _OutputArray::release() const
 
     if( k == CUDA_GPU_MAT )
     {
+#ifdef HAVE_CUDA
         ((cuda::GpuMat*)obj)->release();
         return;
+#else
+        CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
     }
 
     if( k == CUDA_HOST_MEM )
     {
+#ifdef HAVE_CUDA
         ((cuda::HostMem*)obj)->release();
         return;
+#else
+        CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
     }
 
     if( k == OPENGL_BUFFER )
     {
+#ifdef HAVE_OPENGL
         ((ogl::Buffer*)obj)->release();
         return;
+#else
        CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
+#endif
     }
 
     if( k == NONE )
@@ -1672,8 +1738,12 @@ void _OutputArray::release() const
     }
     if (k == STD_VECTOR_CUDA_GPU_MAT)
     {
+#ifdef HAVE_CUDA
         ((std::vector<cuda::GpuMat>*)obj)->clear();
         return;
+#else
+        CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
     }
     CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
 }
@@ -1772,7 +1842,7 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
 
     if( k == NONE )
         ;
-    else if( k == MAT || k == MATX || k == STD_VECTOR || k == STD_ARRAY )
+    else if (k == MAT || k == MATX || k == STD_VECTOR)
     {
         Mat m = getMat();
         m.setTo(arr, mask);
@@ -1781,9 +1851,13 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
         ((UMat*)obj)->setTo(arr, mask);
     else if( k == CUDA_GPU_MAT )
     {
+#ifdef HAVE_CUDA
         Mat value = arr.getMat();
         CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) );
         ((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>(value.ptr<double>())), mask);
+#else
+        CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
     }
     else
         CV_Error(Error::StsNotImplemented, "");
diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp
index 088c163c87..bbefefc95d 100644
--- a/modules/core/src/norm.cpp
+++ b/modules/core/src/norm.cpp
@@ -152,10 +152,10 @@ float normL2Sqr_(const float* a, const float* b, int n)
     {
         v_float32 t0 = vx_load(a + j) - vx_load(b + j);
         v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes);
-        v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes);
-        v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes);
         v_d0 = v_muladd(t0, t0, v_d0);
+        v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes);
         v_d1 = v_muladd(t1, t1, v_d1);
+        v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes);
         v_d2 = v_muladd(t2, t2, v_d2);
         v_d3 = v_muladd(t3, t3, v_d3);
     }
@@ -205,13 +205,10 @@ int normL1_(const uchar* a, const uchar* b, int n)
     return d;
 }
 
-}} //cv::hal
+} //cv::hal
 
 //==================================================================================================
 
-namespace cv
-{
-
 template<typename T, typename ST> int normInf_(const
T* src, const uchar* mask, ST* _result, int len, int cn) { @@ -594,12 +591,10 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result) CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result); #endif return false; -} -#endif +} // ipp_norm() +#endif // HAVE_IPP -} // cv:: - -double cv::norm( InputArray _src, int normType, InputArray _mask ) +double norm( InputArray _src, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -792,9 +787,6 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) //================================================================================================== #ifdef HAVE_OPENCL - -namespace cv { - static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) { #ifdef __ANDROID__ @@ -849,15 +841,10 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr result /= (s2 + DBL_EPSILON); return true; -} - -} - -#endif +} // ocl_norm() +#endif // HAVE_OPENCL #ifdef HAVE_IPP -namespace cv -{ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result) { CV_INSTRUMENT_REGION_IPP(); @@ -1083,12 +1070,11 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result); #endif return false; -} -} -#endif +} // ipp_norm +#endif // HAVE_IPP -double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) +double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) { CV_INSTRUMENT_REGION(); @@ -1280,12 +1266,12 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m return result.d; } -cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const +cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const { return cv::hal::normHamming(a, b, size); } -double cv::PSNR(InputArray _src1, InputArray _src2, double R) +double PSNR(InputArray _src1, InputArray _src2, double R) { CV_INSTRUMENT_REGION(); @@ -1295,3 +1281,141 @@ double cv::PSNR(InputArray _src1, InputArray _src2, double R) double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels())); return 20*log10(R/(diff+DBL_EPSILON)); } + + +#ifdef HAVE_OPENCL +static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype, + double scale, double delta ) +{ + UMat src = _src.getUMat(); + + if( _mask.empty() ) + src.convertTo( _dst, dtype, scale, delta ); + else if (src.channels() <= 4) + { + const ocl::Device & dev = ocl::Device::getDefault(); + + int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), + ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)), + rowsPerWI = dev.isIntel() ? 
4 : 1; + + float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta); + bool haveScale = std::fabs(scale - 1) > DBL_EPSILON, + haveZeroScale = !(std::fabs(scale) > DBL_EPSILON), + haveDelta = std::fabs(delta) > DBL_EPSILON, + doubleSupport = dev.doubleFPConfig() > 0; + + if (!haveScale && !haveDelta && stype == dtype) + { + _src.copyTo(_dst, _mask); + return true; + } + if (haveZeroScale) + { + _dst.setTo(Scalar(delta), _mask); + return true; + } + + if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport) + return false; + + char cvt[2][40]; + String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d" + " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s", + ocl::typeToStr(stype), ocl::typeToStr(dtype), + ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn, + rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + haveScale ? " -D HAVE_SCALE" : "", + haveDelta ? " -D HAVE_DELTA" : "", + ocl::typeToStr(sdepth), ocl::typeToStr(ddepth)); + + ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts); + if (k.empty()) + return false; + + UMat mask = _mask.getUMat(), dst = _dst.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask), + dstarg = ocl::KernelArg::ReadWrite(dst); + + if (haveScale) + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fscale, fdelta); + else + k.args(srcarg, maskarg, dstarg, fscale); + } + else + { + if (haveDelta) + k.args(srcarg, maskarg, dstarg, fdelta); + else + k.args(srcarg, maskarg, dstarg); + } + + size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; + return k.run(2, globalsize, NULL, false); + } + else + { + UMat temp; + src.convertTo( temp, dtype, scale, delta ); + temp.copyTo( _dst, _mask ); + } + + return true; +} // ocl_normalize +#endif // HAVE_OPENCL + +void normalize(InputArray _src, InputOutputArray _dst, double a, double b, + int norm_type, int rtype, InputArray _mask) +{ + CV_INSTRUMENT_REGION(); + + double scale = 1, shift = 0; + int type = _src.type(), depth = CV_MAT_DEPTH(type); + + if( rtype < 0 ) + rtype = _dst.fixedType() ? _dst.depth() : depth; + + if( norm_type == CV_MINMAX ) + { + double smin = 0, smax = 0; + double dmin = MIN( a, b ), dmax = MAX( a, b ); + minMaxIdx( _src, &smin, &smax, 0, 0, _mask ); + scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0); + if( rtype == CV_32F ) + { + scale = (float)scale; + shift = (float)dmin - (float)(smin*scale); + } + else + shift = dmin - smin*scale; + } + else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C ) + { + scale = norm( _src, norm_type, _mask ); + scale = scale > DBL_EPSILON ?
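// [editor's note] ocl_normalize() above reduces the affine map dst = src*scale + delta to
// cheaper operations where possible; a host-side equivalent of those fast paths
// (hypothetical helper, for illustration only):
static void normalizeAffine(const cv::Mat& src, cv::Mat& dst, int dtype,
                            double scale, double delta, const cv::Mat& mask)
{
    if (std::fabs(scale - 1) <= DBL_EPSILON && std::fabs(delta) <= DBL_EPSILON && src.type() == dtype)
        src.copyTo(dst, mask);               // identity: masked copy, no arithmetic
    else if (!(std::fabs(scale) > DBL_EPSILON))
        dst.setTo(cv::Scalar(delta), mask);  // zero scale: constant fill
    else
    {
        cv::Mat tmp;
        src.convertTo(tmp, dtype, scale, delta); // general case: convert, then masked copy
        tmp.copyTo(dst, mask);
    }
}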
a/scale : 0.; + shift = 0; + } + else + CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" ); + + CV_OCL_RUN(_dst.isUMat(), + ocl_normalize(_src, _dst, _mask, rtype, scale, shift)) + + Mat src = _src.getMat(); + if( _mask.empty() ) + src.convertTo( _dst, rtype, scale, shift ); + else + { + Mat temp; + src.convertTo( temp, rtype, scale, shift ); + temp.copyTo( _dst, _mask ); + } +} + +} // namespace diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 44ee8f9c59..ac52eeaf99 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1149,14 +1149,14 @@ void OpenCLExecutionContext::release() } + // true if we have initialized OpenCL subsystem with available platforms -static bool g_isOpenCVActivated = false; +static bool g_isOpenCLInitialized = false; +static bool g_isOpenCLAvailable = false; bool haveOpenCL() { CV_TRACE_FUNCTION(); - static bool g_isOpenCLInitialized = false; - static bool g_isOpenCLAvailable = false; if (!g_isOpenCLInitialized) { @@ -1178,7 +1178,7 @@ bool haveOpenCL() { cl_uint n = 0; g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS; - g_isOpenCVActivated = n > 0; + g_isOpenCLAvailable &= n > 0; CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms"); } catch (...) @@ -1214,7 +1214,7 @@ bool useOpenCL() bool isOpenCLActivated() { - if (!g_isOpenCVActivated) + if (!g_isOpenCLAvailable) return false; // prevent unnecessary OpenCL activation via useOpenCL()->haveOpenCL() calls return useOpenCL(); } @@ -1451,7 +1451,7 @@ struct Platform::Impl bool initialized; }; -Platform::Platform() +Platform::Platform() CV_NOEXCEPT { p = 0; } @@ -1480,6 +1480,23 @@ Platform& Platform::operator = (const Platform& pl) return *this; } +Platform::Platform(Platform&& pl) CV_NOEXCEPT +{ + p = pl.p; + pl.p = nullptr; +} + +Platform& Platform::operator = (Platform&& pl) CV_NOEXCEPT +{ + if (this != &pl) { + if(p) + p->release(); + p = pl.p; + pl.p = nullptr; + } + return *this; +} + void* Platform::ptr() const { return p ? 
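// [editor's note] Worked example for the NORM_MINMAX branch above: mapping a source range
// [smin, smax] = [10, 60] onto [dmin, dmax] = [0, 255] gives
//     scale = (255 - 0) / (60 - 10) = 5.1
//     shift = dmin - smin*scale = 0 - 10*5.1 = -51
// so dst = src*5.1 - 51 sends 10 -> 0 and 60 -> 255; scale degenerates to 0 when
// smax - smin <= DBL_EPSILON, which turns the transform into a constant fill.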
p->handle : 0; @@ -1499,25 +1516,27 @@ Platform& Platform::getDefault() /////////////////////////////////////// Device //////////////////////////////////////////// -// deviceVersion has format +// Version has format: // OpenCL // by specification // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html -static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor) +// https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetPlatformInfo.html +// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clGetPlatformInfo.html +static void parseOpenCLVersion(const String &version, int &major, int &minor) { major = minor = 0; - if (10 >= deviceVersion.length()) + if (10 >= version.length()) return; - const char *pstr = deviceVersion.c_str(); + const char *pstr = version.c_str(); if (0 != strncmp(pstr, "OpenCL ", 7)) return; - size_t ppos = deviceVersion.find('.', 7); + size_t ppos = version.find('.', 7); if (String::npos == ppos) return; - String temp = deviceVersion.substr(7, ppos - 7); + String temp = version.substr(7, ppos - 7); major = atoi(temp.c_str()); - temp = deviceVersion.substr(ppos + 1); + temp = version.substr(ppos + 1); minor = atoi(temp.c_str()); } @@ -1555,7 +1574,7 @@ struct Device::Impl addressBits_ = getProp(CL_DEVICE_ADDRESS_BITS); String deviceVersion_ = getStrProp(CL_DEVICE_VERSION); - parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_); + parseOpenCLVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_); size_t pos = 0; while (pos < extensions_.size()) @@ -1675,7 +1694,7 @@ struct Device::Impl }; -Device::Device() +Device::Device() CV_NOEXCEPT { p = 0; } @@ -1704,6 +1723,23 @@ Device& Device::operator = (const Device& d) return *this; } +Device::Device(Device&& d) CV_NOEXCEPT +{ + p = d.p; + d.p = nullptr; +} + +Device& Device::operator = (Device&& d) CV_NOEXCEPT +{ + if (this != &d) { + if(p) + p->release(); + p = d.p; + d.p = nullptr; + } + return *this; +} + Device::~Device() { if(p) @@ -2832,7 +2868,7 @@ public: }; -Context::Context() +Context::Context() CV_NOEXCEPT { p = 0; } @@ -2917,6 +2953,23 @@ Context& Context::operator = (const Context& c) return *this; } +Context::Context(Context&& c) CV_NOEXCEPT +{ + p = c.p; + c.p = nullptr; +} + +Context& Context::operator = (Context&& c) CV_NOEXCEPT +{ + if (this != &c) { + if(p) + p->release(); + p = c.p; + c.p = nullptr; + } + return *this; +} + void* Context::ptr() const { return p == NULL ? 
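// [editor's note] The same noexcept move pattern is stamped out in this patch for every
// ref-counted ocl wrapper (Platform, Device, Context, Queue, Kernel, Program, ProgramSource,
// PlatformInfo, Image2D): steal the impl pointer, null the source, and release the old impl on
// move assignment, so no refcount traffic occurs. Generic shape (T stands for any of them):
//     T::T(T&& other) CV_NOEXCEPT { p = other.p; other.p = nullptr; }
//     T& T::operator=(T&& other) CV_NOEXCEPT
//     {
//         if (this != &other) { if (p) p->release(); p = other.p; other.p = nullptr; }
//         return *this;
//     }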
NULL : p->handle; @@ -3229,7 +3282,7 @@ struct Queue::Impl cv::ocl::Queue profiling_queue_; }; -Queue::Queue() +Queue::Queue() CV_NOEXCEPT { p = 0; } @@ -3258,6 +3311,23 @@ Queue& Queue::operator = (const Queue& q) return *this; } +Queue::Queue(Queue&& q) CV_NOEXCEPT +{ + p = q.p; + q.p = nullptr; +} + +Queue& Queue::operator = (Queue&& q) CV_NOEXCEPT +{ + if (this != &q) { + if(p) + p->release(); + p = q.p; + q.p = nullptr; + } + return *this; +} + Queue::~Queue() { if(p) @@ -3313,7 +3383,7 @@ static cl_command_queue getQueue(const Queue& q) /////////////////////////////////////////// KernelArg ///////////////////////////////////////////// -KernelArg::KernelArg() +KernelArg::KernelArg() CV_NOEXCEPT : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1) { } @@ -3380,16 +3450,24 @@ struct Kernel::Impl haveTempSrcUMats = true; // UMat is created on RAW memory (without proper lifetime management, even from Mat) } - void addImage(const Image2D& image) + /// Preserve image lifetime (while it is specified as Kernel argument) + void registerImageArgument(int arg, const Image2D& image) { - images.push_back(image); + CV_CheckGE(arg, 0, ""); + CV_CheckLT(arg, (int)MAX_ARRS, ""); + if (arg < (int)shadow_images.size() && shadow_images[arg].ptr() != image.ptr()) // TODO future: replace ptr => impl (more strong check) + { + CV_Check(arg, !isInProgress, "ocl::Kernel: clearing of pending Image2D arguments is not allowed"); + } + shadow_images.reserve(MAX_ARRS); + shadow_images.resize(std::max(shadow_images.size(), (size_t)arg + 1)); + shadow_images[arg] = image; } void finit(cl_event e) { CV_UNUSED(e); cleanupUMats(); - images.clear(); isInProgress = false; release(); } @@ -3414,7 +3492,7 @@ struct Kernel::Impl bool isInProgress; bool isAsyncRun; // true if kernel was scheduled in async mode int nu; - std::list images; + std::vector shadow_images; bool haveTempDstUMats; bool haveTempSrcUMats; }; @@ -3447,7 +3525,7 @@ static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) namespace cv { namespace ocl { -Kernel::Kernel() +Kernel::Kernel() CV_NOEXCEPT { p = 0; } @@ -3483,6 +3561,23 @@ Kernel& Kernel::operator = (const Kernel& k) return *this; } +Kernel::Kernel(Kernel&& k) CV_NOEXCEPT +{ + p = k.p; + k.p = nullptr; +} + +Kernel& Kernel::operator = (Kernel&& k) CV_NOEXCEPT +{ + if (this != &k) { + if(p) + p->release(); + p = k.p; + k.p = nullptr; + } + return *this; +} + Kernel::~Kernel() { if(p) @@ -3529,6 +3624,15 @@ bool Kernel::empty() const return ptr() == 0; } +static cv::String dumpValue(size_t sz, const void* p) +{ + if (sz == 4) + return cv::format("%d / %uu / 0x%08x / %g", *(int*)p, *(int*)p, *(int*)p, *(float*)p); + if (sz == 8) + return cv::format("%lld / %lluu / 0x%16llx / %g", *(long long*)p, *(long long*)p, *(long long*)p, *(double*)p); + return cv::format("%p", p); +} + int Kernel::set(int i, const void* value, size_t sz) { if (!p || !p->handle) @@ -3539,7 +3643,7 @@ int Kernel::set(int i, const void* value, size_t sz) p->cleanupUMats(); cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value); - CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%p)", p->name.c_str(), (int)i, (int)sz, (void*)value).c_str()); + CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clSetKernelArg('%s', arg_index=%d, size=%d, value=%s)", p->name.c_str(), (int)i, (int)sz, dumpValue(sz, value).c_str()).c_str()); if (retval != CL_SUCCESS) return -1; return i+1; @@ -3547,9 +3651,11 @@ int Kernel::set(int i, const void* value, size_t sz) int Kernel::set(int i, 
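// [editor's note] registerImageArgument() above keeps exactly one shadow Image2D per argument
// slot instead of appending to a list, so re-binding the same slot of a reused kernel cannot
// accumulate stale references. Hypothetical usage this enables (names are illustrative):
//     cv::ocl::Kernel k("my_kernel", program);
//     k.set(0, imageA);  // slot 0 holds a reference to imageA
//     k.set(0, imageB);  // re-binding slot 0 drops the imageA reference,
//                        // unless a run is still in progress (then it is an error)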
const Image2D& image2D) { - p->addImage(image2D); cl_mem h = (cl_mem)image2D.ptr(); - return set(i, &h, sizeof(h)); + int res = set(i, &h, sizeof(h)); + if (res >= 0) + p->registerImageArgument(i, image2D); + return res; } int Kernel::set(int i, const UMat& m) @@ -4026,7 +4132,7 @@ struct ProgramSource::Impl }; -ProgramSource::ProgramSource() +ProgramSource::ProgramSource() CV_NOEXCEPT { p = 0; } @@ -4070,6 +4176,23 @@ ProgramSource& ProgramSource::operator = (const ProgramSource& prog) return *this; } +ProgramSource::ProgramSource(ProgramSource&& prog) CV_NOEXCEPT +{ + p = prog.p; + prog.p = nullptr; +} + +ProgramSource& ProgramSource::operator = (ProgramSource&& prog) CV_NOEXCEPT +{ + if (this != &prog) { + if(p) + p->release(); + p = prog.p; + prog.p = nullptr; + } + return *this; +} + const String& ProgramSource::source() const { CV_Assert(p); @@ -4535,7 +4658,10 @@ struct Program::Impl }; -Program::Program() { p = 0; } +Program::Program() CV_NOEXCEPT +{ + p = 0; +} Program::Program(const ProgramSource& src, const String& buildflags, String& errmsg) @@ -4562,6 +4688,23 @@ Program& Program::operator = (const Program& prog) return *this; } +Program::Program(Program&& prog) CV_NOEXCEPT +{ + p = prog.p; + prog.p = nullptr; +} + +Program& Program::operator = (Program&& prog) CV_NOEXCEPT +{ + if (this != &prog) { + if(p) + p->release(); + p = prog.p; + prog.p = nullptr; + } + return *this; +} + Program::~Program() { if(p) @@ -6370,7 +6513,6 @@ public: static OpenCLAllocator* getOpenCLAllocator_() // call once guarantee { static OpenCLAllocator* g_allocator = new OpenCLAllocator(); // avoid destructor call (using of this object is too wide) - g_isOpenCVActivated = true; return g_allocator; } MatAllocator* getOpenCLAllocator() @@ -6566,6 +6708,9 @@ struct PlatformInfo::Impl refcount = 1; handle = *(cl_platform_id*)id; getDevices(devices, handle); + + version_ = getStrProp(CL_PLATFORM_VERSION); + parseOpenCLVersion(version_, versionMajor_, versionMinor_); } String getStrProp(cl_platform_info prop) const @@ -6579,9 +6724,13 @@ struct PlatformInfo::Impl IMPLEMENT_REFCOUNTABLE(); std::vector devices; cl_platform_id handle; + + String version_; + int versionMajor_; + int versionMinor_; }; -PlatformInfo::PlatformInfo() +PlatformInfo::PlatformInfo() CV_NOEXCEPT { p = 0; } @@ -6617,6 +6766,23 @@ PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i) return *this; } +PlatformInfo::PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT +{ + p = i.p; + i.p = nullptr; +} + +PlatformInfo& PlatformInfo::operator = (PlatformInfo&& i) CV_NOEXCEPT +{ + if (this != &i) { + if(p) + p->release(); + p = i.p; + i.p = nullptr; + } + return *this; +} + int PlatformInfo::deviceNumber() const { return p ? (int)p->devices.size() : 0; @@ -6641,7 +6807,19 @@ String PlatformInfo::vendor() const String PlatformInfo::version() const { - return p ? p->getStrProp(CL_PLATFORM_VERSION) : String(); + return p ? 
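// [editor's note] parseOpenCLVersion(), used above for both device and platform strings,
// accepts the specification format "OpenCL <major>.<minor> <vendor specific>". A
// self-contained equivalent of the parsing rule, for illustration:
static bool parseCLVersionString(const std::string& s, int& major, int& minor)
{
    major = minor = 0;
    if (s.length() <= 10 || s.compare(0, 7, "OpenCL ") != 0)
        return false;
    size_t dot = s.find('.', 7);
    if (dot == std::string::npos)
        return false;
    major = atoi(s.substr(7, dot - 7).c_str());  // e.g. "OpenCL 1.2 CUDA" -> 1
    minor = atoi(s.substr(dot + 1).c_str());     // atoi stops at the first non-digit -> 2
    return true;
}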
p->version_ : String(); +} + +int PlatformInfo::versionMajor() const +{ + CV_Assert(p); + return p->versionMajor_; +} + +int PlatformInfo::versionMinor() const +{ + CV_Assert(p); + return p->versionMinor_; } static void getPlatforms(std::vector& platforms) @@ -7145,7 +7323,7 @@ struct Image2D::Impl cl_mem handle; }; -Image2D::Image2D() +Image2D::Image2D() CV_NOEXCEPT { p = NULL; } @@ -7203,6 +7381,23 @@ Image2D & Image2D::operator = (const Image2D & i) return *this; } +Image2D::Image2D(Image2D&& i) CV_NOEXCEPT +{ + p = i.p; + i.p = nullptr; +} + +Image2D& Image2D::operator = (Image2D&& i) CV_NOEXCEPT +{ + if (this != &i) { + if (p) + p->release(); + p = i.p; + i.p = nullptr; + } + return *this; +} + Image2D::~Image2D() { if (p) diff --git a/modules/core/src/ocl_disabled.impl.hpp b/modules/core/src/ocl_disabled.impl.hpp index 97c3856b37..b5f9c4f69b 100644 --- a/modules/core/src/ocl_disabled.impl.hpp +++ b/modules/core/src/ocl_disabled.impl.hpp @@ -34,10 +34,12 @@ CV_EXPORTS_W void finish() { /* nothing */ } CV_EXPORTS bool haveSVM() { return false; } -Device::Device() : p(NULL) { } +Device::Device() CV_NOEXCEPT : p(NULL) { } Device::Device(void* d) : p(NULL) { OCL_NOT_AVAILABLE(); } Device::Device(const Device& d) : p(NULL) { } Device& Device::operator=(const Device& d) { return *this; } +Device::Device(Device&&) CV_NOEXCEPT : p(NULL) { } +Device& Device::operator=(Device&&) CV_NOEXCEPT { return *this; } Device::~Device() { } void Device::set(void* d) { OCL_NOT_AVAILABLE(); } @@ -147,11 +149,13 @@ const Device& Device::getDefault() /* static */ Device Device::fromHandle(void* d) { OCL_NOT_AVAILABLE(); } -Context::Context() : p(NULL) { } +Context::Context() CV_NOEXCEPT : p(NULL) { } Context::Context(int dtype) : p(NULL) { } Context::~Context() { } Context::Context(const Context& c) : p(NULL) { } Context& Context::operator=(const Context& c) { return *this; } +Context::Context(Context&&) CV_NOEXCEPT : p(NULL) { } +Context& Context::operator=(Context&&) CV_NOEXCEPT { return *this; } bool Context::create() { return false; } bool Context::create(int dtype) { return false; } @@ -178,10 +182,12 @@ void Context::setUseSVM(bool enabled) { } void Context::release() { } -Platform::Platform() : p(NULL) { } +Platform::Platform() CV_NOEXCEPT : p(NULL) { } Platform::~Platform() { } Platform::Platform(const Platform&) : p(NULL) { } Platform& Platform::operator=(const Platform&) { return *this; } +Platform::Platform(Platform&&) CV_NOEXCEPT : p(NULL) { } +Platform& Platform::operator=(Platform&&) CV_NOEXCEPT { return *this; } void* Platform::ptr() const { return NULL; } @@ -198,11 +204,13 @@ void convertFromImage(void* cl_mem_image, UMat& dst) { OCL_NOT_AVAILABLE(); } void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device) { OCL_NOT_AVAILABLE(); } -Queue::Queue() : p(NULL) { } +Queue::Queue() CV_NOEXCEPT : p(NULL) { } Queue::Queue(const Context& c, const Device& d) : p(NULL) { OCL_NOT_AVAILABLE(); } Queue::~Queue() { } Queue::Queue(const Queue& q) {} Queue& Queue::operator=(const Queue& q) { return *this; } +Queue::Queue(Queue&&) CV_NOEXCEPT : p(NULL) { } +Queue& Queue::operator=(Queue&&) CV_NOEXCEPT { return *this; } bool Queue::create(const Context& c, const Device& d) { OCL_NOT_AVAILABLE(); } void Queue::finish() {} @@ -218,7 +226,7 @@ Queue& Queue::getDefault() const Queue& Queue::getProfilingQueue() const { OCL_NOT_AVAILABLE(); } -KernelArg::KernelArg() +KernelArg::KernelArg() CV_NOEXCEPT : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1) { } @@ -235,12 +243,14 @@ 
KernelArg KernelArg::Constant(const Mat& m) } -Kernel::Kernel() : p(NULL) { } +Kernel::Kernel() CV_NOEXCEPT : p(NULL) { } Kernel::Kernel(const char* kname, const Program& prog) : p(NULL) { OCL_NOT_AVAILABLE(); } Kernel::Kernel(const char* kname, const ProgramSource& prog, const String& buildopts, String* errmsg) : p(NULL) { OCL_NOT_AVAILABLE(); } Kernel::~Kernel() { } Kernel::Kernel(const Kernel& k) : p(NULL) { } Kernel& Kernel::operator=(const Kernel& k) { return *this; } +Kernel::Kernel(Kernel&&) CV_NOEXCEPT : p(NULL) { } +Kernel& Kernel::operator=(Kernel&&) CV_NOEXCEPT { return *this; } bool Kernel::empty() const { return true; } bool Kernel::create(const char* kname, const Program& prog) { OCL_NOT_AVAILABLE(); } @@ -264,10 +274,12 @@ size_t Kernel::localMemSize() const { OCL_NOT_AVAILABLE(); } void* Kernel::ptr() const { return NULL; } -Program::Program() : p(NULL) { } +Program::Program() CV_NOEXCEPT : p(NULL) { } Program::Program(const ProgramSource& src, const String& buildflags, String& errmsg) : p(NULL) { OCL_NOT_AVAILABLE(); } Program::Program(const Program& prog) : p(NULL) { } Program& Program::operator=(const Program& prog) { return *this; } +Program::Program(Program&&) CV_NOEXCEPT : p(NULL) { } +Program& Program::operator=(Program&&) CV_NOEXCEPT { return *this; } Program::~Program() { } bool Program::create(const ProgramSource& src, const String& buildflags, String& errmsg) { OCL_NOT_AVAILABLE(); } @@ -283,13 +295,15 @@ String Program::getPrefix() const { OCL_NOT_AVAILABLE(); } /* static */ String Program::getPrefix(const String& buildflags) { OCL_NOT_AVAILABLE(); } -ProgramSource::ProgramSource() : p(NULL) { } +ProgramSource::ProgramSource() CV_NOEXCEPT : p(NULL) { } ProgramSource::ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash) : p(NULL) { } ProgramSource::ProgramSource(const String& prog) : p(NULL) { } ProgramSource::ProgramSource(const char* prog) : p(NULL) { } ProgramSource::~ProgramSource() { } ProgramSource::ProgramSource(const ProgramSource& prog) : p(NULL) { } ProgramSource& ProgramSource::operator=(const ProgramSource& prog) { return *this; } +ProgramSource::ProgramSource(ProgramSource&&) CV_NOEXCEPT : p(NULL) { } +ProgramSource& ProgramSource::operator=(ProgramSource&&) CV_NOEXCEPT { return *this; } const String& ProgramSource::source() const { OCL_NOT_AVAILABLE(); } ProgramSource::hash_t ProgramSource::hash() const { OCL_NOT_AVAILABLE(); } @@ -298,12 +312,14 @@ ProgramSource::hash_t ProgramSource::hash() const { OCL_NOT_AVAILABLE(); } /* static */ ProgramSource ProgramSource::fromSPIR(const String& module, const String& name, const unsigned char* binary, const size_t size, const cv::String& buildOptions) { OCL_NOT_AVAILABLE(); } -PlatformInfo::PlatformInfo() : p(NULL) { } +PlatformInfo::PlatformInfo() CV_NOEXCEPT : p(NULL) { } PlatformInfo::PlatformInfo(void* id) : p(NULL) { OCL_NOT_AVAILABLE(); } PlatformInfo::~PlatformInfo() { } PlatformInfo::PlatformInfo(const PlatformInfo& i) : p(NULL) { } PlatformInfo& PlatformInfo::operator=(const PlatformInfo& i) { return *this; } +PlatformInfo::PlatformInfo(PlatformInfo&&) CV_NOEXCEPT : p(NULL) { } +PlatformInfo& PlatformInfo::operator=(PlatformInfo&&) CV_NOEXCEPT { return *this; } String PlatformInfo::name() const { OCL_NOT_AVAILABLE(); } String PlatformInfo::vendor() const { OCL_NOT_AVAILABLE(); } @@ -341,11 +357,13 @@ int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray sr void buildOptionsAddMatrixDescription(String& buildOptions, const 
String& name, InputArray _m) { OCL_NOT_AVAILABLE(); } -Image2D::Image2D() : p(NULL) { } +Image2D::Image2D() CV_NOEXCEPT : p(NULL) { } Image2D::Image2D(const UMat &src, bool norm, bool alias) { OCL_NOT_AVAILABLE(); } Image2D::Image2D(const Image2D & i) : p(NULL) { OCL_NOT_AVAILABLE(); } Image2D::~Image2D() { } Image2D& Image2D::operator=(const Image2D & i) { return *this; } +Image2D::Image2D(Image2D&&) CV_NOEXCEPT : p(NULL) { } +Image2D& Image2D::operator=(Image2D&&) CV_NOEXCEPT { return *this; } /* static */ bool Image2D::canCreateAlias(const UMat &u) { OCL_NOT_AVAILABLE(); } /* static */ bool Image2D::isFormatSupported(int depth, int cn, bool norm) { OCL_NOT_AVAILABLE(); } diff --git a/modules/core/src/opencl/cvtclr_dx.cl b/modules/core/src/opencl/cvtclr_dx.cl index 0ca2118c77..5c51077814 100644 --- a/modules/core/src/opencl/cvtclr_dx.cl +++ b/modules/core/src/opencl/cvtclr_dx.cl @@ -91,63 +91,50 @@ void YUV2BGR_NV12_8u( { int x = get_global_id(0); int y = get_global_id(1); + // each iteration computes 2*2=4 pixels + int x2 = x*2; + int y2 = y*2; - if (x + 1 < cols) - { - if (y + 1 < rows) - { - __global uchar* pDstRow1 = pBGR + mad24(y, bgrStep, mad24(x, NCHANNELS, 0)); - __global uchar* pDstRow2 = pDstRow1 + bgrStep; + if (x2 + 1 < cols) { + if (y2 + 1 < rows) { + __global uchar *pDstRow1 = pBGR + mad24(y2, bgrStep, mad24(x2, NCHANNELS, 0)); + __global uchar *pDstRow2 = pDstRow1 + bgrStep; - float4 Y1 = read_imagef(imgY, (int2)(x+0, y+0)); - float4 Y2 = read_imagef(imgY, (int2)(x+1, y+0)); - float4 Y3 = read_imagef(imgY, (int2)(x+0, y+1)); - float4 Y4 = read_imagef(imgY, (int2)(x+1, y+1)); + float4 Y1 = read_imagef(imgY, (int2)(x2 + 0, y2 + 0)); + float4 Y2 = read_imagef(imgY, (int2)(x2 + 1, y2 + 0)); + float4 Y3 = read_imagef(imgY, (int2)(x2 + 0, y2 + 1)); + float4 Y4 = read_imagef(imgY, (int2)(x2 + 1, y2 + 1)); + float4 Y = (float4)(Y1.x, Y2.x, Y3.x, Y4.x); - float4 UV = read_imagef(imgUV, (int2)(x/2, y/2)) - d2; + float4 UV = read_imagef(imgUV, (int2)(x, y)) - d2; - __constant float* coeffs = c_YUV2RGBCoeffs_420; + __constant float *coeffs = c_YUV2RGBCoeffs_420; - Y1 = max(0.f, Y1 - d1) * coeffs[0]; - Y2 = max(0.f, Y2 - d1) * coeffs[0]; - Y3 = max(0.f, Y3 - d1) * coeffs[0]; - Y4 = max(0.f, Y4 - d1) * coeffs[0]; + Y = max(0.f, Y - d1) * coeffs[0]; float ruv = fma(coeffs[4], UV.y, 0.0f); float guv = fma(coeffs[3], UV.y, fma(coeffs[2], UV.x, 0.0f)); float buv = fma(coeffs[1], UV.x, 0.0f); - float R1 = (Y1.x + ruv) * CV_8U_MAX; - float G1 = (Y1.x + guv) * CV_8U_MAX; - float B1 = (Y1.x + buv) * CV_8U_MAX; + float4 R = (Y + ruv) * CV_8U_MAX; + float4 G = (Y + guv) * CV_8U_MAX; + float4 B = (Y + buv) * CV_8U_MAX; - float R2 = (Y2.x + ruv) * CV_8U_MAX; - float G2 = (Y2.x + guv) * CV_8U_MAX; - float B2 = (Y2.x + buv) * CV_8U_MAX; + pDstRow1[0*NCHANNELS + 0] = convert_uchar_sat(B.x); + pDstRow1[0*NCHANNELS + 1] = convert_uchar_sat(G.x); + pDstRow1[0*NCHANNELS + 2] = convert_uchar_sat(R.x); - float R3 = (Y3.x + ruv) * CV_8U_MAX; - float G3 = (Y3.x + guv) * CV_8U_MAX; - float B3 = (Y3.x + buv) * CV_8U_MAX; + pDstRow1[1*NCHANNELS + 0] = convert_uchar_sat(B.y); + pDstRow1[1*NCHANNELS + 1] = convert_uchar_sat(G.y); + pDstRow1[1*NCHANNELS + 2] = convert_uchar_sat(R.y); - float R4 = (Y4.x + ruv) * CV_8U_MAX; - float G4 = (Y4.x + guv) * CV_8U_MAX; - float B4 = (Y4.x + buv) * CV_8U_MAX; + pDstRow2[0*NCHANNELS + 0] = convert_uchar_sat(B.z); + pDstRow2[0*NCHANNELS + 1] = convert_uchar_sat(G.z); + pDstRow2[0*NCHANNELS + 2] = convert_uchar_sat(R.z); - pDstRow1[0*NCHANNELS + 0] = convert_uchar_sat(B1); - 
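// [editor's note] The rewritten NV12 kernels in cvtclr_dx.cl map one work-item to a 2x2 pixel
// quad: x2 = 2*get_global_id(0), y2 = 2*get_global_id(1) address the four luma samples, and the
// shared chroma pair is read once at (x, y) instead of (x/2, y/2). The host must therefore
// launch a quarter-size NDRange; a hypothetical launch computation:
//     size_t globalsize[2] = { ((size_t)cols + 1) / 2, ((size_t)rows + 1) / 2 }; // one item per quad
//     k.run(2, globalsize, NULL, false);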
pDstRow1[0*NCHANNELS + 1] = convert_uchar_sat(G1); - pDstRow1[0*NCHANNELS + 2] = convert_uchar_sat(R1); - - pDstRow1[1*NCHANNELS + 0] = convert_uchar_sat(B2); - pDstRow1[1*NCHANNELS + 1] = convert_uchar_sat(G2); - pDstRow1[1*NCHANNELS + 2] = convert_uchar_sat(R2); - - pDstRow2[0*NCHANNELS + 0] = convert_uchar_sat(B3); - pDstRow2[0*NCHANNELS + 1] = convert_uchar_sat(G3); - pDstRow2[0*NCHANNELS + 2] = convert_uchar_sat(R3); - - pDstRow2[1*NCHANNELS + 0] = convert_uchar_sat(B4); - pDstRow2[1*NCHANNELS + 1] = convert_uchar_sat(G4); - pDstRow2[1*NCHANNELS + 2] = convert_uchar_sat(R4); + pDstRow2[1*NCHANNELS + 0] = convert_uchar_sat(B.w); + pDstRow2[1*NCHANNELS + 1] = convert_uchar_sat(G.w); + pDstRow2[1*NCHANNELS + 2] = convert_uchar_sat(R.w); } } } @@ -172,12 +159,15 @@ void BGR2YUV_NV12_8u( { int x = get_global_id(0); int y = get_global_id(1); + // each iteration computes 2*2=4 pixels + int x2 = x*2; + int y2 = y*2; - if (x < cols) + if (x2 + 1 < cols) { - if (y < rows) + if (y2 + 1 < rows) { - __global const uchar* pSrcRow1 = pBGR + mad24(y, bgrStep, mad24(x, NCHANNELS, 0)); + __global const uchar* pSrcRow1 = pBGR + mad24(y2, bgrStep, mad24(x2, NCHANNELS, 0)); __global const uchar* pSrcRow2 = pSrcRow1 + bgrStep; float4 src_pix1 = convert_float4(vload4(0, pSrcRow1 + 0*NCHANNELS)) * CV_8U_SCALE; @@ -196,12 +186,12 @@ void BGR2YUV_NV12_8u( UV.x = fma(coeffs[3], src_pix1.z, fma(coeffs[4], src_pix1.y, fma(coeffs[5], src_pix1.x, d2))); UV.y = fma(coeffs[5], src_pix1.z, fma(coeffs[6], src_pix1.y, fma(coeffs[7], src_pix1.x, d2))); - write_imagef(imgY, (int2)(x+0, y+0), Y1); - write_imagef(imgY, (int2)(x+1, y+0), Y2); - write_imagef(imgY, (int2)(x+0, y+1), Y3); - write_imagef(imgY, (int2)(x+1, y+1), Y4); + write_imagef(imgY, (int2)(x2+0, y2+0), Y1); + write_imagef(imgY, (int2)(x2+1, y2+0), Y2); + write_imagef(imgY, (int2)(x2+0, y2+1), Y3); + write_imagef(imgY, (int2)(x2+1, y2+1), Y4); - write_imagef(imgUV, (int2)((x/2), (y/2)), UV); + write_imagef(imgUV, (int2)(x, y), UV); } } } diff --git a/modules/core/src/opencl/halfconvert.cl b/modules/core/src/opencl/halfconvert.cl index 506df69faf..9df602f406 100644 --- a/modules/core/src/opencl/halfconvert.cl +++ b/modules/core/src/opencl/halfconvert.cl @@ -47,8 +47,17 @@ #endif #endif -__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset, - __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols) +__kernel void +#ifdef FLOAT_TO_HALF + convertFp16_FP32_to_FP16 +#else + convertFp16_FP16_to_FP32 +#endif +( + __global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, + int dst_rows, int dst_cols +) { int x = get_global_id(0); int y0 = get_global_id(1) * rowsPerWI; diff --git a/modules/core/src/opengl.cpp b/modules/core/src/opengl.cpp index 5ff3c717b6..ab39b1b8ac 100644 --- a/modules/core/src/opengl.cpp +++ b/modules/core/src/opengl.cpp @@ -1575,6 +1575,7 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala // CL-GL Interoperability #ifdef HAVE_OPENCL +# include "opencv2/core/opencl/runtime/opencl_core.hpp" # include "opencv2/core/opencl/runtime/opencl_gl.hpp" # ifdef cl_khr_gl_sharing # define HAVE_OPENCL_OPENGL_SHARING @@ -1595,6 +1596,34 @@ void cv::ogl::render(const ogl::Arrays& arr, InputArray indices, int mode, Scala namespace cv { namespace ogl { +#if defined(HAVE_OPENCL) && defined(HAVE_OPENGL) && defined(HAVE_OPENCL_OPENGL_SHARING) +// Check to avoid crash in OpenCL runtime: 
https://github.com/opencv/opencv/issues/5209 +static void checkOpenCLVersion() +{ + using namespace cv::ocl; + const Device& device = Device::getDefault(); + //CV_Assert(!device.empty()); + cl_device_id dev = (cl_device_id)device.ptr(); + CV_Assert(dev); + + cl_platform_id platform_id = 0; + size_t sz = 0; + + cl_int status = clGetDeviceInfo(dev, CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, &sz); + CV_Assert(status == CL_SUCCESS && sz == sizeof(cl_platform_id)); + CV_Assert(platform_id); + + PlatformInfo pi(&platform_id); + int versionMajor = pi.versionMajor(); + int versionMinor = pi.versionMinor(); + if (versionMajor < 1 || (versionMajor == 1 && versionMinor <= 1)) + CV_Error_(cv::Error::OpenCLApiCallError, + ("OpenCL: clCreateFromGLTexture requires OpenCL 1.2+ version: %d.%d - %s (%s)", + versionMajor, versionMinor, pi.name().c_str(), pi.version().c_str()) + ); +} +#endif + namespace ocl { Context& initializeContextFromGL() @@ -1719,6 +1748,8 @@ void convertToGLTexture2D(InputArray src, Texture2D& texture) Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + checkOpenCLVersion(); // clCreateFromGLTexture requires OpenCL 1.2 + UMat u = src.getUMat(); // TODO Add support for roi @@ -1777,6 +1808,8 @@ void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst) Context& ctx = Context::getDefault(); cl_context context = (cl_context)ctx.ptr(); + checkOpenCLVersion(); // clCreateFromGLTexture requires OpenCL 1.2 + // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying! dst.create(texture.size(), textureType); UMat u = dst.getUMat(); diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 9dc0fd00f0..879d80cdb1 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -45,6 +45,9 @@ #include #include +#include "opencv2/core/parallel/parallel_backend.hpp" +#include "parallel/parallel.hpp" + #if defined _WIN32 || defined WINCE #include #undef small @@ -101,7 +104,6 @@ #endif #include "tbb/tbb.h" #include "tbb/task.h" - #include "tbb/tbb_stddef.h" #if TBB_INTERFACE_VERSION >= 8000 #include "tbb/task_arena.h" #endif @@ -145,9 +147,7 @@ # define CV_PARALLEL_FRAMEWORK "pthreads" #endif -#ifdef CV_PARALLEL_FRAMEWORK #include -#endif #include "parallel_impl.hpp" @@ -159,9 +159,10 @@ namespace cv { ParallelLoopBody::~ParallelLoopBody() {} +using namespace cv::parallel; + namespace { -#ifdef CV_PARALLEL_FRAMEWORK #ifdef ENABLE_INSTRUMENTATION static void SyncNodes(cv::instr::InstrNode *pNode) { @@ -430,8 +431,6 @@ namespace { typedef ParallelLoopBodyWrapper ProxyLoopBody; #endif -static int numThreads = -1; - #if defined HAVE_TBB #if TBB_INTERFACE_VERSION >= 8000 static tbb::task_arena tbbArena(tbb::task_arena::automatic); @@ -446,7 +445,7 @@ static inline int _initMaxThreads() int maxThreads = omp_get_max_threads(); if (!utils::getConfigurationParameterBool("OPENCV_FOR_OPENMP_DYNAMIC_DISABLE", false)) { - omp_set_dynamic(maxThreads); + omp_set_dynamic(1); } return maxThreads; } @@ -477,15 +476,11 @@ static SchedPtr pplScheduler; #endif -#endif // CV_PARALLEL_FRAMEWORK - } // namespace anon /* ================================ parallel_for_ ================================ */ -#ifdef CV_PARALLEL_FRAMEWORK static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes); // forward declaration -#endif void parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { @@ -500,7 +495,6 @@ void parallel_for_(const 
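// [editor's note] checkOpenCLVersion() above turns a hard crash (issue #5209) into a clear
// error: clCreateFromGLTexture only exists since OpenCL 1.2, so both GL interop directions now
// validate the platform version first. With the new PlatformInfo accessors the gate is a
// simple comparison:
//     if (pi.versionMajor() < 1 || (pi.versionMajor() == 1 && pi.versionMinor() <= 1))
//         CV_Error(cv::Error::OpenCLApiCallError, "clCreateFromGLTexture requires OpenCL 1.2+");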
cv::Range& range, const cv::ParallelLoopBody& body, dou if (range.empty()) return; -#ifdef CV_PARALLEL_FRAMEWORK static std::atomic flagNestedParallelFor(false); bool isNotNestedRegion = !flagNestedParallelFor.load(); if (isNotNestedRegion) @@ -519,16 +513,23 @@ void parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, dou } } else // nested parallel_for_() calls are not parallelized -#endif // CV_PARALLEL_FRAMEWORK { CV_UNUSED(nstripes); body(range); } } -#ifdef CV_PARALLEL_FRAMEWORK +static +void parallel_for_cb(int start, int end, void* data) +{ + CV_DbgAssert(data); + const cv::ParallelLoopBody& body = *(const cv::ParallelLoopBody*)data; + body(Range(start, end)); +} + static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { + using namespace cv::parallel; if ((numThreads < 0 || numThreads > 1) && range.end - range.start > 1) { ParallelLoopBodyWrapperContext ctx(body, range, nstripes); @@ -540,6 +541,16 @@ static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody return; } + std::shared_ptr& api = getCurrentParallelForAPI(); + if (api) + { + CV_CheckEQ(stripeRange.start, 0, ""); + api->parallel_for(stripeRange.end, parallel_for_cb, (void*)&pbody); + ctx.finalize(); // propagate exceptions if exists + return; + } + +#ifdef CV_PARALLEL_FRAMEWORK #if defined HAVE_TBB #if TBB_INTERFACE_VERSION >= 8000 @@ -590,24 +601,25 @@ static void parallel_for_impl(const cv::Range& range, const cv::ParallelLoopBody #endif ctx.finalize(); // propagate exceptions if exists - } - else - { - body(range); - } -} + return; #endif // CV_PARALLEL_FRAMEWORK + } + + body(range); +} int getNumThreads(void) { -#ifdef CV_PARALLEL_FRAMEWORK + std::shared_ptr& api = getCurrentParallelForAPI(); + if (api) + { + return api->getNumThreads(); + } - if(numThreads == 0) + if (numThreads == 0) return 1; -#endif - #if defined HAVE_TBB #if TBB_INTERFACE_VERSION >= 9100 @@ -682,10 +694,15 @@ unsigned defaultNumberOfThreads() void setNumThreads( int threads_ ) { CV_UNUSED(threads_); -#ifdef CV_PARALLEL_FRAMEWORK + int threads = (threads_ < 0) ? defaultNumberOfThreads() : (unsigned)threads_; numThreads = threads; -#endif + + std::shared_ptr& api = getCurrentParallelForAPI(); + if (api) + { + api->setNumThreads(numThreads); + } #ifdef HAVE_TBB @@ -741,6 +758,12 @@ void setNumThreads( int threads_ ) int getThreadNum() { + std::shared_ptr& api = getCurrentParallelForAPI(); + if (api) + { + return api->getThreadNum(); + } + #if defined HAVE_TBB #if TBB_INTERFACE_VERSION >= 9100 return tbb::this_task_arena::current_thread_index(); @@ -963,7 +986,13 @@ int getNumberOfCPUs() return nCPUs; // cached value } -const char* currentParallelFramework() { +const char* currentParallelFramework() +{ + std::shared_ptr& api = getCurrentParallelForAPI(); + if (api) + { + return api->getName(); + } #ifdef CV_PARALLEL_FRAMEWORK return CV_PARALLEL_FRAMEWORK; #else diff --git a/modules/core/src/parallel/factory_parallel.hpp b/modules/core/src/parallel/factory_parallel.hpp new file mode 100644 index 0000000000..693fe30ecf --- /dev/null +++ b/modules/core/src/parallel/factory_parallel.hpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
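// [editor's note] The dispatch above forwards cv::parallel_for_ into a C-style backend
// callback: the ParallelLoopBody is passed as an opaque pointer and re-invoked per stripe.
// Minimal sketch of that trampoline pattern (mirrors parallel_for_cb above):
static void forEachStripe(int start, int end, void* data)
{
    const cv::ParallelLoopBody& body = *(const cv::ParallelLoopBody*)data; // opaque -> typed
    body(cv::Range(start, end)); // execute one stripe of the original range
}
// a backend then only needs: api->parallel_for(totalStripes, forEachStripe, &body);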
+ +#ifndef OPENCV_CORE_PARALLEL_FACTORY_HPP +#define OPENCV_CORE_PARALLEL_FACTORY_HPP + +#include "opencv2/core/parallel/parallel_backend.hpp" + +namespace cv { namespace parallel { + +class IParallelBackendFactory +{ +public: + virtual ~IParallelBackendFactory() {} + virtual std::shared_ptr create() const = 0; +}; + + +class StaticBackendFactory CV_FINAL: public IParallelBackendFactory +{ +protected: + std::function(void)> create_fn_; + +public: + StaticBackendFactory(std::function(void)>&& create_fn) + : create_fn_(create_fn) + { + // nothing + } + + ~StaticBackendFactory() CV_OVERRIDE {} + + std::shared_ptr create() const CV_OVERRIDE + { + return create_fn_(); + } +}; + +// +// PluginBackendFactory is implemented in plugin_wrapper.cpp +// + +std::shared_ptr createPluginParallelBackendFactory(const std::string& baseName); + +}} // namespace + +#endif // OPENCV_CORE_PARALLEL_FACTORY_HPP diff --git a/modules/core/src/parallel/parallel.cpp b/modules/core/src/parallel/parallel.cpp new file mode 100644 index 0000000000..29b482f5f3 --- /dev/null +++ b/modules/core/src/parallel/parallel.cpp @@ -0,0 +1,177 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "../precomp.hpp" +#include "parallel.hpp" + +#include +#include +#ifdef NDEBUG +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 +#else +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 +#endif +#include + + +#include "registry_parallel.hpp" +#include "registry_parallel.impl.hpp" + +#include "plugin_parallel_api.hpp" +#include "plugin_parallel_wrapper.impl.hpp" + + +namespace cv { namespace parallel { + +int numThreads = -1; + +ParallelForAPI::~ParallelForAPI() +{ + // nothing +} + +static +std::string& getParallelBackendName() +{ + static std::string g_backendName = toUpperCase(cv::utils::getConfigurationParameterString("OPENCV_PARALLEL_BACKEND", "")); + return g_backendName; +} + +static bool g_initializedParallelForAPI = false; + +static +std::shared_ptr createParallelForAPI() +{ + const std::string& name = getParallelBackendName(); + bool isKnown = false; + const auto& backends = getParallelBackendsInfo(); + if (!name.empty()) + { + CV_LOG_INFO(NULL, "core(parallel): requested backend name: " << name); + } + for (size_t i = 0; i < backends.size(); i++) + { + const auto& info = backends[i]; + if (!name.empty()) + { + if (name != info.name) + { + continue; + } + isKnown = true; + } + try + { + CV_LOG_DEBUG(NULL, "core(parallel): trying backend: " << info.name << " (priority=" << info.priority << ")"); + if (!info.backendFactory) + { + CV_LOG_DEBUG(NULL, "core(parallel): factory is not available (plugins require filesystem support): " << info.name); + continue; + } + std::shared_ptr backend = info.backendFactory->create(); + if (!backend) + { + CV_LOG_VERBOSE(NULL, 0, "core(parallel): not available: " << info.name); + continue; + } + CV_LOG_INFO(NULL, "core(parallel): using backend: " << info.name << " (priority=" << info.priority << ")"); + g_initializedParallelForAPI = true; + getParallelBackendName() = info.name; + return backend; + } + catch (const std::exception& e) + { + CV_LOG_WARNING(NULL, "core(parallel): can't initialize " << info.name << " backend: " << e.what()); + } + catch (...) 
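// [editor's note] Backend selection above is driven by the OPENCV_PARALLEL_BACKEND
// environment variable and a priority-sorted registry; the same switch is exposed
// programmatically via setParallelForBackend() below. A hypothetical caller (backend names
// follow the registry, e.g. "TBB"):
//     cv::parallel::setParallelForBackend("TBB"); // returns false if the backend is unavailable
//     cv::parallel_for_(cv::Range(0, 100), body); // now routed through the selected backend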
+ { + CV_LOG_WARNING(NULL, "core(parallel): can't initialize " << info.name << " backend: Unknown C++ exception"); + } + } + if (name.empty()) + { + CV_LOG_DEBUG(NULL, "core(parallel): fallback on builtin code"); + } + else + { + if (!isKnown) + CV_LOG_INFO(NULL, "core(parallel): unknown backend: " << name); + } + g_initializedParallelForAPI = true; + return std::shared_ptr(); +} + +static inline +std::shared_ptr createDefaultParallelForAPI() +{ + CV_LOG_DEBUG(NULL, "core(parallel): Initializing parallel backend..."); + return createParallelForAPI(); +} + +std::shared_ptr& getCurrentParallelForAPI() +{ + static std::shared_ptr g_currentParallelForAPI = createDefaultParallelForAPI(); + return g_currentParallelForAPI; +} + +void setParallelForBackend(const std::shared_ptr& api, bool propagateNumThreads) +{ + getCurrentParallelForAPI() = api; + if (propagateNumThreads && api) + { + setNumThreads(numThreads); + } +} + +bool setParallelForBackend(const std::string& backendName, bool propagateNumThreads) +{ + CV_TRACE_FUNCTION(); + + std::string backendName_u = toUpperCase(backendName); + if (g_initializedParallelForAPI) + { + // ... already initialized + if (getParallelBackendName() == backendName_u) + { + CV_LOG_INFO(NULL, "core(parallel): backend is already activated: " << (backendName.empty() ? "builtin(legacy)" : backendName)); + return true; + } + else + { + // ... re-create new + CV_LOG_DEBUG(NULL, "core(parallel): replacing parallel backend..."); + getParallelBackendName() = backendName_u; + getCurrentParallelForAPI() = createParallelForAPI(); + } + } + else + { + // ... no backend exists, just specify the name (initialization is triggered by getCurrentParallelForAPI() call) + getParallelBackendName() = backendName_u; + } + std::shared_ptr api = getCurrentParallelForAPI(); + if (!api) + { + if (!backendName.empty()) + { + CV_LOG_WARNING(NULL, "core(parallel): backend is not available: " << backendName << " (using builtin legacy code)"); + return false; + } + else + { + CV_LOG_WARNING(NULL, "core(parallel): switched to builtin code (legacy)"); + } + } + if (!backendName_u.empty()) + { + CV_Assert(backendName_u == getParallelBackendName()); // data race? + } + + if (propagateNumThreads) + { + setNumThreads(numThreads); + } + return true; +} + +}} // namespace diff --git a/modules/core/src/parallel/parallel.hpp b/modules/core/src/parallel/parallel.hpp new file mode 100644 index 0000000000..b6a54b14e7 --- /dev/null +++ b/modules/core/src/parallel/parallel.hpp @@ -0,0 +1,29 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef OPENCV_CORE_SRC_PARALLEL_PARALLEL_HPP +#define OPENCV_CORE_SRC_PARALLEL_PARALLEL_HPP + +#include "opencv2/core/parallel/parallel_backend.hpp" + +namespace cv { namespace parallel { + +extern int numThreads; + +std::shared_ptr& getCurrentParallelForAPI(); + +#ifndef BUILD_PLUGIN + +#ifdef HAVE_TBB +std::shared_ptr createParallelBackendTBB(); +#endif + +#ifdef HAVE_OPENMP +std::shared_ptr createParallelBackendOpenMP(); +#endif + +#endif // BUILD_PLUGIN + +}} // namespace + +#endif // OPENCV_CORE_SRC_PARALLEL_PARALLEL_HPP diff --git a/modules/core/src/parallel/parallel_openmp.cpp b/modules/core/src/parallel/parallel_openmp.cpp new file mode 100644 index 0000000000..c0010dd845 --- /dev/null +++ b/modules/core/src/parallel/parallel_openmp.cpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "../precomp.hpp" + +#ifdef HAVE_OPENMP + +#include "parallel.hpp" +#include "opencv2/core/parallel/backend/parallel_for.openmp.hpp" + +namespace cv { namespace parallel { + +static +std::shared_ptr<cv::parallel::ParallelForAPI>& getInstance() +{ + static std::shared_ptr<cv::parallel::ParallelForAPI> g_instance = std::make_shared<openmp::ParallelForBackend>(); + return g_instance; +} + +#ifndef BUILD_PLUGIN +std::shared_ptr<cv::parallel::ParallelForAPI> createParallelBackendOpenMP() +{ + return getInstance(); +} +#endif + +}} // namespace + +#ifdef BUILD_PLUGIN + +#define ABI_VERSION 0 +#define API_VERSION 0 +#include "plugin_parallel_api.hpp" + +static +CvResult cv_getInstance(CV_OUT CvPluginParallelBackendAPI* handle) CV_NOEXCEPT +{ + try + { + if (!handle) + return CV_ERROR_FAIL; + *handle = cv::parallel::getInstance().get(); + return CV_ERROR_OK; + } + catch (...) + { + return CV_ERROR_FAIL; + } +} + +static const OpenCV_Core_Parallel_Plugin_API plugin_api = +{ + { + sizeof(OpenCV_Core_Parallel_Plugin_API), ABI_VERSION, API_VERSION, + CV_VERSION_MAJOR, CV_VERSION_MINOR, CV_VERSION_REVISION, CV_VERSION_STATUS, + "OpenMP (" CVAUX_STR(_OPENMP) ") OpenCV parallel plugin" + }, + { + /* 1*/cv_getInstance + } +}; + +const OpenCV_Core_Parallel_Plugin_API* CV_API_CALL opencv_core_parallel_plugin_init_v0(int requested_abi_version, int requested_api_version, void* /*reserved=NULL*/) CV_NOEXCEPT +{ + if (requested_abi_version == ABI_VERSION && requested_api_version <= API_VERSION) + return &plugin_api; + return NULL; +} + +#endif // BUILD_PLUGIN + +#endif // HAVE_OPENMP diff --git a/modules/core/src/parallel/parallel_tbb.cpp b/modules/core/src/parallel/parallel_tbb.cpp new file mode 100644 index 0000000000..d430e858e6 --- /dev/null +++ b/modules/core/src/parallel/parallel_tbb.cpp @@ -0,0 +1,74 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "../precomp.hpp" + +#include "factory_parallel.hpp" + +#ifdef HAVE_TBB + +#include "parallel.hpp" +#include "opencv2/core/parallel/backend/parallel_for.tbb.hpp" + +namespace cv { namespace parallel { + +static +std::shared_ptr<cv::parallel::ParallelForAPI>& getInstance() +{ + static std::shared_ptr<cv::parallel::ParallelForAPI> g_instance = std::make_shared<tbb::ParallelForBackend>(); + return g_instance; +} + +#ifndef BUILD_PLUGIN +std::shared_ptr<cv::parallel::ParallelForAPI> createParallelBackendTBB() +{ + return getInstance(); +} +#endif + +}} // namespace + +#ifdef BUILD_PLUGIN + +#define ABI_VERSION 0 +#define API_VERSION 0 +#include "plugin_parallel_api.hpp" + +static +CvResult cv_getInstance(CV_OUT CvPluginParallelBackendAPI* handle) CV_NOEXCEPT +{ + try + { + if (!handle) + return CV_ERROR_FAIL; + *handle = cv::parallel::getInstance().get(); + return CV_ERROR_OK; + } + catch (...)
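// [editor's note] Both static backends above compile to the same plugin shape when
// BUILD_PLUGIN is set: a single exported entry point that negotiates versions. The loader
// (see plugin_parallel_wrapper.impl.hpp below) probes API levels downward until the plugin
// accepts one:
//     for (int api = API_VERSION; api >= 0; api--)
//         if ((plugin_api = fn_init(ABI_VERSION, api, NULL)) != NULL)
//             break; // accepted: ABI matches exactly, API may be lower than requested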
+ { + return CV_ERROR_FAIL; + } +} + +static const OpenCV_Core_Parallel_Plugin_API plugin_api = +{ + { + sizeof(OpenCV_Core_Parallel_Plugin_API), ABI_VERSION, API_VERSION, + CV_VERSION_MAJOR, CV_VERSION_MINOR, CV_VERSION_REVISION, CV_VERSION_STATUS, + "TBB (interface " CVAUX_STR(TBB_INTERFACE_VERSION) ") OpenCV parallel plugin" + }, + { + /* 1*/cv_getInstance + } +}; + +const OpenCV_Core_Parallel_Plugin_API* CV_API_CALL opencv_core_parallel_plugin_init_v0(int requested_abi_version, int requested_api_version, void* /*reserved=NULL*/) CV_NOEXCEPT +{ + if (requested_abi_version == ABI_VERSION && requested_api_version <= API_VERSION) + return &plugin_api; + return NULL; +} + +#endif // BUILD_PLUGIN + +#endif // HAVE_TBB diff --git a/modules/core/src/parallel/plugin_parallel_api.hpp b/modules/core/src/parallel/plugin_parallel_api.hpp new file mode 100644 index 0000000000..bdc28d6de0 --- /dev/null +++ b/modules/core/src/parallel/plugin_parallel_api.hpp @@ -0,0 +1,72 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef PARALLEL_PLUGIN_API_HPP +#define PARALLEL_PLUGIN_API_HPP + +#include +#include + +#include "opencv2/core/parallel/parallel_backend.hpp" + +#if !defined(BUILD_PLUGIN) + +/// increased for backward-compatible changes, e.g. add new function +/// Caller API <= Plugin API -> plugin is fully compatible +/// Caller API > Plugin API -> plugin is not fully compatible, caller should use extra checks to use plugins with older API +#define API_VERSION 0 // preview + +/// increased for incompatible changes, e.g. remove function argument +/// Caller ABI == Plugin ABI -> plugin is compatible +/// Caller ABI > Plugin ABI -> plugin is not compatible, caller should use shim code to use old ABI plugins (caller may know how lower ABI works, so it is possible) +/// Caller ABI < Plugin ABI -> plugin can't be used (plugin should provide interface with lower ABI to handle that) +#define ABI_VERSION 0 // preview + +#else // !defined(BUILD_PLUGIN) + +#if !defined(ABI_VERSION) || !defined(API_VERSION) +#error "Plugin must define ABI_VERSION and API_VERSION before including parallel_plugin_api.hpp" +#endif + +#endif // !defined(BUILD_PLUGIN) + +typedef cv::parallel::ParallelForAPI* CvPluginParallelBackendAPI; + +struct OpenCV_Core_Parallel_Plugin_API_v0_0_api_entries +{ + /** @brief Get parallel backend API instance + + @param[out] handle pointer on backend API handle + + @note API-CALL 1, API-Version == 0 + */ + CvResult (CV_API_CALL *getInstance)(CV_OUT CvPluginParallelBackendAPI* handle) CV_NOEXCEPT; +}; // OpenCV_Core_Parallel_Plugin_API_v0_0_api_entries + +typedef struct OpenCV_Core_Parallel_Plugin_API_v0 +{ + OpenCV_API_Header api_header; + struct OpenCV_Core_Parallel_Plugin_API_v0_0_api_entries v0; +} OpenCV_Core_Parallel_Plugin_API_v0; + +#if ABI_VERSION == 0 && API_VERSION == 0 +typedef OpenCV_Core_Parallel_Plugin_API_v0 OpenCV_Core_Parallel_Plugin_API; +#else +#error "Not supported configuration: check ABI_VERSION/API_VERSION" +#endif + +#ifdef BUILD_PLUGIN +extern "C" { + +CV_PLUGIN_EXPORTS +const OpenCV_Core_Parallel_Plugin_API* CV_API_CALL opencv_core_parallel_plugin_init_v0 + (int requested_abi_version, int requested_api_version, void* reserved /*NULL*/) CV_NOEXCEPT; + +} // extern "C" +#else // BUILD_PLUGIN +typedef const OpenCV_Core_Parallel_Plugin_API* (CV_API_CALL *FN_opencv_core_parallel_plugin_init_t) + (int requested_abi_version, int 
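// [editor's note] In short, the versioning rules documented above: ABI must match exactly,
// while a plugin with an older API can still be loaded if the caller degrades gracefully.
// E.g. a caller built with (ABI 0, API 1) may use an (ABI 0, API 0) plugin, but must check the
// reported level before touching any hypothetical v1 entry:
//     if (plugin_api->api_header.api_version >= 1)
//         /* safe to use v1 entries */;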
requested_api_version, void* reserved /*NULL*/); +#endif // BUILD_PLUGIN + +#endif // PARALLEL_PLUGIN_API_HPP diff --git a/modules/core/src/parallel/plugin_parallel_wrapper.impl.hpp b/modules/core/src/parallel/plugin_parallel_wrapper.impl.hpp new file mode 100644 index 0000000000..a5649b60c0 --- /dev/null +++ b/modules/core/src/parallel/plugin_parallel_wrapper.impl.hpp @@ -0,0 +1,287 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// +// Not a standalone header, part of parallel.cpp +// + +//================================================================================================== +// Dynamic backend implementation + +#include "opencv2/core/utils/plugin_loader.private.hpp" + +namespace cv { namespace impl { + +using namespace cv::parallel; + +#if OPENCV_HAVE_FILESYSTEM_SUPPORT && defined(PARALLEL_ENABLE_PLUGINS) + +using namespace cv::plugin::impl; // plugin_loader.hpp + +class PluginParallelBackend CV_FINAL: public std::enable_shared_from_this +{ +protected: + void initPluginAPI() + { + const char* init_name = "opencv_core_parallel_plugin_init_v0"; + FN_opencv_core_parallel_plugin_init_t fn_init = reinterpret_cast(lib_->getSymbol(init_name)); + if (fn_init) + { + CV_LOG_DEBUG(NULL, "Found entry: '" << init_name << "'"); + for (int supported_api_version = API_VERSION; supported_api_version >= 0; supported_api_version--) + { + plugin_api_ = fn_init(ABI_VERSION, supported_api_version, NULL); + if (plugin_api_) + break; + } + if (!plugin_api_) + { + CV_LOG_INFO(NULL, "core(parallel): plugin is incompatible (can't be initialized): " << lib_->getName()); + return; + } + if (!checkCompatibility(plugin_api_->api_header, ABI_VERSION, API_VERSION, false)) + { + plugin_api_ = NULL; + return; + } + CV_LOG_INFO(NULL, "core(parallel): plugin is ready to use '" << plugin_api_->api_header.api_description << "'"); + } + else + { + CV_LOG_INFO(NULL, "core(parallel): plugin is incompatible, missing init function: '" << init_name << "', file: " << lib_->getName()); + } + } + + + bool checkCompatibility(const OpenCV_API_Header& api_header, unsigned int abi_version, unsigned int api_version, bool checkMinorOpenCVVersion) + { + if (api_header.opencv_version_major != CV_VERSION_MAJOR) + { + CV_LOG_ERROR(NULL, "core(parallel): wrong OpenCV major version used by plugin '" << api_header.api_description << "': " << + cv::format("%d.%d, OpenCV version is '" CV_VERSION "'", api_header.opencv_version_major, api_header.opencv_version_minor)) + return false; + } + if (!checkMinorOpenCVVersion) + { + // no checks for OpenCV minor version + } + else if (api_header.opencv_version_minor != CV_VERSION_MINOR) + { + CV_LOG_ERROR(NULL, "core(parallel): wrong OpenCV minor version used by plugin '" << api_header.api_description << "': " << + cv::format("%d.%d, OpenCV version is '" CV_VERSION "'", api_header.opencv_version_major, api_header.opencv_version_minor)) + return false; + } + CV_LOG_DEBUG(NULL, "core(parallel): initialized '" << api_header.api_description << "': built with " + << cv::format("OpenCV %d.%d (ABI/API = %d/%d)", + api_header.opencv_version_major, api_header.opencv_version_minor, + api_header.min_api_version, api_header.api_version) + << ", current OpenCV version is '" CV_VERSION "' (ABI/API = " << abi_version << "/" << api_version << ")" + ); + if (api_header.min_api_version != abi_version) // future: range can be here + { + // actually this should 
never happen due to checks in plugin's init() function + CV_LOG_ERROR(NULL, "core(parallel): plugin is not supported due to incompatible ABI = " << api_header.min_api_version); + return false; + } + if (api_header.api_version != api_version) + { + CV_LOG_INFO(NULL, "core(parallel): NOTE: plugin is supported, but there is API version mismatch: " + << cv::format("plugin API level (%d) != OpenCV API level (%d)", api_header.api_version, api_version)); + if (api_header.api_version < api_version) + { + CV_LOG_INFO(NULL, "core(parallel): NOTE: some functionality may be unavailable due to lack of support by plugin implementation"); + } + } + return true; + } + +public: + std::shared_ptr<cv::plugin::impl::DynamicLib> lib_; + const OpenCV_Core_Parallel_Plugin_API* plugin_api_; + + PluginParallelBackend(const std::shared_ptr<cv::plugin::impl::DynamicLib>& lib) + : lib_(lib) + , plugin_api_(NULL) + { + initPluginAPI(); + } + + std::shared_ptr<cv::parallel::ParallelForAPI> create() const + { + CV_Assert(plugin_api_); + + CvPluginParallelBackendAPI instancePtr = NULL; + + if (plugin_api_->v0.getInstance) + { + if (CV_ERROR_OK == plugin_api_->v0.getInstance(&instancePtr)) + { + CV_Assert(instancePtr); + // TODO C++20 "aliasing constructor" + return std::shared_ptr<cv::parallel::ParallelForAPI>(instancePtr, [](cv::parallel::ParallelForAPI*){}); // empty deleter + } + } + return std::shared_ptr<cv::parallel::ParallelForAPI>(); + } +}; + + +class PluginParallelBackendFactory CV_FINAL: public IParallelBackendFactory +{ +public: + std::string baseName_; + std::shared_ptr<PluginParallelBackend> backend; + bool initialized; +public: + PluginParallelBackendFactory(const std::string& baseName) + : baseName_(baseName) + , initialized(false) + { + // nothing, plugins are loaded on demand + } + + std::shared_ptr<cv::parallel::ParallelForAPI> create() const CV_OVERRIDE + { + if (!initialized) + { + const_cast<PluginParallelBackendFactory*>(this)->initBackend(); + } + if (backend) + return backend->create(); + return std::shared_ptr<cv::parallel::ParallelForAPI>(); + } +protected: + void initBackend() + { + AutoLock lock(getInitializationMutex()); + try + { + if (!initialized) + loadPlugin(); + } + catch (...) + { + CV_LOG_INFO(NULL, "core(parallel): exception during plugin loading: " << baseName_ << ". 
SKIP"); + } + initialized = true; + } + void loadPlugin(); +}; + +static +std::vector getPluginCandidates(const std::string& baseName) +{ + using namespace cv::utils; + using namespace cv::utils::fs; + const std::string baseName_l = toLowerCase(baseName); + const std::string baseName_u = toUpperCase(baseName); + const FileSystemPath_t baseName_l_fs = toFileSystemPath(baseName_l); + std::vector paths; + // TODO OPENCV_PLUGIN_PATH + const std::vector paths_ = getConfigurationParameterPaths("OPENCV_CORE_PLUGIN_PATH", std::vector()); + if (paths_.size() != 0) + { + for (size_t i = 0; i < paths_.size(); i++) + { + paths.push_back(toFileSystemPath(paths_[i])); + } + } + else + { + FileSystemPath_t binaryLocation; + if (getBinLocation(binaryLocation)) + { + binaryLocation = getParent(binaryLocation); +#ifndef CV_CORE_PARALLEL_PLUGIN_SUBDIRECTORY + paths.push_back(binaryLocation); +#else + paths.push_back(binaryLocation + toFileSystemPath("/") + toFileSystemPath(CV_CORE_PARALLEL_PLUGIN_SUBDIRECTORY_STR)); +#endif + } + } + const std::string default_expr = libraryPrefix() + "opencv_core_parallel_" + baseName_l + "*" + librarySuffix(); + const std::string plugin_expr = getConfigurationParameterString((std::string("OPENCV_CORE_PARALLEL_PLUGIN_") + baseName_u).c_str(), default_expr.c_str()); + std::vector results; +#ifdef _WIN32 + FileSystemPath_t moduleName = toFileSystemPath(libraryPrefix() + "opencv_core_parallel_" + baseName_l + librarySuffix()); + if (plugin_expr != default_expr) + { + moduleName = toFileSystemPath(plugin_expr); + results.push_back(moduleName); + } + for (const FileSystemPath_t& path : paths) + { + results.push_back(path + L"\\" + moduleName); + } + results.push_back(moduleName); +#else + CV_LOG_DEBUG(NULL, "core(parallel): " << baseName << " plugin's glob is '" << plugin_expr << "', " << paths.size() << " location(s)"); + for (const std::string& path : paths) + { + if (path.empty()) + continue; + std::vector candidates; + cv::glob(utils::fs::join(path, plugin_expr), candidates); + CV_LOG_DEBUG(NULL, " - " << path << ": " << candidates.size()); + copy(candidates.begin(), candidates.end(), back_inserter(results)); + } +#endif + CV_LOG_DEBUG(NULL, "Found " << results.size() << " plugin(s) for " << baseName); + return results; +} + +void PluginParallelBackendFactory::loadPlugin() +{ + for (const FileSystemPath_t& plugin : getPluginCandidates(baseName_)) + { + auto lib = std::make_shared(plugin); + if (!lib->isLoaded()) + { + continue; + } + try + { + auto pluginBackend = std::make_shared(lib); + if (!pluginBackend) + { + continue; + } + if (pluginBackend->plugin_api_ == NULL) + { + CV_LOG_ERROR(NULL, "core(parallel): no compatible plugin API for backend: " << baseName_ << " in " << toPrintablePath(plugin)); + continue; + } +#if !defined(_WIN32) + // NB: we are going to use parallel backend, so prevent automatic library unloading + // (avoid uncontrolled crashes in worker threads of underlying libraries: libgomp, libtbb) + // details: https://github.com/opencv/opencv/pull/19470#pullrequestreview-589834777 + lib->disableAutomaticLibraryUnloading(); +#endif + backend = pluginBackend; + return; + } + catch (...) + { + CV_LOG_WARNING(NULL, "core(parallel): exception during plugin initialization: " << toPrintablePath(plugin) << ". 
SKIP"); + } + } +} + +#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT && defined(PARALLEL_ENABLE_PLUGINS) + +} // namespace + +namespace parallel { + +std::shared_ptr createPluginParallelBackendFactory(const std::string& baseName) +{ +#if OPENCV_HAVE_FILESYSTEM_SUPPORT && defined(PARALLEL_ENABLE_PLUGINS) + return std::make_shared(baseName); +#else + CV_UNUSED(baseName); + return std::shared_ptr(); +#endif +} + +}} // namespace diff --git a/modules/core/src/parallel/registry_parallel.hpp b/modules/core/src/parallel/registry_parallel.hpp new file mode 100644 index 0000000000..97464f278f --- /dev/null +++ b/modules/core/src/parallel/registry_parallel.hpp @@ -0,0 +1,25 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CORE_PARALLEL_REGISTRY_HPP +#define OPENCV_CORE_PARALLEL_REGISTRY_HPP + +#include "factory_parallel.hpp" + +namespace cv { namespace parallel { + +struct ParallelBackendInfo +{ + int priority; // 1000- - default builtin priority + // 0 - disabled (OPENCV_PARALLEL_PRIORITY_ = 0) + // >10000 - prioritized list (OPENCV_PARALLEL_PRIORITY_LIST) + std::string name; + std::shared_ptr backendFactory; +}; + +const std::vector& getParallelBackendsInfo(); + +}} // namespace + +#endif // OPENCV_CORE_PARALLEL_REGISTRY_HPP diff --git a/modules/core/src/parallel/registry_parallel.impl.hpp b/modules/core/src/parallel/registry_parallel.impl.hpp new file mode 100644 index 0000000000..c8b57e7d6c --- /dev/null +++ b/modules/core/src/parallel/registry_parallel.impl.hpp @@ -0,0 +1,173 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// +// Not a standalone header, part of parallel.cpp +// + +#include "opencv2/core/utils/filesystem.private.hpp" // OPENCV_HAVE_FILESYSTEM_SUPPORT + +namespace cv { namespace parallel { + +#if OPENCV_HAVE_FILESYSTEM_SUPPORT && defined(PARALLEL_ENABLE_PLUGINS) +#define DECLARE_DYNAMIC_BACKEND(name) \ +ParallelBackendInfo { \ + 1000, name, createPluginParallelBackendFactory(name) \ +}, +#else +#define DECLARE_DYNAMIC_BACKEND(name) /* nothing */ +#endif + +#define DECLARE_STATIC_BACKEND(name, createBackendAPI) \ +ParallelBackendInfo { \ + 1000, name, std::make_shared([=] () -> std::shared_ptr { return createBackendAPI(); }) \ +}, + +static +std::vector& getBuiltinParallelBackendsInfo() +{ + static std::vector g_backends + { +#ifdef HAVE_TBB + DECLARE_STATIC_BACKEND("TBB", createParallelBackendTBB) +#elif defined(PARALLEL_ENABLE_PLUGINS) + DECLARE_DYNAMIC_BACKEND("ONETBB") // dedicated oneTBB plugin (interface >= 12000, binary incompatibe with TBB 2017-2020) + DECLARE_DYNAMIC_BACKEND("TBB") // generic TBB plugins +#endif + +#ifdef HAVE_OPENMP + DECLARE_STATIC_BACKEND("OPENMP", createParallelBackendOpenMP) +#elif defined(PARALLEL_ENABLE_PLUGINS) + DECLARE_DYNAMIC_BACKEND("OPENMP") // TODO Intel OpenMP? 
+#endif + }; + return g_backends; +}; + +static +bool sortByPriority(const ParallelBackendInfo &lhs, const ParallelBackendInfo &rhs) +{ + return lhs.priority > rhs.priority; +} + +/** @brief Manages list of enabled backends + */ +class ParallelBackendRegistry +{ +protected: + std::vector enabledBackends; + ParallelBackendRegistry() + { + enabledBackends = getBuiltinParallelBackendsInfo(); + int N = (int)enabledBackends.size(); + for (int i = 0; i < N; i++) + { + ParallelBackendInfo& info = enabledBackends[i]; + info.priority = 1000 - i * 10; + } + CV_LOG_DEBUG(NULL, "core(parallel): Builtin backends(" << N << "): " << dumpBackends()); + if (readPrioritySettings()) + { + CV_LOG_INFO(NULL, "core(parallel): Updated backends priorities: " << dumpBackends()); + N = (int)enabledBackends.size(); + } + int enabled = 0; + for (int i = 0; i < N; i++) + { + ParallelBackendInfo& info = enabledBackends[enabled]; + if (enabled != i) + info = enabledBackends[i]; + size_t param_priority = utils::getConfigurationParameterSizeT(cv::format("OPENCV_PARALLEL_PRIORITY_%s", info.name.c_str()).c_str(), (size_t)info.priority); + CV_Assert(param_priority == (size_t)(int)param_priority); // overflow check + if (param_priority > 0) + { + info.priority = (int)param_priority; + enabled++; + } + else + { + CV_LOG_INFO(NULL, "core(parallel): Disable backend: " << info.name); + } + } + enabledBackends.resize(enabled); + CV_LOG_DEBUG(NULL, "core(parallel): Available backends(" << enabled << "): " << dumpBackends()); + std::sort(enabledBackends.begin(), enabledBackends.end(), sortByPriority); + CV_LOG_INFO(NULL, "core(parallel): Enabled backends(" << enabled << ", sorted by priority): " << (enabledBackends.empty() ? std::string("N/A") : dumpBackends())); + } + + static std::vector tokenize_string(const std::string& input, char token) + { + std::vector result; + std::string::size_type prev_pos = 0, pos = 0; + while((pos = input.find(token, pos)) != std::string::npos) + { + result.push_back(input.substr(prev_pos, pos-prev_pos)); + prev_pos = ++pos; + } + result.push_back(input.substr(prev_pos)); + return result; + } + bool readPrioritySettings() + { + bool hasChanges = false; + cv::String prioritized_backends = utils::getConfigurationParameterString("OPENCV_PARALLEL_PRIORITY_LIST", NULL); + if (prioritized_backends.empty()) + return hasChanges; + CV_LOG_INFO(NULL, "core(parallel): Configured priority list (OPENCV_PARALLEL_PRIORITY_LIST): " << prioritized_backends); + const std::vector names = tokenize_string(prioritized_backends, ','); + for (size_t i = 0; i < names.size(); i++) + { + const std::string& name = names[i]; + int priority = (int)(100000 + (names.size() - i) * 1000); + bool found = false; + for (size_t k = 0; k < enabledBackends.size(); k++) + { + ParallelBackendInfo& info = enabledBackends[k]; + if (name == info.name) + { + info.priority = priority; + CV_LOG_DEBUG(NULL, "core(parallel): New backend priority: '" << name << "' => " << info.priority); + found = true; + hasChanges = true; + break; + } + } + if (!found) + { + CV_LOG_INFO(NULL, "core(parallel): Adding parallel backend (plugin): '" << name << "'"); + enabledBackends.push_back(ParallelBackendInfo{priority, name, createPluginParallelBackendFactory(name)}); + hasChanges = true; + } + } + return hasChanges; + } +public: + std::string dumpBackends() const + { + std::ostringstream os; + for (size_t i = 0; i < enabledBackends.size(); i++) + { + if (i > 0) os << "; "; + const ParallelBackendInfo& info = enabledBackends[i]; + os << info.name << '(' << 
info.priority << ')'; + } + return os.str(); + } + + static ParallelBackendRegistry& getInstance() + { + static ParallelBackendRegistry g_instance; + return g_instance; + } + + inline const std::vector& getEnabledBackends() const { return enabledBackends; } +}; + + +const std::vector& getParallelBackendsInfo() +{ + return cv::parallel::ParallelBackendRegistry::getInstance().getEnabledBackends(); +} + +}} // namespace diff --git a/modules/core/src/parallel_impl.cpp b/modules/core/src/parallel_impl.cpp index 09579a3b14..25bf4adce8 100644 --- a/modules/core/src/parallel_impl.cpp +++ b/modules/core/src/parallel_impl.cpp @@ -356,6 +356,16 @@ public: }; +// Disable thread sanitization check when CV_USE_GLOBAL_WORKERS_COND_VAR is not +// set because it triggers as the main thread reads isActive while the children +// thread writes it (but it all works out because a mutex is locked in the main +// thread and isActive re-checked). +// This is to solve issue #19463. +#if !defined(CV_USE_GLOBAL_WORKERS_COND_VAR) && defined(__clang__) && defined(__has_feature) +#if __has_feature(thread_sanitizer) +__attribute__((no_sanitize("thread"))) +#endif +#endif void WorkerThread::thread_body() { (void)cv::utils::getThreadID(); // notify OpenCV about new thread diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 21e281c007..5a0a7637c2 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -43,6 +43,10 @@ #ifndef __OPENCV_PRECOMP_H__ #define __OPENCV_PRECOMP_H__ +#ifdef BUILD_PLUGIN +#include "opencv2/core/utility.hpp" +#else // BUILD_PLUGIN + #include "opencv2/opencv_modules.hpp" #include "cvconfig.h" @@ -375,4 +379,5 @@ int cv_snprintf(char* buf, int len, const char* fmt, ...); int cv_vsnprintf(char* buf, int len, const char* fmt, va_list args); } -#endif /*_CXCORE_INTERNAL_H_*/ +#endif // BUILD_PLUGIN +#endif // __OPENCV_PRECOMP_H__ diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index 5d6dfb084a..0647c95486 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -750,6 +750,9 @@ void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng ) func( dst, rng, iterFactor ); } + +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL void cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 ) { @@ -767,6 +770,9 @@ CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor ) cv::randShuffle( dst, iter_factor, &rng ); } +#endif // OPENCV_EXCLUDE_C_API + + // Mersenne Twister random number generator. // Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c diff --git a/modules/core/src/stat_c.cpp b/modules/core/src/stat_c.cpp index d7355b9f94..8b6f0f09e4 100644 --- a/modules/core/src/stat_c.cpp +++ b/modules/core/src/stat_c.cpp @@ -5,6 +5,8 @@ #include "precomp.hpp" +#ifndef OPENCV_EXCLUDE_C_API + CV_IMPL CvScalar cvSum( const CvArr* srcarr ) { cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1)); @@ -117,3 +119,5 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr ) return !maskarr ? 
cv::norm(a, b, normType) : cv::norm(a, b, normType, mask); } + +#endif // OPENCV_EXCLUDE_C_API diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index e0fdde33e8..97a2a289c7 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -128,11 +128,14 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); } #endif -#if CV_VSX && defined __linux__ +#if (defined __ppc64__ || defined __PPC64__) && defined __linux__ # include "sys/auxv.h" # ifndef AT_HWCAP2 # define AT_HWCAP2 26 # endif +# ifndef PPC_FEATURE2_ARCH_2_07 +# define PPC_FEATURE2_ARCH_2_07 0x80000000 +# endif # ifndef PPC_FEATURE2_ARCH_3_00 # define PPC_FEATURE2_ARCH_3_00 0x00800000 # endif @@ -345,7 +348,6 @@ struct HWFeatures HWFeatures(bool run_initialize = false) { - memset( have, 0, sizeof(have[0]) * MAX_FEATURE ); if (run_initialize) initialize(); } @@ -589,14 +591,25 @@ struct HWFeatures #ifdef __mips_msa have[CV_CPU_MSA] = true; #endif - // there's no need to check VSX availability in runtime since it's always available on ppc64le CPUs - have[CV_CPU_VSX] = (CV_VSX); - // TODO: Check VSX3 availability in runtime for other platforms - #if CV_VSX && defined __linux__ - uint64 hwcap2 = getauxval(AT_HWCAP2); - have[CV_CPU_VSX3] = (hwcap2 & PPC_FEATURE2_ARCH_3_00); + + #if (defined __ppc64__ || defined __PPC64__) && defined __linux__ + unsigned int hwcap = getauxval(AT_HWCAP); + if (hwcap & PPC_FEATURE_HAS_VSX) { + hwcap = getauxval(AT_HWCAP2); + if (hwcap & PPC_FEATURE2_ARCH_3_00) { + have[CV_CPU_VSX] = have[CV_CPU_VSX3] = true; + } else { + have[CV_CPU_VSX] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; + } + } #else - have[CV_CPU_VSX3] = (CV_VSX3); + // TODO: AIX, FreeBSD + #if CV_VSX || defined _ARCH_PWR8 || defined __POWER9_VECTOR__ + have[CV_CPU_VSX] = true; + #endif + #if CV_VSX3 || defined __POWER9_VECTOR__ + have[CV_CPU_VSX3] = true; + #endif #endif #if defined __riscv && defined __riscv_vector @@ -730,7 +743,7 @@ struct HWFeatures } } - bool have[MAX_FEATURE+1]; + bool have[MAX_FEATURE+1]{}; }; static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false); @@ -1810,7 +1823,7 @@ class ParseError { std::string bad_value; public: - ParseError(const std::string bad_value_) :bad_value(bad_value_) {} + ParseError(const std::string &bad_value_) :bad_value(bad_value_) {} std::string toString(const std::string ¶m) const { std::ostringstream out; @@ -2313,6 +2326,13 @@ public: ippTopFeatures = ippCPUID_SSE42; pIppLibInfo = ippiGetLibVersion(); + + // workaround: https://github.com/opencv/opencv/issues/12959 + std::string ippName(pIppLibInfo->Name ? 
pIppLibInfo->Name : ""); + if (ippName.find("SSE4.2") != std::string::npos) + { + ippTopFeatures = ippCPUID_SSE42; + } } public: @@ -2344,16 +2364,12 @@ unsigned long long getIppFeatures() #endif } -unsigned long long getIppTopFeatures(); - +#ifdef HAVE_IPP unsigned long long getIppTopFeatures() { -#ifdef HAVE_IPP return getIPPSingleton().ippTopFeatures; -#else - return 0; -#endif } +#endif void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line) { diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 0ec6270a70..09ba92ecde 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -230,7 +230,7 @@ UMatDataAutoLock::~UMatDataAutoLock() //////////////////////////////// UMat //////////////////////////////// -UMat::UMat(UMatUsageFlags _usageFlags) +UMat::UMat(UMatUsageFlags _usageFlags) CV_NOEXCEPT : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(_usageFlags), u(0), offset(0), size(&rows) {} @@ -1318,88 +1318,6 @@ UMat UMat::t() const return m; } -UMat UMat::inv(int method) const -{ - UMat m; - invert(*this, m, method); - return m; -} - -UMat UMat::mul(InputArray m, double scale) const -{ - UMat dst; - multiply(*this, m, dst, scale); - return dst; -} - -#ifdef HAVE_OPENCL - -static bool ocl_dot( InputArray _src1, InputArray _src2, double & res ) -{ - UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1); - - int type = src1.type(), depth = CV_MAT_DEPTH(type), - kercn = ocl::predictOptimalVectorWidth(src1, src2); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; - - if ( !doubleSupport && depth == CV_64F ) - return false; - - int dbsize = ocl::Device::getDefault().maxComputeUnits(); - size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - int ddepth = std::max(CV_32F, depth); - - int wgs2_aligned = 1; - while (wgs2_aligned < (int)wgs) - wgs2_aligned <<= 1; - wgs2_aligned >>= 1; - - char cvt[40]; - ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, - format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT " - "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth), - ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), - ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt), - (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", - _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "", - _src2.isContinuous() ? 
" -D HAVE_SRC2_CONT" : "", kercn)); - if (k.empty()) - return false; - - UMat db(1, dbsize, ddepth); - - ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), - src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), - dbarg = ocl::KernelArg::PtrWriteOnly(db); - - k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg); - - size_t globalsize = dbsize * wgs; - if (k.run(1, &globalsize, &wgs, true)) - { - res = sum(db.getMat(ACCESS_READ))[0]; - return true; - } - return false; -} - -#endif - -double UMat::dot(InputArray m) const -{ - CV_INSTRUMENT_REGION(); - - CV_Assert(m.sameSize(*this) && m.type() == type()); - -#ifdef HAVE_OPENCL - double r = 0; - CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r) -#endif - - return getMat(ACCESS_READ).dot(m); -} - UMat UMat::zeros(int rows, int cols, int type) { return UMat(rows, cols, type, Scalar::all(0)); @@ -1430,18 +1348,6 @@ UMat UMat::ones(int ndims, const int* sz, int type) return UMat(ndims, sz, type, Scalar(1)); } -UMat UMat::eye(int rows, int cols, int type) -{ - return UMat::eye(Size(cols, rows), type); -} - -UMat UMat::eye(Size size, int type) -{ - UMat m(size, type); - setIdentity(m); - return m; -} - } /* End of file. */ diff --git a/modules/core/src/utils/filesystem.cpp b/modules/core/src/utils/filesystem.cpp index e75640b86b..17004b27dd 100644 --- a/modules/core/src/utils/filesystem.cpp +++ b/modules/core/src/utils/filesystem.cpp @@ -587,3 +587,8 @@ cv::String getCacheDirectory(const char* /*sub_directory_name*/, const char* /*c #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT }}} // namespace + + +#if OPENCV_HAVE_FILESYSTEM_SUPPORT +#include "plugin_loader.impl.hpp" +#endif diff --git a/modules/core/src/utils/plugin_loader.impl.hpp b/modules/core/src/utils/plugin_loader.impl.hpp new file mode 100644 index 0000000000..4173c9d802 --- /dev/null +++ b/modules/core/src/utils/plugin_loader.impl.hpp @@ -0,0 +1,80 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// +// Not a standalone header, part of filesystem.cpp +// + +#include "opencv2/core/utils/plugin_loader.private.hpp" + +#if !OPENCV_HAVE_FILESYSTEM_SUPPORT +#error "Invalid build configuration" +#endif + +#if 0 // TODO +#ifdef NDEBUG +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 +#else +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 +#endif +#include +#endif + +namespace cv { namespace plugin { namespace impl { + +DynamicLib::DynamicLib(const FileSystemPath_t& filename) + : handle(0), fname(filename), disableAutoUnloading_(false) +{ + libraryLoad(filename); +} + +DynamicLib::~DynamicLib() +{ + if (!disableAutoUnloading_) + { + libraryRelease(); + } + else if (handle) + { + CV_LOG_INFO(NULL, "skip auto unloading (disabled): " << toPrintablePath(fname)); + handle = 0; + } +} + +void* DynamicLib::getSymbol(const char* symbolName) const +{ + if (!handle) + { + return 0; + } + void* res = getSymbol_(handle, symbolName); + if (!res) + { + CV_LOG_DEBUG(NULL, "No symbol '" << symbolName << "' in " << toPrintablePath(fname)); + } + return res; +} + +const std::string DynamicLib::getName() const +{ + return toPrintablePath(fname); +} + +void DynamicLib::libraryLoad(const FileSystemPath_t& filename) +{ + handle = libraryLoad_(filename); + CV_LOG_INFO(NULL, "load " << toPrintablePath(filename) << " => " << (handle ? 
"OK" : "FAILED")); +} + +void DynamicLib::libraryRelease() +{ + if (handle) + { + CV_LOG_INFO(NULL, "unload "<< toPrintablePath(fname)); + libraryRelease_(handle); + handle = 0; + } +} + +}}} // namespace diff --git a/modules/core/src/va_intel.cpp b/modules/core/src/va_intel.cpp index c640a08658..1d2b1cbf32 100644 --- a/modules/core/src/va_intel.cpp +++ b/modules/core/src/va_intel.cpp @@ -33,6 +33,17 @@ using namespace cv; #endif #endif +#ifdef HAVE_VA +#ifndef OPENCV_LIBVA_LINK +#include "va_wrapper.impl.hpp" +#else +namespace cv { namespace detail { +static void init_libva() { /* nothing */ } +}} // namespace +#endif +using namespace cv::detail; +#endif + namespace cv { namespace va_intel { #ifdef HAVE_VA_INTEL @@ -54,6 +65,8 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop) #if !defined(HAVE_VA) NO_VA_SUPPORT_ERROR; #else // !HAVE_VA + init_libva(); + # ifdef HAVE_VA_INTEL contextInitialized = false; if (tryInterop) @@ -176,7 +189,7 @@ static bool ocl_convert_nv12_to_bgr(cl_mem clImageY, cl_mem clImageUV, cl_mem cl k.args(clImageY, clImageUV, clBuffer, step, cols, rows); - size_t globalsize[] = { (size_t)cols, (size_t)rows }; + size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 }; return k.run(2, globalsize, 0, false); } @@ -189,7 +202,7 @@ static bool ocl_convert_bgr_to_nv12(cl_mem clBuffer, int step, int cols, int row k.args(clBuffer, step, cols, rows, clImageY, clImageUV); - size_t globalsize[] = { (size_t)cols, (size_t)rows }; + size_t globalsize[] = { (size_t)cols/2, (size_t)rows/2 }; return k.run(2, globalsize, 0, false); } #endif // HAVE_VA_INTEL @@ -507,6 +520,8 @@ void convertToVASurface(VADisplay display, InputArray src, VASurfaceID surface, #if !defined(HAVE_VA) NO_VA_SUPPORT_ERROR; #else // !HAVE_VA + init_libva(); + const int stype = CV_8UC3; int srcType = src.type(); @@ -611,6 +626,8 @@ void convertFromVASurface(VADisplay display, VASurfaceID surface, Size size, Out #if !defined(HAVE_VA) NO_VA_SUPPORT_ERROR; #else // !HAVE_VA + init_libva(); + const int dtype = CV_8UC3; // TODO Need to specify ACCESS_WRITE here somehow to prevent useless data copying! diff --git a/modules/core/src/va_wrapper.impl.hpp b/modules/core/src/va_wrapper.impl.hpp new file mode 100644 index 0000000000..260d3ba49b --- /dev/null +++ b/modules/core/src/va_wrapper.impl.hpp @@ -0,0 +1,85 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+
+//
+// Not a standalone header, part of va_intel.cpp
+//
+
+#include "opencv2/core/utils/plugin_loader.private.hpp"  // DynamicLib
+
+namespace cv { namespace detail {
+
+typedef VAStatus (*FN_vaDeriveImage)(VADisplay dpy, VASurfaceID surface, VAImage *image);
+typedef VAStatus (*FN_vaDestroyImage)(VADisplay dpy, VAImageID image);
+typedef VAStatus (*FN_vaMapBuffer)(VADisplay dpy, VABufferID buf_id, void **pbuf);
+typedef VAStatus (*FN_vaSyncSurface)(VADisplay dpy, VASurfaceID render_target);
+typedef VAStatus (*FN_vaUnmapBuffer)(VADisplay dpy, VABufferID buf_id);
+
+static FN_vaDeriveImage fn_vaDeriveImage = NULL;
+static FN_vaDestroyImage fn_vaDestroyImage = NULL;
+static FN_vaMapBuffer fn_vaMapBuffer = NULL;
+static FN_vaSyncSurface fn_vaSyncSurface = NULL;
+static FN_vaUnmapBuffer fn_vaUnmapBuffer = NULL;
+
+#define vaDeriveImage fn_vaDeriveImage
+#define vaDestroyImage fn_vaDestroyImage
+#define vaMapBuffer fn_vaMapBuffer
+#define vaSyncSurface fn_vaSyncSurface
+#define vaUnmapBuffer fn_vaUnmapBuffer
+
+
+static std::shared_ptr<cv::plugin::impl::DynamicLib> loadLibVA()
+{
+    std::shared_ptr<cv::plugin::impl::DynamicLib> lib;
+    const char* envPath = getenv("OPENCV_LIBVA_RUNTIME");
+    if (envPath)
+    {
+        lib = std::make_shared<cv::plugin::impl::DynamicLib>(envPath);
+        return lib;
+    }
+    static const char* const candidates[] = {
+        "libva.so",
+        "libva.so.2",
+        "libva.so.1",
+    };
+    for (int i = 0; i < 3; ++i)
+    {
+        lib = std::make_shared<cv::plugin::impl::DynamicLib>(candidates[i]);
+        if (lib->isLoaded())
+            break;
+    }
+    return lib;
+}
+static void init_libva()
+{
+    static bool initialized = false;
+    static auto library = loadLibVA();
+    if (!initialized)
+    {
+        if (!library || !library->isLoaded())
+        {
+            library.reset();
+            CV_Error(cv::Error::StsBadFunc, "OpenCV can't load VA library (libva)");
+        }
+        auto& lib = *library.get();
+#define VA_LOAD_SYMBOL(name) fn_ ## name = reinterpret_cast<FN_ ## name>(lib.getSymbol(#name)); \
+        if (!fn_ ## name) \
+        { \
+            library.reset(); \
+            initialized = true; \
+            CV_Error_(cv::Error::StsBadFunc, ("OpenCV can't load VA library (libva), missing symbol: %s", #name)); \
+        }
+
+        VA_LOAD_SYMBOL(vaDeriveImage);
+        VA_LOAD_SYMBOL(vaDestroyImage);
+        VA_LOAD_SYMBOL(vaMapBuffer);
+        VA_LOAD_SYMBOL(vaSyncSurface);
+        VA_LOAD_SYMBOL(vaUnmapBuffer);
+        initialized = true;
+    }
+    if (!library)
+        CV_Error(cv::Error::StsBadFunc, "OpenCV can't load/initialize VA library (libva)");
+}
+
+}}  // namespace
diff --git a/modules/core/test/ocl/test_opencl.cpp b/modules/core/test/ocl/test_opencl.cpp
index 27cd82d424..e639f72948 100644
--- a/modules/core/test/ocl/test_opencl.cpp
+++ b/modules/core/test/ocl/test_opencl.cpp
@@ -120,6 +120,11 @@
     cv::ocl::ProgramSource src = cv::ocl::ProgramSource::fromSPIR(module_name, "simple_spir", (uchar*)&program_binary_code[0], program_binary_code.size(), "");
     cv::String errmsg;
     cv::ocl::Program program(src, "", errmsg);
+    if (program.ptr() == NULL && device.isAMD())
+    {
+        // https://community.amd.com/t5/opencl/spir-support-in-new-drivers-lost/td-p/170165
+        throw cvtest::SkipTestException("Bypass AMD OpenCL runtime bug: 'cl_khr_spir' extension is declared, but it doesn't really work");
+    }
     ASSERT_TRUE(program.ptr() != NULL);
     k.create("test_kernel", program);
 }
@@ -127,4 +132,120 @@
     testOpenCLKernel(k);
 }
 
+TEST(OpenCL, move_construct_assign)
+{
+    cv::ocl::Context ctx1 = cv::ocl::Context::getDefault();
+    if (!ctx1.ptr())
+    {
+        throw cvtest::SkipTestException("OpenCL is not available");
+    }
+    void* const ctx_ptr = ctx1.ptr();
+    cv::ocl::Context ctx2(std::move(ctx1));
+    ASSERT_EQ(ctx1.ptr(), nullptr);
ASSERT_EQ(ctx2.ptr(), ctx_ptr); + cv::ocl::Context ctx3 = std::move(ctx2); + ASSERT_EQ(ctx2.ptr(), nullptr); + ASSERT_EQ(ctx3.ptr(), ctx_ptr); + + cv::ocl::Platform pl1 = cv::ocl::Platform::getDefault(); + void* const pl_ptr = pl1.ptr(); + cv::ocl::Platform pl2(std::move(pl1)); + ASSERT_EQ(pl1.ptr(), nullptr); + ASSERT_EQ(pl2.ptr(), pl_ptr); + cv::ocl::Platform pl3 = std::move(pl2); + ASSERT_EQ(pl2.ptr(), nullptr); + ASSERT_EQ(pl3.ptr(), pl_ptr); + + std::vector platformInfos; + cv::ocl::getPlatfomsInfo(platformInfos); + const cv::String pi_name = platformInfos[0].name(); + cv::ocl::PlatformInfo pinfo2(std::move(platformInfos[0])); + ASSERT_EQ(platformInfos[0].name(), cv::String()); + ASSERT_EQ(pinfo2.name(), pi_name); + cv::ocl::PlatformInfo pinfo3 = std::move(pinfo2); + ASSERT_EQ(pinfo2.name(), cv::String()); + ASSERT_EQ(pinfo3.name(), pi_name); + + cv::ocl::Queue q1 = cv::ocl::Queue::getDefault(); + void* const q_ptr = q1.ptr(); + cv::ocl::Queue q2(std::move(q1)); + ASSERT_EQ(q1.ptr(), nullptr); + ASSERT_EQ(q2.ptr(), q_ptr); + cv::ocl::Queue q3 = std::move(q2); + ASSERT_EQ(q2.ptr(), nullptr); + ASSERT_EQ(q3.ptr(), q_ptr); + + cv::ocl::Device d1 = cv::ocl::Device::getDefault(); + if (!d1.compilerAvailable()) + { + throw cvtest::SkipTestException("OpenCL compiler is not available"); + } + void* const d_ptr = d1.ptr(); + cv::ocl::Device d2(std::move(d1)); + ASSERT_EQ(d1.ptr(), nullptr); + ASSERT_EQ(d2.ptr(), d_ptr); + cv::ocl::Device d3 = std::move(d2); + ASSERT_EQ(d2.ptr(), nullptr); + ASSERT_EQ(d3.ptr(), d_ptr); + + if (d3.imageSupport()) { + cv::UMat umat1 = cv::UMat::ones(640, 480, CV_32FC1); + cv::ocl::Image2D img1(umat1); + void *const img_ptr = img1.ptr(); + cv::ocl::Image2D img2(std::move(img1)); + ASSERT_EQ(img1.ptr(), nullptr); + ASSERT_EQ(img2.ptr(), img_ptr); + cv::ocl::Image2D img3 = std::move(img2); + ASSERT_EQ(img2.ptr(), nullptr); + ASSERT_EQ(img3.ptr(), img_ptr); + } + + static const char* opencl_kernel_src = +"__kernel void test_kernel(__global const uchar* src, int src_step, int src_offset,\n" +" __global uchar* dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,\n" +" int c)\n" +"{\n" +" int x = get_global_id(0);\n" +" int y = get_global_id(1);\n" +" if (x < dst_cols && y < dst_rows)\n" +" {\n" +" int src_idx = y * src_step + x + src_offset;\n" +" int dst_idx = y * dst_step + x + dst_offset;\n" +" dst[dst_idx] = src[src_idx] + c;\n" +" }\n" +"}\n"; + cv::String module_name; // empty to disable OpenCL cache + cv::ocl::ProgramSource ps1(module_name, "move_construct_assign", opencl_kernel_src, ""); + cv::ocl::ProgramSource::Impl* const ps_ptr = ps1.getImpl(); + cv::ocl::ProgramSource ps2(std::move(ps1)); + ASSERT_EQ(ps1.getImpl(), nullptr); + ASSERT_EQ(ps2.getImpl(), ps_ptr); + cv::ocl::ProgramSource ps3 = std::move(ps2); + ASSERT_EQ(ps2.getImpl(), nullptr); + ASSERT_EQ(ps3.getImpl(), ps_ptr); + + cv::String errmsg; + cv::ocl::Program prog1(ps3, "", errmsg); + void* const prog_ptr = prog1.ptr(); + ASSERT_NE(prog_ptr, nullptr); + cv::ocl::Program prog2(std::move(prog1)); + ASSERT_EQ(prog1.ptr(), nullptr); + ASSERT_EQ(prog2.ptr(), prog_ptr); + cv::ocl::Program prog3 = std::move(prog2); + ASSERT_EQ(prog2.ptr(), nullptr); + ASSERT_EQ(prog3.ptr(), prog_ptr); + + cv::ocl::Kernel k1("test_kernel", prog3); + void* const k_ptr = k1.ptr(); + ASSERT_NE(k_ptr, nullptr); + cv::ocl::Kernel k2(std::move(k1)); + ASSERT_EQ(k1.ptr(), nullptr); + ASSERT_EQ(k2.ptr(), k_ptr); + cv::ocl::Kernel k3 = std::move(k2); + ASSERT_EQ(k2.ptr(), nullptr); + ASSERT_EQ(k3.ptr(), k_ptr); + + 
testOpenCLKernel(k3); +} + }} // namespace diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 75a7004f81..effb0e68e0 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2456,4 +2456,16 @@ TEST(Core_MinMaxIdx, rows_overflow) } +TEST(Core_Magnitude, regression_19506) +{ + for (int N = 1; N <= 64; ++N) + { + Mat a(1, N, CV_32FC1, Scalar::all(1e-20)); + Mat res; + magnitude(a, a, res); + EXPECT_LE(cvtest::norm(res, NORM_L1), 1e-15) << N; + } +} + + }} // namespace diff --git a/modules/core/test/test_cuda.cpp b/modules/core/test/test_cuda.cpp new file mode 100755 index 0000000000..a3e0a9034b --- /dev/null +++ b/modules/core/test/test_cuda.cpp @@ -0,0 +1,21 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#if defined(HAVE_CUDA) + +#include "test_precomp.hpp" +#include +#include "opencv2/core/cuda.hpp" + +namespace opencv_test { namespace { + +TEST(CUDA_Stream, construct_cudaFlags) +{ + cv::cuda::Stream stream(cudaStreamNonBlocking); + EXPECT_NE(stream.cudaPtr(), nullptr); +} + +}} // namespace + +#endif diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 84da496b42..269ebe0f2a 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1466,7 +1466,7 @@ template struct TheTest R r1 = vx_load_expand((const cv::float16_t*)data.a.d); R r2(r1); EXPECT_EQ(1.0f, r1.get0()); - vx_store(data_f32.a.d, r2); + v_store(data_f32.a.d, r2); EXPECT_EQ(-2.0f, data_f32.a.d[R::nlanes - 1]); out.a.clear(); diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 74ee167c54..9b6145d733 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -1988,7 +1988,6 @@ class TestInputArrayRangeChecking { C(EXPR); C(MATX); C(STD_VECTOR); - C(STD_ARRAY); C(NONE); C(STD_VECTOR_VECTOR); C(STD_BOOL_VECTOR); diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index 3934ceb716..67d0a53995 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -2,6 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" +#include namespace opencv_test { namespace { @@ -189,7 +190,7 @@ TEST(Core_OutputArrayCreate, _13772) TEST(Core_String, find_last_of__with__empty_string) { cv::String s; - size_t p = s.find_last_of("q", 0); + size_t p = s.find_last_of('q', 0); // npos is not exported: EXPECT_EQ(cv::String::npos, p); EXPECT_EQ(std::string::npos, p); } @@ -783,5 +784,18 @@ TEST(Core_Check, testSize_1) } } +TEST(Core_Allocation, alignedAllocation) +{ + // iterate from size=1 to approximate byte size of 8K 32bpp image buffer + for (int i = 0; i < 200; i++) { + const size_t size = static_cast(std::pow(1.091, (double)i)); + void * const buf = cv::fastMalloc(size); + ASSERT_NE((uintptr_t)0, (uintptr_t)buf) + << "failed to allocate memory"; + ASSERT_EQ((uintptr_t)0, (uintptr_t)buf % CV_MALLOC_ALIGN) + << "memory not aligned to " << CV_MALLOC_ALIGN; + cv::fastFree(buf); + } +} }} // namespace diff --git a/modules/core/test/test_opencl.cpp b/modules/core/test/test_opencl.cpp index f4f195ea6e..17cd7b5c89 100644 --- a/modules/core/test/test_opencl.cpp +++ b/modules/core/test/test_opencl.cpp @@ -8,6 +8,23 @@ namespace opencv_test { namespace ocl { +static +testing::internal::ParamGenerator getOpenCLTestConfigurations() +{ + if (!cv::ocl::useOpenCL()) + { + return testing::ValuesIn(std::vector()); + } + + std::vector configurations = { + ":GPU:0", + ":GPU:1", + ":CPU:0", + }; + return testing::ValuesIn(configurations); +} + + static void executeUMatCall(bool requireOpenCL = true) { UMat a(100, 100, CV_8UC1, Scalar::all(0)); @@ -45,7 +62,7 @@ TEST(OCL_Context, createFromDevice) EXPECT_TRUE(context.getImpl() == context2.getImpl()) << "Broken cache for OpenCL context (device)"; } -TEST(OCL_OpenCLExecutionContext, basic) +TEST(OCL_OpenCLExecutionContextDefault, basic) { bool useOCL = cv::ocl::useOpenCL(); @@ -72,7 +89,7 @@ TEST(OCL_OpenCLExecutionContext, basic) EXPECT_TRUE(queue.getImpl() == queue2.getImpl()); } -TEST(OCL_OpenCLExecutionContext, createAndBind) +TEST(OCL_OpenCLExecutionContextDefault, createAndBind) { bool useOCL = cv::ocl::useOpenCL(); @@ -106,7 +123,9 @@ TEST(OCL_OpenCLExecutionContext, createAndBind) } } -TEST(OCL_OpenCLExecutionContext, createGPU) +typedef testing::TestWithParam OCL_OpenCLExecutionContext_P; + +TEST_P(OCL_OpenCLExecutionContext_P, multipleBindAndExecute) { bool useOCL = cv::ocl::useOpenCL(); @@ -120,12 +139,11 @@ TEST(OCL_OpenCLExecutionContext, createGPU) ASSERT_FALSE(ctx.empty()); - ocl::Context context = ocl::Context::create(":GPU:1"); + std::string opencl_device = GetParam(); + ocl::Context context = ocl::Context::create(opencl_device); if (context.empty()) { - context = ocl::Context::create(":CPU:"); - if (context.empty()) - throw SkipTestException("OpenCL GPU1/CPU devices are not available"); + throw SkipTestException(std::string("OpenCL device is not available: '") + opencl_device + "'"); } ocl::Device device = context.device(0); @@ -135,8 +153,10 @@ TEST(OCL_OpenCLExecutionContext, createGPU) try { + std::cout << "ctx2..." << std::endl; ctx2.bind(); executeUMatCall(); + std::cout << "ctx..." 
<< std::endl; ctx.bind(); executeUMatCall(); } @@ -147,7 +167,7 @@ TEST(OCL_OpenCLExecutionContext, createGPU) } } -TEST(OCL_OpenCLExecutionContext, ScopeTest) +TEST_P(OCL_OpenCLExecutionContext_P, ScopeTest) { bool useOCL = cv::ocl::useOpenCL(); @@ -161,12 +181,11 @@ TEST(OCL_OpenCLExecutionContext, ScopeTest) ASSERT_FALSE(ctx.empty()); - ocl::Context context = ocl::Context::create(":GPU:1"); + std::string opencl_device = GetParam(); + ocl::Context context = ocl::Context::create(opencl_device); if (context.empty()) { - context = ocl::Context::create(":CPU:"); - if (context.empty()) - context = ctx.getContext(); + throw SkipTestException(std::string("OpenCL device is not available: '") + opencl_device + "'"); } ocl::Device device = context.device(0); @@ -188,4 +207,9 @@ TEST(OCL_OpenCLExecutionContext, ScopeTest) executeUMatCall(); } + + +INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_OpenCLExecutionContext_P, getOpenCLTestConfigurations()); + + } } // namespace opencv_test::ocl diff --git a/modules/core/test/test_operations.cpp b/modules/core/test/test_operations.cpp index 645045674a..934028f3ae 100644 --- a/modules/core/test/test_operations.cpp +++ b/modules/core/test/test_operations.cpp @@ -1551,4 +1551,14 @@ TEST(Core_MatExpr, empty_check_15760) EXPECT_THROW(Mat c = Mat().cross(Mat()), cv::Exception); } +TEST(Core_Arithm, scalar_handling_19599) // https://github.com/opencv/opencv/issues/19599 (OpenCV 4.x+ only) +{ + Mat a(1, 1, CV_32F, Scalar::all(1)); + Mat b(4, 1, CV_64F, Scalar::all(1)); // MatExpr may convert Scalar to Mat + Mat c; + EXPECT_NO_THROW(cv::multiply(a, b, c)); + EXPECT_EQ(1, c.cols); + EXPECT_EQ(1, c.rows); +} + }} // namespace diff --git a/modules/core/test/test_quaternion.cpp b/modules/core/test/test_quaternion.cpp index 0025674ec7..4e4e89629c 100644 --- a/modules/core/test/test_quaternion.cpp +++ b/modules/core/test/test_quaternion.cpp @@ -3,11 +3,15 @@ // of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" +#include // EXPECT_MAT_NEAR + #include -#include -using namespace cv; +#include + namespace opencv_test{ namespace { -class QuatTest: public ::testing::Test { + +class QuatTest: public ::testing::Test +{ protected: void SetUp() override { @@ -18,7 +22,7 @@ protected: } double scalar = 2.5; double angle = CV_PI; - int qNorm2 = 2; + double qNorm2 = 2; Vec axis{1, 1, 1}; Vec unAxis{0, 0, 0}; Vec unitAxis{1.0 / sqrt(3), 1.0 / sqrt(3), 1.0 / sqrt(3)}; @@ -37,7 +41,8 @@ protected: }; -TEST_F(QuatTest, constructor){ +TEST_F(QuatTest, constructor) +{ Vec coeff{1, 2, 3, 4}; EXPECT_EQ(Quat (coeff), q1); EXPECT_EQ(q3, q3UnitAxis); @@ -78,7 +83,8 @@ TEST_F(QuatTest, constructor){ EXPECT_EQ(Quatd::createFromRvec(Vec3d(0, 0, 0)), qIdentity); } -TEST_F(QuatTest, basicfuns){ +TEST_F(QuatTest, basicfuns) +{ Quat q1Conj{1, -2, -3, -4}; EXPECT_EQ(q3Norm2.normalize(), q3); EXPECT_EQ(q1.norm(), sqrt(30)); @@ -124,7 +130,7 @@ TEST_F(QuatTest, basicfuns){ EXPECT_EQ(exp(qNull), qIdentity); EXPECT_EQ(exp(Quatd(0, angle * unitAxis[0] / 2, angle * unitAxis[1] / 2, angle * unitAxis[2] / 2)), q3); - EXPECT_EQ(power(q3, 2), Quatd::createFromAngleAxis(2*angle, axis)); + EXPECT_EQ(power(q3, 2.0), Quatd::createFromAngleAxis(2*angle, axis)); EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), 2.0, assumeUnit), Quatd(-0.5,0.5,0.5,0.5)); EXPECT_EQ(power(Quatd(0.5, 0.5, 0.5, 0.5), -2.0), Quatd(-0.5,-0.5,-0.5,-0.5)); EXPECT_EQ(sqrt(q1), power(q1, 0.5)); @@ -160,7 +166,8 @@ TEST_F(QuatTest, basicfuns){ EXPECT_EQ(tan(atan(q1)), q1); } -TEST_F(QuatTest, opeartor){ +TEST_F(QuatTest, test_operator) +{ Quatd minusQ{-1, -2, -3, -4}; Quatd qAdd{3.5, 0, 6.5, 8}; Quatd qMinus{-1.5, 4, -0.5, 0}; @@ -171,7 +178,15 @@ TEST_F(QuatTest, opeartor){ EXPECT_EQ(-q1, minusQ); EXPECT_EQ(q1 + q2, qAdd); + EXPECT_EQ(q1 + scalar, Quatd(3.5, 2, 3, 4)); + EXPECT_EQ(scalar + q1, Quatd(3.5, 2, 3, 4)); + EXPECT_EQ(q1 + 2.0, Quatd(3, 2, 3, 4)); + EXPECT_EQ(2.0 + q1, Quatd(3, 2, 3, 4)); EXPECT_EQ(q1 - q2, qMinus); + EXPECT_EQ(q1 - scalar, Quatd(-1.5, 2, 3, 4)); + EXPECT_EQ(scalar - q1, Quatd(1.5, -2, -3, -4)); + EXPECT_EQ(q1 - 2.0, Quatd(-1, 2, 3, 4)); + EXPECT_EQ(2.0 - q1, Quatd(1, -2, -3, -4)); EXPECT_EQ(q1 * q2, qMultq); EXPECT_EQ(q1 * scalar, qMults); EXPECT_EQ(scalar * q1, qMults); @@ -195,7 +210,8 @@ TEST_F(QuatTest, opeartor){ EXPECT_ANY_THROW(q1.at(4)); } -TEST_F(QuatTest, quatAttrs){ +TEST_F(QuatTest, quatAttrs) +{ double angleQ1 = 2 * acos(1.0 / sqrt(30)); Vec3d axis1{0.3713906763541037, 0.557086014, 0.742781352}; Vec q1axis1 = q1.getAxis(); @@ -215,7 +231,8 @@ TEST_F(QuatTest, quatAttrs){ EXPECT_NEAR(axis1[2], axis1[2], 1e-6); } -TEST_F(QuatTest, interpolation){ +TEST_F(QuatTest, interpolation) +{ Quatd qNoRot = Quatd::createFromAngleAxis(0, axis); Quatd qLerpInter(1.0 / 2, sqrt(3) / 6, sqrt(3) / 6, sqrt(3) / 6); EXPECT_EQ(Quatd::lerp(qNoRot, q3, 0), qNoRot); @@ -250,6 +267,226 @@ TEST_F(QuatTest, interpolation){ EXPECT_EQ(Quatd::spline(tr1, tr2, tr3, tr3, 0.5), Quatd(0.336889853392, 0.543600719487, 0.543600719487, 0.543600719487)); } -} // namespace +static const Quatd qEuler[24] = { + Quatd(0.7233214, 0.3919013, 0.2005605, 0.5319728), //INT_XYZ + Quatd(0.8223654, 0.0222635, 0.3604221, 0.4396766), //INT_XZY + Quatd(0.822365, 0.439677, 0.0222635, 0.360422), //INT_YXZ + Quatd(0.723321, 0.531973, 0.391901, 0.20056), //INT_YZX + Quatd(0.723321, 0.20056, 0.531973, 0.391901), //INT_ZXY + Quatd(0.822365, 0.360422, 0.439677, 0.0222635), //INT_ZYX + Quatd(0.653285, 0.65328, 0.369641, -0.0990435), //INT_XYX + Quatd(0.653285, 0.65328, 
0.0990435, 0.369641), //INT_XZX + Quatd(0.653285, 0.369641, 0.65328, 0.0990435), //INT_YXY + Quatd(0.653285, -0.0990435, 0.65328, 0.369641), //INT_YZY + Quatd(0.653285, 0.369641, -0.0990435, 0.65328), //INT_ZXZ + Quatd(0.653285, 0.0990435, 0.369641, 0.65328), //INT_ZYZ -}// opencv_test \ No newline at end of file + Quatd(0.822365, 0.0222635, 0.439677, 0.360422), //EXT_XYZ + Quatd(0.723321, 0.391901, 0.531973, 0.20056), //EXT_XZY + Quatd(0.723321, 0.20056, 0.391901, 0.531973), //EXT_YXZ + Quatd(0.822365, 0.360422, 0.0222635, 0.439677), //EXT_YZX + Quatd(0.822365, 0.439677, 0.360422, 0.0222635), //EXT_ZXY + Quatd(0.723321, 0.531973, 0.20056, 0.391901), //EXT_ZYX + Quatd(0.653285, 0.65328, 0.369641, 0.0990435), //EXT_XYX + Quatd(0.653285, 0.65328, -0.0990435, 0.369641), //EXT_XZX + Quatd(0.653285, 0.369641, 0.65328, -0.0990435), //EXT_YXY + Quatd(0.653285, 0.0990435, 0.65328, 0.369641), //EXT_YZY + Quatd(0.653285, 0.369641, 0.0990435, 0.65328), //EXT_ZXZ + Quatd(0.653285, -0.0990435, 0.369641, 0.65328) //EXT_ZYZ +}; + +TEST_F(QuatTest, EulerAngles) +{ + Vec3d test_angle = {0.523598, 0.78539, 1.04719}; + for (QuatEnum::EulerAnglesType i = QuatEnum::EulerAnglesType::INT_XYZ; i <= QuatEnum::EulerAnglesType::EXT_ZYZ; i = (QuatEnum::EulerAnglesType)(i + 1)) + { + SCOPED_TRACE(cv::format("EulerAnglesType=%d", i)); + Quatd q = Quatd::createFromEulerAngles(test_angle, i); + EXPECT_EQ(q, qEuler[i]); + Vec3d Euler_Angles = q.toEulerAngles(i); + EXPECT_NEAR(Euler_Angles[0], test_angle[0], 1e-6); + EXPECT_NEAR(Euler_Angles[1], test_angle[1], 1e-6); + EXPECT_NEAR(Euler_Angles[2], test_angle[2], 1e-6); + } + Quatd qEuler0 = {0, 0, 0, 0}; + EXPECT_ANY_THROW(qEuler0.toEulerAngles(QuatEnum::INT_XYZ)); + + Quatd qEulerLock1 = {0.5612665, 0.43042, 0.5607083, 0.4304935}; + Vec3d test_angle_lock1 = {1.3089878, CV_PI * 0.5, 0}; + Vec3d Euler_Angles_solute_1 = qEulerLock1.toEulerAngles(QuatEnum::INT_XYZ); + EXPECT_NEAR(Euler_Angles_solute_1[0], test_angle_lock1[0], 1e-6); + EXPECT_NEAR(Euler_Angles_solute_1[1], test_angle_lock1[1], 1e-6); + EXPECT_NEAR(Euler_Angles_solute_1[2], test_angle_lock1[2], 1e-6); + + Quatd qEulerLock2 = {0.7010574, 0.0922963, 0.7010573, -0.0922961}; + Vec3d test_angle_lock2 = {-0.2618, CV_PI * 0.5, 0}; + Vec3d Euler_Angles_solute_2 = qEulerLock2.toEulerAngles(QuatEnum::INT_ZYX); + EXPECT_NEAR(Euler_Angles_solute_2[0], test_angle_lock2[0], 1e-6); + EXPECT_NEAR(Euler_Angles_solute_2[1], test_angle_lock2[1], 1e-6); + EXPECT_NEAR(Euler_Angles_solute_2[2], test_angle_lock2[2], 1e-6); + + Vec3d test_angle6 = {CV_PI * 0.25, CV_PI * 0.5, CV_PI * 0.25}; + Vec3d test_angle7 = {CV_PI * 0.5, CV_PI * 0.5, 0}; + EXPECT_EQ(Quatd::createFromEulerAngles(test_angle6, QuatEnum::INT_ZXY), Quatd::createFromEulerAngles(test_angle7, QuatEnum::INT_ZXY)); +} + + + +class DualQuatTest: public ::testing::Test +{ +protected: + double scalar = 2.5; + double angle = CV_PI; + Vec axis{1, 1, 1}; + Vec unAxis{0, 0, 0}; + Vec unitAxis{1.0 / sqrt(3), 1.0 / sqrt(3), 1.0 / sqrt(3)}; + DualQuatd dq1{1, 2, 3, 4, 5, 6, 7, 8}; + Vec3d trans{0, 0, 5}; + double rotation_angle = 2.0 / 3 * CV_PI; + DualQuatd dq2 = DualQuatd::createFromAngleAxisTrans(rotation_angle, axis, trans); + DualQuatd dqAllOne{1, 1, 1, 1, 1, 1, 1, 1}; + DualQuatd dqAllZero{0, 0, 0, 0, 0, 0, 0, 0}; + DualQuatd dqIdentity{1, 0, 0, 0, 0, 0, 0, 0}; + DualQuatd dqTrans{1, 0, 0, 0, 0, 2, 3, 4}; + DualQuatd dqOnlyTrans{0, 0, 0, 0, 0, 2, 3, 4}; + DualQuatd dualNumber1{-3,0,0,0,-31.1,0,0,0}; + DualQuatd dualNumber2{4,0,0,0,5.1,0,0,0}; +}; + +TEST_F(DualQuatTest, 
constructor) +{ + EXPECT_EQ(dq1, DualQuatd::createFromQuat(Quatd(1, 2, 3, 4), Quatd(5, 6, 7, 8))); + EXPECT_EQ(dq2 * dq2.conjugate(), dqIdentity); + EXPECT_NEAR(dq2.getRotation(QUAT_ASSUME_UNIT).norm(), 1, 1e-6); + EXPECT_NEAR(dq2.getRealPart().dot(dq2.getDualPart()), 0, 1e-6); + EXPECT_MAT_NEAR(dq2.getTranslation(QUAT_ASSUME_UNIT), trans, 1e-6); + DualQuatd q_conj = DualQuatd::createFromQuat(dq2.getRealPart().conjugate(), -dq2.getDualPart().conjugate()); + DualQuatd q{1,0,0,0,0,3,0,0}; + EXPECT_EQ(dq2 * q * q_conj, DualQuatd(1,0,0,0,0,0,3,5)); + Matx44d R1 = dq2.toMat(); + DualQuatd dq3 = DualQuatd::createFromMat(R1); + EXPECT_EQ(dq3, dq2); + axis = axis / std::sqrt(axis.dot(axis)); + Vec3d moment = 1.0 / 2 * (trans.cross(axis) + axis.cross(trans.cross(axis)) * + std::cos(rotation_angle / 2) / std::sin(rotation_angle / 2)); + double d = trans.dot(axis); + DualQuatd dq4 = DualQuatd::createFromPitch(rotation_angle, d, axis, moment); + EXPECT_EQ(dq4, dq3); + EXPECT_EQ(dq2, DualQuatd::createFromAffine3(dq2.toAffine3())); + EXPECT_EQ(dq1.normalize(), DualQuatd::createFromAffine3(dq1.toAffine3())); +} + +TEST_F(DualQuatTest, test_operator) +{ + DualQuatd dq_origin{1, 2, 3, 4, 5, 6, 7, 8}; + EXPECT_EQ(dq1 - dqAllOne, DualQuatd(0, 1, 2, 3, 4, 5, 6, 7)); + EXPECT_EQ(-dq1, DualQuatd(-1, -2, -3, -4, -5, -6, -7, -8)); + EXPECT_EQ(dq1 + dqAllOne, DualQuatd(2, 3, 4, 5, 6, 7, 8, 9)); + EXPECT_EQ(dq1 / dq1, dqIdentity); + DualQuatd dq3{-4, 1, 3, 2, -15.5, 0, -3, 8.5}; + EXPECT_EQ(dq1 * dq2, dq3); + EXPECT_EQ(dq3 / dq2, dq1); + DualQuatd dq12{2, 4, 6, 8, 10, 12, 14, 16}; + EXPECT_EQ(dq1 * 2.0, dq12); + EXPECT_EQ(2.0 * dq1, dq12); + EXPECT_EQ(dq1 - 1.0, DualQuatd(0, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ(1.0 - dq1, DualQuatd(0, -2, -3, -4, -5, -6, -7, -8)); + EXPECT_EQ(dq1 + 1.0, DualQuatd(2, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ(1.0 + dq1, DualQuatd(2, 2, 3, 4, 5, 6, 7, 8)); + dq1 += dq2; + EXPECT_EQ(dq1, dq_origin + dq2); + dq1 -= dq2; + EXPECT_EQ(dq1, dq_origin); + dq1 *= dq2; + EXPECT_EQ(dq1, dq_origin * dq2); + dq1 /= dq2; + EXPECT_EQ(dq1, dq_origin); +} + +TEST_F(DualQuatTest, basic_ops) +{ + EXPECT_EQ(dq1.getRealPart(), Quatd(1, 2, 3, 4)); + EXPECT_EQ(dq1.getDualPart(), Quatd(5, 6, 7, 8)); + EXPECT_EQ((dq1 * dq2).conjugate(), conjugate(dq1 * dq2)); + EXPECT_EQ(dq1.conjugate(), DualQuatd::createFromQuat(dq1.getRealPart().conjugate(), dq1.getDualPart().conjugate())); + EXPECT_EQ((dq2 * dq1).conjugate(), dq1.conjugate() * dq2.conjugate()); + EXPECT_EQ(dq1.conjugate() * dq1, dq1.norm() * dq1.norm()); + EXPECT_EQ(dq1.conjugate() * dq1, dq1.norm().power(2.0)); + EXPECT_EQ(dualNumber2.power(2.0), DualQuatd(16, 0, 0, 0, 40.8, 0, 0, 0)); + EXPECT_EQ(dq1.power(2.0), (2.0 * dq1.log()).exp()); + EXPECT_EQ(power(dq1, 2.0), (exp(2.0 * log(dq1)))); + EXPECT_EQ(dq2.power(3.0 / 2, QUAT_ASSUME_UNIT).power(4.0 / 3, QUAT_ASSUME_UNIT), dq2 * dq2); + EXPECT_EQ(dq2.power(-0.5).power(2.0), dq2.inv()); + EXPECT_EQ(power(dq1, dq2), exp(dq2 * log(dq1))); + EXPECT_EQ(power(dq2, dq1, QUAT_ASSUME_UNIT), exp(dq1 * log(dq2))); + EXPECT_EQ((dq2.norm() * dq1).power(2.0), dq1.power(2.0) * dq2.norm().power(2.0)); + DualQuatd q1norm = dq1.normalize(); + EXPECT_EQ(dq2.norm(), dqIdentity); + EXPECT_NEAR(q1norm.getRealPart().norm(), 1, 1e-6); + EXPECT_NEAR(q1norm.getRealPart().dot(q1norm.getDualPart()), 0, 1e-6); + EXPECT_NEAR(dq1.getRotation().norm(), 1, 1e-6); + EXPECT_NEAR(dq2.getRotation(QUAT_ASSUME_UNIT).norm(), 1, 1e-6); + EXPECT_NEAR(dq2.getRotation(QUAT_ASSUME_UNIT).norm(), 1, 1e-6); + EXPECT_MAT_NEAR(Mat(dq2.getTranslation()), 
Mat(trans), 1e-6); + EXPECT_MAT_NEAR(Mat(q1norm.getTranslation(QUAT_ASSUME_UNIT)), Mat(dq1.getTranslation()), 1e-6); + EXPECT_EQ(dq2.getTranslation(), dq2.getTranslation(QUAT_ASSUME_UNIT)); + EXPECT_EQ(dq1.inv() * dq1, dqIdentity); + EXPECT_EQ(inv(dq1) * dq1, dqIdentity); + EXPECT_EQ(dq2.inv(QUAT_ASSUME_UNIT) * dq2, dqIdentity); + EXPECT_EQ(inv(dq2, QUAT_ASSUME_UNIT) * dq2, dqIdentity); + EXPECT_EQ(dq2.inv(), dq2.conjugate()); + EXPECT_EQ(dqIdentity.inv(), dqIdentity); + EXPECT_ANY_THROW(dqAllZero.inv()); + EXPECT_EQ(dqAllZero.exp(), dqIdentity); + EXPECT_EQ(exp(dqAllZero), dqIdentity); + EXPECT_ANY_THROW(log(dqAllZero)); + EXPECT_EQ(log(dqIdentity), dqAllZero); + EXPECT_EQ(dqIdentity.log(), dqAllZero); + EXPECT_EQ(dualNumber1 * dualNumber2, dualNumber2 * dualNumber1); + EXPECT_EQ(dualNumber2.exp().log(), dualNumber2); + EXPECT_EQ(dq2.log(QUAT_ASSUME_UNIT).exp(), dq2); + EXPECT_EQ(exp(log(dq2, QUAT_ASSUME_UNIT)), dq2); + EXPECT_EQ(dqIdentity.log(QUAT_ASSUME_UNIT).exp(), dqIdentity); + EXPECT_EQ(dq1.log().exp(), dq1); + EXPECT_EQ(dqTrans.log().exp(), dqTrans); + EXPECT_MAT_NEAR(q1norm.toMat(QUAT_ASSUME_UNIT), dq1.toMat(), 1e-6); + Matx44d R1 = dq2.toMat(); + Mat point = (Mat_(4, 1) << 3, 0, 0, 1); + Mat new_point = R1 * point; + Mat after = (Mat_(4, 1) << 0, 3, 5 ,1); + EXPECT_MAT_NEAR(new_point, after, 1e-6); + Vec vec = dq1.toVec(); + EXPECT_EQ(DualQuatd(vec), dq1); + Affine3d afd = q1norm.toAffine3(QUAT_ASSUME_UNIT); + EXPECT_MAT_NEAR(Mat(afd.translation()), Mat(q1norm.getTranslation(QUAT_ASSUME_UNIT)), 1e-6); + Affine3d dq1_afd = dq1.toAffine3(); + EXPECT_MAT_NEAR(dq1_afd.matrix, afd.matrix, 1e-6); + EXPECT_ANY_THROW(dqAllZero.toAffine3()); +} + +TEST_F(DualQuatTest, interpolation) +{ + DualQuatd dq = DualQuatd::createFromAngleAxisTrans(8 * CV_PI / 5, Vec3d{0, 0, 1}, Vec3d{0, 0, 10}); + EXPECT_EQ(DualQuatd::sclerp(dqIdentity, dq, 0.5), DualQuatd::sclerp(-dqIdentity, dq, 0.5, false)); + EXPECT_EQ(DualQuatd::sclerp(dqIdentity, dq, 0), -dqIdentity); + EXPECT_EQ(DualQuatd::sclerp(dqIdentity, dq2, 1), dq2); + EXPECT_EQ(DualQuatd::sclerp(dqIdentity, dq2, 0.4, false, QUAT_ASSUME_UNIT), DualQuatd(0.91354546, 0.23482951, 0.23482951, 0.23482951, -0.23482951, -0.47824988, 0.69589767, 0.69589767)); + EXPECT_EQ(DualQuatd::dqblend(dqIdentity, dq1.normalize(), 0.2, QUAT_ASSUME_UNIT), DualQuatd::dqblend(dqIdentity, -dq1, 0.2)); + EXPECT_EQ(DualQuatd::dqblend(dqIdentity, dq2, 0.4), DualQuatd(0.91766294, 0.22941573, 0.22941573, 0.22941573, -0.21130397, -0.48298049, 0.66409818, 0.66409818)); + DualQuatd gdb = DualQuatd::gdqblend(Vec{dqIdentity, dq, dq2}, Vec3d{0.4, 0, 0.6}, QUAT_ASSUME_UNIT); + EXPECT_EQ(gdb, DualQuatd::dqblend(dqIdentity, dq2, 0.6)); + EXPECT_ANY_THROW(DualQuatd::gdqblend(Vec{dq2}, Vec2d{0.5, 0.5})); + Mat gdqb_d(1, 2, CV_64FC(7)); + gdqb_d.at>(0, 0) = Vec{1,2,3,4,5,6,7}; + gdqb_d.at>(0, 1) = Vec{1,2,3,4,5,6,7}; + EXPECT_ANY_THROW(DualQuatd::gdqblend(gdqb_d, Vec2d{0.5, 0.5})); + Mat gdqb_f(1, 2, CV_32FC(8)); + gdqb_f.at>(0, 0) = Vec{1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f}; + gdqb_f.at>(0, 1) = Vec{1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f}; + EXPECT_ANY_THROW(DualQuatd::gdqblend(gdqb_f, Vec2d{0.5, 0.5})); + EXPECT_ANY_THROW(DualQuatd::gdqblend(Vec{dqIdentity, dq, dq2}, Vec3f{0.4f, 0.f, 0.6f}, QUAT_ASSUME_UNIT)); + EXPECT_EQ(gdb, DualQuatd::gdqblend(Vec{dqIdentity, dq * dualNumber1, -dq2}, Vec3d{0.4, 0, 0.6})); +} + + +}} // namespace diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 39aaa1edb4..24d35646df 100644 --- 
a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -364,6 +364,7 @@ CV__DNN_INLINE_NS_BEGIN
      * Inner vector has slice ranges for the first number of input dimensions.
      */
     std::vector<std::vector<Range> > sliceRanges;
+    std::vector<std::vector<int> > sliceSteps;
     int axis;
     int num_split;
@@ -499,6 +500,14 @@ CV__DNN_INLINE_NS_BEGIN
         static Ptr<PowerLayer> create(const LayerParams &params);
     };
 
+    class CV_EXPORTS ExpLayer : public ActivationLayer
+    {
+    public:
+        float base, scale, shift;
+
+        static Ptr<ExpLayer> create(const LayerParams &params);
+    };
+
     /* Layers used in semantic segmentation */
 
     class CV_EXPORTS CropLayer : public Layer
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 69b71f90ce..0743de00ab 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -100,6 +100,18 @@ CV__DNN_INLINE_NS_BEGIN
     CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
     CV_EXPORTS_W std::vector<Target> getAvailableTargets(dnn::Backend be);
 
+    /**
+     * @brief Enables detailed logging of the DNN model loading with CV DNN API.
+     * @param[in] isDiagnosticsMode Indicates whether diagnostic mode should be set.
+     *
+     * Diagnostic mode provides detailed logging of the model loading stage to explore
+     * potential problems (e.g. a not implemented layer type).
+     *
+     * @note In diagnostic mode a series of assertions is skipped, which may lead to
+     * application crashes; such crashes are expected in this mode.
+     */
+    CV_EXPORTS void enableModelDiagnostics(bool isDiagnosticsMode);
+
     /** @brief This class provides all data needed to initialize layer.
      *
      * It includes dictionary with scalar params (which can be read by using Dict interface),
@@ -1216,7 +1228,7 @@
      * KeypointsModel creates net from file with trained weights and config,
      * sets preprocessing input, runs forward pass and returns the x and y coordinates of each detected keypoint
      */
-    class CV_EXPORTS_W KeypointsModel: public Model
+    class CV_EXPORTS_W_SIMPLE KeypointsModel: public Model
     {
     public:
         /**
@@ -1248,7 +1260,7 @@
      * SegmentationModel creates net from file with trained weights and config,
      * sets preprocessing input, runs forward pass and returns the class prediction for each pixel.
      */
-    class CV_EXPORTS_W SegmentationModel: public Model
+    class CV_EXPORTS_W_SIMPLE SegmentationModel: public Model
     {
     public:
         /**
@@ -1296,6 +1308,23 @@
          */
         CV_WRAP DetectionModel(const Net& network);
 
+        CV_DEPRECATED_EXTERNAL  // avoid using in C++ code (need to fix bindings first)
+        DetectionModel();
+
+        /**
+         * @brief nmsAcrossClasses defaults to false, so that when non-maximum suppression
+         * is applied during the detect() call, it is performed per class.
+         * This function allows you to toggle this behaviour.
+         * @param[in] value The new value for nmsAcrossClasses
+         */
+        CV_WRAP DetectionModel& setNmsAcrossClasses(bool value);
+
+        /**
+         * @brief Getter for nmsAcrossClasses. This variable defaults to false, meaning that
+         * non-maximum suppression applied during the detect() call is performed per class only.
+         */
+        CV_WRAP bool getNmsAcrossClasses();
+
         /** @brief Given the @p input frame, create input blob, run net and return result detections.
          * @param[in] frame The input image.
          * @param[out] classIds Class indexes in result detection.
@@ -1309,6 +1338,255 @@
                             float confThreshold = 0.5f, float nmsThreshold = 0.0f);
     };
 
+
+/** @brief This class represents high-level API for text recognition networks.
+ *
+ * TextRecognitionModel allows setting params for preprocessing an input image.
+ * TextRecognitionModel creates net from file with trained weights and config,
+ * sets preprocessing input, runs forward pass and returns the recognition result.
+ * For TextRecognitionModel, CRNN-CTC is supported.
+ */
+class CV_EXPORTS_W_SIMPLE TextRecognitionModel : public Model
+{
+public:
+    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
+    TextRecognitionModel();
+
+    /**
+     * @brief Create Text Recognition model from deep learning network
+     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
+     * @param[in] network Net object
+     */
+    CV_WRAP TextRecognitionModel(const Net& network);
+
+    /**
+     * @brief Create text recognition model from network represented in one of the supported formats
+     * Call setDecodeType() and setVocabulary() after constructor to initialize the decoding method
+     * @param[in] model Binary file contains trained weights
+     * @param[in] config Text file contains network configuration
+     */
+    CV_WRAP inline
+    TextRecognitionModel(const std::string& model, const std::string& config = "")
+        : TextRecognitionModel(readNet(model, config)) { /* nothing */ }
+
+    /**
+     * @brief Set the decoding method of translating the network output into string
+     * @param[in] decodeType The decoding method of translating the network output into string: {'CTC-greedy': greedy decoding for the output of CTC-based methods}
+     */
+    CV_WRAP
+    TextRecognitionModel& setDecodeType(const std::string& decodeType);
+
+    /**
+     * @brief Get the decoding method
+     * @return the decoding method
+     */
+    CV_WRAP
+    const std::string& getDecodeType() const;
+
+    /**
+     * @brief Set the vocabulary for recognition.
+     * @param[in] vocabulary the associated vocabulary of the network.
+     */
+    CV_WRAP
+    TextRecognitionModel& setVocabulary(const std::vector<std::string>& vocabulary);
+
+    /**
+     * @brief Get the vocabulary for recognition.
+     * @return vocabulary the associated vocabulary
+     */
+    CV_WRAP
+    const std::vector<std::string>& getVocabulary() const;
+
+    /**
+     * @brief Given the @p input frame, create input blob, run net and return recognition result
+     * @param[in] frame The input image
+     * @return The text recognition result
+     */
+    CV_WRAP
+    std::string recognize(InputArray frame) const;
+
+    /**
+     * @brief Given the @p input frame, create input blob, run net and return recognition result
+     * @param[in] frame The input image
+     * @param[in] roiRects List of text detection regions of interest (cv::Rect, CV_32SC4). ROIs will be cropped as the network inputs
+     * @param[out] results A set of text recognition results.
+     */
+    CV_WRAP
+    void recognize(InputArray frame, InputArrayOfArrays roiRects, CV_OUT std::vector<std::string>& results) const;
+};
+
+
+/** @brief Base class for text detection networks
+ */
+class CV_EXPORTS_W_SIMPLE TextDetectionModel : public Model
+{
+protected:
+    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
+    TextDetectionModel();
+
+public:
+
+    /** @brief Performs detection
+     *
+     * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections.
+     *
+     * Each result is quadrangle's 4 points in this order:
+     * - bottom-left
+     * - top-left
+     * - top-right
+     * - bottom-right
+     *
+     * Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations.
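+     *
+     * A minimal usage sketch (the model file name below is illustrative; see the
+     * EAST/DB subclasses further down for the concrete loaders):
+     * @code
+     * TextDetectionModel_EAST model("frozen_east_text_detection.pb");
+     * std::vector< std::vector<Point> > detections;
+     * std::vector<float> confidences;
+     * model.detect(frame, detections, confidences);
+     * @endcode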
+ * + * @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output. + * + * @param[in] frame The input image + * @param[out] detections array with detections' quadrangles (4 points per result) + * @param[out] confidences array with detection confidences + */ + CV_WRAP + void detect( + InputArray frame, + CV_OUT std::vector< std::vector >& detections, + CV_OUT std::vector& confidences + ) const; + + /** @overload */ + CV_WRAP + void detect( + InputArray frame, + CV_OUT std::vector< std::vector >& detections + ) const; + + /** @brief Performs detection + * + * Given the input @p frame, prepare network input, run network inference, post-process network output and return result detections. + * + * Each result is rotated rectangle. + * + * @note Result may be inaccurate in case of strong perspective transformations. + * + * @param[in] frame the input image + * @param[out] detections array with detections' RotationRect results + * @param[out] confidences array with detection confidences + */ + CV_WRAP + void detectTextRectangles( + InputArray frame, + CV_OUT std::vector& detections, + CV_OUT std::vector& confidences + ) const; + + /** @overload */ + CV_WRAP + void detectTextRectangles( + InputArray frame, + CV_OUT std::vector& detections + ) const; +}; + +/** @brief This class represents high-level API for text detection DL networks compatible with EAST model. + * + * Configurable parameters: + * - (float) confThreshold - used to filter boxes by confidences, default: 0.5f + * - (float) nmsThreshold - used in non maximum suppression, default: 0.0f + */ +class CV_EXPORTS_W_SIMPLE TextDetectionModel_EAST : public TextDetectionModel +{ +public: + CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first) + TextDetectionModel_EAST(); + + /** + * @brief Create text detection algorithm from deep learning network + * @param[in] network Net object + */ + CV_WRAP TextDetectionModel_EAST(const Net& network); + + /** + * @brief Create text detection model from network represented in one of the supported formats. + * An order of @p model and @p config arguments does not matter. + * @param[in] model Binary file contains trained weights. + * @param[in] config Text file contains network configuration. + */ + CV_WRAP inline + TextDetectionModel_EAST(const std::string& model, const std::string& config = "") + : TextDetectionModel_EAST(readNet(model, config)) { /* nothing */ } + + /** + * @brief Set the detection confidence threshold + * @param[in] confThreshold A threshold used to filter boxes by confidences + */ + CV_WRAP + TextDetectionModel_EAST& setConfidenceThreshold(float confThreshold); + + /** + * @brief Get the detection confidence threshold + */ + CV_WRAP + float getConfidenceThreshold() const; + + /** + * @brief Set the detection NMS filter threshold + * @param[in] nmsThreshold A threshold used in non maximum suppression + */ + CV_WRAP + TextDetectionModel_EAST& setNMSThreshold(float nmsThreshold); + + /** + * @brief Get the detection confidence threshold + */ + CV_WRAP + float getNMSThreshold() const; +}; + +/** @brief This class represents high-level API for text detection DL networks compatible with DB model. 
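+ *
+ * A minimal usage sketch (the model file name is illustrative; threshold values follow
+ * the defaults documented below):
+ * @code
+ * TextDetectionModel_DB model("DB_TD500_resnet50.onnx");
+ * model.setBinaryThreshold(0.3f).setPolygonThreshold(0.5f);
+ * std::vector<RotatedRect> results;
+ * model.detectTextRectangles(frame, results);
+ * @endcode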
+ *
+ * Related publications: @cite liao2020real
+ * Paper: https://arxiv.org/abs/1911.08947
+ * For more information about the hyper-parameters setting, please refer to https://github.com/MhLiao/DB
+ *
+ * Configurable parameters:
+ * - (float) binaryThreshold - The threshold of the binary map. It is usually set to 0.3.
+ * - (float) polygonThreshold - The threshold of text polygons. It is usually set to 0.5, 0.6 or 0.7; default is 0.5f.
+ * - (double) unclipRatio - The unclip ratio of the detected text region, which determines the output size. It is usually set to 2.0.
+ * - (int) maxCandidates - The maximum number of output results.
+ */
+class CV_EXPORTS_W_SIMPLE TextDetectionModel_DB : public TextDetectionModel
+{
+public:
+    CV_DEPRECATED_EXTERNAL  // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
+    TextDetectionModel_DB();
+
+    /**
+     * @brief Create text detection algorithm from deep learning network.
+     * @param[in] network Net object.
+     */
+    CV_WRAP TextDetectionModel_DB(const Net& network);
+
+    /**
+     * @brief Create text detection model from network represented in one of the supported formats.
+     * The order of @p model and @p config arguments does not matter.
+     * @param[in] model Binary file containing trained weights.
+     * @param[in] config Text file containing the network configuration.
+     */
+    CV_WRAP inline
+    TextDetectionModel_DB(const std::string& model, const std::string& config = "")
+        : TextDetectionModel_DB(readNet(model, config)) { /* nothing */ }
+
+    CV_WRAP TextDetectionModel_DB& setBinaryThreshold(float binaryThreshold);
+    CV_WRAP float getBinaryThreshold() const;
+
+    CV_WRAP TextDetectionModel_DB& setPolygonThreshold(float polygonThreshold);
+    CV_WRAP float getPolygonThreshold() const;
+
+    CV_WRAP TextDetectionModel_DB& setUnclipRatio(double unclipRatio);
+    CV_WRAP double getUnclipRatio() const;
+
+    CV_WRAP TextDetectionModel_DB& setMaxCandidates(int maxCandidates);
+    CV_WRAP int getMaxCandidates() const;
+};
+
 //! @}
 CV__DNN_INLINE_NS_END
 }
diff --git a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
index d6809ce3fd..8312a418f3 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
@@ -247,6 +247,7 @@ inline DictValue & DictValue::operator=(const DictValue &r)
 }
 
 inline DictValue::DictValue(const DictValue &r)
+    : pv(NULL)
 {
     type = r.type;
diff --git a/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp b/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp
new file mode 100644
index 0000000000..46a58f09bc
--- /dev/null
+++ b/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp
@@ -0,0 +1,23 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_DNN_LAYER_REG_HPP
+#define OPENCV_DNN_LAYER_REG_HPP
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+CV__DNN_INLINE_NS_BEGIN
+//! @addtogroup dnn
+//! @{
+
+//! Register layer types of DNN model.
+typedef std::map<std::string, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
+LayerFactory_Impl& getLayerFactoryImpl();
+
+//! 
@} +CV__DNN_INLINE_NS_END +} +} +#endif diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp index 5b8d953c1a..4c610f6cef 100644 --- a/modules/dnn/include/opencv2/dnn/shape_utils.hpp +++ b/modules/dnn/include/opencv2/dnn/shape_utils.hpp @@ -205,24 +205,54 @@ static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape) return out; } -inline int clamp(int ax, int dims) +/// @brief Converts axis from `[-dims; dims)` (similar to Python's slice notation) to `[0; dims)` range. +static inline +int normalize_axis(int axis, int dims) { - return ax < 0 ? ax + dims : ax; + CV_Check(axis, axis >= -dims && axis < dims, ""); + axis = (axis < 0) ? (dims + axis) : axis; + CV_DbgCheck(axis, axis >= 0 && axis < dims, ""); + return axis; } -inline int clamp(int ax, const MatShape& shape) +static inline +int normalize_axis(int axis, const MatShape& shape) { - return clamp(ax, (int)shape.size()); + return normalize_axis(axis, (int)shape.size()); } -inline Range clamp(const Range& r, int axisSize) +static inline +Range normalize_axis_range(const Range& r, int axisSize) { - Range clamped(std::max(r.start, 0), + if (r == Range::all()) + return Range(0, axisSize); + CV_CheckGE(r.start, 0, ""); + Range clamped(r.start, r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1); - CV_Assert_N(clamped.start < clamped.end, clamped.end <= axisSize); + CV_DbgCheckGE(clamped.start, 0, ""); + CV_CheckLT(clamped.start, clamped.end, ""); + CV_CheckLE(clamped.end, axisSize, ""); return clamped; } +static inline +bool isAllOnes(const MatShape &inputShape, int startPos, int endPos) +{ + CV_Assert(!inputShape.empty()); + + CV_CheckGE((int) inputShape.size(), startPos, ""); + CV_CheckGE(startPos, 0, ""); + CV_CheckLE(startPos, endPos, ""); + CV_CheckLE((size_t)endPos, inputShape.size(), ""); + + for (size_t i = startPos; i < endPos; i++) + { + if (inputShape[i] != 1) + return false; + } + return true; +} + CV__DNN_INLINE_NS_END } } diff --git a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp index 29882b92b0..333b1bfdd2 100644 --- a/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp +++ b/modules/dnn/include/opencv2/dnn/utils/inference_engine.hpp @@ -49,6 +49,8 @@ CV_EXPORTS_W void resetMyriadDevice(); #define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_2 "Myriad2" /// Intel(R) Neural Compute Stick 2, NCS2 (USB 03e7:2485), MyriadX (https://software.intel.com/ru-ru/neural-compute-stick) #define CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X "MyriadX" +#define CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE "ARM_COMPUTE" +#define CV_DNN_INFERENCE_ENGINE_CPU_TYPE_X86 "X86" /** @brief Returns Inference Engine VPU type. @@ -57,6 +59,11 @@ CV_EXPORTS_W void resetMyriadDevice(); */ CV_EXPORTS_W cv::String getInferenceEngineVPUType(); +/** @brief Returns Inference Engine CPU type. + * + * Specify OpenVINO plugin: CPU or ARM. + */ +CV_EXPORTS_W cv::String getInferenceEngineCPUType(); /** @brief Release a HDDL plugin. */ diff --git a/modules/dnn/include/opencv2/dnn/version.hpp b/modules/dnn/include/opencv2/dnn/version.hpp index 7dc2786906..1cd0b8f486 100644 --- a/modules/dnn/include/opencv2/dnn/version.hpp +++ b/modules/dnn/include/opencv2/dnn/version.hpp @@ -6,7 +6,7 @@ #define OPENCV_DNN_VERSION_HPP /// Use with major OpenCV version only. 
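To make the shape_utils.hpp change above concrete: normalize_axis() keeps the old wrap-around
behavior for negative axes but, unlike the removed clamp(), rejects out-of-range values instead of
passing them through. A standalone sketch (editorial illustration, not part of this patch):

#include <opencv2/dnn/shape_utils.hpp>
#include <iostream>

int main()
{
    // Negative axes follow Python slice notation: -1 maps to the last axis.
    std::cout << cv::dnn::normalize_axis(-1, 4) << std::endl;  // prints 3
    std::cout << cv::dnn::normalize_axis(2, 4) << std::endl;   // prints 2

    // The old clamp(4, 4) silently returned 4; normalize_axis(4, 4) now
    // throws cv::Exception via the CV_Check range assertion.
    return 0;
}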
-#define OPENCV_DNN_API_VERSION 20201117 +#define OPENCV_DNN_API_VERSION 20210301 #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS #define CV__DNN_INLINE_NS __CV_CAT(dnn5_v, OPENCV_DNN_API_VERSION) diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 746dabf4ea..d0687ca4bc 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -197,6 +197,25 @@ class dnn_test(NewOpenCVTests): normAssert(self, out, ref) + def test_textdetection_model(self): + img_path = self.find_dnn_file("dnn/text_det_test1.png") + weights = self.find_dnn_file("dnn/onnx/models/DB_TD500_resnet50.onnx", required=False) + if weights is None: + raise unittest.SkipTest("Missing DNN test files (onnx/models/DB_TD500_resnet50.onnx). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + + frame = cv.imread(img_path) + scale = 1.0 / 255.0 + size = (736, 736) + mean = (122.67891434, 116.66876762, 104.00698793) + + model = cv.dnn_TextDetectionModel_DB(weights) + model.setInputParams(scale, size, mean) + out, _ = model.detect(frame) + + self.assertTrue(type(out) == list) + self.assertTrue(np.array(out).shape == (2, 4, 2)) + + def test_face_detection(self): proto = self.find_dnn_file('dnn/opencv_face_detector.prototxt') model = self.find_dnn_file('dnn/opencv_face_detector.caffemodel', required=False) diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index aef3bc2c31..46db47bc4c 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -206,7 +206,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) throw SkipTestException("Test is disabled in OpenVINO 2020.4"); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure if (target == DNN_TARGET_MYRIAD) throw SkipTestException(""); #endif @@ -241,7 +241,7 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny) { if (backend == DNN_BACKEND_HALIDE) throw SkipTestException(""); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure if (target == DNN_TARGET_MYRIAD) throw SkipTestException(""); #endif @@ -276,9 +276,9 @@ PERF_TEST_P_(DNNTestNetwork, Inception_v2_Faster_RCNN) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) throw SkipTestException("Test is disabled in OpenVINO 2019R2"); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) - throw SkipTestException("Test is disabled in OpenVINO 2021.1 / MYRIAD"); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) + if (target == DNN_TARGET_MYRIAD) + throw SkipTestException("Test is disabled in OpenVINO 2021.1+ / MYRIAD"); #endif if (backend == DNN_BACKEND_HALIDE || (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) || diff --git a/modules/dnn/src/caffe/opencv-caffe.proto b/modules/dnn/src/caffe/opencv-caffe.proto index 8ab35bac99..d540591f82 100644 --- a/modules/dnn/src/caffe/opencv-caffe.proto +++ b/modules/dnn/src/caffe/opencv-caffe.proto @@ -181,6 +181,8 @@ message DetectionOutputParameter { optional float 
confidence_threshold = 9; // If prior boxes are normalized to [0, 1] or not. optional bool normalized_bbox = 10 [default = true]; + // OpenCV custom parameter + optional bool clip = 1000 [default = false]; } message Datum { diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu index 6a991baea2..599d58852e 100644 --- a/modules/dnn/src/cuda/activations.cu +++ b/modules/dnn/src/cuda/activations.cu @@ -145,6 +145,11 @@ void power(const Stream& stream, Span output, View input, T exp, T scale, generic_op>(stream, output, input, {exp, scale, shift}); } +template +void exp(const Stream& stream, Span output, View input, T normScale, T normShift) { + generic_op>(stream, output, input, {normScale, normShift}); +} + #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530) template void relu<__half>(const Stream&, Span<__half>, View<__half>, __half); template void clipped_relu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); @@ -156,6 +161,7 @@ template void elu<__half>(const Stream&, Span<__half>, View<__half>); template void abs<__half>(const Stream& stream, Span<__half> output, View<__half> input); template void bnll<__half>(const Stream&, Span<__half>, View<__half>); template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half); +template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half); #endif @@ -169,6 +175,7 @@ template void elu(const Stream&, Span, View); template void abs(const Stream& stream, Span output, View input); template void bnll(const Stream&, Span, View); template void power(const Stream&, Span, View, float, float, float); +template void exp(const Stream&, Span, View, float, float); template static void launch_vectorized_axiswise_relu(const Stream& stream, Span output, View input, std::size_t inner_size, View slope) { diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp index 0435cb294f..1c29de0426 100644 --- a/modules/dnn/src/cuda/functors.hpp +++ b/modules/dnn/src/cuda/functors.hpp @@ -228,6 +228,25 @@ struct PowerFunctor { T exp, scale, shift; }; +template +struct ExpFunctor { + struct Params { + CUDA4DNN_HOST_DEVICE Params() : normScale(1), normShift(0) { } + CUDA4DNN_HOST_DEVICE Params(T nScale_, T nShift_) : normScale(nScale_), normShift(nShift_) { } + T normScale, normShift; + }; + + CUDA4DNN_DEVICE ExpFunctor() : ExpFunctor(Params{}) { } + CUDA4DNN_DEVICE ExpFunctor(const Params& params) : normScale{params.normScale}, normShift{params.normShift} { } + + CUDA4DNN_DEVICE T operator()(T value) { + using csl::device::fast_exp; + return fast_exp(normShift + normScale * value); + } + + T normScale, normShift; +}; + template struct MaxFunctor { struct Params { @@ -297,4 +316,4 @@ struct DivFunctor { }}}} /* namespace cv::dnn::cuda4dnn::kernels */ -#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ \ No newline at end of file +#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */ diff --git a/modules/dnn/src/cuda/math.hpp b/modules/dnn/src/cuda/math.hpp index 1a9b221896..273f3fe98e 100644 --- a/modules/dnn/src/cuda/math.hpp +++ b/modules/dnn/src/cuda/math.hpp @@ -108,6 +108,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace de template __device__ T clamp(T value, T lower, T upper) { return min(max(value, lower), upper); } + template __device__ long lround(T value); + template <> inline __device__ long lround(double value) { return ::lround(value); } + template <> inline __device__ long lround(float value) { return 
lroundf(value); } + template __device__ T round(T value); template <> inline __device__ double round(double value) { return ::round(value); } template <> inline __device__ float round(float value) { return roundf(value); } diff --git a/modules/dnn/src/cuda/max_unpooling.cu b/modules/dnn/src/cuda/max_unpooling.cu index fbfb5ae432..3bfd75f926 100644 --- a/modules/dnn/src/cuda/max_unpooling.cu +++ b/modules/dnn/src/cuda/max_unpooling.cu @@ -31,7 +31,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { namespace raw { template ::type = true> /* Order has been hardcoded; see code */ + typename std::enable_if::type = true> /* Order has been hardcoded; see code */ __global__ void max_pooling_with_indices( Span output, Span indices, View input, size_type channels, array out_spatial_dims, array in_spatial_dims, @@ -72,7 +72,22 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { in_spatial_size *= in_spatial_dims[i]; const auto outer_offset = (n * channels + c) * in_spatial_size; - if (Order == 2) { + if (Order == 1) { + array idx; + for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) { + index_type offset = 0; + index_type stride = 1; + for (int i = Order - 1; i >= 0; i--) { + offset += stride * idx[i]; + stride *= in_spatial_dims[i]; + } + + if (input[outer_offset + offset] > max_value) { + max_idx = offset; + max_value = input[outer_offset + offset]; + } + } + } else if (Order == 2) { array idx; for (idx[0] = start[0]; idx[0] != end[0]; idx[0]++) { for (idx[1] = start[1]; idx[1] != end[1]; idx[1]++) { @@ -206,8 +221,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { out_spatial_dims[i] = output.get_axis_size(2 + i); } - /* only max_pooling2d and max_pooling3d are supported */ - CV_Assert(2 <= order && order <= 3); + CV_Assert(1 <= order && order <= 3); std::size_t channels = input.get_axis_size(1); if (order == 3) { launch_max_pooling_kernel(stream, output, indices, input, channels, @@ -215,6 +229,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { } else if (order == 2) { launch_max_pooling_kernel(stream, output, indices, input, channels, out_spatial_dims, in_spatial_dims, window_size, strides, padding_left); + } else if (order == 1) { + launch_max_pooling_kernel(stream, output, indices, input, channels, + out_spatial_dims, in_spatial_dims, window_size, strides, padding_left); } } diff --git a/modules/dnn/src/cuda/resize.cu b/modules/dnn/src/cuda/resize.cu index 045b4f0a87..b780dab9f9 100644 --- a/modules/dnn/src/cuda/resize.cu +++ b/modules/dnn/src/cuda/resize.cu @@ -26,7 +26,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template __global__ void resize_nn( Span output, size_type out_height, size_type out_width, - View input, size_type in_height, size_type in_width) + View input, size_type in_height, size_type in_width, + float o2i_fy, float o2i_fx, bool round, bool half_pixel_centers) { auto in_image_size = in_height * in_width; auto out_image_size = out_height * out_width; @@ -60,12 +61,16 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { const index_type y = (iter % out_image_size) / out_width; const index_type x = iter % out_width; - /* o2i = output to input */ - auto o2i_fy = static_cast(in_height) / out_height; - auto o2i_fx = static_cast(in_width) / out_width; + auto in_yf = half_pixel_centers ? (y + 0.5f) * o2i_fy : y * o2i_fy; + auto in_xf = half_pixel_centers ? 
(x + 0.5f) * o2i_fx : x * o2i_fx; - auto in_y = static_cast(y * o2i_fy); - auto in_x = static_cast(x * o2i_fx); + using device::lround; + index_type in_y = round ? lround(in_yf) : static_cast(in_yf); + index_type in_x = round ? lround(in_xf) : static_cast(in_xf); + + using device::min; + in_y = min(in_y, in_height - 1); + in_x = min(in_x, in_width - 1); index_type in_idx = c_start * in_image_size + in_y * in_width + in_x; index_type out_idx = c_start * out_image_size + y * out_width + x; @@ -83,7 +88,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { __global__ void resize_bilinear( Span output, size_type out_height, size_type out_width, View input, size_type in_height, size_type in_width, - float o2i_fy, float o2i_fx) + float o2i_fy, float o2i_fx, bool half_pixel_centers) { auto in_image_size = in_height * in_width; auto out_image_size = out_height * out_width; @@ -119,8 +124,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { const index_type y = (iter % out_image_size) / out_width; const index_type x = iter % out_width; - auto in_x = x * o2i_fx; - auto in_y = y * o2i_fy; + using device::max; + auto in_x = half_pixel_centers ? max((x + 0.5f) * o2i_fx - 0.5f, 0.0f) : x * o2i_fx; + auto in_y = half_pixel_centers ? max((y + 0.5f) * o2i_fy - 0.5f, 0.0f) : y * o2i_fy; auto in_x0 = static_cast(in_x); auto in_y0 = static_cast(in_y); @@ -157,15 +163,16 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template static void launch_multichannel_resize_nn(const Stream& stream, Span output, size_type out_height, size_type out_width, - View input, size_type in_height, size_type in_width) + View input, size_type in_height, size_type in_width, + float scale_y, float scale_x, bool round, bool half_pixel_centers) { auto kernel = raw::resize_nn; auto policy = make_policy(kernel, output.size() / CHANNELS_PER_ITER, 0, stream); - launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width); + launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } template - void resize_nn(const Stream& stream, TensorSpan output, TensorView input) { + void resize_nn(const Stream& stream, TensorSpan output, TensorView input, float scale_y, float scale_x, bool round, bool half_pixel_centers) { auto out_height = output.get_axis_size(-2); auto out_width = output.get_axis_size(-1); @@ -176,38 +183,38 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { auto num_iters = num_effective_channels * out_height * out_width; if (num_effective_channels % 32 == 0 && num_iters > 655360) { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } else if (num_effective_channels % 16 == 0 && num_iters > 327680) { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } else if (num_effective_channels % 8 == 0 && num_iters > 163840) { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, 
half_pixel_centers); } else if (num_effective_channels % 4 == 0 && num_iters > 81920) { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } else if (num_effective_channels % 2 == 0) { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } else { - launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width); + launch_multichannel_resize_nn(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, round, half_pixel_centers); } } #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530) - template void resize_nn<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>); + template void resize_nn<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float, bool, bool); #endif - template void resize_nn(const Stream&, TensorSpan, TensorView); + template void resize_nn(const Stream&, TensorSpan, TensorView, float, float, bool,bool); template static void launch_multichannel_resize_bilinear(const Stream& stream, Span output, size_type out_height, size_type out_width, View input, size_type in_height, size_type in_width, - float scale_y, float scale_x) + float scale_y, float scale_x, bool half_pixel_centers) { auto kernel = raw::resize_bilinear; auto policy = make_policy(kernel, output.size() / CHANNELS_PER_ITER, 0, stream); - launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_kernel(kernel, policy, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } template - void resize_bilinear(const Stream& stream, TensorSpan output, TensorView input, float scale_y, float scale_x) { + void resize_bilinear(const Stream& stream, TensorSpan output, TensorView input, float scale_y, float scale_x, bool half_pixel_centers) { auto out_height = output.get_axis_size(-2); auto out_width = output.get_axis_size(-1); @@ -218,21 +225,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { auto num_iters = num_effective_channels * out_height * out_width; if (num_effective_channels % 16 == 0 && num_iters > 163840) { - launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } else if (num_effective_channels % 8 == 0 && num_iters > 81920) { - launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } else if (num_effective_channels % 4 == 0 && num_iters > 40960) { - launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } else if (num_effective_channels % 2 == 0) { - launch_multichannel_resize_bilinear(stream, output, 
out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } else { - launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x); + launch_multichannel_resize_bilinear(stream, output, out_height, out_width, input, in_height, in_width, scale_y, scale_x, half_pixel_centers); } } #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 530) - template void resize_bilinear<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float); + template void resize_bilinear<__half>(const Stream&, TensorSpan<__half>, TensorView<__half>, float, float, bool); #endif - template void resize_bilinear(const Stream&, TensorSpan, TensorView, float, float); + template void resize_bilinear(const Stream&, TensorSpan, TensorView, float, float, bool); }}}} /* namespace cv::dnn::cuda4dnn::kernels */ diff --git a/modules/dnn/src/cuda4dnn/init.hpp b/modules/dnn/src/cuda4dnn/init.hpp index e9d997311f..f5bb7714f8 100644 --- a/modules/dnn/src/cuda4dnn/init.hpp +++ b/modules/dnn/src/cuda4dnn/init.hpp @@ -17,28 +17,18 @@ namespace cv { namespace dnn { namespace cuda4dnn { void checkVersions() { - int cudart_version = 0; - CUDA4DNN_CHECK_CUDA(cudaRuntimeGetVersion(&cudart_version)); - if (cudart_version != CUDART_VERSION) + // https://docs.nvidia.com/deeplearning/cudnn/developer-guide/index.html#programming-model + // cuDNN API Compatibility + // Beginning in cuDNN 7, the binary compatibility of a patch and minor releases is maintained as follows: + // Any patch release x.y.z is forward or backward-compatible with applications built against another cuDNN patch release x.y.w (meaning, of the same major and minor version number, but having w!=z). + // cuDNN minor releases beginning with cuDNN 7 are binary backward-compatible with applications built against the same or earlier patch release (meaning, an application built against cuDNN 7.x is binary compatible with cuDNN library 7.y, where y>=x). + // Applications compiled with a cuDNN version 7.y are not guaranteed to work with 7.x release when y > x. + auto cudnn_bversion = cudnnGetVersion(); + auto cudnn_major_bversion = cudnn_bversion / 1000, cudnn_minor_bversion = cudnn_bversion % 1000 / 100; + if (cudnn_major_bversion != CUDNN_MAJOR || cudnn_minor_bversion < CUDNN_MINOR) { std::ostringstream oss; - oss << "CUDART reports version " << cudart_version << " which does not match with the version " << CUDART_VERSION << " with which OpenCV was built"; - CV_LOG_WARNING(NULL, oss.str().c_str()); - } - - auto cudnn_version = cudnnGetVersion(); - if (cudnn_version != CUDNN_VERSION) - { - std::ostringstream oss; - oss << "cuDNN reports version " << cudnn_version << " which does not match with the version " << CUDNN_VERSION << " with which OpenCV was built"; - CV_LOG_WARNING(NULL, oss.str().c_str()); - } - - auto cudnn_cudart_version = cudnnGetCudartVersion(); - if (cudart_version != cudnn_cudart_version) - { - std::ostringstream oss; - oss << "CUDART version " << cudnn_cudart_version << " reported by cuDNN " << cudnn_version << " does not match with the version reported by CUDART " << cudart_version; + oss << "cuDNN reports version " << cudnn_major_bversion << "." << cudnn_minor_bversion << " which is not compatible with the version " << CUDNN_MAJOR << "." 
<< CUDNN_MINOR << " with which OpenCV was built"; CV_LOG_WARNING(NULL, oss.str().c_str()); } } @@ -57,9 +47,6 @@ namespace cv { namespace dnn { namespace cuda4dnn { bool isDeviceCompatible() { - if (getDeviceCount() <= 0) - return false; - int device_id = getDevice(); if (device_id < 0) return false; @@ -80,9 +67,6 @@ namespace cv { namespace dnn { namespace cuda4dnn { bool doesDeviceSupportFP16() { - if (getDeviceCount() <= 0) - return false; - int device_id = getDevice(); if (device_id < 0) return false; diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp index 46f697fce3..0a7c9878fb 100644 --- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp @@ -45,6 +45,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template void power(const csl::Stream& stream, csl::Span output, csl::View input, T exp, T scale, T shift); + template + void exp(const csl::Stream& stream, csl::Span output, csl::View input, T normScale, T normShift); + }}}} /* namespace cv::dnn::cuda4dnn::kernels */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */ diff --git a/modules/dnn/src/cuda4dnn/kernels/resize.hpp b/modules/dnn/src/cuda4dnn/kernels/resize.hpp index 31aee3d371..4a3768a70a 100644 --- a/modules/dnn/src/cuda4dnn/kernels/resize.hpp +++ b/modules/dnn/src/cuda4dnn/kernels/resize.hpp @@ -11,10 +11,10 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels { template - void resize_nn(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView input); + void resize_nn(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView input, float scale_y, float scale_x, bool round, bool half_pixel_centers); template - void resize_bilinear(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView input, float scale_y, float scale_x); + void resize_bilinear(const csl::Stream& stream, csl::TensorSpan output, csl::TensorView input, float scale_y, float scale_x, bool half_pixel_centers); }}}} /* namespace cv::dnn::cuda4dnn::kernels */ diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp index fce996a89e..84b95927a3 100644 --- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp @@ -341,6 +341,36 @@ namespace cv { namespace dnn { namespace cuda4dnn { const T exp, scale, shift; }; + template + class ExpOp final : public CUDABackendNode { + public: + using wrapper_type = GetCUDABackendWrapperType; + + ExpOp(csl::Stream stream_, T nScale_, T nShift_) + : stream(std::move(stream_)), normScale{ nScale_ }, normShift{ nShift_ } { } + + void forward( + const std::vector>& inputs, + const std::vector>& outputs, + csl::Workspace& workspace) override + { + for (int i = 0; i < inputs.size(); i++) + { + auto input_wrapper = inputs[i].dynamicCast(); + auto input = input_wrapper->getView(); + + auto output_wrapper = outputs[i].dynamicCast(); + auto output = output_wrapper->getSpan(); + + kernels::exp(stream, output, input, normScale, normShift); + } + } + + private: + csl::Stream stream; + const T normScale, normShift; + }; + }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */ diff --git a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp index 8d788f05dc..12cf97404e 100644 --- a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp +++ 
b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp @@ -103,7 +103,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { const auto groups = config.groups; - CV_Assert (1 < convolution_order && convolution_order <= 3); + CV_Assert (1 <= convolution_order && convolution_order <= 3); const auto rank = input_shape.size(); const auto output_feature_maps = output_shape[1]; diff --git a/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp b/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp index 1102dc56fa..fc1002fc4e 100644 --- a/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/max_unpooling.hpp @@ -50,13 +50,12 @@ namespace cv { namespace dnn { namespace cuda4dnn { window_size = config.window_size; const auto pooling_order = window_size.size(); - CV_Assert(pooling_order >= 1); strides = config.strides; CV_Assert(pooling_order == strides.size()); - if (pooling_order != 2 && pooling_order != 3) - CV_Error(Error::StsNotImplemented, "Only 2D/3D max-pooling are supported."); + if (pooling_order < 1 || pooling_order > 3) + CV_Error(Error::StsNotImplemented, "Only 1D/2D/3D max-pooling are supported."); padding_left.resize(pooling_order); if (config.padMode == MaxPoolingConfiguration::PaddingMode::MANUAL) diff --git a/modules/dnn/src/cuda4dnn/primitives/resize.hpp b/modules/dnn/src/cuda4dnn/primitives/resize.hpp index 0ac7b94e19..1465aa8867 100644 --- a/modules/dnn/src/cuda4dnn/primitives/resize.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/resize.hpp @@ -20,14 +20,23 @@ namespace cv { namespace dnn { namespace cuda4dnn { BILINEAR }; + struct ResizeConfiguration { + InterpolationType type; + bool align_corners; + bool half_pixel_centers; + }; + template class ResizeOp final : public CUDABackendNode { public: using wrapper_type = GetCUDABackendWrapperType; - ResizeOp(csl::Stream stream_, InterpolationType type_, float scaleHeight_, float scaleWidth_) - : stream(std::move(stream_)), type{ type_ }, scaleHeight{ scaleHeight_ }, scaleWidth{ scaleWidth_ } + ResizeOp(csl::Stream stream_, const ResizeConfiguration& config) + : stream(std::move(stream_)) { + type = config.type; + align_corners = config.align_corners; + half_pixel_centers = config.half_pixel_centers; } void forward( @@ -44,16 +53,27 @@ namespace cv { namespace dnn { namespace cuda4dnn { auto output_wrapper = outputs[0].dynamicCast(); auto output = output_wrapper->getSpan(); + const auto compute_scale = [this](std::size_t input_size, std::size_t output_size) { + return (align_corners && output_size > 1) ? 
+ static_cast(input_size - 1) / (output_size - 1) : + static_cast(input_size) / output_size; + }; + + auto out_height = output.get_axis_size(-2), out_width = output.get_axis_size(-1); + auto in_height = input.get_axis_size(-2), in_width = input.get_axis_size(-1); + float scale_height = compute_scale(in_height, out_height), + scale_width = compute_scale(in_width, out_width); + if (type == InterpolationType::NEAREST_NEIGHBOUR) - kernels::resize_nn(stream, output, input); + kernels::resize_nn(stream, output, input, scale_height, scale_width, align_corners, half_pixel_centers); else if (type == InterpolationType::BILINEAR) - kernels::resize_bilinear(stream, output, input, scaleHeight, scaleWidth); + kernels::resize_bilinear(stream, output, input, scale_height, scale_width, half_pixel_centers); } private: csl::Stream stream; InterpolationType type; - float scaleHeight, scaleWidth; /* for bilinear interpolation */ + bool align_corners, half_pixel_centers; }; }}} /* namespace cv::dnn::cuda4dnn */ diff --git a/modules/dnn/src/darknet/darknet_io.cpp b/modules/dnn/src/darknet/darknet_io.cpp index c745d5f036..4915538ff7 100644 --- a/modules/dnn/src/darknet/darknet_io.cpp +++ b/modules/dnn/src/darknet/darknet_io.cpp @@ -241,6 +241,10 @@ namespace cv { { activation_param.type = "Sigmoid"; } + else if (type == "tanh") + { + activation_param.type = "TanH"; + } else { CV_Error(cv::Error::StsParseError, "Unsupported activation: " + type); @@ -554,6 +558,29 @@ namespace cv { fused_layer_names.push_back(last_layer); } + void setSAM(int from) + { + cv::dnn::LayerParams eltwise_param; + eltwise_param.name = "SAM-name"; + eltwise_param.type = "Eltwise"; + + eltwise_param.set("operation", "prod"); + eltwise_param.set("output_channels_mode", "same"); + + darknet::LayerParameter lp; + std::string layer_name = cv::format("sam_%d", layer_id); + lp.layer_name = layer_name; + lp.layer_type = eltwise_param.type; + lp.layerParams = eltwise_param; + lp.bottom_indexes.push_back(last_layer); + lp.bottom_indexes.push_back(fused_layer_names.at(from)); + last_layer = layer_name; + net->layers.push_back(lp); + + layer_id++; + fused_layer_names.push_back(last_layer); + } + void setUpsample(int scaleFactor) { cv::dnn::LayerParams param; @@ -620,7 +647,7 @@ namespace cv { // read section read_net = false; ++layers_counter; - const size_t layer_type_size = line.find("]") - 1; + const size_t layer_type_size = line.find(']') - 1; CV_Assert(layer_type_size < line.size()); std::string layer_type = line.substr(1, layer_type_size); net->layers_cfg[layers_counter]["layer_type"] = layer_type; @@ -833,6 +860,14 @@ namespace cv { from = from < 0 ? from + layers_counter : from; setParams.setScaleChannels(from); } + else if (layer_type == "sam") + { + std::string bottom_layer = getParam(layer_params, "from", ""); + CV_Assert(!bottom_layer.empty()); + int from = std::atoi(bottom_layer.c_str()); + from = from < 0 ? 
from + layers_counter : from; + setParams.setSAM(from); + } else if (layer_type == "upsample") { int scaleFactor = getParam(layer_params, "stride", 1); diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 0f60a393a5..668cce8fa6 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -93,6 +94,13 @@ static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); +bool DNN_DIAGNOSTICS_RUN = false; + +void enableModelDiagnostics(bool isDiagnosticsMode) +{ + DNN_DIAGNOSTICS_RUN = isDiagnosticsMode; +} + using std::vector; using std::map; using std::make_pair; @@ -239,11 +247,10 @@ private: #endif #ifdef HAVE_CUDA - if (haveCUDA() && cuda4dnn::isDeviceCompatible()) + if (haveCUDA()) { backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); - if (cuda4dnn::doesDeviceSupportFP16()) - backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); + backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16)); } #endif } @@ -1383,11 +1390,12 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(preferableBackend != DNN_BACKEND_HALIDE || preferableTarget == DNN_TARGET_CPU || preferableTarget == DNN_TARGET_OPENCL); +#ifdef HAVE_INF_ENGINE if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { CV_Assert( - preferableTarget == DNN_TARGET_CPU || + (preferableTarget == DNN_TARGET_CPU && (!isArmComputePlugin() || preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) || preferableTarget == DNN_TARGET_OPENCL || preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD || @@ -1395,6 +1403,7 @@ struct Net::Impl : public detail::NetImplBase preferableTarget == DNN_TARGET_FPGA ); } +#endif CV_Assert(preferableBackend != DNN_BACKEND_VKCOM || preferableTarget == DNN_TARGET_VULKAN); CV_Assert(preferableBackend != DNN_BACKEND_CUDA || @@ -2099,8 +2108,8 @@ struct Net::Impl : public detail::NetImplBase return; } - bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU || - BackendRegistry::checkIETarget(DNN_TARGET_CPU); + bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || + BackendRegistry::checkIETarget(DNN_TARGET_CPU)); // Build Inference Engine networks from sets of layers that support this // backend. Split a whole model on several Inference Engine networks if @@ -2363,6 +2372,9 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(preferableBackend == DNN_BACKEND_CUDA); #ifdef HAVE_CUDA + if (!cudaInfo) /* we need to check only once */ + cuda4dnn::checkVersions(); + if (cuda4dnn::getDeviceCount() <= 0) CV_Error(Error::StsError, "No CUDA capable device found."); @@ -2373,7 +2385,10 @@ struct Net::Impl : public detail::NetImplBase CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. 
Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration."); if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16()) - CV_Error(Error::StsError, "The selected CUDA device does not support FP16 operations."); + { + CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target."); + preferableTarget = DNN_TARGET_CUDA; + } if (!cudaInfo) { @@ -2384,7 +2399,6 @@ struct Net::Impl : public detail::NetImplBase auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers cudaInfo = std::unique_ptr(new CudaInfo_t(std::move(context), std::move(d2h_stream))); - cuda4dnn::checkVersions(); } cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any @@ -2972,7 +2986,7 @@ struct Net::Impl : public detail::NetImplBase // the concatenation optimization is applied with batch_size > 1. // so, for now, we only apply this optimization in the most popular // case batch_size == 1. - int axis = clamp(concatLayer->axis, output.dims); + int axis = normalize_axis(concatLayer->axis, output.dims); if( output.total(0, axis) == 1 ) { size_t i, ninputs = ld.inputBlobsId.size(); @@ -4461,7 +4475,7 @@ string Net::Impl::dump() prevNode = itBackend->second; } } - string colors[] = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151"}; + std::vector colors = {"#ffffb3", "#fccde5", "#8dd3c7", "#bebada", "#80b1d3", "#fdb462", "#ff4848", "#b35151", "#b266ff"}; string backend; switch (prefBackend) { @@ -4613,6 +4627,7 @@ string Net::Impl::dump() case DNN_TARGET_CUDA_FP16: out << "CUDA_FP16"; colorId = 6; break; // don't use default: } + CV_Assert(colorId < colors.size()); out << "\\n"; // align center out << ((clusterIds.size() == 1)? 
"\" " : " }\" "); out << "fillcolor=\"" << colors[colorId] << "\" "; @@ -5303,15 +5318,13 @@ static Mutex& getLayerFactoryMutex() return *instance; } -typedef std::map > LayerFactory_Impl; - static LayerFactory_Impl& getLayerFactoryImpl_() { static LayerFactory_Impl impl; return impl; } -static LayerFactory_Impl& getLayerFactoryImpl() +LayerFactory_Impl& getLayerFactoryImpl() { static LayerFactory_Impl* volatile instance = NULL; if (instance == NULL) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index c646c1fe3a..49717f8513 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -772,8 +772,14 @@ static InferenceEngine::Layout estimateLayout(const Mat& m) { if (m.dims == 4) return InferenceEngine::Layout::NCHW; + else if (m.dims == 3) + return InferenceEngine::Layout::CHW; else if (m.dims == 2) return InferenceEngine::Layout::NC; + else if (m.dims == 1) + return InferenceEngine::Layout::C; + else if (m.dims == 5) + return InferenceEngine::Layout::NCDHW; else return InferenceEngine::Layout::ANY; } diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 570a6ff665..698168817f 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -110,6 +110,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(BNLL, BNLLLayer); CV_DNN_REGISTER_LAYER_CLASS(AbsVal, AbsLayer); CV_DNN_REGISTER_LAYER_CLASS(Power, PowerLayer); + CV_DNN_REGISTER_LAYER_CLASS(Exp, ExpLayer); CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer); CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer); CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer); diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 1168755a29..edd9948db1 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -401,7 +401,11 @@ public: shape[1] = weights_.total(); auto weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), bias_.data); +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) + auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); +#else auto scale_node = std::make_shared(ieInpNode, weight, ngraph::op::AutoBroadcastType::NUMPY); +#endif auto scale_shift = std::make_shared(scale_node, bias, ngraph::op::AutoBroadcastType::NUMPY); return Ptr(new InfEngineNgraphNode(scale_shift)); } diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 8a0f4a67c6..a950c56167 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -79,7 +79,7 @@ public: { CV_Assert(inputs.size() > 0); outputs.resize(1, inputs[0]); - int cAxis = clamp(axis, inputs[0]); + int cAxis = normalize_axis(axis, inputs[0]); int axisSum = 0; for (size_t i = 0; i < inputs.size(); i++) @@ -201,7 +201,7 @@ public: inps.getUMatVector(inputs); outs.getUMatVector(outputs); - int cAxis = clamp(axis, inputs[0].dims); + int cAxis = normalize_axis(axis, inputs[0].dims); if (padding) return false; @@ -255,7 +255,7 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - int cAxis = clamp(axis, inputs[0].dims); + int cAxis = normalize_axis(axis, inputs[0].dims); Mat& outMat = outputs[0]; if (padding) @@ -296,7 +296,7 @@ public: auto context = reinterpret_cast(context_); auto input_wrapper = inputs[0].dynamicCast(); - auto concat_axis = clamp(axis, 
input_wrapper->getRank()); + auto concat_axis = normalize_axis(axis, input_wrapper->getRank()); return make_cuda_node(preferableTarget, std::move(context->stream), concat_axis, padding); } #endif @@ -305,7 +305,7 @@ public: { #ifdef HAVE_VULKAN vkcom::Tensor in = VkComTensor(input[0]); - int cAxis = clamp(axis, in.dimNum()); + int cAxis = normalize_axis(axis, in.dimNum()); std::shared_ptr op(new vkcom::OpConcat(cAxis)); return Ptr(new VkComBackendNode(input, op)); #endif // HAVE_VULKAN @@ -341,7 +341,7 @@ public: InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); InferenceEngine::Builder::ConcatLayer ieLayer(name); - ieLayer.setAxis(clamp(axis, input->getDims().size())); + ieLayer.setAxis(normalize_axis(axis, input->getDims().size())); ieLayer.setInputPorts(std::vector(inputs.size())); return Ptr(new InfEngineBackendNode(ieLayer)); } @@ -354,7 +354,7 @@ public: { InferenceEngine::DataPtr data = ngraphDataNode(inputs[0]); const int numDims = data->getDims().size(); - const int cAxis = clamp(axis, numDims); + const int cAxis = normalize_axis(axis, numDims); std::vector maxDims(numDims, 0); CV_Assert(inputs.size() == nodes.size()); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 02495f45ea..fb57f26511 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -125,6 +125,9 @@ public: { kernel_size.assign(1, kernel_size[0]); strides.assign(1, strides[0]); + dilations.assign(1, dilations[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); } CV_Assert(weightShape.dims() == kernel_size.size() + 2); for (int i = 0; i < kernel_size.size(); i++) { @@ -311,8 +314,8 @@ public: #ifdef HAVE_CUDA if (backendId == DNN_BACKEND_CUDA) { - /* only convolution 2d and 3d supported */ - if (ksize == 2 || ksize == 3) + /* only 1d, 2d and 3d convolutions supported */ + if (ksize > 0 && ksize <= 3) return true; return false; @@ -321,10 +324,13 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - if (ksize == 1) + bool isArmTarget = preferableTarget == DNN_TARGET_CPU && isArmComputePlugin(); + if (isArmTarget && blobs.empty()) return false; + if (ksize == 1) + return isArmTarget; if (ksize == 3) - return preferableTarget == DNN_TARGET_CPU; + return preferableTarget != DNN_TARGET_MYRIAD && !isArmTarget; bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || !isMyriad) && blobs.empty()) return false; @@ -802,7 +808,7 @@ public: CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1); auto& ieInpNode = nodes[0].dynamicCast()->node; std::vector dims = ieInpNode->get_shape(); - CV_Assert(dims.size() == 4 || dims.size() == 5); + CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, ""); std::shared_ptr ieWeights = nodes.size() > 1 ? 
nodes[1].dynamicCast()->node : nullptr; if (nodes.size() > 1) CV_Assert(ieWeights); // dynamic_cast should not fail @@ -840,7 +846,7 @@ public: else { auto shape = std::make_shared(ngraph::element::i64, - ngraph::Shape{kernel_shape.size()}, kernel_shape.data()); + ngraph::Shape{kernel_shape.size()}, std::vector(kernel_shape.begin(), kernel_shape.end())); ieWeights = std::make_shared(ieWeights, shape, true); } @@ -875,7 +881,7 @@ public: if (nodes.size() == 3) { auto bias_shape = std::make_shared(ngraph::element::i64, - ngraph::Shape{shape.size()}, shape.data()); + ngraph::Shape{shape.size()}, std::vector(shape.begin(), shape.end())); bias = std::make_shared(nodes[2].dynamicCast()->node, bias_shape, true); } else @@ -1244,7 +1250,7 @@ public: v20*vw20 + v21*vw21 + v22*vw22 + vbias; if (relu) vout = v_select(vout > z, vout, vout*vrc); - vx_store(outptr + out_j, vout); + v_store(outptr + out_j, vout); } } #endif @@ -1597,15 +1603,15 @@ public: v_float32x4 r2 = v_load_aligned(rptr + vsz_a*2); v_float32x4 r3 = v_load_aligned(rptr + vsz_a*3); - vs00 += w0*r0; - vs01 += w0*r1; - vs02 += w0*r2; - vs03 += w0*r3; + vs00 = v_fma(w0, r0, vs00); + vs01 = v_fma(w0, r1, vs01); + vs02 = v_fma(w0, r2, vs02); + vs03 = v_fma(w0, r3, vs03); - vs10 += w1*r0; - vs11 += w1*r1; - vs12 += w1*r2; - vs13 += w1*r3; + vs10 = v_fma(w1, r0, vs10); + vs11 = v_fma(w1, r1, vs11); + vs12 = v_fma(w1, r2, vs12); + vs13 = v_fma(w1, r3, vs13); } s0 += v_reduce_sum4(vs00, vs01, vs02, vs03); s1 += v_reduce_sum4(vs10, vs11, vs12, vs13); @@ -1688,16 +1694,7 @@ public: umat_blobs.resize(n); for (size_t i = 0; i < n; i++) { - if (use_half) - { - Mat matFP32; - convertFp16(inputs[i + 1], matFP32); - matFP32.copyTo(umat_blobs[i]); - } - else - { - inputs[i + 1].copyTo(umat_blobs[i]); - } + inputs[i + 1].copyTo(umat_blobs[i]); } inputs.resize(1); } @@ -1708,7 +1705,10 @@ public: umat_blobs.resize(n); for (size_t i = 0; i < n; i++) { - blobs[i].copyTo(umat_blobs[i]); + if (use_half) + convertFp16(blobs[i], umat_blobs[i]); + else + blobs[i].copyTo(umat_blobs[i]); } } @@ -1764,14 +1764,20 @@ public: if (fusedWeights) { - weightsMat.copyTo(umat_blobs[0]); + if (use_half) + convertFp16(weightsMat, umat_blobs[0]); + else + weightsMat.copyTo(umat_blobs[0]); fusedWeights = false; } if (fusedBias) { if ( umat_blobs.size() < 2 ) umat_blobs.resize(2); - umat_blobs[1] = UMat(biasvec, true); + if (use_half) + convertFp16(Mat(biasvec, true), umat_blobs[1]); + else + Mat(biasvec, true).copyTo(umat_blobs[1]); convolutionOp->setBias(true); fusedBias = false; } @@ -2001,6 +2007,21 @@ public: const auto groups = input_feature_maps / input_feature_maps_per_group; ConvolutionConfiguration config; + + if (input_shape.size() == 3) + { + // Conv1D + // We add an extra dim for input and output tensors, because CuDNN doesn't support convolution with 3D tensors + input_shape.insert(std::end(input_shape) - 1, 1); + output_shape.insert(std::end(output_shape) - 1, 1); + + // Do the similar thing for the other parameters + pads_begin.insert(std::begin(pads_begin), 0); + pads_end.insert(std::begin(pads_end), 0); + strides.insert(std::begin(strides), 1); + dilations.insert(std::begin(dilations), 1); + kernel_size.insert(std::begin(kernel_size), 1); + } config.kernel_size.assign(std::begin(kernel_size), std::end(kernel_size)); config.dilations.assign(std::begin(dilations), std::end(dilations)); config.strides.assign(std::begin(strides), std::end(strides)); @@ -2365,20 +2386,21 @@ public: for( ; n <= nmax - 4; n += 4 ) { + v_float32x4 d0 = v_load(dst0 + n); + v_float32x4 
d1 = v_load(dst1 + n); v_float32x4 b0 = v_load(bptr0 + n); v_float32x4 b1 = v_load(bptr1 + n); v_float32x4 b2 = v_load(bptr2 + n); v_float32x4 b3 = v_load(bptr3 + n); - v_float32x4 d0 = v_load(dst0 + n); - v_float32x4 d1 = v_load(dst1 + n); - d0 += b0*a00; - d1 += b0*a01; - d0 += b1*a10; - d1 += b1*a11; - d0 += b2*a20; - d1 += b2*a21; - d0 += b3*a30; - d1 += b3*a31; + // TODO try to improve pipeline width + d0 = v_fma(b0, a00, d0); + d1 = v_fma(b0, a01, d1); + d0 = v_fma(b1, a10, d0); + d1 = v_fma(b1, a11, d1); + d0 = v_fma(b2, a20, d0); + d1 = v_fma(b2, a21, d1); + d0 = v_fma(b3, a30, d0); + d1 = v_fma(b3, a31, d1); v_store(dst0 + n, d0); v_store(dst1 + n, d1); } @@ -2386,8 +2408,10 @@ public: for( ; n < nmax; n++ ) { - float b0 = bptr0[n], b1 = bptr1[n]; - float b2 = bptr2[n], b3 = bptr3[n]; + float b0 = bptr0[n]; + float b1 = bptr1[n]; + float b2 = bptr2[n]; + float b3 = bptr3[n]; float d0 = dst0[n] + alpha00*b0 + alpha10*b1 + alpha20*b2 + alpha30*b3; float d1 = dst1[n] + alpha01*b0 + alpha11*b1 + alpha21*b2 + alpha31*b3; dst0[n] = d0; diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 40556191f5..de97c873af 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -138,6 +138,12 @@ public: typedef std::map > LabelBBox; + inline int getNumOfTargetClasses() { + unsigned numBackground = + (_backgroundLabelId >= 0 && _backgroundLabelId < _numClasses) ? 1 : 0; + return (_numClasses - numBackground); + } + bool getParameterDict(const LayerParams ¶ms, const std::string ¶meterName, DictValue& result) @@ -590,12 +596,13 @@ public: LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label); if (label_bboxes == decodeBBoxes.end()) CV_Error_(cv::Error::StsError, ("Could not find location predictions for label %d", label)); + int limit = (getNumOfTargetClasses() == 1) ? _keepTopK : std::numeric_limits::max(); if (_bboxesNormalized) NMSFast_(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK, - indices[c], util::caffe_norm_box_overlap); + indices[c], util::caffe_norm_box_overlap, limit); else NMSFast_(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK, - indices[c], util::caffe_box_overlap); + indices[c], util::caffe_box_overlap, limit); numDetections += indices[c].size(); } if (_keepTopK > -1 && numDetections > (size_t)_keepTopK) @@ -617,8 +624,13 @@ public: } } // Keep outputs k results per image. - std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), - util::SortScorePairDescend >); + if ((_keepTopK * 8) > scoreIndexPairs.size()) { + std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(), + util::SortScorePairDescend >); + } else { + std::partial_sort(scoreIndexPairs.begin(), scoreIndexPairs.begin() + _keepTopK, scoreIndexPairs.end(), + util::SortScorePairDescend >); + } scoreIndexPairs.resize(_keepTopK); std::map > newIndices; @@ -853,16 +865,16 @@ public: for (int i = 0; i < num; ++i, locData += numPredsPerClass * numLocClasses * 4) { LabelBBox& labelBBox = locPreds[i]; + int start = shareLocation ? -1 : 0; + for (int c = 0; c < numLocClasses; ++c) { + labelBBox[start++].resize(numPredsPerClass); + } for (int p = 0; p < numPredsPerClass; ++p) { int startIdx = p * numLocClasses * 4; for (int c = 0; c < numLocClasses; ++c) { int label = shareLocation ? 
-1 : c; - if (labelBBox.find(label) == labelBBox.end()) - { - labelBBox[label].resize(numPredsPerClass); - } util::NormalizedBBox& bbox = labelBBox[label][p]; if (locPredTransposed) { diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index ed87a3e2fc..9bb5be342f 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -1354,11 +1354,15 @@ struct PowerFunctor : public BaseFunctor ngraph::Shape{1}, &scale); auto shift_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &shift); - auto power_node = std::make_shared(ngraph::element::f32, - ngraph::Shape{1}, &power); auto mul = std::make_shared(scale_node, node, ngraph::op::AutoBroadcastType::NUMPY); auto scale_shift = std::make_shared(mul, shift_node, ngraph::op::AutoBroadcastType::NUMPY); + + if (power == 1) + return scale_shift; + + auto power_node = std::make_shared(ngraph::element::f32, + ngraph::Shape{1}, &power); return std::make_shared(scale_shift, power_node, ngraph::op::AutoBroadcastType::NUMPY); } #endif // HAVE_DNN_NGRAPH @@ -1400,6 +1404,120 @@ struct PowerFunctor : public BaseFunctor int64 getFLOPSPerElement() const { return power == 1 ? 2 : 10; } }; +struct ExpFunctor : public BaseFunctor +{ + typedef ExpLayer Layer; + float base, scale, shift; + float normScale, normShift; + + ExpFunctor(float base_ = -1.f, float scale_ = 1.f, float shift_ = 0.f) + : base(base_), scale(scale_), shift(shift_) + { + // For base > 0 : + // y = base^(scale * input + shift) + // ln(y) = ln(base)*(scale * input + shift) + // y = exp((ln(base)*scale) * input + (ln(base)*shift)) + // y = exp(normalized_scale * input + normalized_shift) + CV_Check(base, base == -1.f || base > 0.f, "Unsupported 'base' value"); + const float ln_base = (base == -1.f) ? 
1.f : log(base); + normScale = scale * ln_base; + normShift = shift * ln_base; + } + + bool supportBackend(int backendId, int targetId) + { + return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; + } + + void apply(const float* srcptr, float* dstptr, int len, size_t planeSize, int cn0, int cn1) const + { + float a = normScale, b = normShift; + for( int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize ) + { + for( int i = 0; i < len; i++ ) + { + float x = srcptr[i]; + dstptr[i] = exp(a*x + b); + } + } + } + +#ifdef HAVE_OPENCL + bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals) + { + std::vector inputs; + std::vector outputs; + + inps.getUMatVector(inputs); + outs.getUMatVector(outputs); + String buildopt = oclGetTMacro(inputs[0]); + + for (size_t i = 0; i < inputs.size(); i++) + { + UMat& src = inputs[i]; + UMat& dst = outputs[i]; + + ocl::Kernel kernel("ExpForward", ocl::dnn::activations_oclsrc, buildopt); + kernel.set(0, (int)src.total()); + kernel.set(1, ocl::KernelArg::PtrReadOnly(src)); + kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst)); + kernel.set(3, (float)normScale); + kernel.set(4, (float)normShift); + + size_t gSize = src.total(); + CV_Assert(kernel.run(1, &gSize, NULL, false)); + } + return true; + } +#endif + +#ifdef HAVE_CUDA + Ptr initCUDA(int target, csl::Stream stream) + { + return make_cuda_node(target, stream, normScale, normShift); + } +#endif + +#ifdef HAVE_HALIDE + void attachHalide(const Halide::Expr& input, Halide::Func& top) + { + Halide::Var x("x"), y("y"), c("c"), n("n"); + top(x, y, c, n) = exp(normScale * input + normShift); + } +#endif // HAVE_HALIDE + +#ifdef HAVE_DNN_IE_NN_BUILDER_2019 + InferenceEngine::Builder::Layer initInfEngineBuilderAPI() + { + CV_Error(Error::StsNotImplemented, ""); + } +#endif // HAVE_DNN_IE_NN_BUILDER_2019 + +#ifdef HAVE_DNN_NGRAPH + std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + { + auto scale_node = std::make_shared(ngraph::element::f32, + ngraph::Shape{1}, &normScale); + auto shift_node = std::make_shared(ngraph::element::f32, + ngraph::Shape{1}, &normShift); + auto mul = std::make_shared(scale_node, node, ngraph::op::AutoBroadcastType::NUMPY); + auto scale_shift = std::make_shared(mul, shift_node, ngraph::op::AutoBroadcastType::NUMPY); + return std::make_shared(scale_shift); + } +#endif // HAVE_DNN_NGRAPH + +#ifdef HAVE_VULKAN + std::shared_ptr initVkCom() + { + // TODO: add vkcom implementation + return std::shared_ptr(); + } +#endif // HAVE_VULKAN + + int64 getFLOPSPerElement() const { return 3; } +}; + struct ChannelsPReLUFunctor : public BaseFunctor { typedef ChannelsPReLULayer Layer; @@ -1634,6 +1752,20 @@ Ptr PowerLayer::create(const LayerParams& params) return l; } +Ptr ExpLayer::create(const LayerParams& params) +{ + float base = params.get("base", -1.0f); + float scale = params.get("scale", 1.0f); + float shift = params.get("shift", 0.0f); + Ptr l(new ElementWiseLayer(ExpFunctor(base, scale, shift))); + l->setParamsFrom(params); + l->base = base; + l->scale = scale; + l->shift = shift; + + return l; +} + Ptr ChannelsPReLULayer::create(const LayerParams& params) { CV_Assert(params.blobs.size() == 1); diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 10ce70ff3e..a337c48d9e 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -46,6 +46,7 
@@ #include "../op_halide.hpp" #include "../op_inf_engine.hpp" #include "../ie_ngraph.hpp" +#include #ifdef HAVE_OPENCL #include "opencl_kernels_dnn.hpp" @@ -97,6 +98,7 @@ public: : outputChannels(0) { setParamsFrom(params); + hasVecInput = false; op = SUM; if (params.has("operation")) { @@ -156,6 +158,9 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { + if (hasVecInput && ELTWISE_CHANNNELS_SAME) + return backendId == DNN_BACKEND_OPENCV; + if (backendId == DNN_BACKEND_CUDA) { if(channelsModeInput == ELTWISE_CHANNNELS_INPUT_0 || channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE) @@ -211,9 +216,6 @@ public: { CV_Assert(0 && "Internal error"); } - - for (size_t j = 2; j < dims; j++) - CV_Assert(inputs[0][j] == inputs[i][j]); } channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME; @@ -221,9 +223,56 @@ public: outputs.assign(1, inputs[0]); outputs[0][1] = numChannels; + + if (dims > 2) + { + size_t vecIdx = 0; + bool isVecFound = false; + for (size_t i = 0; i < inputs.size(); i++) + { + bool allOnes = isAllOnes(inputs[i], 2, dims); + if (!allOnes && !isVecFound) + { + vecIdx = i; + isVecFound = true; + } + + if (!allOnes && i != vecIdx) + { + for (size_t j = 2; j < dims; j++) + { + CV_Assert(inputs[vecIdx][j] == inputs[i][j]); + } + } + } + + if (channelsModeInput == ELTWISE_CHANNNELS_SAME && isVecFound) + { + for (size_t j = 2; j < dims; j++) + { + outputs[0][j] = inputs[vecIdx][j]; + } + } + } + return false; } + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE + { + std::vector inputs; + inputs_arr.getMatVector(inputs); + + for (size_t i = 0; i < inputs.size(); i++) + { + MatShape inpShape = shape(inputs[i].size); + if (isAllOnes(inpShape, 2, inputs[i].dims)) + { + hasVecInput = true; + return; + } + } + } class EltwiseInvoker : public ParallelLoopBody { @@ -516,6 +565,9 @@ public: if ((inputs_.depth() == CV_16S && op != SUM) || (channelsMode != ELTWISE_CHANNNELS_SAME)) return false; + if (hasVecInput) + return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19477 + inputs_.getUMatVector(inputs); outputs_.getUMatVector(outputs); @@ -616,6 +668,47 @@ public: CV_Assert(outputs.size() == 1); const int nstripes = getNumThreads(); + + if (channelsModeInput == ELTWISE_CHANNNELS_SAME && inputs[0].dims > 2) + { + for (size_t i = 0; i < inputs.size(); i++) + { + MatShape inpShape = shape(inputs[i].size); + bool allOnes = isAllOnes(inpShape, 2, inputs[i].dims); + + if (allOnes) + { + Mat tmpInput = inputs[i]; + MatShape outShape = shape(outputs[0].size); + size_t xSize = outShape[2]; + for (size_t j = 3; j < outShape.size(); j++) + xSize *= outShape[j]; + + int dimVec[3] = {outShape[0], outShape[1], (int) xSize}; + std::vector matSizesVec(&dimVec[0], &dimVec[0] + 3); + inputs[i] = Mat(matSizesVec, tmpInput.type()); + + std::vector idx(outShape.size(), 0); + std::vector outIdx(inpShape.size(), 0); + + for (size_t j = 0; j < outShape[0]; j++) + { + outIdx[0] = idx[0] = j; + for(size_t k = 0; k < outShape[1]; k++) + { + outIdx[1] = idx[1] = k; + for (size_t x = 0; x < xSize; x++) + { + outIdx[2] = x; + inputs[i].at(outIdx.data()) = tmpInput.at(idx.data()); + } + } + } + inputs[i] = inputs[i].reshape(0, outShape); + } + } + } + EltwiseInvoker::run(*this, &inputs[0], (int)inputs.size(), outputs[0], nstripes); @@ -795,6 +888,9 @@ public: } Ptr activ; + +private: + bool hasVecInput; }; Ptr EltwiseLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/flatten_layer.cpp 
b/modules/dnn/src/layers/flatten_layer.cpp index b5ecd8b8ee..7cf01a14fa 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -89,8 +89,8 @@ public: } int numAxes = inputs[0].size(); - int startAxis = clamp(_startAxis, numAxes); - int endAxis = clamp(_endAxis, numAxes); + int startAxis = normalize_axis(_startAxis, numAxes); + int endAxis = normalize_axis(_endAxis, numAxes); CV_Assert(startAxis >= 0); CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes); @@ -120,8 +120,8 @@ public: inputs_arr.getMatVector(inputs); int numAxes = inputs[0].dims; - _startAxis = clamp(_startAxis, numAxes); - _endAxis = clamp(_endAxis, numAxes); + _startAxis = normalize_axis(_startAxis, numAxes); + _endAxis = normalize_axis(_endAxis, numAxes); } #ifdef HAVE_OPENCL @@ -195,8 +195,8 @@ virtual Ptr initNgraph(const std::vector >& inp std::vector dims = ieInpNode->get_shape(); int numAxes = dims.size(); - int startAxis = clamp(_startAxis, numAxes); - int endAxis = clamp(_endAxis, numAxes); + int startAxis = normalize_axis(_startAxis, numAxes); + int endAxis = normalize_axis(_endAxis, numAxes); CV_Assert(startAxis >= 0); CV_Assert(endAxis >= startAxis && endAxis < numAxes); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index f46a02af3f..709420c3ca 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -132,7 +132,7 @@ public: CV_CheckEQ(blobs[0].dims, 2, ""); numOutput = blobs[0].size[0]; CV_Assert(!bias || (size_t)numOutput == blobs[1].total()); - cAxis = clamp(axis, inputs[0]); + cAxis = normalize_axis(axis, inputs[0]); } MatShape outShape(cAxis + 1); @@ -245,16 +245,18 @@ public: #if CV_SIMD128 for( ; i <= nw - 4; i += 4, wptr += 4*wstep ) { - v_float32x4 vs0 = v_setall_f32(0.f), vs1 = v_setall_f32(0.f); - v_float32x4 vs2 = v_setall_f32(0.f), vs3 = v_setall_f32(0.f); + v_float32x4 vs0 = v_setall_f32(0.f); + v_float32x4 vs1 = v_setall_f32(0.f); + v_float32x4 vs2 = v_setall_f32(0.f); + v_float32x4 vs3 = v_setall_f32(0.f); for( k = 0; k < vecsize; k += 4 ) { v_float32x4 v = v_load_aligned(sptr + k); - vs0 += v*v_load_aligned(wptr + k); - vs1 += v*v_load_aligned(wptr + wstep + k); - vs2 += v*v_load_aligned(wptr + wstep*2 + k); - vs3 += v*v_load_aligned(wptr + wstep*3 + k); + vs0 = v_fma(v, v_load_aligned(wptr + k), vs0); + vs1 = v_fma(v, v_load_aligned(wptr + wstep + k), vs1); + vs2 = v_fma(v, v_load_aligned(wptr + wstep*2 + k), vs2); + vs3 = v_fma(v, v_load_aligned(wptr + wstep*3 + k), vs3); } v_float32x4 s = v_reduce_sum4(vs0, vs1, vs2, vs3); @@ -354,7 +356,7 @@ public: return true; } - int axisCan = clamp(axis, inputs[0].dims); + int axisCan = normalize_axis(axis, inputs[0].dims); int numOutput = blobs[0].size[0]; int innerSize = blobs[0].size[1]; int outerSize = total(shape(inputs[0]), 0, axisCan); @@ -475,7 +477,7 @@ public: if (!blobs.empty()) { - int axisCan = clamp(axis, input[0].dims); + int axisCan = normalize_axis(axis, input[0].dims); int outerSize = input[0].total(0, axisCan); for (size_t i = 0; i < input.size(); i++) @@ -523,7 +525,7 @@ public: auto input_wrapper = inputs[0].dynamicCast(); - auto flatten_start_axis = clamp(axis, input_wrapper->getRank()); + auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank()); auto biasMat_ = bias ? 
biasMat : Mat(); return make_cuda_node(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_); diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index db986bc897..783949d4cd 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -403,7 +403,15 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2) auto mvn = std::make_shared(ieInpNode, acrossChannels, normVariance, eps); +#else + int64_t start_axis = acrossChannels ? 1 : 2; + std::vector axes_v(ieInpNode->get_shape().size() - start_axis); + std::iota(axes_v.begin(), axes_v.end(), start_axis); + auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data()); + auto mvn = std::make_shared(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT); +#endif return Ptr(new InfEngineNgraphNode(mvn)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index a979fdedb6..24559543e1 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -126,8 +126,8 @@ public: const UMat& inp0 = inputs[0]; UMat& buffer = internals[0]; - startAxis = clamp(startAxis, inp0.dims); - endAxis = clamp(endAxis, inp0.dims); + startAxis = normalize_axis(startAxis, inp0.dims); + endAxis = normalize_axis(endAxis, inp0.dims); size_t num = total(shape(inp0.size), 0, startAxis); size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1); @@ -211,8 +211,8 @@ public: const Mat& inp0 = inputs[0]; Mat& buffer = internals[0]; - startAxis = clamp(startAxis, inp0.dims); - endAxis = clamp(endAxis, inp0.dims); + startAxis = normalize_axis(startAxis, inp0.dims); + endAxis = normalize_axis(endAxis, inp0.dims); const float* inpData = inp0.ptr(); float* outData = outputs[0].ptr(); @@ -334,8 +334,8 @@ public: if (!acrossSpatial) { axes_data.push_back(1); } else { - axes_data.resize(ieInpNode->get_shape().size()); - std::iota(axes_data.begin(), axes_data.end(), 0); + axes_data.resize(ieInpNode->get_shape().size() - 1); + std::iota(axes_data.begin(), axes_data.end(), 1); } auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data); auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); @@ -344,19 +344,18 @@ public: std::vector shape(ieInpNode->get_shape().size(), 1); shape[0] = blobs.empty() ? 
1 : batch; shape[1] = numChannels; - std::shared_ptr weight; - if (blobs.empty()) + if (!blobs.empty()) { - std::vector ones(numChannels, 1); - weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), ones.data()); - } - else - { - weight = std::make_shared( + auto weight = std::make_shared( ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) + auto mul = std::make_shared(norm, weight, ngraph::op::AutoBroadcastType::NUMPY); +#else + auto mul = std::make_shared(norm, weight, ngraph::op::AutoBroadcastType::NUMPY); +#endif + return Ptr(new InfEngineNgraphNode(mul)); } - auto mul = std::make_shared(norm, weight, ngraph::op::AutoBroadcastType::NUMPY); - return Ptr(new InfEngineNgraphNode(mul)); + return Ptr(new InfEngineNgraphNode(norm)); } #endif // HAVE_DNN_NGRAPH @@ -378,8 +377,8 @@ public: NormalizeConfiguration config; config.input_shape.assign(std::begin(input_shape), std::end(input_shape)); - config.axis_start = clamp(startAxis, input_shape.size()); - config.axis_end = clamp(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */ + config.axis_start = normalize_axis(startAxis, input_shape.size()); + config.axis_end = normalize_axis(endAxis, input_shape.size()) + 1; /* +1 because NormalizeOp follows [start, end) convention */ config.norm = pnorm; config.eps = epsilon; diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index b286133419..d182568795 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -105,9 +105,10 @@ public: if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { bool isMyriad = preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL; - return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && - (!isMyriad || - (dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0)); + if (INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && isMyriad) + return dstRanges.size() == 4 && paddings[0].first == 0 && paddings[0].second == 0; + + return (dstRanges.size() <= 4 || !isArmComputePlugin()); } #endif return backendId == DNN_BACKEND_OPENCV || diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 05f8c380cc..c525c3f82f 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -113,6 +113,10 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { +#ifdef HAVE_INF_ENGINE + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && preferableTarget == DNN_TARGET_CPU) + return _order.size() <= 4 || !isArmComputePlugin(); +#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && haveInfEngine()) || diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 621315a572..b8e2cfdf8f 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -71,6 +71,14 @@ using std::min; using namespace cv::dnn::ocl4dnn; #endif +#ifdef HAVE_HALIDE +#if 0 // size_t is not well supported in Halide operations +typedef size_t HALIDE_DIFF_T; +#else +typedef int HALIDE_DIFF_T; +#endif +#endif + #ifdef HAVE_CUDA #include "../cuda4dnn/primitives/pooling.hpp" #include 
"../cuda4dnn/primitives/roi_pooling.hpp" @@ -78,6 +86,7 @@ using namespace cv::dnn::ocl4dnn; using namespace cv::dnn::cuda4dnn; #endif + namespace cv { namespace dnn @@ -169,14 +178,13 @@ public: if (inputs[0].dims == 3) { - //Pool1D - kernel_size.erase(kernel_size.begin() + 1); - strides.erase(strides.begin() + 1); - pads_begin.erase(pads_begin.begin() + 1); - pads_end.erase(pads_end.begin() + 1); + // Pool1D + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); } - #ifdef HAVE_OPENCL poolOp.release(); #endif @@ -212,7 +220,9 @@ public: #endif if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1; +#ifdef HAVE_DNN_NGRAPH + return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); +#endif } else if (backendId == DNN_BACKEND_OPENCV) { @@ -383,6 +393,19 @@ public: return make_cuda_node(preferableTarget, std::move(context->stream), config); } + if (input_shape.size() == 3) + { + // Pool1D + // We add an extra dim for input tensor, because CuDNN support pooling only with 2 and 3 spatial dimensions + input_shape.insert(std::end(input_shape) - 1, 1); + + // Do the similar thing for the other parameters + pads_begin.insert(std::begin(pads_begin), 0); + pads_end.insert(std::begin(pads_end), 0); + strides.insert(std::begin(strides), 1); + kernel_size.insert(std::begin(kernel_size), 1); + } + PoolingConfiguration config; if (type == MAX) { @@ -440,9 +463,9 @@ public: { int padding_mode; vkcom::PoolType pool_type; - int filter_size[2] = {kernel.height, kernel.width}; - int pad_size[2] = {pad.height, pad.width}; - int stride_size[2] = {stride.height, stride.width}; + int filter_size[2] = {static_cast(kernel_size[0]), static_cast(kernel_size[1])}; + int pad_size[2] = {static_cast(pads_begin[0]), static_cast(pads_begin[1])}; + int stride_size[2] = {static_cast(strides[0]), static_cast(strides[1])}; pool_type = type == MAX ? vkcom::kPoolTypeMax: (type == AVE ? vkcom::kPoolTypeAvg: vkcom::kPoolTypeNum); @@ -896,7 +919,7 @@ public: if (max_elem!=last) { dstData[x0] = *max_elem; - if( compMaxIdx ) + if( compMaxIdx && dstMaskData ) { dstMaskData[x0] = std::distance(first, max_elem); } @@ -1097,12 +1120,12 @@ public: Halide::Buffer inputBuffer = halideBuffer(inputs[0]); const int inWidth = inputBuffer.width(); const int inHeight = inputBuffer.height(); - const size_t kernelHeight = kernel_size[0]; - const size_t kernelWidth = kernel_size[1]; - const size_t strideHeight = strides[0]; - const size_t strideWidth = strides[1]; - const size_t paddingTop = pads_begin[0]; - const size_t paddingLeft = pads_begin[1]; + const HALIDE_DIFF_T kernelHeight = (HALIDE_DIFF_T)kernel_size[0]; + const HALIDE_DIFF_T kernelWidth = (HALIDE_DIFF_T)kernel_size[1]; + const HALIDE_DIFF_T strideHeight = (HALIDE_DIFF_T)strides[0]; + const HALIDE_DIFF_T strideWidth = (HALIDE_DIFF_T)strides[1]; + const HALIDE_DIFF_T paddingTop = (HALIDE_DIFF_T)pads_begin[0]; + const HALIDE_DIFF_T paddingLeft = (HALIDE_DIFF_T)pads_begin[1]; Halide::Var x("x"), y("y"), c("c"), n("n"); Halide::Func top = (name.empty() ? 
Halide::Func() : Halide::Func(name));
@@ -1148,10 +1171,10 @@ public:
 Halide::Buffer inputBuffer = halideBuffer(inputs[0]);
 const int inW = inputBuffer.width(), inH = inputBuffer.height();
- const size_t kernelHeight = kernel_size[0];
- const size_t kernelWidth = kernel_size[1];
- const size_t strideHeight = strides[0];
- const size_t strideWidth = strides[1];
+ const HALIDE_DIFF_T kernelHeight = (HALIDE_DIFF_T)kernel_size[0];
+ const HALIDE_DIFF_T kernelWidth = (HALIDE_DIFF_T)kernel_size[1];
+ const HALIDE_DIFF_T strideHeight = (HALIDE_DIFF_T)strides[0];
+ const HALIDE_DIFF_T strideWidth = (HALIDE_DIFF_T)strides[1];
 if ((inW - kernelWidth) % strideWidth || (inH - kernelHeight) % strideHeight)
 {
 CV_Error(cv::Error::StsNotImplemented,
diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp
index 4658e7b41f..aeb5d44a47 100644
--- a/modules/dnn/src/layers/proposal_layer.cpp
+++ b/modules/dnn/src/layers/proposal_layer.cpp
@@ -54,11 +54,11 @@ public:
 for (int i = 0; i < ratios.size(); ++i)
 {
 float ratio = ratios.get(i);
+ float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
+ float height = std::floor(width * ratio + 0.5f);
 for (int j = 0; j < scales.size(); ++j)
 {
 float scale = scales.get(j);
- float width = std::floor(baseSize / sqrt(ratio) + 0.5f);
- float height = std::floor(width * ratio + 0.5f);
 widths.push_back(scale * width);
 heights.push_back(scale * height);
 }
@@ -292,7 +292,8 @@ public:
 CV_Assert(imInfo.total() >= 2);
 // We've chosen the smallest data type because we need just a shape from it.
- fakeImageBlob.create(shape(1, 1, imInfo.at(0), imInfo.at(1)), CV_8UC1);
+ // We don't allocate memory; we just need the shape to be correct.
+ Mat fakeImageBlob(shape(1, 1, imInfo.at(0), imInfo.at(1)), CV_8UC1, NULL);
 // Generate prior boxes.
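
The proposal_layer hunk above hoists the width/height computation out of the scales loop, since both values depend only on the ratio. A minimal standalone sketch of the same loop-invariant motion (the function name and signature are illustrative, not the OpenCV internals):

#include <cmath>
#include <vector>

// Anchor widths/heights for every (ratio, scale) pair; width and height are
// invariant w.r.t. the inner loop, so they are computed once per ratio.
static void makeAnchorSizes(float baseSize,
                            const std::vector<float>& ratios,
                            const std::vector<float>& scales,
                            std::vector<float>& widths,
                            std::vector<float>& heights)
{
    for (float ratio : ratios)
    {
        const float width  = std::floor(baseSize / std::sqrt(ratio) + 0.5f);
        const float height = std::floor(width * ratio + 0.5f);
        for (float scale : scales)
        {
            widths.push_back(scale * width);
            heights.push_back(scale * height);
        }
    }
}
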
std::vector layerInputs(2), layerOutputs(1, priorBoxes); @@ -433,7 +434,6 @@ private: Ptr deltasPermute; Ptr scoresPermute; uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize; - Mat fakeImageBlob; float nmsThreshold; DictValue ratios, scales; #ifdef HAVE_OPENCL diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 5ddb5342d0..7da211afb0 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -460,8 +460,10 @@ public: std::vector mask(anchors, 1); region = std::make_shared(tr_input, coords, classes, anchors, useSoftmax, mask, 1, 3, anchors_vec); + auto tr_shape = tr_input->get_shape(); auto shape_as_inp = std::make_shared(ngraph::element::i64, - ngraph::Shape{tr_input->get_shape().size()}, tr_input->get_shape().data()); + ngraph::Shape{tr_shape.size()}, + std::vector(tr_shape.begin(), tr_shape.end())); region = std::make_shared(region, shape_as_inp, true); new_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{4}, std::vector{0, 2, 3, 1}); @@ -607,7 +609,7 @@ public: result = std::make_shared(result, tr_axes); if (b > 1) { - std::vector sizes = {(size_t)b, result->get_shape()[0] / b, result->get_shape()[1]}; + std::vector sizes{b, static_cast(result->get_shape()[0]) / b, static_cast(result->get_shape()[1])}; auto shape_node = std::make_shared(ngraph::element::i64, ngraph::Shape{sizes.size()}, sizes.data()); result = std::make_shared(result, shape_node, true); } diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index 4c603c1ac8..ab8f41c7b6 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -66,14 +66,7 @@ static void computeShapeByReshapeMask(const MatShape &srcShape, int srcShapeSize = (int)srcShape.size(); int maskShapeSize = (int)maskShape.size(); - if (srcRange == Range::all()) - srcRange = Range(0, srcShapeSize); - else - { - int sz = srcRange.size(); - srcRange.start = clamp(srcRange.start, srcShapeSize); - srcRange.end = srcRange.end == INT_MAX ? srcShapeSize : srcRange.start + sz; - } + srcRange = normalize_axis_range(srcRange, srcShapeSize); bool explicitMask = !maskShape.empty(); // All mask values are positive. 
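
Many hunks in this patch rename the old clamp() helpers to normalize_axis()/normalize_axis_range(), including the computeShapeByReshapeMask() change above. A minimal sketch of the intended semantics, assuming NumPy-style negative indexing; this is an illustrative reimplementation, not the dnn helper itself:

#include <climits>
#include <opencv2/core.hpp>

// Negative axes count from the end: axis -1 on a 4-D shape becomes 3.
static int normalizeAxisSketch(int axis, int dims)
{
    int a = axis < 0 ? axis + dims : axis;
    CV_Assert(0 <= a && a < dims);
    return a;
}

// Range::all() maps to [0, dims); an INT_MAX end means "up to the last axis",
// mirroring the code replaced in computeShapeByReshapeMask().
static cv::Range normalizeAxisRangeSketch(cv::Range r, int dims)
{
    if (r == cv::Range::all())
        return cv::Range(0, dims);
    const int sz = r.size();
    r.start = normalizeAxisSketch(r.start, dims);
    r.end = (r.end == INT_MAX) ? dims : r.start + sz;
    return r;
}
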
for (int i = 0, n = maskShape.size(); i < n && explicitMask; ++i) diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index a19c2d050f..e872c7f6b0 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -48,6 +48,7 @@ public: CV_Check(interpolation, interpolation == "nearest" || interpolation == "opencv_linear" || interpolation == "bilinear", ""); alignCorners = params.get("align_corners", false); + halfPixelCenters = params.get("half_pixel_centers", false); } bool getMemoryShapes(const std::vector &inputs, @@ -71,7 +72,7 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { if (backendId == DNN_BACKEND_CUDA) - return interpolation == "nearest" || interpolation == "bilinear"; + return interpolation == "nearest" || interpolation == "bilinear" || interpolation == "opencv_linear"; #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) @@ -123,7 +124,7 @@ public: Mat& inp = inputs[0]; Mat& out = outputs[0]; - if (interpolation == "nearest" || interpolation == "opencv_linear") + if ((interpolation == "nearest" && !alignCorners && !halfPixelCenters) || interpolation == "opencv_linear" || (interpolation == "bilinear" && halfPixelCenters)) { InterpolationFlags mode = interpolation == "nearest" ? INTER_NEAREST : INTER_LINEAR; for (size_t n = 0; n < inputs[0].size[0]; ++n) @@ -135,6 +136,54 @@ public: } } } + else if (interpolation == "nearest") + { + const int inpHeight = inp.size[2]; + const int inpWidth = inp.size[3]; + const int inpSpatialSize = inpHeight * inpWidth; + const int outSpatialSize = outHeight * outWidth; + const int numPlanes = inp.size[0] * inp.size[1]; + CV_Assert_N(inp.isContinuous(), out.isContinuous()); + + Mat inpPlanes = inp.reshape(1, numPlanes * inpHeight); + Mat outPlanes = out.reshape(1, numPlanes * outHeight); + + float heightOffset = 0.0f; + float widthOffset = 0.0f; + + if (halfPixelCenters) + { + heightOffset = 0.5f * scaleHeight; + widthOffset = 0.5f * scaleWidth; + } + + for (int y = 0; y < outHeight; ++y) + { + float input_y = y * scaleHeight + heightOffset; + int y0 = halfPixelCenters ? std::floor(input_y) : lroundf(input_y); + y0 = std::min(y0, inpHeight - 1); + + const float* inpData_row = inpPlanes.ptr(y0); + + for (int x = 0; x < outWidth; ++x) + { + float input_x = x * scaleWidth + widthOffset; + int x0 = halfPixelCenters ? 
std::floor(input_x) : lroundf(input_x); + x0 = std::min(x0, inpWidth - 1); + + float* outData = outPlanes.ptr(y, x); + const float* inpData_row_c = inpData_row; + + for (int c = 0; c < numPlanes; ++c) + { + *outData = inpData_row_c[x0]; + + inpData_row_c += inpSpatialSize; + outData += outSpatialSize; + } + } + } + } else if (interpolation == "bilinear") { const int inpHeight = inp.size[2]; @@ -218,6 +267,7 @@ public: { auto& ieInpNode = nodes[0].dynamicCast()->node; +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2021_2) ngraph::op::InterpolateAttrs attrs; attrs.pads_begin.push_back(0); attrs.pads_end.push_back(0); @@ -236,6 +286,37 @@ public: std::vector shape = {outHeight, outWidth}; auto out_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); auto interp = std::make_shared(ieInpNode, out_shape, attrs); +#else + ngraph::op::v4::Interpolate::InterpolateAttrs attrs; + + if (interpolation == "nearest") { + attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::nearest; + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel; + } else if (interpolation == "bilinear") { + attrs.mode = ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx; + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric; + } else { + CV_Error(Error::StsNotImplemented, format("Unsupported interpolation: %s", interpolation.c_str())); + } + attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes; + + if (alignCorners) { + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners; + } + + attrs.nearest_mode = ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor; + + std::vector shape = {outHeight, outWidth}; + auto out_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); + + auto& input_shape = ieInpNode->get_shape(); + CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0); + std::vector scales = {static_cast(outHeight) / input_shape[2], static_cast(outWidth) / input_shape[3]}; + auto scales_shape = std::make_shared(ngraph::element::f32, ngraph::Shape{2}, scales.data()); + + auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, std::vector{2, 3}); + auto interp = std::make_shared(ieInpNode, out_shape, scales_shape, axes, attrs); +#endif return Ptr(new InfEngineNgraphNode(interp)); } #endif // HAVE_DNN_NGRAPH @@ -250,15 +331,28 @@ public: { auto context = reinterpret_cast(context_); - cuda4dnn::InterpolationType itype; + cuda4dnn::ResizeConfiguration config; if (interpolation == "nearest") - itype = InterpolationType::NEAREST_NEIGHBOUR; + { + config.type = InterpolationType::NEAREST_NEIGHBOUR; + config.align_corners = alignCorners; + config.half_pixel_centers = halfPixelCenters; + } else if (interpolation == "bilinear") - itype = InterpolationType::BILINEAR; + { + config.type = InterpolationType::BILINEAR; + config.align_corners = alignCorners; + config.half_pixel_centers = halfPixelCenters; + } + else if (interpolation == "opencv_linear") + { + config.type = InterpolationType::BILINEAR; + config.align_corners = false; + config.half_pixel_centers = true; + } else CV_Error(Error::StsNotImplemented, "Requested interpolation mode is not available in resize layer."); - - return make_cuda_node(preferableTarget, std::move(context->stream), itype, scaleHeight, scaleWidth); + return make_cuda_node(preferableTarget, std::move(context->stream), config); } #endif @@ -269,6 
+363,7 @@ protected: String interpolation; float scaleWidth, scaleHeight; bool alignCorners; + bool halfPixelCenters; }; diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index f348b1e5be..a5c268214e 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -305,7 +305,7 @@ public: numChannels = blobs[0].total(); std::vector shape(ieInpNode0->get_shape().size(), 1); - int cAxis = clamp(axis, shape.size()); + int cAxis = normalize_axis(axis, shape.size()); shape[cAxis] = numChannels; auto node = ieInpNode0; @@ -314,7 +314,11 @@ public: auto weight = blobs.empty() ? ieInpNode1 : std::make_shared(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); +#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) + node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); +#else node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); +#endif } if (hasBias || !hasWeights) { diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index fa2d755b71..54e2340387 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -70,6 +70,7 @@ public: SliceLayerImpl(const LayerParams& params) { setParamsFrom(params); + hasSteps = false; axis = params.get("axis", 1); num_split = params.get("num_split", 0); hasDynamicShapes = params.get("has_dynamic_shapes", false); @@ -79,7 +80,7 @@ public: CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end")); const DictValue &indicesValue = params.get("slice_point"); sliceRanges.resize(indicesValue.size() + 1, - std::vector(axis + 1, Range::all())); + std::vector(std::max(axis,0) + 1, Range::all())); int prevSlice = 0; for (int i = 0; i < indicesValue.size(); ++i) { @@ -118,6 +119,22 @@ public: sliceRanges[0][i].end = end; // We'll finalize a negative value later. 
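
The slice_layer hunk that follows adds ONNX-style steps: with a step greater than 1, the output length along an axis is the ceiling of the range length divided by the step, which is exactly the (len + step - 1) / step expression used in getMemoryShapes() below. A minimal sketch with a hypothetical helper name:

#include <cassert>

// Output length of a slice [begin, end) taken with a stride of `step`.
static int slicedLength(int begin, int end, int step)
{
    assert(step >= 1 && end >= begin);
    return (end - begin + step - 1) / step; // ceil((end - begin) / step)
}

// e.g. slicedLength(0, 10, 3) == 4 -> picks indices 0, 3, 6, 9
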
} } + + if (params.has("steps")) + { + const DictValue &steps = params.get("steps"); + sliceSteps.resize(1); + sliceSteps[0].resize(steps.size()); + + for (int i = 0; i < steps.size(); ++i) + { + int step = steps.get(i); + CV_Assert(step >= 1); + if (step > 1) + hasSteps = true; + sliceSteps[0][i] = step; + } + } } } @@ -126,14 +143,17 @@ public: #ifdef HAVE_DNN_IE_NN_BUILDER_2019 if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R1) && - sliceRanges.size() == 1 && sliceRanges[0].size() == 4; + sliceRanges.size() == 1 && sliceRanges[0].size() == 4 && !hasSteps; #endif #ifdef HAVE_DNN_NGRAPH if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return sliceRanges.size() == 1; + return sliceRanges.size() == 1 && !hasSteps; #endif - return backendId == DNN_BACKEND_OPENCV || - backendId == DNN_BACKEND_CUDA; +#ifdef HAVE_CUDA + if (backendId == DNN_BACKEND_CUDA) + return !hasSteps; +#endif + return backendId == DNN_BACKEND_OPENCV; } bool getMemoryShapes(const std::vector &inputs, @@ -153,7 +173,10 @@ public: for (int j = 0; j < sliceRanges[i].size(); ++j) { if (shapesInitialized || inpShape[j] > 0) - outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size(); + outputs[i][j] = normalize_axis_range(sliceRanges[i][j], inpShape[j]).size(); + + if (!sliceSteps.empty() && (i < sliceSteps.size()) && (j < sliceSteps[i].size()) && (sliceSteps[i][j] > 1)) + outputs[i][j] = (outputs[i][j] + sliceSteps[i][j] - 1) / sliceSteps[i][j]; } } } @@ -188,6 +211,7 @@ public: const MatSize& inpShape = inputs[0].size; finalSliceRanges = sliceRanges; + if (sliceRanges.empty()) { // Divide input blob on equal parts by axis. @@ -216,10 +240,13 @@ public: // Clamp. for (int j = 0; j < finalSliceRanges[i].size(); ++j) { - finalSliceRanges[i][j] = clamp(finalSliceRanges[i][j], inpShape[j]); + finalSliceRanges[i][j] = normalize_axis_range(finalSliceRanges[i][j], inpShape[j]); } } + if (!sliceSteps.empty() && sliceSteps[0].size() != inputs[0].dims) + sliceSteps[0].resize(inputs[0].dims, 1); + #if 0 std::cout << "DEBUG: DNN/Slice: " << outputs.size() << " inpShape=" << inpShape << std::endl; for (int i = 0; i < outputs.size(); ++i) @@ -427,6 +454,9 @@ public: { CV_TRACE_FUNCTION(); + if (hasSteps) + return false; // TODO not implemented yet: https://github.com/opencv/opencv/pull/19546 + std::vector inputs; std::vector outputs; @@ -485,9 +515,24 @@ public: const Mat& inpMat = inputs[0]; CV_Assert(outputs.size() == finalSliceRanges.size()); - for (size_t i = 0; i < outputs.size(); i++) + + if (!hasSteps) { - inpMat(finalSliceRanges[i]).copyTo(outputs[i]); + for (size_t i = 0; i < outputs.size(); i++) + { + inpMat(finalSliceRanges[i]).copyTo(outputs[i]); + } + } + else + { + int dimsNum = inpMat.dims; + + for (size_t i = 0; i < outputs.size(); i++) + { + std::vector inpIdx(dimsNum, 0); + std::vector outIdx(dimsNum, 0); + getSliceRecursive(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx); + } } } @@ -603,11 +648,42 @@ public: #endif +private: + void getSliceRecursive(const Mat &inpMat, std::vector &inpIdx, + const std::vector &sliceRanges, + const std::vector &sliceSteps, int dim, int dimsNum, + Mat &outputs, std::vector &outIdx) + { + int begin = sliceRanges[dim].start; + int end = sliceRanges[dim].end; + int step = !sliceSteps.empty() ? 
sliceSteps[dim] : 1; + + const bool is32F = inpMat.depth() == CV_32F; + + // TODO optimization is required (for 2D tail case at least) + for (int k = begin, j = 0; k < end; k += step, j++) + { + inpIdx[dim] = k; + outIdx[dim] = j; + + if (dim + 1 < dimsNum) + getSliceRecursive(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx); + else + { + if (is32F) + outputs.at(outIdx.data()) = inpMat.at(inpIdx.data()); + else + outputs.at(outIdx.data()) = inpMat.at(inpIdx.data()); // 16F emulation + } + } + } + protected: // The actual non-negative values determined from @p sliceRanges depends on input size. std::vector > finalSliceRanges; bool hasDynamicShapes; bool shapesInitialized; + bool hasSteps; }; class CropLayerImpl CV_FINAL : public SliceLayerImpl @@ -634,7 +710,7 @@ public: CV_Assert(inputs.size() == 2); MatShape dstShape = inputs[0]; - int start = clamp(axis, dstShape); + int start = normalize_axis(axis, dstShape); for (int i = start; i < dstShape.size(); i++) { dstShape[i] = inputs[1][i]; @@ -653,7 +729,7 @@ public: const Mat &inpSzBlob = inputs[1]; int dims = inpBlob.dims; - int start_axis = clamp(axis, dims); + int start_axis = normalize_axis(axis, dims); std::vector offset_final(dims, 0); if (offset.size() == 1) diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 6715c86e39..546c1017ad 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -89,7 +89,7 @@ public: { bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); MatShape shape = inputs[0]; - int cAxis = clamp(axisRaw, shape.size()); + int cAxis = normalize_axis(axisRaw, shape.size()); shape[cAxis] = 1; internals.assign(1, shape); return inplace; @@ -124,7 +124,7 @@ public: UMat& src = inputs[0]; UMat& dstMat = outputs[0]; - int axis = clamp(axisRaw, src.dims); + int axis = normalize_axis(axisRaw, src.dims); if (softmaxOp.empty()) { @@ -216,7 +216,7 @@ public: const Mat &src = inputs[0]; Mat &dst = outputs[0]; - int axis = clamp(axisRaw, src.dims); + int axis = normalize_axis(axisRaw, src.dims); size_t outerSize = src.total(0, axis), channels = src.size[axis], innerSize = src.total(axis + 1); @@ -306,7 +306,7 @@ public: auto context = reinterpret_cast(context_); auto input_wrapper = inputs[0].dynamicCast(); - auto channel_axis = clamp(axisRaw, input_wrapper->getRank()); + auto channel_axis = normalize_axis(axisRaw, input_wrapper->getRank()); return make_cuda_node(preferableTarget, std::move(context->cudnn_handle), channel_axis, logSoftMax); } #endif @@ -315,7 +315,7 @@ public: { #ifdef HAVE_VULKAN vkcom::Tensor in = VkComTensor(inputs[0]); - int cAxis = clamp(axisRaw, in.dimNum()); + int cAxis = normalize_axis(axisRaw, in.dimNum()); std::shared_ptr op(new vkcom::OpSoftmax(cAxis, logSoftMax)); return Ptr(new VkComBackendNode(inputs, op)); #endif // HAVE_VULKAN @@ -354,7 +354,7 @@ public: InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]); InferenceEngine::Builder::SoftMaxLayer ieLayer(name); - ieLayer.setAxis(clamp(axisRaw, input->getDims().size())); + ieLayer.setAxis(normalize_axis(axisRaw, input->getDims().size())); return Ptr(new InfEngineBackendNode(ieLayer)); } @@ -365,7 +365,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - int axis = clamp(axisRaw, ieInpNode->get_shape().size()); + int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size()); auto softmax = std::make_shared(ieInpNode, axis); if 
(logSoftMax) return Ptr(new InfEngineNgraphNode(std::make_shared(softmax))); diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index aefeaa42b3..0af8223a7f 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -4,7 +4,6 @@ #include "precomp.hpp" #include -#include #include #include @@ -37,9 +36,10 @@ public: virtual void setPreferableBackend(Backend backendId) { net.setPreferableBackend(backendId); } virtual void setPreferableTarget(Target targetId) { net.setPreferableTarget(targetId); } - /*virtual*/ + virtual void initNet(const Net& network) { + CV_TRACE_FUNCTION(); net = network; outNames = net.getUnconnectedOutLayersNames(); @@ -91,6 +91,7 @@ public: /*virtual*/ void processFrame(InputArray frame, OutputArrayOfArrays outs) { + CV_TRACE_FUNCTION(); if (size.empty()) CV_Error(Error::StsBadSize, "Input size not specified"); @@ -103,6 +104,7 @@ public: Mat imInfo(Matx13f(size.height, size.width, 1.6f)); net.setInput(imInfo, "im_info"); } + net.forward(outs, outNames); } }; @@ -320,34 +322,78 @@ void SegmentationModel::segment(InputArray frame, OutputArray mask) } } -void disableRegionNMS(Net& net) +class DetectionModel_Impl : public Model::Impl { - for (String& name : net.getUnconnectedOutLayersNames()) +public: + virtual ~DetectionModel_Impl() {} + DetectionModel_Impl() : Impl() {} + DetectionModel_Impl(const DetectionModel_Impl&) = delete; + DetectionModel_Impl(DetectionModel_Impl&&) = delete; + + void disableRegionNMS(Net& net) { - int layerId = net.getLayerId(name); - Ptr layer = net.getLayer(layerId).dynamicCast(); - if (!layer.empty()) + for (String& name : net.getUnconnectedOutLayersNames()) { - layer->nmsThreshold = 0; + int layerId = net.getLayerId(name); + Ptr layer = net.getLayer(layerId).dynamicCast(); + if (!layer.empty()) + { + layer->nmsThreshold = 0; + } } } -} + + void setNmsAcrossClasses(bool value) { + nmsAcrossClasses = value; + } + + bool getNmsAcrossClasses() { + return nmsAcrossClasses; + } + +private: + bool nmsAcrossClasses = false; +}; DetectionModel::DetectionModel(const String& model, const String& config) - : Model(model, config) + : DetectionModel(readNet(model, config)) { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + // nothing } -DetectionModel::DetectionModel(const Net& network) : Model(network) +DetectionModel::DetectionModel(const Net& network) : Model() { - disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() + impl = makePtr(); + impl->initNet(network); + impl.dynamicCast()->disableRegionNMS(getNetwork_()); // FIXIT Move to DetectionModel::Impl::initNet() +} + +DetectionModel::DetectionModel() : Model() +{ + // nothing +} + +DetectionModel& DetectionModel::setNmsAcrossClasses(bool value) +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + impl.dynamicCast()->setNmsAcrossClasses(value); + return *this; +} + +bool DetectionModel::getNmsAcrossClasses() +{ + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + + return impl.dynamicCast()->getNmsAcrossClasses(); } void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, CV_OUT std::vector& confidences, CV_OUT std::vector& boxes, float confThreshold, float nmsThreshold) { + CV_Assert(impl != nullptr && impl.dynamicCast() != nullptr); // remove once default constructor is removed + std::vector detections; impl->processFrame(frame, detections); @@ -413,7 +459,7 @@ void 
DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { std::vector predClassIds; std::vector predBoxes; - std::vector predConf; + std::vector predConfidences; for (int i = 0; i < detections.size(); ++i) { // Network produces output blob with a shape NxC where N is a number of @@ -442,37 +488,51 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, height = std::max(1, std::min(height, frameHeight - top)); predClassIds.push_back(classIdPoint.x); - predConf.push_back(static_cast(conf)); + predConfidences.push_back(static_cast(conf)); predBoxes.emplace_back(left, top, width, height); } } if (nmsThreshold) { - std::map > class2indices; - for (size_t i = 0; i < predClassIds.size(); i++) + if (getNmsAcrossClasses()) { - if (predConf[i] >= confThreshold) - { - class2indices[predClassIds[i]].push_back(i); - } - } - for (const auto& it : class2indices) - { - std::vector localBoxes; - std::vector localConfidences; - for (size_t idx : it.second) - { - localBoxes.push_back(predBoxes[idx]); - localConfidences.push_back(predConf[idx]); - } std::vector indices; - NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); - classIds.resize(classIds.size() + indices.size(), it.first); + NMSBoxes(predBoxes, predConfidences, confThreshold, nmsThreshold, indices); for (int idx : indices) { - boxes.push_back(localBoxes[idx]); - confidences.push_back(localConfidences[idx]); + boxes.push_back(predBoxes[idx]); + confidences.push_back(predConfidences[idx]); + classIds.push_back(predClassIds[idx]); + } + } + else + { + std::map > class2indices; + for (size_t i = 0; i < predClassIds.size(); i++) + { + if (predConfidences[i] >= confThreshold) + { + class2indices[predClassIds[i]].push_back(i); + } + } + for (const auto& it : class2indices) + { + std::vector localBoxes; + std::vector localConfidences; + for (size_t idx : it.second) + { + localBoxes.push_back(predBoxes[idx]); + localConfidences.push_back(predConfidences[idx]); + } + std::vector indices; + NMSBoxes(localBoxes, localConfidences, confThreshold, nmsThreshold, indices); + classIds.resize(classIds.size() + indices.size(), it.first); + for (int idx : indices) + { + boxes.push_back(localBoxes[idx]); + confidences.push_back(localConfidences[idx]); + } } } } @@ -480,11 +540,786 @@ void DetectionModel::detect(InputArray frame, CV_OUT std::vector& classIds, { boxes = std::move(predBoxes); classIds = std::move(predClassIds); - confidences = std::move(predConf); + confidences = std::move(predConfidences); } } else CV_Error(Error::StsNotImplemented, "Unknown output layer type: \"" + lastLayer->type + "\""); } +struct TextRecognitionModel_Impl : public Model::Impl +{ + std::string decodeType; + std::vector vocabulary; + + TextRecognitionModel_Impl() + { + CV_TRACE_FUNCTION(); + } + + TextRecognitionModel_Impl(const Net& network) + { + CV_TRACE_FUNCTION(); + initNet(network); + } + + inline + void setVocabulary(const std::vector& inputVoc) + { + vocabulary = inputVoc; + } + + inline + void setDecodeType(const std::string& type) + { + decodeType = type; + } + + virtual + std::string decode(const Mat& prediction) + { + CV_TRACE_FUNCTION(); + CV_Assert(!prediction.empty()); + if (decodeType.empty()) + CV_Error(Error::StsBadArg, "TextRecognitionModel: decodeType is not specified"); + if (vocabulary.empty()) + CV_Error(Error::StsBadArg, "TextRecognitionModel: vocabulary is not specified"); + + std::string decodeSeq; + if (decodeType == "CTC-greedy") + { + CV_CheckEQ(prediction.dims, 3, ""); + 
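
The loop below implements CTC-greedy decoding: take the argmax class per time step, drop blanks (class index 0), and collapse consecutive repeats unless a blank separates them. A self-contained sketch of the same rule over a simplified [T][1 + vocab] score array (containers and names here are illustrative, not the Model internals):

#include <string>
#include <vector>

static std::string ctcGreedyDecode(const std::vector<std::vector<float> >& scores, // [T][1 + vocab]
                                   const std::vector<std::string>& vocabulary)
{
    std::string decoded;
    int lastClass = 0;      // 0 is the CTC blank
    bool afterBlank = true; // a blank allows re-emitting the previous class
    for (size_t t = 0; t < scores.size(); t++)
    {
        const std::vector<float>& row = scores[t];
        int best = 0;
        for (int j = 1; j < (int)row.size(); j++)
            if (row[j] > row[best]) best = j;
        if (best > 0)
        {
            if (best != lastClass || afterBlank)
                decoded += vocabulary[best - 1];
            lastClass = best;
            afterBlank = false;
        }
        else
            afterBlank = true;
    }
    return decoded;
}
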
CV_CheckType(prediction.type(), CV_32FC1, ""); + const int vocLength = (int)(vocabulary.size()); + CV_CheckLE(prediction.size[1], vocLength, ""); + bool ctcFlag = true; + int lastLoc = 0; + for (int i = 0; i < prediction.size[0]; i++) + { + const float* pred = prediction.ptr(i); + int maxLoc = 0; + float maxScore = pred[0]; + for (int j = 1; j < vocLength + 1; j++) + { + float score = pred[j]; + if (maxScore < score) + { + maxScore = score; + maxLoc = j; + } + } + + if (maxLoc > 0) + { + std::string currentChar = vocabulary.at(maxLoc - 1); + if (maxLoc != lastLoc || ctcFlag) + { + lastLoc = maxLoc; + decodeSeq += currentChar; + ctcFlag = false; + } + } + else + { + ctcFlag = true; + } + } + } else if (decodeType.length() == 0) { + CV_Error(Error::StsBadArg, "Please set decodeType"); + } else { + CV_Error_(Error::StsBadArg, ("Unsupported decodeType: %s", decodeType.c_str())); + } + + return decodeSeq; + } + + virtual + std::string recognize(InputArray frame) + { + CV_TRACE_FUNCTION(); + std::vector outs; + processFrame(frame, outs); + CV_CheckEQ(outs.size(), (size_t)1, ""); + return decode(outs[0]); + } + + virtual + void recognize(InputArray frame, InputArrayOfArrays roiRects, CV_OUT std::vector& results) + { + CV_TRACE_FUNCTION(); + results.clear(); + if (roiRects.empty()) + { + auto s = recognize(frame); + results.push_back(s); + return; + } + + std::vector rects; + roiRects.copyTo(rects); + + // Predict for each RoI + Mat input = frame.getMat(); + for (size_t i = 0; i < rects.size(); i++) + { + Rect roiRect = rects[i]; + Mat roi = input(roiRect); + auto s = recognize(roi); + results.push_back(s); + } + } + + static inline + TextRecognitionModel_Impl& from(const std::shared_ptr& ptr) + { + CV_Assert(ptr); + return *((TextRecognitionModel_Impl*)ptr.get()); + } +}; + +TextRecognitionModel::TextRecognitionModel() +{ + impl = std::static_pointer_cast(makePtr()); +} + +TextRecognitionModel::TextRecognitionModel(const Net& network) +{ + impl = std::static_pointer_cast(std::make_shared(network)); +} + +TextRecognitionModel& TextRecognitionModel::setDecodeType(const std::string& decodeType) +{ + TextRecognitionModel_Impl::from(impl).setDecodeType(decodeType); + return *this; +} + +const std::string& TextRecognitionModel::getDecodeType() const +{ + return TextRecognitionModel_Impl::from(impl).decodeType; +} + +TextRecognitionModel& TextRecognitionModel::setVocabulary(const std::vector& inputVoc) +{ + TextRecognitionModel_Impl::from(impl).setVocabulary(inputVoc); + return *this; +} + +const std::vector& TextRecognitionModel::getVocabulary() const +{ + return TextRecognitionModel_Impl::from(impl).vocabulary; +} + +std::string TextRecognitionModel::recognize(InputArray frame) const +{ + return TextRecognitionModel_Impl::from(impl).recognize(frame); +} + +void TextRecognitionModel::recognize(InputArray frame, InputArrayOfArrays roiRects, CV_OUT std::vector& results) const +{ + TextRecognitionModel_Impl::from(impl).recognize(frame, roiRects, results); +} + + +///////////////////////////////////////// Text Detection ///////////////////////////////////////// + +struct TextDetectionModel_Impl : public Model::Impl +{ + TextDetectionModel_Impl() {} + + TextDetectionModel_Impl(const Net& network) + { + CV_TRACE_FUNCTION(); + initNet(network); + } + + virtual + std::vector< std::vector > detect(InputArray frame, CV_OUT std::vector& confidences) + { + CV_TRACE_FUNCTION(); + std::vector rects = detectTextRectangles(frame, confidences); + std::vector< std::vector > results; + for (const RotatedRect& rect : 
rects) + { + Point2f vertices[4] = {}; + rect.points(vertices); + std::vector result = { vertices[0], vertices[1], vertices[2], vertices[3] }; + results.emplace_back(result); + } + return results; + } + + virtual + std::vector< std::vector > detect(InputArray frame) + { + CV_TRACE_FUNCTION(); + std::vector confidences; + return detect(frame, confidences); + } + + virtual + std::vector detectTextRectangles(InputArray frame, CV_OUT std::vector& confidences) + { + CV_Error(Error::StsNotImplemented, ""); + } + + virtual + std::vector detectTextRectangles(InputArray frame) + { + CV_TRACE_FUNCTION(); + std::vector confidences; + return detectTextRectangles(frame, confidences); + } + + static inline + TextDetectionModel_Impl& from(const std::shared_ptr& ptr) + { + CV_Assert(ptr); + return *((TextDetectionModel_Impl*)ptr.get()); + } +}; + + +TextDetectionModel::TextDetectionModel() + : Model() +{ + // nothing +} + +static +void to32s( + const std::vector< std::vector >& detections_f, + CV_OUT std::vector< std::vector >& detections +) +{ + detections.resize(detections_f.size()); + for (size_t i = 0; i < detections_f.size(); i++) + { + const auto& contour_f = detections_f[i]; + std::vector contour(contour_f.size()); + for (size_t j = 0; j < contour_f.size(); j++) + { + contour[j].x = cvRound(contour_f[j].x); + contour[j].y = cvRound(contour_f[j].y); + } + swap(detections[i], contour); + } +} + +void TextDetectionModel::detect( + InputArray frame, + CV_OUT std::vector< std::vector >& detections, + CV_OUT std::vector& confidences +) const +{ + std::vector< std::vector > detections_f = TextDetectionModel_Impl::from(impl).detect(frame, confidences); + to32s(detections_f, detections); + return; +} + +void TextDetectionModel::detect( + InputArray frame, + CV_OUT std::vector< std::vector >& detections +) const +{ + std::vector< std::vector > detections_f = TextDetectionModel_Impl::from(impl).detect(frame); + to32s(detections_f, detections); + return; +} + +void TextDetectionModel::detectTextRectangles( + InputArray frame, + CV_OUT std::vector& detections, + CV_OUT std::vector& confidences +) const +{ + detections = TextDetectionModel_Impl::from(impl).detectTextRectangles(frame, confidences); + return; +} + +void TextDetectionModel::detectTextRectangles( + InputArray frame, + CV_OUT std::vector& detections +) const +{ + detections = TextDetectionModel_Impl::from(impl).detectTextRectangles(frame); + return; +} + + +struct TextDetectionModel_EAST_Impl : public TextDetectionModel_Impl +{ + float confThreshold; + float nmsThreshold; + + TextDetectionModel_EAST_Impl() + : confThreshold(0.5f) + , nmsThreshold(0.0f) + { + CV_TRACE_FUNCTION(); + } + + TextDetectionModel_EAST_Impl(const Net& network) + : TextDetectionModel_EAST_Impl() + { + CV_TRACE_FUNCTION(); + initNet(network); + } + + void setConfidenceThreshold(float confThreshold_) { confThreshold = confThreshold_; } + float getConfidenceThreshold() const { return confThreshold; } + + void setNMSThreshold(float nmsThreshold_) { nmsThreshold = nmsThreshold_; } + float getNMSThreshold() const { return nmsThreshold; } + + // TODO: According to article EAST supports quadrangles output: https://arxiv.org/pdf/1704.03155.pdf +#if 0 + virtual + std::vector< std::vector > detect(InputArray frame, CV_OUT std::vector& confidences) CV_OVERRIDE +#endif + + virtual + std::vector detectTextRectangles(InputArray frame, CV_OUT std::vector& confidences) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + std::vector results; + + std::vector outs; + processFrame(frame, outs); + 
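
The geometry decoding below turns each above-threshold cell into a RotatedRect: the five geometry channels hold the distances from the cell to the four box edges plus a rotation angle, and the score map has a stride of 4 pixels relative to the input. A minimal per-cell sketch of the same arithmetic (the helper is illustrative; the real loop operates on the raw blob pointers):

#include <cmath>
#include <opencv2/core.hpp>

// x, y: cell coordinates in the score map; d0..d3: distances to the
// top/right/bottom/left edges; angle: box rotation in radians.
static cv::RotatedRect decodeEastCell(int x, int y, float d0, float d1,
                                      float d2, float d3, float angle)
{
    const float cosA = std::cos(angle), sinA = std::sin(angle);
    const float h = d0 + d2, w = d1 + d3;
    const cv::Point2f offset(x * 4.0f + cosA * d1 + sinA * d2,
                             y * 4.0f - sinA * d1 + cosA * d2);
    const cv::Point2f p1 = cv::Point2f(-sinA * h, -cosA * h) + offset;
    const cv::Point2f p3 = cv::Point2f(-cosA * w,  sinA * w) + offset;
    return cv::RotatedRect(0.5f * (p1 + p3), cv::Size2f(w, h),
                           -angle * 180.0f / (float)CV_PI);
}
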
CV_CheckEQ(outs.size(), (size_t)2, ""); + Mat geometry = outs[0]; + Mat scoreMap = outs[1]; + + CV_CheckEQ(scoreMap.dims, 4, ""); + CV_CheckEQ(geometry.dims, 4, ""); + CV_CheckEQ(scoreMap.size[0], 1, ""); + CV_CheckEQ(geometry.size[0], 1, ""); + CV_CheckEQ(scoreMap.size[1], 1, ""); + CV_CheckEQ(geometry.size[1], 5, ""); + CV_CheckEQ(scoreMap.size[2], geometry.size[2], ""); + CV_CheckEQ(scoreMap.size[3], geometry.size[3], ""); + + CV_CheckType(scoreMap.type(), CV_32FC1, ""); + CV_CheckType(geometry.type(), CV_32FC1, ""); + + std::vector boxes; + std::vector scores; + const int height = scoreMap.size[2]; + const int width = scoreMap.size[3]; + for (int y = 0; y < height; ++y) + { + const float* scoresData = scoreMap.ptr(0, 0, y); + const float* x0_data = geometry.ptr(0, 0, y); + const float* x1_data = geometry.ptr(0, 1, y); + const float* x2_data = geometry.ptr(0, 2, y); + const float* x3_data = geometry.ptr(0, 3, y); + const float* anglesData = geometry.ptr(0, 4, y); + for (int x = 0; x < width; ++x) + { + float score = scoresData[x]; + if (score < confThreshold) + continue; + + float offsetX = x * 4.0f, offsetY = y * 4.0f; + float angle = anglesData[x]; + float cosA = std::cos(angle); + float sinA = std::sin(angle); + float h = x0_data[x] + x2_data[x]; + float w = x1_data[x] + x3_data[x]; + + Point2f offset(offsetX + cosA * x1_data[x] + sinA * x2_data[x], + offsetY - sinA * x1_data[x] + cosA * x2_data[x]); + Point2f p1 = Point2f(-sinA * h, -cosA * h) + offset; + Point2f p3 = Point2f(-cosA * w, sinA * w) + offset; + boxes.push_back(RotatedRect(0.5f * (p1 + p3), Size2f(w, h), -angle * 180.0f / (float)CV_PI)); + scores.push_back(score); + } + } + + // Apply non-maximum suppression procedure. + std::vector indices; + NMSBoxes(boxes, scores, confThreshold, nmsThreshold, indices); + + confidences.clear(); + confidences.reserve(indices.size()); + + // Re-scale + Point2f ratio((float)frame.cols() / size.width, (float)frame.rows() / size.height); + bool isUniformRatio = std::fabs(ratio.x - ratio.y) <= 0.01f; + for (uint i = 0; i < indices.size(); i++) + { + auto idx = indices[i]; + + auto conf = scores[idx]; + confidences.push_back(conf); + + RotatedRect& box0 = boxes[idx]; + + if (isUniformRatio) + { + RotatedRect box = box0; + box.center.x *= ratio.x; + box.center.y *= ratio.y; + box.size.width *= ratio.x; + box.size.height *= ratio.y; + results.emplace_back(box); + } + else + { + Point2f vertices[4] = {}; + box0.points(vertices); + for (int j = 0; j < 4; j++) + { + vertices[j].x *= ratio.x; + vertices[j].y *= ratio.y; + } + RotatedRect box = minAreaRect(Mat(4, 1, CV_32FC2, (void*)vertices)); + + // minArea() rect is not normalized, it may return rectangles rotated by +90/-90 + float angle_diff = std::fabs(box.angle - box0.angle); + while (angle_diff >= (90 + 45)) + { + box.angle += (box.angle < box0.angle) ? 
180 : -180; + angle_diff = std::fabs(box.angle - box0.angle); + } + if (angle_diff > 45) // avoid ~90 degree turns + { + std::swap(box.size.width, box.size.height); + if (box.angle < box0.angle) + box.angle += 90; + else if (box.angle > box0.angle) + box.angle -= 90; + } + // CV_DbgAssert(std::fabs(box.angle - box0.angle) <= 45); + + results.emplace_back(box); + } + } + + return results; + } + + static inline + TextDetectionModel_EAST_Impl& from(const std::shared_ptr& ptr) + { + CV_Assert(ptr); + return *((TextDetectionModel_EAST_Impl*)ptr.get()); + } +}; + + +TextDetectionModel_EAST::TextDetectionModel_EAST() + : TextDetectionModel() +{ + impl = std::static_pointer_cast(makePtr()); +} + +TextDetectionModel_EAST::TextDetectionModel_EAST(const Net& network) + : TextDetectionModel() +{ + impl = std::static_pointer_cast(makePtr(network)); +} + +TextDetectionModel_EAST& TextDetectionModel_EAST::setConfidenceThreshold(float confThreshold) +{ + TextDetectionModel_EAST_Impl::from(impl).setConfidenceThreshold(confThreshold); + return *this; +} +float TextDetectionModel_EAST::getConfidenceThreshold() const +{ + return TextDetectionModel_EAST_Impl::from(impl).getConfidenceThreshold(); +} + +TextDetectionModel_EAST& TextDetectionModel_EAST::setNMSThreshold(float nmsThreshold) +{ + TextDetectionModel_EAST_Impl::from(impl).setNMSThreshold(nmsThreshold); + return *this; +} +float TextDetectionModel_EAST::getNMSThreshold() const +{ + return TextDetectionModel_EAST_Impl::from(impl).getNMSThreshold(); +} + + + +struct TextDetectionModel_DB_Impl : public TextDetectionModel_Impl +{ + float binaryThreshold; + float polygonThreshold; + double unclipRatio; + int maxCandidates; + + TextDetectionModel_DB_Impl() + : binaryThreshold(0.3f) + , polygonThreshold(0.5f) + , unclipRatio(2.0f) + , maxCandidates(0) + { + CV_TRACE_FUNCTION(); + } + + TextDetectionModel_DB_Impl(const Net& network) + : TextDetectionModel_DB_Impl() + { + CV_TRACE_FUNCTION(); + initNet(network); + } + + void setBinaryThreshold(float binaryThreshold_) { binaryThreshold = binaryThreshold_; } + float getBinaryThreshold() const { return binaryThreshold; } + + void setPolygonThreshold(float polygonThreshold_) { polygonThreshold = polygonThreshold_; } + float getPolygonThreshold() const { return polygonThreshold; } + + void setUnclipRatio(double unclipRatio_) { unclipRatio = unclipRatio_; } + double getUnclipRatio() const { return unclipRatio; } + + void setMaxCandidates(int maxCandidates_) { maxCandidates = maxCandidates_; } + int getMaxCandidates() const { return maxCandidates; } + + + virtual + std::vector detectTextRectangles(InputArray frame, CV_OUT std::vector& confidences) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + std::vector< std::vector > contours = detect(frame, confidences); + std::vector results; results.reserve(contours.size()); + for (size_t i = 0; i < contours.size(); i++) + { + auto& contour = contours[i]; + RotatedRect box = minAreaRect(contour); + + // minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width + const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property + bool swap_size = false; + if (box.size.width < box.size.height) // horizontal-wide text area is expected + swap_size = true; + else if (std::fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles + swap_size = true; + if (swap_size) + { + std::swap(box.size.width, box.size.height); + if (box.angle < 0) + box.angle += 90; + else if (box.angle > 0) + box.angle -= 90; + } + + 
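
The width/height swap above compensates for cv::minAreaRect(), whose result is not normalized and may describe a horizontal text line as a near-vertical rectangle. The same correction reappears in detect() below; factored out, it would look like this (the helper name is hypothetical):

#include <algorithm>
#include <cmath>
#include <opencv2/core.hpp>

// Fold a minAreaRect() result toward a horizontal-wide, small-angle box.
static void normalizeHorizontalBox(cv::RotatedRect& box, float angleThreshold = 60.f)
{
    bool swapSize = false;
    if (box.size.width < box.size.height)            // expect width >= height
        swapSize = true;
    else if (std::fabs(box.angle) >= angleThreshold) // avoid vertical boxes
        swapSize = true;
    if (swapSize)
    {
        std::swap(box.size.width, box.size.height);
        if (box.angle < 0)
            box.angle += 90;
        else if (box.angle > 0)
            box.angle -= 90;
    }
}
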
+            results.push_back(box);
+        }
+        return results;
+    }
+
+    std::vector< std::vector<Point> > detect(InputArray frame, CV_OUT std::vector<float>& confidences) CV_OVERRIDE
+    {
+        CV_TRACE_FUNCTION();
+        std::vector< std::vector<Point> > results;
+
+        std::vector<Mat> outs;
+        processFrame(frame, outs);
+        CV_Assert(outs.size() == 1);
+        Mat binary = outs[0];
+
+        // Threshold
+        Mat bitmap;
+        threshold(binary, bitmap, binaryThreshold, 255, THRESH_BINARY);
+
+        // Scale ratio
+        float scaleHeight = (float)(frame.rows()) / (float)(binary.size[0]);
+        float scaleWidth = (float)(frame.cols()) / (float)(binary.size[1]);
+
+        // Find contours
+        std::vector< std::vector<Point> > contours;
+        bitmap.convertTo(bitmap, CV_8UC1);
+        findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
+
+        // Candidate number limitation
+        size_t numCandidate = std::min(contours.size(), (size_t)(maxCandidates > 0 ? maxCandidates : INT_MAX));
+
+        for (size_t i = 0; i < numCandidate; i++)
+        {
+            std::vector<Point>& contour = contours[i];
+
+            // Calculate text contour score
+            if (contourScore(binary, contour) < polygonThreshold)
+                continue;
+
+            // Rescale
+            std::vector<Point> contourScaled; contourScaled.reserve(contour.size());
+            for (size_t j = 0; j < contour.size(); j++)
+            {
+                contourScaled.push_back(Point(int(contour[j].x * scaleWidth),
+                                              int(contour[j].y * scaleHeight)));
+            }
+
+            // Unclip
+            RotatedRect box = minAreaRect(contourScaled);
+
+            // minAreaRect() result is not normalized, it may return rectangles with angle=-90 or height < width
+            const float angle_threshold = 60;  // do not expect vertical text, TODO detection algo property
+            bool swap_size = false;
+            if (box.size.width < box.size.height)  // horizontal-wide text area is expected
+                swap_size = true;
+            else if (std::fabs(box.angle) >= angle_threshold)  // don't work with vertical rectangles
+                swap_size = true;
+            if (swap_size)
+            {
+                std::swap(box.size.width, box.size.height);
+                if (box.angle < 0)
+                    box.angle += 90;
+                else if (box.angle > 0)
+                    box.angle -= 90;
+            }
+
+            Point2f vertex[4];
+            box.points(vertex);  // order: bl, tl, tr, br
+            std::vector<Point> approx;
+            for (int j = 0; j < 4; j++)
+                approx.emplace_back(vertex[j]);
+            std::vector<Point> polygon;
+            unclip(approx, polygon, unclipRatio);
+            results.push_back(polygon);
+        }
+
+        confidences = std::vector<float>(contours.size(), 1.0f);
+        return results;
+    }
+
+    // According to https://github.com/MhLiao/DB/blob/master/structure/representers/seg_detector_representer.py (2020-10)
+    static double contourScore(const Mat& binary, const std::vector<Point>& contour)
+    {
+        Rect rect = boundingRect(contour);
+        int xmin = std::max(rect.x, 0);
+        int xmax = std::min(rect.x + rect.width, binary.cols - 1);
+        int ymin = std::max(rect.y, 0);
+        int ymax = std::min(rect.y + rect.height, binary.rows - 1);
+
+        Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
+
+        Mat mask = Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
+        std::vector<Point> roiContour;
+        for (size_t i = 0; i < contour.size(); i++) {
+            Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
+            roiContour.push_back(pt);
+        }
+        std::vector<std::vector<Point>> roiContours = {roiContour};
+        fillPoly(mask, roiContours, Scalar(1));
+        double score = cv::mean(binROI, mask).val[0];
+
+        return score;
+    }
+
+    // According to https://github.com/MhLiao/DB/blob/master/structure/representers/seg_detector_representer.py (2020-10)
+    static void unclip(const std::vector<Point>& inPoly, std::vector<Point> &outPoly, const double unclipRatio)
+    {
+        double area = contourArea(inPoly);
+        double length = arcLength(inPoly, true);
+        CV_Assert(length > FLT_EPSILON);
+        double distance = area * unclipRatio / length;
+
+        size_t numPoints = inPoly.size();
+        std::vector<std::vector<Point2f>> newLines;
+        for (size_t i = 0; i < numPoints; i++) {
+            std::vector<Point2f> newLine;
+            Point pt1 = inPoly[i];
+            Point pt2 = inPoly[(i - 1) % numPoints];
+            Point vec = pt1 - pt2;
+            float unclipDis = (float)(distance / norm(vec));
+            Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
+            newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
+            newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
+            newLines.push_back(newLine);
+        }
+
+        size_t numLines = newLines.size();
+        for (size_t i = 0; i < numLines; i++) {
+            Point2f a = newLines[i][0];
+            Point2f b = newLines[i][1];
+            Point2f c = newLines[(i + 1) % numLines][0];
+            Point2f d = newLines[(i + 1) % numLines][1];
+            Point2f pt;
+            Point2f v1 = b - a;
+            Point2f v2 = d - c;
+            double cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));
+
+            if (fabs(cosAngle) > 0.7) {
+                pt.x = (b.x + c.x) * 0.5;
+                pt.y = (b.y + c.y) * 0.5;
+            } else {
+                double denom = a.x * (double)(d.y - c.y) + b.x * (double)(c.y - d.y) +
+                               d.x * (double)(b.y - a.y) + c.x * (double)(a.y - b.y);
+                double num = a.x * (double)(d.y - c.y) + c.x * (double)(a.y - d.y) + d.x * (double)(c.y - a.y);
+                double s = num / denom;
+
+                pt.x = a.x + s*(b.x - a.x);
+                pt.y = a.y + s*(b.y - a.y);
+            }
+
+
+            outPoly.push_back(pt);
+        }
+    }
+
+
+    static inline
+    TextDetectionModel_DB_Impl& from(const std::shared_ptr<TextDetectionModel_Impl>& ptr)
+    {
+        CV_Assert(ptr);
+        return *((TextDetectionModel_DB_Impl*)ptr.get());
+    }
+};
+
+
+TextDetectionModel_DB::TextDetectionModel_DB()
+    : TextDetectionModel()
+{
+    impl = std::static_pointer_cast<TextDetectionModel_Impl>(makePtr<TextDetectionModel_DB_Impl>());
+}
+
+TextDetectionModel_DB::TextDetectionModel_DB(const Net& network)
+    : TextDetectionModel()
+{
+    impl = std::static_pointer_cast<TextDetectionModel_Impl>(makePtr<TextDetectionModel_DB_Impl>(network));
+}
+
+TextDetectionModel_DB& TextDetectionModel_DB::setBinaryThreshold(float binaryThreshold)
+{
+    TextDetectionModel_DB_Impl::from(impl).setBinaryThreshold(binaryThreshold);
+    return *this;
+}
+float TextDetectionModel_DB::getBinaryThreshold() const
+{
+    return TextDetectionModel_DB_Impl::from(impl).getBinaryThreshold();
+}
+
+TextDetectionModel_DB& TextDetectionModel_DB::setPolygonThreshold(float polygonThreshold)
+{
+    TextDetectionModel_DB_Impl::from(impl).setPolygonThreshold(polygonThreshold);
+    return *this;
+}
+float TextDetectionModel_DB::getPolygonThreshold() const
+{
+    return TextDetectionModel_DB_Impl::from(impl).getPolygonThreshold();
+}
+
+TextDetectionModel_DB& TextDetectionModel_DB::setUnclipRatio(double unclipRatio)
+{
+    TextDetectionModel_DB_Impl::from(impl).setUnclipRatio(unclipRatio);
+    return *this;
+}
+double TextDetectionModel_DB::getUnclipRatio() const
+{
+    return TextDetectionModel_DB_Impl::from(impl).getUnclipRatio();
+}
+
+TextDetectionModel_DB& TextDetectionModel_DB::setMaxCandidates(int maxCandidates)
+{
+    TextDetectionModel_DB_Impl::from(impl).setMaxCandidates(maxCandidates);
+    return *this;
+}
+int TextDetectionModel_DB::getMaxCandidates() const
+{
+    return TextDetectionModel_DB_Impl::from(impl).getMaxCandidates();
+}
+
+
 }}  // namespace
diff --git a/modules/dnn/src/nms.inl.hpp b/modules/dnn/src/nms.inl.hpp
index 89e3adfcf5..7b84839c02 100644
--- a/modules/dnn/src/nms.inl.hpp
+++ b/modules/dnn/src/nms.inl.hpp
@@ -62,12 +62,15 @@ inline void GetMaxScoreIndex(const std::vector<float>& scores, const float thres
 // score_threshold: a threshold used to filter detection results.
 // nms_threshold: a threshold used in non maximum suppression.
 // top_k: if not > 0, keep at most top_k picked indices.
+// limit: early terminate once the # of picked indices has reached it.
 // indices: the kept indices of bboxes after nms.
 template <typename BoxType>
 inline void NMSFast_(const std::vector<BoxType>& bboxes,
       const std::vector<float>& scores, const float score_threshold,
       const float nms_threshold, const float eta, const int top_k,
-      std::vector<int>& indices, float (*computeOverlap)(const BoxType&, const BoxType&))
+      std::vector<int>& indices,
+      float (*computeOverlap)(const BoxType&, const BoxType&),
+      int limit = std::numeric_limits<int>::max())
 {
   CV_Assert(bboxes.size() == scores.size());
@@ -86,8 +89,12 @@ inline void NMSFast_(const std::vector<BoxType>& bboxes,
       float overlap = computeOverlap(bboxes[idx], bboxes[kept_idx]);
       keep = overlap <= adaptive_threshold;
     }
-    if (keep)
+    if (keep) {
       indices.push_back(idx);
+      if (indices.size() >= limit) {
+        break;
+      }
+    }
     if (keep && eta < 1 && adaptive_threshold > 0.5) {
       adaptive_threshold *= eta;
     }
diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
index 8de7ba26e2..7bb277d102 100644
--- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
+++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -274,8 +274,6 @@ class OCL4DNNConvSpatial
     int32_t group_;
     bool bias_term_;
     UMat swizzled_weights_umat;
-    UMat weights_half;
-    UMat bias_half;
     UMat bottom_data2_;
     int32_t bottom_index_;
diff --git a/modules/dnn/src/ocl4dnn/src/math_functions.cpp b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
index 47224c3be6..855a21e08f 100644
--- a/modules/dnn/src/ocl4dnn/src/math_functions.cpp
+++ b/modules/dnn/src/ocl4dnn/src/math_functions.cpp
@@ -88,13 +88,13 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
         size_t global_copy[2];
         global_copy[0] = width;
         global_copy[1] = height;
-        oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
-        oclk_gemm_copy.set(1, image);
-        oclk_gemm_copy.set(2, offset);
-        oclk_gemm_copy.set(3, width);
-        oclk_gemm_copy.set(4, height);
-        oclk_gemm_copy.set(5, ld);
-        oclk_gemm_copy.run(2, global_copy, NULL, false);
+        oclk_gemm_copy
+            .args(
+                ocl::KernelArg::PtrReadOnly(buffer),
+                image, offset,
+                width, height,
+                ld)
+            .run(2, global_copy, NULL, false);
     }
   } else {
     if (!padding)
@@ -112,14 +112,14 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
       global_copy[0] = padded_width;
       global_copy[1] = padded_height;
-      oclk_gemm_copy.set(0, ocl::KernelArg::PtrReadOnly(buffer));
-      oclk_gemm_copy.set(1, image);
-      oclk_gemm_copy.set(2, offset);
-      oclk_gemm_copy.set(3, width);
-      oclk_gemm_copy.set(4, height);
-      oclk_gemm_copy.set(5, ld);
-
-      oclk_gemm_copy.run(2, global_copy, NULL, false);
+      bool res = oclk_gemm_copy
+          .args(
+              ocl::KernelArg::PtrReadOnly(buffer),
+              image, offset,
+              width, height,
+              ld)
+          .run(2, global_copy, NULL, false);
+      CV_Assert(res);
     }
   }
@@ -465,8 +465,12 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA,
         kernel_name += "_float";
     }
+    bool isBetaZero = beta == 0;
+
     String opts = format("-DTYPE=%d", halfPrecisionMode ?
TYPE_HALF : TYPE_FLOAT); - ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts); + if (isBetaZero) + opts += " -DZERO_BETA=1"; + size_t local[2] = {}; size_t global[2] = {}; if (TransA == CblasNoTrans && TransB != CblasNoTrans && is_small_batch) { @@ -496,27 +500,37 @@ static bool ocl4dnnFastBufferGEMM(const CBLAS_TRANSPOSE TransA, local[1] = ly; } - int arg_idx = 0; - oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A)); - oclk_gemm_float.set(arg_idx++, offA); - oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(B)); - oclk_gemm_float.set(arg_idx++, offB); - oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrWriteOnly(C)); - oclk_gemm_float.set(arg_idx++, offC); - oclk_gemm_float.set(arg_idx++, M); - oclk_gemm_float.set(arg_idx++, N); - oclk_gemm_float.set(arg_idx++, K); - oclk_gemm_float.set(arg_idx++, (float)alpha); - oclk_gemm_float.set(arg_idx++, (float)beta); - bool ret = true; - if (TransB == CblasNoTrans || TransA != CblasNoTrans) { + if (TransB == CblasNoTrans || TransA != CblasNoTrans) + { + // _NN_ int stride = 256; for (int start_index = 0; start_index < K; start_index += stride) { - oclk_gemm_float.set(arg_idx, start_index); - ret = oclk_gemm_float.run(2, global, local, false); + ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts); + oclk_gemm_float.args( + ocl::KernelArg::PtrReadOnly(A), offA, + ocl::KernelArg::PtrReadOnly(B), offB, + isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC, + M, N, K, + (float)alpha, (float)beta, + start_index + ); + ret &= oclk_gemm_float.run(2, global, local, false); } - } else { + } + else + { + // _NT_ + //C.reshape(1,1).setTo(0xfe00 /*FP16 NAN*/); // stable one-line reproducer for https://github.com/opencv/opencv/issues/18937 + //C.reshape(1,1).setTo(0); // non-optimal fixup (and not accurate) + ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_buffer_oclsrc, opts); + oclk_gemm_float.args( + ocl::KernelArg::PtrReadOnly(A), offA, + ocl::KernelArg::PtrReadOnly(B), offB, + isBetaZero ? ocl::KernelArg::PtrWriteOnly(C) : ocl::KernelArg::PtrReadWrite(C), offC, + M, N, K, + (float)alpha, (float)beta + ); ret = oclk_gemm_float.run(2, global, local, false); } return ret; diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index bf56d3a8a1..059fc8f402 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -588,16 +588,16 @@ bool OCL4DNNConvSpatial::Forward(const UMat& bottom, fused_eltwise_ = false; } - if (use_half_ && bias_half.empty() && !bias.empty()) - convertFp16(bias, bias_half); + if (use_half_ && !bias.empty()) + CV_CheckTypeEQ(bias.type(), CV_16SC1, ""); - if (use_half_ && weights_half.empty()) - convertFp16(weight, weights_half); + if (use_half_) + CV_CheckTypeEQ(weight.type(), CV_16SC1, ""); - prepareKernel(bottom, top, weight, (use_half_) ? bias_half : bias, numImages); + prepareKernel(bottom, top, weight, bias, numImages); if (bestKernelConfig.empty()) return false; - return convolve(bottom, top, weight, (use_half_) ? bias_half : bias, numImages, bestKernelConfig); + return convolve(bottom, top, weight, bias, numImages, bestKernelConfig); } template @@ -744,29 +744,26 @@ bool OCL4DNNConvSpatial::swizzleWeight(const UMat &weight, kernel_h_ * (int)alignSize(kernel_w_, 2), (use_half_) ? 
CV_16SC1 : CV_32FC1); - UMat swizzled_weights_tmp; - if (use_half_) - swizzled_weights_tmp.create(shape(swizzled_weights_umat), CV_32F); - if (!interleave) { - cl_uint argIdx = 0; int32_t channels = channels_ / group_; - ocl::Kernel oclk_copy_weight(CL_KERNEL_SELECT("copyWeightsSwizzled"), - cv::ocl::dnn::conv_spatial_helper_oclsrc); + ocl::Kernel oclk_copy_weight( + use_half_ ? "copyWeightsSwizzled_half" : "copyWeightsSwizzled_float", + cv::ocl::dnn::conv_spatial_helper_oclsrc, + use_half_ ? "-DHALF_SUPPORT=1 -DDtype=half" : "-DDtype=float" + ); if (oclk_copy_weight.empty()) return false; - oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); - if (use_half_) - oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_tmp)); - else - oclk_copy_weight.set(argIdx++, ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat)); - oclk_copy_weight.set(argIdx++, kernel_w_); - oclk_copy_weight.set(argIdx++, kernel_h_); - oclk_copy_weight.set(argIdx++, channels); - oclk_copy_weight.set(argIdx++, num_output_); - oclk_copy_weight.set(argIdx++, swizzled_factor); + oclk_copy_weight.args( + ocl::KernelArg::PtrReadOnly(weight), + ocl::KernelArg::PtrWriteOnly(swizzled_weights_umat), + kernel_w_, + kernel_h_, + channels, + num_output_, + swizzled_factor + ); size_t global_work_size_copy[3] = { (size_t) (alignSize(num_output_, swizzled_factor) * channels * kernel_w_ * kernel_h_), 1, 1 }; @@ -778,13 +775,24 @@ bool OCL4DNNConvSpatial::swizzleWeight(const UMat &weight, } } else { // assumption: kernel dimension is 2 - Mat weightMat = weight.getMat(ACCESS_READ); - Dtype* cpu_weight = (Dtype *)weightMat.ptr(); + Mat weightMat; Mat swizzledWeightMat; + UMat weight_tmp; // FP32 in half mode, TODO implement FP16 repack if (use_half_) - swizzledWeightMat = swizzled_weights_tmp.getMat(ACCESS_WRITE); + { + CV_CheckTypeEQ(weight.type(), CV_16SC1, ""); + convertFp16(weight, weight_tmp); + weightMat = weight_tmp.getMat(ACCESS_READ); + swizzledWeightMat.create(shape(swizzled_weights_umat), CV_32F); + } else + { + weightMat = weight.getMat(ACCESS_READ); swizzledWeightMat = swizzled_weights_umat.getMat(ACCESS_WRITE); + } + + CV_CheckTypeEQ(weightMat.type(), CV_32FC1, ""); + Dtype* cpu_weight = (Dtype *)weightMat.ptr(); Dtype* cpu_swizzled_weight = (Dtype *)swizzledWeightMat.ptr(); int interleavedRows = (kernel_w_ / 2) * 2; @@ -792,26 +800,28 @@ bool OCL4DNNConvSpatial::swizzleWeight(const UMat &weight, int blockWidth = swizzled_factor; // should equal to simd size. 
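            // Illustration: the remap below makes the output-channel index 'od' the
            // innermost one. E.g. for kernel_h_ = 1, kernel_w_ = 2, channels_ = 1 and
            // M_ = 2, the OIHW-ordered weights {w00, w01, w10, w11} become
            // {w00, w10, w01, w11}: each spatial tap then stores one weight per
            // output channel contiguously, so a SIMD group of width 'swizzled_factor'
            // can fill all of its lanes with a single contiguous read.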
int rowAlignment = 32; size_t interleaved_filter_size = M_ * kernel_w_ * kernel_h_ * channels_ * sizeof(Dtype); - Dtype * tmpSwizzledWeight = reinterpret_cast(malloc(interleaved_filter_size)); - CHECK_EQ(tmpSwizzledWeight != NULL, true) << "Failed to allocate temporary swizzled weight"; + cv::AutoBuffer tmpSwizzledWeight(interleaved_filter_size); for (int od = 0; od < M_; od++) for (int id = 0; id < channels_; id++) for (int r = 0; r < kernel_h_; r++) for (int c = 0; c < kernel_w_; c++) tmpSwizzledWeight[((id * kernel_h_ + r)* kernel_w_ + c) * M_ + od] = cpu_weight[((od * channels_ + id) * kernel_h_ + r)*kernel_w_+c]; + interleaveMatrix(cpu_swizzled_weight, - tmpSwizzledWeight, + tmpSwizzledWeight.data(), kernel_w_ * kernel_h_ * channels_, M_, interleavedRows, nonInterleavedRows, blockWidth, rowAlignment); - free(tmpSwizzledWeight); - } - if (use_half_) - convertFp16(swizzled_weights_tmp, swizzled_weights_umat); + // unmap OpenCL buffers + weightMat.release(); + + if (use_half_) + convertFp16(swizzledWeightMat, swizzled_weights_umat); + } return true; } @@ -1104,10 +1114,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, cl_uint argIdx = 0; setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); - if (use_half_) - kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half)); - else - kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); + kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); if (bias_term_) kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias)); kernel.set(argIdx++, ocl::KernelArg::PtrWriteOnly(top)); @@ -1148,10 +1155,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); kernel.set(argIdx++, image_offset); - if (use_half_) - kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weights_half)); - else - kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); + kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); kernel.set(argIdx++, kernel_offset); if (bias_term_) kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bias)); @@ -1956,7 +1960,7 @@ void OCL4DNNConvSpatial::prepareKernel(const UMat &bottom, UMat &top, UMat benchData(1, numImages * top_dim_, (use_half_) ? CV_16SC1 : CV_32FC1); - calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages); + calculateBenchmark(bottom, benchData, weight, bias, numImages); if (run_auto_tuning_ || force_auto_tuning_) { diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index ad3d903d68..7826f2b0ca 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -10,11 +10,14 @@ #include "../graph_simplifier.hpp" #include "onnx_graph_simplifier.hpp" +#include #include namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN +extern bool DNN_DIAGNOSTICS_RUN; + // This wrapper can behave differently for fake input nodes and real graph nodes. 
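// Such wrappers feed the generic subgraph matcher, which walks patterns declared
// via addNodeToMatch(). For example, the MishSubgraph pattern added below matches
//     y = x * tanh(softplus(x))           (Softplus -> Tanh -> Mul)
// and NormalizeSubgraph4 matches L2 normalization spelled as
//     y = x / sqrt(max(sum(x * x), eps))  (Mul -> ReduceSum -> Max -> Sqrt -> Reciprocal -> Mul),
// with NormalizeSubgraph5 covering the equivalent Clip/Div spelling; each chain is
// replaced by a single "Mish" or "Normalize" layer.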
class ONNXNodeWrapper : public ImportNodeWrapper { @@ -249,6 +252,40 @@ public: } }; +class NormalizeSubgraph4 : public NormalizeSubgraphBase +{ +public: + NormalizeSubgraph4() : NormalizeSubgraphBase(1) + { + int input = addNodeToMatch(""); + int mul = addNodeToMatch("Mul", input, input); + int sum = addNodeToMatch("ReduceSum", mul); + int eps = addNodeToMatch(""); + int max = addNodeToMatch("Max", sum, eps); + int sqrt = addNodeToMatch("Sqrt", max); + int reciprocal = addNodeToMatch("Reciprocal", sqrt); + addNodeToMatch("Mul", input, reciprocal); + setFusedNode("Normalize", input); + } +}; + +class NormalizeSubgraph5 : public NormalizeSubgraphBase +{ +public: + NormalizeSubgraph5() : NormalizeSubgraphBase(1) + { + int input = addNodeToMatch(""); + int mul = addNodeToMatch("Mul", input, input); + int sum = addNodeToMatch("ReduceSum", mul); + int clip = addNodeToMatch("Clip", sum); + int sqrt = addNodeToMatch("Sqrt", clip); + int one = addNodeToMatch("Constant"); + int div = addNodeToMatch("Div", one, sqrt); + addNodeToMatch("Mul", input, div); + setFusedNode("Normalize", input); + } +}; + class GatherCastSubgraph : public Subgraph { public: @@ -314,6 +351,19 @@ public: } }; +class MishSubgraph : public Subgraph +{ +public: + MishSubgraph() + { + int input = addNodeToMatch(""); + int softplus = addNodeToMatch("Softplus", input); + int tanh = addNodeToMatch("Tanh", softplus); + addNodeToMatch("Mul", input, tanh); + setFusedNode("Mish", input); + } +}; + class MulCastSubgraph : public Subgraph { public: @@ -512,6 +562,9 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr()); simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs); } @@ -589,8 +642,17 @@ Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto) } } else - CV_Error(Error::StsUnsupportedFormat, "Unsupported data type: " + - opencv_onnx::TensorProto_DataType_Name(datatype)); + { + std::string errorMsg = "Unsupported data type: " + + opencv_onnx::TensorProto_DataType_Name(datatype); + + if (!DNN_DIAGNOSTICS_RUN) + { + CV_Error(Error::StsUnsupportedFormat, errorMsg); + } + CV_LOG_ERROR(NULL, errorMsg); + return blob; + } if (tensor_proto.dims_size() == 0) blob.dims = 1; // To force 1-dimensional cv::Mat for scalars. 
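    // Note on the diagnostics path above: when DNN_DIAGNOSTICS_RUN is set, an
    // unsupported data type no longer throws but yields an empty blob, and the
    // caller is expected to skip it, as getGraphTensors() does in the importer:
    //     if (DNN_DIAGNOSTICS_RUN && mat.empty())
    //         continue;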
return blob; diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 859b595b7f..98714bbd5c 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -8,6 +8,8 @@ #include "../precomp.hpp" #include +#include + #include #undef CV_LOG_STRIP_LEVEL #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 @@ -37,6 +39,7 @@ namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN +extern bool DNN_DIAGNOSTICS_RUN; class ONNXImporter { @@ -58,11 +61,12 @@ class ONNXImporter void addConstant(const std::string& name, const Mat& blob); void addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + static const std::set& getSupportedTypes(); public: ONNXImporter(Net& net, const char *onnxFile) - : dstNet(net) + : dstNet(net), utilNet() { hasDynamicShapes = false; CV_Assert(onnxFile); @@ -83,7 +87,7 @@ public: } ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : dstNet(net) + : dstNet(net), utilNet() { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); @@ -110,6 +114,7 @@ public: protected: Net& dstNet; + Net utilNet; opencv_onnx::GraphProto graph_proto; std::string framework_name; @@ -182,6 +187,10 @@ std::map ONNXImporter::getGraphTensors( tensor_proto = graph_proto.initializer(i); Mat mat = getMatFromTensor(tensor_proto); releaseONNXTensor(tensor_proto); + + if (DNN_DIAGNOSTICS_RUN && mat.empty()) + continue; + layers_weights.insert(std::make_pair(tensor_proto.name(), mat)); } return layers_weights; @@ -201,118 +210,132 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i); std::string attribute_name = attribute_proto.name(); - if(attribute_name == "kernel_shape") + try { - CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); - lp.set("kernel_size", parse(attribute_proto.ints())); - } - else if(attribute_name == "strides") - { - CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); - lp.set("stride", parse(attribute_proto.ints())); - } - else if(attribute_name == "pads") - { - if (node_proto.op_type() == "Pad") + if(attribute_name == "kernel_shape") { - // Padding layer. - // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. - // We need to shuffle it to begin0, end0, begin1, end1, ... - CV_Assert(attribute_proto.ints_size() % 2 == 0); - const int dims = attribute_proto.ints_size() / 2; - std::vector paddings; - paddings.reserve(attribute_proto.ints_size()); - for (int i = 0; i < dims; ++i) + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("kernel_size", parse(attribute_proto.ints())); + } + else if(attribute_name == "strides") + { + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("stride", parse(attribute_proto.ints())); + } + else if(attribute_name == "pads") + { + if (node_proto.op_type() == "Pad") { - paddings.push_back(attribute_proto.ints(i)); - paddings.push_back(attribute_proto.ints(dims + i)); + // Padding layer. + // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. + // We need to shuffle it to begin0, end0, begin1, end1, ... 
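+                    // For example, a 2-D Pad attribute [top, left, bottom, right]
+                    // (all begins first, then all ends) is reordered to
+                    // [top, bottom, left, right], i.e. (begin, end) pairs per axis,
+                    // which is the order the OpenCV Padding layer expects.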
+ CV_Assert(attribute_proto.ints_size() % 2 == 0); + const int dims = attribute_proto.ints_size() / 2; + std::vector paddings; + paddings.reserve(attribute_proto.ints_size()); + for (int i = 0; i < dims; ++i) + { + paddings.push_back(attribute_proto.ints(i)); + paddings.push_back(attribute_proto.ints(dims + i)); + } + lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); } - lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); + else + { + // Convolution or pooling. + CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); + lp.set("pad", parse(attribute_proto.ints())); + } + } + else if(attribute_name == "auto_pad") + { + if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") { + lp.set("pad_mode", "SAME"); + } + else if (attribute_proto.s() == "VALID") { + lp.set("pad_mode", "VALID"); + } + } + else if(attribute_name == "dilations") + { + CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); + lp.set("dilation", parse(attribute_proto.ints())); + } + else if (attribute_proto.has_i()) + { + ::google::protobuf::int64 src = attribute_proto.i(); + if (src < std::numeric_limits::min() || src > std::numeric_limits::max()) + CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range"); + else + lp.set(attribute_name, saturate_cast(src)); + } + else if (attribute_proto.has_f()) + { + lp.set(attribute_name, attribute_proto.f()); + } + else if (attribute_proto.has_s()) + { + lp.set(attribute_name, attribute_proto.s()); + } + else if (attribute_proto.floats_size() > 0) + { + lp.set(attribute_name, DictValue::arrayReal( + attribute_proto.floats().data(), attribute_proto.floats_size())); + } + else if (attribute_proto.ints_size() > 0) + { + lp.set(attribute_name, parse(attribute_proto.ints())); + } + else if (attribute_proto.has_t()) + { + opencv_onnx::TensorProto tensor = attribute_proto.t(); + Mat blob = getMatFromTensor(tensor); + lp.blobs.push_back(blob); + } + else if (attribute_proto.has_g()) + { + CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str())); + } + else if (attribute_proto.graphs_size() > 0) + { + CV_Error(Error::StsNotImplemented, + cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported", + attribute_name.c_str(), attribute_proto.graphs_size()) + ); + } + else if (attribute_proto.strings_size() > 0) + { + std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported", + attribute_name.c_str(), attribute_proto.strings_size()); + CV_LOG_ERROR(NULL, msg); + for (int i = 0; i < attribute_proto.strings_size(); i++) + { + CV_LOG_ERROR(NULL, " Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'"); + } + CV_Error(Error::StsNotImplemented, msg); + } + else if (attribute_proto.tensors_size() > 0) + { + CV_Error(Error::StsNotImplemented, + cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported", + attribute_name.c_str(), attribute_proto.tensors_size()) + ); } else { - // Convolution or pooling. 
- CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6); - lp.set("pad", parse(attribute_proto.ints())); + CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str())); } } - else if(attribute_name == "auto_pad") + catch (const cv::Exception& e) { - if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") { - lp.set("pad_mode", "SAME"); - } - else if (attribute_proto.s() == "VALID") { - lp.set("pad_mode", "VALID"); - } - } - else if(attribute_name == "dilations") - { - CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); - lp.set("dilation", parse(attribute_proto.ints())); - } - else if (attribute_proto.has_i()) - { - ::google::protobuf::int64 src = attribute_proto.i(); - if (src < std::numeric_limits::min() || src > std::numeric_limits::max()) - CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range"); - else - lp.set(attribute_name, saturate_cast(src)); - } - else if (attribute_proto.has_f()) - { - lp.set(attribute_name, attribute_proto.f()); - } - else if (attribute_proto.has_s()) - { - lp.set(attribute_name, attribute_proto.s()); - } - else if (attribute_proto.floats_size() > 0) - { - lp.set(attribute_name, DictValue::arrayReal( - attribute_proto.floats().data(), attribute_proto.floats_size())); - } - else if (attribute_proto.ints_size() > 0) - { - lp.set(attribute_name, parse(attribute_proto.ints())); - } - else if (attribute_proto.has_t()) - { - opencv_onnx::TensorProto tensor = attribute_proto.t(); - Mat blob = getMatFromTensor(tensor); - lp.blobs.push_back(blob); - } - else if (attribute_proto.has_g()) - { - CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str())); - } - else if (attribute_proto.graphs_size() > 0) - { - CV_Error(Error::StsNotImplemented, - cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported", - attribute_name.c_str(), attribute_proto.graphs_size()) - ); - } - else if (attribute_proto.strings_size() > 0) - { - std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported", - attribute_name.c_str(), attribute_proto.strings_size()); - CV_LOG_ERROR(NULL, msg); - for (int i = 0; i < attribute_proto.strings_size(); i++) + CV_UNUSED(e); + if (DNN_DIAGNOSTICS_RUN) { - CV_LOG_ERROR(NULL, " Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'"); + CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem with processing attributes for node " << node_proto.name() << " Attribute " << attribute_name.c_str() + ); + continue; } - CV_Error(Error::StsNotImplemented, msg); - } - else if (attribute_proto.tensors_size() > 0) - { - CV_Error(Error::StsNotImplemented, - cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported", - attribute_name.c_str(), attribute_proto.tensors_size()) - ); - } - else - { - CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str())); + throw; } } return lp; @@ -338,7 +361,11 @@ Mat ONNXImporter::getBlob(const std::string& input_name) void ONNXImporter::addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { - int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + int id; + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(layerParams.name, 
layerParams.type, layerParams); + else + id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); for (int i = 0; i < node_proto.output_size(); ++i) { layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i))); @@ -351,7 +378,10 @@ void ONNXImporter::addLayer(LayerParams& layerParams, const std::string& input_name = node_proto.input(j); IterLayerId_t layerId = layer_id.find(input_name); if (layerId != layer_id.end()) { - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); ++inpNum; // Collect input shapes. IterShape_t shapeIt = outShapes.find(input_name); @@ -360,7 +390,11 @@ void ONNXImporter::addLayer(LayerParams& layerParams, } } // Compute shape of output blob for this layer. - Ptr layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage + Ptr layer; + if (DNN_DIAGNOSTICS_RUN) + layer = utilNet.getLayer(id); + else + layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) { @@ -437,8 +471,37 @@ void ONNXImporter::populateNet() layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1))); } } + utilNet.setInputsNames(netInputs); dstNet.setInputsNames(netInputs); + if (DNN_DIAGNOSTICS_RUN) { + auto &supportedTypes = getSupportedTypes(); + for (int li = 0; li < layersSize; li++) { + const opencv_onnx::NodeProto &node_proto = graph_proto.node(li); + std::string name = node_proto.output(0); + std::string layer_type = node_proto.op_type(); + auto registered = supportedTypes.find(layer_type); + if (registered == supportedTypes.end()) { + CV_LOG_ERROR(NULL, "DNN/ONNX: NOTE: Potential problem with creating node " << name<< " with type " << layer_type << ".\n Type " + << layer_type << " IS NOT SUPPORTED!\n" + ); + } + } + auto oldConstBlobs = constBlobs; + auto oldOutShapes = outShapes; + auto oldLayerId = layer_id; + CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!"); + for (int li = 0; li < layersSize; li++) { + const opencv_onnx::NodeProto &node_proto = graph_proto.node(li); + handleNode(node_proto); + } + CV_LOG_INFO(NULL, "DNN/ONNX: diagnostic run completed!"); + constBlobs = oldConstBlobs; + outShapes = oldOutShapes; + layer_id = oldLayerId; + enableModelDiagnostics(false); + } + for(int li = 0; li < layersSize; li++) { const opencv_onnx::NodeProto& node_proto = graph_proto.node(li); @@ -448,6 +511,80 @@ void ONNXImporter::populateNet() CV_LOG_DEBUG(NULL, "DNN/ONNX: import completed!"); } +const std::set& ONNXImporter::getSupportedTypes() +{ + static const std::set layerTypes = { + "MaxPool", + "AveragePool", + "GlobalAveragePool", + "GlobalMaxPool", + "ReduceMean", + "ReduceSum", + "ReduceMax", + "Slice", + "Split", + "Add", + "Sum", + "Sub", + "Pow", + "Max", + "Neg", + "Constant", + "LSTM", + "ImageScaler", + "Clip", + "LeakyRelu", + "Relu", + "Elu", + "Tanh", + "PRelu", + "LRN", + "InstanceNormalization", + "BatchNormalization", + "Gemm", + "MatMul", + "Mul", + "Div", + "Conv", + "ConvTranspose", + "Transpose", + "Squeeze", + "Flatten", + "Unsqueeze", + "Expand", + "Reshape", + "Pad", + "Shape", + "Cast", + "ConstantOfShape", + "ConstantFill", + "Gather", + "Concat", + "Resize", + 
"Upsample", + "SoftMax", + "Softmax", + "LogSoftmax", + "DetectionOutput", + "Interp", + "CropAndResize", + "ROIPooling", + "PSROIPooling", + "ChannelsPReLU", + "Sigmoid", + "Swish", + "Mish", + "AbsVal", + "BNLL", + "MaxUnpool", + "Dropout", + "Identity", + "Crop", + "Normalize" + }; + return layerTypes; +} + void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) { opencv_onnx::NodeProto node_proto = node_proto_; // TODO FIXIT @@ -458,11 +595,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); - + LayerParams layerParams; try { // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type" - LayerParams layerParams = getLayerParams(node_proto); + layerParams = getLayerParams(node_proto); layerParams.name = name; layerParams.type = layer_type; @@ -503,7 +640,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) MatShape targetShape; std::vector shouldDelete(inpShape.size(), false); for (int i = 0; i < axes.size(); i++) { - int axis = clamp(axes.get(i), inpShape.size()); + int axis = normalize_axis(axes.get(i), inpShape.size()); shouldDelete[axis] = true; } for (int axis = 0; axis < inpShape.size(); ++axis){ @@ -515,7 +652,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) if (inpShape.size() == 3 && axes.size() <= 2) { - int axis = clamp(axes.get(0), inpShape.size()); + int axis = normalize_axis(axes.get(0), inpShape.size()); CV_CheckNE(axis, 0, ""); LayerParams reshapeLp; @@ -539,8 +676,8 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) avgLp.set("pool", pool); if (axes.size() == 2) { - CV_CheckEQ(clamp(axes.get(0), inpShape.size()), 1, "Unsupported mode"); - CV_CheckEQ(clamp(axes.get(1), inpShape.size()), 2, "Unsupported mode"); + CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); + CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); avgLp.set("global_pooling", true); } else @@ -560,9 +697,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) CV_Assert(axes.size() <= inpShape.size() - 2); std::vector kernel_size(inpShape.size() - 2, 1); - if (axes.size() == 1 && (clamp(axes.get(0), inpShape.size()) <= 1)) + if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) { - int axis = clamp(axes.get(0), inpShape.size()); + int axis = normalize_axis(axes.get(0), inpShape.size()); MatShape newShape = inpShape; newShape[axis + 1] = total(newShape, axis + 1); newShape.resize(axis + 2); @@ -584,7 +721,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) else { for (int i = 0; i < axes.size(); i++) { - int axis = clamp(axes.get(i), inpShape.size()); + int axis = normalize_axis(axes.get(i), inpShape.size()); CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); kernel_size[axis - 2] = inpShape[axis]; } @@ -641,20 +778,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) int axis = 0; std::vector begin; std::vector end; + std::vector steps; int inp_size = node_proto.input_size(); if (inp_size == 1) { - if (layerParams.has("steps")) - { - DictValue steps = layerParams.get("steps"); - for (int i = 0; i < steps.size(); ++i) - { - if (steps.get(i) != 1) - 
CV_Error(Error::StsNotImplemented, - "Slice layer only supports steps = 1"); - } - } if (layerParams.has("axes")) { DictValue axes = layerParams.get("axes"); for (int i = 1; i < axes.size(); ++i) { @@ -677,7 +805,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) int finish = ends.get(i); end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim } - } else { + } else { // inp_size > 1 CV_Assert(inp_size >= 3); for (int i = 1; i < inp_size; i++) { CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); @@ -711,6 +839,12 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) if (inp_size == 5) { CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); Mat step_blob = getBlob(node_proto, 4); + const int* steps_ptr = step_blob.ptr(); + + if (axis > 0) + steps.resize(axis, 1); + + std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps)); // Very strange application for Slice op with tensor reversing. // We just workaround it for 2d constants. @@ -728,13 +862,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) return; } } - CV_CheckEQ(countNonZero(step_blob != 1), 0, "Slice layer only supports steps = 1"); } } layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); layerParams.set("axis", axis); + if (!steps.empty()) + layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { Mat inp = getBlob(node_proto, 0); @@ -799,7 +935,11 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) constParams.name = layerParams.name + "/const"; constParams.type = "Const"; constParams.blobs.push_back((isSub ? 
-1 : 1) * blob); - int id = dstNet.addLayer(constParams.name, constParams.type, constParams); + int id; + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(constParams.name, constParams.type, constParams); + else + id = dstNet.addLayer(constParams.name, constParams.type, constParams); layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); outShapes[constParams.name] = shape(blob); @@ -844,12 +984,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) powerParams.type = "Power"; powerParams.set("scale", -1); + int id; //Create Power layer - int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); + else + id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); outShapes[powerParams.name] = outShapes[node_proto.input(1)]; @@ -1036,11 +1183,18 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.erase("epsilon"); //Create MVN layer - int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + int id; + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + else + id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(0)); CV_Assert(layerId != layer_id.end()); - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; @@ -1162,6 +1316,53 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.type = "Scale"; } } + else if (!haveVariables) + { + Mat inp0 = getBlob(node_proto, 0); + Mat inp1 = getBlob(node_proto, 1); + + if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1)) + CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str())); + + if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims) + { + if (inp0.dims < inp1.dims) + { + inp0 = inp0.reshape(1, inp1.dims, inp1.size); + inp0.dims = inp1.dims; + } + else + { + inp1 = inp1.reshape(1, inp0.dims, inp0.size); + inp1.dims = inp0.dims; + } + } + + Mat out; + if (inp0.total() != inp1.total()) + { + if (inp0.total() == 1) + { + float coeff = isDiv ? 1.0 / inp0.at(0) : inp0.at(0); + multiply(inp1, coeff, out); + } + else + { + float coeff = isDiv ? 1.0 / inp1.at(0) : inp1.at(0); + multiply(inp0, coeff, out); + } + + } + else + { + out = isDiv ? 
inp0 / inp1 : inp0.mul(inp1); + } + + if (inp0.dims == 1 && inp1.dims == 1) + out.dims = 1; // to workaround dims == 1 + addConstant(layerParams.name, out); + return; + } else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) { layerParams.type = "Eltwise"; @@ -1186,12 +1387,19 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) powerParams.type = "Power"; powerParams.set("power", -1); + int id; //Create Power layer - int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); + else + id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); outShapes[powerParams.name] = outShapes[node_proto.input(1)]; @@ -1201,20 +1409,6 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) } layerParams.type = "Scale"; } - - if (!haveVariables) - { - Mat inp0 = getBlob(node_proto, 0); - Mat inp1 = getBlob(node_proto, 1); - if (inp0.size != inp1.size && inp1.total() != 1) - CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes"); - - Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1); - out = out.reshape(1, inp0.dims, inp0.size); - out.dims = inp0.dims; // to workaround dims == 1 - addConstant(layerParams.name, out); - return; - } } else if (layer_type == "Conv") { @@ -1343,7 +1537,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { Mat input = getBlob(node_proto, 0); - int axis = clamp(layerParams.get("axis", 1), input.dims); + int axis = normalize_axis(layerParams.get("axis", 1), input.dims); std::vector out_size(&input.size[0], &input.size[0] + axis); out_size.push_back(input.total(axis)); @@ -1733,9 +1927,26 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) if (!hasVariableInps) { std::vector inputs(node_proto.input_size()), concatenated; + // Due constant folding we can get inputs with different number of dimensions + // Insert the missing dimension to inputs + MatShape inputShape; for (size_t i = 0; i < inputs.size(); ++i) { inputs[i] = getBlob(node_proto, i); + if (inputs[i].size.dims() > inputShape.size()) + { + inputShape = shape(inputs[i]); + } + } + + // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1 + int axis = layerParams.get("axis", 1); + for (size_t i = 0; i < inputs.size(); ++i) + { + MatShape targetShape = inputShape; + targetShape[axis] = shape(inputs[i])[axis]; + CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); + inputs[i] = inputs[i].reshape(0, targetShape); } runLayer(layerParams, inputs, concatenated); @@ -1873,9 +2084,31 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) } catch (const cv::Exception& e) { - CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), 
name.c_str()) - ); + if (DNN_DIAGNOSTICS_RUN) + { + CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << "\n" << e.msg + ); + auto registeredLayers = getLayerFactoryImpl(); + if (registeredLayers.find(layerParams.type) != registeredLayers.end()) + { + try + { + Ptr layer = LayerFactory::createLayerInstance(layerParams.type, layerParams); + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what() + ); + } + } + } + else + { + CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) + ); + } for (int i = 0; i < node_proto.input_size(); i++) { CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); @@ -1884,7 +2117,16 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) { CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); } - CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); + if (DNN_DIAGNOSTICS_RUN) + { + for (int i = 0; i < node_proto.output_size(); ++i) + { + layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i))); + outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)]; + } + } + else + CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); } } diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index b7cdc2ad94..d9b98404c3 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -655,6 +655,22 @@ InferenceEngine::Core& getCore(const std::string& id) } #endif +static bool detectArmPlugin_() +{ + InferenceEngine::Core& ie = getCore("CPU"); + const std::vector devices = ie.GetAvailableDevices(); + for (std::vector::const_iterator i = devices.begin(); i != devices.end(); ++i) + { + if (i->find("CPU") != std::string::npos) + { + const std::string name = ie.GetMetric(*i, METRIC_KEY(FULL_DEVICE_NAME)).as(); + CV_LOG_INFO(NULL, "CPU plugin: " << name); + return name.find("arm_compute::NEON") != std::string::npos; + } + } + return false; +} + #if !defined(OPENCV_DNN_IE_VPU_TYPE_DEFAULT) static bool detectMyriadX_(std::string device) { @@ -1185,6 +1201,12 @@ bool isMyriadX() return myriadX; } +bool isArmComputePlugin() +{ + static bool armPlugin = getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE; + return armPlugin; +} + static std::string getInferenceEngineVPUType_() { static std::string param_vpu_type = utils::getConfigurationParameterString("OPENCV_DNN_IE_VPU_TYPE", ""); @@ -1223,6 +1245,14 @@ cv::String getInferenceEngineVPUType() return vpu_type; } +cv::String getInferenceEngineCPUType() +{ + static cv::String cpu_type = detectArmPlugin_() ? 
+ CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE : + CV_DNN_INFERENCE_ENGINE_CPU_TYPE_X86; + return cpu_type; +} + #else // HAVE_INF_ENGINE cv::String getInferenceEngineBackendType() @@ -1238,6 +1268,11 @@ cv::String getInferenceEngineVPUType() { CV_Error(Error::StsNotImplemented, "This OpenCV build doesn't include InferenceEngine support"); } + +cv::String getInferenceEngineCPUType() +{ + CV_Error(Error::StsNotImplemented, "This OpenCV build doesn't include InferenceEngine support"); +} #endif // HAVE_INF_ENGINE diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index fcd1a6927d..f52334bc45 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -28,10 +28,12 @@ #define INF_ENGINE_RELEASE_2020_3 2020030000 #define INF_ENGINE_RELEASE_2020_4 2020040000 #define INF_ENGINE_RELEASE_2021_1 2021010000 +#define INF_ENGINE_RELEASE_2021_2 2021020000 +#define INF_ENGINE_RELEASE_2021_3 2021030000 #ifndef INF_ENGINE_RELEASE -#warning("IE version have not been provided via command-line. Using 2021.1 by default") -#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_1 +#warning("IE version have not been provided via command-line. Using 2021.3 by default") +#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_3 #endif #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) @@ -254,8 +256,11 @@ CV__DNN_INLINE_NS_BEGIN bool isMyriadX(); +bool isArmComputePlugin(); + CV__DNN_INLINE_NS_END + InferenceEngine::Core& getCore(const std::string& id); template diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index b900e6add6..68f0dd7268 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -140,3 +140,14 @@ __kernel void ELUForward(const int n, __global const T* in, __global T* out) out[index] = (src >= 0.f) ? src : exp(src) - 1; } } + +__kernel void ExpForward(const int n, __global const T* in, __global T* out, + const KERNEL_ARG_DTYPE normScale, + const KERNEL_ARG_DTYPE normShift) +{ + int index = get_global_id(0); + if (index < n) + { + out[index] = exp(normShift + normScale * in[index]); + } +} diff --git a/modules/dnn/src/opencl/conv_spatial_helper.cl b/modules/dnn/src/opencl/conv_spatial_helper.cl index 9d5a89f7b1..33d9db57c8 100644 --- a/modules/dnn/src/opencl/conv_spatial_helper.cl +++ b/modules/dnn/src/opencl/conv_spatial_helper.cl @@ -39,9 +39,14 @@ // //M*/ +#ifdef HALF_SUPPORT +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16:enable +#endif +#endif + #define CONCAT(A,B) A##_##B #define TEMPLATE(name,type) CONCAT(name,type) -#define Dtype float __kernel void TEMPLATE(copyWeightsSwizzled, Dtype) (__global Dtype* weightIn, diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl index 8cbc34dde5..b345983aee 100644 --- a/modules/dnn/src/opencl/gemm_buffer.cl +++ b/modules/dnn/src/opencl/gemm_buffer.cl @@ -90,6 +90,12 @@ #pragma OPENCL EXTENSION cl_intel_subgroups : enable #endif +#ifdef ZERO_BETA +#define BETA_ZERO_CHECK(b0, v) (b0) +#else +#define BETA_ZERO_CHECK(b0, v) (v) +#endif + #define VEC_SIZE 4 #define LWG_HEIGHT 4 #define TILE_M 8 @@ -143,14 +149,14 @@ __kernel void TEMPLATE(gemm_buffer_NN, Dtype)( int row6 = mad24(global_y, TILE_M, 6) < M ? 6 : border; int row7 = mad24(global_y, TILE_M, 7) < M ? 7 : border; - Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : beta * vload4(0, dst_write0); - Dtype4 dot01 = (start_index != 0) ? 
vload4(0, dst_write0 + 1 * N) : beta * vload4(0, dst_write0 + 1 * N); - Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : beta * vload4(0, dst_write0 + 2 * N); - Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : beta * vload4(0, dst_write0 + 3 * N); - Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : beta * vload4(0, dst_write0 + 4 * N); - Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : beta * vload4(0, dst_write0 + 5 * N); - Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : beta * vload4(0, dst_write0 + 6 * N); - Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : beta * vload4(0, dst_write0 + 7 * N); + Dtype4 dot00 = (start_index != 0) ? vload4(0, dst_write0) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0)); + Dtype4 dot01 = (start_index != 0) ? vload4(0, dst_write0 + 1 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 1 * N)); + Dtype4 dot02 = (start_index != 0) ? vload4(0, dst_write0 + 2 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 2 * N)); + Dtype4 dot03 = (start_index != 0) ? vload4(0, dst_write0 + 3 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 3 * N)); + Dtype4 dot04 = (start_index != 0) ? vload4(0, dst_write0 + 4 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 4 * N)); + Dtype4 dot05 = (start_index != 0) ? vload4(0, dst_write0 + 5 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 5 * N)); + Dtype4 dot06 = (start_index != 0) ? vload4(0, dst_write0 + 6 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 6 * N)); + Dtype4 dot07 = (start_index != 0) ? vload4(0, dst_write0 + 7 * N) : BETA_ZERO_CHECK((Dtype4)0, beta * vload4(0, dst_write0 + 7 * N)); int end_index = min(start_index + 256, K); int w = start_index; @@ -579,7 +585,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( output = (local_x == 5) ? _dot.s5 : output; \ output = (local_x == 6) ? _dot.s6 : output; \ output = (local_x == 7) ? _dot.s7 : output; \ - dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \ + dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \ dst_write0 += N; if(global_x < N && global_y * 8 < M) { @@ -765,7 +771,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( output = (local_x == 5) ? _dot.s5 : output; \ output = (local_x == 6) ? _dot.s6 : output; \ output = (local_x == 7) ? 
_dot.s7 : output; \ - dst_write0[0] = mad(output, alpha, beta * dst_write0[0]); \ + dst_write0[0] = BETA_ZERO_CHECK(alpha * output, mad(output, alpha, beta * dst_write0[0])); \ dst_write0 += N; if(global_x < N && global_y * 8 < M) { @@ -819,8 +825,9 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)( const Dtype4 b1 = {srca_read1[i*4], srca_read1[(i*4+1)], srca_read1[(i*4+2)], srca_read1[(i*4+3)]}; #pragma unroll for(int j = 0; j < rows; ++j) { - dot0[j] += b0 * vload4(i, srcb_read + j * K); - dot1[j] += b1 * vload4(i, srcb_read + j * K); + Dtype4 a = vload4(i, srcb_read + j * K); + dot0[j] += b0 * a; + dot1[j] += b1 * a; } i += get_local_size(0); @@ -859,11 +866,19 @@ void TEMPLATE(gemm_buffer_NT_M_2_edgerows,Dtype)( } } + barrier(CLK_LOCAL_MEM_FENCE); if(lid == 0) { #pragma unroll for(int j = 0; j < rows; ++j) { - dstc0[(x_gid * 4 + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)]; - dstc1[(x_gid * 4 + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)]; +#ifdef ZERO_BETA + Dtype a0 = alpha * work_each0[j]; + Dtype a1 = alpha * work_each1[j]; +#else + Dtype a0 = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)]; + Dtype a1 = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)]; +#endif + dstc0[(x_gid * 4 + j)] = a0; + dstc1[(x_gid * 4 + j)] = a1; } } } @@ -952,9 +967,15 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_2,Dtype)( } } - if(lid == 0) { + if(lid == 0) + { +#ifdef ZERO_BETA + dstc0[x_gid] = alpha * work0[0]; + dstc1[x_gid] = alpha * work1[0]; +#else dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; +#endif } } } @@ -1058,10 +1079,17 @@ void TEMPLATE(gemm_buffer_NT_M_4_edgerows,Dtype)( if(lid == 0) { #pragma unroll for(int j = 0; j < rows; ++j) { +#ifdef ZERO_BETA + dstc0[(x_gid * 4 + j)] = alpha * work_each0[j]; + dstc1[(x_gid * 4 + j)] = alpha * work_each1[j]; + dstc2[(x_gid * 4 + j)] = alpha * work_each2[j]; + dstc3[(x_gid * 4 + j)] = alpha * work_each3[j]; +#else dstc0[(x_gid * 4 + j)] = alpha * work_each0[j] + beta * dstc0[(x_gid * 4 + j)]; dstc1[(x_gid * 4 + j)] = alpha * work_each1[j] + beta * dstc1[(x_gid * 4 + j)]; dstc2[(x_gid * 4 + j)] = alpha * work_each2[j] + beta * dstc2[(x_gid * 4 + j)]; dstc3[(x_gid * 4 + j)] = alpha * work_each3[j] + beta * dstc3[(x_gid * 4 + j)]; +#endif } } } @@ -1179,10 +1207,17 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_4,Dtype)( } if(lid == 0) { +#ifdef ZERO_BETA + dstc0[x_gid] = alpha * work0[0]; + dstc1[x_gid] = alpha * work1[0]; + dstc2[x_gid] = alpha * work2[0]; + dstc3[x_gid] = alpha * work3[0]; +#else dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid]; dstc3[x_gid] = alpha * work3[0] + beta * dstc3[x_gid]; +#endif } } } @@ -1320,6 +1355,16 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)( } if(lid == 0) { +#ifdef ZERO_BETA + dstc0[x_gid] = alpha * work0[0]; + dstc1[x_gid] = alpha * work1[0]; + dstc2[x_gid] = alpha * work2[0]; + dstc3[x_gid] = alpha * work3[0]; + dstc4[x_gid] = alpha * work4[0]; + dstc5[x_gid] = alpha * work5[0]; + dstc6[x_gid] = alpha * work6[0]; + dstc7[x_gid] = alpha * work7[0]; +#else dstc0[x_gid] = alpha * work0[0] + beta * dstc0[x_gid]; dstc1[x_gid] = alpha * work1[0] + beta * dstc1[x_gid]; dstc2[x_gid] = alpha * work2[0] + beta * dstc2[x_gid]; @@ -1328,6 +1373,7 @@ __kernel void TEMPLATE(gemm_buffer_NT_M_8,Dtype)( dstc5[x_gid] = alpha * work5[0] + beta * dstc5[x_gid]; dstc6[x_gid] = alpha * work6[0] + beta * 
dstc6[x_gid]; dstc7[x_gid] = alpha * work7[0] + beta * dstc7[x_gid]; +#endif } } #undef SLM_SIZE diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 45dfdad9e8..65695b8504 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -11,6 +11,12 @@ Implementation of Tensorflow models parser #include "../precomp.hpp" +#include +#include +#undef CV_LOG_STRIP_LEVEL +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 +#include + #ifdef HAVE_PROTOBUF #include "tf_io.hpp" @@ -93,7 +99,7 @@ void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape) shape[i] = (int)_shape.dim(i).size(); } else - shape.resize(1, 1); // Scalar. + shape.resize(1, 1); // Scalar. // FIXIT: should be empty } else { @@ -258,7 +264,7 @@ const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, cons return layer.attr().at(name); } -static int getDataLayout(const tensorflow::NodeDef& layer) +static DataLayout getDataLayout(const tensorflow::NodeDef& layer) { if (hasLayerAttr(layer, "data_format")) { @@ -280,13 +286,32 @@ static inline std::string getNodeName(const std::string& tensorName) return tensorName.substr(0, tensorName.rfind(':')); } -static inline int getDataLayout(const std::string& layerName, - const std::map& data_layouts) +static inline +DataLayout getDataLayout( + const std::string& layerName, + const std::map& data_layouts +) { - std::map::const_iterator it = data_layouts.find(getNodeName(layerName)); + std::map::const_iterator it = data_layouts.find(getNodeName(layerName)); return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN; } +static +bool hasAllOnes(const Mat &inputs, int startPos, int endPos) +{ + CV_CheckLE(inputs.dims, 2, ""); + CV_CheckGE(startPos, 0, ""); + CV_CheckLE(startPos, endPos, ""); + CV_CheckLT((size_t)endPos, inputs.total(), ""); + + for (int i = startPos; i < endPos; i++) + { + if (inputs.at(i) != 1 && inputs.at(i) != -1) + return false; + } + return true; +} + void setStrides(LayerParams &layerParams, const tensorflow::NodeDef &layer) { if (hasLayerAttr(layer, "strides")) @@ -389,7 +414,7 @@ Pin parsePin(const std::string &name) { Pin pin(name); - size_t delimiter_pos = name.find_first_of(":"); + size_t delimiter_pos = name.find_first_of(':'); if (delimiter_pos != std::string::npos) { pin.name = name.substr(0, delimiter_pos); @@ -439,15 +464,20 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in net.mutable_node()->DeleteSubrange(layer_index, 1); } -class TFImporter { +class TFImporter +{ public: - TFImporter(const char *model, const char *config = NULL); - TFImporter(const char *dataModel, size_t lenModel, + TFImporter(Net& net, const char *model, const char *config = NULL); + TFImporter(Net& net, const char *dataModel, size_t lenModel, const char *dataConfig = NULL, size_t lenConfig = 0); +protected: + Net& dstNet; + void populateNet(); - void populateNet(Net dstNet); + void parseNode(const tensorflow::NodeDef& layer); + + DataLayout predictOutputDataLayout(const tensorflow::NodeDef& layer); -private: void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob); void connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, @@ -467,23 +497,56 @@ private: std::vector netInputsNames; std::vector netInputShapes; + + std::set layers_to_ignore; + std::map data_layouts; + + // find all Const layers for params + std::map value_id; + // A map with constant blobs which are 
shared between multiple layers. + std::map sharedWeights; + + std::map layer_id; + +private: + void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); }; -TFImporter::TFImporter(const char *model, const char *config) +TFImporter::TFImporter(Net& net, const char *model, const char *config) + : dstNet(net) { if (model && model[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); + } if (config && config[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); ReadTFNetParamsFromTextFileOrDie(config, &netTxt); + } + + populateNet(); } -TFImporter::TFImporter(const char *dataModel, size_t lenModel, - const char *dataConfig, size_t lenConfig) +TFImporter::TFImporter( + Net& net, + const char *dataModel, size_t lenModel, + const char *dataConfig, size_t lenConfig +) + : dstNet(net) { if (dataModel != NULL && lenModel > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); + } if (dataConfig != NULL && lenConfig > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + } + populateNet(); } void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) @@ -612,84 +675,98 @@ const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDe static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, std::set& layers_to_ignore) { + CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); for (int li = 0; li < net.node_size(); li++) { const tensorflow::NodeDef &layer = net.node(li); String name = layer.name(); String type = layer.op(); - if (type == "Dequantize") + //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + + try { - // Example of Dequantize node: - // name: "conv2d_1/bias" - // op: "Dequantize" - // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) - // input: "conv2d_1/bias_quantized_min" - // input: "conv2d_1/bias_quantized_max" - // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) - // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) - CV_Assert(layer.input_size() == 3); - for (int i = 0; i < 3; ++i) - CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); - CV_Assert(hasLayerAttr(layer, "mode") && - getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + if (type == "Dequantize") + { + // Example of Dequantize node: + // name: "conv2d_1/bias" + // op: "Dequantize" + // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) + // input: "conv2d_1/bias_quantized_min" + // input: "conv2d_1/bias_quantized_max" + // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) + // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) + CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); + for (int i = 0; i < 3; ++i) + CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); + CV_Assert(hasLayerAttr(layer, "mode") && + getLayerAttr(layer, "mode").s() == "MIN_FIRST"); - int tensorId = const_layers[layer.input(0)]; - int minId = const_layers[layer.input(1)]; - int maxId = const_layers[layer.input(2)]; + int tensorId = const_layers[layer.input(0)]; + int minId = 
const_layers[layer.input(1)]; + int maxId = const_layers[layer.input(2)]; - tensorflow::TensorProto* tensor = net.mutable_node(tensorId) - ->mutable_attr()->at("value") - .mutable_tensor(); - CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8); + tensorflow::TensorProto* tensor = net.mutable_node(tensorId) + ->mutable_attr()->at("value") + .mutable_tensor(); + CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); - Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); - Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); - CV_Assert_N(qMin.total() == 1, qMin.type() == CV_32FC1, - qMax.total() == 1, qMax.type() == CV_32FC1); + Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); + Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); + CV_CheckEQ(qMin.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); + CV_CheckEQ(qMax.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); - Mat content = getTensorContent(*tensor); + Mat content = getTensorContent(*tensor); - float minVal = qMin.at(0); - float rangeScale = (qMax.at(0) - minVal) / 255; - CV_Assert(rangeScale >= 0); - content.convertTo(content, CV_32FC1, rangeScale, - rangeScale * cvRound(minVal / rangeScale)); + float minVal = qMin.at(0); + float rangeScale = (qMax.at(0) - minVal) / 255; + CV_Assert(rangeScale >= 0); + content.convertTo(content, CV_32FC1, rangeScale, + rangeScale * cvRound(minVal / rangeScale)); - tensor->set_dtype(tensorflow::DT_FLOAT); - tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + tensor->set_dtype(tensorflow::DT_FLOAT); + tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); - net.mutable_node(tensorId)->set_name(name); - CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + net.mutable_node(tensorId)->set_name(name); + CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + layers_to_ignore.insert(name); + continue; + } + else if (type != "Const") + continue; // only Const parameters are supported + + if (layer.attr().find("value") != layer.attr().end()) + { + CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + } layers_to_ignore.insert(name); - continue; } - else if (type != "Const") - continue; // only Const parameters are supported - - if (layer.attr().find("value") != layer.attr().end()) + catch (const std::exception& e) { - CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what()); + throw; } - layers_to_ignore.insert(name); } + CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); } // If all inputs of specific layer have the same data layout we can say that // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. 
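
The rule in the comment above — take the layout from the node's own attributes if present, otherwise unify the layouts of its inputs, where any disagreement collapses the result to unknown — can be sketched standalone. A minimal illustration of the unification step; the enum values here are illustrative, not the importer's actual ones:

    #include <vector>

    enum DataLayout { DATA_LAYOUT_UNKNOWN = -1, DATA_LAYOUT_NHWC = 0, DATA_LAYOUT_NCHW = 1 };

    // Unify the known layouts of a node's inputs: the first known layout
    // becomes the candidate; a conflicting known layout gives up.
    DataLayout unifyInputLayouts(const std::vector<DataLayout>& inputs)
    {
        DataLayout result = DATA_LAYOUT_UNKNOWN;
        for (size_t i = 0; i < inputs.size(); ++i)
        {
            if (inputs[i] == DATA_LAYOUT_UNKNOWN)
                continue;                    // this input does not constrain the output
            if (result == DATA_LAYOUT_UNKNOWN)
                result = inputs[i];          // first piece of evidence
            else if (result != inputs[i])
                return DATA_LAYOUT_UNKNOWN;  // conflicting evidence
        }
        return result;
    }
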
-static int predictOutputDataLayout(const tensorflow::GraphDef& net, - const tensorflow::NodeDef& layer, - const std::map& data_layouts) +DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) { - int layout = getDataLayout(layer); + DataLayout layout = getDataLayout(layer); if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); return layout; + } // Determine layout by layer's inputs - std::map::const_iterator it; for (int i = 0, n = layer.input_size(); i < n; ++i) { - it = data_layouts.find(getNodeName(layer.input(i))); + std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); if (it != data_layouts.end()) { if (layout != DATA_LAYOUT_UNKNOWN) @@ -703,71 +780,72 @@ static int predictOutputDataLayout(const tensorflow::GraphDef& net, } if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); return layout; + } // Determine layout by layer's consumers recursively. - it = data_layouts.find(layer.name()); + std::map::const_iterator it = data_layouts.find(layer.name()); CV_Assert(it != data_layouts.end()); return it->second; } -void TFImporter::populateNet(Net dstNet) +void TFImporter::populateNet() { - if (!netTxt.ByteSize()) - removePhaseSwitches(netBin); + CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); - RemoveIdentityOps(netBin); - RemoveIdentityOps(netTxt); + CV_LOG_INFO(NULL, "DNN/TF: parsing model" + << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netBin.node_size() + ); - if (!netTxt.ByteSize()) + if (netTxt.ByteSize()) { - simplifySubgraphs(netBin); - sortByExecutionOrder(netBin); + CV_LOG_INFO(NULL, "DNN/TF: parsing config" + << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netTxt.node_size() + ); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + RemoveIdentityOps(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); + + sortByExecutionOrder(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); } else { - sortByExecutionOrder(netTxt); - } + removePhaseSwitches(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); - std::set layers_to_ignore; + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + + simplifySubgraphs(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); + sortByExecutionOrder(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + } tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; int layersSize = net.node_size(); - std::map data_layouts; // Pre-fill data layouts where they are set explicitly. 
// Assuming that nodes are in topological order - for (int i = net.node_size() - 1; i >= 0; --i) + for (int i = layersSize - 1; i >= 0; --i) { const tensorflow::NodeDef& layer = net.node(i); std::string name = layer.name(); - int layout = getDataLayout(layer); - std::map::iterator it = data_layouts.find(name); - if (it != data_layouts.end()) - { - if (layout != DATA_LAYOUT_UNKNOWN) - { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - { - it->second = DATA_LAYOUT_UNKNOWN; - layout = DATA_LAYOUT_UNKNOWN; - } - } - else - layout = it->second; - } - else - data_layouts[name] = layout; + CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); - // Specify input layers to have the same data layout. - for (int j = 0; j < layer.input_size(); ++j) + try { - name = getNodeName(layer.input(j)); - it = data_layouts.find(name); + DataLayout layout = getDataLayout(layer); + std::map::iterator it = data_layouts.find(name); if (it != data_layouts.end()) { if (layout != DATA_LAYOUT_UNKNOWN) @@ -775,38 +853,105 @@ void TFImporter::populateNet(Net dstNet) if (it->second == DATA_LAYOUT_UNKNOWN) it->second = layout; else if (it->second != layout) + { it->second = DATA_LAYOUT_UNKNOWN; + layout = DATA_LAYOUT_UNKNOWN; + } } + else + layout = it->second; } else data_layouts[name] = layout; + + // Specify input layers to have the same data layout. + for (int j = 0; j < layer.input_size(); ++j) + { + name = getNodeName(layer.input(j)); + it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + it->second = DATA_LAYOUT_UNKNOWN; + } + } + else + data_layouts[name] = layout; + } + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what()); + throw; } } - // find all Const layers for params - std::map value_id; - // A map with constant blobs which are shared between multiple layers. 
- std::map sharedWeights; addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); - std::map layer_id; for (int li = 0; li < layersSize; li++) { - tensorflow::NodeDef layer = net.node(li); - String name = layer.name(); - String type = layer.op(); + const tensorflow::NodeDef& layer = net.node(li); + + const std::string name = layer.name(); + const std::string type = layer.op(); + const int ninputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + + parseNode(layer); + } + + for (size_t i = 0; i < netInputsNames.size(); i++) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); + CV_Assert(!netInputsNames[i].empty()); + } + dstNet.setInputsNames(netInputsNames); + CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); +} + +void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order, 4)); + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); +} + +void TFImporter::parseNode(const tensorflow::NodeDef& layer_) +{ + tensorflow::NodeDef layer = layer_; + + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + + /*const*/ std::string name = layer.name(); + /*const*/ std::string type = layer.op(); + /*const*/ int num_inputs = layer.input_size(); + + try + { LayerParams layerParams; - if(layers_to_ignore.find(name) != layers_to_ignore.end()) - continue; + if (layers_to_ignore.find(name) != layers_to_ignore.end()) + { + CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); + return; + } - int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); + DataLayout predictedLayout = predictOutputDataLayout(layer); data_layouts[name] = predictedLayout; if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D") { + CV_CheckGT(num_inputs, 0, ""); // The first node of dilated convolution subgraph. // Extract input node, dilation rate and paddings. 
std::string input = layer.input(0); @@ -824,7 +969,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "SpaceToBatchND/block_shape" // input: "SpaceToBatchND/paddings" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); CV_Assert(dilation.size() == 2); @@ -839,10 +984,14 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("pad_w", paddings.at(2)); CV_Assert(next_layers.size() == 1); - layer = net.node(next_layers[0].second); layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); name = layer.name(); type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } else if (type == "Pad" || type == "MirrorPad") { @@ -876,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; connect(layer_id, dstNet, parsePin(input), id, 0); - continue; + return; } else { @@ -886,10 +1035,14 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("pad_h", paddings.at(4)); layerParams.set("pad_w", paddings.at(6)); - layer = net.node(next_layers[0].second); layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); name = layer.name(); type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } } @@ -1011,13 +1164,14 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN") { + CV_CheckGT(num_inputs, 0, ""); bool haveConst = false; - for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii) + for(int ii = 0; !haveConst && ii < num_inputs; ++ii) { Pin input = parsePin(layer.input(ii)); haveConst = value_id.find(input.name) != value_id.end(); } - CV_Assert(!haveConst || layer.input_size() == 2); + CV_Assert(!haveConst || num_inputs == 2); if (haveConst) { @@ -1054,7 +1208,7 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Eltwise", layerParams); layer_id[name] = id; - for (int ii = 0; ii < layer.input_size(); ii++) + for (int ii = 0; ii < num_inputs; ii++) { Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) @@ -1065,7 +1219,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "MatMul") { - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); // For the object detection networks, TensorFlow Object Detection API // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) @@ -1077,7 +1231,7 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("bias_term", false); layerParams.blobs.resize(1); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead if (next_layers.empty()) { next_layers = getNextLayers(net, name, "Add"); @@ -1105,8 +1259,18 @@ void TFImporter::populateNet(Net dstNet) int kernel_blob_index = -1; const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index); - blobFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); + const String kernelTensorName = layer.input(kernel_blob_index); + std::map::iterator sharedWeightsIt 
= sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) + { + blobFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); + sharedWeights[kernelTensorName] = layerParams.blobs[0]; + } + else + { + layerParams.blobs[0] = sharedWeightsIt->second; + } if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed Mat data = layerParams.blobs[0].t(); @@ -1135,44 +1299,57 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Reshape") { + CV_CheckGT(num_inputs, 0, ""); Pin inpId = parsePin(layer.input(0)); - int inpLayout = getDataLayout(layer.input(0), data_layouts); + DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); // There are two possible implementations: reshape an input using // predefined sizes or use a second input blob as a source of new shape. if (value_id.find(layer.input(1)) != value_id.end()) { Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); - if (newShape.total() == 4) + int newShapeSize = newShape.total(); + bool hasSwap = false; + if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) { // NHWC->NCHW std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + hasSwap = true; } if (inpLayout == DATA_LAYOUT_NHWC) { - if (newShape.total() != 4 || newShape.at(1) == 1) + if (newShapeSize >= 2 || newShape.at(1) == 1) { - LayerParams permLP; int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - permLP.set("order", DictValue::arrayInt(order, 4)); - - std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); - inpLayout = DATA_LAYOUT_NCHW; + addPermuteLayer(order, name + "/nhwc", inpId); + if (newShapeSize < 4) + { + inpLayout = DATA_LAYOUT_NCHW; + } + else + { + inpLayout = DATA_LAYOUT_NHWC; + } } } - layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShape.total())); + layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); int id = dstNet.addLayer(name, "Reshape", layerParams); layer_id[name] = id; // one input only connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = newShape.total() == 2 ? DATA_LAYOUT_PLANAR : inpLayout; + inpId = Pin(name); + + if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && + newShapeSize == 4 && !hasSwap) + { + int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. + addPermuteLayer(order, name + "/nchw", inpId); + inpLayout = DATA_LAYOUT_NCHW; + } + + data_layouts[name] = newShapeSize == 2 ? 
DATA_LAYOUT_PLANAR : inpLayout; } else { @@ -1185,6 +1362,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Flatten" || type == "Squeeze") { + CV_CheckGT(num_inputs, 0, ""); Pin inpId = parsePin(layer.input(0)); int inpLayout = getDataLayout(layer.input(0), data_layouts); if (type == "Squeeze") @@ -1231,6 +1409,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Transpose") { + CV_CheckGT(num_inputs, 0, ""); Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(perm.type() == CV_32SC1); int* permData = (int*)perm.data; @@ -1304,6 +1483,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "LRN") { + CV_CheckGT(num_inputs, 0, ""); if(hasLayerAttr(layer, "alpha")) { layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); } @@ -1322,11 +1502,12 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "LRN", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "Concat" || type == "ConcatV2") { - int axisId = (type == "Concat" ? 0 : layer.input_size() - 1); + CV_CheckGT(num_inputs, 0, ""); + int axisId = (type == "Concat" ? 0 : num_inputs - 1); int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) @@ -1337,7 +1518,7 @@ void TFImporter::populateNet(Net dstNet) // input(0) or input(n-1) is concat_dim int from = (type == "Concat" ? 1 : 0); - int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1); + int to = (type == "Concat" ? num_inputs : num_inputs - 1); for (int ii = from; ii < to; ii++) { @@ -1370,6 +1551,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "MaxPool" || type == "MaxPool3D") { + CV_CheckGT(num_inputs, 0, ""); layerParams.set("pool", "max"); setKSize(layerParams, layer); @@ -1381,10 +1563,11 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "AvgPool" || type == "AvgPool3D") { + CV_CheckGT(num_inputs, 0, ""); layerParams.set("pool", "ave"); layerParams.set("ave_pool_padded_area", false); setKSize(layerParams, layer); @@ -1394,11 +1577,11 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "MaxPoolGrad") { - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); layerParams.set("pool_k_h", 0); layerParams.set("pool_k_w", 0); @@ -1457,7 +1640,7 @@ void TFImporter::populateNet(Net dstNet) // TODO: slicing input may be Const op // TODO: slicing kernels for convolutions - in current implementation it is impossible // TODO: add parsing num of slices parameter - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); // num_split // 1st blob is dims tensor int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); @@ -1480,7 +1663,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input_node" // input: "Slice/begin" // input: "Slice/size" - CV_Assert(layer.input_size() == 3); + 
CV_CheckEQ(num_inputs, 3, ""); Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2)); CV_Assert_N(!begins.empty(), !sizes.empty()); @@ -1505,7 +1688,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "StridedSlice") { - CV_Assert(layer.input_size() == 4); + CV_CheckEQ(num_inputs, 4, ""); Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); @@ -1544,8 +1727,9 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Mul" || type == "RealDiv") { + CV_CheckGT(num_inputs, 0, ""); int constId = -1; - for(int ii = 0; ii < layer.input_size(); ++ii) + for(int ii = 0; ii < num_inputs; ++ii) { Pin input = parsePin(layer.input(ii)); if (value_id.find(input.name) != value_id.end()) @@ -1554,12 +1738,12 @@ void TFImporter::populateNet(Net dstNet) break; } } - CV_Assert((constId != -1) || (layer.input_size() == 2)); + CV_Assert((constId != -1) || (num_inputs == 2)); if (constId != -1) { // Multiplication by constant. - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); CV_Assert(scaleMat.type() == CV_32FC1); if (type == "RealDiv") @@ -1642,8 +1826,9 @@ void TFImporter::populateNet(Net dstNet) { // Check if all the inputs have the same shape. bool equalInpShapes = true; + bool isShapeOnes = false; MatShape outShape0; - for (int ii = 0; ii < layer.input_size() && !netInputShapes.empty(); ii++) + for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) { Pin pin = parsePin(layer.input(ii)); int inpId = layer_id.find(pin.name)->second; @@ -1662,12 +1847,14 @@ void TFImporter::populateNet(Net dstNet) else if (outShape != outShape0) { equalInpShapes = false; + isShapeOnes = isAllOnes(outShape, 2, outShape.size()) || + isAllOnes(outShape0, 2, outShape0.size()); break; } } int id; - if (equalInpShapes || netInputShapes.empty()) + if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes)) { layerParams.set("operation", type == "RealDiv" ? 
"div" : "prod"); id = dstNet.addLayer(name, "Eltwise", layerParams); @@ -1681,7 +1868,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; - for (int ii = 0; ii < layer.input_size(); ii++) + for (int ii = 0; ii < num_inputs; ii++) { Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) @@ -1698,9 +1885,7 @@ void TFImporter::populateNet(Net dstNet) // input: "BatchNorm/beta" // input: "BatchNorm/moving_mean" // input: "BatchNorm/moving_variance" - if (layer.input_size() != 5) - CV_Error(Error::StsNotImplemented, - "Expected gamma, beta, mean and std"); + CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); Pin inpId = parsePin(layer.input(0)); bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); @@ -1768,9 +1953,7 @@ void TFImporter::populateNet(Net dstNet) // input: "conv2d_transpose/output_shape" // input: "weights" // input: "input" - if (layer.input_size() != 3) - CV_Error(Error::StsNotImplemented, - "Expected output shape, weights and input nodes"); + CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); layerParams.set("bias_term", false); layerParams.blobs.resize(1); @@ -1845,8 +2028,7 @@ void TFImporter::populateNet(Net dstNet) // input: "lstm_block_wrapper/w_f_diag" // input: "lstm_block_wrapper/w_o_diag" // input: "lstm_block_wrapper/bias" - if (layer.input_size() != 9) - CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes"); + CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); if (hasLayerAttr(layer, "forget_bias")) layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); @@ -1912,6 +2094,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear" || type == "FusedResizeAndPadConv2D") { + CV_CheckGT(num_inputs, 0, ""); std::string convWeights = ""; if (type == "FusedResizeAndPadConv2D") { @@ -1919,30 +2102,32 @@ void TFImporter::populateNet(Net dstNet) // input: "decoder/ResizeBilinear/size" // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" // input: "decoder/decoder_conv0/weights" - CV_CheckEQ(layer.input_size(), 4, "Number of input for FusedResizeAndPadConv2D"); + CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); convWeights = layer.input(3); - layer.mutable_input()->DeleteSubrange(2, 2); + layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model + num_inputs = layer.input_size(); name = name + "/resize"; if (hasLayerAttr(layer, "resize_align_corners")) { + // FIXIT do NOT modify input model layer.mutable_attr()->insert( ::google::protobuf::MapPair("align_corners", getLayerAttr(layer, "resize_align_corners"))); } } - if (layer.input_size() == 2) + if (num_inputs == 2) { Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); layerParams.set("height", outSize.at(0, 0)); layerParams.set("width", outSize.at(0, 1)); } - else if (layer.input_size() == 3) + else if (num_inputs == 3) { Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); @@ -1952,7 +2137,7 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("zoom_factor_y", factorHeight.at(0)); } else - CV_Assert(layer.input_size() == 2 || 
layer.input_size() == 3); + CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); if (type == "ResizeNearestNeighbor") layerParams.set("interpolation", "nearest"); @@ -1962,6 +2147,9 @@ void TFImporter::populateNet(Net dstNet) if (hasLayerAttr(layer, "align_corners")) layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); + if (hasLayerAttr(layer, "half_pixel_centers")) + layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b()); + int id = dstNet.addLayer(name, "Resize", layerParams); layer_id[name] = id; @@ -1970,12 +2158,12 @@ void TFImporter::populateNet(Net dstNet) // Step back to add convolution if (type == "FusedResizeAndPadConv2D") { - tensorflow::NodeDef* conv = net.mutable_node(li); - conv->clear_input(); - conv->add_input(name); - conv->add_input(convWeights); - conv->set_op("Conv2D"); - li -= 1; + tensorflow::NodeDef conv = layer_; + conv.clear_input(); + conv.add_input(name); + conv.add_input(convWeights); + conv.set_op("Conv2D"); + parseNode(conv); } } else if (type == "L2Normalize") @@ -1983,7 +2171,7 @@ void TFImporter::populateNet(Net dstNet) // op: "L2Normalize" // input: "input" // input: "reduction_indices" (axis) - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(reductionIndices.type() == CV_32SC1); @@ -2008,6 +2196,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "PriorBox") { + CV_CheckEQ(num_inputs, 2, ""); if (hasLayerAttr(layer, "min_size")) layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); if (hasLayerAttr(layer, "max_size")) @@ -2040,12 +2229,13 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Softmax") { + CV_CheckGT(num_inputs, 0, ""); if (hasLayerAttr(layer, "axis")) layerParams.set("axis", getLayerAttr(layer, "axis").i()); int id = dstNet.addLayer(name, "Softmax", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "CropAndResize") { @@ -2053,7 +2243,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "boxes" // input: "sizes" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2)); CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, ""); @@ -2081,6 +2271,7 @@ void TFImporter::populateNet(Net dstNet) // determine out shape: NxCxHxW --Slice--> 1xCxHxW // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + CV_CheckGT(num_inputs, 0, ""); Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); @@ -2169,12 +2360,9 @@ void TFImporter::populateNet(Net dstNet) // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC LayerParams permLP; int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
- permLP.set("order", DictValue::arrayInt(order, 4)); std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, Pin(name), permId, 0); + Pin inpId = Pin(name); + addPermuteLayer(order, permName, inpId); LayerParams squeezeLp; std::string squeezeName = name + "/squeeze"; @@ -2186,6 +2374,38 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, Pin(permName), squeezeId, 0); } } + else if (axis == 1) + { + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + Pin inpId = parsePin(layer.input(0)); + addPermuteLayer(order, name + "/nhwc", inpId); + + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("kernel_h", 1); + layerParams.set("global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); + + if (!keepDims) + { + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + int channel_id = 3; // TF NHWC layout + squeezeLp.set("axis", channel_id - 1); + squeezeLp.set("end_axis", channel_id); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(name), squeezeId, 0); + } + else + { + int order[] = {0, 3, 1, 2}; // From NHWC to OpenCV's NCHW. + Pin inpId = parsePin(name); + addPermuteLayer(order, name + "/nchw", inpId); + } + } } else { if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); @@ -2215,6 +2435,7 @@ void TFImporter::populateNet(Net dstNet) // Example: given a list with "N" tensors of shape (C, H, W): // if axis == 0 then the output tensor will have the shape (N, C, H, W), // if axis == 1 then the output tensor will have the shape (C, N, H, W). 
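
A worked shape example for the Pack semantics described in the comment above: stacking N equal-shape tensors inserts a new dimension of size N at position axis, which the importer appears to realize for the axis == 0 case via per-input reshapes followed by a concatenation. A tiny sketch with a hypothetical helper, not importer code:

    #include <cassert>
    #include <vector>

    // Output shape of Pack over n tensors of identical shape: a new
    // dimension of size n appears at position `axis`.
    std::vector<int> packedShape(const std::vector<int>& inputShape, int n, int axis)
    {
        assert(axis >= 0 && axis <= (int)inputShape.size());
        std::vector<int> out(inputShape);
        out.insert(out.begin() + axis, n);
        return out;
    }

    // packedShape({3, 224, 224}, 4, 0) -> {4, 3, 224, 224}
    // packedShape({3, 224, 224}, 4, 1) -> {3, 4, 224, 224}
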
+ CV_CheckGT(num_inputs, 0, ""); CV_Assert(hasLayerAttr(layer, "axis")); int dim = (int)getLayerAttr(layer, "axis").i(); if (dim != 0) @@ -2222,7 +2443,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(hasLayerAttr(layer, "N")); int num = (int)getLayerAttr(layer, "N").i(); - CV_Assert(layer.input_size() == num); + CV_CheckEQ(num_inputs, num, ""); std::string base_name = name + "/reshape_"; std::vector reshape_ids; for (int i = 0; i < num; i++) { @@ -2253,7 +2474,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "mix" // input: "max" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1)); Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2)); @@ -2268,10 +2489,21 @@ void TFImporter::populateNet(Net dstNet) connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); } + else if (type == "LeakyRelu") + { + CV_CheckGT(num_inputs, 0, ""); + CV_Assert(hasLayerAttr(layer, "alpha")); + layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f()); + + int id = dstNet.addLayer(name, "ReLU", layerParams); + layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); + } else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" || - type == "Relu" || type == "Elu" || + type == "Relu" || type == "Elu" || type == "Exp" || type == "Identity" || type == "Relu6") { + CV_CheckGT(num_inputs, 0, ""); std::string dnnType = type; if (type == "Abs") dnnType = "AbsVal"; else if (type == "Tanh") dnnType = "TanH"; @@ -2281,7 +2513,7 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, dnnType, layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else { @@ -2305,7 +2537,7 @@ void TFImporter::populateNet(Net dstNet) // All the Const input nodes are added to layer's blobs. std::vector inputsNames; - for (int i = 0; i < layer.input_size(); ++i) + for (int i = 0; i < num_inputs; ++i) { // Check if input is a Const node. if (value_id.find(layer.input(i)) != value_id.end()) @@ -2325,7 +2557,11 @@ void TFImporter::populateNet(Net dstNet) } } } - dstNet.setInputsNames(netInputsNames); + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "'. 
Exception: " << e.what()); + throw; + } } } // namespace @@ -2334,18 +2570,16 @@ void TFImporter::populateNet(Net dstNet) Net readNetFromTensorflow(const String &model, const String &config) { - TFImporter importer(model.c_str(), config.c_str()); Net net; - importer.populateNet(net); + TFImporter importer(net, model.c_str(), config.c_str()); return net; } Net readNetFromTensorflow(const char* bufferModel, size_t lenModel, const char* bufferConfig, size_t lenConfig) { - TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig); Net net; - importer.populateNet(net); + TFImporter importer(net, bufferModel, lenModel, bufferConfig, lenConfig); return net; } diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 67f5782a2e..aab4c6f507 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -101,6 +101,9 @@ public: TEST_P(DNNTestNetwork, AlexNet) { applyTestTag(CV_TEST_TAG_MEMORY_1GB); + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + processNet("dnn/bvlc_alexnet.caffemodel", "dnn/bvlc_alexnet.prototxt", Size(227, 227), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_alexnet.yml" : @@ -115,6 +118,9 @@ TEST_P(DNNTestNetwork, ResNet_50) (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB), CV_TEST_TAG_DEBUG_LONG ); + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + processNet("dnn/ResNet-50-model.caffemodel", "dnn/ResNet-50-deploy.prototxt", Size(224, 224), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_resnet_50.yml" : @@ -125,6 +131,9 @@ TEST_P(DNNTestNetwork, ResNet_50) TEST_P(DNNTestNetwork, SqueezeNet_v1_1) { + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + processNet("dnn/squeezenet_v1.1.caffemodel", "dnn/squeezenet_v1.1.prototxt", Size(227, 227), "prob", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_squeezenet_v1_1.yml" : @@ -136,6 +145,9 @@ TEST_P(DNNTestNetwork, SqueezeNet_v1_1) TEST_P(DNNTestNetwork, GoogLeNet) { applyTestTag(target == DNN_TARGET_CPU ? "" : CV_TEST_TAG_MEMORY_512MB); + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + processNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt", Size(224, 224), "prob"); expectNoFallbacksFromIE(net); @@ -145,6 +157,9 @@ TEST_P(DNNTestNetwork, GoogLeNet) TEST_P(DNNTestNetwork, Inception_5h) { applyTestTag(CV_TEST_TAG_MEMORY_512MB); + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + double l1 = default_l1, lInf = default_lInf; if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_CPU || target == DNN_TARGET_OPENCL)) { @@ -162,6 +177,9 @@ TEST_P(DNNTestNetwork, Inception_5h) TEST_P(DNNTestNetwork, ENet) { applyTestTag(target == DNN_TARGET_CPU ? 
"" : CV_TEST_TAG_MEMORY_512MB); + if (backend == DNN_BACKEND_HALIDE) // Realization contains wrong number of Images (1) for realizing pipeline with 2 outputs + applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index 3bc8fc3a89..139f3d1671 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -30,11 +30,13 @@ #define CV_TEST_TAG_DNN_SKIP_IE_2019R1_1 "dnn_skip_ie_2019r1_1" #define CV_TEST_TAG_DNN_SKIP_IE_2019R2 "dnn_skip_ie_2019r2" #define CV_TEST_TAG_DNN_SKIP_IE_2019R3 "dnn_skip_ie_2019r3" +#define CV_TEST_TAG_DNN_SKIP_IE_CPU "dnn_skip_ie_cpu" #define CV_TEST_TAG_DNN_SKIP_IE_OPENCL "dnn_skip_ie_ocl" #define CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16 "dnn_skip_ie_ocl_fp16" #define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2 "dnn_skip_ie_myriad2" #define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X "dnn_skip_ie_myriadx" #define CV_TEST_TAG_DNN_SKIP_IE_MYRIAD CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2, CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X +#define CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU "dnn_skip_ie_arm_cpu" #define CV_TEST_TAG_DNN_SKIP_VULKAN "dnn_skip_vulkan" @@ -113,6 +115,14 @@ void normAssertDetections( double confThreshold = 0.0, double scores_diff = 1e-5, double boxes_iou_diff = 1e-4); +// For text detection networks +// Curved text polygon is not supported in the current version. +// (concave polygon is invalid input to intersectConvexConvex) +void normAssertTextDetections( + const std::vector>& gtPolys, + const std::vector>& testPolys, + const char *comment = "", double boxes_iou_diff = 1e-4); + void readFileContent(const std::string& filename, CV_OUT std::vector& content); #ifdef HAVE_INF_ENGINE diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index cf1b558391..3d56e6f308 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -177,6 +177,52 @@ void normAssertDetections( testBoxes, comment, confThreshold, scores_diff, boxes_iou_diff); } +// For text detection networks +// Curved text polygon is not supported in the current version. +// (concave polygon is invalid input to intersectConvexConvex) +void normAssertTextDetections( + const std::vector>& gtPolys, + const std::vector>& testPolys, + const char *comment /*= ""*/, double boxes_iou_diff /*= 1e-4*/) +{ + std::vector matchedRefBoxes(gtPolys.size(), false); + for (uint i = 0; i < testPolys.size(); ++i) + { + const std::vector& testPoly = testPolys[i]; + bool matched = false; + double topIoU = 0; + for (uint j = 0; j < gtPolys.size() && !matched; ++j) + { + if (!matchedRefBoxes[j]) + { + std::vector intersectionPolygon; + float intersectArea = intersectConvexConvex(testPoly, gtPolys[j], intersectionPolygon, true); + double iou = intersectArea / (contourArea(testPoly) + contourArea(gtPolys[j]) - intersectArea); + topIoU = std::max(topIoU, iou); + if (1.0 - iou < boxes_iou_diff) + { + matched = true; + matchedRefBoxes[j] = true; + } + } + } + if (!matched) { + std::cout << cv::format("Unmatched-det:") << testPoly << std::endl; + std::cout << "Highest IoU: " << topIoU << std::endl; + } + EXPECT_TRUE(matched) << comment; + } + + // Check unmatched groundtruth. 
+ for (uint i = 0; i < gtPolys.size(); ++i) + { + if (!matchedRefBoxes[i]) { + std::cout << cv::format("Unmatched-gt:") << gtPolys[i] << std::endl; + } + EXPECT_TRUE(matchedRefBoxes[i]); + } +} + void readFileContent(const std::string& filename, CV_OUT std::vector& content) { const std::ios::openmode mode = std::ios::in | std::ios::binary; @@ -407,13 +453,13 @@ void initDNNTests() #ifdef HAVE_DNN_IE_NN_BUILDER_2019 CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, #endif - "" + CV_TEST_TAG_DNN_SKIP_IE_CPU ); -#endif registerGlobalSkipTag( // see validateVPUType(): CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_2, CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16 ); +#endif #ifdef HAVE_VULKAN registerGlobalSkipTag( CV_TEST_TAG_DNN_SKIP_VULKAN diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 021603636e..f2b30c9b87 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -656,7 +656,7 @@ TEST_P(Test_Darknet_nets, YOLOv4_tiny) target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB ); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021010000) // nGraph compilation failure +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) // nGraph compilation failure if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif @@ -727,6 +727,10 @@ TEST_P(Test_Darknet_layers, shortcut) TEST_P(Test_Darknet_layers, upsample) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif testDarknetLayer("upsample"); } @@ -735,6 +739,11 @@ TEST_P(Test_Darknet_layers, mish) testDarknetLayer("mish", true); } +TEST_P(Test_Darknet_layers, tanh) +{ + testDarknetLayer("tanh"); +} + TEST_P(Test_Darknet_layers, avgpool_softmax) { testDarknetLayer("avgpool_softmax"); @@ -798,6 +807,11 @@ TEST_P(Test_Darknet_layers, relu) testDarknetLayer("relu"); } +TEST_P(Test_Darknet_layers, sam) +{ + testDarknetLayer("sam", true); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_layers, dnnBackendsAndTargets()); }} // namespace diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 7e6d7f87d2..165ee4d67b 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -258,7 +258,17 @@ TEST_P(LRN, Accuracy) int sz[] = {1, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); - test(lp, input, backendId, targetId); + + double l1 = 0.0, lInf = 0.0; + // The OpenCL kernels use the native_ math functions which have + // implementation defined accuracy, so we use relaxed thresholds. See + // https://github.com/opencv/opencv/issues/9821 for more details. 
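
The relaxed thresholds mentioned in the comment above bound two error norms: l1 is the mean absolute difference over all elements, lInf the largest single-element difference. A rough sketch of the kind of check these tests perform (the actual normAssert helper carries extra reporting and masking logic):

    #include <opencv2/core.hpp>

    // Accept `out` as matching `ref` when both error norms stay inside
    // the per-target tolerances.
    static bool withinTolerance(const cv::Mat& ref, const cv::Mat& out,
                                double l1, double lInf)
    {
        double errL1  = cv::norm(ref, out, cv::NORM_L1) / ref.total(); // mean abs diff
        double errInf = cv::norm(ref, out, cv::NORM_INF);              // max abs diff
        return errL1 <= l1 && errInf <= lInf;
    }
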
+ if (targetId == DNN_TARGET_OPENCL) + { + l1 = 0.01; + lInf = 0.01; + } + test(lp, input, backendId, targetId, false, l1, lInf); } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine( @@ -632,6 +642,31 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power, Combine( dnnBackendsAndTargetsWithHalide() )); +typedef TestWithParam > > Exp; +TEST_P(Exp, Accuracy) +{ + float base = get<0>(GetParam())[0]; + float scale = get<0>(GetParam())[1]; + float shift = get<0>(GetParam())[2]; + Backend backendId = get<0>(get<1>(GetParam())); + Target targetId = get<1>(get<1>(GetParam())); + + LayerParams lp; + lp.set("base", base); + lp.set("scale", scale); + lp.set("shift", shift); + lp.type = "Exp"; + lp.name = "testLayer"; + testInPlaceActivation(lp, backendId, targetId); +} + +INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Exp, Combine( +/*base, scale, shift*/ Values(Vec3f(0.9f, -1.0f, 1.1f), Vec3f(0.9f, 1.1f, -1.0f), + Vec3f(-1.0f, 0.9f, 1.1f), Vec3f(-1.0f, 1.1f, 0.9f), + Vec3f(1.1f, 0.9f, -1.0f), Vec3f(1.1f, -1.0f, 0.9f)), + dnnBackendsAndTargetsWithHalide() +)); + TEST_P(Test_Halide_layers, ChannelsPReLU) { LayerParams lp; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 61537e0e01..20d3fb41eb 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -169,8 +169,17 @@ TEST_P(Test_Caffe_layers, Softmax) TEST_P(Test_Caffe_layers, LRN) { - testLayerUsingCaffeModels("layer_lrn_spatial"); - testLayerUsingCaffeModels("layer_lrn_channels"); + double l1 = 0.0, lInf = 0.0; + // The OpenCL kernels use the native_ math functions which have + // implementation defined accuracy, so we use relaxed thresholds. See + // https://github.com/opencv/opencv/issues/9821 for more details. + if (target == DNN_TARGET_OPENCL) + { + l1 = 0.01; + lInf = 0.01; + } + testLayerUsingCaffeModels("layer_lrn_spatial", false, true, l1, lInf); + testLayerUsingCaffeModels("layer_lrn_channels", false, true, l1, lInf); } TEST_P(Test_Caffe_layers, Convolution) @@ -1583,6 +1592,11 @@ TEST_P(Test_Caffe_layers, Interp) TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (available in OpenCV source tree only) #endif { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); @@ -2152,6 +2166,12 @@ public: randu(scales, -1.0f, 1.0f); activationParams.blobs.push_back(scales); } + else if (activationParams.type == "Exp") + { + activationParams.set("base", -1.0f); + activationParams.set("scale", 0.3f); + activationParams.set("shift", 0.6f); + } } static void makeDefaultTestEltwiseLayer(LayerParams& eltwiseParams, const std::string& op, bool withCoefficients) @@ -2223,7 +2243,7 @@ public: static testing::internal::ParamGenerator activationLayersList() { // TODO: automate list generation - return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power"); + return Values("ReLU", "ReLU6", "ChannelsPReLU", "TanH", "Swish", "Mish", "Sigmoid", "ELU", "AbsVal", "BNLL", "Power", "Exp"); } static testing::internal::ParamGenerator > dnnBackendsAndTargetsForFusionTests() diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 7d516de73e..f7befa9937 100644 --- 
a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -25,7 +25,8 @@ public: double scoreDiff, double iouDiff, double confThreshold = 0.24, double nmsThreshold = 0.0, const Size& size = {-1, -1}, Scalar mean = Scalar(), - double scale = 1.0, bool swapRB = false, bool crop = false) + double scale = 1.0, bool swapRB = false, bool crop = false, + bool nmsAcrossClasses = false) { checkBackend(); @@ -38,6 +39,8 @@ public: model.setPreferableBackend(backend); model.setPreferableTarget(target); + model.setNmsAcrossClasses(nmsAcrossClasses); + std::vector classIds; std::vector confidences; std::vector boxes; @@ -110,6 +113,156 @@ public: model.segment(frame, mask); normAssert(mask, exp, "", norm, norm); } + + void testTextRecognitionModel(const std::string& weights, const std::string& cfg, + const std::string& imgPath, const std::string& seq, + const std::string& decodeType, const std::vector& vocabulary, + const Size& size = {-1, -1}, Scalar mean = Scalar(), + double scale = 1.0, bool swapRB = false, bool crop = false) + { + checkBackend(); + + Mat frame = imread(imgPath, IMREAD_GRAYSCALE); + + TextRecognitionModel model(weights, cfg); + model.setDecodeType(decodeType) + .setVocabulary(vocabulary) + .setInputSize(size).setInputMean(mean).setInputScale(scale) + .setInputSwapRB(swapRB).setInputCrop(crop); + + model.setPreferableBackend(backend); + model.setPreferableTarget(target); + + std::string result = model.recognize(frame); + EXPECT_EQ(result, seq) << "Full frame: " << imgPath; + + std::vector rois; + rois.push_back(Rect(0, 0, frame.cols, frame.rows)); + rois.push_back(Rect(0, 0, frame.cols, frame.rows)); // twice + std::vector results; + model.recognize(frame, rois, results); + EXPECT_EQ((size_t)2u, results.size()) << "ROI: " << imgPath; + EXPECT_EQ(results[0], seq) << "ROI[0]: " << imgPath; + EXPECT_EQ(results[1], seq) << "ROI[1]: " << imgPath; + } + + void testTextDetectionModelByDB(const std::string& weights, const std::string& cfg, + const std::string& imgPath, const std::vector>& gt, + float binThresh, float polyThresh, + uint maxCandidates, double unclipRatio, + const Size& size = {-1, -1}, Scalar mean = Scalar(), + double scale = 1.0, bool swapRB = false, bool crop = false) + { + checkBackend(); + + Mat frame = imread(imgPath); + + TextDetectionModel_DB model(weights, cfg); + model.setBinaryThreshold(binThresh) + .setPolygonThreshold(polyThresh) + .setUnclipRatio(unclipRatio) + .setMaxCandidates(maxCandidates) + .setInputSize(size).setInputMean(mean).setInputScale(scale) + .setInputSwapRB(swapRB).setInputCrop(crop); + + model.setPreferableBackend(backend); + model.setPreferableTarget(target); + + // 1. Check common TextDetectionModel API through RotatedRect + std::vector results; + model.detectTextRectangles(frame, results); + + EXPECT_GT(results.size(), (size_t)0); + + std::vector< std::vector > contours; + for (size_t i = 0; i < results.size(); i++) + { + const RotatedRect& box = results[i]; + Mat contour; + boxPoints(box, contour); + std::vector contour2i(4); + for (int i = 0; i < 4; i++) + { + contour2i[i].x = cvRound(contour.at(i, 0)); + contour2i[i].y = cvRound(contour.at(i, 1)); + } + contours.push_back(contour2i); + } +#if 0 // test debug + Mat result = frame.clone(); + drawContours(result, contours, -1, Scalar(0, 0, 255), 1); + imshow("result", result); // imwrite("result.png", result); + waitKey(0); +#endif + normAssertTextDetections(gt, contours, "", 0.05f); + + // 2. 
Check quadrangle-based API + // std::vector< std::vector > contours; + model.detect(frame, contours); + +#if 0 // test debug + Mat result = frame.clone(); + drawContours(result, contours, -1, Scalar(0, 0, 255), 1); + imshow("result_contours", result); // imwrite("result_contours.png", result); + waitKey(0); +#endif + normAssertTextDetections(gt, contours, "", 0.05f); + } + + void testTextDetectionModelByEAST( + const std::string& weights, const std::string& cfg, + const std::string& imgPath, const std::vector& gt, + float confThresh, float nmsThresh, + const Size& size = {-1, -1}, Scalar mean = Scalar(), + double scale = 1.0, bool swapRB = false, bool crop = false, + double eps_center = 5/*pixels*/, double eps_size = 5/*pixels*/, double eps_angle = 1 + ) + { + checkBackend(); + + Mat frame = imread(imgPath); + + TextDetectionModel_EAST model(weights, cfg); + model.setConfidenceThreshold(confThresh) + .setNMSThreshold(nmsThresh) + .setInputSize(size).setInputMean(mean).setInputScale(scale) + .setInputSwapRB(swapRB).setInputCrop(crop); + + model.setPreferableBackend(backend); + model.setPreferableTarget(target); + + std::vector results; + model.detectTextRectangles(frame, results); + + EXPECT_EQ(results.size(), (size_t)1); + for (size_t i = 0; i < results.size(); i++) + { + const RotatedRect& box = results[i]; +#if 0 // test debug + Mat contour; + boxPoints(box, contour); + std::vector contour2i(4); + for (int i = 0; i < 4; i++) + { + contour2i[i].x = cvRound(contour.at(i, 0)); + contour2i[i].y = cvRound(contour.at(i, 1)); + } + std::vector< std::vector > contours; + contours.push_back(contour2i); + + Mat result = frame.clone(); + drawContours(result, contours, -1, Scalar(0, 0, 255), 1); + imshow("result", result); //imwrite("result.png", result); + waitKey(0); +#endif + const RotatedRect& gtBox = gt[i]; + EXPECT_NEAR(box.center.x, gtBox.center.x, eps_center); + EXPECT_NEAR(box.center.y, gtBox.center.y, eps_center); + EXPECT_NEAR(box.size.width, gtBox.size.width, eps_size); + EXPECT_NEAR(box.size.height, gtBox.size.height, eps_size); + EXPECT_NEAR(box.angle, gtBox.angle, eps_angle); + } + } }; TEST_P(Test_Model, Classify) @@ -177,6 +330,58 @@ TEST_P(Test_Model, DetectRegion) Scalar(), scale, swapRB); } +TEST_P(Test_Model, DetectRegionWithNmsAcrossClasses) +{ + applyTestTag(CV_TEST_TAG_LONG, CV_TEST_TAG_MEMORY_1GB); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#endif + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + std::vector refClassIds = { 6, 11 }; + std::vector refConfidences = { 0.750469f, 0.901615f }; + std::vector refBoxes = { Rect2d(240, 53, 135, 72), + Rect2d(58, 141, 117, 249) }; + + std::string img_path = _tf("dog416.png"); + std::string weights_file = _tf("yolo-voc.weights", false); + std::string config_file = 
_tf("yolo-voc.cfg"); + + double scale = 1.0 / 255.0; + Size size{ 416, 416 }; + bool swapRB = true; + bool crop = false; + bool nmsAcrossClasses = true; + + double confThreshold = 0.24; + double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 0.15: 0.15; + double scoreDiff = 8e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 1e-2; + iouDiff = 1.6e-2; + } + + testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, + refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, + Scalar(), scale, swapRB, crop, + nmsAcrossClasses); +} + TEST_P(Test_Model, DetectionOutput) { #if defined(INF_ENGINE_RELEASE) @@ -391,6 +596,87 @@ TEST_P(Test_Model, Segmentation) testSegmentationModel(weights_file, config_file, inp, exp, norm, size, mean, scale, swapRB); } +TEST_P(Test_Model, TextRecognition) +{ + if (target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + std::string imgPath = _tf("text_rec_test.png"); + std::string weightPath = _tf("onnx/models/crnn.onnx", false); + std::string seq = "welcome"; + + Size size{100, 32}; + double scale = 1.0 / 127.5; + Scalar mean = Scalar(127.5); + std::string decodeType = "CTC-greedy"; + std::vector vocabulary = {"0","1","2","3","4","5","6","7","8","9", + "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"}; + + testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale); +} + +TEST_P(Test_Model, TextDetectionByDB) +{ + if (target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + std::string imgPath = _tf("text_det_test1.png"); + std::string weightPath = _tf("onnx/models/DB_TD500_resnet50.onnx", false); + + // GroundTruth + std::vector> gt = { + { Point(142, 193), Point(136, 164), Point(213, 150), Point(219, 178) }, + { Point(136, 165), Point(122, 114), Point(319, 71), Point(330, 122) } + }; + + Size size{736, 736}; + double scale = 1.0 / 255.0; + Scalar mean = Scalar(122.67891434, 116.66876762, 104.00698793); + + float binThresh = 0.3; + float polyThresh = 0.5; + uint maxCandidates = 200; + double unclipRatio = 2.0; + + testTextDetectionModelByDB(weightPath, "", imgPath, gt, binThresh, polyThresh, maxCandidates, unclipRatio, size, mean, scale); +} + +TEST_P(Test_Model, TextDetectionByEAST) +{ + std::string imgPath = _tf("text_det_test2.jpg"); + std::string weightPath = _tf("frozen_east_text_detection.pb", false); + + // GroundTruth + std::vector gt = { + RotatedRect(Point2f(657.55f, 409.5f), Size2f(316.84f, 62.45f), -4.79) + }; + + // Model parameters + Size size{320, 320}; + double scale = 1.0; + Scalar mean = Scalar(123.68, 116.78, 103.94); + bool swapRB = true; + + // Detection algorithm parameters + float confThresh = 0.5; + float nmsThresh = 0.4; + + double eps_center = 5/*pixels*/; + double eps_size = 5/*pixels*/; + double eps_angle = 1; + + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_MYRIAD) + { + eps_center = 10; + eps_size = 25; + eps_angle = 3; + } + + testTextDetectionModelByEAST(weightPath, "", imgPath, gt, confThresh, nmsThresh, size, mean, scale, swapRB, false/*crop*/, + eps_center, eps_size, eps_angle + ); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Model, dnnBackendsAndTargets()); }} // namespace diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a2c097da42..81ea1dcdd0 100644 --- 
a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -122,7 +122,8 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported - + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv_variable_w"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); ASSERT_FALSE(net.empty()); @@ -152,6 +153,12 @@ TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU && + getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); String basename = "conv_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); @@ -326,6 +333,13 @@ TEST_P(Test_ONNX_layers, Power) testONNXModels("pow2", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, Exp) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + testONNXModels("exp"); +} + TEST_P(Test_ONNX_layers, Concatenation) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) @@ -426,14 +440,27 @@ TEST_P(Test_ONNX_layers, BatchNormalization3D) TEST_P(Test_ONNX_layers, BatchNormalizationUnfused) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif testONNXModels("frozenBatchNorm2d"); } TEST_P(Test_ONNX_layers, BatchNormalizationSubgraph) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif testONNXModels("batch_norm_subgraph"); } +TEST_P(Test_ONNX_layers, NormalizeFusionSubgraph) +{ + testONNXModels("normalize_fusion"); +} + TEST_P(Test_ONNX_layers, Transpose) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) @@ -637,6 +664,26 @@ TEST_P(Test_ONNX_layers, Slice) #endif } +TEST_P(Test_ONNX_layers, Slice_Steps_2DInput) +{ + testONNXModels("slice_opset_11_steps_2d"); +} + +TEST_P(Test_ONNX_layers, Slice_Steps_3DInput) +{ + testONNXModels("slice_opset_11_steps_3d"); +} + +TEST_P(Test_ONNX_layers, Slice_Steps_4DInput) +{ + testONNXModels("slice_opset_11_steps_4d"); +} + +TEST_P(Test_ONNX_layers, Slice_Steps_5DInput) +{ + testONNXModels("slice_opset_11_steps_5d"); +} + TEST_P(Test_ONNX_layers, Softmax) { testONNXModels("softmax"); @@ -698,6 +745,16 @@ TEST_P(Test_ONNX_layers, ResizeOpset11_Torch1_6) testONNXModels("resize_opset11_torch1.6"); } +TEST_P(Test_ONNX_layers, Mish) +{ + testONNXModels("mish"); +} + +TEST_P(Test_ONNX_layers, CalculatePads) +{ + testONNXModels("calc_pads"); +} + TEST_P(Test_ONNX_layers, Conv1d) { testONNXModels("conv1d"); @@ -710,6 +767,10 @@ TEST_P(Test_ONNX_layers, Conv1d_bias) TEST_P(Test_ONNX_layers, Conv1d_variable_weight) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported String basename = "conv1d_variable_w"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); ASSERT_FALSE(net.empty()); @@ -730,9 +791,15 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight) TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); // not supported + if (backend == DNN_BACKEND_VKCOM) + applyTestTag(CV_TEST_TAG_DNN_SKIP_VULKAN); // not supported if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + if (target == DNN_TARGET_CPU && getInferenceEngineCPUType() == CV_DNN_INFERENCE_ENGINE_CPU_TYPE_ARM_COMPUTE) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_ARM_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); } String basename = "conv1d_variable_wb"; Net net = readNetFromONNX(_tf("models/" + basename + ".onnx")); @@ -756,8 +823,12 @@ TEST_P(Test_ONNX_layers, Conv1d_variable_weight_bias) TEST_P(Test_ONNX_layers, GatherMultiOutput) { - if (cvtest::skipUnstableTests && backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) - throw SkipTestException("Skip unstable test: https://github.com/opencv/opencv/issues/18937"); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif #if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) @@ -855,6 +926,7 @@ TEST_P(Test_ONNX_layers, PoolConv1d) TEST_P(Test_ONNX_layers, ConvResizePool1d) { +#if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -862,7 +934,12 @@ TEST_P(Test_ONNX_layers, ConvResizePool1d) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#if INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception + if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif } +#endif testONNXModels("conv_resize_pool_1d"); } diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index e6cfbe6637..2c36134724 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -81,12 +81,12 @@ class Test_TensorFlow_layers : public DNNTestLayer { public: void runTensorFlowNet(const std::string& prefix, bool hasText = false, - double l1 = 0.0, double lInf = 0.0, bool memoryLoad = false) + double l1 = 0.0, double lInf = 0.0, bool memoryLoad = false, const std::string& groupPrefix = "") { - std::string netPath = path(prefix + "_net.pb"); - std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : ""); + std::string netPath = path(prefix + groupPrefix + "_net.pb"); + std::string netConfig = (hasText ? 
path(prefix + groupPrefix + "_net.pbtxt") : ""); std::string inpPath = path(prefix + "_in.npy"); - std::string outPath = path(prefix + "_out.npy"); + std::string outPath = path(prefix + groupPrefix + "_out.npy"); cv::Mat input = blobFromNPY(inpPath); cv::Mat ref = blobFromNPY(outPath); @@ -135,6 +135,16 @@ TEST_P(Test_TensorFlow_layers, reduce_sum) runTensorFlowNet("sum_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_sum_channel) +{ + runTensorFlowNet("reduce_sum_channel"); +} + +TEST_P(Test_TensorFlow_layers, reduce_sum_channel_keep_dims) +{ + runTensorFlowNet("reduce_sum_channel", false, 0.0, 0.0, false, "_keep_dims"); +} + TEST_P(Test_TensorFlow_layers, conv_single_conv) { runTensorFlowNet("single_conv"); @@ -205,6 +215,17 @@ TEST_P(Test_TensorFlow_layers, eltwise) runTensorFlowNet("eltwise_sub"); } +TEST_P(Test_TensorFlow_layers, eltwise_add_vec) +{ + runTensorFlowNet("eltwise_add_vec"); +} + +TEST_P(Test_TensorFlow_layers, eltwise_mul_vec) +{ + runTensorFlowNet("eltwise_mul_vec"); +} + + TEST_P(Test_TensorFlow_layers, channel_broadcast) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) @@ -219,6 +240,12 @@ TEST_P(Test_TensorFlow_layers, pad_and_concat) TEST_P(Test_TensorFlow_layers, concat_axis_1) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif runTensorFlowNet("concat_axis_1"); } @@ -279,6 +306,10 @@ TEST_P(Test_TensorFlow_layers, batch_norm_10) } TEST_P(Test_TensorFlow_layers, batch_norm_11) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_CPU) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_CPU, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // nan +#endif if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); runTensorFlowNet("mvn_batch_norm_1x1"); @@ -478,12 +509,28 @@ TEST_P(Test_TensorFlow_layers, unfused_flatten) runTensorFlowNet("unfused_flatten_unknown_batch"); } +TEST_P(Test_TensorFlow_layers, reshape_layer) +{ + runTensorFlowNet("reshape_layer"); +} + +TEST_P(Test_TensorFlow_layers, reshape_nchw) +{ + runTensorFlowNet("reshape_nchw"); +} + +TEST_P(Test_TensorFlow_layers, reshape_conv) +{ + runTensorFlowNet("reshape_conv"); +} + TEST_P(Test_TensorFlow_layers, leaky_relu) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif + runTensorFlowNet("leaky_relu"); runTensorFlowNet("leaky_relu_order1"); runTensorFlowNet("leaky_relu_order2"); runTensorFlowNet("leaky_relu_order3"); @@ -1001,6 +1048,19 @@ TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor) runTensorFlowNet("keras_upsampling2d"); } +TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor_align_corners) +{ + runTensorFlowNet("resize_nearest_neighbor", false, 0.0, 0.0, false, "_align_corners"); +} + +TEST_P(Test_TensorFlow_layers, resize_nearest_neighbor_half_pixel) +{ + if (backend == 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + runTensorFlowNet("resize_nearest_neighbor", false, 0.0, 0.0, false, "_half_pixel"); +} + TEST_P(Test_TensorFlow_layers, fused_resize_conv) { runTensorFlowNet("fused_resize_conv"); @@ -1056,10 +1116,61 @@ TEST_P(Test_TensorFlow_layers, keras_mobilenet_head) runTensorFlowNet("keras_learning_phase"); } +// TF case: align_corners=False, half_pixel_centers=False TEST_P(Test_TensorFlow_layers, resize_bilinear) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif runTensorFlowNet("resize_bilinear"); +} + +// TF case: align_corners=True, half_pixel_centers=False +TEST_P(Test_TensorFlow_layers, resize_bilinear_align_corners) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif + runTensorFlowNet("resize_bilinear", + false, 0.0, 0.0, false, // default parameters + "_align_corners"); +} + +// TF case: align_corners=False, half_pixel_centers=True +TEST_P(Test_TensorFlow_layers, resize_bilinear_half_pixel) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + runTensorFlowNet("resize_bilinear", false, 0.0, 0.0, false, "_half_pixel"); +} + +// TF case: align_corners=False, half_pixel_centers=False +TEST_P(Test_TensorFlow_layers, resize_bilinear_factor) +{ runTensorFlowNet("resize_bilinear_factor"); +} + +// TF case: align_corners=False, half_pixel_centers=True +TEST_P(Test_TensorFlow_layers, resize_bilinear_factor_half_pixel) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + runTensorFlowNet("resize_bilinear_factor", false, 0.0, 0.0, false, "_half_pixel"); +} + +// TF case: align_corners=True, half_pixel_centers=False +TEST_P(Test_TensorFlow_layers, resize_bilinear_factor_align_corners) +{ + runTensorFlowNet("resize_bilinear_factor", false, 0.0, 0.0, false, "_align_corners"); +} + +// TF case: align_corners=False, half_pixel_centers=False +TEST_P(Test_TensorFlow_layers, resize_bilinear_down) +{ runTensorFlowNet("resize_bilinear_down"); } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 54b7c1baa9..f1d636895b 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -258,6 +258,14 @@ TEST_P(Test_Torch_layers, net_conv_gemm_lrn) l1 = 0.0042; lInf = 0.021; } + // The OpenCL kernels use the native_ math functions which have + // implementation defined accuracy, so we use relaxed thresholds. See + // https://github.com/opencv/opencv/issues/9821 for more details. 
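For context, thresholds like these are consumed by an element-wise comparison of the backend output against the reference blob. A minimal sketch of such an L1/Linf check (illustrative only; the tests' real helper is normAssert from the test utilities, and ref, out, l1, lInf are assumed to be in scope):

    double normL1  = cv::norm(ref, out, cv::NORM_L1) / ref.total();  // mean absolute difference
    double normInf = cv::norm(ref, out, cv::NORM_INF);               // largest per-element difference
    EXPECT_LE(normL1, l1);
    EXPECT_LE(normInf, lInf);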
+ else if (target == DNN_TARGET_OPENCL) + { + l1 = 0.02; + lInf = 0.02; + } runTorchNet("net_conv_gemm_lrn", "", false, true, true, l1, lInf); } @@ -282,6 +290,15 @@ TEST_P(Test_Torch_layers, net_padding) TEST_P(Test_Torch_layers, net_non_spatial) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // crash + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception +#endif + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, @@ -592,6 +609,11 @@ private: TEST_P(Test_Torch_layers, upsampling_nearest) { +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // TODO +#endif + // Test a custom layer. CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer); try diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 829d8ff898..343ffd81e4 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -481,8 +481,7 @@ article](http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions)). than union-find method; it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. - the color image algorithm is taken from: @cite forssen2007maximally ; it should be much slower -than grey image method ( 3~4 times ); the chi_table.h file is taken directly from paper's source -code which is distributed under GPL. +than grey image method ( 3~4 times ) - (Python) A complete example showing the use of the %MSER detector can be found at samples/python/mser.py */ diff --git a/modules/features2d/src/blobdetector.cpp b/modules/features2d/src/blobdetector.cpp index d07e8bae83..c2215cd57c 100644 --- a/modules/features2d/src/blobdetector.cpp +++ b/modules/features2d/src/blobdetector.cpp @@ -325,13 +325,19 @@ void SimpleBlobDetectorImpl::detect(InputArray image, std::vector& std::vector < Center > curCenters; findBlobs(grayscaleImage, binarizedImage, curCenters); + if(params.maxThreshold - params.minThreshold <= params.thresholdStep) { + // if the difference between min and max threshold is less than the threshold step + // we're only going to enter the loop once, so we need to add curCenters + // to ensure we still use minDistBetweenBlobs + centers.push_back(curCenters); + } std::vector < std::vector
> newCenters; for (size_t i = 0; i < curCenters.size(); i++) { bool isNew = true; for (size_t j = 0; j < centers.size(); j++) { - double dist = norm(centers[j][ centers[j].size() / 2 ].location - curCenters[i].location); + double dist = norm(centers[j][centers[j].size() / 2 ].location - curCenters[i].location); isNew = dist >= params.minDistBetweenBlobs && dist >= centers[j][ centers[j].size() / 2 ].radius && dist >= curCenters[i].radius; if (!isNew) { diff --git a/modules/features2d/src/gftt.cpp b/modules/features2d/src/gftt.cpp index 11ed29f39d..bc97fc1677 100644 --- a/modules/features2d/src/gftt.cpp +++ b/modules/features2d/src/gftt.cpp @@ -87,6 +87,7 @@ public: } std::vector corners; + std::vector cornersQuality; if (_image.isUMat()) { @@ -97,7 +98,7 @@ public: ugrayImage = _image.getUMat(); goodFeaturesToTrack( ugrayImage, corners, nfeatures, qualityLevel, minDistance, _mask, - blockSize, gradSize, useHarrisDetector, k ); + cornersQuality, blockSize, gradSize, useHarrisDetector, k ); } else { @@ -106,14 +107,14 @@ public: cvtColor( image, grayImage, COLOR_BGR2GRAY ); goodFeaturesToTrack( grayImage, corners, nfeatures, qualityLevel, minDistance, _mask, - blockSize, gradSize, useHarrisDetector, k ); + cornersQuality, blockSize, gradSize, useHarrisDetector, k ); } + CV_Assert(corners.size() == cornersQuality.size()); + keypoints.resize(corners.size()); - std::vector::const_iterator corner_it = corners.begin(); - std::vector::iterator keypoint_it = keypoints.begin(); - for( ; corner_it != corners.end() && keypoint_it != keypoints.end(); ++corner_it, ++keypoint_it ) - *keypoint_it = KeyPoint( *corner_it, (float)blockSize ); + for (size_t i = 0; i < corners.size(); i++) + keypoints[i] = KeyPoint(corners[i], (float)blockSize, -1, cornersQuality[i]); } diff --git a/modules/features2d/src/mser.cpp b/modules/features2d/src/mser.cpp index a37b4ea482..4fe07bd6eb 100644 --- a/modules/features2d/src/mser.cpp +++ b/modules/features2d/src/mser.cpp @@ -35,7 +35,7 @@ * it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. * 3. the color image algorithm is taken from: Maximally Stable Colour Regions for Recognition and Match; * it should be much slower than gray image method ( 3~4 times ); - * the chi_table.h file is taken directly from paper's source code which is distributed under GPL. + * the chi_table.h file is taken directly from paper's source code which is distributed under permissive BSD-like license: http://users.isy.liu.se/cvl/perfo/software/chi_table.h * 4. though the name is *contours*, the result actually is a list of point set. 
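A short usage sketch of the gftt.cpp change above: with the extra cornersQuality output of goodFeaturesToTrack, each reported KeyPoint now carries its actual corner quality in KeyPoint::response instead of the previous default of 0 (illustrative only; gray is assumed to be a valid 8-bit single-channel image):

    cv::Ptr<cv::GFTTDetector> detector = cv::GFTTDetector::create(100 /*maxCorners*/);
    std::vector<cv::KeyPoint> keypoints;
    detector->detect(gray, keypoints);
    for (const cv::KeyPoint& kp : keypoints)
        std::cout << kp.pt << " response=" << kp.response << std::endl;  // per-corner quality measure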
*/ diff --git a/modules/features2d/src/orb.cpp b/modules/features2d/src/orb.cpp index 881fc01516..85d17cdd0d 100644 --- a/modules/features2d/src/orb.cpp +++ b/modules/features2d/src/orb.cpp @@ -1025,15 +1025,20 @@ void ORB_Impl::detectAndCompute( InputArray _image, InputArray _mask, Mat imagePyramid, maskPyramid; UMat uimagePyramid, ulayerInfo; - int level_dy = image.rows + border*2; - Point level_ofs(0,0); - Size bufSize((cvRound(image.cols/getScale(0, firstLevel, scaleFactor)) + border*2 + 15) & -16, 0); + float level0_inv_scale = 1.0f / getScale(0, firstLevel, scaleFactor); + size_t level0_width = (size_t)cvRound(image.cols * level0_inv_scale); + size_t level0_height = (size_t)cvRound(image.rows * level0_inv_scale); + Size bufSize((int)alignSize(level0_width + border*2, 16), 0); // TODO change alignment to 64 + + int level_dy = (int)level0_height + border*2; + Point level_ofs(0, 0); for( level = 0; level < nLevels; level++ ) { float scale = getScale(level, firstLevel, scaleFactor); layerScale[level] = scale; - Size sz(cvRound(image.cols/scale), cvRound(image.rows/scale)); + float inv_scale = 1.0f / scale; + Size sz(cvRound(image.cols * inv_scale), cvRound(image.rows * inv_scale)); Size wholeSize(sz.width + border*2, sz.height + border*2); if( level_ofs.x + wholeSize.width > bufSize.width ) { diff --git a/modules/features2d/test/test_blobdetector.cpp b/modules/features2d/test/test_blobdetector.cpp new file mode 100644 index 0000000000..56b7145862 --- /dev/null +++ b/modules/features2d/test/test_blobdetector.cpp @@ -0,0 +1,21 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { +TEST(Features2d_BlobDetector, bug_6667) +{ + cv::Mat image = cv::Mat(cv::Size(100, 100), CV_8UC1, cv::Scalar(255, 255, 255)); + cv::circle(image, Point(50, 50), 20, cv::Scalar(0), -1); + SimpleBlobDetector::Params params; + params.minThreshold = 250; + params.maxThreshold = 260; + std::vector keypoints; + + Ptr detector = SimpleBlobDetector::create(params); + detector->detect(image, keypoints); + ASSERT_NE((int) keypoints.size(), 0); +} +}} // namespace diff --git a/modules/features2d/test/test_nearestneighbors.cpp b/modules/features2d/test/test_nearestneighbors.cpp index 42fc6fe948..11cee9cea0 100644 --- a/modules/features2d/test/test_nearestneighbors.cpp +++ b/modules/features2d/test/test_nearestneighbors.cpp @@ -123,7 +123,7 @@ void NearestNeighborTest::run( int /*start_from*/ ) { Mat desc( featuresCount, dims, CV_32FC1 ); ts->get_rng().fill( desc, RNG::UNIFORM, minValue, maxValue ); - createModel( desc ); + createModel( desc.clone() ); // .clone() is used to simulate dangling pointers problem: https://github.com/opencv/opencv/issues/17553 tempCode = checkGetPoints( desc ); if( tempCode != cvtest::TS::OK ) diff --git a/modules/features2d/test/test_orb.cpp b/modules/features2d/test/test_orb.cpp index 868bee354c..8a4d9776b3 100644 --- a/modules/features2d/test/test_orb.cpp +++ b/modules/features2d/test/test_orb.cpp @@ -90,7 +90,7 @@ TEST(Features2D_ORB, _1996) ASSERT_EQ(0, roiViolations); } -TEST(Features2D_ORB, crash) +TEST(Features2D_ORB, crash_5031) { cv::Mat image = cv::Mat::zeros(cv::Size(1920, 1080), CV_8UC3); @@ -123,4 +123,23 @@ TEST(Features2D_ORB, crash) ASSERT_NO_THROW(orb->compute(image, keypoints, descriptors)); } + +TEST(Features2D_ORB, regression_16197) +{ + Mat img(Size(72, 72), 
CV_8UC1, Scalar::all(0)); + Ptr orbPtr = ORB::create(); + orbPtr->setNLevels(5); + orbPtr->setFirstLevel(3); + orbPtr->setScaleFactor(1.8); + orbPtr->setPatchSize(8); + orbPtr->setEdgeThreshold(8); + + std::vector kps; + Mat fv; + + // exception in debug mode, crash in release + ASSERT_NO_THROW(orbPtr->detectAndCompute(img, noArray(), kps, fv)); +} + + }} // namespace diff --git a/modules/flann/include/opencv2/flann/all_indices.h b/modules/flann/include/opencv2/flann/all_indices.h index 2de18af24a..03877ab6ad 100644 --- a/modules/flann/include/opencv2/flann/all_indices.h +++ b/modules/flann/include/opencv2/flann/all_indices.h @@ -82,7 +82,7 @@ struct index_creator nnIndex = new LshIndex(dataset, params, distance); break; default: - throw FLANNException("Unknown index type"); + FLANN_THROW(cv::Error::StsBadArg, "Unknown index type"); } return nnIndex; @@ -111,7 +111,7 @@ struct index_creator nnIndex = new LshIndex(dataset, params, distance); break; default: - throw FLANNException("Unknown index type"); + FLANN_THROW(cv::Error::StsBadArg, "Unknown index type"); } return nnIndex; @@ -140,7 +140,7 @@ struct index_creator nnIndex = new LshIndex(dataset, params, distance); break; default: - throw FLANNException("Unknown index type"); + FLANN_THROW(cv::Error::StsBadArg, "Unknown index type"); } return nnIndex; diff --git a/modules/flann/include/opencv2/flann/autotuned_index.h b/modules/flann/include/opencv2/flann/autotuned_index.h index 54a60a73d6..d90f739aff 100644 --- a/modules/flann/include/opencv2/flann/autotuned_index.h +++ b/modules/flann/include/opencv2/flann/autotuned_index.h @@ -34,7 +34,6 @@ #include -#include "general.h" #include "nn_index.h" #include "ground_truth.h" #include "index_testing.h" diff --git a/modules/flann/include/opencv2/flann/composite_index.h b/modules/flann/include/opencv2/flann/composite_index.h index bcf0827c9f..f1af41ac26 100644 --- a/modules/flann/include/opencv2/flann/composite_index.h +++ b/modules/flann/include/opencv2/flann/composite_index.h @@ -33,7 +33,6 @@ //! 
@cond IGNORED -#include "general.h" #include "nn_index.h" #include "kdtree_index.h" #include "kmeans_index.h" diff --git a/modules/flann/include/opencv2/flann/flann_base.hpp b/modules/flann/include/opencv2/flann/flann_base.hpp index 0f23930024..258ec38d20 100644 --- a/modules/flann/include/opencv2/flann/flann_base.hpp +++ b/modules/flann/include/opencv2/flann/flann_base.hpp @@ -82,11 +82,11 @@ NNIndex* load_saved_index(const Matrix IndexHeader header = load_header(fin); if (header.data_type != Datatype::type()) { fclose(fin); - throw FLANNException("Datatype of saved index is different than of the one to be created."); + FLANN_THROW(cv::Error::StsError, "Datatype of saved index is different than of the one to be created."); } if ((size_t(header.rows) != dataset.rows)||(size_t(header.cols) != dataset.cols)) { fclose(fin); - throw FLANNException("The index saved belongs to a different dataset"); + FLANN_THROW(cv::Error::StsError, "The index saved belongs to a different dataset"); } IndexParams params; @@ -140,7 +140,7 @@ public: { FILE* fout = fopen(filename.c_str(), "wb"); if (fout == NULL) { - throw FLANNException("Cannot open file"); + FLANN_THROW(cv::Error::StsError, "Cannot open file"); } save_header(fout, *nnIndex_); saveIndex(fout); diff --git a/modules/flann/include/opencv2/flann/general.h b/modules/flann/include/opencv2/flann/general.h index ac848d6230..29fa8be121 100644 --- a/modules/flann/include/opencv2/flann/general.h +++ b/modules/flann/include/opencv2/flann/general.h @@ -31,6 +31,8 @@ #ifndef OPENCV_FLANN_GENERAL_H_ #define OPENCV_FLANN_GENERAL_H_ +#if CV_VERSION_MAJOR <= 4 + //! @cond IGNORED #include "opencv2/core.hpp" @@ -48,6 +50,14 @@ public: } +#define FLANN_THROW(TYPE, STR) throw FLANNException(STR) + +#else + +#define FLANN_THROW(TYPE, STR) CV_Error(TYPE, STR) + +#endif + //! 
@endcond #endif /* OPENCV_FLANN_GENERAL_H_ */ diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h index b7a650ff00..2d39d4f0f6 100644 --- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h +++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h @@ -382,7 +382,7 @@ public: chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser; } else { - throw FLANNException("Unknown algorithm for choosing initial centers."); + FLANN_THROW(cv::Error::StsError, "Unknown algorithm for choosing initial centers."); } root = new NodePtr[trees_]; @@ -446,7 +446,7 @@ public: void buildIndex() CV_OVERRIDE { if (branching_<2) { - throw FLANNException("Branching factor must be at least 2"); + FLANN_THROW(cv::Error::StsError, "Branching factor must be at least 2"); } free_indices(); diff --git a/modules/flann/include/opencv2/flann/index_testing.h b/modules/flann/include/opencv2/flann/index_testing.h index f3d147588d..207adef449 100644 --- a/modules/flann/include/opencv2/flann/index_testing.h +++ b/modules/flann/include/opencv2/flann/index_testing.h @@ -93,7 +93,7 @@ float search_with_ground_truth(NNIndex& index, const Matrix resultSet(nn+skipMatches); diff --git a/modules/flann/include/opencv2/flann/kdtree_index.h b/modules/flann/include/opencv2/flann/kdtree_index.h index 5a3d9d7fe0..603fdbd421 100644 --- a/modules/flann/include/opencv2/flann/kdtree_index.h +++ b/modules/flann/include/opencv2/flann/kdtree_index.h @@ -37,7 +37,6 @@ #include #include -#include "general.h" #include "nn_index.h" #include "dynamic_bitset.h" #include "matrix.h" diff --git a/modules/flann/include/opencv2/flann/kdtree_single_index.h b/modules/flann/include/opencv2/flann/kdtree_single_index.h index e571403b10..ed95c3db7d 100644 --- a/modules/flann/include/opencv2/flann/kdtree_single_index.h +++ b/modules/flann/include/opencv2/flann/kdtree_single_index.h @@ -37,7 +37,6 @@ #include #include -#include "general.h" #include "nn_index.h" #include "matrix.h" #include "result_set.h" diff --git a/modules/flann/include/opencv2/flann/kmeans_index.h b/modules/flann/include/opencv2/flann/kmeans_index.h index cb1a54a6d6..f73669999f 100644 --- a/modules/flann/include/opencv2/flann/kmeans_index.h +++ b/modules/flann/include/opencv2/flann/kmeans_index.h @@ -370,7 +370,7 @@ public: chooseCenters = &KMeansIndex::chooseCentersKMeanspp; } else { - throw FLANNException("Unknown algorithm for choosing initial centers."); + FLANN_THROW(cv::Error::StsBadArg, "Unknown algorithm for choosing initial centers."); } cb_index_ = 0.4f; @@ -442,7 +442,7 @@ public: void buildIndex() CV_OVERRIDE { if (branching_<2) { - throw FLANNException("Branching factor must be at least 2"); + FLANN_THROW(cv::Error::StsError, "Branching factor must be at least 2"); } free_indices(); @@ -559,7 +559,7 @@ public: { int numClusters = centers.rows; if (numClusters<1) { - throw FLANNException("Number of clusters must be at least 1"); + FLANN_THROW(cv::Error::StsBadArg, "Number of clusters must be at least 1"); } DistanceType variance; diff --git a/modules/flann/include/opencv2/flann/linear_index.h b/modules/flann/include/opencv2/flann/linear_index.h index 8a0f10fd86..6428c0d7ef 100644 --- a/modules/flann/include/opencv2/flann/linear_index.h +++ b/modules/flann/include/opencv2/flann/linear_index.h @@ -33,7 +33,6 @@ //! 
@cond IGNORED -#include "general.h" #include "nn_index.h" namespace cvflann diff --git a/modules/flann/include/opencv2/flann/lsh_index.h b/modules/flann/include/opencv2/flann/lsh_index.h index 4e3c376006..b5e87f6041 100644 --- a/modules/flann/include/opencv2/flann/lsh_index.h +++ b/modules/flann/include/opencv2/flann/lsh_index.h @@ -42,7 +42,6 @@ #include #include -#include "general.h" #include "nn_index.h" #include "matrix.h" #include "result_set.h" diff --git a/modules/flann/include/opencv2/flann/matrix.h b/modules/flann/include/opencv2/flann/matrix.h index 34893b72c3..fb871bd73c 100644 --- a/modules/flann/include/opencv2/flann/matrix.h +++ b/modules/flann/include/opencv2/flann/matrix.h @@ -35,8 +35,6 @@ #include -#include "general.h" - namespace cvflann { diff --git a/modules/flann/include/opencv2/flann/miniflann.hpp b/modules/flann/include/opencv2/flann/miniflann.hpp index 093646254c..b8df92d758 100644 --- a/modules/flann/include/opencv2/flann/miniflann.hpp +++ b/modules/flann/include/opencv2/flann/miniflann.hpp @@ -169,10 +169,13 @@ public: CV_WRAP cvflann::flann_algorithm_t getAlgorithm() const; protected: + bool load_(const String& filename); + cvflann::flann_distance_t distType; cvflann::flann_algorithm_t algo; int featureType; void* index; + Mat features_clone; // index may store features pointer internally for searching, so avoid dangling pointers: https://github.com/opencv/opencv/issues/17553 }; } } // namespace cv::flann diff --git a/modules/flann/include/opencv2/flann/nn_index.h b/modules/flann/include/opencv2/flann/nn_index.h index fbb4c7924c..f6e17d19fc 100644 --- a/modules/flann/include/opencv2/flann/nn_index.h +++ b/modules/flann/include/opencv2/flann/nn_index.h @@ -31,7 +31,6 @@ #ifndef OPENCV_FLANN_NNINDEX_H #define OPENCV_FLANN_NNINDEX_H -#include "general.h" #include "matrix.h" #include "result_set.h" #include "params.h" diff --git a/modules/flann/include/opencv2/flann/params.h b/modules/flann/include/opencv2/flann/params.h index dd3092f065..c9093cde8c 100644 --- a/modules/flann/include/opencv2/flann/params.h +++ b/modules/flann/include/opencv2/flann/params.h @@ -91,7 +91,7 @@ T get_param(const IndexParams& params, cv::String name) return it->second.cast(); } else { - throw FLANNException(cv::String("Missing parameter '")+name+cv::String("' in the parameters given")); + FLANN_THROW(cv::Error::StsBadArg, cv::String("Missing parameter '")+name+cv::String("' in the parameters given")); } } diff --git a/modules/flann/include/opencv2/flann/random.h b/modules/flann/include/opencv2/flann/random.h index 3bb48b687b..2c1809c3a9 100644 --- a/modules/flann/include/opencv2/flann/random.h +++ b/modules/flann/include/opencv2/flann/random.h @@ -37,8 +37,6 @@ #include #include -#include "general.h" - namespace cvflann { diff --git a/modules/flann/include/opencv2/flann/saving.h b/modules/flann/include/opencv2/flann/saving.h index 53359b4b7b..8b3aeb7f0a 100644 --- a/modules/flann/include/opencv2/flann/saving.h +++ b/modules/flann/include/opencv2/flann/saving.h @@ -112,11 +112,11 @@ inline IndexHeader load_header(FILE* stream) size_t read_size = fread(&header,sizeof(header),1,stream); if (read_size!=(size_t)1) { - throw FLANNException("Invalid index file, cannot read"); + FLANN_THROW(cv::Error::StsError, "Invalid index file, cannot read"); } if (strcmp(header.signature,FLANN_SIGNATURE_)!=0) { - throw FLANNException("Invalid index file, wrong signature"); + FLANN_THROW(cv::Error::StsError, "Invalid index file, wrong signature"); } return header; @@ -150,7 +150,7 @@ void load_value(FILE* 
stream, T& value, size_t count = 1) { size_t read_cnt = fread(&value, sizeof(value), count, stream); if (read_cnt != count) { - throw FLANNException("Cannot read from file"); + FLANN_THROW(cv::Error::StsParseError, "Cannot read from file"); } } @@ -159,12 +159,12 @@ void load_value(FILE* stream, cvflann::Matrix& value) { size_t read_cnt = fread(&value, sizeof(value), 1, stream); if (read_cnt != 1) { - throw FLANNException("Cannot read from file"); + FLANN_THROW(cv::Error::StsParseError, "Cannot read from file"); } value.data = new T[value.rows*value.cols]; read_cnt = fread(value.data, sizeof(T), value.rows*value.cols, stream); if (read_cnt != (size_t)(value.rows*value.cols)) { - throw FLANNException("Cannot read from file"); + FLANN_THROW(cv::Error::StsParseError, "Cannot read from file"); } } @@ -175,12 +175,12 @@ void load_value(FILE* stream, std::vector& value) size_t size; size_t read_cnt = fread(&size, sizeof(size_t), 1, stream); if (read_cnt!=1) { - throw FLANNException("Cannot read from file"); + FLANN_THROW(cv::Error::StsError, "Cannot read from file"); } value.resize(size); read_cnt = fread(&value[0], sizeof(T), size, stream); if (read_cnt != size) { - throw FLANNException("Cannot read from file"); + FLANN_THROW(cv::Error::StsError, "Cannot read from file"); } } diff --git a/modules/flann/src/miniflann.cpp b/modules/flann/src/miniflann.cpp index b56578c17f..c871875ae4 100644 --- a/modules/flann/src/miniflann.cpp +++ b/modules/flann/src/miniflann.cpp @@ -390,14 +390,18 @@ void Index::build(InputArray _data, const IndexParams& params, flann_distance_t CV_INSTRUMENT_REGION(); release(); + + // Index may reuse 'data' during search, need to keep it alive + features_clone = _data.getMat().clone(); + Mat data = features_clone; + algo = getParam(params, "algorithm", FLANN_INDEX_LINEAR); if( algo == FLANN_INDEX_SAVED ) { - load(_data, getParam(params, "filename", String())); + load_(getParam(params, "filename", String())); return; } - Mat data = _data.getMat(); index = 0; featureType = data.type(); distType = _distType; @@ -462,6 +466,8 @@ void Index::release() { CV_INSTRUMENT_REGION(); + features_clone.release(); + if( !index ) return; @@ -785,9 +791,20 @@ bool loadIndex(Index* index0, void*& index, const Mat& data, FILE* fin, const Di bool Index::load(InputArray _data, const String& filename) { - Mat data = _data.getMat(); - bool ok = true; release(); + + // Index may reuse 'data' during search, need to keep it alive + features_clone = _data.getMat().clone(); + Mat data = features_clone; + + return load_(filename); +} + +bool Index::load_(const String& filename) +{ + Mat data = features_clone; + bool ok = true; + FILE* fin = fopen(filename.c_str(), "rb"); if (fin == NULL) return false; diff --git a/modules/flann/src/precomp.hpp b/modules/flann/src/precomp.hpp index 099a6abce1..66de0c1a9c 100644 --- a/modules/flann/src/precomp.hpp +++ b/modules/flann/src/precomp.hpp @@ -13,7 +13,6 @@ #include "opencv2/flann/index_testing.h" #include "opencv2/flann/params.h" #include "opencv2/flann/saving.h" -#include "opencv2/flann/general.h" // index types #include "opencv2/flann/all_indices.h" diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index 0067cfa389..6b586c1f99 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -23,7 +23,7 @@ ocv_add_module(gapi REQUIRED opencv_imgproc OPTIONAL - opencv_video + opencv_video opencv_stereo WRAP python ) @@ -38,10 +38,6 @@ if(MSVC) endif() endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") # don't add Clang 
here: issue should be investigated and fixed (workaround for Apple only) - ocv_warnings_disable(CMAKE_CXX_FLAGS -Wrange-loop-analysis) # https://github.com/opencv/opencv/issues/18928 -endif() - file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/*.hpp" @@ -57,6 +53,7 @@ file(GLOB gapi_ext_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/streaming/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/plaidml/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/util/*.hpp" + "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/python/*.hpp" ) set(gapi_srcs @@ -80,6 +77,7 @@ set(gapi_srcs src/api/kernels_video.cpp src/api/kernels_nnparsers.cpp src/api/kernels_streaming.cpp + src/api/kernels_stereo.cpp src/api/render.cpp src/api/render_ocv.cpp src/api/ginfer.cpp @@ -107,6 +105,7 @@ set(gapi_srcs # Executor src/executor/gexecutor.cpp + src/executor/gtbbexecutor.cpp src/executor/gstreamingexecutor.cpp src/executor/gasync.cpp @@ -114,6 +113,7 @@ set(gapi_srcs src/backends/cpu/gcpubackend.cpp src/backends/cpu/gcpukernel.cpp src/backends/cpu/gcpuimgproc.cpp + src/backends/cpu/gcpustereo.cpp src/backends/cpu/gcpuvideo.cpp src/backends/cpu/gcpucore.cpp src/backends/cpu/gnnparsers.cpp @@ -156,8 +156,12 @@ set(gapi_srcs src/api/s11n.cpp src/backends/common/serialization.cpp + # Streaming backend + src/backends/streaming/gstreamingbackend.cpp + # Python bridge src/backends/ie/bindings_ie.cpp + src/backends/python/gpythonbackend.cpp ) ocv_add_dispatched_file(backends/fluid/gfluidimgproc_func SSE4_1 AVX2) @@ -196,6 +200,10 @@ if(TARGET opencv_test_gapi) target_link_libraries(opencv_test_gapi PRIVATE ade) endif() +if(HAVE_TBB AND TARGET opencv_test_gapi) + ocv_target_link_libraries(opencv_test_gapi PRIVATE tbb) +endif() + if(HAVE_FREETYPE) ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_FREETYPE) if(TARGET opencv_test_gapi) diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index ee1b645412..aa24e949ea 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -20,12 +20,26 @@ endif() set(ADE_root "${ade_src_dir}/${ade_subdir}/sources/ade") file(GLOB_RECURSE ADE_sources "${ADE_root}/source/*.cpp") file(GLOB_RECURSE ADE_include "${ADE_root}/include/ade/*.hpp") -add_library(ade STATIC ${ADE_include} ${ADE_sources}) +add_library(ade STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} + ${ADE_include} + ${ADE_sources} +) target_include_directories(ade PUBLIC $) -set_target_properties(ade PROPERTIES POSITION_INDEPENDENT_CODE True) +set_target_properties(ade PROPERTIES + POSITION_INDEPENDENT_CODE True + OUTPUT_NAME ade + DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" + COMPILE_PDB_NAME ade + COMPILE_PDB_NAME_DEBUG "ade${OPENCV_DEBUG_POSTFIX}" + ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH} +) + +if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(ade PROPERTIES FOLDER "3rdparty") +endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(ade EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(ade EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev OPTIONAL) endif() ocv_install_3rdparty_licenses(ade "${ade_src_dir}/${ade_subdir}/LICENSE") diff --git a/modules/gapi/cmake/standalone.cmake b/modules/gapi/cmake/standalone.cmake index 5cc57d8269..d08eda1be5 100644 --- a/modules/gapi/cmake/standalone.cmake +++ b/modules/gapi/cmake/standalone.cmake @@ -21,6 +21,7 @@ file(GLOB 
FLUID_sources "${FLUID_ROOT}/src/api/g*.cpp" "${FLUID_ROOT}/src/compiler/passes/*.cpp" "${FLUID_ROOT}/src/executor/*.cpp" "${FLUID_ROOT}/src/backends/fluid/*.cpp" + "${FLUID_ROOT}/src/backends/streaming/*.cpp" "${FLUID_ROOT}/src/backends/common/*.cpp") add_library(${FLUID_TARGET} STATIC ${FLUID_includes} ${FLUID_sources}) diff --git a/modules/gapi/include/opencv2/gapi.hpp b/modules/gapi/include/opencv2/gapi.hpp index 8445746710..e4b2021479 100644 --- a/modules/gapi/include/opencv2/gapi.hpp +++ b/modules/gapi/include/opencv2/gapi.hpp @@ -33,8 +33,9 @@ #include #include -// Include this file here to avoid cyclic dependency between +// Include these files here to avoid cyclic dependency between // Desync & GKernel & GComputation & GStreamingCompiled. #include +#include #endif // OPENCV_GAPI_HPP diff --git a/modules/gapi/include/opencv2/gapi/core.hpp b/modules/gapi/include/opencv2/gapi/core.hpp index 8825585696..cb5d55d13f 100644 --- a/modules/gapi/include/opencv2/gapi/core.hpp +++ b/modules/gapi/include/opencv2/gapi/core.hpp @@ -17,6 +17,7 @@ #include #include #include +#include /** \defgroup gapi_core G-API Core functionality @{ @@ -26,6 +27,7 @@ @defgroup gapi_transform Graph API: Image and channel composition functions @} */ + namespace cv { namespace gapi { namespace core { using GMat2 = std::tuple; @@ -296,8 +298,8 @@ namespace core { } }; - G_TYPED_KERNEL(GAbsDiffC, , "org.opencv.core.matrixop.absdiffC") { - static GMatDesc outMeta(GMatDesc a, GScalarDesc) { + G_TYPED_KERNEL(GAbsDiffC, , "org.opencv.core.matrixop.absdiffC") { + static GMatDesc outMeta(const GMatDesc& a, const GScalarDesc&) { return a; } }; @@ -450,12 +452,6 @@ namespace core { } }; - G_TYPED_KERNEL(GCopy, , "org.opencv.core.transform.copy") { - static GMatDesc outMeta(GMatDesc in) { - return in; - } - }; - G_TYPED_KERNEL(GConcatHor, , "org.opencv.imgproc.transform.concatHor") { static GMatDesc outMeta(GMatDesc l, GMatDesc r) { return l.withSizeDelta(+r.size.width, 0); @@ -508,6 +504,77 @@ namespace core { return in.withType(in.depth, in.chan).withSize(dsize); } }; + + G_TYPED_KERNEL( + GKMeansND, + ,GMat,GMat>(GMat,int,GMat,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeansND") { + + static std::tuple + outMeta(const GMatDesc& in, int K, const GMatDesc& bestLabels, const TermCriteria&, int, + KmeansFlags flags) { + GAPI_Assert(in.depth == CV_32F); + std::vector amount_n_dim = detail::checkVector(in); + int amount = amount_n_dim[0], dim = amount_n_dim[1]; + if (amount == -1) // Mat with height != 1, width != 1, channels != 1 given + { // which means that kmeans will consider the following: + amount = in.size.height; + dim = in.size.width * in.chan; + } + // kmeans sets these labels' sizes when no bestLabels given: + GMatDesc out_labels(CV_32S, 1, Size{1, amount}); + // kmeans always sets these centers' sizes: + GMatDesc centers (CV_32F, 1, Size{dim, K}); + if (flags & KMEANS_USE_INITIAL_LABELS) + { + GAPI_Assert(bestLabels.depth == CV_32S); + int labels_amount = detail::checkVector(bestLabels, 1u); + GAPI_Assert(labels_amount == amount); + out_labels = bestLabels; // kmeans preserves bestLabels' sizes if given + } + return std::make_tuple(empty_gopaque_desc(), out_labels, centers); + } + }; + + G_TYPED_KERNEL( + GKMeansNDNoInit, + ,GMat,GMat>(GMat,int,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeansNDNoInit") { + + static std::tuple + outMeta(const GMatDesc& in, int K, const TermCriteria&, int, KmeansFlags flags) { + GAPI_Assert( !(flags & KMEANS_USE_INITIAL_LABELS) ); + GAPI_Assert(in.depth == CV_32F); + 
std::vector amount_n_dim = detail::checkVector(in); + int amount = amount_n_dim[0], dim = amount_n_dim[1]; + if (amount == -1) // Mat with height != 1, width != 1, channels != 1 given + { // which means that kmeans will consider the following: + amount = in.size.height; + dim = in.size.width * in.chan; + } + GMatDesc out_labels(CV_32S, 1, Size{1, amount}); + GMatDesc centers (CV_32F, 1, Size{dim, K}); + return std::make_tuple(empty_gopaque_desc(), out_labels, centers); + } + }; + + G_TYPED_KERNEL(GKMeans2D, ,GArray,GArray> + (GArray,int,GArray,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeans2D") { + static std::tuple + outMeta(const GArrayDesc&,int,const GArrayDesc&,const TermCriteria&,int,KmeansFlags) { + return std::make_tuple(empty_gopaque_desc(), empty_array_desc(), empty_array_desc()); + } + }; + + G_TYPED_KERNEL(GKMeans3D, ,GArray,GArray> + (GArray,int,GArray,TermCriteria,int,KmeansFlags)>, + "org.opencv.core.kmeans3D") { + static std::tuple + outMeta(const GArrayDesc&,int,const GArrayDesc&,const TermCriteria&,int,KmeansFlags) { + return std::make_tuple(empty_gopaque_desc(), empty_array_desc(), empty_array_desc()); + } + }; } // namespace core namespace streaming { @@ -524,6 +591,12 @@ G_TYPED_KERNEL(GSizeR, (GOpaque)>, "org.opencv.streaming.siz return empty_gopaque_desc(); } }; + +G_TYPED_KERNEL(GSizeMF, (GFrame)>, "org.opencv.streaming.sizeMF") { + static GOpaqueDesc outMeta(const GFrameDesc&) { + return empty_gopaque_desc(); + } +}; } // namespace streaming //! @addtogroup gapi_math @@ -572,7 +645,7 @@ Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref @param ddepth optional depth of the output matrix. @sa sub, addWeighted */ -GAPI_EXPORTS GMat addC(const GMat& src1, const GScalar& c, int ddepth = -1); +GAPI_EXPORTS_W GMat addC(const GMat& src1, const GScalar& c, int ddepth = -1); //! @overload GAPI_EXPORTS GMat addC(const GScalar& c, const GMat& src1, int ddepth = -1); @@ -1081,6 +1154,7 @@ GAPI_EXPORTS GMat bitwise_xor(const GMat& src1, const GScalar& src2); /** @brief Inverts every bit of an array. + The function bitwise_not calculates per-element bit-wise inversion of the input matrix: \f[\texttt{dst} (I) = \neg \texttt{src} (I)\f] @@ -1436,41 +1510,77 @@ Output image size will have the size dsize, the depth of output is the same as o */ GAPI_EXPORTS GMatP resizeP(const GMatP& src, const Size& dsize, int interpolation = cv::INTER_LINEAR); -/** @brief Creates one 3-channel (4-channel) matrix out of 3(4) single-channel ones. +/** @brief Creates one 4-channel matrix out of 4 single-channel ones. The function merges several matrices to make a single multi-channel matrix. That is, each element of the output matrix will be a concatenation of the elements of the input matrices, where elements of i-th input matrix are treated as mv[i].channels()-element vectors. -Input matrix must be of @ref CV_8UC3 (@ref CV_8UC4) type. +Output matrix must be of @ref CV_8UC4 type. -The function split3/split4 does the reverse operation. +The function split4 does the reverse operation. -@note Function textual ID for merge3 is "org.opencv.core.transform.merge3" -@note Function textual ID for merge4 is "org.opencv.core.transform.merge4" +@note + - Function textual ID is "org.opencv.core.transform.merge4" -@param src1 first input matrix to be merged -@param src2 second input matrix to be merged -@param src3 third input matrix to be merged -@param src4 fourth input matrix to be merged -@sa split4, split3 +@param src1 first input @ref CV_8UC1 matrix to be merged. 
+@param src2 second input @ref CV_8UC1 matrix to be merged. +@param src3 third input @ref CV_8UC1 matrix to be merged. +@param src4 fourth input @ref CV_8UC1 matrix to be merged. +@sa merge3, split4, split3 */ GAPI_EXPORTS GMat merge4(const GMat& src1, const GMat& src2, const GMat& src3, const GMat& src4); + +/** @brief Creates one 3-channel matrix out of 3 single-channel ones. + +The function merges several matrices to make a single multi-channel matrix. That is, each +element of the output matrix will be a concatenation of the elements of the input matrices, where +elements of i-th input matrix are treated as mv[i].channels()-element vectors. +Output matrix must be of @ref CV_8UC3 type. + +The function split3 does the reverse operation. + +@note + - Function textual ID is "org.opencv.core.transform.merge3" + +@param src1 first input @ref CV_8UC1 matrix to be merged. +@param src2 second input @ref CV_8UC1 matrix to be merged. +@param src3 third input @ref CV_8UC1 matrix to be merged. +@sa merge4, split4, split3 +*/ GAPI_EXPORTS GMat merge3(const GMat& src1, const GMat& src2, const GMat& src3); -/** @brief Divides a 3-channel (4-channel) matrix into 3(4) single-channel matrices. +/** @brief Divides a 4-channel matrix into 4 single-channel matrices. -The function splits a 3-channel (4-channel) matrix into 3(4) single-channel matrices: +The function splits a 4-channel matrix into 4 single-channel matrices: \f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f] -All output matrices must be in @ref CV_8UC1. +All output matrices must be of @ref CV_8UC1 type. -@note Function textual for split3 ID is "org.opencv.core.transform.split3" -@note Function textual for split4 ID is "org.opencv.core.transform.split4" +The function merge4 does the reverse operation. -@param src input @ref CV_8UC4 (@ref CV_8UC3) matrix. -@sa merge3, merge4 +@note + - Function textual ID is "org.opencv.core.transform.split4" + +@param src input @ref CV_8UC4 matrix. +@sa split3, merge3, merge4 */ GAPI_EXPORTS std::tuple split4(const GMat& src); + +/** @brief Divides a 3-channel matrix into 3 single-channel matrices. + +The function splits a 3-channel matrix into 3 single-channel matrices: +\f[\texttt{mv} [c](I) = \texttt{src} (I)_c\f] + +All output matrices must be of @ref CV_8UC1 type. + +The function merge3 does the reverse operation. + +@note + - Function textual ID is "org.opencv.core.transform.split3" + +@param src input @ref CV_8UC3 matrix. +@sa split4, merge3, merge4 +*/ GAPI_EXPORTS_W std::tuple split3(const GMat& src); /** @brief Applies a generic geometrical transformation to an image. @@ -1488,7 +1598,9 @@ convert from floating to fixed-point representations of a map is that they can y cvFloor(y)) and \f$map_2\f$ contains indices in a table of interpolation coefficients. Output image must be of the same size and depth as input one. -@note Function textual ID is "org.opencv.core.transform.remap" +@note + - Function textual ID is "org.opencv.core.transform.remap" + - Due to current implementation limitations the size of an input and output images should be less than 32767x32767. @param src Source image. @param map1 The first map of either (x,y) points or just x values having the type CV_16SC2, @@ -1501,8 +1613,6 @@ and #INTER_LINEAR_EXACT are not supported by this function. borderMode=BORDER_TRANSPARENT, it means that the pixels in the destination image that corresponds to the "outliers" in the source image are not modified by the function. @param borderValue Value used in case of a constant border. By default, it is 0. 
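To make the map semantics concrete, here is a minimal sketch that expresses a horizontal flip through the documented floating-point maps (illustrative only; sz is an assumed image size within the limits noted below):

    cv::Mat map_x(sz, CV_32FC1), map_y(sz, CV_32FC1);
    for (int y = 0; y < sz.height; ++y)
        for (int x = 0; x < sz.width; ++x)
        {
            map_x.at<float>(y, x) = (float)(sz.width - 1 - x);  // mirror along x
            map_y.at<float>(y, x) = (float)y;                   // keep y unchanged
        }
    cv::GMat in;
    cv::GMat out = cv::gapi::remap(in, map_x, map_y, cv::INTER_LINEAR);
    cv::GComputation flipGraph(in, out);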
-@note
-Due to current implementation limitations the size of an input and output images should be less than 32767x32767.
 */
 GAPI_EXPORTS GMat remap(const GMat& src, const Mat& map1, const Mat& map2,
                         int interpolation, int borderMode = BORDER_CONSTANT,
@@ -1559,19 +1669,6 @@ Output matrix must be of the same depth as input one, size is specified by given
 */
 GAPI_EXPORTS GMat crop(const GMat& src, const Rect& rect);

-/** @brief Copies a matrix.
-
-Copies an input array. Works as a regular Mat::clone but happens in-graph.
-Mainly is used to workaround some existing limitations (e.g. to forward an input frame to outputs
-in the streaming mode). Will be deprecated and removed in the future.
-
-@note Function textual ID is "org.opencv.core.transform.copy"
-
-@param src input matrix.
-@sa crop
-*/
-GAPI_EXPORTS GMat copy(const GMat& src);
-
 /** @brief Applies horizontal concatenation to given matrices.

 The function horizontally concatenates two GMat matrices (with the same number of rows).
@@ -1757,6 +1854,79 @@ GAPI_EXPORTS GMat warpAffine(const GMat& src, const Mat& M, const Size& dsize, i
                              int borderMode = cv::BORDER_CONSTANT, const Scalar& borderValue = Scalar());
 //! @} gapi_transform

+/** @brief Finds centers of clusters and groups input samples around the clusters.
+
+The function kmeans implements a k-means algorithm that finds the centers of K clusters
+and groups the input samples around the clusters. As an output, \f$\texttt{bestLabels}_i\f$
+contains a 0-based cluster index for the \f$i^{th}\f$ sample.
+
+@note
+ - Function textual ID is "org.opencv.core.kmeansND"
+ - In case of an N-dimensional points' set given, the input GMat can have the following traits:
+2 dimensions, a single row or column if there are N channels,
+or N columns if there is a single channel. The Mat should have @ref CV_32F depth.
+ - However, if a GMat with height != 1, width != 1, channels != 1 is given as data, n-dimensional
+samples are considered to be given in the amount of A, where A = height, n = width * channels.
+ - In case of GMat given as data:
+   - the output labels are returned as a 1-channel GMat with sizes
+width = 1, height = A, where A is the samples amount, or width = bestLabels.width,
+height = bestLabels.height if bestLabels is given;
+   - the cluster centers are returned as a 1-channel GMat with sizes
+width = n, height = K, where n is the samples' dimensionality and K is the clusters' amount.
+ - As one of possible usages, if you want to control the initial labels for each attempt
+by yourself, you can utilize just the core of the function. To do that, set the number
+of attempts to 1, initialize labels each time using a custom algorithm, pass them with the
+( flags = #KMEANS_USE_INITIAL_LABELS ) flag, and then choose the best (most-compact) clustering.
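+
+A minimal usage sketch for the GMat overload declared below (K, termination criteria and flags
+are illustrative choices, not prescribed values):
+@code{.cpp}
+cv::GMat data; // CV_32F, one n-dimensional sample per row
+cv::GOpaque<double> compactness;
+cv::GMat labels, centers;
+std::tie(compactness, labels, centers) =
+    cv::gapi::kmeans(data, 3,
+                     cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 10, 1.0),
+                     5, cv::KMEANS_PP_CENTERS);
+@endcode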
+
+@param data Data for clustering. An array of N-dimensional points with float coordinates is needed.
+The function can take GArray<Point2f> and GArray<Point3f> for the 2D and 3D cases, or GMat for any
+dimensionality and channels.
+@param K Number of clusters to split the set by.
+@param bestLabels Optional input integer array that can store the supposed initial cluster indices
+for every sample. Used when ( flags = #KMEANS_USE_INITIAL_LABELS ) flag is set.
+@param criteria The algorithm termination criteria, that is, the maximum number of iterations
+and/or the desired accuracy. The accuracy is specified as criteria.epsilon. As soon as each of
+the cluster centers moves by less than criteria.epsilon on some iteration, the algorithm stops.
+@param attempts Flag to specify the number of times the algorithm is executed using different
+initial labellings. The algorithm returns the labels that yield the best compactness (see the
+first function return value).
+@param flags Flag that can take values of cv::KmeansFlags.
+
+@return
+ - Compactness measure that is computed as
+\f[\sum _i  \| \texttt{samples} _i - \texttt{centers} _{ \texttt{labels} _i} \| ^2\f]
+after every attempt. The best (minimum) value is chosen and the corresponding labels and the
+compactness value are returned by the function.
+ - Integer array that stores the cluster indices for every sample.
+ - Array of the cluster centers.
+*/
+GAPI_EXPORTS std::tuple<GOpaque<double>,GMat,GMat>
+kmeans(const GMat& data, const int K, const GMat& bestLabels,
+       const TermCriteria& criteria, const int attempts, const KmeansFlags flags);
+
+/** @overload
+@note
+ - Function textual ID is "org.opencv.core.kmeansNDNoInit"
+ - #KMEANS_USE_INITIAL_LABELS flag must not be set while using this overload.
+ */
+GAPI_EXPORTS_W std::tuple<GOpaque<double>,GMat,GMat>
+kmeans(const GMat& data, const int K, const TermCriteria& criteria, const int attempts,
+       const KmeansFlags flags);
+
+/** @overload
+@note Function textual ID is "org.opencv.core.kmeans2D"
+ */
+GAPI_EXPORTS_W std::tuple<GOpaque<double>,GArray<int>,GArray<Point2f>>
+kmeans(const GArray<Point2f>& data, const int K, const GArray<int>& bestLabels,
+       const TermCriteria& criteria, const int attempts, const KmeansFlags flags);
+
+/** @overload
+@note Function textual ID is "org.opencv.core.kmeans3D"
+ */
+GAPI_EXPORTS std::tuple<GOpaque<double>,GArray<int>,GArray<Point3f>>
+kmeans(const GArray<Point3f>& data, const int K, const GArray<int>& bestLabels,
+       const TermCriteria& criteria, const int attempts, const KmeansFlags flags);
+
 namespace streaming {
 /** @brief Gets dimensions from Mat.

@@ -1765,7 +1935,7 @@ namespace streaming {
 @param src Input tensor
 @return Size (tensor dimensions).
 */
-GAPI_EXPORTS GOpaque<Size> size(const GMat& src);
+GAPI_EXPORTS_W GOpaque<Size> size(const GMat& src);

 /** @overload
 Gets dimensions from rectangle.
@@ -1775,7 +1945,16 @@ Gets dimensions from rectangle.
 @param r Input rectangle.
 @return Size (rectangle dimensions).
 */
-GAPI_EXPORTS GOpaque<Size> size(const GOpaque<Rect>& r);
+GAPI_EXPORTS_W GOpaque<Size> size(const GOpaque<Rect>& r);
+
+/** @brief Gets dimensions from MediaFrame.
+
+@note Function textual ID is "org.opencv.streaming.sizeMF"
+
+@param src Input frame
+@return Size (frame dimensions).
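+
+A minimal usage sketch (assumes a GFrame input, e.g. one coming from a video source):
+@code{.cpp}
+cv::GFrame in;
+cv::GOpaque<cv::Size> sz = cv::gapi::streaming::size(in);
+@endcode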
+*/ +GAPI_EXPORTS GOpaque size(const GFrame& src); } //namespace streaming } //namespace gapi } //namespace cv diff --git a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp index 5dd70bd2e8..5539e244ba 100644 --- a/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp +++ b/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp @@ -101,6 +101,7 @@ public: const cv::Scalar& inVal(int input); cv::Scalar& outValR(int output); // FIXME: Avoid cv::Scalar s = ctx.outValR() + cv::MediaFrame& outFrame(int output); template std::vector& outVecR(int output) // FIXME: the same issue { return outVecRef(output).wref(); @@ -189,6 +190,11 @@ template<> struct get_in >: public get_in>/GArray> conversion should be done more gracefully in the system +template struct get_in> >: public get_in> > +{ +}; + //FIXME(dm): GOpaque/GOpaque conversion should be done more gracefully in the system template<> struct get_in >: public get_in > { @@ -258,6 +264,13 @@ template<> struct get_out return ctx.outValR(idx); } }; +template<> struct get_out +{ + static cv::MediaFrame& get(GCPUContext &ctx, int idx) + { + return ctx.outFrame(idx); + } +}; template struct get_out> { static std::vector& get(GCPUContext &ctx, int idx) diff --git a/modules/gapi/include/opencv2/gapi/cpu/stereo.hpp b/modules/gapi/include/opencv2/gapi/cpu/stereo.hpp new file mode 100644 index 0000000000..f7d79e9b3c --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/cpu/stereo.hpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_CPU_STEREO_API_HPP +#define OPENCV_GAPI_CPU_STEREO_API_HPP + +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace calib3d { +namespace cpu { + +GAPI_EXPORTS GKernelPackage kernels(); + +/** @brief Structure for the Stereo operation initialization parameters.*/ +struct GAPI_EXPORTS StereoInitParam { + StereoInitParam(int nD, int bS, double bL, double f): + numDisparities(nD), blockSize(bS), baseline(bL), focus(f) {} + + StereoInitParam() = default; + + int numDisparities = 0; + int blockSize = 21; + double baseline = 70.; + double focus = 1000.; +}; + +} // namespace cpu +} // namespace calib3d +} // namespace gapi + +namespace detail { + + template<> struct CompileArgTag { + static const char* tag() { + return "org.opencv.stereoInit"; + } +}; + +} // namespace detail +} // namespace cv + + +#endif // OPENCV_GAPI_CPU_STEREO_API_HPP diff --git a/modules/gapi/include/opencv2/gapi/garg.hpp b/modules/gapi/include/opencv2/gapi/garg.hpp index 0838573b56..20f2233bf9 100644 --- a/modules/gapi/include/opencv2/gapi/garg.hpp +++ b/modules/gapi/include/opencv2/gapi/garg.hpp @@ -210,6 +210,7 @@ using GRunArgP = util::variant< cv::Mat*, cv::RMat*, cv::Scalar*, + cv::MediaFrame*, cv::detail::VectorRef, cv::detail::OpaqueRef >; @@ -248,6 +249,30 @@ template inline GRunArgsP gout(Ts&... args) return GRunArgsP{ GRunArgP(detail::wrap_host_helper::wrap_out(args))... 
}; } +struct GTypeInfo; +using GTypesInfo = std::vector; + +// FIXME: Needed for python bridge, must be moved to more appropriate header +namespace detail { +struct ExtractArgsCallback +{ + cv::GRunArgs operator()(const cv::GTypesInfo& info) const { return c(info); } + using CallBackT = std::function; + CallBackT c; +}; + +struct ExtractMetaCallback +{ + cv::GMetaArgs operator()(const cv::GTypesInfo& info) const { return c(info); } + using CallBackT = std::function; + CallBackT c; +}; + +void constructGraphOutputs(const cv::GTypesInfo &out_info, + cv::GRunArgs &args, + cv::GRunArgsP &outs); +} // namespace detail + } // namespace cv #endif // OPENCV_GAPI_GARG_HPP diff --git a/modules/gapi/include/opencv2/gapi/garray.hpp b/modules/gapi/include/opencv2/gapi/garray.hpp index 5d4b3c59e0..32799bc07e 100644 --- a/modules/gapi/include/opencv2/gapi/garray.hpp +++ b/modules/gapi/include/opencv2/gapi/garray.hpp @@ -35,14 +35,14 @@ template class GArray; * \addtogroup gapi_meta_args * @{ */ -struct GArrayDesc +struct GAPI_EXPORTS_W_SIMPLE GArrayDesc { // FIXME: Body // FIXME: Also implement proper operator== then bool operator== (const GArrayDesc&) const { return true; } }; template GArrayDesc descr_of(const std::vector &) { return {};} -static inline GArrayDesc empty_array_desc() {return {}; } +GAPI_EXPORTS_W inline GArrayDesc empty_array_desc() {return {}; } /** @} */ std::ostream& operator<<(std::ostream& os, const cv::GArrayDesc &desc); @@ -246,12 +246,18 @@ namespace detail public: VectorRef() = default; - template explicit VectorRef(const std::vector& vec) : - m_ref(new VectorRefT(vec)), m_kind(GOpaqueTraits::kind) {} - template explicit VectorRef(std::vector& vec) : - m_ref(new VectorRefT(vec)), m_kind(GOpaqueTraits::kind) {} - template explicit VectorRef(std::vector&& vec) : - m_ref(new VectorRefT(std::move(vec))), m_kind(GOpaqueTraits::kind) {} + template explicit VectorRef(const std::vector& vec) + : m_ref(new VectorRefT(vec)) + , m_kind(GOpaqueTraits::kind) + {} + template explicit VectorRef(std::vector& vec) + : m_ref(new VectorRefT(vec)) + , m_kind(GOpaqueTraits::kind) + {} + template explicit VectorRef(std::vector&& vec) + : m_ref(new VectorRefT(std::move(vec))) + , m_kind(GOpaqueTraits::kind) + {} cv::detail::OpaqueKind getKind() const { @@ -321,9 +327,10 @@ namespace detail # define FLATTEN_NS cv #endif template struct flatten_g; - template<> struct flatten_g { using type = FLATTEN_NS::Mat; }; - template<> struct flatten_g { using type = FLATTEN_NS::Scalar; }; - template struct flatten_g { using type = T; }; + template<> struct flatten_g { using type = FLATTEN_NS::Mat; }; + template<> struct flatten_g { using type = FLATTEN_NS::Scalar; }; + template struct flatten_g> { using type = std::vector; }; + template struct flatten_g { using type = T; }; #undef FLATTEN_NS // FIXME: the above mainly duplicates "ProtoToParam" thing from gtyped.hpp // but I decided not to include gtyped here - probably worth moving that stuff @@ -368,8 +375,6 @@ private: detail::GArrayU m_ref; }; -using GArrayP2f = GArray; - /** @} */ } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/gcall.hpp b/modules/gapi/include/opencv2/gapi/gcall.hpp index 511eca1408..8d1b8d6010 100644 --- a/modules/gapi/include/opencv2/gapi/gcall.hpp +++ b/modules/gapi/include/opencv2/gapi/gcall.hpp @@ -11,6 +11,7 @@ #include // GArg #include // GMat #include // GScalar +#include // GFrame #include // GArray #include // GOpaque @@ -41,6 +42,7 @@ public: GMat yield (int output = 0); GMatP yieldP (int output = 0); GScalar 
yieldScalar(int output = 0); + GFrame yieldFrame (int output = 0); template GArray yieldArray(int output = 0) { diff --git a/modules/gapi/include/opencv2/gapi/gcommon.hpp b/modules/gapi/include/opencv2/gapi/gcommon.hpp index a474140baa..8119e397eb 100644 --- a/modules/gapi/include/opencv2/gapi/gcommon.hpp +++ b/modules/gapi/include/opencv2/gapi/gcommon.hpp @@ -204,12 +204,12 @@ template GCompileArgs compile_args(Ts&&... args) return GCompileArgs{ GCompileArg(args)... }; } +namespace gapi +{ /** * @brief Retrieves particular compilation argument by its type from * cv::GCompileArgs */ -namespace gapi -{ template inline cv::util::optional getCompileArg(const cv::GCompileArgs &args) { diff --git a/modules/gapi/include/opencv2/gapi/gcomputation.hpp b/modules/gapi/include/opencv2/gapi/gcomputation.hpp index 8732ada0d6..a3566fb495 100644 --- a/modules/gapi/include/opencv2/gapi/gcomputation.hpp +++ b/modules/gapi/include/opencv2/gapi/gcomputation.hpp @@ -258,7 +258,8 @@ public: void apply(GRunArgs &&ins, GRunArgsP &&outs, GCompileArgs &&args = {}); // Arg-to-arg overload /// @private -- Exclude this function from OpenCV documentation - GAPI_WRAP GRunArgs apply(GRunArgs &&ins, GCompileArgs &&args = {}); + GAPI_WRAP GRunArgs apply(const cv::detail::ExtractArgsCallback &callback, + GCompileArgs &&args = {}); /// @private -- Exclude this function from OpenCV documentation void apply(const std::vector& ins, // Compatibility overload @@ -436,7 +437,11 @@ public: * * @sa @ref gapi_compile_args */ - GAPI_WRAP GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {}); + GStreamingCompiled compileStreaming(GMetaArgs &&in_metas, GCompileArgs &&args = {}); + + /// @private -- Exclude this function from OpenCV documentation + GAPI_WRAP GStreamingCompiled compileStreaming(const cv::detail::ExtractMetaCallback &callback, + GCompileArgs &&args = {}); /** * @brief Compile the computation for streaming mode. diff --git a/modules/gapi/include/opencv2/gapi/gframe.hpp b/modules/gapi/include/opencv2/gapi/gframe.hpp index f555a93aa3..13fd5d6d29 100644 --- a/modules/gapi/include/opencv2/gapi/gframe.hpp +++ b/modules/gapi/include/opencv2/gapi/gframe.hpp @@ -62,6 +62,9 @@ struct GAPI_EXPORTS GFrameDesc static inline GFrameDesc empty_gframe_desc() { return GFrameDesc{}; } /** @} */ +class MediaFrame; +GAPI_EXPORTS GFrameDesc descr_of(const MediaFrame &frame); + GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const cv::GFrameDesc &desc); } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/gkernel.hpp b/modules/gapi/include/opencv2/gapi/gkernel.hpp index 0ec7dd07c0..f70e50253d 100644 --- a/modules/gapi/include/opencv2/gapi/gkernel.hpp +++ b/modules/gapi/include/opencv2/gapi/gkernel.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #ifndef OPENCV_GAPI_GKERNEL_HPP @@ -30,6 +30,7 @@ struct GTypeInfo { GShape shape; cv::detail::OpaqueKind kind; + detail::HostCtor ctor; }; using GShapes = std::vector; @@ -90,6 +91,10 @@ namespace detail { static inline cv::GOpaque yield(cv::GCall &call, int i) { return call.yieldOpaque(i); } }; + template<> struct Yield + { + static inline cv::GFrame yield(cv::GCall &call, int i) { return call.yieldFrame(i); } + }; //////////////////////////////////////////////////////////////////////////// // Helper classes which brings outputMeta() marshalling to kernel @@ -239,8 +244,6 @@ public: using InArgs = std::tuple; using OutArgs = std::tuple; - static_assert(!cv::detail::contains::value, "Values of GFrame type can't be used as operation outputs"); - static R on(Args... args) { cv::GCall call(GKernel{ K::id() @@ -514,6 +517,13 @@ namespace gapi { */ const std::vector& get_transformations() const; + /** + * @brief Returns vector of kernel ids included in the package + * + * @return vector of kernel ids included in the package + */ + std::vector get_kernel_ids() const; + /** * @brief Test if a particular kernel _implementation_ KImpl is * included in this kernel package. @@ -603,6 +613,18 @@ namespace gapi { includeHelper(); } + /** + * @brief Adds a new kernel based on it's backend and id into the kernel package + * + * @param backend backend associated with the kernel + * @param kernel_id a name/id of the kernel + */ + void include(const cv::gapi::GBackend& backend, const std::string& kernel_id) + { + removeAPI(kernel_id); + m_id_kernels[kernel_id] = std::make_pair(backend, GKernelImpl{{}, {}}); + } + /** * @brief Lists all backends which are included into package * diff --git a/modules/gapi/include/opencv2/gapi/gmat.hpp b/modules/gapi/include/opencv2/gapi/gmat.hpp index f441413be5..5e567fb107 100644 --- a/modules/gapi/include/opencv2/gapi/gmat.hpp +++ b/modules/gapi/include/opencv2/gapi/gmat.hpp @@ -73,25 +73,25 @@ class RMat; * \addtogroup gapi_meta_args * @{ */ -struct GAPI_EXPORTS GMatDesc +struct GAPI_EXPORTS_W_SIMPLE GMatDesc { // FIXME: Default initializers in C++14 - int depth; - int chan; - cv::Size size; // NB.: no multi-dimensional cases covered yet - bool planar; - std::vector dims; // FIXME: Maybe it's real questionable to have it here + GAPI_PROP int depth; + GAPI_PROP int chan; + GAPI_PROP cv::Size size; // NB.: no multi-dimensional cases covered yet + GAPI_PROP bool planar; + GAPI_PROP std::vector dims; // FIXME: Maybe it's real questionable to have it here - GMatDesc(int d, int c, cv::Size s, bool p = false) + GAPI_WRAP GMatDesc(int d, int c, cv::Size s, bool p = false) : depth(d), chan(c), size(s), planar(p) {} - GMatDesc(int d, const std::vector &dd) + GAPI_WRAP GMatDesc(int d, const std::vector &dd) : depth(d), chan(-1), size{-1,-1}, planar(false), dims(dd) {} - GMatDesc(int d, std::vector &&dd) + GAPI_WRAP GMatDesc(int d, std::vector &&dd) : depth(d), chan(-1), size{-1,-1}, planar(false), dims(std::move(dd)) {} - GMatDesc() : GMatDesc(-1, -1, {-1,-1}) {} + GAPI_WRAP GMatDesc() : GMatDesc(-1, -1, {-1,-1}) {} inline bool operator== (const GMatDesc &rhs) const { @@ -155,7 +155,7 @@ struct GAPI_EXPORTS GMatDesc // Meta combinator: return a new GMatDesc with specified data depth // and number of channels. 
// (all other fields are taken unchanged from this GMatDesc) - GMatDesc withType(int ddepth, int dchan) const + GAPI_WRAP GMatDesc withType(int ddepth, int dchan) const { GAPI_Assert(CV_MAT_CN(ddepth) == 1 || ddepth == -1); GMatDesc desc = withDepth(ddepth); @@ -203,6 +203,27 @@ struct GAPI_EXPORTS GMatDesc static inline GMatDesc empty_gmat_desc() { return GMatDesc{-1,-1,{-1,-1}}; } +namespace gapi { namespace detail { +/** Checks GMatDesc fields if the passed matrix is a set of n-dimentional points. +@param in GMatDesc to check. +@param n expected dimensionality. +@return the amount of points. In case input matrix can't be described as vector of points +of expected dimensionality, returns -1. + */ +int checkVector(const GMatDesc& in, const size_t n); + +/** @overload + +Checks GMatDesc fields if the passed matrix can be described as a set of points of any +dimensionality. + +@return array of two elements in form of std::vector: the amount of points +and their calculated dimensionality. In case input matrix can't be described as vector of points, +returns {-1, -1}. + */ +std::vector checkVector(const GMatDesc& in); +}} // namespace gapi::detail + #if !defined(GAPI_STANDALONE) GAPI_EXPORTS GMatDesc descr_of(const cv::UMat &mat); #endif // !defined(GAPI_STANDALONE) diff --git a/modules/gapi/include/opencv2/gapi/gopaque.hpp b/modules/gapi/include/opencv2/gapi/gopaque.hpp index 6117971768..00f0718422 100644 --- a/modules/gapi/include/opencv2/gapi/gopaque.hpp +++ b/modules/gapi/include/opencv2/gapi/gopaque.hpp @@ -21,6 +21,9 @@ #include #include +#include // OpaqueKind +#include // TypeHintBase + namespace cv { // Forward declaration; GNode and GOrigin are an internal @@ -33,14 +36,14 @@ template class GOpaque; * \addtogroup gapi_meta_args * @{ */ -struct GOpaqueDesc +struct GAPI_EXPORTS_W_SIMPLE GOpaqueDesc { // FIXME: Body // FIXME: Also implement proper operator== then bool operator== (const GOpaqueDesc&) const { return true; } }; template GOpaqueDesc descr_of(const U &) { return {};} -static inline GOpaqueDesc empty_gopaque_desc() {return {}; } +GAPI_EXPORTS_W inline GOpaqueDesc empty_gopaque_desc() {return {}; } /** @} */ std::ostream& operator<<(std::ostream& os, const cv::GOpaqueDesc &desc); diff --git a/modules/gapi/include/opencv2/gapi/gproto.hpp b/modules/gapi/include/opencv2/gapi/gproto.hpp index f91fcdb2c8..fbcccb38ea 100644 --- a/modules/gapi/include/opencv2/gapi/gproto.hpp +++ b/modules/gapi/include/opencv2/gapi/gproto.hpp @@ -135,7 +135,7 @@ GRunArg value_of(const GOrigin &origin); // Transform run-time computation arguments into a collection of metadata // extracted from that arguments GMetaArg GAPI_EXPORTS descr_of(const GRunArg &arg ); -GMetaArgs GAPI_EXPORTS_W descr_of(const GRunArgs &args); +GMetaArgs GAPI_EXPORTS descr_of(const GRunArgs &args); // Transform run-time operation result argument into metadata extracted from that argument // Used to compare the metadata, which generated at compile time with the metadata result operation in run time diff --git a/modules/gapi/include/opencv2/gapi/gscalar.hpp b/modules/gapi/include/opencv2/gapi/gscalar.hpp index 00abdd1d13..d4af2cab5d 100644 --- a/modules/gapi/include/opencv2/gapi/gscalar.hpp +++ b/modules/gapi/include/opencv2/gapi/gscalar.hpp @@ -49,7 +49,7 @@ private: * \addtogroup gapi_meta_args * @{ */ -struct GScalarDesc +struct GAPI_EXPORTS_W_SIMPLE GScalarDesc { // NB.: right now it is empty @@ -64,9 +64,9 @@ struct GScalarDesc } }; -static inline GScalarDesc empty_scalar_desc() { return GScalarDesc(); } +GAPI_EXPORTS_W 
inline GScalarDesc empty_scalar_desc() { return GScalarDesc(); }

-GAPI_EXPORTS GScalarDesc descr_of(const cv::Scalar &scalar);
+GAPI_EXPORTS GScalarDesc descr_of(const cv::Scalar &scalar);

 std::ostream& operator<<(std::ostream& os, const cv::GScalarDesc &desc);
diff --git a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp
index e09cf8d0f7..4e579caafb 100644
--- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp
+++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp
@@ -180,7 +180,10 @@ public:
      * @param ins vector of inputs to process.
      * @sa gin
      */
-    GAPI_WRAP void setSource(GRunArgs &&ins);
+    void setSource(GRunArgs &&ins);
+
+    /// @private -- Exclude this function from OpenCV documentation
+    GAPI_WRAP void setSource(const cv::detail::ExtractArgsCallback& callback);

     /**
      * @brief Specify an input video stream for a single-input
@@ -251,6 +254,7 @@ public:
     bool pull(cv::GRunArgsP &&outs);

     // NB: Used from python
+    /// @private -- Exclude this function from OpenCV documentation
     GAPI_WRAP std::tuple<bool, cv::GRunArgs> pull();

     /**
diff --git a/modules/gapi/include/opencv2/gapi/imgproc.hpp b/modules/gapi/include/opencv2/gapi/imgproc.hpp
index 7435ec1e1d..25a64a5067 100644
--- a/modules/gapi/include/opencv2/gapi/imgproc.hpp
+++ b/modules/gapi/include/opencv2/gapi/imgproc.hpp
@@ -43,15 +43,6 @@ void validateFindingContoursMeta(const int depth, const int chan, const int mode
         break;
     }
 }
-
-// Checks if the passed mat is a set of n-dimentional points of the given depth
-bool isPointsVector(const int chan, const cv::Size &size, const int depth,
-                    const int n, const int ddepth = -1)
-{
-    return (ddepth == depth || ddepth < 0) &&
-           ((chan == n && (size.height == 1 || size.width == 1)) ||
-            (chan == 1 && size.width == n));
-}
 } // anonymous namespace

 namespace cv { namespace gapi {
@@ -212,10 +203,17 @@ namespace imgproc {
     G_TYPED_KERNEL(GBoundingRectMat, <GOpaque<Rect>(GMat)>,
                    "org.opencv.imgproc.shape.boundingRectMat") {
         static GOpaqueDesc outMeta(GMatDesc in) {
-            GAPI_Assert((in.depth == CV_8U && in.chan == 1) ||
-                        (isPointsVector(in.chan, in.size, in.depth, 2, CV_32S) ||
-                         isPointsVector(in.chan, in.size, in.depth, 2, CV_32F)));
-
+            if (in.depth == CV_8U)
+            {
+                GAPI_Assert(in.chan == 1);
+            }
+            else
+            {
+                GAPI_Assert (in.depth == CV_32S || in.depth == CV_32F);
+                int amount = detail::checkVector(in, 2u);
+                GAPI_Assert(amount != -1 &&
+                            "Input Mat can't be described as vector of 2-dimensional points");
+            }
             return empty_gopaque_desc();
         }
     };
@@ -237,7 +235,9 @@ namespace imgproc {
     G_TYPED_KERNEL(GFitLine2DMat, <GOpaque<Vec4f>(GMat,DistanceTypes,double,double,double)>,
                    "org.opencv.imgproc.shape.fitLine2DMat") {
         static GOpaqueDesc outMeta(GMatDesc in,DistanceTypes,double,double,double) {
-            GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 2, -1));
+            int amount = detail::checkVector(in, 2u);
+            GAPI_Assert(amount != -1 &&
+                        "Input Mat can't be described as vector of 2-dimensional points");
             return empty_gopaque_desc();
         }
     };
@@ -269,7 +269,9 @@ namespace imgproc {
     G_TYPED_KERNEL(GFitLine3DMat, <GOpaque<Vec6f>(GMat,DistanceTypes,double,double,double)>,
                    "org.opencv.imgproc.shape.fitLine3DMat") {
         static GOpaqueDesc outMeta(GMatDesc in,int,double,double,double) {
-            GAPI_Assert(isPointsVector(in.chan, in.size, in.depth, 3, -1));
+            int amount = detail::checkVector(in, 3u);
+            GAPI_Assert(amount != -1 &&
+                        "Input Mat can't be described as vector of 3-dimensional points");
             return empty_gopaque_desc();
         }
     };
@@ -501,10 +503,10 @@ kernel kernelY. The final result is returned.
Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note In case of floating-point computation, rounding to nearest even is procedeed
+@note
+ - In case of floating-point computation, rounding to nearest even is performed
 if hardware supports it (if not - to nearest value).
-
-@note Function textual ID is "org.opencv.imgproc.filters.sepfilter"
+ - Function textual ID is "org.opencv.imgproc.filters.sepfilter"

 @param src Source image.
 @param ddepth desired depth of the destination image (the following combinations of src.depth() and ddepth are supported:
@@ -543,9 +545,9 @@ anchor.y - 1)`.

 Supported matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
 Output image must have the same size and number of channels an input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.filter2D"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.filter2D"

 @param src input image.
 @param ddepth desired depth of the destination image
@@ -580,9 +582,9 @@ algorithms, and so on). If you need to compute pixel sums over variable-size win

 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.boxfilter"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.boxfilter"

 @param src Source image.
 @param dtype the output image depth (-1 to set the input image data type).
@@ -609,9 +611,9 @@ true, borderType)`.

 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.blur"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.blur"

 @param src Source image.
 @param ksize blurring kernel size.
@@ -637,9 +639,9 @@ Output image must have the same type and number of channels an input image.

 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.gaussianBlur"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.gaussianBlur"

 @param src input image;
 @param ksize Gaussian kernel size.
ksize.width and ksize.height can differ but they both must be
@@ -662,10 +664,10 @@ GAPI_EXPORTS GMat gaussianBlur(const GMat& src, const Size& ksize, double sigmaX

 The function smoothes an image using the median filter with the \f$\texttt{ksize} \times
 \texttt{ksize}\f$ aperture. Each channel of a multi-channel image is processed independently.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
 The median filter uses cv::BORDER_REPLICATE internally to cope with border pixels,
 see cv::BorderTypes
-
-@note Function textual ID is "org.opencv.imgproc.filters.medianBlur"
+ - Function textual ID is "org.opencv.imgproc.filters.medianBlur"

 @param src input matrix (image)
 @param ksize aperture linear size; it must be odd and greater than 1, for example: 3, 5, 7 ...
@@ -683,9 +685,9 @@ shape of a pixel neighborhood over which the minimum is taken:

 Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.erode"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.erode"

 @param src input image
 @param kernel structuring element used for erosion; if `element=Mat()`, a `3 x 3` rectangular
@@ -707,7 +709,9 @@ The function erodes the source image using the rectangular structuring element w
 Erosion can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.erode"

 @param src input image
 @param iterations number of times erosion is applied.
@@ -728,9 +732,9 @@ shape of a pixel neighborhood over which the maximum is taken:

 Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.dilate"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.dilate"

 @param src input image.
@param kernel structuring element used for dilation; if elemenat=Mat(), a 3 x 3 rectangular
@@ -755,9 +759,9 @@ shape of a pixel neighborhood over which the maximum is taken:

 Dilation can be applied several (iterations) times. In case of multi-channel images, each channel is processed independently.
 Supported input matrix data types are @ref CV_8UC1, @ref CV_8UC3, @ref CV_16UC1, @ref CV_16SC1, and @ref CV_32FC1.
 Output image must have the same type, size, and number of channels as the input image.
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.dilate"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.dilate"

 @param src input image.
 @param iterations number of times dilation is applied.
@@ -778,7 +782,12 @@ basic operations. Any of the operations can be done in-place. In case of
 multi-channel images, each channel is processed independently.

-@note Function textual ID is "org.opencv.imgproc.filters.morphologyEx"
+@note
+ - Function textual ID is "org.opencv.imgproc.filters.morphologyEx"
+ - The number of iterations is the number of times the erosion or dilation operation will be
+applied. For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to
+applying successively: erode -> erode -> dilate -> dilate
+(and not erode -> dilate -> erode -> dilate).

 @param src Input image.
 @param op Type of a morphological operation, see #MorphTypes
@@ -790,10 +799,6 @@ the kernel center.
 @param borderValue Border value in case of a constant border. The default value has a special
 meaning.
 @sa dilate, erode, getStructuringElement
-@note The number of iterations is the number of times erosion or dilatation operation will be
-applied. For instance, an opening operation (#MORPH_OPEN) with two iterations is equivalent to
-apply successively: erode -> erode -> dilate -> dilate
-(and not erode -> dilate -> erode -> dilate).
 */
 GAPI_EXPORTS GMat morphologyEx(const GMat &src, const MorphTypes op, const Mat &kernel,
                                const Point &anchor = Point(-1,-1),
@@ -830,9 +835,9 @@ The second case corresponds to a kernel of:

 \f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f]

-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.sobel"
+@note
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.sobel"

 @param src input image.
@param ddepth output image depth, see @ref filter_depths "combinations"; in the case of
@@ -881,11 +886,10 @@ The second case corresponds to a kernel of:

 \f[\vecthreethree{-1}{-2}{-1}{0}{0}{0}{1}{2}{1}\f]

-@note First returned matrix correspons to dx derivative while the second one to dy.
-
-@note Rounding to nearest even is procedeed if hardware supports it, if not - to nearest.
-
-@note Function textual ID is "org.opencv.imgproc.filters.sobelxy"
+@note
+ - First returned matrix corresponds to dx derivative while the second one to dy.
+ - Rounding to nearest even is performed if hardware supports it, if not - to nearest.
+ - Function textual ID is "org.opencv.imgproc.filters.sobelxy"

 @param src input image.
@@ -1008,11 +1012,11 @@ described in @cite Shi94

 The function can be used to initialize a point-based tracker of an object.

-@note If the function is called with different values A and B of the parameter qualityLevel , and
+@note
+ - If the function is called with different values A and B of the parameter qualityLevel, and
 A \> B, the vector of returned corners with qualityLevel=A will be the prefix of the output vector
 with qualityLevel=B .
-
-@note Function textual ID is "org.opencv.imgproc.feature.goodFeaturesToTrack"
+ - Function textual ID is "org.opencv.imgproc.feature.goodFeaturesToTrack"

 @param image Input 8-bit or floating-point 32-bit, single-channel image.
 @param maxCorners Maximum number of corners to return. If there are more corners than are found,
@@ -1057,9 +1061,9 @@ The function equalizes the histogram of the input image using the following algo
 - Transform the image using \f$H'\f$ as a look-up table: \f$\texttt{dst}(x,y) = H'(\texttt{src}(x,y))\f$
 The algorithm normalizes the brightness and increases the contrast of the image.

-@note The returned image is of the same size and type as input.
-
-@note Function textual ID is "org.opencv.imgproc.equalizeHist"
+@note
+ - The returned image is of the same size and type as input.
+ - Function textual ID is "org.opencv.imgproc.equalizeHist"

 @param src Source 8-bit single channel image.
 */
@@ -1119,8 +1123,9 @@ image of labels ( @ref CV_32SC1 ). If #RETR_FLOODFILL -- @ref CV_32SC1 supports
 contours are extracted from the image ROI and then they should be analyzed in the whole image
 context.

-@return GArray of detected contours. Each contour is stored as a GArray of points.
-@return Optional output GArray of cv::Vec4i, containing information about the image topology.
+@return
+ - GArray of detected contours. Each contour is stored as a GArray of points.
+ - Optional output GArray of cv::Vec4i, containing information about the image topology.
 It has as many elements as the number of contours. For each i-th contour contours[i], the elements
 hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are set to 0-based
 indices in contours of the next and previous contours at the same hierarchical level, the first
@@ -1144,16 +1149,16 @@ of gray-scale image.

 The function calculates and returns the minimal up-right bounding rectangle for the specified
 point set or non-zero pixels of gray-scale image.

-@note Function textual ID is "org.opencv.imgproc.shape.boundingRectMat"
+@note
+ - Function textual ID is "org.opencv.imgproc.shape.boundingRectMat"
+ - In case of a 2D points' set given, Mat should be 2-dimensional, have a single row or column
+if there are 2 channels, or have 2 columns if there is a single channel. Mat should have either
+@ref CV_32S or @ref CV_32F depth

 @param src Input gray-scale image @ref CV_8UC1; or input set of @ref CV_32S or @ref CV_32F
 2D points stored in Mat.
-
-@note In case of a 2D points' set given, Mat should be 2-dimensional, have a single row or column
-if there are 2 channels, or have 2 columns if there is a single channel. Mat should have either
-@ref CV_32S or @ref CV_32F depth
 */
-GAPI_EXPORTS GOpaque<Rect> boundingRect(const GMat& src);
+GAPI_EXPORTS_W GOpaque<Rect> boundingRect(const GMat& src);

 /** @overload

@@ -1163,7 +1168,7 @@ Calculates the up-right bounding rectangle of a point set.

 @param src Input 2D point set, stored in std::vector<cv::Point2i>.
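+
+A minimal usage sketch (assumes the points are produced elsewhere in the graph):
+@code{.cpp}
+cv::GArray<cv::Point2i> pts;
+cv::GOpaque<cv::Rect> rc = cv::gapi::boundingRect(pts);
+@endcode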
*/ -GAPI_EXPORTS GOpaque boundingRect(const GArray& src); +GAPI_EXPORTS_W GOpaque boundingRect(const GArray& src); /** @overload @@ -1197,14 +1202,13 @@ The algorithm is based on the M-estimator ( , std::vector, std::vector. - -@note In case of an N-dimentional points' set given, Mat should be 2-dimensional, have a single row -or column if there are N channels, or have N columns if there is a single channel. - @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER and @ref DIST_C are not suppored. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value @@ -1270,14 +1274,13 @@ The algorithm is based on the M-estimator ( , std::vector, std::vector. - -@note In case of an N-dimentional points' set given, Mat should be 2-dimensional, have a single row -or column if there are N channels, or have N columns if there is a single channel. - @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER and @ref DIST_C are not suppored. @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value @@ -1341,6 +1344,7 @@ Output image is 8-bit unsigned 3-channel image @ref CV_8UC3. GAPI_EXPORTS GMat BGR2RGB(const GMat& src); /** @brief Converts an image from RGB color space to gray-scaled. + The conventional ranges for R, G, and B channel values are 0 to 255. Resulting gray color value computed as \f[\texttt{dst} (I)= \texttt{0.299} * \texttt{src}(I).R + \texttt{0.587} * \texttt{src}(I).G + \texttt{0.114} * \texttt{src}(I).B \f] @@ -1367,6 +1371,7 @@ Resulting gray color value computed as GAPI_EXPORTS GMat RGB2Gray(const GMat& src, float rY, float gY, float bY); /** @brief Converts an image from BGR color space to gray-scaled. + The conventional ranges for B, G, and R channel values are 0 to 255. Resulting gray color value computed as \f[\texttt{dst} (I)= \texttt{0.114} * \texttt{src}(I).B + \texttt{0.587} * \texttt{src}(I).G + \texttt{0.299} * \texttt{src}(I).R \f] diff --git a/modules/gapi/include/opencv2/gapi/infer.hpp b/modules/gapi/include/opencv2/gapi/infer.hpp index b850775a62..6e71f59df9 100644 --- a/modules/gapi/include/opencv2/gapi/infer.hpp +++ b/modules/gapi/include/opencv2/gapi/infer.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2019-2020 Intel Corporation +// Copyright (C) 2019-2021 Intel Corporation #ifndef OPENCV_GAPI_INFER_HPP @@ -16,6 +16,7 @@ #include // tuple #include // is_same, false_type +#include // all_satisfy #include // any<> #include // GKernelType[M], GBackend #include // GArg @@ -27,40 +28,160 @@ namespace cv { template class GNetworkType; namespace detail { - template - struct valid_infer2_types; - // Terminal case 1 (50/50 success) - template - struct valid_infer2_types< std::tuple, std::tuple > { - // By default, Nets are limited to GMat argument types only - // for infer2, every GMat argument may translate to either - // GArray or GArray. GArray<> part is stripped - // already at this point. 
- static constexpr const auto value = - std::is_same::type, cv::GMat>::value - || std::is_same::type, cv::Rect>::value; +// Infer /////////////////////////////////////////////////////////////////////// +template +struct accepted_infer_types { + static constexpr const auto value = + std::is_same::type, cv::GMat>::value + || std::is_same::type, cv::GFrame>::value; +}; + +template +using valid_infer_types = all_satisfy; + +// Infer2 ////////////////////////////////////////////////////////////////////// + +template +struct valid_infer2_types; + +// Terminal case 1 (50/50 success) +template +struct valid_infer2_types< std::tuple, std::tuple > { + // By default, Nets are limited to GMat argument types only + // for infer2, every GMat argument may translate to either + // GArray or GArray. GArray<> part is stripped + // already at this point. + static constexpr const auto value = + std::is_same::type, cv::GMat>::value + || std::is_same::type, cv::Rect>::value; +}; + +// Terminal case 2 (100% failure) +template +struct valid_infer2_types< std::tuple<>, std::tuple > + : public std::false_type { +}; + +// Terminal case 3 (100% failure) +template +struct valid_infer2_types< std::tuple, std::tuple<> > + : public std::false_type { +}; + +// Recursion -- generic +template +struct valid_infer2_types< std::tuple, std::tuple > { + static constexpr const auto value = + valid_infer2_types< std::tuple, std::tuple >::value + && valid_infer2_types< std::tuple, std::tuple >::value; +}; + +// Struct stores network input/output names. +// Used by infer +struct InOutInfo +{ + std::vector in_names; + std::vector out_names; +}; + +template +class GInferOutputsTyped +{ +public: + GInferOutputsTyped() = default; + GInferOutputsTyped(std::shared_ptr call) + : m_priv(std::make_shared(std::move(call))) + { + } + + OutT at(const std::string& name) + { + auto it = m_priv->blobs.find(name); + if (it == m_priv->blobs.end()) { + // FIXME: Avoid modifying GKernel + auto shape = cv::detail::GTypeTraits::shape; + m_priv->call->kernel().outShapes.push_back(shape); + m_priv->call->kernel().outCtors.emplace_back(cv::detail::GObtainCtor::get()); + auto out_idx = static_cast(m_priv->blobs.size()); + it = m_priv->blobs.emplace(name, + cv::detail::Yield::yield(*(m_priv->call), out_idx)).first; + m_priv->info->out_names.push_back(name); + } + return it->second; + } +private: + struct Priv + { + Priv(std::shared_ptr c) + : call(std::move(c)), info(cv::util::any_cast(&call->params())) + { + } + + std::shared_ptr call; + InOutInfo* info = nullptr; + std::unordered_map blobs; }; - // Terminal case 2 (100% failure) - template - struct valid_infer2_types< std::tuple<>, std::tuple > - : public std::false_type { + std::shared_ptr m_priv; +}; + +template +class GInferInputsTyped +{ +public: + GInferInputsTyped() + : m_priv(std::make_shared()) + { + } + + template + void setInput(const std::string& name, U in) + { + m_priv->blobs.emplace(std::piecewise_construct, + std::forward_as_tuple(name), + std::forward_as_tuple(in)); + } + + using StorageT = cv::util::variant; + StorageT& operator[](const std::string& name) { + return m_priv->blobs[name]; + } + + using Map = std::unordered_map; + const Map& getBlobs() const { + return m_priv->blobs; + } + +private: + struct Priv + { + std::unordered_map blobs; }; - // Terminal case 3 (100% failure) - template - struct valid_infer2_types< std::tuple, std::tuple<> > - : public std::false_type { - }; + std::shared_ptr m_priv; +}; + +template +std::shared_ptr makeCall(const std::string &tag, + std::vector 
&&args, + std::vector &&names, + cv::GKinds &&kinds) { + auto call = std::make_shared(GKernel{ + InferT::id(), + tag, + InferT::getOutMeta, + {}, // outShape will be filled later + std::move(kinds), + {}, // outCtors will be filled later + }); + + call->setArgs(std::move(args)); + call->params() = cv::detail::InOutInfo{std::move(names), {}}; + + return call; +} - // Recursion -- generic - template - struct valid_infer2_types< std::tuple, std::tuple > { - static constexpr const auto value = - valid_infer2_types< std::tuple, std::tuple >::value - && valid_infer2_types< std::tuple, std::tuple >::value; - }; } // namespace detail // TODO: maybe tuple_wrap_helper from util.hpp may help with this. @@ -76,10 +197,6 @@ public: using API = std::function; using ResultL = std::tuple< cv::GArray... >; - using APIList = std::function, Args...)>; - - // FIXME: Args... must be limited to a single GMat - using APIRoi = std::function, Args...)>; }; // Single-return-value network definition (specialized base class) @@ -94,20 +211,48 @@ public: using API = std::function; using ResultL = cv::GArray; - using APIList = std::function, Args...)>; +}; - // FIXME: Args... must be limited to a single GMat - using APIRoi = std::function, Args...)>; +// InferAPI: Accepts either GMat or GFrame for very individual network's input +template +struct InferAPI { + using type = typename std::enable_if + < detail::valid_infer_types::value + && std::tuple_size::value == sizeof...(Ts) + , std::function + >::type; +}; + +// InferAPIRoi: Accepts a rectangle and either GMat or GFrame +template +struct InferAPIRoi { + using type = typename std::enable_if + < detail::valid_infer_types::value + && std::tuple_size::value == 1u + , std::function, T)> + >::type; +}; + +// InferAPIList: Accepts a list of rectangles and list of GMat/GFrames; +// crops every input. +template +struct InferAPIList { + using type = typename std::enable_if + < detail::valid_infer_types::value + && std::tuple_size::value == sizeof...(Ts) + , std::function, Ts...)> + >::type; }; // APIList2 is also template to allow different calling options // (GArray vs GArray per input) -template +template struct InferAPIList2 { using type = typename std::enable_if - < cv::detail::valid_infer2_types< typename Net::InArgs + < detail::valid_infer_types::value && + cv::detail::valid_infer2_types< typename Net::InArgs , std::tuple >::value, - std::function...)> + std::function...)> >::type; }; @@ -127,49 +272,6 @@ struct GInferBase { } }; -// Struct stores network input/output names. -// Used by infer -struct InOutInfo -{ - std::vector in_names; - std::vector out_names; -}; - -/** - * @{ - * @brief G-API object used to collect network inputs - */ -class GAPI_EXPORTS_W_SIMPLE GInferInputs -{ -using Map = std::unordered_map; -public: - GAPI_WRAP GInferInputs(); - GAPI_WRAP void setInput(const std::string& name, const cv::GMat& value); - - cv::GMat& operator[](const std::string& name); - const Map& getBlobs() const; - -private: - std::shared_ptr in_blobs; -}; -/** @} */ - -/** - * @{ - * @brief G-API object used to collect network outputs - */ -struct GAPI_EXPORTS_W_SIMPLE GInferOutputs -{ -public: - GAPI_WRAP GInferOutputs() = default; - GInferOutputs(std::shared_ptr call); - GAPI_WRAP cv::GMat at(const std::string& name); - -private: - struct Priv; - std::shared_ptr m_priv; -}; -/** @} */ // Base "InferROI" kernel. // All notes from "Infer" kernel apply here as well. struct GInferROIBase { @@ -206,11 +308,11 @@ struct GInferList2Base { // A generic inference kernel. 
API (::on()) is fully defined by the Net // template parameter. // Acts as a regular kernel in graph (via KernelTypeMedium). -template +template struct GInfer final : public GInferBase - , public detail::KernelTypeMedium< GInfer - , typename Net::API > { + , public detail::KernelTypeMedium< GInfer + , typename InferAPI::type > { using GInferBase::getOutMeta; // FIXME: name lookup conflict workaround? static constexpr const char* tag() { return Net::tag(); } @@ -218,11 +320,11 @@ struct GInfer final // A specific roi-inference kernel. API (::on()) is fixed here and // verified against Net. -template +template struct GInferROI final : public GInferROIBase - , public detail::KernelTypeMedium< GInferROI - , typename Net::APIRoi > { + , public detail::KernelTypeMedium< GInferROI + , typename InferAPIRoi::type > { using GInferROIBase::getOutMeta; // FIXME: name lookup conflict workaround? static constexpr const char* tag() { return Net::tag(); } @@ -231,11 +333,11 @@ struct GInferROI final // A generic roi-list inference kernel. API (::on()) is derived from // the Net template parameter (see more in infer<> overload). -template +template struct GInferList final : public GInferListBase - , public detail::KernelTypeMedium< GInferList - , typename Net::APIList > { + , public detail::KernelTypeMedium< GInferList + , typename InferAPIList::type > { using GInferListBase::getOutMeta; // FIXME: name lookup conflict workaround? static constexpr const char* tag() { return Net::tag(); } @@ -246,16 +348,100 @@ struct GInferList final // overload). // Takes an extra variadic template list to reflect how this network // was called (with Rects or GMats as array parameters) -template +template struct GInferList2 final : public GInferList2Base - , public detail::KernelTypeMedium< GInferList2 - , typename InferAPIList2::type > { + , public detail::KernelTypeMedium< GInferList2 + , typename InferAPIList2::type > { using GInferList2Base::getOutMeta; // FIXME: name lookup conflict workaround? 
static constexpr const char* tag() { return Net::tag(); } }; +/** + * @brief G-API object used to collect network inputs + */ +using GInferInputs = cv::detail::GInferInputsTyped; + +/** + * @brief G-API object used to collect the list of network inputs + */ +using GInferListInputs = cv::detail::GInferInputsTyped, cv::GArray>; + +/** + * @brief G-API object used to collect network outputs + */ +using GInferOutputs = cv::detail::GInferOutputsTyped; + +/** + * @brief G-API object used to collect the list of network outputs + */ +using GInferListOutputs = cv::detail::GInferOutputsTyped>; + +namespace detail { +void inline unpackBlobs(const cv::GInferInputs::Map& blobs, + std::vector& args, + std::vector& names, + cv::GKinds& kinds) +{ + for (auto&& p : blobs) { + names.emplace_back(p.first); + switch (p.second.index()) { + case cv::GInferInputs::StorageT::index_of(): + args.emplace_back(cv::util::get(p.second)); + kinds.emplace_back(cv::detail::OpaqueKind::CV_MAT); + break; + case cv::GInferInputs::StorageT::index_of(): + args.emplace_back(cv::util::get(p.second)); + kinds.emplace_back(cv::detail::OpaqueKind::CV_UNKNOWN); + break; + default: + GAPI_Assert(false); + } + } +} + +template +struct InferROITraits; + +template <> +struct InferROITraits +{ + using outType = cv::GInferOutputs; + using inType = cv::GOpaque; +}; + +template <> +struct InferROITraits +{ + using outType = cv::GInferListOutputs; + using inType = cv::GArray; +}; + +template +typename InferROITraits::outType +inferGenericROI(const std::string& tag, + const typename InferROITraits::inType& in, + const cv::GInferInputs& inputs) +{ + std::vector args; + std::vector names; + cv::GKinds kinds; + + args.emplace_back(in); + kinds.emplace_back(cv::detail::OpaqueKind::CV_RECT); + + unpackBlobs(inputs.getBlobs(), args, names, kinds); + + auto call = cv::detail::makeCall(tag, + std::move(args), + std::move(names), + std::move(kinds)); + + return {std::move(call)}; +} + +} // namespace detail } // namespace cv // FIXME: Probably the signature makes a function/tuple/function round-trip @@ -280,9 +466,9 @@ namespace gapi { * objects of appropriate type is returned. * @sa G_API_NET() */ -template -typename Net::Result infer(cv::GOpaque roi, cv::GMat in) { - return GInferROI::on(roi, in); +template +typename Net::Result infer(cv::GOpaque roi, T in) { + return GInferROI::on(roi, in); } /** @brief Calculates responses for the specified network (template @@ -300,7 +486,7 @@ typename Net::Result infer(cv::GOpaque roi, cv::GMat in) { */ template typename Net::ResultL infer(cv::GArray roi, Args&&... args) { - return GInferList::on(roi, std::forward(args)...); + return GInferList::on(roi, std::forward(args)...); } /** @brief Calculates responses for the specified network (template @@ -320,11 +506,12 @@ typename Net::ResultL infer(cv::GArray roi, Args&&... args) { * GArray<> objects is returned with the appropriate types inside. * @sa G_API_NET() */ -template -typename Net::ResultL infer2(cv::GMat image, cv::GArray... args) { + +template +typename Net::ResultL infer2(T image, cv::GArray... args) { // FIXME: Declared as "2" because in the current form it steals // overloads from the regular infer - return GInferList2::on(image, args...); + return GInferList2::on(image, args...); } /** @@ -340,7 +527,7 @@ typename Net::ResultL infer2(cv::GMat image, cv::GArray... args) { */ template typename Net::Result infer(Args&&... 
args) {
-    return GInfer<Net>::on(std::forward<Args>(args)...);
+    return GInfer<Net, Args...>::on(std::forward<Args>(args)...);
 }

 /**
@@ -355,38 +542,98 @@ struct Generic { };
  * @param inputs networks's inputs
  * @return a GInferOutputs
  */
-template<typename T = Generic> GInferOutputs
-infer(const std::string& tag, const GInferInputs& inputs)
+template<typename T = Generic> cv::GInferOutputs
+infer(const std::string& tag, const cv::GInferInputs& inputs)
 {
-    std::vector<GArg> input_args;
-    std::vector<std::string> input_names;
+    std::vector<cv::GArg> args;
+    std::vector<std::string> names;
+    cv::GKinds kinds;

-    const auto& blobs = inputs.getBlobs();
-    for (auto&& p : blobs)
-    {
-        input_names.push_back(p.first);
-        input_args.emplace_back(p.second);
-    }
+    cv::detail::unpackBlobs(inputs.getBlobs(), args, names, kinds);

-    GKinds kinds(blobs.size(), cv::detail::OpaqueKind::CV_MAT);
-    auto call = std::make_shared<cv::GCall>(GKernel{
-            GInferBase::id(),
-            tag,
-            GInferBase::getOutMeta,
-            {}, // outShape will be filled later
-            std::move(kinds),
-            {}, // outCtors will be filled later
-        });
+    auto call = cv::detail::makeCall<GInferBase>(tag,
+                                                 std::move(args),
+                                                 std::move(names),
+                                                 std::move(kinds));

-    call->setArgs(std::move(input_args));
-    call->params() = InOutInfo{input_names, {}};
-
-    return GInferOutputs{std::move(call)};
+    return cv::GInferOutputs{std::move(call)};
 }

-GAPI_EXPORTS_W inline GInferOutputs infer(const String& name, const GInferInputs& inputs)
+/** @brief Calculates response for the generic network
+ *  for the specified region in the source image.
+ *  Currently expects a single-input network only.
+ *
+ * @param tag a network tag
+ * @param roi an object describing the region of interest
+ *  in the source image. May be calculated in the same graph dynamically.
+ * @param inputs network's inputs
+ * @return a cv::GInferOutputs
+ */
+template<typename T = Generic> cv::GInferOutputs
+infer(const std::string& tag, const cv::GOpaque<cv::Rect>& roi, const cv::GInferInputs& inputs)
 {
-    return infer<Generic>(name, inputs);
+    return cv::detail::inferGenericROI<GInferROIBase>(tag, roi, inputs);
+}
+
+/** @brief Calculates responses for the specified network
+ *  for every region in the source image.
+ *
+ * @param tag a network tag
+ * @param rois a list of rectangles describing regions of interest
+ *  in the source image. Usually an output of object detector or tracker.
+ * @param inputs network's inputs
+ * @return a cv::GInferListOutputs
+ */
+template<typename T = Generic> cv::GInferListOutputs
+infer(const std::string& tag, const cv::GArray<cv::Rect>& rois, const cv::GInferInputs& inputs)
+{
+    return cv::detail::inferGenericROI<GInferListBase>(tag, rois, inputs);
+}
+
+/** @brief Calculates responses for the specified network
+ *  for every region in the source image, extended version.
+ *
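+ * A minimal usage sketch (the tag and blob names below are illustrative, not fixed by the API):
+ * @code{.cpp}
+ * cv::GMat in;
+ * cv::GArray<cv::Rect> rois;
+ * cv::GInferListInputs ins;
+ * ins["data"] = rois;
+ * cv::GInferListOutputs outs = cv::gapi::infer2("net-tag", in, ins);
+ * cv::GArray<cv::GMat> probs = outs.at("prob");
+ * @endcode
+ *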
+ * @param inputs networks's inputs + * @return a cv::GInferListOutputs + */ +template +typename std::enable_if::value, cv::GInferListOutputs>::type +infer2(const std::string& tag, + const Input& in, + const cv::GInferListInputs& inputs) +{ + std::vector args; + std::vector names; + cv::GKinds kinds; + + args.emplace_back(in); + auto k = cv::detail::GOpaqueTraits::kind; + kinds.emplace_back(k); + + for (auto&& p : inputs.getBlobs()) { + names.emplace_back(p.first); + switch (p.second.index()) { + case cv::GInferListInputs::StorageT::index_of>(): + args.emplace_back(cv::util::get>(p.second)); + kinds.emplace_back(cv::detail::OpaqueKind::CV_MAT); + break; + case cv::GInferListInputs::StorageT::index_of>(): + args.emplace_back(cv::util::get>(p.second)); + kinds.emplace_back(cv::detail::OpaqueKind::CV_RECT); + break; + default: + GAPI_Assert(false); + } + } + + auto call = cv::detail::makeCall(tag, + std::move(args), + std::move(names), + std::move(kinds)); + + return cv::GInferListOutputs{std::move(call)}; } } // namespace gapi @@ -418,8 +665,8 @@ struct GAPI_EXPORTS GNetParam { * @sa cv::gapi::networks */ struct GAPI_EXPORTS_W_SIMPLE GNetPackage { - GAPI_WRAP GNetPackage() : GNetPackage({}) {} - explicit GNetPackage(std::initializer_list &&ii); + GAPI_WRAP GNetPackage() = default; + explicit GNetPackage(std::initializer_list ii); std::vector backends() const; std::vector networks; }; diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index 53e31fbb09..60137c960c 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -8,6 +8,7 @@ #define OPENCV_GAPI_INFER_IE_HPP #include +#include #include #include #include // tuple, tuple_size @@ -67,6 +68,12 @@ namespace detail { Kind kind; bool is_generic; IEConfig config; + + std::map> reshape_table; + std::unordered_set layer_names_to_reshape; + + // NB: Number of asyncrhonious infer requests + size_t nireq; }; } // namespace detail @@ -91,7 +98,10 @@ public: , std::tuple_size::value // num_out , detail::ParamDesc::Kind::Load , false - , {}} { + , {} + , {} + , {} + , 1u} { }; Params(const std::string &model, @@ -101,7 +111,10 @@ public: , std::tuple_size::value // num_out , detail::ParamDesc::Kind::Import , false - , {}} { + , {} + , {} + , {} + , 1u} { }; Params& cfgInputLayers(const typename PortCfg::In &ll) { @@ -137,6 +150,42 @@ public: return *this; } + Params& cfgNumRequests(size_t nireq) { + GAPI_Assert(nireq > 0 && "Number of infer requests must be greater than zero!"); + desc.nireq = nireq; + return *this; + } + + Params& cfgInputReshape(std::map>&& reshape_table) { + desc.reshape_table = std::move(reshape_table); + return *this; + } + + Params& cfgInputReshape(const std::map>& reshape_table) { + desc.reshape_table = reshape_table; + return *this; + } + + Params& cfgInputReshape(std::string&& layer_name, std::vector&& layer_dims) { + desc.reshape_table.emplace(layer_name, layer_dims); + return *this; + } + + Params& cfgInputReshape(const std::string& layer_name, const std::vector& layer_dims) { + desc.reshape_table.emplace(layer_name, layer_dims); + return *this; + } + + Params& cfgInputReshape(std::unordered_set&& layer_names) { + desc.layer_names_to_reshape = std::move(layer_names); + return *this; + } + + Params& cfgInputReshape(const std::unordered_set& layer_names) { + desc.layer_names_to_reshape = layer_names; + return *this; + } + // BEGIN(G-API's network parametrization API) GBackend backend() const { return 
cv::gapi::ie::backend(); }
    std::string tag()     const { return Net::tag(); }
@@ -154,13 +203,13 @@ public:
            const std::string &model,
            const std::string &weights,
            const std::string &device)
-        : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}}, m_tag(tag) {
+        : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u}, m_tag(tag) {
     };

     Params(const std::string &tag,
            const std::string &model,
            const std::string &device)
-        : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}}, m_tag(tag) {
+        : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u}, m_tag(tag) {
     };

     Params& pluginConfig(IEConfig&& cfg) {
@@ -173,6 +222,19 @@ public:
         return *this;
     }

+    Params& constInput(const std::string &layer_name,
+                       const cv::Mat &data,
+                       TraitAs hint = TraitAs::TENSOR) {
+        desc.const_inputs[layer_name] = {data, hint};
+        return *this;
+    }
+
+    Params& cfgNumRequests(size_t nireq) {
+        GAPI_Assert(nireq > 0 && "Number of infer requests must be greater than zero!");
+        desc.nireq = nireq;
+        return *this;
+    }
+
     // BEGIN(G-API's network parametrization API)
     GBackend      backend() const { return cv::gapi::ie::backend(); }
     std::string   tag()     const { return m_tag; }
diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp
index d61ceb3dca..3a4e35fb09 100644
--- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp
+++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp
@@ -58,6 +58,8 @@ struct ParamDesc {
     PostProc custom_post_proc;

     std::vector<bool> normalize;
+
+    std::vector<std::string> names_to_remap;
 };
 } // namespace detail
@@ -86,7 +88,7 @@ public:
     };

     // BEGIN(G-API's network parametrization API)
-    GBackend backend() const { return cv::gapi::onnx::backend(); }
+    GBackend      backend() const { return cv::gapi::onnx::backend(); }
     std::string   tag()     const { return Net::tag(); }
     cv::util::any params()  const { return { desc }; }
     // END(G-API's network parametrization API)
@@ -115,13 +117,70 @@ public:
         return *this;
     }

-    Params& cfgPostProc(const std::vector<cv::GMatDesc> &outs,
+    /** @brief Configures the graph output and sets a user post-processing function.
+
+    Use this function when inferring networks with dynamic outputs. Since the
+    output parameters of such networks are not known in advance, they have to
+    be provided explicitly so that the graph output can be constructed. The
+    function takes meta information for the outputs together with a
+    post-processing function, which copies data from the ONNX inference result
+    into the graph output allocated according to that meta information.
+
+    @param out_metas out meta information.
+    @param pp post-processing function with two parameters: the first is the
+    ONNX inference result, the second is the graph output. Both parameters are
+    maps from a layer name to a cv::Mat.
+    @return reference to this object of class Params.
+    */
+    Params& cfgPostProc(const std::vector<cv::GMatDesc> &out_metas,
                         const PostProc &pp) {
-        desc.out_metas = outs;
+        desc.out_metas = out_metas;
         desc.custom_post_proc = pp;
         return *this;
     }

+    /** @overload
+    The function accepts rvalue parameters.
+    */
+    Params& cfgPostProc(std::vector<cv::GMatDesc> &&out_metas,
+                        PostProc &&pp) {
+        desc.out_metas = std::move(out_metas);
+        desc.custom_post_proc = std::move(pp);
+        return *this;
+    }
+
+    /** @overload
+    The function takes an additional parameter, names_to_remap, which lists the
+    output layers to be used both for inference and in the post-processing function.
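The new reshape and request-count options compose with the existing fluent Params API. A sketch for a hypothetical single-input network type MyNet (model paths, device and layer name are illustrative):

    G_API_NET(MyNet, <cv::GMat(cv::GMat)>, "my-net");
    auto params = cv::gapi::ie::Params<MyNet>{"model.xml", "model.bin", "CPU"}
        .cfgInputReshape("data", std::vector<std::size_t>{1u, 3u, 320u, 320u})
        .cfgNumRequests(4u);        // four asynchronous infer requests
    auto nets = cv::gapi::networks(params);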
+ + @param out_metas out meta information. + @param pp post processing function. + @param names_to_remap contains names of output layers. CNN's infer will be done on these layers. + Infer's result will be processed in post processing function using these names. + @return reference to object of class Params. + */ + Params& cfgPostProc(const std::vector &out_metas, + const PostProc &pp, + const std::vector &names_to_remap) { + desc.out_metas = out_metas; + desc.custom_post_proc = pp; + desc.names_to_remap = names_to_remap; + return *this; + } + + /** @overload + The function has rvalue parameters. + */ + Params& cfgPostProc(std::vector &&out_metas, + PostProc &&pp, + std::vector &&names_to_remap) { + desc.out_metas = std::move(out_metas); + desc.custom_post_proc = std::move(pp); + desc.names_to_remap = std::move(names_to_remap); + return *this; + } + Params& cfgNormalize(const typename PortCfg::Normalize &n) { desc.normalize.assign(n.begin(), n.end()); return *this; diff --git a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp index 15742c6e55..3225c73831 100644 --- a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp @@ -85,11 +85,11 @@ the larger side of the rectangle. @param filterOutOfBounds If provided true, out-of-frame boxes are filtered. @return a vector of detected bounding boxes. */ -GAPI_EXPORTS GArray parseSSD(const GMat& in, - const GOpaque& inSz, - const float confidenceThreshold = 0.5f, - const bool alignmentToSquare = false, - const bool filterOutOfBounds = false); +GAPI_EXPORTS_W GArray parseSSD(const GMat& in, + const GOpaque& inSz, + const float confidenceThreshold = 0.5f, + const bool alignmentToSquare = false, + const bool filterOutOfBounds = false); /** @brief Parses output of Yolo network. @@ -108,8 +108,8 @@ detection is smaller than confidence threshold, detection is rejected. relative box intersection area required for rejecting the box with a smaller confidence. If 1.f, nms is not performed and no boxes are rejected. @param anchors Anchors Yolo network was trained with. -@note The default anchor values are taken from openvinotoolkit docs: -https://docs.openvinotoolkit.org/latest/omz_models_intel_yolo_v2_tiny_vehicle_detection_0001_description_yolo_v2_tiny_vehicle_detection_0001.html#output. +@note The default anchor values are specified for YOLO v2 Tiny as described in Intel Open Model Zoo +documentation. @return a tuple with a vector of detected boxes and a vector of appropriate labels. 
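As a usage sketch, the wrapped parseSSD overload above plugs directly into a graph; here "detections" and "frame_size" are assumed to be produced by earlier graph stages (names illustrative):

    cv::GMat detections;                 // raw SSD output blob
    cv::GOpaque<cv::Size> frame_size;    // original frame size
    cv::GArray<cv::Rect> boxes =
        cv::gapi::parseSSD(detections, frame_size,
                           0.5f,     // confidenceThreshold
                           false,    // alignmentToSquare
                           true);    // filterOutOfBounds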
*/ GAPI_EXPORTS std::tuple, GArray> parseYolo(const GMat& in, diff --git a/modules/gapi/include/opencv2/gapi/media.hpp b/modules/gapi/include/opencv2/gapi/media.hpp index f27cb80913..3d7f5a5b65 100644 --- a/modules/gapi/include/opencv2/gapi/media.hpp +++ b/modules/gapi/include/opencv2/gapi/media.hpp @@ -30,9 +30,21 @@ public: View access(Access) const; cv::GFrameDesc desc() const; + // Cast underlying MediaFrame adapter to the particular adapter type, + // return nullptr if underlying type is different + template T* get() const + { + static_assert(std::is_base_of::value, + "T is not derived from cv::MediaFrame::IAdapter!"); + auto* adapter = getAdapter(); + GAPI_Assert(adapter != nullptr); + return dynamic_cast(adapter); + } + private: struct Priv; std::shared_ptr m; + IAdapter* getAdapter() const; }; template diff --git a/modules/gapi/include/opencv2/gapi/own/assert.hpp b/modules/gapi/include/opencv2/gapi/own/assert.hpp index d0e0f1c3ff..d50543fdac 100644 --- a/modules/gapi/include/opencv2/gapi/own/assert.hpp +++ b/modules/gapi/include/opencv2/gapi/own/assert.hpp @@ -2,16 +2,28 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2020 Intel Corporation #ifndef OPENCV_GAPI_OWN_ASSERT_HPP #define OPENCV_GAPI_OWN_ASSERT_HPP +#include + +#define GAPI_DbgAssertNoOp(expr) { \ + constexpr bool _assert_tmp = false && (expr); \ + cv::util::suppress_unused_warning(_assert_tmp); \ +} + #if !defined(GAPI_STANDALONE) #include #define GAPI_Assert CV_Assert -#define GAPI_DbgAssert CV_DbgAssert + +#if defined _DEBUG || defined CV_STATIC_ANALYSIS +# define GAPI_DbgAssert CV_DbgAssert +#else +# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) +#endif #else #include @@ -33,7 +45,7 @@ namespace detail #ifdef NDEBUG -# define GAPI_DbgAssert(expr) +# define GAPI_DbgAssert(expr) GAPI_DbgAssertNoOp(expr) #else # define GAPI_DbgAssert(expr) GAPI_Assert(expr) #endif diff --git a/modules/gapi/include/opencv2/gapi/own/exports.hpp b/modules/gapi/include/opencv2/gapi/own/exports.hpp index da42a3238c..1978991b75 100644 --- a/modules/gapi/include/opencv2/gapi/own/exports.hpp +++ b/modules/gapi/include/opencv2/gapi/own/exports.hpp @@ -12,10 +12,12 @@ # include # define GAPI_EXPORTS CV_EXPORTS /* special informative macros for wrapper generators */ +# define GAPI_PROP CV_PROP # define GAPI_WRAP CV_WRAP # define GAPI_EXPORTS_W_SIMPLE CV_EXPORTS_W_SIMPLE # define GAPI_EXPORTS_W CV_EXPORTS_W # else +# define GAPI_PROP # define GAPI_WRAP # define GAPI_EXPORTS # define GAPI_EXPORTS_W_SIMPLE diff --git a/modules/gapi/include/opencv2/gapi/python/python.hpp b/modules/gapi/include/opencv2/gapi/python/python.hpp new file mode 100644 index 0000000000..1c85d69d9f --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/python/python.hpp @@ -0,0 +1,58 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
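The new MediaFrame::get<T>() helper makes backend-specific adapter access explicit. A sketch, where MyAdapter is a hypothetical cv::MediaFrame::IAdapter implementation:

    cv::MediaFrame frame = cv::MediaFrame::Create<MyAdapter>();  // MyAdapter is hypothetical
    if (auto* adapter = frame.get<MyAdapter>()) {
        // use the adapter-specific interface here
    }   // get<T>() returns nullptr when the underlying adapter type differs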
+// +// Copyright (C) 2021 Intel Corporation + + +#ifndef OPENCV_GAPI_PYTHON_API_HPP +#define OPENCV_GAPI_PYTHON_API_HPP + +#include // GKernelPackage +#include // GAPI_EXPORTS + +namespace cv { +namespace gapi { +namespace python { + +GAPI_EXPORTS cv::gapi::GBackend backend(); + +struct GPythonContext +{ + const cv::GArgs &ins; + const cv::GMetaArgs &in_metas; + const cv::GTypesInfo &out_info; +}; + +using Impl = std::function; + +class GAPI_EXPORTS GPythonKernel +{ +public: + GPythonKernel() = default; + GPythonKernel(Impl run); + + cv::GRunArgs operator()(const GPythonContext& ctx); +private: + Impl m_run; +}; + +class GAPI_EXPORTS GPythonFunctor : public cv::gapi::GFunctor +{ +public: + using Meta = cv::GKernel::M; + + GPythonFunctor(const char* id, const Meta &meta, const Impl& impl); + + GKernelImpl impl() const override; + gapi::GBackend backend() const override; + +private: + GKernelImpl impl_; +}; + +} // namespace python +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_PYTHON_API_HPP diff --git a/modules/gapi/include/opencv2/gapi/render/render.hpp b/modules/gapi/include/opencv2/gapi/render/render.hpp index a4df304289..a84c26c810 100644 --- a/modules/gapi/include/opencv2/gapi/render/render.hpp +++ b/modules/gapi/include/opencv2/gapi/render/render.hpp @@ -97,6 +97,17 @@ void GAPI_EXPORTS render(cv::Mat& y_plane, const Prims& prims, cv::GCompileArgs&& args = {}); +/** @brief The function renders on the input media frame passed drawing primitivies + +@param frame input Media Frame : @ref cv::MediaFrame. +@param prims vector of drawing primitivies +@param args graph compile time parameters +*/ +void GAPI_EXPORTS render(cv::MediaFrame& frame, + const Prims& prims, + cv::GCompileArgs&& args = {}); + + G_TYPED_KERNEL_M(GRenderNV12, )>, "org.opencv.render.nv12") { static GMatDesc2 outMeta(GMatDesc y_plane, GMatDesc uv_plane, GArrayDesc) @@ -113,6 +124,14 @@ G_TYPED_KERNEL(GRenderBGR, )>, "or } }; +G_TYPED_KERNEL(GRenderFrame, )>, "org.opencv.render.frame") +{ + static GFrameDesc outMeta(GFrameDesc desc, GArrayDesc) + { + return desc; + } +}; + /** @brief Renders on 3 channels input Output image must be 8-bit unsigned planar 3-channel image @@ -134,6 +153,17 @@ uv image must be 8-bit unsigned planar 2-channel image @ref CV_8UC2 GAPI_EXPORTS GMat2 renderNV12(const GMat& y, const GMat& uv, const GArray& prims); + +/** @brief Renders Media Frame + +Output media frame frame cv::MediaFrame + +@param m_frame input image: cv::MediaFrame @ref cv::MediaFrame +@param prims draw primitives +*/ +GAPI_EXPORTS GFrame renderFrame(const GFrame& m_frame, + const GArray& prims); + //! @} gapi_draw_api } // namespace draw diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index 0e2c4c239b..5a64410e5a 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
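A sketch of the MediaFrame render overload introduced above; the frame is assumed to be a valid (e.g. NV12) cv::MediaFrame produced elsewhere:

    cv::MediaFrame frame;   // assumed to hold a valid frame in practice
    cv::gapi::wip::draw::Prims prims;
    prims.emplace_back(cv::gapi::wip::draw::Rect(cv::Rect(10, 10, 64, 64),
                                                 cv::Scalar(0, 255, 0), 2));
    cv::gapi::wip::draw::render(frame, prims);   // draws in-place on the frame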
// -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation #ifndef OPENCV_GAPI_S11N_HPP #define OPENCV_GAPI_S11N_HPP @@ -24,6 +24,8 @@ namespace detail { GAPI_EXPORTS cv::GRunArgs getRunArgs(const std::vector &p); + GAPI_EXPORTS std::vector getVectorOfStrings(const std::vector &p); + template cv::GCompileArgs getCompileArgs(const std::vector &p); @@ -42,6 +44,7 @@ T deserialize(const std::vector &p); GAPI_EXPORTS std::vector serialize(const cv::GCompileArgs&); GAPI_EXPORTS std::vector serialize(const cv::GMetaArgs&); GAPI_EXPORTS std::vector serialize(const cv::GRunArgs&); +GAPI_EXPORTS std::vector serialize(const std::vector&); template<> inline cv::GComputation deserialize(const std::vector &p) { @@ -58,6 +61,11 @@ cv::GRunArgs deserialize(const std::vector &p) { return detail::getRunArgs(p); } +template<> inline +std::vector deserialize(const std::vector &p) { + return detail::getVectorOfStrings(p); +} + template inline typename std::enable_if::value, GCompileArgs>:: type deserialize(const std::vector &p) { diff --git a/modules/gapi/include/opencv2/gapi/stereo.hpp b/modules/gapi/include/opencv2/gapi/stereo.hpp new file mode 100644 index 0000000000..908045d4c7 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/stereo.hpp @@ -0,0 +1,64 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distereoibution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_STEREO_HPP +#define OPENCV_GAPI_STEREO_HPP + +#include +#include +#include + +namespace cv { +namespace gapi { + +enum class StereoOutputFormat { + DEPTH_FLOAT16, + DEPTH_FLOAT32, + DISPARITY_FIXED16_11_5, + DISPARITY_FIXED16_12_4 +}; + +namespace calib3d { + +G_TYPED_KERNEL(GStereo, , "org.opencv.stereo") { + static GMatDesc outMeta(const GMatDesc &left, const GMatDesc &right, const StereoOutputFormat of) { + GAPI_Assert(left.chan == 1); + GAPI_Assert(left.depth == CV_8U); + + GAPI_Assert(right.chan == 1); + GAPI_Assert(right.depth == CV_8U); + + switch(of) { + case StereoOutputFormat::DEPTH_FLOAT16: + return left.withDepth(CV_16FC1); + case StereoOutputFormat::DEPTH_FLOAT32: + return left.withDepth(CV_32FC1); + case StereoOutputFormat::DISPARITY_FIXED16_11_5: + case StereoOutputFormat::DISPARITY_FIXED16_12_4: + return left.withDepth(CV_16SC1); + default: + GAPI_Assert(false && "Unknown output format!"); + } + } +}; + +} // namespace calib3d + +/** @brief Extract disparity/depth information depending on passed StereoOutputFormat argument. +The function extracts disparity/depth information depending on passed StereoOutputFormat argument from +given stereo-pair. + +@param left left 8-bit unsigned 1-channel image of @ref CV_8UC1 type +@param right right 8-bit unsigned 1-channel image of @ref CV_8UC1 type +@param of enum to specify output kind: depth or disparity and corresponding type +*/ +GAPI_EXPORTS GMat stereo(const GMat& left, + const GMat& right, + const StereoOutputFormat of = StereoOutputFormat::DEPTH_FLOAT32); +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STEREO_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/format.hpp b/modules/gapi/include/opencv2/gapi/streaming/format.hpp new file mode 100644 index 0000000000..c9d2fa3e0a --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/format.hpp @@ -0,0 +1,94 @@ +// This file is part of OpenCV project. 
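The new stereo API above composes like any other G-API operation; a minimal sketch, assuming rectified 8-bit single-channel inputs:

    cv::GMat left, right;
    cv::GMat depth = cv::gapi::stereo(left, right,
                                      cv::gapi::StereoOutputFormat::DEPTH_FLOAT32);
    cv::GComputation sgm(cv::GIn(left, right), cv::GOut(depth));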
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_GSTREAMING_FORMAT_HPP +#define OPENCV_GAPI_GSTREAMING_FORMAT_HPP + +#include // GKernelPackage + +namespace cv { +namespace gapi { +namespace streaming { + +GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); + +G_API_OP(GBGR, , "org.opencv.streaming.BGR") +{ + static GMatDesc outMeta(const GFrameDesc& in) { return GMatDesc{CV_8U, 3, in.size}; } +}; + +G_API_OP(GY, , "org.opencv.streaming.Y") { + static GMatDesc outMeta(const GFrameDesc& frameDesc) { + return GMatDesc { CV_8U, 1, frameDesc.size , false }; + } +}; + +G_API_OP(GUV, , "org.opencv.streaming.UV") { + static GMatDesc outMeta(const GFrameDesc& frameDesc) { + return GMatDesc { CV_8U, 2, cv::Size(frameDesc.size.width / 2, frameDesc.size.height / 2), + false }; + } +}; + +/** @brief Gets bgr plane from input frame + +@note Function textual ID is "org.opencv.streaming.BGR" + +@param in Input frame +@return Image in BGR format +*/ +GAPI_EXPORTS cv::GMat BGR(const cv::GFrame& in); + +/** @brief Extracts Y plane from media frame. + +Output image is 8-bit 1-channel image of @ref CV_8UC1. + +@note Function textual ID is "org.opencv.streaming.Y" + +@param frame input media frame. +*/ +GAPI_EXPORTS GMat Y(const cv::GFrame& frame); + +/** @brief Extracts UV plane from media frame. + +Output image is 8-bit 2-channel image of @ref CV_8UC2. + +@note Function textual ID is "org.opencv.streaming.UV" + +@param frame input media frame. +*/ +GAPI_EXPORTS GMat UV(const cv::GFrame& frame); +} // namespace streaming + +//! @addtogroup gapi_transform +//! @{ +/** @brief Makes a copy of the input image. Note that this copy may be not real +(no actual data copied). Use this function to maintain graph contracts, +e.g when graph's input needs to be passed directly to output, like in Streaming mode. + +@note Function textual ID is "org.opencv.streaming.copy" + +@param in Input image +@return Copy of the input +*/ +GAPI_EXPORTS GMat copy(const GMat& in); + +/** @brief Makes a copy of the input frame. Note that this copy may be not real +(no actual data copied). Use this function to maintain graph contracts, +e.g when graph's input needs to be passed directly to output, like in Streaming mode. + +@note Function textual ID is "org.opencv.streaming.copy" + +@param in Input frame +@return Copy of the input +*/ +GAPI_EXPORTS GFrame copy(const GFrame& in); +//! @} gapi_transform + +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_GSTREAMING_FORMAT_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/sync.hpp b/modules/gapi/include/opencv2/gapi/streaming/sync.hpp new file mode 100644 index 0000000000..5801e6f00a --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/sync.hpp @@ -0,0 +1,30 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
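A usage sketch for the plane accessors above, pulling individual planes out of a streaming cv::GFrame:

    cv::GFrame in;
    cv::GMat bgr = cv::gapi::streaming::BGR(in);  // full-resolution BGR
    cv::GMat y   = cv::gapi::streaming::Y(in);    // luma plane, CV_8UC1
    cv::GMat uv  = cv::gapi::streaming::UV(in);   // chroma plane, CV_8UC2, half size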
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_SYNC_HPP +#define OPENCV_GAPI_STREAMING_SYNC_HPP + +namespace cv { +namespace gapi { +namespace streaming { + +enum class sync_policy { + dont_sync, + drop +}; + +} // namespace streaming +} // namespace gapi + +namespace detail { + template<> struct CompileArgTag { + static const char* tag() { return "gapi.streaming.sync_policy"; } + }; + +} // namespace detail +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_SYNC_HPP diff --git a/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp b/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp new file mode 100644 index 0000000000..1a1121eb21 --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/util/copy_through_move.hpp @@ -0,0 +1,34 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2020 Intel Corporation + +#ifndef OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP +#define OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP + +#include //decay_t + +namespace cv +{ +namespace util +{ + //This is a tool to move initialize captures of a lambda in C++11 + template + struct copy_through_move_t{ + T value; + const T& get() const {return value;} + T& get() {return value;} + copy_through_move_t(T&& g) : value(std::move(g)) {} + copy_through_move_t(copy_through_move_t&&) = default; + copy_through_move_t(copy_through_move_t const& lhs) : copy_through_move_t(std::move(const_cast(lhs))) {} + }; + + template + copy_through_move_t> copy_through_move(T&& t){ + return std::forward(t); + } +} // namespace util +} // namespace cv + +#endif /* OPENCV_GAPI_UTIL_COPY_THROUGH_MOVE_HPP */ diff --git a/modules/gapi/include/opencv2/gapi/util/optional.hpp b/modules/gapi/include/opencv2/gapi/util/optional.hpp index 1aa2b265d9..6c8ceebbda 100644 --- a/modules/gapi/include/opencv2/gapi/util/optional.hpp +++ b/modules/gapi/include/opencv2/gapi/util/optional.hpp @@ -35,9 +35,9 @@ namespace util // instead {} optional() {}; optional(const optional&) = default; - explicit optional(T &&value) noexcept; - explicit optional(const T &value) noexcept; - optional(optional &&) noexcept; + explicit optional(T&&) noexcept; + explicit optional(const T&) noexcept; + optional(optional&&) noexcept; // TODO: optional(nullopt_t) noexcept; // TODO: optional(const optional &) // TODO: optional(optional &&) @@ -46,8 +46,8 @@ namespace util // TODO: optional(U&& value); // Assignment - optional& operator=(const optional& rhs) = default; - optional& operator=(optional&& rhs); + optional& operator=(const optional&) = default; + optional& operator=(optional&&); // Observers T* operator-> (); @@ -84,7 +84,7 @@ namespace util // Implementation ////////////////////////////////////////////////////////// template optional::optional(T &&v) noexcept - : m_holder(v) + : m_holder(std::move(v)) { } diff --git a/modules/gapi/include/opencv2/gapi/video.hpp b/modules/gapi/include/opencv2/gapi/video.hpp index 7f90134e6d..10965b0aa6 100644 --- a/modules/gapi/include/opencv2/gapi/video.hpp +++ b/modules/gapi/include/opencv2/gapi/video.hpp @@ -16,6 +16,32 @@ */ namespace cv { namespace gapi { + +/** @brief Structure for the Kalman filter's initialization parameters.*/ + +struct GAPI_EXPORTS KalmanParams +{ + // initial state + + //! corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + Mat state; + //! 
posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k) + Mat errorCov; + + // dynamic system description + + //! state transition matrix (A) + Mat transitionMatrix; + //! measurement matrix (H) + Mat measurementMatrix; + //! process noise covariance matrix (Q) + Mat processNoiseCov; + //! measurement noise covariance matrix (R) + Mat measurementNoiseCov; + //! control matrix (B) (Optional: not used if there's no control) + Mat controlMatrix; +}; + namespace video { using GBuildPyrOutput = std::tuple, GScalar>; @@ -62,6 +88,95 @@ G_TYPED_KERNEL(GCalcOptFlowLKForPyr, return std::make_tuple(empty_array_desc(), empty_array_desc(), empty_array_desc()); } }; + +enum BackgroundSubtractorType +{ + TYPE_BS_MOG2, + TYPE_BS_KNN +}; + +/** @brief Structure for the Background Subtractor operation's initialization parameters.*/ + +struct BackgroundSubtractorParams +{ + //! Type of the Background Subtractor operation. + BackgroundSubtractorType operation = TYPE_BS_MOG2; + + //! Length of the history. + int history = 500; + + //! For MOG2: Threshold on the squared Mahalanobis distance between the pixel + //! and the model to decide whether a pixel is well described by + //! the background model. + //! For KNN: Threshold on the squared distance between the pixel and the sample + //! to decide whether a pixel is close to that sample. + double threshold = 16; + + //! If true, the algorithm will detect shadows and mark them. + bool detectShadows = true; + + //! The value between 0 and 1 that indicates how fast + //! the background model is learnt. + //! Negative parameter value makes the algorithm use some automatically + //! chosen learning rate. + double learningRate = -1; + + //! default constructor + BackgroundSubtractorParams() {} + + /** Full constructor + @param op MOG2/KNN Background Subtractor type. + @param histLength Length of the history. + @param thrshld For MOG2: Threshold on the squared Mahalanobis distance between + the pixel and the model to decide whether a pixel is well described by the background model. + For KNN: Threshold on the squared distance between the pixel and the sample to decide + whether a pixel is close to that sample. + @param detect If true, the algorithm will detect shadows and mark them. It decreases the + speed a bit, so if you do not need this feature, set the parameter to false. + @param lRate The value between 0 and 1 that indicates how fast the background model is learnt. + Negative parameter value makes the algorithm to use some automatically chosen learning rate. 
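Together with the cv::gapi::BackgroundSubtractor function declared further below, the parameters structure above is used like this (all values illustrative):

    cv::gapi::video::BackgroundSubtractorParams bs_params(
        cv::gapi::video::TYPE_BS_MOG2,
        500,      // histLength
        16.0,     // thrshld
        true,     // detect shadows
        -1.0);    // lRate: automatically chosen learning rate
    cv::GMat in;
    cv::GMat fg_mask = cv::gapi::BackgroundSubtractor(in, bs_params);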
+ */ + BackgroundSubtractorParams(BackgroundSubtractorType op, int histLength, + double thrshld, bool detect, double lRate) : operation(op), + history(histLength), + threshold(thrshld), + detectShadows(detect), + learningRate(lRate){} +}; + +G_TYPED_KERNEL(GBackgroundSubtractor, , + "org.opencv.video.BackgroundSubtractor") +{ + static GMatDesc outMeta(const GMatDesc& in, const BackgroundSubtractorParams& bsParams) + { + GAPI_Assert(bsParams.history >= 0); + GAPI_Assert(bsParams.learningRate <= 1); + return in.withType(CV_8U, 1); + } +}; + +void checkParams(const cv::gapi::KalmanParams& kfParams, + const cv::GMatDesc& measurement, const cv::GMatDesc& control = {}); + +G_TYPED_KERNEL(GKalmanFilter, , GMat, KalmanParams)>, + "org.opencv.video.KalmanFilter") +{ + static GMatDesc outMeta(const GMatDesc& measurement, const GOpaqueDesc&, + const GMatDesc& control, const KalmanParams& kfParams) + { + checkParams(kfParams, measurement, control); + return measurement.withSize(Size(1, kfParams.transitionMatrix.rows)); + } +}; + +G_TYPED_KERNEL(GKalmanFilterNoControl, , KalmanParams)>, "org.opencv.video.KalmanFilterNoControl") +{ + static GMatDesc outMeta(const GMatDesc& measurement, const GOpaqueDesc&, const KalmanParams& kfParams) + { + checkParams(kfParams, measurement); + return measurement.withSize(Size(1, kfParams.transitionMatrix.rows)); + } +}; } //namespace video //! @addtogroup gapi_video @@ -83,8 +198,9 @@ G_TYPED_KERNEL(GCalcOptFlowLKForPyr, @param tryReuseInputImage put ROI of input image into the pyramid if possible. You can pass false to force data copying. -@return output pyramid. -@return number of levels in constructed pyramid. Can be less than maxLevel. +@return + - output pyramid. + - number of levels in constructed pyramid. Can be less than maxLevel. */ GAPI_EXPORTS std::tuple, GScalar> buildOpticalFlowPyramid(const GMat &img, @@ -131,11 +247,12 @@ by number of pixels in a window; if this value is less than minEigThreshold, the feature is filtered out and its flow is not processed, so it allows to remove bad points and get a performance boost. -@return GArray of 2D points (with single-precision floating-point coordinates) +@return + - GArray of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. -@return status GArray (of unsigned chars); each element of the vector is set to 1 if + - status GArray (of unsigned chars); each element of the vector is set to 1 if the flow for the corresponding features has been found, otherwise, it is set to 0. -@return GArray of errors (doubles); each element of the vector is set to an error for the + - GArray of errors (doubles); each element of the vector is set to an error for the corresponding feature, type of the error measure can be set in flags parameter; if the flow wasn't found then the error is not defined (use the status parameter to find such cases). */ @@ -169,8 +286,75 @@ calcOpticalFlowPyrLK(const GArray &prevPyr, int flags = 0, double minEigThresh = 1e-4); +/** @brief Gaussian Mixture-based or K-nearest neighbours-based Background/Foreground Segmentation Algorithm. +The operation generates a foreground mask. + +@return Output image is foreground mask, i.e. 8-bit unsigned 1-channel (binary) matrix @ref CV_8UC1. + +@note Functional textual ID is "org.opencv.video.BackgroundSubtractor" + +@param src input image: Floating point frame is used without scaling and should be in range [0,255]. 
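A sketch of filling cv::gapi::KalmanParams for a simple constant-velocity model (two state variables, one measurement; all values illustrative) and feeding it to the cv::gapi::KalmanFilter operation declared below:

    cv::gapi::KalmanParams kp;
    kp.state               = cv::Mat::zeros(2, 1, CV_32F);
    kp.errorCov            = cv::Mat::eye(2, 2, CV_32F);
    kp.transitionMatrix    = (cv::Mat_<float>(2, 2) << 1, 1, 0, 1);
    kp.measurementMatrix   = (cv::Mat_<float>(1, 2) << 1, 0);
    kp.processNoiseCov     = cv::Mat::eye(2, 2, CV_32F) * 1e-4f;
    kp.measurementNoiseCov = cv::Mat::eye(1, 1, CV_32F) * 1e-1f;

    cv::GMat measurement;
    cv::GOpaque<bool> have_measurement;
    cv::GMat state = cv::gapi::KalmanFilter(measurement, have_measurement, kp);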
+@param bsParams Set of initialization parameters for Background Subtractor kernel. +*/ +GAPI_EXPORTS GMat BackgroundSubtractor(const GMat& src, const cv::gapi::video::BackgroundSubtractorParams& bsParams); + +/** @brief Standard Kalman filter algorithm . + +@note Functional textual ID is "org.opencv.video.KalmanFilter" + +@param measurement input matrix: 32-bit or 64-bit float 1-channel matrix containing measurements. +@param haveMeasurement dynamic input flag that indicates whether we get measurements +at a particular iteration . +@param control input matrix: 32-bit or 64-bit float 1-channel matrix contains control data +for changing dynamic system. +@param kfParams Set of initialization parameters for Kalman filter kernel. + +@return Output matrix is predicted or corrected state. They can be 32-bit or 64-bit float +1-channel matrix @ref CV_32FC1 or @ref CV_64FC1. + +@details If measurement matrix is given (haveMeasurements == true), corrected state will +be returned which corresponds to the pipeline +cv::KalmanFilter::predict(control) -> cv::KalmanFilter::correct(measurement). +Otherwise, predicted state will be returned which corresponds to the call of +cv::KalmanFilter::predict(control). +@sa cv::KalmanFilter +*/ +GAPI_EXPORTS GMat KalmanFilter(const GMat& measurement, const GOpaque& haveMeasurement, + const GMat& control, const cv::gapi::KalmanParams& kfParams); + +/** @overload +The case of Standard Kalman filter algorithm when there is no control in a dynamic system. +In this case the controlMatrix is empty and control vector is absent. + +@note Function textual ID is "org.opencv.video.KalmanFilterNoControl" + +@param measurement input matrix: 32-bit or 64-bit float 1-channel matrix containing measurements. +@param haveMeasurement dynamic input flag that indicates whether we get measurements +at a particular iteration. +@param kfParams Set of initialization parameters for Kalman filter kernel. + +@return Output matrix is predicted or corrected state. They can be 32-bit or 64-bit float +1-channel matrix @ref CV_32FC1 or @ref CV_64FC1. + +@sa cv::KalmanFilter + */ +GAPI_EXPORTS GMat KalmanFilter(const GMat& measurement, const GOpaque& haveMeasurement, + const cv::gapi::KalmanParams& kfParams); + //! 
@} gapi_video } //namespace gapi } //namespace cv + +namespace cv { namespace detail { +template<> struct CompileArgTag +{ + static const char* tag() + { + return "org.opencv.video.background_substractor_params"; + } +}; +} // namespace detail +} // namespace cv + #endif // OPENCV_GAPI_VIDEO_HPP diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index e25328e64f..56a7e70d88 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -3,67 +3,140 @@ #ifdef HAVE_OPENCV_GAPI +#ifdef _MSC_VER +#pragma warning(disable: 4503) // "decorated name length exceeded" + // on empty_meta(const cv::GMetaArgs&, const cv::GArgs&) +#endif + +#include +#include + // NB: Python wrapper replaces :: with _ for classes -using gapi_GKernelPackage = cv::gapi::GKernelPackage; -using gapi_GNetPackage = cv::gapi::GNetPackage; -using gapi_ie_PyParams = cv::gapi::ie::PyParams; +using gapi_GKernelPackage = cv::gapi::GKernelPackage; +using gapi_GNetPackage = cv::gapi::GNetPackage; +using gapi_ie_PyParams = cv::gapi::ie::PyParams; using gapi_wip_IStreamSource_Ptr = cv::Ptr; +using detail_ExtractArgsCallback = cv::detail::ExtractArgsCallback; +using detail_ExtractMetaCallback = cv::detail::ExtractMetaCallback; + +// NB: Python wrapper generate T_U for T +// This behavior is only observed for inputs +using GOpaque_bool = cv::GOpaque; +using GOpaque_int = cv::GOpaque; +using GOpaque_double = cv::GOpaque; +using GOpaque_float = cv::GOpaque; +using GOpaque_string = cv::GOpaque; +using GOpaque_Point2i = cv::GOpaque; +using GOpaque_Point2f = cv::GOpaque; +using GOpaque_Size = cv::GOpaque; +using GOpaque_Rect = cv::GOpaque; + +using GArray_bool = cv::GArray; +using GArray_int = cv::GArray; +using GArray_double = cv::GArray; +using GArray_float = cv::GArray; +using GArray_string = cv::GArray; +using GArray_Point2i = cv::GArray; +using GArray_Point2f = cv::GArray; +using GArray_Size = cv::GArray; +using GArray_Rect = cv::GArray; +using GArray_Scalar = cv::GArray; +using GArray_Mat = cv::GArray; +using GArray_GMat = cv::GArray; // FIXME: Python wrapper generate code without namespace std, // so it cause error: "string wasn't declared" // WA: Create using using std::string; -template<> +template <> bool pyopencv_to(PyObject* obj, std::vector& value, const ArgInfo& info) { return pyopencv_to_generic_vec(obj, value, info); } -template<> +template <> PyObject* pyopencv_from(const std::vector& value) { return pyopencv_from_generic_vec(value); } -template<> +template <> bool pyopencv_to(PyObject* obj, GRunArgs& value, const ArgInfo& info) { return pyopencv_to_generic_vec(obj, value, info); } -static PyObject* from_grunarg(const GRunArg& v) +template<> +PyObject* pyopencv_from(const cv::detail::OpaqueRef& o) +{ + switch (o.getKind()) + { + case cv::detail::OpaqueKind::CV_BOOL : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_INT : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_DOUBLE : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_FLOAT : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_STRING : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_POINT : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_POINT2F : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_SIZE : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_RECT : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_UNKNOWN : break; + 
case cv::detail::OpaqueKind::CV_UINT64 : break; + case cv::detail::OpaqueKind::CV_SCALAR : break; + case cv::detail::OpaqueKind::CV_MAT : break; + case cv::detail::OpaqueKind::CV_DRAW_PRIM : break; + } + + PyErr_SetString(PyExc_TypeError, "Unsupported GOpaque type"); + return NULL; +}; + +template <> +PyObject* pyopencv_from(const cv::detail::VectorRef& v) +{ + switch (v.getKind()) + { + case cv::detail::OpaqueKind::CV_BOOL : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_INT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_DOUBLE : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_FLOAT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_STRING : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_POINT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_POINT2F : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_SIZE : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_RECT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_SCALAR : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_MAT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_UNKNOWN : break; + case cv::detail::OpaqueKind::CV_UINT64 : break; + case cv::detail::OpaqueKind::CV_DRAW_PRIM : break; + } + + PyErr_SetString(PyExc_TypeError, "Unsupported GArray type"); + return NULL; +} + +template <> +PyObject* pyopencv_from(const GRunArg& v) { switch (v.index()) { case GRunArg::index_of(): - { - const auto& m = util::get(v); - return pyopencv_from(m); - } + return pyopencv_from(util::get(v)); case GRunArg::index_of(): - { - const auto& s = util::get(v); - return pyopencv_from(s); - } + return pyopencv_from(util::get(v)); + case GRunArg::index_of(): - { - const auto& vref = util::get(v); - switch (vref.getKind()) - { - case cv::detail::OpaqueKind::CV_POINT2F: - return pyopencv_from(vref.rref()); - default: - PyErr_SetString(PyExc_TypeError, "Unsupported kind for GArray"); - return NULL; - } - } - default: - PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs"); - return NULL; + return pyopencv_from(util::get(v)); + + case GRunArg::index_of(): + return pyopencv_from(util::get(v)); } - GAPI_Assert(false); + + PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs"); + return NULL; } template<> @@ -74,7 +147,7 @@ PyObject* pyopencv_from(const GRunArgs& value) // NB: It doesn't make sense to return list with a single element if (n == 1) { - PyObject* item = from_grunarg(value[0]); + PyObject* item = pyopencv_from(value[0]); if(!item) { return NULL; @@ -85,7 +158,7 @@ PyObject* pyopencv_from(const GRunArgs& value) PyObject* list = PyList_New(n); for(i = 0; i < n; ++i) { - PyObject* item = from_grunarg(value[i]); + PyObject* item = pyopencv_from(value[i]); if(!item) { Py_DECREF(list); @@ -110,6 +183,26 @@ PyObject* pyopencv_from(const GMetaArgs& value) return pyopencv_from_generic_vec(value); } +template +void pyopencv_to_with_check(PyObject* from, T& to, const std::string& msg = "") +{ + if (!pyopencv_to(from, to, ArgInfo("", false))) + { + cv::util::throw_error(std::logic_error(msg)); + } +} + +template +void pyopencv_to_generic_vec_with_check(PyObject* from, + std::vector& to, + const std::string& msg = "") +{ + if (!pyopencv_to_generic_vec(from, to, ArgInfo("", false))) + { + 
cv::util::throw_error(std::logic_error(msg)); + } +} + template static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) { @@ -117,6 +210,7 @@ static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) GProtoArgs args; Py_ssize_t size = PyTuple_Size(py_args); + args.reserve(size); for (int i = 0; i < size; ++i) { PyObject* item = PyTuple_GetItem(py_args, i); @@ -128,9 +222,13 @@ static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) { args.emplace_back(reinterpret_cast(item)->v); } - else if (PyObject_TypeCheck(item, reinterpret_cast(pyopencv_GArrayP2f_TypePtr))) + else if (PyObject_TypeCheck(item, reinterpret_cast(pyopencv_GOpaqueT_TypePtr))) { - args.emplace_back(reinterpret_cast(item)->v.strip()); + args.emplace_back(reinterpret_cast(item)->v.strip()); + } + else if (PyObject_TypeCheck(item, reinterpret_cast(pyopencv_GArrayT_TypePtr))) + { + args.emplace_back(reinterpret_cast(item)->v.strip()); } else { @@ -152,63 +250,553 @@ static PyObject* pyopencv_cv_GOut(PyObject* , PyObject* py_args, PyObject* kw) return extract_proto_args(py_args, kw); } -static PyObject* pyopencv_cv_gin(PyObject* , PyObject* py_args, PyObject* kw) +static cv::detail::OpaqueRef extract_opaque_ref(PyObject* from, cv::detail::OpaqueKind kind) { - using namespace cv; - - GRunArgs args; - Py_ssize_t size = PyTuple_Size(py_args); - for (int i = 0; i < size; ++i) +#define HANDLE_CASE(T, O) case cv::detail::OpaqueKind::CV_##T: \ +{ \ + O obj{}; \ + pyopencv_to_with_check(from, obj, "Failed to obtain " # O); \ + return cv::detail::OpaqueRef{std::move(obj)}; \ +} +#define UNSUPPORTED(T) case cv::detail::OpaqueKind::CV_##T: break + switch (kind) { - PyObject* item = PyTuple_GetItem(py_args, i); - if (PyTuple_Check(item)) + HANDLE_CASE(BOOL, bool); + HANDLE_CASE(INT, int); + HANDLE_CASE(DOUBLE, double); + HANDLE_CASE(FLOAT, float); + HANDLE_CASE(STRING, std::string); + HANDLE_CASE(POINT, cv::Point); + HANDLE_CASE(POINT2F, cv::Point2f); + HANDLE_CASE(SIZE, cv::Size); + HANDLE_CASE(RECT, cv::Rect); + UNSUPPORTED(UNKNOWN); + UNSUPPORTED(UINT64); + UNSUPPORTED(SCALAR); + UNSUPPORTED(MAT); + UNSUPPORTED(DRAW_PRIM); +#undef HANDLE_CASE +#undef UNSUPPORTED + } + util::throw_error(std::logic_error("Unsupported type for GOpaqueT")); +} + +static cv::detail::VectorRef extract_vector_ref(PyObject* from, cv::detail::OpaqueKind kind) +{ +#define HANDLE_CASE(T, O) case cv::detail::OpaqueKind::CV_##T: \ +{ \ + std::vector obj; \ + pyopencv_to_generic_vec_with_check(from, obj, "Failed to obtain vector of " # O); \ + return cv::detail::VectorRef{std::move(obj)}; \ +} +#define UNSUPPORTED(T) case cv::detail::OpaqueKind::CV_##T: break + switch (kind) + { + HANDLE_CASE(BOOL, bool); + HANDLE_CASE(INT, int); + HANDLE_CASE(DOUBLE, double); + HANDLE_CASE(FLOAT, float); + HANDLE_CASE(STRING, std::string); + HANDLE_CASE(POINT, cv::Point); + HANDLE_CASE(POINT2F, cv::Point2f); + HANDLE_CASE(SIZE, cv::Size); + HANDLE_CASE(RECT, cv::Rect); + HANDLE_CASE(SCALAR, cv::Scalar); + HANDLE_CASE(MAT, cv::Mat); + UNSUPPORTED(UNKNOWN); + UNSUPPORTED(UINT64); + UNSUPPORTED(DRAW_PRIM); +#undef HANDLE_CASE +#undef UNSUPPORTED + } + util::throw_error(std::logic_error("Unsupported type for GArrayT")); +} + +static cv::GRunArg extract_run_arg(const cv::GTypeInfo& info, PyObject* item) +{ + switch (info.shape) + { + case cv::GShape::GMAT: { - cv::Scalar s; - if (pyopencv_to(item, s, ArgInfo("scalar", false))) + // NB: In case streaming it can be IStreamSource or cv::Mat + if (PyObject_TypeCheck(item, + 
reinterpret_cast(pyopencv_gapi_wip_IStreamSource_TypePtr))) { - args.emplace_back(s); - } - else - { - PyErr_SetString(PyExc_TypeError, "Failed convert tuple to cv::Scalar"); - return NULL; + cv::gapi::wip::IStreamSource::Ptr source = + reinterpret_cast(item)->v; + return source; } + cv::Mat obj; + pyopencv_to_with_check(item, obj, "Failed to obtain cv::Mat"); + return obj; } - else if (PyArray_Check(item)) + case cv::GShape::GSCALAR: { - cv::Mat m; - if (pyopencv_to(item, m, ArgInfo("mat", false))) - { - args.emplace_back(m); - } - else - { - PyErr_SetString(PyExc_TypeError, "Failed convert array to cv::Mat"); - return NULL; - } + cv::Scalar obj; + pyopencv_to_with_check(item, obj, "Failed to obtain cv::Scalar"); + return obj; } - else if (PyObject_TypeCheck(item, - reinterpret_cast(pyopencv_gapi_wip_IStreamSource_TypePtr))) + case cv::GShape::GOPAQUE: { - cv::gapi::wip::IStreamSource::Ptr source = - reinterpret_cast(item)->v; - args.emplace_back(source); + return extract_opaque_ref(item, info.kind); } - else + case cv::GShape::GARRAY: { - PyErr_SetString(PyExc_TypeError, "cv.gin can works only with cv::Mat," - "cv::Scalar, cv::gapi::wip::IStreamSource::Ptr"); - return NULL; + return extract_vector_ref(item, info.kind); + } + case cv::GShape::GFRAME: + { + // NB: Isn't supported yet. + break; } } - return pyopencv_from_generic_vec(args); + util::throw_error(std::logic_error("Unsupported output shape")); } -static PyObject* pyopencv_cv_gout(PyObject* o, PyObject* py_args, PyObject* kw) +static cv::GRunArgs extract_run_args(const cv::GTypesInfo& info, PyObject* py_args) { - return pyopencv_cv_gin(o, py_args, kw); + cv::GRunArgs args; + Py_ssize_t tuple_size = PyTuple_Size(py_args); + args.reserve(tuple_size); + + for (int i = 0; i < tuple_size; ++i) + { + args.push_back(extract_run_arg(info[i], PyTuple_GetItem(py_args, i))); + } + + return args; } +static cv::GMetaArg extract_meta_arg(const cv::GTypeInfo& info, PyObject* item) +{ + switch (info.shape) + { + case cv::GShape::GMAT: + { + cv::Mat obj; + pyopencv_to_with_check(item, obj, "Failed to obtain cv::Mat"); + return cv::GMetaArg{cv::descr_of(obj)}; + } + case cv::GShape::GSCALAR: + { + cv::Scalar obj; + pyopencv_to_with_check(item, obj, "Failed to obtain cv::Scalar"); + return cv::GMetaArg{cv::descr_of(obj)}; + } + case cv::GShape::GARRAY: + { + return cv::GMetaArg{cv::empty_array_desc()}; + } + case cv::GShape::GOPAQUE: + { + return cv::GMetaArg{cv::empty_gopaque_desc()}; + } + case cv::GShape::GFRAME: + { + // NB: Isn't supported yet. 
+ break; + } + } + util::throw_error(std::logic_error("Unsupported output shape")); +} + +static cv::GMetaArgs extract_meta_args(const cv::GTypesInfo& info, PyObject* py_args) +{ + cv::GMetaArgs metas; + Py_ssize_t tuple_size = PyTuple_Size(py_args); + metas.reserve(tuple_size); + + for (int i = 0; i < tuple_size; ++i) + { + metas.push_back(extract_meta_arg(info[i], PyTuple_GetItem(py_args, i))); + } + + return metas; +} + +inline PyObject* extract_opaque_value(const cv::GArg& value) +{ + GAPI_Assert(value.kind != cv::detail::ArgKind::GOBJREF); +#define HANDLE_CASE(T, O) case cv::detail::OpaqueKind::CV_##T: \ + { \ + return pyopencv_from(value.get()); \ + } + +#define UNSUPPORTED(T) case cv::detail::OpaqueKind::CV_##T: break + switch (value.opaque_kind) + { + HANDLE_CASE(BOOL, bool); + HANDLE_CASE(INT, int); + HANDLE_CASE(DOUBLE, double); + HANDLE_CASE(FLOAT, float); + HANDLE_CASE(STRING, std::string); + HANDLE_CASE(POINT, cv::Point); + HANDLE_CASE(POINT2F, cv::Point2f); + HANDLE_CASE(SIZE, cv::Size); + HANDLE_CASE(RECT, cv::Rect); + HANDLE_CASE(SCALAR, cv::Scalar); + HANDLE_CASE(MAT, cv::Mat); + UNSUPPORTED(UNKNOWN); + UNSUPPORTED(UINT64); + UNSUPPORTED(DRAW_PRIM); +#undef HANDLE_CASE +#undef UNSUPPORTED + } + util::throw_error(std::logic_error("Unsupported kernel input type")); +} + +static cv::GRunArgs run_py_kernel(PyObject* kernel, + const cv::gapi::python::GPythonContext &ctx) +{ + const auto& ins = ctx.ins; + const auto& in_metas = ctx.in_metas; + const auto& out_info = ctx.out_info; + + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + cv::GRunArgs outs; + try + { + int in_idx = 0; + PyObject* args = PyTuple_New(ins.size()); + for (size_t i = 0; i < ins.size(); ++i) + { + // NB: If meta is monostate then object isn't associated with G-TYPE, so in case it + // kind matches with supported types do conversion from c++ to python, if not (CV_UNKNOWN) + // obtain PyObject* and pass as-is. + if (cv::util::holds_alternative(in_metas[i])) + { + PyTuple_SetItem(args, i, + ins[i].opaque_kind != cv::detail::OpaqueKind::CV_UNKNOWN ? extract_opaque_value(ins[i]) + : ins[i].get()); + continue; + } + + switch (in_metas[i].index()) + { + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, i, pyopencv_from(ins[i].get())); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, i, pyopencv_from(ins[i].get())); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, i, pyopencv_from(ins[i].get())); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, i, pyopencv_from(ins[i].get())); + break; + case cv::GMetaArg::index_of(): + util::throw_error(std::logic_error("GFrame isn't supported for custom operation")); + break; + } + ++in_idx; + } + + PyObject* result = PyObject_CallObject(kernel, args); + + outs = out_info.size() == 1 ? cv::GRunArgs{extract_run_arg(out_info[0], result)} + : extract_run_args(out_info, result); + } + catch (...) + { + PyGILState_Release(gstate); + throw; + } + PyGILState_Release(gstate); + + return outs; +} + +// FIXME: Now it's impossible to obtain meta function from operation, +// because kernel connects to operation only by id (string). 
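For reference, this is how the pieces fit together on the C++ side: a Python callable ends up wrapped into a GPythonFunctor and included into a kernel package, as done by the kernels() binding below. The operation id "custom.op" and the trivial meta/run lambdas here are illustrative stand-ins:

    auto meta_fn = [](const cv::GMetaArgs&, const cv::GArgs&) {
        return cv::GMetaArgs{};    // out-meta is resolved elsewhere
    };
    auto run_fn = [](const cv::gapi::python::GPythonContext&) {
        return cv::GRunArgs{};     // would call back into Python here
    };
    cv::gapi::python::GPythonFunctor functor("custom.op", meta_fn, run_fn);
    cv::gapi::GKernelPackage pkg;
    pkg.include(functor);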
+static cv::GMetaArgs empty_meta(const cv::GMetaArgs &, const cv::GArgs &) { + return {}; +} + +static GMetaArg get_meta_arg(PyObject* obj) +{ + if (PyObject_TypeCheck(obj, + reinterpret_cast(pyopencv_GMatDesc_TypePtr))) + { + return cv::GMetaArg{reinterpret_cast(obj)->v}; + } + else if (PyObject_TypeCheck(obj, + reinterpret_cast(pyopencv_GScalarDesc_TypePtr))) + { + return cv::GMetaArg{reinterpret_cast(obj)->v}; + } + else if (PyObject_TypeCheck(obj, + reinterpret_cast(pyopencv_GArrayDesc_TypePtr))) + { + return cv::GMetaArg{reinterpret_cast(obj)->v}; + } + else if (PyObject_TypeCheck(obj, + reinterpret_cast(pyopencv_GOpaqueDesc_TypePtr))) + { + return cv::GMetaArg{reinterpret_cast(obj)->v}; + } + else + { + util::throw_error(std::logic_error("Unsupported output meta type")); + } +} + +static cv::GMetaArgs get_meta_args(PyObject* tuple) +{ + size_t size = PyTuple_Size(tuple); + + cv::GMetaArgs metas; + metas.reserve(size); + for (size_t i = 0; i < size; ++i) + { + metas.push_back(get_meta_arg(PyTuple_GetItem(tuple, i))); + } + + return metas; +} + +static GMetaArgs python_meta(PyObject* outMeta, const cv::GMetaArgs &meta, const cv::GArgs &gargs) { + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + cv::GMetaArgs out_metas; + try + { + PyObject* args = PyTuple_New(meta.size()); + size_t idx = 0; + for (auto&& m : meta) + { + switch (m.index()) + { + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, idx, pyopencv_from(cv::util::get(m))); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, idx, pyopencv_from(cv::util::get(m))); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, idx, pyopencv_from(cv::util::get(m))); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, idx, pyopencv_from(cv::util::get(m))); + break; + case cv::GMetaArg::index_of(): + PyTuple_SetItem(args, idx, gargs[idx].get()); + break; + case cv::GMetaArg::index_of(): + util::throw_error(std::logic_error("GFrame isn't supported for custom operation")); + break; + } + ++idx; + } + PyObject* result = PyObject_CallObject(outMeta, args); + out_metas = PyTuple_Check(result) ? get_meta_args(result) + : cv::GMetaArgs{get_meta_arg(result)}; + } + catch (...) 
+ { + PyGILState_Release(gstate); + throw; + } + PyGILState_Release(gstate); + + return out_metas; +} + +static PyObject* pyopencv_cv_gapi_kernels(PyObject* , PyObject* py_args, PyObject*) +{ + using namespace cv; + gapi::GKernelPackage pkg; + Py_ssize_t size = PyTuple_Size(py_args); + for (int i = 0; i < size; ++i) + { + PyObject* pair = PyTuple_GetItem(py_args, i); + PyObject* kernel = PyTuple_GetItem(pair, 0); + + std::string id; + if (!pyopencv_to(PyTuple_GetItem(pair, 1), id, ArgInfo("id", false))) + { + PyErr_SetString(PyExc_TypeError, "Failed to obtain: kernel id must be a string"); + return NULL; + } + Py_INCREF(kernel); + gapi::python::GPythonFunctor f(id.c_str(), + empty_meta, + std::bind(run_py_kernel, + kernel, + std::placeholders::_1)); + pkg.include(f); + } + return pyopencv_from(pkg); +} + +static PyObject* pyopencv_cv_gapi_op(PyObject* , PyObject* py_args, PyObject*) +{ + using namespace cv; + Py_ssize_t size = PyTuple_Size(py_args); + std::string id; + if (!pyopencv_to(PyTuple_GetItem(py_args, 0), id, ArgInfo("id", false))) + { + PyErr_SetString(PyExc_TypeError, "Failed to obtain: operation id must be a string"); + return NULL; + } + PyObject* outMeta = PyTuple_GetItem(py_args, 1); + Py_INCREF(outMeta); + + cv::GArgs args; + for (int i = 2; i < size; i++) + { + PyObject* item = PyTuple_GetItem(py_args, i); + if (PyObject_TypeCheck(item, + reinterpret_cast(pyopencv_GMat_TypePtr))) + { + args.emplace_back(reinterpret_cast(item)->v); + } + else if (PyObject_TypeCheck(item, + reinterpret_cast(pyopencv_GScalar_TypePtr))) + { + args.emplace_back(reinterpret_cast(item)->v); + } + else if (PyObject_TypeCheck(item, + reinterpret_cast(pyopencv_GOpaqueT_TypePtr))) + { + auto&& arg = reinterpret_cast(item)->v.arg(); +#define HC(T, K) case cv::GOpaqueT::Storage:: index_of>(): \ + args.emplace_back(cv::util::get>(arg)); \ + break; \ + + SWITCH(arg.index(), GOPAQUE_TYPE_LIST_G, HC) +#undef HC + } + else if (PyObject_TypeCheck(item, + reinterpret_cast(pyopencv_GArrayT_TypePtr))) + { + auto&& arg = reinterpret_cast(item)->v.arg(); +#define HC(T, K) case cv::GArrayT::Storage:: index_of>(): \ + args.emplace_back(cv::util::get>(arg)); \ + break; \ + + SWITCH(arg.index(), GARRAY_TYPE_LIST_G, HC) +#undef HC + } + else + { + Py_INCREF(item); + args.emplace_back(cv::GArg(item)); + } + } + + cv::GKernel::M outMetaWrapper = std::bind(python_meta, + outMeta, + std::placeholders::_1, + std::placeholders::_2); + return pyopencv_from(cv::gapi::wip::op(id, outMetaWrapper, std::move(args))); +} + +static PyObject* pyopencv_cv_gin(PyObject*, PyObject* py_args, PyObject*) +{ + Py_INCREF(py_args); + auto callback = cv::detail::ExtractArgsCallback{[=](const cv::GTypesInfo& info) + { + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + cv::GRunArgs args; + try + { + args = extract_run_args(info, py_args); + } + catch (...) + { + PyGILState_Release(gstate); + throw; + } + PyGILState_Release(gstate); + return args; + }}; + + return pyopencv_from(callback); +} + +static PyObject* pyopencv_cv_descr_of(PyObject*, PyObject* py_args, PyObject*) +{ + Py_INCREF(py_args); + auto callback = cv::detail::ExtractMetaCallback{[=](const cv::GTypesInfo& info) + { + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + cv::GMetaArgs args; + try + { + args = extract_meta_args(info, py_args); + } + catch (...) 
+ { + PyGILState_Release(gstate); + throw; + } + PyGILState_Release(gstate); + return args; + }}; + return pyopencv_from(callback); +} + +template +struct PyOpenCV_Converter> +{ + static PyObject* from(const cv::GArray& p) + { + return pyopencv_from(cv::GArrayT(p)); + } + static bool to(PyObject *obj, cv::GArray& value, const ArgInfo& info) + { + if (PyObject_TypeCheck(obj, reinterpret_cast(pyopencv_GArrayT_TypePtr))) + { + auto& array = reinterpret_cast(obj)->v; + try { + value = cv::util::get>(array.arg()); + } catch (...) { + return false; + } + return true; + } + return false; + } +}; + +template +struct PyOpenCV_Converter> +{ + static PyObject* from(const cv::GOpaque& p) + { + return pyopencv_from(cv::GOpaqueT(p)); + } + static bool to(PyObject *obj, cv::GOpaque& value, const ArgInfo& info) + { + if (PyObject_TypeCheck(obj, reinterpret_cast(pyopencv_GOpaqueT_TypePtr))) + { + auto& opaque = reinterpret_cast(obj)->v; + try { + value = cv::util::get>(opaque.arg()); + } catch (...) { + return false; + } + return true; + } + return false; + } +}; + + +// extend cv.gapi.wip. methods +#define PYOPENCV_EXTRA_METHODS_GAPI_WIP \ + {"kernels", CV_PY_FN_WITH_KW(pyopencv_cv_gapi_kernels), "kernels(...) -> GKernelPackage"}, \ + {"op", CV_PY_FN_WITH_KW_(pyopencv_cv_gapi_op, 0), "kernels(...) -> retval\n"}, \ + + #endif // HAVE_OPENCV_GAPI #endif // OPENCV_GAPI_PYOPENCV_GAPI_HPP diff --git a/modules/gapi/misc/python/python_bridge.hpp b/modules/gapi/misc/python/python_bridge.hpp new file mode 100644 index 0000000000..51f0ca8ab0 --- /dev/null +++ b/modules/gapi/misc/python/python_bridge.hpp @@ -0,0 +1,327 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_PYTHON_BRIDGE_HPP +#define OPENCV_GAPI_PYTHON_BRIDGE_HPP + +#include +#include +#include + +#define ID(T, E) T +#define ID_(T, E) ID(T, E), + +#define WRAP_ARGS(T, E, G) \ + G(T, E) + +#define SWITCH(type, LIST_G, HC) \ + switch(type) { \ + LIST_G(HC, HC) \ + default: \ + GAPI_Assert(false && "Unsupported type"); \ + } + +#define GARRAY_TYPE_LIST_G(G, G2) \ +WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ +WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ +WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ +WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ +WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ +WRAP_ARGS(cv::Point , cv::gapi::ArgType::CV_POINT, G) \ +WRAP_ARGS(cv::Point2f , cv::gapi::ArgType::CV_POINT2F, G) \ +WRAP_ARGS(cv::Size , cv::gapi::ArgType::CV_SIZE, G) \ +WRAP_ARGS(cv::Rect , cv::gapi::ArgType::CV_RECT, G) \ +WRAP_ARGS(cv::Scalar , cv::gapi::ArgType::CV_SCALAR, G) \ +WRAP_ARGS(cv::Mat , cv::gapi::ArgType::CV_MAT, G) \ +WRAP_ARGS(cv::GMat , cv::gapi::ArgType::CV_GMAT, G2) + +#define GOPAQUE_TYPE_LIST_G(G, G2) \ +WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ +WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ +WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ +WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ +WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ +WRAP_ARGS(cv::Point , cv::gapi::ArgType::CV_POINT, G) \ +WRAP_ARGS(cv::Point2f , cv::gapi::ArgType::CV_POINT2F, G) \ +WRAP_ARGS(cv::Size , cv::gapi::ArgType::CV_SIZE, G) \ +WRAP_ARGS(cv::Rect , cv::gapi::ArgType::CV_RECT, G2) \ + +namespace cv { +namespace gapi { + +// NB: cv.gapi.CV_BOOL in python +enum ArgType { + CV_BOOL, + CV_INT, + CV_DOUBLE, + CV_FLOAT, + CV_STRING, + CV_POINT, + CV_POINT2F, + CV_SIZE, + CV_RECT, + CV_SCALAR, + CV_MAT, + CV_GMAT, +}; + +GAPI_EXPORTS_W inline cv::GInferOutputs infer(const String& name, const cv::GInferInputs& inputs) +{ + return infer(name, inputs); +} + +GAPI_EXPORTS_W inline GInferOutputs infer(const std::string& name, + const cv::GOpaque& roi, + const GInferInputs& inputs) +{ + return infer(name, roi, inputs); +} + +GAPI_EXPORTS_W inline GInferListOutputs infer(const std::string& name, + const cv::GArray& rois, + const GInferInputs& inputs) +{ + return infer(name, rois, inputs); +} + +GAPI_EXPORTS_W inline GInferListOutputs infer2(const std::string& name, + const cv::GMat in, + const GInferListInputs& inputs) +{ + return infer2(name, in, inputs); +} + +} // namespace gapi + +namespace detail { + +template