diff --git a/cmake/OpenCVCompilerDefenses.cmake b/cmake/OpenCVCompilerDefenses.cmake index c4563ff0c1..62029ea38b 100644 --- a/cmake/OpenCVCompilerDefenses.cmake +++ b/cmake/OpenCVCompilerDefenses.cmake @@ -5,7 +5,7 @@ # - https://wiki.debian.org/Hardening # - https://wiki.gentoo.org/wiki/Hardened/Toolchain # - https://docs.microsoft.com/en-us/cpp/build/reference/sdl-enable-additional-security-checks - +# - https://developer.apple.com/library/archive/documentation/Security/Conceptual/SecureCodingGuide/Articles/BufferOverflows.html set(OPENCV_LINKER_DEFENSES_FLAGS_COMMON "") @@ -44,6 +44,12 @@ if(MSVC) if(NOT X86_64) set(OPENCV_LINKER_DEFENSES_FLAGS_COMMON "${OPENCV_LINKER_DEFENSES_FLAGS_COMMON} /safeseh") endif() +elseif(CV_CLANG) + ocv_add_defense_compiler_flag("-fstack-protector-strong") + ocv_add_defense_compiler_flag_release("-D_FORTIFY_SOURCE=2") + if (NOT APPLE) + set(OPENCV_LINKER_DEFENSES_FLAGS_COMMON "${OPENCV_LINKER_DEFENSES_FLAGS_COMMON} -z noexecstack -z relro -z now" ) + endif() elseif(CV_GCC) if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") ocv_add_defense_compiler_flag("-fstack-protector") diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index f82159ea3a..8247a9a53c 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -4,6 +4,15 @@ # SSE4_1 / SSE4_2 / POPCNT # AVX / AVX2 / AVX_512F # FMA3 +# +# CPU features groups: +# AVX512_COMMON (Common instructions AVX-512F/CD for all CPUs that support AVX-512) +# AVX512_KNL (Knights Landing with AVX-512F/CD/ER/PF) +# AVX512_KNM (Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ) +# AVX512_SKX (Skylake-X with AVX-512F/CD/BW/DQ/VL) +# AVX512_CNL (Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI) +# AVX512_CEL (Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI) +# AVX512_ICL (Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ/VPCLMULQDQ*/GFNI*/VAES*) # ppc64le arch: # VSX (always 
available on Power8) @@ -33,7 +42,8 @@ # # CPU_{opt}_ENABLED_DEFAULT=ON/OFF - has compiler support without additional flag (CPU_BASELINE_DETECT=ON only) -set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX") +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F") +list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3) list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) @@ -152,9 +162,15 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_SKX") + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CEL;AVX512_ICL") - ocv_update(CPU_AVX512_SKX_GROUP "AVX_512F;AVX_512CD;AVX_512BW;AVX_512DQ;AVX_512VL") + ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD") + ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA") + ocv_update(CPU_AVX512_KNM_GROUP "AVX512_KNL;AVX512_KNM_EXTRA;AVX_512VPOPCNTDQ") + ocv_update(CPU_AVX512_SKX_GROUP "AVX512_COMMON;AVX_512VL;AVX_512BW;AVX_512DQ") + ocv_update(CPU_AVX512_CNL_GROUP "AVX512_SKX;AVX_512IFMA;AVX_512VBMI") + ocv_update(CPU_AVX512_CEL_GROUP "AVX512_CNL;AVX_512VNNI") + ocv_update(CPU_AVX512_ICL_GROUP "AVX512_CEL;AVX_512VBMI2;AVX_512BITALG;AVX_512VPOPCNTDQ") # ? 
VPCLMULQDQ, GFNI, VAES ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") @@ -167,9 +183,22 @@ if(X86 OR X86_64) ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp") ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") ocv_update(CPU_AVX_512F_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp") + ocv_update(CPU_AVX512_COMMON_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512common.cpp") + ocv_update(CPU_AVX512_KNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knl.cpp") + ocv_update(CPU_AVX512_KNM_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512knm.cpp") ocv_update(CPU_AVX512_SKX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512skx.cpp") + ocv_update(CPU_AVX512_CNL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cnl.cpp") + ocv_update(CPU_AVX512_CEL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512cel.cpp") + ocv_update(CPU_AVX512_ICL_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512icl.cpp") if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) + ocv_update(CPU_AVX512_ICL_IMPLIES "AVX512_CEL") + ocv_update(CPU_AVX512_CEL_IMPLIES "AVX512_CNL") + ocv_update(CPU_AVX512_CNL_IMPLIES "AVX512_SKX") + ocv_update(CPU_AVX512_SKX_IMPLIES "AVX512_COMMON") + ocv_update(CPU_AVX512_KNM_IMPLIES "AVX512_KNL") + ocv_update(CPU_AVX512_KNL_IMPLIES "AVX512_COMMON") + ocv_update(CPU_AVX512_COMMON_IMPLIES "AVX_512F") ocv_update(CPU_AVX_512F_IMPLIES "AVX2") ocv_update(CPU_AVX_512F_FORCE "") # Don't force other optimizations ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16") @@ -192,10 +221,10 @@ if(X86 OR X86_64) ocv_update(CPU_${name}_FLAGS_NAME "${name}") if(MSVC) set(enable_flags "${msvc_flags}") - set(flags_conflict "/arch:[^ ]+") + set(flags_conflict "/arch:[^ ]*|/Qx:[^ ]+") else() set(enable_flags "${unix_flags}") - set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") + 
set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]*|-x[^ ]+") endif() ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}") if(flags_conflict) @@ -215,8 +244,14 @@ if(X86 OR X86_64) if(NOT X86_64) # x64 compiler doesn't support /arch:sse ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE") endif() - ocv_intel_compiler_optimization_option(AVX_512F "-march=common-avx512" "/arch:COMMON-AVX512") - ocv_intel_compiler_optimization_option(AVX512_SKX "-march=core-avx512" "/arch:CORE-AVX512") + ocv_intel_compiler_optimization_option(AVX_512F "-xCOMMON-AVX512" "/Qx:COMMON-AVX512") + ocv_intel_compiler_optimization_option(AVX512_COMMON "-xCOMMON-AVX512" "/Qx:COMMON-AVX512") + ocv_intel_compiler_optimization_option(AVX512_KNL "-xKNL" "/Qx:KNL") + ocv_intel_compiler_optimization_option(AVX512_KNM "-xKNM" "/Qx:KNM") + ocv_intel_compiler_optimization_option(AVX512_SKX "-xSKYLAKE-AVX512" "/Qx:SKYLAKE-AVX512") + ocv_intel_compiler_optimization_option(AVX512_CNL "-xCANNONLAKE" "/Qx:CANNONLAKE") + ocv_intel_compiler_optimization_option(AVX512_CEL "-xCASCADELAKE" "/Qx:CASCADELAKE") + ocv_intel_compiler_optimization_option(AVX512_ICL "-xICELAKE-CLIENT" "/Qx:ICELAKE-CLIENT") elseif(CV_GCC OR CV_CLANG) ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") ocv_update(CPU_FP16_FLAGS_ON "-mf16c") @@ -230,12 +265,21 @@ if(X86 OR X86_64) ocv_update(CPU_SSE2_FLAGS_ON "-msse2") ocv_update(CPU_SSE_FLAGS_ON "-msse") if(NOT (CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")) # GCC >= 5.0 - # -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi ocv_update(CPU_AVX_512F_FLAGS_ON "-mavx512f") - ocv_update(CPU_AVX512_SKX_FLAGS_ON "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq") + ocv_update(CPU_AVX_512CD_FLAGS_ON "-mavx512cd") + ocv_update(CPU_AVX512_KNL_EXTRA_FLAGS_ON "-mavx512er -mavx512pf") + ocv_update(CPU_AVX512_KNM_EXTRA_FLAGS_ON "-mavx5124fmaps -mavx5124vnniw") + ocv_update(CPU_AVX_512BW_FLAGS_ON "-mavx512bw") + 
ocv_update(CPU_AVX_512DQ_FLAGS_ON "-mavx512dq") + ocv_update(CPU_AVX_512VL_FLAGS_ON "-mavx512vl") + ocv_update(CPU_AVX_512IFMA_FLAGS_ON "-mavx512ifma") + ocv_update(CPU_AVX_512VBMI_FLAGS_ON "-mavx512vbmi") + ocv_update(CPU_AVX_512VNNI_FLAGS_ON "-mavx512vnni") + ocv_update(CPU_AVX_512VBMI2_FLAGS_ON "-mavx512vbmi2") + ocv_update(CPU_AVX_512BITALG_FLAGS_ON "-mavx512bitalg") + ocv_update(CPU_AVX_512VPOPCNTDQ_FLAGS_ON "-mavx512vpopcntdq") else() ocv_update(CPU_AVX_512F_SUPPORTED OFF) - ocv_update(CPU_AVX512_SKX_SUPPORTED OFF) endif() elseif(MSVC) ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2") @@ -336,6 +380,10 @@ if(CV_DISABLE_OPTIMIZATION) set(CPU_DISPATCH_REQUIRE "") endif() +if("x${CPU_DISPATCH}" STREQUAL "xALL") + set(CPU_DISPATCH "${CPU_KNOWN_OPTIMIZATIONS}") +endif() + macro(ocv_check_compiler_optimization OPT) if(NOT DEFINED CPU_${OPT}_SUPPORTED) if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) @@ -408,6 +456,7 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) if(NOT DEFINED CPU_${OPT}_FORCE) set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}") endif() + #message("${OPT}: CPU_${OPT}_FLAGS_ON=${CPU_${OPT}_FLAGS_ON}") endforeach() if(_add_native_flag) @@ -809,19 +858,19 @@ macro(__ocv_add_dispatched_file filename target_src_var src_directory dst_direct file(WRITE "${__file}" "${__codestr}") endif() - if(";${CPU_DISPATCH};" MATCHES "${OPT}" OR __CPU_DISPATCH_INCLUDE_ALL) + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};" OR __CPU_DISPATCH_INCLUDE_ALL) if(EXISTS "${src_directory}/${filename}.${OPT_LOWER}.cpp") message(STATUS "Using overrided ${OPT} source: ${src_directory}/${filename}.${OPT_LOWER}.cpp") else() list(APPEND ${target_src_var} "${__file}") endif() - endif() - set(__declarations_str "${__declarations_str} + set(__declarations_str "${__declarations_str} #define CV_CPU_DISPATCH_MODE ${OPT} #include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\" ") - set(__dispatch_modes "${OPT}, ${__dispatch_modes}") + 
set(__dispatch_modes "${OPT}, ${__dispatch_modes}") + endif() endforeach() set(__declarations_str "${__declarations_str} diff --git a/cmake/checks/cpu_avx512cel.cpp b/cmake/checks/cpu_avx512cel.cpp new file mode 100644 index 0000000000..e372cf9a45 --- /dev/null +++ b/cmake/checks/cpu_avx512cel.cpp @@ -0,0 +1,11 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_dpwssd_epi32(a, b, c); +} +#else +#error "AVX512-CEL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512cnl.cpp b/cmake/checks/cpu_avx512cnl.cpp new file mode 100644 index 0000000000..480a312fe5 --- /dev/null +++ b/cmake/checks/cpu_avx512cnl.cpp @@ -0,0 +1,12 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_madd52hi_epu64(a, b, c); + a = _mm512_permutexvar_epi8(a, b); +} +#else +#error "AVX512-CNL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512common.cpp b/cmake/checks/cpu_avx512common.cpp new file mode 100644 index 0000000000..1754a95399 --- /dev/null +++ b/cmake/checks/cpu_avx512common.cpp @@ -0,0 +1,14 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i zmm = _mm512_setzero_si512(); + zmm = _mm512_lzcnt_epi32(zmm); +#if defined __GNUC__ && defined __x86_64__ + asm volatile ("" : : : "zmm16", "zmm17", "zmm18", "zmm19"); +#endif +} +#else +#error "AVX512-COMMON is not supported" +#endif +int main() { return 0; } diff --git a/cmake/checks/cpu_avx512icl.cpp b/cmake/checks/cpu_avx512icl.cpp new file mode 100644 index 0000000000..a67f5f35d4 --- /dev/null +++ b/cmake/checks/cpu_avx512icl.cpp @@ -0,0 +1,13 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512i a, b, c; + a = _mm512_popcnt_epi8(a); + a = _mm512_shrdv_epi64(a, b, c); + a = _mm512_popcnt_epi64(a); +} +#else +#error "AVX512-ICL is not 
supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512knl.cpp b/cmake/checks/cpu_avx512knl.cpp new file mode 100644 index 0000000000..f0eaa646e3 --- /dev/null +++ b/cmake/checks/cpu_avx512knl.cpp @@ -0,0 +1,16 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include + +void test() +{ + int* base; + __m512i idx; + __mmask16 m16; + __m512 f; + _mm512_mask_prefetch_i32gather_ps(idx, m16, base, 1, _MM_HINT_T1); + f = _mm512_rsqrt28_ps(f); +} +#else +#error "AVX512-KNL is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/cmake/checks/cpu_avx512knm.cpp b/cmake/checks/cpu_avx512knm.cpp new file mode 100644 index 0000000000..18b2bf53e8 --- /dev/null +++ b/cmake/checks/cpu_avx512knm.cpp @@ -0,0 +1,17 @@ +#if defined __AVX512__ || defined __AVX512F__ +#include +void test() +{ + __m512 a, b, c, d, e; + __m512i ai, bi, ci, di, ei, fi; + __m128 *mem; + __m128i *memi; + __mmask16 m; + a = _mm512_4fnmadd_ps(a, b, c, d, e, mem); + ai = _mm512_4dpwssd_epi32(ai, bi, ci, di, ei, memi); + ai = _mm512_popcnt_epi64(ai); +} +#else +#error "AVX512-KNM is not supported" +#endif +int main() { return 0; } \ No newline at end of file diff --git a/doc/opencv.bib b/doc/opencv.bib index e2af456532..fd1b60dfd1 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -195,6 +195,21 @@ volume = {9}, publisher = {Walter de Gruyter} } +@article{Chaumette06, + author = {Chaumette, Fran{\c c}ois and Hutchinson, S.}, + title = {{Visual servo control, Part I: Basic approaches}}, + url = {https://hal.inria.fr/inria-00350283}, + journal = {{IEEE Robotics and Automation Magazine}}, + publisher = {{Institute of Electrical and Electronics Engineers}}, + volume = {13}, + number = {4}, + pages = {82-90}, + year = {2006}, + pdf = {https://hal.inria.fr/inria-00350283/file/2006_ieee_ram_chaumette.pdf}, + hal_id = {inria-00350283}, + hal_version = {v1}, +} + @article{Daniilidis98, author = {Konstantinos Daniilidis}, title = 
{Hand-Eye Calibration Using Dual Quaternions}, @@ -242,6 +257,12 @@ publisher = {IEEE}, url = {http://alumni.media.mit.edu/~jdavis/Publications/publications_402.pdf} } +@misc{Eade13, + author = {Eade, Ethan}, + title = {Gauss-Newton / Levenberg-Marquardt Optimization}, + year = {2013}, + url = {http://ethaneade.com/optimization.pdf} +} @inproceedings{EM11, author = {Gastal, Eduardo SL and Oliveira, Manuel M}, title = {Domain transform for edge-aware image and video processing}, @@ -596,10 +617,14 @@ title = {ROF and TV-L1 denoising with Primal-Dual algorithm}, url = {http://znah.net/rof-and-tv-l1-denoising-with-primal-dual-algorithm.html} } -@misc{VandLec, - author = {Vandenberghe, Lieven}, - title = {QR Factorization}, - url = {http://www.seas.ucla.edu/~vandenbe/133A/lectures/qr.pdf} +@misc{Madsen04, + author = {K. Madsen and H. B. Nielsen and O. Tingleff}, + title = {Methods for Non-Linear Least Squares Problems (2nd ed.)}, + year = {2004}, + pages = {60}, + publisher = {Informatics and Mathematical Modelling, Technical University of Denmark, {DTU}}, + address = {Richard Petersens Plads, Building 321, {DK-}2800 Kgs. 
Lyngby}, + url = {http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf} } @article{MHT2011, author = {Getreuer, Pascal}, @@ -645,6 +670,23 @@ title = {Deeper understanding of the homography decomposition for vision-based control}, year = {2007} } +@article{Marchand16, + author = {Marchand, Eric and Uchiyama, Hideaki and Spindler, Fabien}, + title = {{Pose Estimation for Augmented Reality: A Hands-On Survey}}, + url = {https://hal.inria.fr/hal-01246370}, + journal = {{IEEE Transactions on Visualization and Computer Graphics}}, + publisher = {{Institute of Electrical and Electronics Engineers}}, + volume = {22}, + number = {12}, + pages = {2633 - 2651}, + year = {2016}, + month = Dec, + doi = {10.1109/TVCG.2015.2513408}, + keywords = {homography ; SLAM ; motion estimation ; Index Terms-Survey ; augmented reality ; vision-based camera localization ; pose estimation ; PnP ; keypoint matching ; code examples}, + pdf = {https://hal.inria.fr/hal-01246370/file/survey-ieee-v2.pdf}, + hal_id = {hal-01246370}, + hal_version = {v1}, +} @article{Matas00, author = {Matas, Jiri and Galambos, Charles and Kittler, Josef}, title = {Robust detection of lines using the progressive probabilistic hough transform}, @@ -915,6 +957,11 @@ volume = {2}, publisher = {IEEE} } +@misc{VandLec, + author = {Vandenberghe, Lieven}, + title = {QR Factorization}, + url = {http://www.seas.ucla.edu/~vandenbe/133A/lectures/qr.pdf} +} @inproceedings{V03, author = {Kwatra, Vivek and Sch{\"o}dl, Arno and Essa, Irfan and Turk, Greg and Bobick, Aaron}, title = {Graphcut textures: image and video synthesis using graph cuts}, diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index 264c976b2f..5f6d3c048a 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -383,8 +383,11 @@ public: modified using setMaxIters() method. 
*/ static Ptr<LMSolver> create(const Ptr<LMSolver::Callback>& cb, int maxIters); + static Ptr<LMSolver> create(const Ptr<LMSolver::Callback>& cb, int maxIters, double eps); }; + + + /** @brief Finds a perspective transformation between two planes. @param srcPoints Coordinates of the points in the original plane, a matrix of the type CV_32FC2 @@ -842,6 +845,65 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags ); +/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame +to the camera coordinate frame) from 3D-2D point correspondences and starting from an initial solution. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, +where N is the number of points. vector\<Point3d\> can also be passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\<Point2d\> can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. Input values are used as an initial solution. +@param tvec Input/Output translation vector. Input values are used as an initial solution. +@param criteria Criteria when to stop the Levenberg-Marquardt iterative algorithm. 
+ +The function refines the object pose given at least 3 object points, their corresponding image +projections, an initial solution for the rotation and translation vectors, +as well as the camera matrix and the distortion coefficients. +The function minimizes the projection error with respect to the rotation and the translation vectors, according +to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process. + */ +CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, + TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON)); + +/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame +to the camera coordinate frame) from 3D-2D point correspondences and starting from an initial solution. + +@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, +where N is the number of points. vector\<Point3d\> can also be passed here. +@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, +where N is the number of points. vector\<Point2d\> can also be passed here. +@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ . +@param distCoeffs Input vector of distortion coefficients +\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of +4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are +assumed. +@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec , brings points from +the model coordinate system to the camera coordinate system. Input values are used as an initial solution. +@param tvec Input/Output translation vector. Input values are used as an initial solution. 
+@param criteria Criteria when to stop the iterative virtual visual servoing (VVS) refinement. +@param VVSlambda Gain for the virtual visual servoing control law, equivalent to the \f$\alpha\f$ +gain in the Gauss-Newton formulation. + +The function refines the object pose given at least 3 object points, their corresponding image +projections, an initial solution for the rotation and translation vectors, +as well as the camera matrix and the distortion coefficients. +The function minimizes the projection error with respect to the rotation and the translation vectors, using a +virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme. + */ +CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePoints, + InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, + TermCriteria criteria = TermCriteria(TermCriteria::EPS + TermCriteria::COUNT, 20, FLT_EPSILON), + double VVSlambda = 1); + /** @brief Finds an initial camera matrix from 3D-2D point correspondences. 
@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern diff --git a/modules/calib3d/src/levmarq.cpp b/modules/calib3d/src/levmarq.cpp index 4e59f043a8..0d339ccf79 100644 --- a/modules/calib3d/src/levmarq.cpp +++ b/modules/calib3d/src/levmarq.cpp @@ -81,11 +81,11 @@ class LMSolverImpl CV_FINAL : public LMSolver { public: - LMSolverImpl() : maxIters(100) { init(); } - LMSolverImpl(const Ptr<LMSolver::Callback>& _cb, int _maxIters) : cb(_cb), maxIters(_maxIters) { init(); } + LMSolverImpl() : epsx(FLT_EPSILON), epsf(FLT_EPSILON), maxIters(100) { init(); } + LMSolverImpl(const Ptr<LMSolver::Callback>& _cb, int _maxIters) : cb(_cb), epsx(FLT_EPSILON), epsf(FLT_EPSILON), maxIters(_maxIters) { init(); } + LMSolverImpl(const Ptr<LMSolver::Callback>& _cb, int _maxIters, double _eps) : cb(_cb), epsx(_eps), epsf(_eps), maxIters(_maxIters) { init(); } void init() { - epsx = epsf = FLT_EPSILON; printInterval = 0; } @@ -215,4 +215,9 @@ Ptr<LMSolver> LMSolver::create(const Ptr<LMSolver::Callback>& cb, int maxIters) return makePtr<LMSolverImpl>(cb, maxIters); } +Ptr<LMSolver> LMSolver::create(const Ptr<LMSolver::Callback>& cb, int maxIters, double eps) +{ + return makePtr<LMSolverImpl>(cb, maxIters, eps); +} + } diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 0f76e8c452..58c16f40cc 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -456,4 +456,271 @@ int solveP3P( InputArray _opoints, InputArray _ipoints, return solutions; } +class SolvePnPRefineLMCallback CV_FINAL : public LMSolver::Callback +{ +public: + SolvePnPRefineLMCallback(InputArray _opoints, InputArray _ipoints, InputArray _cameraMatrix, InputArray _distCoeffs) + { + objectPoints = _opoints.getMat(); + imagePoints = _ipoints.getMat(); + npoints = std::max(objectPoints.checkVector(3, CV_32F), objectPoints.checkVector(3, CV_64F)); + imagePoints0 = imagePoints.reshape(1, npoints*2); + cameraMatrix = _cameraMatrix.getMat(); + distCoeffs = _distCoeffs.getMat(); + } + + bool compute(InputArray _param, OutputArray _err, OutputArray _Jac) const CV_OVERRIDE + { + Mat param = _param.getMat(); + _err.create(npoints*2, 1, CV_64FC1); + + 
if(_Jac.needed()) + { + _Jac.create(npoints*2, param.rows, CV_64FC1); + } + + Mat rvec = param(Rect(0, 0, 1, 3)), tvec = param(Rect(0, 3, 1, 3)); + + Mat J, projectedPts; + projectPoints(objectPoints, rvec, tvec, cameraMatrix, distCoeffs, projectedPts, _Jac.needed() ? J : noArray()); + + if (_Jac.needed()) + { + Mat Jac = _Jac.getMat(); + for (int i = 0; i < Jac.rows; i++) + { + for (int j = 0; j < Jac.cols; j++) + { + Jac.at<double>(i,j) = J.at<double>(i,j); + } + } + } + + Mat err = _err.getMat(); + projectedPts = projectedPts.reshape(1, npoints*2); + err = projectedPts - imagePoints0; + + return true; + } + + Mat objectPoints, imagePoints, imagePoints0; + Mat cameraMatrix, distCoeffs; + int npoints; +}; + +/** + * @brief Compute the Interaction matrix and the residuals for the current pose. + * @param objectPoints 3D object points. + * @param R Current estimated rotation matrix. + * @param tvec Current estimated translation vector. + * @param L Interaction matrix for a vector of point features. + * @param s Projected points in normalized image coordinates (the caller subtracts the desired features to get the residuals). + */ +static void computeInteractionMatrixAndResiduals(const Mat& objectPoints, const Mat& R, const Mat& tvec, + Mat& L, Mat& s) +{ + Mat objectPointsInCam; + + int npoints = objectPoints.rows; + for (int i = 0; i < npoints; i++) + { + Mat curPt = objectPoints.row(i); + objectPointsInCam = R * curPt.t() + tvec; + + double Zi = objectPointsInCam.at<double>(2,0); + double xi = objectPointsInCam.at<double>(0,0) / Zi; + double yi = objectPointsInCam.at<double>(1,0) / Zi; + + s.at<double>(2*i,0) = xi; + s.at<double>(2*i+1,0) = yi; + + L.at<double>(2*i,0) = -1 / Zi; + L.at<double>(2*i,1) = 0; + L.at<double>(2*i,2) = xi / Zi; + L.at<double>(2*i,3) = xi*yi; + L.at<double>(2*i,4) = -(1 + xi*xi); + L.at<double>(2*i,5) = yi; + + L.at<double>(2*i+1,0) = 0; + L.at<double>(2*i+1,1) = -1 / Zi; + L.at<double>(2*i+1,2) = yi / Zi; + L.at<double>(2*i+1,3) = 1 + yi*yi; + L.at<double>(2*i+1,4) = -xi*yi; + L.at<double>(2*i+1,5) = -xi; + } +} + +/** + * @brief The exponential map from se(3) to SE(3). 
+ * @param twist A twist (v, w) represents the velocity of a rigid body as an angular velocity + * around an axis and a linear velocity along this axis. + * @param R1 Resultant rotation matrix from the twist. + * @param t1 Resultant translation vector from the twist. + */ +static void exponentialMapToSE3Inv(const Mat& twist, Mat& R1, Mat& t1) +{ + //see Exponential Map in http://ethaneade.com/lie.pdf + /* + \begin{align*} + \boldsymbol{\delta} &= \left( \mathbf{u}, \boldsymbol{\omega} \right ) \in se(3) \\ + \mathbf{u}, \boldsymbol{\omega} &\in \mathbb{R}^3 \\ + \theta &= \sqrt{ \boldsymbol{\omega}^T \boldsymbol{\omega} } \\ + A &= \frac{\sin \theta}{\theta} \\ + B &= \frac{1 - \cos \theta}{\theta^2} \\ + C &= \frac{1-A}{\theta^2} \\ + \mathbf{R} &= \mathbf{I} + A \boldsymbol{\omega}_{\times} + B \boldsymbol{\omega}_{\times}^2 \\ + \mathbf{V} &= \mathbf{I} + B \boldsymbol{\omega}_{\times} + C \boldsymbol{\omega}_{\times}^2 \\ + \exp \begin{pmatrix} + \mathbf{u} \\ + \boldsymbol{\omega} + \end{pmatrix} &= + \left( + \begin{array}{c|c} + \mathbf{R} & \mathbf{V} \mathbf{u} \\ \hline + \mathbf{0} & 1 + \end{array} + \right ) + \end{align*} + */ + double vx = twist.at<double>(0,0); + double vy = twist.at<double>(1,0); + double vz = twist.at<double>(2,0); + double wx = twist.at<double>(3,0); + double wy = twist.at<double>(4,0); + double wz = twist.at<double>(5,0); + + Matx31d rvec(wx, wy, wz); + Mat R; + Rodrigues(rvec, R); + + double theta = sqrt(wx*wx + wy*wy + wz*wz); + double sinc = std::fabs(theta) < 1e-8 ? 1 : sin(theta) / theta; + double mcosc = (std::fabs(theta) < 1e-8) ? 0.5 : (1-cos(theta)) / (theta*theta); + double msinc = (std::fabs(theta) < 1e-8) ? 
(1/6.0) : (1-sinc) / (theta*theta); + + Matx31d dt; + dt(0) = vx*(sinc + wx*wx*msinc) + vy*(wx*wy*msinc - wz*mcosc) + vz*(wx*wz*msinc + wy*mcosc); + dt(1) = vx*(wx*wy*msinc + wz*mcosc) + vy*(sinc + wy*wy*msinc) + vz*(wy*wz*msinc - wx*mcosc); + dt(2) = vx*(wx*wz*msinc - wy*mcosc) + vy*(wy*wz*msinc + wx*mcosc) + vz*(sinc + wz*wz*msinc); + + R1 = R.t(); + t1 = -R1 * dt; +} + +enum SolvePnPRefineMethod { + SOLVEPNP_REFINE_LM = 0, + SOLVEPNP_REFINE_VVS = 1 +}; + +static void solvePnPRefine(InputArray _objectPoints, InputArray _imagePoints, + InputArray _cameraMatrix, InputArray _distCoeffs, + InputOutputArray _rvec, InputOutputArray _tvec, + SolvePnPRefineMethod _flags, + TermCriteria _criteria=TermCriteria(TermCriteria::EPS+TermCriteria::COUNT, 20, FLT_EPSILON), + double _vvslambda=1) +{ + CV_INSTRUMENT_REGION(); + + Mat opoints_ = _objectPoints.getMat(), ipoints_ = _imagePoints.getMat(); + Mat opoints, ipoints; + opoints_.convertTo(opoints, CV_64F); + ipoints_.convertTo(ipoints, CV_64F); + int npoints = opoints.checkVector(3, CV_64F); + CV_Assert( npoints >= 3 && npoints == ipoints.checkVector(2, CV_64F) ); + CV_Assert( !_rvec.empty() && !_tvec.empty() ); + + int rtype = _rvec.type(), ttype = _tvec.type(); + Size rsize = _rvec.size(), tsize = _tvec.size(); + CV_Assert( (rtype == CV_32FC1 || rtype == CV_64FC1) && + (ttype == CV_32FC1 || ttype == CV_64FC1) ); + CV_Assert( (rsize == Size(1, 3) || rsize == Size(3, 1)) && + (tsize == Size(1, 3) || tsize == Size(3, 1)) ); + + Mat cameraMatrix0 = _cameraMatrix.getMat(); + Mat distCoeffs0 = _distCoeffs.getMat(); + Mat cameraMatrix = Mat_<double>(cameraMatrix0); + Mat distCoeffs = Mat_<double>(distCoeffs0); + + if (_flags == SOLVEPNP_REFINE_LM) + { + Mat rvec0 = _rvec.getMat(), tvec0 = _tvec.getMat(); + Mat rvec, tvec; + rvec0.reshape(1, 3).convertTo(rvec, CV_64F); // 1x3 and 3x1 inputs are both accepted: work on a 3x1 column + tvec0.reshape(1, 3).convertTo(tvec, CV_64F); + + Mat params(6, 1, CV_64FC1); + for (int i = 0; i < 3; i++) + { + params.at<double>(i,0) = rvec.at<double>(i,0); + params.at<double>(i+3,0) = tvec.at<double>(i,0); + } + + 
LMSolver::create(makePtr<SolvePnPRefineLMCallback>(opoints, ipoints, cameraMatrix, distCoeffs), _criteria.maxCount, _criteria.epsilon)->run(params); + + params.rowRange(0, 3).reshape(1, rvec0.rows).convertTo(rvec0, rvec0.depth()); // write back in the caller's original layout + params.rowRange(3, 6).reshape(1, tvec0.rows).convertTo(tvec0, tvec0.depth()); + } + else if (_flags == SOLVEPNP_REFINE_VVS) + { + Mat rvec0 = _rvec.getMat(), tvec0 = _tvec.getMat(); + Mat rvec, tvec; + rvec0.reshape(1, 3).convertTo(rvec, CV_64F); // 1x3 and 3x1 inputs are both accepted: work on a 3x1 column + tvec0.reshape(1, 3).convertTo(tvec, CV_64F); + + vector<Point2d> ipoints_normalized; + undistortPoints(ipoints, ipoints_normalized, cameraMatrix, distCoeffs); + Mat sd = Mat(ipoints_normalized).reshape(1, npoints*2); + Mat objectPoints0 = opoints.reshape(1, npoints); + Mat imagePoints0 = ipoints.reshape(1, npoints*2); + Mat L(npoints*2, 6, CV_64FC1), s(npoints*2, 1, CV_64FC1); + + double residuals_1 = std::numeric_limits<double>::max(), residuals = 0; + Mat err; + Mat R; + Rodrigues(rvec, R); + for (int iter = 0; iter < _criteria.maxCount; iter++) + { + computeInteractionMatrixAndResiduals(objectPoints0, R, tvec, L, s); + err = s - sd; + + Mat Lp = L.inv(cv::DECOMP_SVD); + Mat dq = -_vvslambda * Lp * err; + + Mat R1, t1; + exponentialMapToSE3Inv(dq, R1, t1); + R = R1 * R; + tvec = R1 * tvec + t1; + + residuals_1 = residuals; + Mat res = err.t()*err; + residuals = res.at<double>(0,0); + + if (std::fabs(residuals - residuals_1) < _criteria.epsilon) + break; + } + + Rodrigues(R, rvec); + rvec.reshape(1, rvec0.rows).convertTo(rvec0, rvec0.depth()); // write back in the caller's original layout + tvec.reshape(1, tvec0.rows).convertTo(tvec0, tvec0.depth()); + } +} + +void solvePnPRefineLM(InputArray _objectPoints, InputArray _imagePoints, + InputArray _cameraMatrix, InputArray _distCoeffs, + InputOutputArray _rvec, InputOutputArray _tvec, + TermCriteria _criteria) +{ + CV_INSTRUMENT_REGION(); + solvePnPRefine(_objectPoints, _imagePoints, _cameraMatrix, _distCoeffs, _rvec, _tvec, SOLVEPNP_REFINE_LM, _criteria); +} + +void solvePnPRefineVVS(InputArray _objectPoints, InputArray _imagePoints, + InputArray _cameraMatrix, InputArray _distCoeffs, + InputOutputArray _rvec, InputOutputArray _tvec, + TermCriteria 
_criteria, double _VVSlambda) +{ + CV_INSTRUMENT_REGION(); + solvePnPRefine(_objectPoints, _imagePoints, _cameraMatrix, _distCoeffs, _rvec, _tvec, SOLVEPNP_REFINE_VVS, _criteria, _VVSlambda); +} + } diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index 2359fa9282..adf7758c92 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -589,4 +589,330 @@ TEST(Calib3d_SolvePnP, iterativeInitialGuess3pts) } } +TEST(Calib3d_SolvePnP, refine3pts) +{ + { + Matx33d intrinsics(605.4, 0.0, 317.35, + 0.0, 601.2, 242.63, + 0.0, 0.0, 1.0); + + double L = 0.1; + vector p3d; + p3d.push_back(Point3d(-L, -L, 0.0)); + p3d.push_back(Point3d(L, -L, 0.0)); + p3d.push_back(Point3d(L, L, 0.0)); + + Mat rvec_ground_truth = (Mat_(3,1) << 0.3, -0.2, 0.75); + Mat tvec_ground_truth = (Mat_(3,1) << 0.15, -0.2, 1.5); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + { + Mat rvec_est = (Mat_(3,1) << 0.2, -0.1, 0.6); + Mat tvec_est = (Mat_(3,1) << 0.05, -0.05, 1.0); + + solvePnPRefineLM(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Levenberg-Marquardt" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << 0.2, -0.1, 0.6); + Mat tvec_est = (Mat_(3,1) << 0.05, -0.05, 1.0); + + solvePnPRefineVVS(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Virtual Visual Servoing" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << 
std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + } + + { + Matx33f intrinsics(605.4f, 0.0f, 317.35f, + 0.0f, 601.2f, 242.63f, + 0.0f, 0.0f, 1.0f); + + float L = 0.1f; + vector p3d; + p3d.push_back(Point3f(-L, -L, 0.0f)); + p3d.push_back(Point3f(L, -L, 0.0f)); + p3d.push_back(Point3f(L, L, 0.0f)); + + Mat rvec_ground_truth = (Mat_(3,1) << -0.75f, 0.4f, 0.34f); + Mat tvec_ground_truth = (Mat_(3,1) << -0.15f, 0.35f, 1.58f); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + { + Mat rvec_est = (Mat_(3,1) << -0.5f, 0.2f, 0.2f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.2f, 1.0f); + + solvePnPRefineLM(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Levenberg-Marquardt" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << -0.5f, 0.2f, 0.2f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.2f, 1.0f); + + solvePnPRefineVVS(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Virtual Visual Servoing" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + 
EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + } +} + +TEST(Calib3d_SolvePnP, refine) +{ + //double + { + Matx33d intrinsics(605.4, 0.0, 317.35, + 0.0, 601.2, 242.63, + 0.0, 0.0, 1.0); + + double L = 0.1; + vector p3d; + p3d.push_back(Point3d(-L, -L, 0.0)); + p3d.push_back(Point3d(L, -L, 0.0)); + p3d.push_back(Point3d(L, L, 0.0)); + p3d.push_back(Point3d(-L, L, L/2)); + p3d.push_back(Point3d(0, 0, -L/2)); + + Mat rvec_ground_truth = (Mat_(3,1) << 0.3, -0.2, 0.75); + Mat tvec_ground_truth = (Mat_(3,1) << 0.15, -0.2, 1.5); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + { + Mat rvec_est = (Mat_(3,1) << 0.1, -0.1, 0.1); + Mat tvec_est = (Mat_(3,1) << 0.0, -0.5, 1.0); + + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); + + cout << "\nmethod: Levenberg-Marquardt (C API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << 0.1, -0.1, 0.1); + Mat tvec_est = (Mat_(3,1) << 0.0, -0.5, 1.0); + + solvePnPRefineLM(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Levenberg-Marquardt (C++ API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << 0.1, -0.1, 
0.1); + Mat tvec_est = (Mat_(3,1) << 0.0, -0.5, 1.0); + + solvePnPRefineVVS(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Virtual Visual Servoing" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + } + + //float + { + Matx33f intrinsics(605.4f, 0.0f, 317.35f, + 0.0f, 601.2f, 242.63f, + 0.0f, 0.0f, 1.0f); + + float L = 0.1f; + vector p3d; + p3d.push_back(Point3f(-L, -L, 0.0f)); + p3d.push_back(Point3f(L, -L, 0.0f)); + p3d.push_back(Point3f(L, L, 0.0f)); + p3d.push_back(Point3f(-L, L, L/2)); + p3d.push_back(Point3f(0, 0, -L/2)); + + Mat rvec_ground_truth = (Mat_(3,1) << -0.75f, 0.4f, 0.34f); + Mat tvec_ground_truth = (Mat_(3,1) << -0.15f, 0.35f, 1.58f); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + { + Mat rvec_est = (Mat_(3,1) << -0.1f, 0.1f, 0.1f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.0f, 1.0f); + + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, true, SOLVEPNP_ITERATIVE); + + cout << "\nmethod: Levenberg-Marquardt (C API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << -0.1f, 0.1f, 0.1f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.0f, 1.0f); + + solvePnPRefineLM(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + 
cout << "\nmethod: Levenberg-Marquardt (C++ API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + { + Mat rvec_est = (Mat_(3,1) << -0.1f, 0.1f, 0.1f); + Mat tvec_est = (Mat_(3,1) << 0.0f, 0.0f, 1.0f); + + solvePnPRefineVVS(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est); + + cout << "\nmethod: Virtual Visual Servoing" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-6); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-6); + } + } + + //refine after solvePnP + { + Matx33d intrinsics(605.4, 0.0, 317.35, + 0.0, 601.2, 242.63, + 0.0, 0.0, 1.0); + + double L = 0.1; + vector p3d; + p3d.push_back(Point3d(-L, -L, 0.0)); + p3d.push_back(Point3d(L, -L, 0.0)); + p3d.push_back(Point3d(L, L, 0.0)); + p3d.push_back(Point3d(-L, L, L/2)); + p3d.push_back(Point3d(0, 0, -L/2)); + + Mat rvec_ground_truth = (Mat_(3,1) << 0.3, -0.2, 0.75); + Mat tvec_ground_truth = (Mat_(3,1) << 0.15, -0.2, 1.5); + + vector p2d; + projectPoints(p3d, rvec_ground_truth, tvec_ground_truth, intrinsics, noArray(), p2d); + + //add small Gaussian noise + RNG& rng = theRNG(); + for (size_t i = 0; i < p2d.size(); i++) + { + p2d[i].x += rng.gaussian(5e-2); + p2d[i].y += rng.gaussian(5e-2); + } + + Mat rvec_est, tvec_est; + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est, tvec_est, false, SOLVEPNP_EPNP); + + { + + Mat rvec_est_refine = rvec_est.clone(), 
tvec_est_refine = tvec_est.clone(); + solvePnP(p3d, p2d, intrinsics, noArray(), rvec_est_refine, tvec_est_refine, true, SOLVEPNP_ITERATIVE); + + cout << "\nmethod: Levenberg-Marquardt (C API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est (EPnP): " << rvec_est.t() << std::endl; + cout << "rvec_est_refine: " << rvec_est_refine.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est (EPnP): " << tvec_est.t() << std::endl; + cout << "tvec_est_refine: " << tvec_est_refine.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-2); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-3); + + EXPECT_LT(cvtest::norm(rvec_ground_truth, rvec_est_refine, NORM_INF), cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF)); + EXPECT_LT(cvtest::norm(tvec_ground_truth, tvec_est_refine, NORM_INF), cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF)); + } + { + Mat rvec_est_refine = rvec_est.clone(), tvec_est_refine = tvec_est.clone(); + solvePnPRefineLM(p3d, p2d, intrinsics, noArray(), rvec_est_refine, tvec_est_refine); + + cout << "\nmethod: Levenberg-Marquardt (C++ API)" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "rvec_est_refine: " << rvec_est_refine.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + cout << "tvec_est_refine: " << tvec_est_refine.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-2); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-3); + + EXPECT_LT(cvtest::norm(rvec_ground_truth, rvec_est_refine, NORM_INF), cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF)); + EXPECT_LT(cvtest::norm(tvec_ground_truth, tvec_est_refine, NORM_INF), cvtest::norm(tvec_ground_truth, 
tvec_est, NORM_INF)); + } + { + Mat rvec_est_refine = rvec_est.clone(), tvec_est_refine = tvec_est.clone(); + solvePnPRefineVVS(p3d, p2d, intrinsics, noArray(), rvec_est_refine, tvec_est_refine); + + cout << "\nmethod: Virtual Visual Servoing" << endl; + cout << "rvec_ground_truth: " << rvec_ground_truth.t() << std::endl; + cout << "rvec_est: " << rvec_est.t() << std::endl; + cout << "rvec_est_refine: " << rvec_est_refine.t() << std::endl; + cout << "tvec_ground_truth: " << tvec_ground_truth.t() << std::endl; + cout << "tvec_est: " << tvec_est.t() << std::endl; + cout << "tvec_est_refine: " << tvec_est_refine.t() << std::endl; + + EXPECT_LE(cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF), 1e-2); + EXPECT_LE(cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF), 1e-3); + + EXPECT_LT(cvtest::norm(rvec_ground_truth, rvec_est_refine, NORM_INF), cvtest::norm(rvec_ground_truth, rvec_est, NORM_INF)); + EXPECT_LT(cvtest::norm(tvec_ground_truth, tvec_est_refine, NORM_INF), cvtest::norm(tvec_ground_truth, tvec_est, NORM_INF)); + } + } +} + }} // namespace diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index 7f6d6b0fb9..483cc8f269 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -87,9 +87,41 @@ # include # define CV_AVX_512F 1 #endif +#ifdef CV_CPU_COMPILE_AVX512_COMMON +# define CV_AVX512_COMMON 1 +# define CV_AVX_512CD 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNL +# define CV_AVX512_KNL 1 +# define CV_AVX_512ER 1 +# define CV_AVX_512PF 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_KNM +# define CV_AVX512_KNM 1 +# define CV_AVX_5124FMAPS 1 +# define CV_AVX_5124VNNIW 1 +# define CV_AVX_512VPOPCNTDQ 1 +#endif #ifdef CV_CPU_COMPILE_AVX512_SKX -# include # define CV_AVX512_SKX 1 +# define CV_AVX_512VL 1 +# define CV_AVX_512BW 1 +# define CV_AVX_512DQ 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CNL +# define CV_AVX512_CNL 1 +# define 
CV_AVX_512IFMA 1 +# define CV_AVX_512VBMI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_CEL +# define CV_AVX512_CEL 1 +# define CV_AVX_512VNNI 1 +#endif +#ifdef CV_CPU_COMPILE_AVX512_ICL +# define CV_AVX512_ICL 1 +# define CV_AVX_512VBMI2 1 +# define CV_AVX_512BITALG 1 +# define CV_AVX_512VPOPCNTDQ 1 #endif #ifdef CV_CPU_COMPILE_FMA3 # define CV_FMA3 1 @@ -223,9 +255,10 @@ struct VZeroUpperGuard { #ifndef CV_AVX_512ER # define CV_AVX_512ER 0 #endif -#ifndef CV_AVX_512IFMA512 -# define CV_AVX_512IFMA512 0 +#ifndef CV_AVX_512IFMA +# define CV_AVX_512IFMA 0 #endif +#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated #ifndef CV_AVX_512PF # define CV_AVX_512PF 0 #endif @@ -235,9 +268,45 @@ struct VZeroUpperGuard { #ifndef CV_AVX_512VL # define CV_AVX_512VL 0 #endif +#ifndef CV_AVX_5124FMAPS +# define CV_AVX_5124FMAPS 0 +#endif +#ifndef CV_AVX_5124VNNIW +# define CV_AVX_5124VNNIW 0 +#endif +#ifndef CV_AVX_512VPOPCNTDQ +# define CV_AVX_512VPOPCNTDQ 0 +#endif +#ifndef CV_AVX_512VNNI +# define CV_AVX_512VNNI 0 +#endif +#ifndef CV_AVX_512VBMI2 +# define CV_AVX_512VBMI2 0 +#endif +#ifndef CV_AVX_512BITALG +# define CV_AVX_512BITALG 0 +#endif +#ifndef CV_AVX512_COMMON +# define CV_AVX512_COMMON 0 +#endif +#ifndef CV_AVX512_KNL +# define CV_AVX512_KNL 0 +#endif +#ifndef CV_AVX512_KNM +# define CV_AVX512_KNM 0 +#endif #ifndef CV_AVX512_SKX # define CV_AVX512_SKX 0 #endif +#ifndef CV_AVX512_CNL +# define CV_AVX512_CNL 0 +#endif +#ifndef CV_AVX512_CEL +# define CV_AVX512_CEL 0 +#endif +#ifndef CV_AVX512_ICL +# define CV_AVX512_ICL 0 +#endif #ifndef CV_NEON # define CV_NEON 0 diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h index ad1339796d..90e0e9b9e3 100644 --- a/modules/core/include/opencv2/core/cv_cpu_helper.h +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h @@ -252,6 +252,69 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) 
CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 1 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON +# define CV_TRY_AVX512_COMMON 1 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON)) +# define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) +#else +# define CV_TRY_AVX512_COMMON 0 +# define CV_CPU_FORCE_AVX512_COMMON 0 +# define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0 +# define CV_CPU_CALL_AVX512_COMMON(fn, args) +# define CV_CPU_CALL_AVX512_COMMON_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 1 +# define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL +# define CV_TRY_AVX512_KNL 1 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL)) +# define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) +#else +# define CV_TRY_AVX512_KNL 0 +# define CV_CPU_FORCE_AVX512_KNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNL 0 +# define CV_CPU_CALL_AVX512_KNL(fn, args) +# define CV_CPU_CALL_AVX512_KNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 1 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 1 +# define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM +# define CV_TRY_AVX512_KNM 1 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM)) +# define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) +#else +# define CV_TRY_AVX512_KNM 0 +# define CV_CPU_FORCE_AVX512_KNM 0 +# define CV_CPU_HAS_SUPPORT_AVX512_KNM 0 +# define CV_CPU_CALL_AVX512_KNM(fn, args) +# define CV_CPU_CALL_AVX512_KNM_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...) CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX # define CV_TRY_AVX512_SKX 1 # define CV_CPU_FORCE_AVX512_SKX 1 @@ -273,6 +336,69 @@ #endif #define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 1 +# define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL +# define CV_TRY_AVX512_CNL 1 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL)) +# define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) +#else +# define CV_TRY_AVX512_CNL 0 +# define CV_CPU_FORCE_AVX512_CNL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CNL 0 +# define CV_CPU_CALL_AVX512_CNL(fn, args) +# define CV_CPU_CALL_AVX512_CNL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CEL +# define CV_TRY_AVX512_CEL 1 +# define CV_CPU_FORCE_AVX512_CEL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL 1 +# define CV_CPU_CALL_AVX512_CEL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) return (opt_AVX512_CEL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CEL +# define CV_TRY_AVX512_CEL 1 +# define CV_CPU_FORCE_AVX512_CEL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL (cv::checkHardwareSupport(CV_CPU_AVX512_CEL)) +# define CV_CPU_CALL_AVX512_CEL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CEL) return (opt_AVX512_CEL::fn args) +#else +# define CV_TRY_AVX512_CEL 0 +# define CV_CPU_FORCE_AVX512_CEL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_CEL 0 +# define CV_CPU_CALL_AVX512_CEL(fn, args) +# define CV_CPU_CALL_AVX512_CEL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_CEL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_CEL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 1 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 1 +# define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL +# define CV_TRY_AVX512_ICL 1 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL)) +# define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) +#else +# define CV_TRY_AVX512_ICL 0 +# define CV_CPU_FORCE_AVX512_ICL 0 +# define CV_CPU_HAS_SUPPORT_AVX512_ICL 0 +# define CV_CPU_CALL_AVX512_ICL(fn, args) +# define CV_CPU_CALL_AVX512_ICL_(fn, args) +#endif +#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...) 
CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) + #if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON # define CV_TRY_NEON 1 # define CV_CPU_FORCE_NEON 1 diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index caa4a9e4c7..0b301623b0 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -235,6 +235,12 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard #define CV_CPU_AVX_512PF 19 #define CV_CPU_AVX_512VBMI 20 #define CV_CPU_AVX_512VL 21 +#define CV_CPU_AVX_512VBMI2 22 +#define CV_CPU_AVX_512VNNI 23 +#define CV_CPU_AVX_512BITALG 24 +#define CV_CPU_AVX_512VPOPCNTDQ 25 +#define CV_CPU_AVX_5124VNNIW 26 +#define CV_CPU_AVX_5124FMAPS 27 #define CV_CPU_NEON 100 @@ -243,6 +249,12 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard // CPU features groups #define CV_CPU_AVX512_SKX 256 +#define CV_CPU_AVX512_COMMON 257 +#define CV_CPU_AVX512_KNL 258 +#define CV_CPU_AVX512_KNM 259 +#define CV_CPU_AVX512_CNL 260 +#define CV_CPU_AVX512_CEL 261 +#define CV_CPU_AVX512_ICL 262 // when adding to this list remember to update the following enum #define CV_HARDWARE_MAX_FEATURE 512 @@ -273,6 +285,12 @@ enum CpuFeatures { CPU_AVX_512PF = 19, CPU_AVX_512VBMI = 20, CPU_AVX_512VL = 21, + CPU_AVX_512VBMI2 = 22, + CPU_AVX_512VNNI = 23, + CPU_AVX_512BITALG = 24, + CPU_AVX_512VPOPCNTDQ= 25, + CPU_AVX_5124VNNIW = 26, + CPU_AVX_5124FMAPS = 27, CPU_NEON = 100, @@ -280,6 +298,12 @@ enum CpuFeatures { CPU_VSX3 = 201, CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL + CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512 + CPU_AVX512_KNL = 258, //!< Knights Landing with AVX-512F/CD/ER/PF + CPU_AVX512_KNM = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ + 
CPU_AVX512_CNL = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI + CPU_AVX512_CEL = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI + CPU_AVX512_ICL = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE }; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 894dc6cbef..1be4a54959 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -356,6 +356,12 @@ struct HWFeatures g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF"; g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI"; g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL"; + g_hwFeatureNames[CPU_AVX_512VBMI2] = "AVX512VBMI2"; + g_hwFeatureNames[CPU_AVX_512VNNI] = "AVX512VNNI"; + g_hwFeatureNames[CPU_AVX_512BITALG] = "AVX512BITALG"; + g_hwFeatureNames[CPU_AVX_512VPOPCNTDQ] = "AVX512VPOPCNTDQ"; + g_hwFeatureNames[CPU_AVX_5124VNNIW] = "AVX5124VNNIW"; + g_hwFeatureNames[CPU_AVX_5124FMAPS] = "AVX5124FMAPS"; g_hwFeatureNames[CPU_NEON] = "NEON"; @@ -363,6 +369,11 @@ struct HWFeatures g_hwFeatureNames[CPU_VSX3] = "VSX3"; g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX"; + g_hwFeatureNames[CPU_AVX512_KNL] = "AVX512-KNL"; + g_hwFeatureNames[CPU_AVX512_KNM] = "AVX512-KNM"; + g_hwFeatureNames[CPU_AVX512_CNL] = "AVX512-CNL"; + g_hwFeatureNames[CPU_AVX512_CEL] = "AVX512-CEL"; + g_hwFeatureNames[CPU_AVX512_ICL] = "AVX512-ICL"; } void initialize(void) @@ -404,15 +415,21 @@ struct HWFeatures have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0; - have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; - have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; - have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0; - have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; - have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; - have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; - have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; - 
have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; - have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; + have[CV_CPU_AVX_512IFMA] = (cpuid_data_ex[1] & (1<<21)) != 0; + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; + have[CV_CPU_AVX_512VBMI2] = (cpuid_data_ex[2] & (1<<6)) != 0; + have[CV_CPU_AVX_512VNNI] = (cpuid_data_ex[2] & (1<<11)) != 0; + have[CV_CPU_AVX_512BITALG] = (cpuid_data_ex[2] & (1<<12)) != 0; + have[CV_CPU_AVX_512VPOPCNTDQ] = (cpuid_data_ex[2] & (1<<14)) != 0; + have[CV_CPU_AVX_5124VNNIW] = (cpuid_data_ex[3] & (1<<2)) != 0; + have[CV_CPU_AVX_5124FMAPS] = (cpuid_data_ex[3] & (1<<3)) != 0; bool have_AVX_OS_support = true; bool have_AVX512_OS_support = true; @@ -446,15 +463,38 @@ struct HWFeatures have[CV_CPU_AVX_512CD] = false; have[CV_CPU_AVX_512DQ] = false; have[CV_CPU_AVX_512ER] = false; - have[CV_CPU_AVX_512IFMA512] = false; + have[CV_CPU_AVX_512IFMA] = false; have[CV_CPU_AVX_512PF] = false; have[CV_CPU_AVX_512VBMI] = false; have[CV_CPU_AVX_512VL] = false; + have[CV_CPU_AVX_512VBMI2] = false; + have[CV_CPU_AVX_512VNNI] = false; + have[CV_CPU_AVX_512BITALG] = false; + have[CV_CPU_AVX_512VPOPCNTDQ] = false; + have[CV_CPU_AVX_5124VNNIW] = false; + have[CV_CPU_AVX_5124FMAPS] = false; } - if (have[CV_CPU_AVX_512F]) + have[CV_CPU_AVX512_COMMON] = have[CV_CPU_AVX_512F] && have[CV_CPU_AVX_512CD]; + if (have[CV_CPU_AVX512_COMMON]) { - have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512F] & have[CV_CPU_AVX_512CD] & have[CV_CPU_AVX_512BW] & have[CV_CPU_AVX_512DQ] & have[CV_CPU_AVX_512VL]; + 
have[CV_CPU_AVX512_KNL] = have[CV_CPU_AVX_512ER] && have[CV_CPU_AVX_512PF]; + have[CV_CPU_AVX512_KNM] = have[CV_CPU_AVX512_KNL] && have[CV_CPU_AVX_5124FMAPS] && + have[CV_CPU_AVX_5124VNNIW] && have[CV_CPU_AVX_512VPOPCNTDQ]; + have[CV_CPU_AVX512_SKX] = have[CV_CPU_AVX_512BW] && have[CV_CPU_AVX_512DQ] && have[CV_CPU_AVX_512VL]; + have[CV_CPU_AVX512_CNL] = have[CV_CPU_AVX512_SKX] && have[CV_CPU_AVX_512IFMA] && have[CV_CPU_AVX_512VBMI]; + have[CV_CPU_AVX512_CEL] = have[CV_CPU_AVX512_CNL] && have[CV_CPU_AVX_512VNNI]; + have[CV_CPU_AVX512_ICL] = have[CV_CPU_AVX512_CEL] && have[CV_CPU_AVX_512VBMI2] && + have[CV_CPU_AVX_512BITALG] && have[CV_CPU_AVX_512VPOPCNTDQ]; + } + else + { + have[CV_CPU_AVX512_KNL] = false; + have[CV_CPU_AVX512_KNM] = false; + have[CV_CPU_AVX512_SKX] = false; + have[CV_CPU_AVX512_CNL] = false; + have[CV_CPU_AVX512_CEL] = false; + have[CV_CPU_AVX512_ICL] = false; } } #endif // CV_CPUID_X86 @@ -621,11 +661,14 @@ struct HWFeatures } if (isBaseline) { - if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i)); + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'." 
+ "This has very limited effect, because code optimizations for this feature are executed unconditionally " + "in the most cases.\n", getHWFeatureNameSafe(i)); } if (!have[i]) { - if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i)); + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", + getHWFeatureNameSafe(i)); } have[i] = false; diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index bc4c782ca7..95f5b57de6 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -126,7 +126,7 @@ public: inpShape.push_back(inputs[0].size[i]); outShape.push_back(outputs[0].size[i]); } - getConvPoolPaddings(inpShape, outShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + getConvPoolPaddings(inpShape, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { for (int i = 0; i < pads_begin.size(); i++) { if (pads_begin[i] != pads_end[i]) @@ -1331,7 +1331,7 @@ public: inpShape.push_back(inputs[0].size[i]); outShape.push_back(outputs[0].size[i]); } - getConvPoolPaddings(outShape, inpShape, kernel_size, strides, padMode, dilations, pads_begin, pads_end); + getConvPoolPaddings(outShape, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { for (int i = 0; i < pads_begin.size(); i++) { if (pads_begin[i] != pads_end[i]) diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index 627f79c784..29d863d2ad 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -214,25 +214,25 @@ void getConvPoolOutParams(const std::vector& inp, const std::vector } } -void getConvPoolPaddings(const std::vector& inp, const std::vector& out, - const std::vector& kernel, const std::vector& strides, 
- const String &padMode, const std::vector& dilation, +void getConvPoolPaddings(const std::vector& inp, const std::vector& kernel, + const std::vector& strides, const String &padMode, std::vector& pads_begin, std::vector& pads_end) { - if (padMode == "VALID") + if (padMode == "SAME" || padMode == "VALID") { pads_begin.assign(kernel.size(), 0); pads_end.assign(kernel.size(), 0); } - else if (padMode == "SAME") + if (padMode == "SAME") { - CV_Assert_N(kernel.size() == dilation.size(), kernel.size() == strides.size(), - kernel.size() == inp.size(), kernel.size() == out.size()); - pads_begin.resize(kernel.size()); - pads_end.resize(kernel.size()); + CV_Assert_N(kernel.size() == strides.size(), kernel.size() == inp.size()); for (int i = 0; i < pads_begin.size(); i++) { - int pad = ((out[i] - 1) * strides[i] + dilation[i] * (kernel[i] - 1) + 1 - inp[i]) / 2; - pads_begin[i] = pads_end[i] = std::max(0, pad); + // There are test cases with stride > kernel. + if (strides[i] <= kernel[i]) + { + int pad = (kernel[i] - 1 - (inp[i] - 1 + strides[i]) % strides[i]) / 2; + pads_begin[i] = pads_end[i] = pad; + } } } } diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index fd1e430a54..26c1ce62d5 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -69,9 +69,8 @@ void getConvPoolOutParams(const std::vector& inp, const std::vector const std::vector& stride, const String &padMode, const std::vector& dilation, std::vector& out); - void getConvPoolPaddings(const std::vector& inp, const std::vector& out, - const std::vector& kernel, const std::vector& strides, - const String &padMode, const std::vector& dilation, + void getConvPoolPaddings(const std::vector& inp, const std::vector& kernel, + const std::vector& strides, const String &padMode, std::vector& pads_begin, std::vector& pads_end); } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp 
index b087cb0219..7316347f2e 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -144,7 +144,7 @@ public: kernel_size = std::vector(inp.begin(), inp.end()); } - getConvPoolPaddings(inp, out, kernel_size, strides, padMode, std::vector(kernel_size.size(), 1), pads_begin, pads_end); + getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); if (pads_begin.size() == 2) { pad_t = pads_begin[0]; pad_l = pads_begin[1]; diff --git a/modules/dnn/src/op_inf_engine.cpp b/modules/dnn/src/op_inf_engine.cpp index 2657b0c40e..9e072dc91d 100644 --- a/modules/dnn/src/op_inf_engine.cpp +++ b/modules/dnn/src/op_inf_engine.cpp @@ -958,7 +958,16 @@ Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob) // NOTE: Inference Engine sizes are reversed. std::vector dims = blob->dims(); std::vector size(dims.rbegin(), dims.rend()); - return Mat(size, CV_32F, (void*)blob->buffer()); + + int type = -1; + switch (blob->precision()) + { + case InferenceEngine::Precision::FP32: type = CV_32F; break; + case InferenceEngine::Precision::U8: type = CV_8U; break; + default: + CV_Error(Error::StsNotImplemented, "Unsupported blob precision"); + } + return Mat(size, type, (void*)blob->buffer()); } bool InfEngineBackendLayer::getMemoryShapes(const std::vector &inputs, diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp index aaa1c09ee4..8e57d557db 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.cpp @@ -770,43 +770,47 @@ void RemoveIdentityOps(tensorflow::GraphDef& net) } } -Mat getTensorContent(const tensorflow::TensorProto &tensor) +Mat getTensorContent(const tensorflow::TensorProto &tensor, bool copy) { const std::string& content = tensor.tensor_content(); + Mat m; switch (tensor.dtype()) { case tensorflow::DT_FLOAT: { if (!content.empty()) - return Mat(1, content.size() / sizeof(float), 
CV_32FC1, (void*)content.c_str()).clone(); + m = Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()); else { const RepeatedField& field = tensor.float_val(); CV_Assert(!field.empty()); - return Mat(1, field.size(), CV_32FC1, (void*)field.data()).clone(); + m = Mat(1, field.size(), CV_32FC1, (void*)field.data()); } + break; } case tensorflow::DT_DOUBLE: { if (!content.empty()) - return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone(); + m = Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()); else { const RepeatedField& field = tensor.double_val(); CV_Assert(!field.empty()); - return Mat(1, field.size(), CV_64FC1, (void*)field.data()).clone(); + m = Mat(1, field.size(), CV_64FC1, (void*)field.data()); } + break; } case tensorflow::DT_INT32: { if (!content.empty()) - return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone(); + m = Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()); else { const RepeatedField& field = tensor.int_val(); CV_Assert(!field.empty()); - return Mat(1, field.size(), CV_32SC1, (void*)field.data()).clone(); + m = Mat(1, field.size(), CV_32SC1, (void*)field.data()); } + break; } case tensorflow::DT_HALF: { @@ -825,20 +829,20 @@ Mat getTensorContent(const tensorflow::TensorProto &tensor) } // Reinterpret as a signed shorts just for a convertFp16 call. Mat halfsSigned(halfs.size(), CV_16SC1, halfs.data); - Mat floats(halfs.size(), CV_32FC1); - convertFp16(halfsSigned, floats); - return floats; + convertFp16(halfsSigned, m); + break; } case tensorflow::DT_QUINT8: { CV_Assert(!content.empty()); - return Mat(1, content.size(), CV_8UC1, (void*)content.c_str()).clone(); + m = Mat(1, content.size(), CV_8UC1, (void*)content.c_str()); + break; } default: CV_Error(Error::StsError, "Tensor's data type is not supported"); break; } - return Mat(); + return copy ? 
m.clone() : m; } void releaseTensor(tensorflow::TensorProto* tensor) diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp index 986fc3c06e..8a77dda6d4 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp @@ -21,7 +21,7 @@ void RemoveIdentityOps(tensorflow::GraphDef& net); void simplifySubgraphs(tensorflow::GraphDef& net); -Mat getTensorContent(const tensorflow::TensorProto &tensor); +Mat getTensorContent(const tensorflow::TensorProto &tensor, bool copy = true); void releaseTensor(tensorflow::TensorProto* tensor); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 298d532dc7..41985c834d 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -109,7 +109,7 @@ void parseTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) dstBlob.create(shape, CV_32F); - Mat tensorContent = getTensorContent(tensor); + Mat tensorContent = getTensorContent(tensor, /*no copy*/false); int size = tensorContent.total(); CV_Assert(size == (int)dstBlob.total()); @@ -509,7 +509,7 @@ void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &ds dstBlob.create(shape, CV_32F); - Mat tensorContent = getTensorContent(tensor); + Mat tensorContent = getTensorContent(tensor, /*no copy*/false); int size = tensorContent.total(); CV_Assert(size == (int)dstBlob.total()); diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 7dad46d331..ea2929c20c 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -345,11 +345,12 @@ TEST(Net, forwardAndRetrieve) #ifdef HAVE_INF_ENGINE // This test runs network in synchronous mode for different inputs and then // runs the same model asynchronously for the same inputs. 
-typedef testing::TestWithParam Async; +typedef testing::TestWithParam > Async; TEST_P(Async, set_and_forward_single) { static const int kTimeout = 5000; // in milliseconds. - const int target = GetParam(); + const int dtype = get<0>(GetParam()); + const int target = get<1>(GetParam()); const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : ""; const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); @@ -367,8 +368,8 @@ TEST_P(Async, set_and_forward_single) int blobSize[] = {2, 6, 75, 113}; for (int i = 0; i < numInputs; ++i) { - inputs[i].create(4, &blobSize[0], CV_32FC1); - randu(inputs[i], 0.0f, 1.0f); + inputs[i].create(4, &blobSize[0], dtype); + randu(inputs[i], 0, 255); } // Run synchronously. @@ -394,7 +395,8 @@ TEST_P(Async, set_and_forward_single) TEST_P(Async, set_and_forward_all) { static const int kTimeout = 5000; // in milliseconds. - const int target = GetParam(); + const int dtype = get<0>(GetParam()); + const int target = get<1>(GetParam()); const std::string suffix = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "_fp16" : ""; const std::string& model = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); @@ -413,8 +415,8 @@ TEST_P(Async, set_and_forward_all) int blobSize[] = {2, 6, 75, 113}; for (int i = 0; i < numInputs; ++i) { - inputs[i].create(4, &blobSize[0], CV_32FC1); - randu(inputs[i], 0.0f, 1.0f); + inputs[i].create(4, &blobSize[0], dtype); + randu(inputs[i], 0, 255); } // Run synchronously. 
@@ -441,7 +443,10 @@ TEST_P(Async, set_and_forward_all) } } -INSTANTIATE_TEST_CASE_P(/**/, Async, testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE))); +INSTANTIATE_TEST_CASE_P(/**/, Async, Combine( + Values(CV_32F, CV_8U), + testing::ValuesIn(getAvailableTargets(DNN_BACKEND_INFERENCE_ENGINE)) +)); #endif // HAVE_INF_ENGINE }} // namespace diff --git a/modules/ml/doc/ml_intro.markdown b/modules/ml/doc/ml_intro.markdown index 3bc9b068f2..fb4f1a7bd7 100644 --- a/modules/ml/doc/ml_intro.markdown +++ b/modules/ml/doc/ml_intro.markdown @@ -25,7 +25,7 @@ components: vector responses. - Another optional component is the mask of missing measurements. Most algorithms require all the components in all the training samples be valid, but some other algorithms, such as decision - tress, can handle the cases of missing measurements. + trees, can handle the cases of missing measurements. - In the case of classification problem user may want to give different weights to different classes. This is useful, for example, when: - user wants to shift prediction accuracy towards lower false-alarm rate or higher hit-rate. diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index 35207ca3cd..730e0bbc25 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -274,7 +274,8 @@ def createSSDGraph(modelPath, configPath, outputPath): num_matched_layers = 0 for node in graph_def.node: - if re.match('BoxPredictor_\d/BoxEncodingPredictor/Conv2D', node.name) or \ + if re.match('BoxPredictor_\d/BoxEncodingPredictor/convolution', node.name) or \ + re.match('BoxPredictor_\d/BoxEncodingPredictor/Conv2D', node.name) or \ re.match('WeightSharedConvolutionalBoxPredictor(_\d)*/BoxPredictor/Conv2D', node.name): node.addAttr('loc_pred_transposed', True) num_matched_layers += 1