Merge remote-tracking branch 'upstream/3.4' into merge-3.4

2025-06-07 01:13:28 +08:00 · 2021-12-24 16:35:47 +00:00 · 2021-12-24 16:35:47 +00:00 · 217fea9667
commit 217fea9667
parent 9777fbacf6 655d381ef3
26 changed files with 804 additions and 377 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -223,7 +223,7 @@ OCV_OPTION(BUILD_OPENJPEG           "Build OpenJPEG from source"         (WIN32
 OCV_OPTION(BUILD_JASPER             "Build libjasper from source"        (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_JPEG               "Build libjpeg from source"          (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_PNG                "Build libpng from source"           (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
-OCV_OPTION(BUILD_OPENEXR            "Build openexr from source"          (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
+OCV_OPTION(BUILD_OPENEXR            "Build openexr from source"          (OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_WEBP               "Build WebP from source"             (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_TBB                "Download and build TBB from source" (ANDROID OR OPENCV_FORCE_3RDPARTY_BUILD) )
 OCV_OPTION(BUILD_IPP_IW             "Build IPP IW from source"           (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT )
@ -311,7 +311,7 @@ OCV_OPTION(WITH_JPEG "Include JPEG support" ON
 OCV_OPTION(WITH_WEBP "Include WebP support" ON
  VISIBLE_IF NOT WINRT
  VERIFY HAVE_WEBP)
-OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" BUILD_OPENEXR OR NOT CMAKE_CROSSCOMPILING
+OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" ((WIN32 OR ANDROID OR APPLE) OR BUILD_OPENEXR) OR NOT CMAKE_CROSSCOMPILING
  VISIBLE_IF NOT APPLE_FRAMEWORK AND NOT WINRT
  VERIFY HAVE_OPENEXR)
 OCV_OPTION(WITH_OPENGL "Include OpenGL support" OFF
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@ -268,7 +268,9 @@ if(WITH_OPENEXR)
    set(OPENEXR_LIBRARIES IlmImf)
    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr")
    if(OPENEXR_VERSION)  # check via TARGET doesn't work
+      set(BUILD_OPENEXR ON)
      set(HAVE_OPENEXR YES)
+      set(BUILD_OPENEXR ON)
    endif()
  endif()
 endif()
--- a/cmake/OpenCVFindOpenEXR.cmake
+++ b/cmake/OpenCVFindOpenEXR.cmake
@ -9,12 +9,20 @@
 # OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR.
 #

-find_package(OpenEXR 3.0 CONFIG QUIET)
-if(TARGET OpenEXR::OpenEXR)
-    SET(OPENEXR_FOUND TRUE)
-    SET(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
-    SET(OPENEXR_VERSION ${OpenEXR_VERSION})
-    return()
+if(NOT OPENCV_SKIP_OPENEXR_FIND_PACKAGE)
+  find_package(OpenEXR 3 QUIET)
+  #ocv_cmake_dump_vars(EXR)
+  if(OpenEXR_FOUND)
+    if(TARGET OpenEXR::OpenEXR)  # OpenEXR 3+
+      set(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
+      set(OPENEXR_INCLUDE_PATHS "")
+      set(OPENEXR_VERSION "${OpenEXR_VERSION}")
+      set(OPENEXR_FOUND 1)
+      return()
+    else()
+      message(STATUS "Unsupported find_package(OpenEXR) - missing OpenEXR::OpenEXR target (version ${OpenEXR_VERSION})")
+    endif()
+  endif()
 endif()

 SET(OPENEXR_LIBRARIES "")
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@ -1294,6 +1294,12 @@
  number={2},
  pages={117-135},
 }
+@inproceedings{Zuliani2014RANSACFD,
+  title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...},
+  author={Marco Zuliani},
+  year={2014},
+  url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf}
+}
@inproceedings{forstner1987fast,
  title={A fast operator for detection and precise location of distincs points, corners and center of circular features},
  author={FORSTNER, W},
--- a/doc/tutorials/introduction/windows_install/windows_install.markdown
+++ b/doc/tutorials/introduction/windows_install/windows_install.markdown
@ -367,7 +367,7 @@ libraries). If you do not need the support for some of these, you can just freel
 Set the OpenCV environment variable and add it to the systems path {#tutorial_windows_install_path}
 =================================================================

-First we set an environment variable to make easier our work. This will hold the build directory of
+First, we set an environment variable to make our work easier. This will hold the build directory of
 our OpenCV library that we use in our projects. Start up a command window and enter:
@code
    setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc11     (suggested for Visual Studio 2012 - 32 bit Windows)
--- a/modules/calib3d/doc/calib3d.bib
+++ b/modules/calib3d/doc/calib3d.bib
@ -40,12 +40,13 @@
  publisher={IEEE}
 }

-@inproceedings{Terzakis20,
-  author = {Terzakis, George and Lourakis, Manolis},
-  year = {2020},
-  month = {09},
-  pages = {},
-  title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem}
+@inproceedings{Terzakis2020SQPnP,
+  title={A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem},
+  author={George Terzakis and Manolis Lourakis},
+  booktitle={European Conference on Computer Vision},
+  pages={478--494},
+  year={2020},
+  publisher={Springer International Publishing}
 }

@inproceedings{strobl2011iccv,
--- a/modules/calib3d/doc/solvePnP.markdown
+++ b/modules/calib3d/doc/solvePnP.markdown
@ -0,0 +1,176 @@
+# Perspective-n-Point (PnP) pose computation {#calib3d_solvePnP}
+
+## Pose computation overview
+
+The pose computation problem @cite Marchand16 consists in solving for the rotation and translation that minimizes the reprojection error from 3D-2D point correspondences.
+
+The `solvePnP` and related functions estimate the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward).
+
+![](pnp.jpg)
+
+Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
+using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$ (also denoted \f$ \bf{K} \f$ in the literature):
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  u \\
+  v \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  f_x & 0 & c_x \\
+  0 & f_y & c_y \\
+  0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  1 & 0 & 0 & 0 \\
+  0 & 1 & 0 & 0 \\
+  0 & 0 & 1 & 0
+  \end{bmatrix}
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
+a 3D point expressed in the world frame into the camera frame:
+
+\f[
+  \begin{align*}
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \hspace{0.2em} ^{c}\bf{T}_w
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix} \\
+  \begin{bmatrix}
+  X_c \\
+  Y_c \\
+  Z_c \\
+  1
+  \end{bmatrix} &=
+  \begin{bmatrix}
+  r_{11} & r_{12} & r_{13} & t_x \\
+  r_{21} & r_{22} & r_{23} & t_y \\
+  r_{31} & r_{32} & r_{33} & t_z \\
+  0 & 0 & 0 & 1
+  \end{bmatrix}
+  \begin{bmatrix}
+  X_{w} \\
+  Y_{w} \\
+  Z_{w} \\
+  1
+  \end{bmatrix}
+  \end{align*}
+\f]
+
+## Pose computation methods
+@anchor calib3d_solvePnP_flags
+
+Refer to the cv::SolvePnPMethod enum documentation for the list of possible values. Some details about each method are described below:
+
+-   cv::SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
+this case the function finds such a pose that minimizes reprojection error, that is the sum
+of squared distances between the observed projections "imagePoints" and the projected (using
+cv::projectPoints ) "objectPoints". Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm.
+Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
+-   cv::SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
+"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
+In this case the function requires exactly four object and image points.
+-   cv::SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
+"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
+In this case the function requires exactly four object and image points.
+-   cv::SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
+paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
+-   cv::SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
+Method is based on the paper of J. Hesch and S. Roumeliotis.
+"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
+-   cv::SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
+Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
+F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
+Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
+assuming that both have the same value. Then the cameraMatrix is updated with the estimated
+focal length.
+-   cv::SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
+-   cv::SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
+"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
+It requires 4 coplanar object points defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+-   cv::SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
+Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis2020SQPnP). It requires 3 or more points.
+
+## P3P
+
+The cv::solveP3P() computes an object pose from exactly 3 3D-2D point correspondences. A P3P problem has up to 4 solutions.
+
+@note The solutions are sorted by reprojection errors (lowest to highest).
+
+## PnP
+
+The cv::solvePnP() returns the rotation and the translation vectors that transform a 3D point expressed in the object
+coordinate frame to the camera coordinate frame, using different methods:
+- P3P methods (cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P): need 4 input points to return a unique solution.
+- cv::SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.
+- cv::SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.
+Number of input points must be 4. Object points must be defined in the following order:
+  - point 0: [-squareLength / 2,  squareLength / 2, 0]
+  - point 1: [ squareLength / 2,  squareLength / 2, 0]
+  - point 2: [ squareLength / 2, -squareLength / 2, 0]
+  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+- for all the other flags, number of input points must be >= 4 and object points can be in any configuration.
+
+## Generic PnP
+
+The cv::solvePnPGeneric() allows retrieving all the possible solutions.
+
+Currently, only cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P, cv::SOLVEPNP_IPPE, cv::SOLVEPNP_IPPE_SQUARE, cv::SOLVEPNP_SQPNP can return multiple solutions.
+
+## RANSAC PnP
+
+The cv::solvePnPRansac() computes the object pose wrt. the camera frame using a RANSAC scheme to deal with outliers.
+
+More information can be found in @cite Zuliani2014RANSACFD
+
+## Pose refinement
+
+Pose refinement consists in estimating the rotation and translation that minimizes the reprojection error using a non-linear minimization method and starting from an initial estimate of the solution. OpenCV proposes cv::solvePnPRefineLM() and cv::solvePnPRefineVVS() for this problem.
+
+cv::solvePnPRefineLM() uses a non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 and the current implementation computes the rotation update as a perturbation and not on SO(3).
+
+cv::solvePnPRefineVVS() uses a Gauss-Newton non-linear minimization scheme @cite Marchand16 and with an update of the rotation part computed using the exponential map.
+
+@note at least three 3D-2D point correspondences are necessary.
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@ -454,7 +454,9 @@ enum { LMEDS  = 4,  //!< least-median of squares algorithm
     };

 enum SolvePnPMethod {
-    SOLVEPNP_ITERATIVE   = 0,
+    SOLVEPNP_ITERATIVE   = 0, //!< Pose refinement using non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 \n
+                              //!< Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. \n
+                              //!< Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
    SOLVEPNP_EPNP        = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
    SOLVEPNP_P3P         = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
    SOLVEPNP_DLS         = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
@ -471,7 +473,7 @@ enum SolvePnPMethod {
                              //!<   - point 1: [ squareLength / 2,  squareLength / 2, 0]
                              //!<   - point 2: [ squareLength / 2, -squareLength / 2, 0]
                              //!<   - point 3: [-squareLength / 2, -squareLength / 2, 0]
-    SOLVEPNP_SQPNP       = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20
+    SOLVEPNP_SQPNP       = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis2020SQPnP
 #ifndef CV_DOXYGEN
    SOLVEPNP_MAX_COUNT        //!< Used for count
 #endif
@ -890,6 +892,9 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details
 */

 /** @brief Finds an object pose from 3D-2D point correspondences.
+
+@see @ref calib3d_solvePnP
+
 This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
 coordinate frame to the camera coordinate frame, using different methods:
 - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
@ -916,133 +921,9 @@ the model coordinate system to the camera coordinate system.
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
-@param flags Method for solving a PnP problem:
-   @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
-this case the function finds such a pose that minimizes reprojection error, that is the sum
-of squared distances between the observed projections imagePoints and the projected (using
-@ref projectPoints ) objectPoints .
-   @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
-"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
-In this case the function requires exactly four object and image points.
-   @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
-"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
-In this case the function requires exactly four object and image points.
-   @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
-paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
-   @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
-Method is based on the paper of J. Hesch and S. Roumeliotis.
-"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
-   @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
-Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
-F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
-Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
-assuming that both have the same value. Then the cameraMatrix is updated with the estimated
-focal length.
-   @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
-"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
-   @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
-"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
-It requires 4 coplanar object points defined in the following order:
-  - point 0: [-squareLength / 2,  squareLength / 2, 0]
-  - point 1: [ squareLength / 2,  squareLength / 2, 0]
-  - point 2: [ squareLength / 2, -squareLength / 2, 0]
-  - point 3: [-squareLength / 2, -squareLength / 2, 0]
-   @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
-Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points.
+@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags

-
-The function estimates the object pose given a set of object points, their corresponding image
-projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
-(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
-and the Z-axis forward).
-
-![](pnp.jpg)
-
-Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
-using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
-
-\f[
-  \begin{align*}
-  \begin{bmatrix}
-  u \\
-  v \\
-  1
-  \end{bmatrix} &=
-  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix} \\
-  \begin{bmatrix}
-  u \\
-  v \\
-  1
-  \end{bmatrix} &=
-  \begin{bmatrix}
-  f_x & 0 & c_x \\
-  0 & f_y & c_y \\
-  0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  1 & 0 & 0 & 0 \\
-  0 & 1 & 0 & 0 \\
-  0 & 0 & 1 & 0
-  \end{bmatrix}
-  \begin{bmatrix}
-  r_{11} & r_{12} & r_{13} & t_x \\
-  r_{21} & r_{22} & r_{23} & t_y \\
-  r_{31} & r_{32} & r_{33} & t_z \\
-  0 & 0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix}
-  \end{align*}
-\f]
-
-The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
-a 3D point expressed in the world frame into the camera frame:
-
-\f[
-  \begin{align*}
-  \begin{bmatrix}
-  X_c \\
-  Y_c \\
-  Z_c \\
-  1
-  \end{bmatrix} &=
-  \hspace{0.2em} ^{c}\bf{T}_w
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix} \\
-  \begin{bmatrix}
-  X_c \\
-  Y_c \\
-  Z_c \\
-  1
-  \end{bmatrix} &=
-  \begin{bmatrix}
-  r_{11} & r_{12} & r_{13} & t_x \\
-  r_{21} & r_{22} & r_{23} & t_y \\
-  r_{31} & r_{32} & r_{33} & t_z \\
-  0 & 0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix}
-  \end{align*}
-\f]
+More information about Perspective-n-Points is described in @ref calib3d_solvePnP

@note
   -   An example of how to use solvePnP for planar augmented reality can be found at
@ -1082,6 +963,8 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,

 /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.

+@see @ref calib3d_solvePnP
+
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
 1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1140,6 +1023,8 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint

 /** @brief Finds an object pose from 3 3D-2D point correspondences.

+@see @ref calib3d_solvePnP
+
@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
 1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
@ -1171,6 +1056,8 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
 /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
 to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.

+@see @ref calib3d_solvePnP
+
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
 where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1198,6 +1085,8 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi
 /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
 to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.

+@see @ref calib3d_solvePnP
+
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
 where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1226,6 +1115,9 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo
                                     double VVSlambda = 1);

 /** @brief Finds an object pose from 3D-2D point correspondences.
+
+@see @ref calib3d_solvePnP
+
 This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector>
 couple), depending on the number of input points and the chosen method:
 - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points.
@ -1253,37 +1145,7 @@ the model coordinate system to the camera coordinate system.
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
 the provided rvec and tvec values as initial approximations of the rotation and translation
 vectors, respectively, and further optimizes them.
-@param flags Method for solving a PnP problem:
-   @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
-this case the function finds such a pose that minimizes reprojection error, that is the sum
-of squared distances between the observed projections imagePoints and the projected (using
- #projectPoints ) objectPoints .
-   @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
-"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
-In this case the function requires exactly four object and image points.
-   @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
-"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
-In this case the function requires exactly four object and image points.
-   @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
-paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
-   @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
-Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
-"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
-   @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
-Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
-F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
-Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
-assuming that both have the same value. Then the cameraMatrix is updated with the estimated
-focal length.
-   @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
-"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
-   @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
-"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
-It requires 4 coplanar object points defined in the following order:
-  - point 0: [-squareLength / 2,  squareLength / 2, 0]
-  - point 1: [ squareLength / 2,  squareLength / 2, 0]
-  - point 2: [ squareLength / 2, -squareLength / 2, 0]
-  - point 3: [-squareLength / 2, -squareLength / 2, 0]
+@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
 and useExtrinsicGuess is set to true.
@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
@ -1292,98 +1154,7 @@ and useExtrinsicGuess is set to true.
 (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
 and the 3D object points projected with the estimated pose.

-The function estimates the object pose given a set of object points, their corresponding image
-projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
-(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
-and the Z-axis forward).
-
-![](pnp.jpg)
-
-Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
-using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
-
-\f[
-  \begin{align*}
-  \begin{bmatrix}
-  u \\
-  v \\
-  1
-  \end{bmatrix} &=
-  \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix} \\
-  \begin{bmatrix}
-  u \\
-  v \\
-  1
-  \end{bmatrix} &=
-  \begin{bmatrix}
-  f_x & 0 & c_x \\
-  0 & f_y & c_y \\
-  0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  1 & 0 & 0 & 0 \\
-  0 & 1 & 0 & 0 \\
-  0 & 0 & 1 & 0
-  \end{bmatrix}
-  \begin{bmatrix}
-  r_{11} & r_{12} & r_{13} & t_x \\
-  r_{21} & r_{22} & r_{23} & t_y \\
-  r_{31} & r_{32} & r_{33} & t_z \\
-  0 & 0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix}
-  \end{align*}
-\f]
-
-The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
-a 3D point expressed in the world frame into the camera frame:
-
-\f[
-  \begin{align*}
-  \begin{bmatrix}
-  X_c \\
-  Y_c \\
-  Z_c \\
-  1
-  \end{bmatrix} &=
-  \hspace{0.2em} ^{c}\bf{T}_w
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix} \\
-  \begin{bmatrix}
-  X_c \\
-  Y_c \\
-  Z_c \\
-  1
-  \end{bmatrix} &=
-  \begin{bmatrix}
-  r_{11} & r_{12} & r_{13} & t_x \\
-  r_{21} & r_{22} & r_{23} & t_y \\
-  r_{31} & r_{32} & r_{33} & t_z \\
-  0 & 0 & 0 & 1
-  \end{bmatrix}
-  \begin{bmatrix}
-  X_{w} \\
-  Y_{w} \\
-  Z_{w} \\
-  1
-  \end{bmatrix}
-  \end{align*}
-\f]
+More information is described in @ref calib3d_solvePnP

@note
   -   An example of how to use solvePnP for planar augmented reality can be found at
--- a/modules/core/include/opencv2/core/bindings_utils.hpp
+++ b/modules/core/include/opencv2/core/bindings_utils.hpp
@ -103,6 +103,21 @@ String dumpRotatedRect(const RotatedRect& argument)
                  argument.size.height, argument.angle);
 }

+CV_WRAP static inline
+RotatedRect testRotatedRect(float x, float y, float w, float h, float angle)
+{
+    return RotatedRect(Point2f(x, y), Size2f(w, h), angle);
+}
+
+CV_WRAP static inline
+std::vector<RotatedRect> testRotatedRectVector(float x, float y, float w, float h, float angle)
+{
+    std::vector<RotatedRect> result;
+    for (int i = 0; i < 10; i++)
+        result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i));
+    return result;
+}
+
 CV_WRAP static inline
 String dumpRange(const Range& argument)
 {
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -288,8 +288,6 @@ std::vector<Target> getAvailableTargets(Backend be)

 namespace
 {
-    typedef std::vector<MatShape> ShapesVec;
-
    struct LayerShapes
    {
        ShapesVec in, out, internal;
@ -1490,6 +1488,11 @@ struct Net::Impl : public detail::NetImplBase

            clear();

+            if (hasDynamicShapes)
+            {
+                updateLayersShapes();
+            }
+
            this->blobsToKeep = blobsToKeep_;

            allocateLayers(blobsToKeep_);
@ -3794,20 +3797,24 @@ struct Net::Impl : public detail::NetImplBase

    void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
    {
-        std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
+        CV_CheckGE(id, 0, "");
+        CV_CheckLT(id, (int)layers.size(), "");
+        LayerData& layerData = layers[id];
+        std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
+        LayerShapes& layerShapes = inOutShapes[id];

-        if (id == 0 && inOutShapes[id].in[0].empty())
+        if (id == 0 && layerShapes.in[0].empty())
        {
-            if (!layers[0].outputBlobs.empty())
+            if (!layerData.outputBlobs.empty())
            {
                ShapesVec shapes;
-                for (int i = 0; i < layers[0].outputBlobs.size(); i++)
+                for (int i = 0; i < layerData.outputBlobs.size(); i++)
                {
-                    Mat& inp = layers[0].outputBlobs[i];
-                    CV_Assert(inp.total());
+                    Mat& inp = layerData.outputBlobs[i];
+                    CV_Assert(!inp.empty());
                    shapes.push_back(shape(inp));
                }
-                inOutShapes[0].in = shapes;
+                layerShapes.in = shapes;
            }
            else
            {
@ -3823,17 +3830,17 @@ struct Net::Impl : public detail::NetImplBase
                }
                if (none)
                {
-                    inOutShapes[0].out.clear();
+                    layerShapes.out.clear();
                    return;
                }
                else
                {
-                    inOutShapes[0].in = inputShapes;
+                    layerShapes.in = inputShapes;
                }
            }
        }

-        if (inOutShapes[id].in.empty())
+        if (layerShapes.in.empty())
        {
            for(int i = 0; i < inputLayerIds.size(); i++)
            {
@ -3846,14 +3853,14 @@ struct Net::Impl : public detail::NetImplBase
                    getLayerShapesRecursively(layerId, inOutShapes);
                }
                const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
-                inOutShapes[id].in.push_back(shape);
+                layerShapes.in.push_back(shape);
            }
        }
-        const ShapesVec& is = inOutShapes[id].in;
-        ShapesVec& os = inOutShapes[id].out;
-        ShapesVec& ints = inOutShapes[id].internal;
-        int requiredOutputs = layers[id].requiredOutputs.size();
-        Ptr<Layer> l = layers[id].getLayerInstance();
+        const ShapesVec& is = layerShapes.in;
+        ShapesVec& os = layerShapes.out;
+        ShapesVec& ints = layerShapes.internal;
+        int requiredOutputs = layerData.requiredOutputs.size();
+        Ptr<Layer> l = layerData.getLayerInstance();
        CV_Assert(l);
        bool layerSupportInPlace = false;
        try
@ -3881,13 +3888,38 @@ struct Net::Impl : public detail::NetImplBase
            CV_LOG_ERROR(NULL, "Exception message: " << e.what());
            throw;
        }
-        inOutShapes[id].supportInPlace = layerSupportInPlace;
+        layerShapes.supportInPlace = layerSupportInPlace;

-        for (int i = 0; i < ints.size(); i++)
-            CV_Assert(total(ints[i]) > 0);
+        try
+        {
+            for (int i = 0; i < ints.size(); i++)
+                CV_CheckGT(total(ints[i]), 0, "");

-        for (int i = 0; i < os.size(); i++)
-            CV_Assert(total(os[i]) > 0);
+            for (int i = 0; i < os.size(); i++)
+                CV_CheckGT(total(os[i]), 0, "");
+        }
+        catch (const cv::Exception& e)
+        {
+            CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." <<
+                    " inputs=" << is.size() <<
+                    " outputs=" << os.size() << "/" << requiredOutputs <<
+                    " blobs=" << l->blobs.size() <<
+                    " inplace=" << layerSupportInPlace);
+            for (size_t i = 0; i < is.size(); ++i)
+            {
+                CV_LOG_ERROR(NULL, "    input[" << i << "] = " << toString(is[i]));
+            }
+            for (size_t i = 0; i < os.size(); ++i)
+            {
+                CV_LOG_ERROR(NULL, "    output[" << i << "] = " << toString(os[i]));
+            }
+            for (size_t i = 0; i < l->blobs.size(); ++i)
+            {
+                CV_LOG_ERROR(NULL, "    blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
+            }
+            CV_LOG_ERROR(NULL, "Exception message: " << e.what());
+            throw;
+        }
    }

    void getLayersShapes(const ShapesVec& netInputShapes,
@ -3915,43 +3947,58 @@ struct Net::Impl : public detail::NetImplBase

    void updateLayersShapes()
    {
-        CV_Assert(!layers[0].outputBlobs.empty());
+        CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size());
+        CV_Assert(netInputLayer);
+        DataLayer& inputLayer = *netInputLayer;
+        LayerData& inputLayerData = layers[0];
+        CV_Assert(inputLayerData.layerInstance.get() == &inputLayer);
+        CV_Assert(!inputLayerData.outputBlobs.empty());
        ShapesVec inputShapes;
-        for(int i = 0; i < layers[0].outputBlobs.size(); i++)
+        for(int i = 0; i < inputLayerData.outputBlobs.size(); i++)
        {
-            Mat& inp = layers[0].outputBlobs[i];
-            CV_Assert(inp.total());
-            if (preferableBackend == DNN_BACKEND_OPENCV &&
+            Mat& inp = inputLayerData.outputBlobs[i];
+            CV_Assert(!inp.empty());
+            if (preferableBackend == DNN_BACKEND_OPENCV &&  // FIXIT: wrong place for output allocation
                preferableTarget == DNN_TARGET_OPENCL_FP16 &&
-                layers[0].dtype == CV_32F)
+                inputLayerData.dtype == CV_32F)
            {
-                layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
+                inp.create(inp.dims, inp.size, CV_16S);
            }
            inputShapes.push_back(shape(inp));
        }
+        CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes"));
        LayersShapesMap layersShapes;
        layersShapes[0].in = inputShapes;
        for (MapIdToLayerData::iterator it = layers.begin();
             it != layers.end(); it++)
        {
            int layerId = it->first;
-            std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
-            if (layersShapes[layerId].in.empty())
+            LayerData& layerData = it->second;
+            std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
+            LayerShapes& layerShapes = layersShapes[layerId];
+            CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size());
+            if (layerShapes.in.empty())
            {
                for(int i = 0; i < inputLayerIds.size(); i++)
                {
-                    int inputLayerId = inputLayerIds[i].lid;
+                    const LayerPin& inputPin = inputLayerIds[i];
+                    int inputLayerId = inputPin.lid;
+                    CV_LOG_DEBUG(NULL, "    input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")");
                    LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
-                    if(inputIt == layersShapes.end() || inputIt->second.out.empty())
+                    if (inputIt == layersShapes.end() || inputIt->second.out.empty())
                    {
                        getLayerShapesRecursively(inputLayerId, layersShapes);
                    }
-                    const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
-                    layersShapes[layerId].in.push_back(shape);
+                    const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid];
+                    layerShapes.in.push_back(shape);
                }
-                it->second.getLayerInstance()->updateMemoryShapes(layersShapes[layerId].in);
+                layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in);
            }
+            CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes"));
+            CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes"));
+            CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes"));
        }
+        CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
    }

    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
@ -5000,13 +5047,8 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
    bool oldShape = prevShape == blobShape;

    blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
-    if (!oldShape) {
+    if (!oldShape)
        ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
-        if (impl->hasDynamicShapes)
-        {
-            impl->updateLayersShapes();
-        }
-    }

    if (!ld.outputBlobsWrappers[pin.oid].empty())
    {
--- a/modules/dnn/src/dnn_common.hpp
+++ b/modules/dnn/src/dnn_common.hpp
@ -81,6 +81,44 @@ struct NetImplBase

 }  // namespace detail

+
+typedef std::vector<MatShape> ShapesVec;
+
+static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string())
+{
+    std::ostringstream ss;
+    if (!name.empty())
+        ss << name << ' ';
+    ss << '[';
+    for(size_t i = 0, n = shapes.size(); i < n; ++i)
+        ss << ' ' << toString(shapes[i]);
+    ss << " ]";
+    return ss.str();
+}
+
+static inline std::string toString(const Mat& blob, const std::string& name = std::string())
+{
+    std::ostringstream ss;
+    if (!name.empty())
+        ss << name << ' ';
+    if (blob.empty())
+    {
+        ss << "<empty>";
+    }
+    else if (blob.dims == 1)
+    {
+        Mat blob_ = blob;
+        blob_.dims = 2;  // hack
+        ss << blob_.t();
+    }
+    else
+    {
+        ss << blob.reshape(1, 1);
+    }
+    return ss.str();
+}
+
+
 CV__DNN_INLINE_NS_END
 }}  // namespace

--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -1366,9 +1366,16 @@ public:
            }
            else if (padMode.empty())
            {
-                int addedDims = isPool1D? inpShape.size() : local_kernel.size();
-                for (int i = 0; i < addedDims; i++) {
+                size_t addedDims = isPool1D? inpShape.size() : local_kernel.size();
+                CV_CheckLE(addedDims, inpShape.size(), "");
+                CV_CheckLE(addedDims, pads_begin.size(), "");
+                CV_CheckLE(addedDims, pads_end.size(), "");
+                CV_CheckLE(addedDims, local_kernel.size(), "");
+                CV_CheckLE(addedDims, strides.size(), "");
+                for (int i = 0; i < addedDims; i++)
+                {
                    float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                    CV_CheckGE(dst, 0.0f, "");
                    outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
                }

--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@ -741,7 +741,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
    simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }

-Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
+Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
 {
    if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() &&
        tensor_proto.double_data().empty() && tensor_proto.int64_data().empty() &&
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp
@ -8,8 +8,6 @@
 #ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
 #define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__

-#include "../precomp.hpp"
-
 #if defined(__GNUC__) && __GNUC__ >= 5
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wsuggest-override"
@ -33,7 +31,7 @@ void convertInt64ToInt32(const T1& src, T2& dst, int size)
    }
 }

-Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto);
+Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto);

 CV__DNN_INLINE_NS_END
 }}  // namespace dnn, namespace cv
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -12,7 +12,7 @@

 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
-#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
+#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
 #include <opencv2/core/utils/logger.hpp>

 #ifdef HAVE_PROTOBUF
@ -158,6 +158,9 @@ private:
    void parseQConcat              (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);

    void parseCustomLayer          (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
+
+    int onnx_opset;  // OperatorSetIdProto for 'onnx' domain
+    void parseOperatorSet();
 };

 class ONNXLayerHandler : public detail::LayerHandler
@ -189,8 +192,9 @@ void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
 }

 ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
-    : layerHandler(DNN_DIAGNOSTICS_RUN ?  new ONNXLayerHandler(this) : nullptr),
-        dstNet(net), dispatch(buildDispatchMap())
+    : layerHandler(DNN_DIAGNOSTICS_RUN ?  new ONNXLayerHandler(this) : nullptr)
+    , dstNet(net), dispatch(buildDispatchMap())
+    , onnx_opset(0)
 {
    hasDynamicShapes = false;
    CV_Assert(onnxFile);
@ -211,7 +215,9 @@ ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
 }

 ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
-    : layerHandler(DNN_DIAGNOSTICS_RUN ?  new ONNXLayerHandler(this) : nullptr), dstNet(net), dispatch(buildDispatchMap())
+    : layerHandler(DNN_DIAGNOSTICS_RUN ?  new ONNXLayerHandler(this) : nullptr)
+    , dstNet(net), dispatch(buildDispatchMap())
+    , onnx_opset(0)
 {
    hasDynamicShapes = false;
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
@ -242,6 +248,53 @@ inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, co
    }
 }

+static
+void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
+{
+    CV_Assert(valueInfoProto.has_name());
+    CV_Assert(valueInfoProto.has_type());
+    const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
+    CV_Assert(typeProto.has_tensor_type());
+    const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
+    CV_Assert(tensor.has_shape());
+    const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
+
+    int dim_size = tensorShape.dim_size();
+    CV_CheckGE(dim_size, 0, "");
+    MatShape shape(dim_size);
+    for (int j = 0; j < dim_size; ++j)
+    {
+        const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
+        if (dimension.has_dim_param())
+        {
+            CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
+        }
+        // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
+        if (dimension.has_denotation())
+        {
+            CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
+        }
+        shape[j] = dimension.dim_value();
+    }
+    CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
+}
+
+static
+void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
+{
+    if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
+        return;
+    int dim_size = tensorProto.dims_size();
+    CV_CheckGE(dim_size, 0, "");
+    MatShape shape(dim_size);
+    for (int j = 0; j < dim_size; ++j)
+    {
+        int sz = static_cast<int>(tensorProto.dims(j));
+        shape[j] = sz;
+    }
+    CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
+}
+
 void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
 {
    if (!tensor_proto.raw_data().empty()) {
@ -282,21 +335,21 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
 std::map<std::string, Mat> ONNXImporter::getGraphTensors(
                                        const opencv_onnx::GraphProto& graph_proto)
 {
-  opencv_onnx::TensorProto tensor_proto;
-  std::map<std::string, Mat> layers_weights;
+    std::map<std::string, Mat> layers_weights;

-  for (int i = 0; i < graph_proto.initializer_size(); i++)
-  {
-    tensor_proto = graph_proto.initializer(i);
-    Mat mat = getMatFromTensor(tensor_proto);
-    releaseONNXTensor(tensor_proto);
+    for (int i = 0; i < graph_proto.initializer_size(); i++)
+    {
+        const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
+        dumpTensorProto(i, tensor_proto, "initializer");
+        Mat mat = getMatFromTensor(tensor_proto);
+        releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto));  // drop already loaded data

-    if (DNN_DIAGNOSTICS_RUN && mat.empty())
-        continue;
+        if (DNN_DIAGNOSTICS_RUN && mat.empty())
+            continue;

-    layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
-  }
-  return layers_weights;
+        layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
+    }
+    return layers_weights;
 }

 static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
@ -562,10 +615,45 @@ void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::Node

 void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
 {
+    CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
    constBlobs.insert(std::make_pair(name, blob));
    outShapes.insert(std::make_pair(name, shape(blob)));
 }

+void ONNXImporter::parseOperatorSet()
+{
+    int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
+    if (ir_version < 3)
+        return;
+
+    int opset_size = model_proto.opset_import_size();
+    if (opset_size <= 0)
+    {
+        CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
+        return;
+    }
+
+    for (int i = 0; i < opset_size; ++i)
+    {
+        const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
+        const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
+        int version = opset_entry.has_version() ? opset_entry.version() : -1;
+        if (domain.empty() || domain == "ai.onnx")
+        {
+            // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
+            onnx_opset = std::max(onnx_opset, version);
+        }
+        else
+        {
+            // OpenCV don't know other opsets
+            // will fail later on unsupported node processing
+            CV_LOG_WARNING(NULL, "DNN/ONNX: unsupported opset[" << i << "]: domain='" << domain << "' version=" << version);
+        }
+    }
+
+    CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
+}
+
 void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
                                       const opencv_onnx::NodeProto& node_proto)
 {
@ -627,56 +715,79 @@ void ONNXImporter::populateNet()
            << " model produced by '" << framework_name << "'"
            << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
            << ". Number of nodes = " << graph_proto.node_size()
+            << ", initializers = " << graph_proto.initializer_size()
            << ", inputs = " << graph_proto.input_size()
            << ", outputs = " << graph_proto.output_size()
            );

+    parseOperatorSet();
+
    simplifySubgraphs(graph_proto);

    const int layersSize = graph_proto.node_size();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");

-    constBlobs = getGraphTensors(graph_proto);
+    constBlobs = getGraphTensors(graph_proto);  // scan GraphProto.initializer
+    std::vector<String> netInputs;  // map with network inputs (without const blobs)
    // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_name());
+        const std::string& name = valueInfoProto.name();
        CV_Assert(valueInfoProto.has_type());
-        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
+        const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
-        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
+        const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
-        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();
+        const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();

-        MatShape inpShape(tensorShape.dim_size());
-        for (int j = 0; j < inpShape.size(); ++j)
+        int dim_size = tensorShape.dim_size();
+        CV_CheckGE(dim_size, 0, "");  // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
+        MatShape inpShape(dim_size);
+        for (int j = 0; j < dim_size; ++j)
        {
-            inpShape[j] = tensorShape.dim(j).dim_value();
+            const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
+            if (dimension.has_dim_param())
+            {
+                CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
+            }
+            // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
+            if (dimension.has_denotation())
+            {
+                CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
+            }
+            inpShape[j] = dimension.dim_value();
            // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
-            if (!tensorShape.dim(j).dim_param().empty() && !(j == 0 && inpShape.size() >= 3))
+            if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
+            {
                hasDynamicShapes = true;
+            }
        }
-        if (!inpShape.empty() && !hasDynamicShapes)
+        bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
+        CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
+        CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
+        if (dim_size > 0 && !hasDynamicShapes)  // FIXIT result is not reliable for models with multiple inputs
        {
            inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
        }
        outShapes[valueInfoProto.name()] = inpShape;
-    }
-
-    // create map with network inputs (without const blobs)
-    // fill map: push layer name, layer id and output id
-    std::vector<String> netInputs;
-    for (int j = 0; j < graph_proto.input_size(); j++)
-    {
-        const std::string& name = graph_proto.input(j).name();
-        if (constBlobs.find(name) == constBlobs.end()) {
+        // fill map: push layer name, layer id and output id
+        if (!isInitialized)
+        {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
+
    dstNet.setInputsNames(netInputs);

+    // dump outputs
+    for (int i = 0; i < graph_proto.output_size(); ++i)
+    {
+        dumpValueInfoProto(i, graph_proto.output(i), "output");
+    }
+
    if (DNN_DIAGNOSTICS_RUN) {
        CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
        layerHandler->fillRegistry(graph_proto);
@ -696,6 +807,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
    CV_Assert(node_proto.output_size() >= 1);
    std::string name = node_proto.output(0);
    const std::string& layer_type = node_proto.op_type();
+    const std::string& layer_type_domain = node_proto.has_domain() ? node_proto.domain() : std::string();
+    if (!layer_type_domain.empty() && layer_type_domain != "ai.onnx")
+    {
+        CV_LOG_WARNING(NULL, "DNN/ONNX: can't handle node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
+                << cv::format("[%s@%s]:(%s)", layer_type.c_str(), layer_type_domain.c_str(), name.c_str())
+        );
+        if (DNN_DIAGNOSTICS_RUN)
+            return;  // ignore error
+        CV_Error(Error::StsNotImplemented, cv::format("ONNX: unsupported domain: %s", layer_type_domain.c_str()));
+    }
+
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
            << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
    );
@ -2341,11 +2463,24 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP
    CV_Assert(shapeIt != outShapes.end());
    const MatShape& inpShape = shapeIt->second;

-    Mat shapeMat(inpShape.size(), 1, CV_32S);
-    for (int j = 0; j < inpShape.size(); ++j)
-        shapeMat.at<int>(j) = inpShape[j];
-    shapeMat.dims = 1;
+    int dims = static_cast<int>(inpShape.size());
+    Mat shapeMat(dims, 1, CV_32S);
+    bool isDynamicShape = false;
+    for (int j = 0; j < dims; ++j)
+    {
+        int sz = inpShape[j];
+        isDynamicShape |= (sz == 0);
+        shapeMat.at<int>(j) = sz;
+    }
+    shapeMat.dims = 1;  // FIXIT Mat 1D

+    if (isDynamicShape)
+    {
+        CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
+        // FIXIT repair assertion
+        // Disabled to pass face detector tests from #20422
+        // CV_Assert(!isDynamicShape);  // not supported
+    }
    addConstant(layerParams.name, shapeMat);
 }

@ -2542,6 +2677,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
    addLayer(layerParams, node_proto);
 }

+// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
 void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
 {
    for (int i = 1; i < node_proto.input_size(); i++)
@ -2565,30 +2701,38 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node
    if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
        layerParams.set("mode", "opencv_linear");

-    // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes]
-    int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1
-                                        : node_proto.input_size() > 2 ? 2 : 1;
+    // opset-10: input = [X, scales]
+    // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
+    int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;

-    Mat scales = getBlob(node_proto, foundScaleId);
-    if (scales.total() == 4)
+    Mat scales = getBlob(node_proto, scalesInputId);
+    if (!scales.empty())
    {
+        CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
        layerParams.set("zoom_factor_y", scales.at<float>(2));
        layerParams.set("zoom_factor_x", scales.at<float>(3));
    }
-    else
+    else if (node_proto.input_size() >= 4)  // opset-11
    {
-        const std::string& inputLast = node_proto.input(node_proto.input_size() - 1);
-        if (constBlobs.find(inputLast) != constBlobs.end())
+        const std::string& inputSizes = node_proto.input(3);
+        if (constBlobs.find(inputSizes) != constBlobs.end())
        {
-            Mat shapes = getBlob(inputLast);
-            CV_CheckEQ(shapes.size[0], 4, "");
-            CV_CheckEQ(shapes.size[1], 1, "");
+            Mat shapes = getBlob(inputSizes);
+            CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
            if (shapes.depth() == CV_32F)
                shapes.convertTo(shapes, CV_32S);
            layerParams.set("width", shapes.at<int>(3));
            layerParams.set("height", shapes.at<int>(2));
        }
+        else
+        {
+            CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
+        }
+    }
+    else
+    {
+        CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters");
    }
    replaceLayerParam(layerParams, "mode", "interpolation");
    addLayer(layerParams, node_proto);
--- a/modules/dnn/src/precomp.hpp
+++ b/modules/dnn/src/precomp.hpp
@ -72,5 +72,6 @@
 #include <opencv2/core/utils/trace.hpp>
 #include <opencv2/dnn.hpp>
 #include <opencv2/dnn/all_layers.hpp>
+#include <opencv2/dnn/shape_utils.hpp>

 #include "dnn_common.hpp"
--- a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp
+++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp
@ -8,8 +8,6 @@
 #ifndef __OPENCV_DNN_TF_SIMPLIFIER_HPP__
 #define __OPENCV_DNN_TF_SIMPLIFIER_HPP__

-#include "../precomp.hpp"
-
 #ifdef HAVE_PROTOBUF

 #include "tf_io.hpp"
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -1190,7 +1190,58 @@ TEST_P(Test_ONNX_layers, GatherMultiOutput)
    testONNXModels("gather_multi_output");
 }

-TEST_P(Test_ONNX_layers, DynamicAxes)
+TEST_P(Test_ONNX_layers, DynamicAxes_squeeze_and_conv)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
+    testONNXModels("squeeze_and_conv_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_unsqueeze_and_conv)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
+    testONNXModels("unsqueeze_and_conv_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_gather)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
+    testONNXModels("gather_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar)
 {
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
    // accuracy
@ -1211,18 +1262,112 @@ TEST_P(Test_ONNX_layers, DynamicAxes)
    }
 #endif
 #endif
-    testONNXModels("squeeze_and_conv_dynamic_axes");
-    testONNXModels("unsqueeze_and_conv_dynamic_axes");
-    testONNXModels("gather_dynamic_axes");
    testONNXModels("gather_scalar_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_slice)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("slice_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_slice_opset_11)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("slice_opset_11_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_resize_opset11_torch16)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("resize_opset11_torch1.6_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_average_pooling)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("average_pooling_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_maxpooling_sigmoid)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("maxpooling_sigmoid_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_dynamic_batch)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
    testONNXModels("dynamic_batch");
 }

+
 TEST_P(Test_ONNX_layers, MaxPool1d)
 {
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000)
@ -1344,6 +1489,16 @@ TEST_P(Test_ONNX_layers, DivConst)
 }


+// FIXIT disabled due to non-standard ONNX model domains, need to add ONNX domains support
+// Example:
+// DNN/ONNX: unsupported opset[1]: domain='com.microsoft.experimental' version=1
+// DNN/ONNX: unsupported opset[2]: domain='ai.onnx.preview.training' version=1
+// DNN/ONNX: unsupported opset[3]: domain='com.microsoft.nchwc' version=1
+// DNN/ONNX: unsupported opset[4]: domain='com.microsoft.mlfeaturizers' version=1
+// DNN/ONNX: unsupported opset[5]: domain='ai.onnx.ml' version=2
+// DNN/ONNX: unsupported opset[6]: domain='com.microsoft' version=1
+// DNN/ONNX: unsupported opset[7]: domain='ai.onnx.training' version=1
+#if 0
 TEST_P(Test_ONNX_layers, Quantized_Convolution)
 {
    testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
@ -1449,6 +1604,7 @@ TEST_P(Test_ONNX_layers, Quantized_Constant)
 {
    testONNXModels("quantized_constant", npy, 0.002, 0.008);
 }
+#endif

 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

@ -1593,7 +1749,8 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
    testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }

-TEST_P(Test_ONNX_nets, ResNet50_Int8)
+// FIXIT missing ONNX domains support
+TEST_P(Test_ONNX_nets, DISABLED_ResNet50_Int8)
 {
    testONNXModels("resnet50_int8", pb, default_l1, default_lInf, true);
 }
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@ -1966,6 +1966,10 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
    }
    else if( event->type == GDK_SCROLL )
    {
+        GdkEventScroll* event_scroll = (GdkEventScroll*)event;
+        pt32f.x = cvFloor(event_scroll->x);
+        pt32f.y = cvFloor(event_scroll->y);
+
 #if defined(GTK_VERSION3_4)
        // NOTE: in current implementation doesn't possible to put into callback function delta_x and delta_y separately
        double delta = (event->scroll.delta_x + event->scroll.delta_y);
@ -2023,14 +2027,18 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
               (unsigned)pt.y < (unsigned)(image_widget->original_image->height)
            ))
        {
-            state &= gtk_accelerator_get_default_mod_mask();
-            flags |= BIT_MAP(state, GDK_SHIFT_MASK,   CV_EVENT_FLAG_SHIFTKEY) |
-                BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY)  |
-                BIT_MAP(state, GDK_MOD1_MASK,    CV_EVENT_FLAG_ALTKEY)   |
-                BIT_MAP(state, GDK_MOD2_MASK,    CV_EVENT_FLAG_ALTKEY)   |
+            // handle non-keyboard (mouse) modifiers first
+            flags |=
                BIT_MAP(state, GDK_BUTTON1_MASK, CV_EVENT_FLAG_LBUTTON)  |
                BIT_MAP(state, GDK_BUTTON2_MASK, CV_EVENT_FLAG_MBUTTON)  |
                BIT_MAP(state, GDK_BUTTON3_MASK, CV_EVENT_FLAG_RBUTTON);
+            // keyboard modifiers
+            state &= gtk_accelerator_get_default_mod_mask();
+            flags |=
+                BIT_MAP(state, GDK_SHIFT_MASK,   CV_EVENT_FLAG_SHIFTKEY) |
+                BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY)  |
+                BIT_MAP(state, GDK_MOD1_MASK,    CV_EVENT_FLAG_ALTKEY)   |
+                BIT_MAP(state, GDK_MOD2_MASK,    CV_EVENT_FLAG_ALTKEY);
            window->on_mouse( cv_event, pt.x, pt.y, flags, window->on_mouse_param );
        }
    }
--- a/modules/imgcodecs/CMakeLists.txt
+++ b/modules/imgcodecs/CMakeLists.txt
@ -50,12 +50,21 @@ if(HAVE_JASPER)
  list(APPEND GRFMT_LIBS ${JASPER_LIBRARIES})
  if(OPENCV_IO_FORCE_JASPER)
    add_definitions(-DOPENCV_IMGCODECS_FORCE_JASPER=1)
+  else()
+    message(STATUS "imgcodecs: Jasper codec is disabled in runtime. Details: https://github.com/opencv/opencv/issues/14058")
  endif()
 endif()

 if(HAVE_OPENEXR)
  include_directories(SYSTEM ${OPENEXR_INCLUDE_PATHS})
  list(APPEND GRFMT_LIBS ${OPENEXR_LIBRARIES})
+  if(OPENCV_IO_FORCE_OPENEXR
+      OR NOT BUILD_OPENEXR  # external OpenEXR versions are not disabled
+  )
+    add_definitions(-DOPENCV_IMGCODECS_USE_OPENEXR=1)
+  else()
+    message(STATUS "imgcodecs: OpenEXR codec is disabled in runtime. Details: https://github.com/opencv/opencv/issues/21326")
+  endif()
 endif()

 if(HAVE_PNG OR HAVE_TIFF OR HAVE_OPENEXR)
@ -167,6 +176,9 @@ ocv_add_accuracy_tests()
 if(TARGET opencv_test_imgcodecs AND HAVE_JASPER AND "$ENV{OPENCV_IO_ENABLE_JASPER}")
  ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_JASPER_TESTS=1)
 endif()
+if(TARGET opencv_test_imgcodecs AND HAVE_OPENEXR AND "$ENV{OPENCV_IO_ENABLE_OPENEXR}")
+  ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS=1)
+endif()
 if(TARGET opencv_test_imgcodecs AND HAVE_PNG AND NOT (PNG_VERSION VERSION_LESS "1.6.31"))
  # details: https://github.com/glennrp/libpng/commit/68cb0aaee3de6371b81a4613476d9b33e43e95b1
  ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_PNG_WITH_EXIF=1)
--- a/modules/imgcodecs/src/grfmt_exr.cpp
+++ b/modules/imgcodecs/src/grfmt_exr.cpp
@ -44,6 +44,9 @@

 #ifdef HAVE_OPENEXR

+#include <opencv2/core/utils/configuration.private.hpp>
+#include <opencv2/core/utils/logger.hpp>
+
 #if defined _MSC_VER && _MSC_VER >= 1200
 #  pragma warning( disable: 4100 4244 4267 )
 #endif
@ -80,6 +83,27 @@
 namespace cv
 {

+static bool isOpenEXREnabled()
+{
+    static const bool PARAM_ENABLE_OPENEXR = utils::getConfigurationParameterBool("OPENCV_IO_ENABLE_OPENEXR",
+#ifdef OPENCV_IMGCODECS_USE_OPENEXR
+        true
+#else
+        false
+#endif
+    );
+    return PARAM_ENABLE_OPENEXR;
+}
+static void initOpenEXR()
+{
+    if (!isOpenEXREnabled())
+    {
+        const char* message = "imgcodecs: OpenEXR codec is disabled. You can enable it via 'OPENCV_IO_ENABLE_OPENEXR' option. Refer for details and cautions here: https://github.com/opencv/opencv/issues/21326";
+        CV_LOG_WARNING(NULL, message);
+        CV_Error(Error::StsNotImplemented, message);
+    }
+}
+
 /////////////////////// ExrDecoder ///////////////////

 ExrDecoder::ExrDecoder()
@ -577,6 +601,7 @@ void  ExrDecoder::RGBToGray( float *in, float *out )

 ImageDecoder ExrDecoder::newDecoder() const
 {
+    initOpenEXR();
    return makePtr<ExrDecoder>();
 }

@ -740,6 +765,7 @@ bool  ExrEncoder::write( const Mat& img, const std::vector<int>& params )

 ImageEncoder ExrEncoder::newEncoder() const
 {
+    initOpenEXR();
    return makePtr<ExrEncoder>();
 }

--- a/modules/imgcodecs/src/grfmt_exr.hpp
+++ b/modules/imgcodecs/src/grfmt_exr.hpp
@ -53,6 +53,7 @@
 #include <ImfInputFile.h>
 #include <ImfChannelList.h>
 #include <ImathBox.h>
+#include <ImfRgbaFile.h>
 #include "grfmt_base.hpp"

 namespace cv
--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@ -429,6 +429,6 @@ TEST(Imgcodecs, write_parameter_type)

 }} // namespace

-#ifdef HAVE_OPENEXR
+#if defined(HAVE_OPENEXR) && defined(OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS)
 #include "test_exr.impl.hpp"
 #endif
--- a/modules/python/src2/cv2_convert.hpp
+++ b/modules/python/src2/cv2_convert.hpp
@ -372,6 +372,9 @@ struct IsRepresentableAsMatDataType<T, typename VoidType<typename cv::DataType<T
 {
 };

+// https://github.com/opencv/opencv/issues/20930
+template <> struct IsRepresentableAsMatDataType<cv::RotatedRect, void> : FalseType {};
+
 } // namespace traits

 template <typename Tp>
--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@ -594,6 +594,18 @@ class Arguments(NewOpenCVTests):
        self.assertEqual(ints.dtype, np.int32, "Vector of integers has wrong elements type")
        self.assertEqual(ints.shape, expected_shape, "Vector of integers has wrong shape.")

+    def test_result_rotated_rect_issue_20930(self):
+        rr = cv.utils.testRotatedRect(10, 20, 100, 200, 45)
+        self.assertTrue(isinstance(rr, tuple), msg=type(rr))
+        self.assertEqual(len(rr), 3)
+
+        rrv = cv.utils.testRotatedRectVector(10, 20, 100, 200, 45)
+        self.assertTrue(isinstance(rrv, tuple), msg=type(rrv))
+        self.assertEqual(len(rrv), 10)
+
+        rr = rrv[0]
+        self.assertTrue(isinstance(rr, tuple), msg=type(rrv))
+        self.assertEqual(len(rr), 3)

 class CanUsePurePythonModuleFunction(NewOpenCVTests):
    def test_can_get_ocv_version(self):
--- a/samples/python/find_obj.py
+++ b/samples/python/find_obj.py
@ -86,6 +86,7 @@ def explore_match(win, img1, img2, kp_pairs, status = None, H = None):

    if status is None:
        status = np.ones(len(kp_pairs), np.bool_)
+        status = status.reshape((len(kp_pairs), 1))
    p1, p2 = [], []  # python 2 / python 3 change of zip unpacking
    for kpp in kp_pairs:
        p1.append(np.int32(kpp[0].pt))