diff --git a/CMakeLists.txt b/CMakeLists.txt index 4718c383ae..f05adb32da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -223,7 +223,7 @@ OCV_OPTION(BUILD_OPENJPEG "Build OpenJPEG from source" (WIN32 OCV_OPTION(BUILD_JASPER "Build libjasper from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_JPEG "Build libjpeg from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_PNG "Build libpng from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) ) -OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) ) +OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_WEBP "Build WebP from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_TBB "Download and build TBB from source" (ANDROID OR OPENCV_FORCE_3RDPARTY_BUILD) ) OCV_OPTION(BUILD_IPP_IW "Build IPP IW from source" (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT ) @@ -311,7 +311,7 @@ OCV_OPTION(WITH_JPEG "Include JPEG support" ON OCV_OPTION(WITH_WEBP "Include WebP support" ON VISIBLE_IF NOT WINRT VERIFY HAVE_WEBP) -OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" BUILD_OPENEXR OR NOT CMAKE_CROSSCOMPILING +OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" ((WIN32 OR ANDROID OR APPLE) OR BUILD_OPENEXR) OR NOT CMAKE_CROSSCOMPILING VISIBLE_IF NOT APPLE_FRAMEWORK AND NOT WINRT VERIFY HAVE_OPENEXR) OCV_OPTION(WITH_OPENGL "Include OpenGL support" OFF diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 23a6ca6959..95d1d92f68 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -268,7 +268,9 @@ if(WITH_OPENEXR) set(OPENEXR_LIBRARIES IlmImf) add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr") if(OPENEXR_VERSION) # check via TARGET doesn't work + set(BUILD_OPENEXR ON) set(HAVE_OPENEXR YES) + set(BUILD_OPENEXR ON) endif() endif() endif() diff --git a/cmake/OpenCVFindOpenEXR.cmake b/cmake/OpenCVFindOpenEXR.cmake index 133468243a..0df626a842 100644 --- a/cmake/OpenCVFindOpenEXR.cmake +++ b/cmake/OpenCVFindOpenEXR.cmake @@ -9,12 +9,20 @@ # OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR. 
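+# Typical configure-time combinations (an illustrative sketch, not upstream
+# content; the variable names below come from this patch):
+#   cmake -DWITH_OPENEXR=ON ..                                        # use a system OpenEXR 3 when find_package() succeeds
+#   cmake -DWITH_OPENEXR=ON -DBUILD_OPENEXR=ON ..                     # force the bundled 3rdparty copy
+#   cmake -DWITH_OPENEXR=ON -DOPENCV_SKIP_OPENEXR_FIND_PACKAGE=ON ..  # skip the find_package() probe below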
# -find_package(OpenEXR 3.0 CONFIG QUIET) -if(TARGET OpenEXR::OpenEXR) - SET(OPENEXR_FOUND TRUE) - SET(OPENEXR_LIBRARIES OpenEXR::OpenEXR) - SET(OPENEXR_VERSION ${OpenEXR_VERSION}) - return() +if(NOT OPENCV_SKIP_OPENEXR_FIND_PACKAGE) + find_package(OpenEXR 3 QUIET) + #ocv_cmake_dump_vars(EXR) + if(OpenEXR_FOUND) + if(TARGET OpenEXR::OpenEXR) # OpenEXR 3+ + set(OPENEXR_LIBRARIES OpenEXR::OpenEXR) + set(OPENEXR_INCLUDE_PATHS "") + set(OPENEXR_VERSION "${OpenEXR_VERSION}") + set(OPENEXR_FOUND 1) + return() + else() + message(STATUS "Unsupported find_package(OpenEXR) - missing OpenEXR::OpenEXR target (version ${OpenEXR_VERSION})") + endif() + endif() endif() SET(OPENEXR_LIBRARIES "") diff --git a/doc/opencv.bib b/doc/opencv.bib index 3e22e8a2b1..62c1c79912 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -1294,6 +1294,12 @@ number={2}, pages={117-135}, } +@inproceedings{Zuliani2014RANSACFD, + title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...}, + author={Marco Zuliani}, + year={2014}, + url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf} +} @inproceedings{forstner1987fast, title={A fast operator for detection and precise location of distincs points, corners and center of circular features}, author={FORSTNER, W}, diff --git a/doc/tutorials/introduction/windows_install/windows_install.markdown b/doc/tutorials/introduction/windows_install/windows_install.markdown index 56fe64998c..87f2e51eb8 100644 --- a/doc/tutorials/introduction/windows_install/windows_install.markdown +++ b/doc/tutorials/introduction/windows_install/windows_install.markdown @@ -367,7 +367,7 @@ libraries). If you do not need the support for some of these, you can just freel Set the OpenCV environment variable and add it to the systems path {#tutorial_windows_install_path} ================================================================= -First we set an environment variable to make easier our work. This will hold the build directory of +First, we set an environment variable to make our work easier. This will hold the build directory of our OpenCV library that we use in our projects. 
Start up a command window and enter:
@code
   setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc11     (suggested for Visual Studio 2012 - 32 bit Windows)
diff --git a/modules/calib3d/doc/calib3d.bib b/modules/calib3d/doc/calib3d.bib
index 86f2277b16..c7b2a8cfde 100644
--- a/modules/calib3d/doc/calib3d.bib
+++ b/modules/calib3d/doc/calib3d.bib
@@ -40,12 +40,13 @@
 publisher={IEEE}
 }
 
-@inproceedings{Terzakis20,
-  author = {Terzakis, George and Lourakis, Manolis},
-  year = {2020},
-  month = {09},
-  pages = {},
-  title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem}
+@inproceedings{Terzakis2020SQPnP,
+  title={A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem},
+  author={George Terzakis and Manolis Lourakis},
+  booktitle={European Conference on Computer Vision},
+  pages={478--494},
+  year={2020},
+  publisher={Springer International Publishing}
 }
 
 @inproceedings{strobl2011iccv,
diff --git a/modules/calib3d/doc/solvePnP.markdown b/modules/calib3d/doc/solvePnP.markdown
new file mode 100644
index 0000000000..dd4fbaa15d
--- /dev/null
+++ b/modules/calib3d/doc/solvePnP.markdown
@@ -0,0 +1,176 @@
+# Perspective-n-Point (PnP) pose computation {#calib3d_solvePnP}
+
+## Pose computation overview
+
+The pose computation problem @cite Marchand16 consists in solving for the rotation and translation that minimize the reprojection error from 3D-2D point correspondences.
+
+The `solvePnP` and related functions estimate the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients; see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward).
+ +![](pnp.jpg) + +Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ +using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$ (also denoted \f$ \bf{K} \f$ in the literature): + +\f[ + \begin{align*} + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + u \\ + v \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + f_x & 0 & c_x \\ + 0 & f_y & c_y \\ + 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + 1 & 0 & 0 & 0 \\ + 0 & 1 & 0 & 0 \\ + 0 & 0 & 1 & 0 + \end{bmatrix} + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming +a 3D point expressed in the world frame into the camera frame: + +\f[ + \begin{align*} + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \hspace{0.2em} ^{c}\bf{T}_w + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} \\ + \begin{bmatrix} + X_c \\ + Y_c \\ + Z_c \\ + 1 + \end{bmatrix} &= + \begin{bmatrix} + r_{11} & r_{12} & r_{13} & t_x \\ + r_{21} & r_{22} & r_{23} & t_y \\ + r_{31} & r_{32} & r_{33} & t_z \\ + 0 & 0 & 0 & 1 + \end{bmatrix} + \begin{bmatrix} + X_{w} \\ + Y_{w} \\ + Z_{w} \\ + 1 + \end{bmatrix} + \end{align*} +\f] + +## Pose computation methods +@anchor calib3d_solvePnP_flags + +Refer to the cv::SolvePnPMethod enum documentation for the list of possible values. Some details about each method are described below: + +- cv::SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In +this case the function finds such a pose that minimizes reprojection error, that is the sum +of squared distances between the observed projections "imagePoints" and the projected (using +cv::projectPoints ) "objectPoints". Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. +Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition. +- cv::SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang +"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). +In this case the function requires exactly four object and image points. +- cv::SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis +"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). +In this case the function requires exactly four object and image points. +- cv::SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the +paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). +- cv::SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of J. Hesch and S. Roumeliotis. +"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). +- cv::SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n +Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, +F. Moreno-Noguer. 
"Exhaustive Linearization for Robust Camera Pose and Focal Length +Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ +assuming that both have the same value. Then the cameraMatrix is updated with the estimated +focal length. +- cv::SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. +- cv::SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. +"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. +It requires 4 coplanar object points defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- cv::SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the +Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis2020SQPnP). It requires 3 or more points. + +## P3P + +The cv::solveP3P() computes an object pose from exactly 3 3D-2D point correspondences. A P3P problem has up to 4 solutions. + +@note The solutions are sorted by reprojection errors (lowest to highest). + +## PnP + +The cv::solvePnP() returns the rotation and the translation vectors that transform a 3D point expressed in the object +coordinate frame to the camera coordinate frame, using different methods: +- P3P methods (cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P): need 4 input points to return a unique solution. +- cv::SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar. +- cv::SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation. +Number of input points must be 4. Object points must be defined in the following order: + - point 0: [-squareLength / 2, squareLength / 2, 0] + - point 1: [ squareLength / 2, squareLength / 2, 0] + - point 2: [ squareLength / 2, -squareLength / 2, 0] + - point 3: [-squareLength / 2, -squareLength / 2, 0] +- for all the other flags, number of input points must be >= 4 and object points can be in any configuration. + +## Generic PnP + +The cv::solvePnPGeneric() allows retrieving all the possible solutions. + +Currently, only cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P, cv::SOLVEPNP_IPPE, cv::SOLVEPNP_IPPE_SQUARE, cv::SOLVEPNP_SQPNP can return multiple solutions. + +## RANSAC PnP + +The cv::solvePnPRansac() computes the object pose wrt. the camera frame using a RANSAC scheme to deal with outliers. + +More information can be found in @cite Zuliani2014RANSACFD + +## Pose refinement + +Pose refinement consists in estimating the rotation and translation that minimizes the reprojection error using a non-linear minimization method and starting from an initial estimate of the solution. OpenCV proposes cv::solvePnPRefineLM() and cv::solvePnPRefineVVS() for this problem. + +cv::solvePnPRefineLM() uses a non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 and the current implementation computes the rotation update as a perturbation and not on SO(3). + +cv::solvePnPRefineVVS() uses a Gauss-Newton non-linear minimization scheme @cite Marchand16 and with an update of the rotation part computed using the exponential map. + +@note at least three 3D-2D point correspondences are necessary. 
diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp
index 198e405f6d..b3709c8cc2 100644
--- a/modules/calib3d/include/opencv2/calib3d.hpp
+++ b/modules/calib3d/include/opencv2/calib3d.hpp
@@ -454,7 +454,9 @@ enum { LMEDS  = 4,  //!< least-median of squares algorithm
 };
 
 enum SolvePnPMethod {
-    SOLVEPNP_ITERATIVE   = 0,
+    SOLVEPNP_ITERATIVE   = 0, //!< Pose refinement using non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 \n
+                              //!< Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. \n
+                              //!< Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
     SOLVEPNP_EPNP        = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
     SOLVEPNP_P3P         = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
     SOLVEPNP_DLS         = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
@@ -471,7 +473,7 @@ enum SolvePnPMethod {
                               //!< - point 1: [ squareLength / 2, squareLength / 2, 0]
                               //!< - point 2: [ squareLength / 2, -squareLength / 2, 0]
                               //!< - point 3: [-squareLength / 2, -squareLength / 2, 0]
-    SOLVEPNP_SQPNP       = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20
+    SOLVEPNP_SQPNP       = 8, //!< SQPnP: A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem @cite Terzakis2020SQPnP
 #ifndef CV_DOXYGEN
     SOLVEPNP_MAX_COUNT        //!< Used for count
 #endif
@@ -890,6 +892,9 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details
  */
 
 /** @brief Finds an object pose from 3D-2D point correspondences.
+
+@see @ref calib3d_solvePnP
+
 This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
 coordinate frame to the camera coordinate frame, using different methods:
 - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
@@ -916,133 +921,9 @@ the model coordinate system to the camera coordinate system.
 @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided
 rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and
 further optimizes them.
-@param flags Method for solving a PnP problem:
-- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
-this case the function finds such a pose that minimizes reprojection error, that is the sum
-of squared distances between the observed projections imagePoints and the projected (using
-@ref projectPoints ) objectPoints .
-- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
-"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
-In this case the function requires exactly four object and image points.
-- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
-"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
-In this case the function requires exactly four object and image points.
-- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
-paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
-- @ref SOLVEPNP_DLS **Broken implementation. 
Using this flag will fallback to EPnP.** \n -Method is based on the paper of J. Hesch and S. Roumeliotis. -"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto, -F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length -Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ -assuming that both have the same value. Then the cameraMatrix is updated with the estimated -focal length. -- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. -- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. -It requires 4 coplanar object points defined in the following order: - - point 0: [-squareLength / 2, squareLength / 2, 0] - - point 1: [ squareLength / 2, squareLength / 2, 0] - - point 2: [ squareLength / 2, -squareLength / 2, 0] - - point 3: [-squareLength / 2, -squareLength / 2, 0] -- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the -Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points. +@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags - -The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below -(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward -and the Z-axis forward). 
- -![](pnp.jpg) - -Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ -using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: - -\f[ - \begin{align*} - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - f_x & 0 & c_x \\ - 0 & f_y & c_y \\ - 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - 1 & 0 & 0 & 0 \\ - 0 & 1 & 0 & 0 \\ - 0 & 0 & 1 & 0 - \end{bmatrix} - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] - -The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming -a 3D point expressed in the world frame into the camera frame: - -\f[ - \begin{align*} - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] +More information about Perspective-n-Points is described in @ref calib3d_solvePnP @note - An example of how to use solvePnP for planar augmented reality can be found at @@ -1082,6 +963,8 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints, /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can be also passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1140,6 +1023,8 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint /** @brief Finds an object pose from 3 3D-2D point correspondences. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or 1x3/3x1 3-channel. vector\ can be also passed here. @param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel. @@ -1171,6 +1056,8 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints, /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can also be passed here. 
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1198,6 +1085,8 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi /** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution. +@see @ref calib3d_solvePnP + @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector\ can also be passed here. @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel, @@ -1226,6 +1115,9 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo double VVSlambda = 1); /** @brief Finds an object pose from 3D-2D point correspondences. + +@see @ref calib3d_solvePnP + This function returns a list of all the possible solutions (a solution is a couple), depending on the number of input points and the chosen method: - P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points. @@ -1253,37 +1145,7 @@ the model coordinate system to the camera coordinate system. @param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses the provided rvec and tvec values as initial approximations of the rotation and translation vectors, respectively, and further optimizes them. -@param flags Method for solving a PnP problem: -- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In -this case the function finds such a pose that minimizes reprojection error, that is the sum -of squared distances between the observed projections imagePoints and the projected (using - #projectPoints ) objectPoints . -- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang -"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis -"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17). -In this case the function requires exactly four object and image points. -- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the -paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp). -- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis. -"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct). -- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n -Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto, -F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length -Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$ -assuming that both have the same value. Then the cameraMatrix is updated with the estimated -focal length. -- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points. 
-- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli. -"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation. -It requires 4 coplanar object points defined in the following order: - - point 0: [-squareLength / 2, squareLength / 2, 0] - - point 1: [ squareLength / 2, squareLength / 2, 0] - - point 2: [ squareLength / 2, -squareLength / 2, 0] - - point 3: [-squareLength / 2, -squareLength / 2, 0] +@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags @param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE and useExtrinsicGuess is set to true. @param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE @@ -1292,98 +1154,7 @@ and useExtrinsicGuess is set to true. (\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points and the 3D object points projected with the estimated pose. -The function estimates the object pose given a set of object points, their corresponding image -projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below -(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward -and the Z-axis forward). - -![](pnp.jpg) - -Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$ -using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$: - -\f[ - \begin{align*} - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - u \\ - v \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - f_x & 0 & c_x \\ - 0 & f_y & c_y \\ - 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - 1 & 0 & 0 & 0 \\ - 0 & 1 & 0 & 0 \\ - 0 & 0 & 1 & 0 - \end{bmatrix} - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] - -The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming -a 3D point expressed in the world frame into the camera frame: - -\f[ - \begin{align*} - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \hspace{0.2em} ^{c}\bf{T}_w - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} \\ - \begin{bmatrix} - X_c \\ - Y_c \\ - Z_c \\ - 1 - \end{bmatrix} &= - \begin{bmatrix} - r_{11} & r_{12} & r_{13} & t_x \\ - r_{21} & r_{22} & r_{23} & t_y \\ - r_{31} & r_{32} & r_{33} & t_z \\ - 0 & 0 & 0 & 1 - \end{bmatrix} - \begin{bmatrix} - X_{w} \\ - Y_{w} \\ - Z_{w} \\ - 1 - \end{bmatrix} - \end{align*} -\f] +More information is described in @ref calib3d_solvePnP @note - An example of how to use solvePnP for planar augmented reality can be found at diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index f9c73dd4ff..22a86ff9be 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -103,6 +103,21 @@ String dumpRotatedRect(const RotatedRect& argument) 
argument.size.height, argument.angle); } +CV_WRAP static inline +RotatedRect testRotatedRect(float x, float y, float w, float h, float angle) +{ + return RotatedRect(Point2f(x, y), Size2f(w, h), angle); +} + +CV_WRAP static inline +std::vector testRotatedRectVector(float x, float y, float w, float h, float angle) +{ + std::vector result; + for (int i = 0; i < 10; i++) + result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i)); + return result; +} + CV_WRAP static inline String dumpRange(const Range& argument) { diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 9c8d54c34b..67312dba78 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -288,8 +288,6 @@ std::vector getAvailableTargets(Backend be) namespace { - typedef std::vector ShapesVec; - struct LayerShapes { ShapesVec in, out, internal; @@ -1490,6 +1488,11 @@ struct Net::Impl : public detail::NetImplBase clear(); + if (hasDynamicShapes) + { + updateLayersShapes(); + } + this->blobsToKeep = blobsToKeep_; allocateLayers(blobsToKeep_); @@ -3794,20 +3797,24 @@ struct Net::Impl : public detail::NetImplBase void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { - std::vector& inputLayerIds = layers[id].inputBlobsId; + CV_CheckGE(id, 0, ""); + CV_CheckLT(id, (int)layers.size(), ""); + LayerData& layerData = layers[id]; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = inOutShapes[id]; - if (id == 0 && inOutShapes[id].in[0].empty()) + if (id == 0 && layerShapes.in[0].empty()) { - if (!layers[0].outputBlobs.empty()) + if (!layerData.outputBlobs.empty()) { ShapesVec shapes; - for (int i = 0; i < layers[0].outputBlobs.size(); i++) + for (int i = 0; i < layerData.outputBlobs.size(); i++) { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); + Mat& inp = layerData.outputBlobs[i]; + CV_Assert(!inp.empty()); shapes.push_back(shape(inp)); } - inOutShapes[0].in = shapes; + layerShapes.in = shapes; } else { @@ -3823,17 +3830,17 @@ struct Net::Impl : public detail::NetImplBase } if (none) { - inOutShapes[0].out.clear(); + layerShapes.out.clear(); return; } else { - inOutShapes[0].in = inputShapes; + layerShapes.in = inputShapes; } } } - if (inOutShapes[id].in.empty()) + if (layerShapes.in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { @@ -3846,14 +3853,14 @@ struct Net::Impl : public detail::NetImplBase getLayerShapesRecursively(layerId, inOutShapes); } const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid]; - inOutShapes[id].in.push_back(shape); + layerShapes.in.push_back(shape); } } - const ShapesVec& is = inOutShapes[id].in; - ShapesVec& os = inOutShapes[id].out; - ShapesVec& ints = inOutShapes[id].internal; - int requiredOutputs = layers[id].requiredOutputs.size(); - Ptr l = layers[id].getLayerInstance(); + const ShapesVec& is = layerShapes.in; + ShapesVec& os = layerShapes.out; + ShapesVec& ints = layerShapes.internal; + int requiredOutputs = layerData.requiredOutputs.size(); + Ptr l = layerData.getLayerInstance(); CV_Assert(l); bool layerSupportInPlace = false; try @@ -3881,13 +3888,38 @@ struct Net::Impl : public detail::NetImplBase CV_LOG_ERROR(NULL, "Exception message: " << e.what()); throw; } - inOutShapes[id].supportInPlace = layerSupportInPlace; + layerShapes.supportInPlace = layerSupportInPlace; - for (int i = 0; i < ints.size(); i++) - CV_Assert(total(ints[i]) > 0); + try + { + for (int i = 0; i < ints.size(); i++) + CV_CheckGT(total(ints[i]), 0, ""); - for (int i = 0; i < os.size(); 
i++) - CV_Assert(total(os[i]) > 0); + for (int i = 0; i < os.size(); i++) + CV_CheckGT(total(os[i]), 0, ""); + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." << + " inputs=" << is.size() << + " outputs=" << os.size() << "/" << requiredOutputs << + " blobs=" << l->blobs.size() << + " inplace=" << layerSupportInPlace); + for (size_t i = 0; i < is.size(); ++i) + { + CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i])); + } + for (size_t i = 0; i < os.size(); ++i) + { + CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i])); + } + for (size_t i = 0; i < l->blobs.size(); ++i) + { + CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i]))); + } + CV_LOG_ERROR(NULL, "Exception message: " << e.what()); + throw; + } } void getLayersShapes(const ShapesVec& netInputShapes, @@ -3915,43 +3947,58 @@ struct Net::Impl : public detail::NetImplBase void updateLayersShapes() { - CV_Assert(!layers[0].outputBlobs.empty()); + CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size()); + CV_Assert(netInputLayer); + DataLayer& inputLayer = *netInputLayer; + LayerData& inputLayerData = layers[0]; + CV_Assert(inputLayerData.layerInstance.get() == &inputLayer); + CV_Assert(!inputLayerData.outputBlobs.empty()); ShapesVec inputShapes; - for(int i = 0; i < layers[0].outputBlobs.size(); i++) + for(int i = 0; i < inputLayerData.outputBlobs.size(); i++) { - Mat& inp = layers[0].outputBlobs[i]; - CV_Assert(inp.total()); - if (preferableBackend == DNN_BACKEND_OPENCV && + Mat& inp = inputLayerData.outputBlobs[i]; + CV_Assert(!inp.empty()); + if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation preferableTarget == DNN_TARGET_OPENCL_FP16 && - layers[0].dtype == CV_32F) + inputLayerData.dtype == CV_32F) { - layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); + inp.create(inp.dims, inp.size, CV_16S); } inputShapes.push_back(shape(inp)); } + CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes")); LayersShapesMap layersShapes; layersShapes[0].in = inputShapes; for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++) { int layerId = it->first; - std::vector& inputLayerIds = it->second.inputBlobsId; - if (layersShapes[layerId].in.empty()) + LayerData& layerData = it->second; + std::vector& inputLayerIds = layerData.inputBlobsId; + LayerShapes& layerShapes = layersShapes[layerId]; + CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size()); + if (layerShapes.in.empty()) { for(int i = 0; i < inputLayerIds.size(); i++) { - int inputLayerId = inputLayerIds[i].lid; + const LayerPin& inputPin = inputLayerIds[i]; + int inputLayerId = inputPin.lid; + CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")"); LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId); - if(inputIt == layersShapes.end() || inputIt->second.out.empty()) + if (inputIt == layersShapes.end() || inputIt->second.out.empty()) { getLayerShapesRecursively(inputLayerId, layersShapes); } - const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid]; - layersShapes[layerId].in.push_back(shape); + const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid]; + 
layerShapes.in.push_back(shape);
                 }
-                it->second.getLayerInstance()->updateMemoryShapes(layersShapes[layerId].in);
+                layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in);
             }
+            CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes"));
+            CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes"));
+            CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes"));
         }
+        CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
     }
 
     LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
@@ -5000,13 +5047,8 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
     bool oldShape = prevShape == blobShape;
 
     blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
-    if (!oldShape) {
+    if (!oldShape)
         ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
-        if (impl->hasDynamicShapes)
-        {
-            impl->updateLayersShapes();
-        }
-    }
 
     if (!ld.outputBlobsWrappers[pin.oid].empty())
     {
diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp
index 3c68322e09..ffeb3bfda1 100644
--- a/modules/dnn/src/dnn_common.hpp
+++ b/modules/dnn/src/dnn_common.hpp
@@ -81,6 +81,44 @@ struct NetImplBase
 
 }  // namespace detail
 
+
+typedef std::vector<MatShape> ShapesVec;
+
+static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string())
+{
+    std::ostringstream ss;
+    if (!name.empty())
+        ss << name << ' ';
+    ss << '[';
+    for(size_t i = 0, n = shapes.size(); i < n; ++i)
+        ss << ' ' << toString(shapes[i]);
+    ss << " ]";
+    return ss.str();
+}
+
+static inline std::string toString(const Mat& blob, const std::string& name = std::string())
+{
+    std::ostringstream ss;
+    if (!name.empty())
+        ss << name << ' ';
+    if (blob.empty())
+    {
+        ss << "<empty>";
+    }
+    else if (blob.dims == 1)
+    {
+        Mat blob_ = blob;
+        blob_.dims = 2;  // hack
+        ss << blob_.t();
+    }
+    else
+    {
+        ss << blob.reshape(1, 1);
+    }
+    return ss.str();
+}
+
+
 CV__DNN_INLINE_NS_END
 }}  // namespace
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 0b9b94fa57..7cb86a9515 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -1366,9 +1366,16 @@ public:
         }
         else if (padMode.empty())
         {
-            int addedDims = isPool1D? inpShape.size() : local_kernel.size();
-            for (int i = 0; i < addedDims; i++) {
+            size_t addedDims = isPool1D? inpShape.size() : local_kernel.size();
+            CV_CheckLE(addedDims, inpShape.size(), "");
+            CV_CheckLE(addedDims, pads_begin.size(), "");
+            CV_CheckLE(addedDims, pads_end.size(), "");
+            CV_CheckLE(addedDims, local_kernel.size(), "");
+            CV_CheckLE(addedDims, strides.size(), "");
+            for (int i = 0; i < addedDims; i++)
+            {
                 float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
+                CV_CheckGE(dst, 0.0f, "");
                 outShape.push_back(1 + (ceilMode ?
ceil(dst) : floor(dst))); } diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index 80fe0b173e..c6e54d6a92 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -741,7 +741,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net) simplifySubgraphs(Ptr(new ONNXGraphWrapper(net)), subgraphs); } -Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto) +Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto) { if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() && tensor_proto.double_data().empty() && tensor_proto.int64_data().empty() && diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp index 836b8ed4b7..e2a4d0ac39 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.hpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.hpp @@ -8,8 +8,6 @@ #ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__ #define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__ -#include "../precomp.hpp" - #if defined(__GNUC__) && __GNUC__ >= 5 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" @@ -33,7 +31,7 @@ void convertInt64ToInt32(const T1& src, T2& dst, int size) } } -Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto); +Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto); CV__DNN_INLINE_NS_END }} // namespace dnn, namespace cv diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 45fa424930..6283dc9d0e 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -12,7 +12,7 @@ #include #undef CV_LOG_STRIP_LEVEL -#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 #include #ifdef HAVE_PROTOBUF @@ -158,6 +158,9 @@ private: void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + + int onnx_opset; // OperatorSetIdProto for 'onnx' domain + void parseOperatorSet(); }; class ONNXLayerHandler : public detail::LayerHandler @@ -189,8 +192,9 @@ void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net) } ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) - : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr), - dstNet(net), dispatch(buildDispatchMap()) + : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr) + , dstNet(net), dispatch(buildDispatchMap()) + , onnx_opset(0) { hasDynamicShapes = false; CV_Assert(onnxFile); @@ -211,7 +215,9 @@ ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) } ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr), dstNet(net), dispatch(buildDispatchMap()) + : layerHandler(DNN_DIAGNOSTICS_RUN ? 
new ONNXLayerHandler(this) : nullptr) + , dstNet(net), dispatch(buildDispatchMap()) + , onnx_opset(0) { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); @@ -242,6 +248,53 @@ inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, co } } +static +void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix) +{ + CV_Assert(valueInfoProto.has_name()); + CV_Assert(valueInfoProto.has_type()); + const opencv_onnx::TypeProto& typeProto = valueInfoProto.type(); + CV_Assert(typeProto.has_tensor_type()); + const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type(); + CV_Assert(tensor.has_shape()); + const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape(); + + int dim_size = tensorShape.dim_size(); + CV_CheckGE(dim_size, 0, ""); + MatShape shape(dim_size); + for (int j = 0; j < dim_size; ++j) + { + const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j); + if (dimension.has_dim_param()) + { + CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)"); + } + // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition + if (dimension.has_denotation()) + { + CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'"); + } + shape[j] = dimension.dim_value(); + } + CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape)); +} + +static +void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix) +{ + if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE) + return; + int dim_size = tensorProto.dims_size(); + CV_CheckGE(dim_size, 0, ""); + MatShape shape(dim_size); + for (int j = 0; j < dim_size; ++j) + { + int sz = static_cast(tensorProto.dims(j)); + shape[j] = sz; + } + CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type()); +} + void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto) { if (!tensor_proto.raw_data().empty()) { @@ -282,21 +335,21 @@ void runLayer(LayerParams& params, const std::vector& inputs, std::map ONNXImporter::getGraphTensors( const opencv_onnx::GraphProto& graph_proto) { - opencv_onnx::TensorProto tensor_proto; - std::map layers_weights; + std::map layers_weights; - for (int i = 0; i < graph_proto.initializer_size(); i++) - { - tensor_proto = graph_proto.initializer(i); - Mat mat = getMatFromTensor(tensor_proto); - releaseONNXTensor(tensor_proto); + for (int i = 0; i < graph_proto.initializer_size(); i++) + { + const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i); + dumpTensorProto(i, tensor_proto, "initializer"); + Mat mat = getMatFromTensor(tensor_proto); + releaseONNXTensor(const_cast(tensor_proto)); // drop already loaded data - if (DNN_DIAGNOSTICS_RUN && mat.empty()) - continue; + if (DNN_DIAGNOSTICS_RUN && mat.empty()) + continue; - layers_weights.insert(std::make_pair(tensor_proto.name(), mat)); - } - return layers_weights; + layers_weights.insert(std::make_pair(tensor_proto.name(), mat)); + } + return layers_weights; } static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) { @@ -562,10 +615,45 @@ void 
ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::Node void ONNXImporter::addConstant(const std::string& name, const Mat& blob) { + CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob)); constBlobs.insert(std::make_pair(name, blob)); outShapes.insert(std::make_pair(name, shape(blob))); } +void ONNXImporter::parseOperatorSet() +{ + int ir_version = model_proto.has_ir_version() ? static_cast(model_proto.ir_version()) : -1; + if (ir_version < 3) + return; + + int opset_size = model_proto.opset_import_size(); + if (opset_size <= 0) + { + CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information") + return; + } + + for (int i = 0; i < opset_size; ++i) + { + const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i); + const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string(); + int version = opset_entry.has_version() ? opset_entry.version() : -1; + if (domain.empty() || domain == "ai.onnx") + { + // ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md + onnx_opset = std::max(onnx_opset, version); + } + else + { + // OpenCV don't know other opsets + // will fail later on unsupported node processing + CV_LOG_WARNING(NULL, "DNN/ONNX: unsupported opset[" << i << "]: domain='" << domain << "' version=" << version); + } + } + + CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset); +} + void ONNXImporter::handleQuantizedNode(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { @@ -627,56 +715,79 @@ void ONNXImporter::populateNet() << " model produced by '" << framework_name << "'" << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str())) << ". Number of nodes = " << graph_proto.node_size() + << ", initializers = " << graph_proto.initializer_size() << ", inputs = " << graph_proto.input_size() << ", outputs = " << graph_proto.output_size() ); + parseOperatorSet(); + simplifySubgraphs(graph_proto); const int layersSize = graph_proto.node_size(); CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes"); - constBlobs = getGraphTensors(graph_proto); + constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer + std::vector netInputs; // map with network inputs (without const blobs) // Add all the inputs shapes. It includes as constant blobs as network's inputs shapes. for (int i = 0; i < graph_proto.input_size(); ++i) { const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i); CV_Assert(valueInfoProto.has_name()); + const std::string& name = valueInfoProto.name(); CV_Assert(valueInfoProto.has_type()); - opencv_onnx::TypeProto typeProto = valueInfoProto.type(); + const opencv_onnx::TypeProto& typeProto = valueInfoProto.type(); CV_Assert(typeProto.has_tensor_type()); - opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type(); + const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type(); CV_Assert(tensor.has_shape()); - opencv_onnx::TensorShapeProto tensorShape = tensor.shape(); + const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape(); - MatShape inpShape(tensorShape.dim_size()); - for (int j = 0; j < inpShape.size(); ++j) + int dim_size = tensorShape.dim_size(); + CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. 
in Test_ONNX_nets.Resnet34_kinetics test + MatShape inpShape(dim_size); + for (int j = 0; j < dim_size; ++j) { - inpShape[j] = tensorShape.dim(j).dim_value(); + const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j); + if (dimension.has_dim_param()) + { + CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)"); + } + // https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition + if (dimension.has_denotation()) + { + CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'"); + } + inpShape[j] = dimension.dim_value(); // NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic - if (!tensorShape.dim(j).dim_param().empty() && !(j == 0 && inpShape.size() >= 3)) + if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3)) + { hasDynamicShapes = true; + } } - if (!inpShape.empty() && !hasDynamicShapes) + bool isInitialized = ((constBlobs.find(name) != constBlobs.end())); + CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape)); + CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape)); + if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs { inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size } outShapes[valueInfoProto.name()] = inpShape; - } - - // create map with network inputs (without const blobs) - // fill map: push layer name, layer id and output id - std::vector netInputs; - for (int j = 0; j < graph_proto.input_size(); j++) - { - const std::string& name = graph_proto.input(j).name(); - if (constBlobs.find(name) == constBlobs.end()) { + // fill map: push layer name, layer id and output id + if (!isInitialized) + { netInputs.push_back(name); layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1))); } } + dstNet.setInputsNames(netInputs); + // dump outputs + for (int i = 0; i < graph_proto.output_size(); ++i) + { + dumpValueInfoProto(i, graph_proto.output(i), "output"); + } + if (DNN_DIAGNOSTICS_RUN) { CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!"); layerHandler->fillRegistry(graph_proto); @@ -696,6 +807,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) CV_Assert(node_proto.output_size() >= 1); std::string name = node_proto.output(0); const std::string& layer_type = node_proto.op_type(); + const std::string& layer_type_domain = node_proto.has_domain() ? 
node_proto.domain() : std::string(); + if (!layer_type_domain.empty() && layer_type_domain != "ai.onnx") + { + CV_LOG_WARNING(NULL, "DNN/ONNX: can't handle node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s@%s]:(%s)", layer_type.c_str(), layer_type_domain.c_str(), name.c_str()) + ); + if (DNN_DIAGNOSTICS_RUN) + return; // ignore error + CV_Error(Error::StsNotImplemented, cv::format("ONNX: unsupported domain: %s", layer_type_domain.c_str())); + } + CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); @@ -2341,11 +2463,24 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP CV_Assert(shapeIt != outShapes.end()); const MatShape& inpShape = shapeIt->second; - Mat shapeMat(inpShape.size(), 1, CV_32S); - for (int j = 0; j < inpShape.size(); ++j) - shapeMat.at(j) = inpShape[j]; - shapeMat.dims = 1; + int dims = static_cast(inpShape.size()); + Mat shapeMat(dims, 1, CV_32S); + bool isDynamicShape = false; + for (int j = 0; j < dims; ++j) + { + int sz = inpShape[j]; + isDynamicShape |= (sz == 0); + shapeMat.at(j) = sz; + } + shapeMat.dims = 1; // FIXIT Mat 1D + if (isDynamicShape) + { + CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0))); + // FIXIT repair assertion + // Disabled to pass face detector tests from #20422 + // CV_Assert(!isDynamicShape); // not supported + } addConstant(layerParams.name, shapeMat); } @@ -2542,6 +2677,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node addLayer(layerParams, node_proto); } +// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { for (int i = 1; i < node_proto.input_size(); i++) @@ -2565,30 +2701,38 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node if (layerParams.get("mode") == "linear" && framework_name == "pytorch") layerParams.set("mode", "opencv_linear"); - // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] - int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 - : node_proto.input_size() > 2 ? 2 : 1; + // opset-10: input = [X, scales] + // opset-11: input = [X, roi, scales] or [x, roi, scales, sizes] + int scalesInputId = node_proto.input_size() == 2 ? 
1 : 2; - Mat scales = getBlob(node_proto, foundScaleId); - if (scales.total() == 4) + Mat scales = getBlob(node_proto, scalesInputId); + if (!scales.empty()) { + CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected"); layerParams.set("zoom_factor_y", scales.at(2)); layerParams.set("zoom_factor_x", scales.at(3)); } - else + else if (node_proto.input_size() >= 4) // opset-11 { - const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); - if (constBlobs.find(inputLast) != constBlobs.end()) + const std::string& inputSizes = node_proto.input(3); + if (constBlobs.find(inputSizes) != constBlobs.end()) { - Mat shapes = getBlob(inputLast); - CV_CheckEQ(shapes.size[0], 4, ""); - CV_CheckEQ(shapes.size[1], 1, ""); + Mat shapes = getBlob(inputSizes); + CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected"); CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); if (shapes.depth() == CV_32F) shapes.convertTo(shapes, CV_32S); layerParams.set("width", shapes.at(3)); layerParams.set("height", shapes.at(2)); } + else + { + CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str())); + } + } + else + { + CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters"); } replaceLayerParam(layerParams, "mode", "interpolation"); addLayer(layerParams, node_proto); diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp index eacbd6566c..6ee693dd6b 100644 --- a/modules/dnn/src/precomp.hpp +++ b/modules/dnn/src/precomp.hpp @@ -72,5 +72,6 @@ #include #include #include +#include #include "dnn_common.hpp" diff --git a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp index 2b7ba86251..ad20968f6d 100644 --- a/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp +++ b/modules/dnn/src/tensorflow/tf_graph_simplifier.hpp @@ -8,8 +8,6 @@ #ifndef __OPENCV_DNN_TF_SIMPLIFIER_HPP__ #define __OPENCV_DNN_TF_SIMPLIFIER_HPP__ -#include "../precomp.hpp" - #ifdef HAVE_PROTOBUF #include "tf_io.hpp" diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a980df8a67..fe261c8f15 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -1190,7 +1190,58 @@ TEST_P(Test_ONNX_layers, GatherMultiOutput) testONNXModels("gather_multi_output"); } -TEST_P(Test_ONNX_layers, DynamicAxes) +TEST_P(Test_ONNX_layers, DynamicAxes_squeeze_and_conv) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + } +#endif +#endif + testONNXModels("squeeze_and_conv_dynamic_axes"); +} + +TEST_P(Test_ONNX_layers, DynamicAxes_unsqueeze_and_conv) +{ +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } +#if INF_ENGINE_VER_MAJOR_LT(2021000000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, 
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
+    testONNXModels("unsqueeze_and_conv_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_gather)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
+    testONNXModels("gather_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar)
 {
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
     // accuracy
@@ -1211,18 +1262,112 @@ TEST_P(Test_ONNX_layers, DynamicAxes)
     }
 #endif
 #endif
-    testONNXModels("squeeze_and_conv_dynamic_axes");
-    testONNXModels("unsqueeze_and_conv_dynamic_axes");
-    testONNXModels("gather_dynamic_axes");
     testONNXModels("gather_scalar_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_slice)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("slice_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_slice_opset_11)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("slice_opset_11_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_resize_opset11_torch16)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("resize_opset11_torch1.6_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_average_pooling)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("average_pooling_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_maxpooling_sigmoid)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("maxpooling_sigmoid_dynamic_axes");
+}
+
+TEST_P(Test_ONNX_layers, DynamicAxes_dynamic_batch)
+{
+#if defined(INF_ENGINE_RELEASE)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    }
+#if INF_ENGINE_VER_MAJOR_LT(2021000000)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+#endif
+#endif
     testONNXModels("dynamic_batch");
 }
+
 TEST_P(Test_ONNX_layers, MaxPool1d)
 {
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000)
@@ -1344,6 +1489,16 @@ TEST_P(Test_ONNX_layers, DivConst)
 }

+// FIXIT disabled due to non-standard ONNX model domains, need to add ONNX domains support
+// Example:
+// DNN/ONNX: unsupported opset[1]: domain='com.microsoft.experimental' version=1
+// DNN/ONNX: unsupported opset[2]: domain='ai.onnx.preview.training' version=1
+// DNN/ONNX: unsupported opset[3]: domain='com.microsoft.nchwc' version=1
+// DNN/ONNX: unsupported opset[4]: domain='com.microsoft.mlfeaturizers' version=1
+// DNN/ONNX: unsupported opset[5]: domain='ai.onnx.ml' version=2
+// DNN/ONNX: unsupported opset[6]: domain='com.microsoft' version=1
+// DNN/ONNX: unsupported opset[7]: domain='ai.onnx.training' version=1
+#if 0
 TEST_P(Test_ONNX_layers, Quantized_Convolution)
 {
     testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
@@ -1449,6 +1604,7 @@ TEST_P(Test_ONNX_layers, Quantized_Constant)
 {
     testONNXModels("quantized_constant", npy, 0.002, 0.008);
 }
+#endif

 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

@@ -1593,7 +1749,8 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
     testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }

-TEST_P(Test_ONNX_nets, ResNet50_Int8)
+// FIXIT missing ONNX domains support
+TEST_P(Test_ONNX_nets, DISABLED_ResNet50_Int8)
 {
     testONNXModels("resnet50_int8", pb, default_l1, default_lInf, true);
 }
diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp
index 3428586ea3..260166f33b 100644
--- a/modules/highgui/src/window_gtk.cpp
+++ b/modules/highgui/src/window_gtk.cpp
@@ -1966,6 +1966,10 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
     }
     else if( event->type == GDK_SCROLL )
     {
+        GdkEventScroll* event_scroll = (GdkEventScroll*)event;
+        pt32f.x = cvFloor(event_scroll->x);
+        pt32f.y = cvFloor(event_scroll->y);
+
 #if defined(GTK_VERSION3_4)
         // NOTE: with the current implementation it is not possible to pass delta_x and delta_y into the callback separately
         double delta = (event->scroll.delta_x + event->scroll.delta_y);
@@ -2023,14 +2027,18 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
             (unsigned)pt.y < (unsigned)(image_widget->original_image->height)
             ))
         {
-            state &= gtk_accelerator_get_default_mod_mask();
-            flags |= BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) |
-                BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) |
-                BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) |
-                BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY) |
+            // handle non-keyboard (mouse) modifiers first
+            flags |= BIT_MAP(state, GDK_BUTTON1_MASK, CV_EVENT_FLAG_LBUTTON) |
                 BIT_MAP(state, GDK_BUTTON2_MASK, CV_EVENT_FLAG_MBUTTON) |
                 BIT_MAP(state, GDK_BUTTON3_MASK, CV_EVENT_FLAG_RBUTTON);
+            // keyboard modifiers
+            state &= gtk_accelerator_get_default_mod_mask();
+            flags |=
+                BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) |
+                BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) |
+                BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) |
+                BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY);
             window->on_mouse( cv_event, pt.x, pt.y, flags, window->on_mouse_param );
         }
     }
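The window_gtk.cpp reordering above matters because gtk_accelerator_get_default_mod_mask() keeps only keyboard modifier bits, so applying it before reading GDK_BUTTONn_MASK silently dropped the mouse-button flags. A toy Python sketch of the effect, with stand-in mask values (the constants below are illustrative, not real GDK values):

    # Stand-in bitmasks, for illustration only.
    GDK_SHIFT_MASK    = 1 << 0
    GDK_CONTROL_MASK  = 1 << 2
    GDK_BUTTON1_MASK  = 1 << 8
    KEYBOARD_MOD_MASK = GDK_SHIFT_MASK | GDK_CONTROL_MASK  # ~ gtk_accelerator_get_default_mod_mask()

    state = GDK_BUTTON1_MASK | GDK_SHIFT_MASK  # left button held + Shift pressed

    # Old order: masking first destroys the button bit.
    assert ((state & KEYBOARD_MOD_MASK) & GDK_BUTTON1_MASK) == 0

    # New order: read the button bits first, then mask for keyboard modifiers.
    buttons = state & GDK_BUTTON1_MASK
    keys = state & KEYBOARD_MOD_MASK
    assert buttons != 0 and (keys & GDK_SHIFT_MASK) != 0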
diff --git a/modules/imgcodecs/CMakeLists.txt b/modules/imgcodecs/CMakeLists.txt
index cc512f3e19..b9c4683aa9 100644
--- a/modules/imgcodecs/CMakeLists.txt
+++ b/modules/imgcodecs/CMakeLists.txt
@@ -50,12 +50,21 @@ if(HAVE_JASPER)
   list(APPEND GRFMT_LIBS ${JASPER_LIBRARIES})
   if(OPENCV_IO_FORCE_JASPER)
     add_definitions(-DOPENCV_IMGCODECS_FORCE_JASPER=1)
+  else()
+    message(STATUS "imgcodecs: Jasper codec is disabled at runtime. Details: https://github.com/opencv/opencv/issues/14058")
   endif()
 endif()

 if(HAVE_OPENEXR)
   include_directories(SYSTEM ${OPENEXR_INCLUDE_PATHS})
   list(APPEND GRFMT_LIBS ${OPENEXR_LIBRARIES})
+  if(OPENCV_IO_FORCE_OPENEXR
+      OR NOT BUILD_OPENEXR  # external OpenEXR versions are not disabled
+  )
+    add_definitions(-DOPENCV_IMGCODECS_USE_OPENEXR=1)
+  else()
+    message(STATUS "imgcodecs: OpenEXR codec is disabled at runtime. Details: https://github.com/opencv/opencv/issues/21326")
+  endif()
 endif()

 if(HAVE_PNG OR HAVE_TIFF OR HAVE_OPENEXR)
@@ -167,6 +176,9 @@ ocv_add_accuracy_tests()
 if(TARGET opencv_test_imgcodecs AND HAVE_JASPER AND "$ENV{OPENCV_IO_ENABLE_JASPER}")
   ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_JASPER_TESTS=1)
 endif()
+if(TARGET opencv_test_imgcodecs AND HAVE_OPENEXR AND "$ENV{OPENCV_IO_ENABLE_OPENEXR}")
+  ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS=1)
+endif()
 if(TARGET opencv_test_imgcodecs AND HAVE_PNG AND NOT (PNG_VERSION VERSION_LESS "1.6.31"))
   # details: https://github.com/glennrp/libpng/commit/68cb0aaee3de6371b81a4613476d9b33e43e95b1
   ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_PNG_WITH_EXIF=1)
diff --git a/modules/imgcodecs/src/grfmt_exr.cpp b/modules/imgcodecs/src/grfmt_exr.cpp
index f3368587f3..960f5da3d3 100644
--- a/modules/imgcodecs/src/grfmt_exr.cpp
+++ b/modules/imgcodecs/src/grfmt_exr.cpp
@@ -44,6 +44,9 @@

 #ifdef HAVE_OPENEXR

+#include <opencv2/core/utils/configuration.private.hpp>
+#include <opencv2/core/utils/logger.hpp>
+
 #if defined _MSC_VER && _MSC_VER >= 1200
 #  pragma warning( disable: 4100 4244 4267 )
 #endif
@@ -80,6 +83,27 @@

 namespace cv
 {

+static bool isOpenEXREnabled()
+{
+    static const bool PARAM_ENABLE_OPENEXR = utils::getConfigurationParameterBool("OPENCV_IO_ENABLE_OPENEXR",
+#ifdef OPENCV_IMGCODECS_USE_OPENEXR
+        true
+#else
+        false
+#endif
+    );
+    return PARAM_ENABLE_OPENEXR;
+}
+
+static void initOpenEXR()
+{
+    if (!isOpenEXREnabled())
+    {
+        const char* message = "imgcodecs: OpenEXR codec is disabled. You can enable it via the 'OPENCV_IO_ENABLE_OPENEXR' option. Refer here for details and cautions: https://github.com/opencv/opencv/issues/21326";
+        CV_LOG_WARNING(NULL, message);
+        CV_Error(Error::StsNotImplemented, message);
+    }
+}
+
 /////////////////////// ExrDecoder ///////////////////

 ExrDecoder::ExrDecoder()
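Because isOpenEXREnabled() caches the configuration flag in a function-local static, the environment variable must be set before the first EXR read or write in the process. A minimal usage sketch from Python (the file name is hypothetical):

    import os
    os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"  # must happen before the first EXR access

    import cv2 as cv
    img = cv.imread("sample.exr", cv.IMREAD_UNCHANGED)  # hypothetical sample file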
@@ -577,6 +601,7 @@ void ExrDecoder::RGBToGray( float *in, float *out )

 ImageDecoder ExrDecoder::newDecoder() const
 {
+    initOpenEXR();
     return makePtr<ExrDecoder>();
 }

@@ -740,6 +765,7 @@ bool ExrEncoder::write( const Mat& img, const std::vector<int>& params )

 ImageEncoder ExrEncoder::newEncoder() const
 {
+    initOpenEXR();
     return makePtr<ExrEncoder>();
 }

diff --git a/modules/imgcodecs/src/grfmt_exr.hpp b/modules/imgcodecs/src/grfmt_exr.hpp
index 99acd775c2..a86874d228 100644
--- a/modules/imgcodecs/src/grfmt_exr.hpp
+++ b/modules/imgcodecs/src/grfmt_exr.hpp
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include

 #include "grfmt_base.hpp"

 namespace cv
diff --git a/modules/imgcodecs/test/test_grfmt.cpp b/modules/imgcodecs/test/test_grfmt.cpp
index 2a8a5957ff..a6af4b92e2 100644
--- a/modules/imgcodecs/test/test_grfmt.cpp
+++ b/modules/imgcodecs/test/test_grfmt.cpp
@@ -429,6 +429,6 @@ TEST(Imgcodecs, write_parameter_type)

 }} // namespace

-#ifdef HAVE_OPENEXR
+#if defined(HAVE_OPENEXR) && defined(OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS)
 #include "test_exr.impl.hpp"
 #endif
diff --git a/modules/python/src2/cv2_convert.hpp b/modules/python/src2/cv2_convert.hpp
index e9ed71258b..563f5386b7 100644
--- a/modules/python/src2/cv2_convert.hpp
+++ b/modules/python/src2/cv2_convert.hpp
@@ -372,6 +372,9 @@ struct IsRepresentableAsMatDataType
+template <>
+struct IsRepresentableAsMatDataType<RotatedRect> : FalseType {};
+
 } // namespace traits

 template
diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py
index fe484089f6..051ac33ac9 100644
--- a/modules/python/test/test_misc.py
+++ b/modules/python/test/test_misc.py
@@ -594,6 +594,18 @@ class Arguments(NewOpenCVTests):
         self.assertEqual(ints.dtype, np.int32, "Vector of integers has wrong elements type")
         self.assertEqual(ints.shape, expected_shape, "Vector of integers has wrong shape.")

+    def test_result_rotated_rect_issue_20930(self):
+        rr = cv.utils.testRotatedRect(10, 20, 100, 200, 45)
+        self.assertTrue(isinstance(rr, tuple), msg=type(rr))
+        self.assertEqual(len(rr), 3)
+
+        rrv = cv.utils.testRotatedRectVector(10, 20, 100, 200, 45)
+        self.assertTrue(isinstance(rrv, tuple), msg=type(rrv))
+        self.assertEqual(len(rrv), 10)
+
+        rr = rrv[0]
+        self.assertTrue(isinstance(rr, tuple), msg=type(rr))
+        self.assertEqual(len(rr), 3)

 class CanUsePurePythonModuleFunction(NewOpenCVTests):
     def test_can_get_ocv_version(self):
diff --git a/samples/python/find_obj.py b/samples/python/find_obj.py
index 14dfa74902..c5783bf456 100755
--- a/samples/python/find_obj.py
+++ b/samples/python/find_obj.py
@@ -86,6 +86,7 @@ def explore_match(win, img1, img2, kp_pairs, status = None, H = None):

     if status is None:
         status = np.ones(len(kp_pairs), np.bool_)
+    status = status.reshape((len(kp_pairs), 1))
     p1, p2 = [], []  # python 2 / python 3 change of zip unpacking
     for kpp in kp_pairs:
         p1.append(np.int32(kpp[0].pt))
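For reference, the RotatedRect conversion exercised by test_result_rotated_rect_issue_20930 above follows the same tuple layout that existing Python APIs such as cv.minAreaRect already use, ((cx, cy), (w, h), angle). A short sketch:

    import cv2 as cv
    import numpy as np

    pts = np.array([[0, 0], [10, 0], [10, 5], [0, 5]], dtype=np.float32)
    rr = cv.minAreaRect(pts)   # returns ((cx, cy), (w, h), angle)
    (cx, cy), (w, h), angle = rr
    print(rr, len(rr))         # a 3-element tuple, matching the assertions above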