Merge remote-tracking branch 'upstream/3.4' into merge-3.4

This commit is contained in:
Alexander Alekhin 2021-12-24 16:35:47 +00:00
commit 217fea9667
26 changed files with 804 additions and 377 deletions

View File

@ -223,7 +223,7 @@ OCV_OPTION(BUILD_OPENJPEG "Build OpenJPEG from source" (WIN32
OCV_OPTION(BUILD_JASPER "Build libjasper from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_JPEG "Build libjpeg from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_PNG "Build libpng from source" (WIN32 OR ANDROID OR APPLE OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_OPENEXR "Build openexr from source" (OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_WEBP "Build WebP from source" (((WIN32 OR ANDROID OR APPLE) AND NOT WINRT) OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_TBB "Download and build TBB from source" (ANDROID OR OPENCV_FORCE_3RDPARTY_BUILD) )
OCV_OPTION(BUILD_IPP_IW "Build IPP IW from source" (NOT MINGW OR OPENCV_FORCE_3RDPARTY_BUILD) IF (X86_64 OR X86) AND NOT WINRT )
@ -311,7 +311,7 @@ OCV_OPTION(WITH_JPEG "Include JPEG support" ON
OCV_OPTION(WITH_WEBP "Include WebP support" ON
VISIBLE_IF NOT WINRT
VERIFY HAVE_WEBP)
OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" BUILD_OPENEXR OR NOT CMAKE_CROSSCOMPILING
OCV_OPTION(WITH_OPENEXR "Include ILM support via OpenEXR" ((WIN32 OR ANDROID OR APPLE) OR BUILD_OPENEXR) OR NOT CMAKE_CROSSCOMPILING
VISIBLE_IF NOT APPLE_FRAMEWORK AND NOT WINRT
VERIFY HAVE_OPENEXR)
OCV_OPTION(WITH_OPENGL "Include OpenGL support" OFF

View File

@ -268,7 +268,9 @@ if(WITH_OPENEXR)
set(OPENEXR_LIBRARIES IlmImf)
add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/openexr")
if(OPENEXR_VERSION) # check via TARGET doesn't work
set(BUILD_OPENEXR ON)
set(HAVE_OPENEXR YES)
set(BUILD_OPENEXR ON)
endif()
endif()
endif()

View File

@ -9,12 +9,20 @@
# OPENEXR_LIBRARIES = libraries that are needed to use OpenEXR.
#
find_package(OpenEXR 3.0 CONFIG QUIET)
if(TARGET OpenEXR::OpenEXR)
SET(OPENEXR_FOUND TRUE)
SET(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
SET(OPENEXR_VERSION ${OpenEXR_VERSION})
return()
if(NOT OPENCV_SKIP_OPENEXR_FIND_PACKAGE)
find_package(OpenEXR 3 QUIET)
#ocv_cmake_dump_vars(EXR)
if(OpenEXR_FOUND)
if(TARGET OpenEXR::OpenEXR) # OpenEXR 3+
set(OPENEXR_LIBRARIES OpenEXR::OpenEXR)
set(OPENEXR_INCLUDE_PATHS "")
set(OPENEXR_VERSION "${OpenEXR_VERSION}")
set(OPENEXR_FOUND 1)
return()
else()
message(STATUS "Unsupported find_package(OpenEXR) - missing OpenEXR::OpenEXR target (version ${OpenEXR_VERSION})")
endif()
endif()
endif()
SET(OPENEXR_LIBRARIES "")

View File

@ -1294,6 +1294,12 @@
number={2},
pages={117-135},
}
@inproceedings{Zuliani2014RANSACFD,
title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...},
author={Marco Zuliani},
year={2014},
url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf}
}
@inproceedings{forstner1987fast,
title={A fast operator for detection and precise location of distincs points, corners and center of circular features},
author={FORSTNER, W},

View File

@ -367,7 +367,7 @@ libraries). If you do not need the support for some of these, you can just freel
Set the OpenCV environment variable and add it to the systems path {#tutorial_windows_install_path}
=================================================================
First we set an environment variable to make easier our work. This will hold the build directory of
First, we set an environment variable to make our work easier. This will hold the build directory of
our OpenCV library that we use in our projects. Start up a command window and enter:
@code
setx -m OPENCV_DIR D:\OpenCV\Build\x86\vc11 (suggested for Visual Studio 2012 - 32 bit Windows)

View File

@ -40,12 +40,13 @@
publisher={IEEE}
}
@inproceedings{Terzakis20,
author = {Terzakis, George and Lourakis, Manolis},
year = {2020},
month = {09},
pages = {},
title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem}
@inproceedings{Terzakis2020SQPnP,
title={A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem},
author={George Terzakis and Manolis Lourakis},
booktitle={European Conference on Computer Vision},
pages={478--494},
year={2020},
publisher={Springer International Publishing}
}
@inproceedings{strobl2011iccv,

View File

@ -0,0 +1,176 @@
# Perspective-n-Point (PnP) pose computation {#calib3d_solvePnP}
## Pose computation overview
The pose computation problem @cite Marchand16 consists in solving for the rotation and translation that minimizes the reprojection error from 3D-2D point correspondences.
The `solvePnP` and related functions estimate the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward).
![](pnp.jpg)
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$ (also denoted \f$ \bf{K} \f$ in the literature):
\f[
\begin{align*}
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\begin{bmatrix}
f_x & 0 & c_x \\
0 & f_y & c_y \\
0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
1 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 \\
0 & 0 & 1 & 0
\end{bmatrix}
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
a 3D point expressed in the world frame into the camera frame:
\f[
\begin{align*}
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
## Pose computation methods
@anchor calib3d_solvePnP_flags
Refer to the cv::SolvePnPMethod enum documentation for the list of possible values. Some details about each method are described below:
- cv::SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections "imagePoints" and the projected (using
cv::projectPoints ) "objectPoints". Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm.
Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
- cv::SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
- cv::SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
In this case the function requires exactly four object and image points.
- cv::SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- cv::SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of J. Hesch and S. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- cv::SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
focal length.
- cv::SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
- cv::SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
It requires 4 coplanar object points defined in the following order:
- point 0: [-squareLength / 2, squareLength / 2, 0]
- point 1: [ squareLength / 2, squareLength / 2, 0]
- point 2: [ squareLength / 2, -squareLength / 2, 0]
- point 3: [-squareLength / 2, -squareLength / 2, 0]
- cv::SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis2020SQPnP). It requires 3 or more points.
## P3P
The cv::solveP3P() computes an object pose from exactly 3 3D-2D point correspondences. A P3P problem has up to 4 solutions.
@note The solutions are sorted by reprojection errors (lowest to highest).
## PnP
The cv::solvePnP() returns the rotation and the translation vectors that transform a 3D point expressed in the object
coordinate frame to the camera coordinate frame, using different methods:
- P3P methods (cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P): need 4 input points to return a unique solution.
- cv::SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.
- cv::SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.
Number of input points must be 4. Object points must be defined in the following order:
- point 0: [-squareLength / 2, squareLength / 2, 0]
- point 1: [ squareLength / 2, squareLength / 2, 0]
- point 2: [ squareLength / 2, -squareLength / 2, 0]
- point 3: [-squareLength / 2, -squareLength / 2, 0]
- for all the other flags, number of input points must be >= 4 and object points can be in any configuration.
## Generic PnP
The cv::solvePnPGeneric() allows retrieving all the possible solutions.
Currently, only cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P, cv::SOLVEPNP_IPPE, cv::SOLVEPNP_IPPE_SQUARE, cv::SOLVEPNP_SQPNP can return multiple solutions.
## RANSAC PnP
The cv::solvePnPRansac() computes the object pose wrt. the camera frame using a RANSAC scheme to deal with outliers.
More information can be found in @cite Zuliani2014RANSACFD
## Pose refinement
Pose refinement consists in estimating the rotation and translation that minimizes the reprojection error using a non-linear minimization method and starting from an initial estimate of the solution. OpenCV proposes cv::solvePnPRefineLM() and cv::solvePnPRefineVVS() for this problem.
cv::solvePnPRefineLM() uses a non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 and the current implementation computes the rotation update as a perturbation and not on SO(3).
cv::solvePnPRefineVVS() uses a Gauss-Newton non-linear minimization scheme @cite Marchand16 and with an update of the rotation part computed using the exponential map.
@note at least three 3D-2D point correspondences are necessary.

View File

@ -454,7 +454,9 @@ enum { LMEDS = 4, //!< least-median of squares algorithm
};
enum SolvePnPMethod {
SOLVEPNP_ITERATIVE = 0,
SOLVEPNP_ITERATIVE = 0, //!< Pose refinement using non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 \n
//!< Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. \n
//!< Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
@ -471,7 +473,7 @@ enum SolvePnPMethod {
//!< - point 1: [ squareLength / 2, squareLength / 2, 0]
//!< - point 2: [ squareLength / 2, -squareLength / 2, 0]
//!< - point 3: [-squareLength / 2, -squareLength / 2, 0]
SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20
SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis2020SQPnP
#ifndef CV_DOXYGEN
SOLVEPNP_MAX_COUNT //!< Used for count
#endif
@ -890,6 +892,9 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details
*/
/** @brief Finds an object pose from 3D-2D point correspondences.
@see @ref calib3d_solvePnP
This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
coordinate frame to the camera coordinate frame, using different methods:
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
@ -916,133 +921,9 @@ the model coordinate system to the camera coordinate system.
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
the provided rvec and tvec values as initial approximations of the rotation and translation
vectors, respectively, and further optimizes them.
@param flags Method for solving a PnP problem:
- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections imagePoints and the projected (using
@ref projectPoints ) objectPoints .
- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
In this case the function requires exactly four object and image points.
- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of J. Hesch and S. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
focal length.
- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
It requires 4 coplanar object points defined in the following order:
- point 0: [-squareLength / 2, squareLength / 2, 0]
- point 1: [ squareLength / 2, squareLength / 2, 0]
- point 2: [ squareLength / 2, -squareLength / 2, 0]
- point 3: [-squareLength / 2, -squareLength / 2, 0]
- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points.
@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
The function estimates the object pose given a set of object points, their corresponding image
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
and the Z-axis forward).
![](pnp.jpg)
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
\f[
\begin{align*}
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\begin{bmatrix}
f_x & 0 & c_x \\
0 & f_y & c_y \\
0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
1 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 \\
0 & 0 & 1 & 0
\end{bmatrix}
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
a 3D point expressed in the world frame into the camera frame:
\f[
\begin{align*}
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
More information about Perspective-n-Points is described in @ref calib3d_solvePnP
@note
- An example of how to use solvePnP for planar augmented reality can be found at
@ -1082,6 +963,8 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
@see @ref calib3d_solvePnP
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1140,6 +1023,8 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
/** @brief Finds an object pose from 3 3D-2D point correspondences.
@see @ref calib3d_solvePnP
@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
@ -1171,6 +1056,8 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
@see @ref calib3d_solvePnP
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1198,6 +1085,8 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
@see @ref calib3d_solvePnP
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
@ -1226,6 +1115,9 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo
double VVSlambda = 1);
/** @brief Finds an object pose from 3D-2D point correspondences.
@see @ref calib3d_solvePnP
This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector>
couple), depending on the number of input points and the chosen method:
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points.
@ -1253,37 +1145,7 @@ the model coordinate system to the camera coordinate system.
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
the provided rvec and tvec values as initial approximations of the rotation and translation
vectors, respectively, and further optimizes them.
@param flags Method for solving a PnP problem:
- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections imagePoints and the projected (using
#projectPoints ) objectPoints .
- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
In this case the function requires exactly four object and image points.
- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
focal length.
- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
It requires 4 coplanar object points defined in the following order:
- point 0: [-squareLength / 2, squareLength / 2, 0]
- point 1: [ squareLength / 2, squareLength / 2, 0]
- point 2: [ squareLength / 2, -squareLength / 2, 0]
- point 3: [-squareLength / 2, -squareLength / 2, 0]
@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
and useExtrinsicGuess is set to true.
@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
@ -1292,98 +1154,7 @@ and useExtrinsicGuess is set to true.
(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
and the 3D object points projected with the estimated pose.
The function estimates the object pose given a set of object points, their corresponding image
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
and the Z-axis forward).
![](pnp.jpg)
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
\f[
\begin{align*}
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
u \\
v \\
1
\end{bmatrix} &=
\begin{bmatrix}
f_x & 0 & c_x \\
0 & f_y & c_y \\
0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
1 & 0 & 0 & 0 \\
0 & 1 & 0 & 0 \\
0 & 0 & 1 & 0
\end{bmatrix}
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
a 3D point expressed in the world frame into the camera frame:
\f[
\begin{align*}
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\hspace{0.2em} ^{c}\bf{T}_w
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix} \\
\begin{bmatrix}
X_c \\
Y_c \\
Z_c \\
1
\end{bmatrix} &=
\begin{bmatrix}
r_{11} & r_{12} & r_{13} & t_x \\
r_{21} & r_{22} & r_{23} & t_y \\
r_{31} & r_{32} & r_{33} & t_z \\
0 & 0 & 0 & 1
\end{bmatrix}
\begin{bmatrix}
X_{w} \\
Y_{w} \\
Z_{w} \\
1
\end{bmatrix}
\end{align*}
\f]
More information is described in @ref calib3d_solvePnP
@note
- An example of how to use solvePnP for planar augmented reality can be found at

View File

@ -103,6 +103,21 @@ String dumpRotatedRect(const RotatedRect& argument)
argument.size.height, argument.angle);
}
CV_WRAP static inline
RotatedRect testRotatedRect(float x, float y, float w, float h, float angle)
{
return RotatedRect(Point2f(x, y), Size2f(w, h), angle);
}
CV_WRAP static inline
std::vector<RotatedRect> testRotatedRectVector(float x, float y, float w, float h, float angle)
{
std::vector<RotatedRect> result;
for (int i = 0; i < 10; i++)
result.push_back(RotatedRect(Point2f(x + i, y + 2 * i), Size2f(w, h), angle + 10 * i));
return result;
}
CV_WRAP static inline
String dumpRange(const Range& argument)
{

View File

@ -288,8 +288,6 @@ std::vector<Target> getAvailableTargets(Backend be)
namespace
{
typedef std::vector<MatShape> ShapesVec;
struct LayerShapes
{
ShapesVec in, out, internal;
@ -1490,6 +1488,11 @@ struct Net::Impl : public detail::NetImplBase
clear();
if (hasDynamicShapes)
{
updateLayersShapes();
}
this->blobsToKeep = blobsToKeep_;
allocateLayers(blobsToKeep_);
@ -3794,20 +3797,24 @@ struct Net::Impl : public detail::NetImplBase
void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
{
std::vector<LayerPin>& inputLayerIds = layers[id].inputBlobsId;
CV_CheckGE(id, 0, "");
CV_CheckLT(id, (int)layers.size(), "");
LayerData& layerData = layers[id];
std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
LayerShapes& layerShapes = inOutShapes[id];
if (id == 0 && inOutShapes[id].in[0].empty())
if (id == 0 && layerShapes.in[0].empty())
{
if (!layers[0].outputBlobs.empty())
if (!layerData.outputBlobs.empty())
{
ShapesVec shapes;
for (int i = 0; i < layers[0].outputBlobs.size(); i++)
for (int i = 0; i < layerData.outputBlobs.size(); i++)
{
Mat& inp = layers[0].outputBlobs[i];
CV_Assert(inp.total());
Mat& inp = layerData.outputBlobs[i];
CV_Assert(!inp.empty());
shapes.push_back(shape(inp));
}
inOutShapes[0].in = shapes;
layerShapes.in = shapes;
}
else
{
@ -3823,17 +3830,17 @@ struct Net::Impl : public detail::NetImplBase
}
if (none)
{
inOutShapes[0].out.clear();
layerShapes.out.clear();
return;
}
else
{
inOutShapes[0].in = inputShapes;
layerShapes.in = inputShapes;
}
}
}
if (inOutShapes[id].in.empty())
if (layerShapes.in.empty())
{
for(int i = 0; i < inputLayerIds.size(); i++)
{
@ -3846,14 +3853,14 @@ struct Net::Impl : public detail::NetImplBase
getLayerShapesRecursively(layerId, inOutShapes);
}
const MatShape& shape = inOutShapes[layerId].out[inputLayerIds[i].oid];
inOutShapes[id].in.push_back(shape);
layerShapes.in.push_back(shape);
}
}
const ShapesVec& is = inOutShapes[id].in;
ShapesVec& os = inOutShapes[id].out;
ShapesVec& ints = inOutShapes[id].internal;
int requiredOutputs = layers[id].requiredOutputs.size();
Ptr<Layer> l = layers[id].getLayerInstance();
const ShapesVec& is = layerShapes.in;
ShapesVec& os = layerShapes.out;
ShapesVec& ints = layerShapes.internal;
int requiredOutputs = layerData.requiredOutputs.size();
Ptr<Layer> l = layerData.getLayerInstance();
CV_Assert(l);
bool layerSupportInPlace = false;
try
@ -3881,13 +3888,38 @@ struct Net::Impl : public detail::NetImplBase
CV_LOG_ERROR(NULL, "Exception message: " << e.what());
throw;
}
inOutShapes[id].supportInPlace = layerSupportInPlace;
layerShapes.supportInPlace = layerSupportInPlace;
for (int i = 0; i < ints.size(); i++)
CV_Assert(total(ints[i]) > 0);
try
{
for (int i = 0; i < ints.size(); i++)
CV_CheckGT(total(ints[i]), 0, "");
for (int i = 0; i < os.size(); i++)
CV_Assert(total(os[i]) > 0);
for (int i = 0; i < os.size(); i++)
CV_CheckGT(total(os[i]), 0, "");
}
catch (const cv::Exception& e)
{
CV_LOG_ERROR(NULL, "OPENCV/DNN: [" << l->type << "]:(" << l->name << "): getMemoryShapes() post validation failed." <<
" inputs=" << is.size() <<
" outputs=" << os.size() << "/" << requiredOutputs <<
" blobs=" << l->blobs.size() <<
" inplace=" << layerSupportInPlace);
for (size_t i = 0; i < is.size(); ++i)
{
CV_LOG_ERROR(NULL, " input[" << i << "] = " << toString(is[i]));
}
for (size_t i = 0; i < os.size(); ++i)
{
CV_LOG_ERROR(NULL, " output[" << i << "] = " << toString(os[i]));
}
for (size_t i = 0; i < l->blobs.size(); ++i)
{
CV_LOG_ERROR(NULL, " blobs[" << i << "] = " << typeToString(l->blobs[i].type()) << " " << toString(shape(l->blobs[i])));
}
CV_LOG_ERROR(NULL, "Exception message: " << e.what());
throw;
}
}
void getLayersShapes(const ShapesVec& netInputShapes,
@ -3915,43 +3947,58 @@ struct Net::Impl : public detail::NetImplBase
void updateLayersShapes()
{
CV_Assert(!layers[0].outputBlobs.empty());
CV_LOG_DEBUG(NULL, "updateLayersShapes() with layers.size=" << layers.size());
CV_Assert(netInputLayer);
DataLayer& inputLayer = *netInputLayer;
LayerData& inputLayerData = layers[0];
CV_Assert(inputLayerData.layerInstance.get() == &inputLayer);
CV_Assert(!inputLayerData.outputBlobs.empty());
ShapesVec inputShapes;
for(int i = 0; i < layers[0].outputBlobs.size(); i++)
for(int i = 0; i < inputLayerData.outputBlobs.size(); i++)
{
Mat& inp = layers[0].outputBlobs[i];
CV_Assert(inp.total());
if (preferableBackend == DNN_BACKEND_OPENCV &&
Mat& inp = inputLayerData.outputBlobs[i];
CV_Assert(!inp.empty());
if (preferableBackend == DNN_BACKEND_OPENCV && // FIXIT: wrong place for output allocation
preferableTarget == DNN_TARGET_OPENCL_FP16 &&
layers[0].dtype == CV_32F)
inputLayerData.dtype == CV_32F)
{
layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S);
inp.create(inp.dims, inp.size, CV_16S);
}
inputShapes.push_back(shape(inp));
}
CV_LOG_DEBUG(NULL, toString(inputShapes, "Network input shapes"));
LayersShapesMap layersShapes;
layersShapes[0].in = inputShapes;
for (MapIdToLayerData::iterator it = layers.begin();
it != layers.end(); it++)
{
int layerId = it->first;
std::vector<LayerPin>& inputLayerIds = it->second.inputBlobsId;
if (layersShapes[layerId].in.empty())
LayerData& layerData = it->second;
std::vector<LayerPin>& inputLayerIds = layerData.inputBlobsId;
LayerShapes& layerShapes = layersShapes[layerId];
CV_LOG_DEBUG(NULL, "layer " << layerId << ": [" << layerData.type << "]:(" << layerData.name << ") with inputs.size=" << inputLayerIds.size());
if (layerShapes.in.empty())
{
for(int i = 0; i < inputLayerIds.size(); i++)
{
int inputLayerId = inputLayerIds[i].lid;
const LayerPin& inputPin = inputLayerIds[i];
int inputLayerId = inputPin.lid;
CV_LOG_DEBUG(NULL, " input[" << i << "] " << inputLayerId << ":" << inputPin.oid << " as [" << layers[inputLayerId].type << "]:(" << layers[inputLayerId].name << ")");
LayersShapesMap::iterator inputIt = layersShapes.find(inputLayerId);
if(inputIt == layersShapes.end() || inputIt->second.out.empty())
if (inputIt == layersShapes.end() || inputIt->second.out.empty())
{
getLayerShapesRecursively(inputLayerId, layersShapes);
}
const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid];
layersShapes[layerId].in.push_back(shape);
const MatShape& shape = layersShapes[inputLayerId].out[inputPin.oid];
layerShapes.in.push_back(shape);
}
it->second.getLayerInstance()->updateMemoryShapes(layersShapes[layerId].in);
layerData.getLayerInstance()->updateMemoryShapes(layerShapes.in);
}
CV_LOG_DEBUG(NULL, "Layer " << layerId << ": " << toString(layerShapes.in, "input shapes"));
CV_LOG_IF_DEBUG(NULL, !layerShapes.out.empty(), "Layer " << layerId << ": " << toString(layerShapes.out, "output shapes"));
CV_LOG_IF_DEBUG(NULL, !layerShapes.internal.empty(), "Layer " << layerId << ": " << toString(layerShapes.internal, "internal shapes"));
}
CV_LOG_DEBUG(NULL, "updateLayersShapes() - DONE");
}
LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
@ -5000,13 +5047,8 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
bool oldShape = prevShape == blobShape;
blob_.copyTo(impl->netInputLayer->inputsData[pin.oid]);
if (!oldShape) {
if (!oldShape)
ld.outputBlobs[pin.oid] = impl->netInputLayer->inputsData[pin.oid];
if (impl->hasDynamicShapes)
{
impl->updateLayersShapes();
}
}
if (!ld.outputBlobsWrappers[pin.oid].empty())
{

View File

@ -81,6 +81,44 @@ struct NetImplBase
} // namespace detail
typedef std::vector<MatShape> ShapesVec;
static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string())
{
std::ostringstream ss;
if (!name.empty())
ss << name << ' ';
ss << '[';
for(size_t i = 0, n = shapes.size(); i < n; ++i)
ss << ' ' << toString(shapes[i]);
ss << " ]";
return ss.str();
}
static inline std::string toString(const Mat& blob, const std::string& name = std::string())
{
std::ostringstream ss;
if (!name.empty())
ss << name << ' ';
if (blob.empty())
{
ss << "<empty>";
}
else if (blob.dims == 1)
{
Mat blob_ = blob;
blob_.dims = 2; // hack
ss << blob_.t();
}
else
{
ss << blob.reshape(1, 1);
}
return ss.str();
}
CV__DNN_INLINE_NS_END
}} // namespace

View File

@ -1366,9 +1366,16 @@ public:
}
else if (padMode.empty())
{
int addedDims = isPool1D? inpShape.size() : local_kernel.size();
for (int i = 0; i < addedDims; i++) {
size_t addedDims = isPool1D? inpShape.size() : local_kernel.size();
CV_CheckLE(addedDims, inpShape.size(), "");
CV_CheckLE(addedDims, pads_begin.size(), "");
CV_CheckLE(addedDims, pads_end.size(), "");
CV_CheckLE(addedDims, local_kernel.size(), "");
CV_CheckLE(addedDims, strides.size(), "");
for (int i = 0; i < addedDims; i++)
{
float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i];
CV_CheckGE(dst, 0.0f, "");
outShape.push_back(1 + (ceilMode ? ceil(dst) : floor(dst)));
}

View File

@ -741,7 +741,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
}
Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto)
Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
{
if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() &&
tensor_proto.double_data().empty() && tensor_proto.int64_data().empty() &&

View File

@ -8,8 +8,6 @@
#ifndef __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
#define __OPENCV_DNN_ONNX_SIMPLIFIER_HPP__
#include "../precomp.hpp"
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
@ -33,7 +31,7 @@ void convertInt64ToInt32(const T1& src, T2& dst, int size)
}
}
Mat getMatFromTensor(opencv_onnx::TensorProto& tensor_proto);
Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto);
CV__DNN_INLINE_NS_END
}} // namespace dnn, namespace cv

View File

@ -12,7 +12,7 @@
#include <opencv2/core/utils/logger.defines.hpp>
#undef CV_LOG_STRIP_LEVEL
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
#include <opencv2/core/utils/logger.hpp>
#ifdef HAVE_PROTOBUF
@ -158,6 +158,9 @@ private:
void parseQConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto);
int onnx_opset; // OperatorSetIdProto for 'onnx' domain
void parseOperatorSet();
};
class ONNXLayerHandler : public detail::LayerHandler
@ -189,8 +192,9 @@ void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net)
}
ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
: layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr),
dstNet(net), dispatch(buildDispatchMap())
: layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
, dstNet(net), dispatch(buildDispatchMap())
, onnx_opset(0)
{
hasDynamicShapes = false;
CV_Assert(onnxFile);
@ -211,7 +215,9 @@ ONNXImporter::ONNXImporter(Net& net, const char *onnxFile)
}
ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
: layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr), dstNet(net), dispatch(buildDispatchMap())
: layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr)
, dstNet(net), dispatch(buildDispatchMap())
, onnx_opset(0)
{
hasDynamicShapes = false;
CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");
@ -242,6 +248,53 @@ inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, co
}
}
static
void dumpValueInfoProto(int i, const opencv_onnx::ValueInfoProto& valueInfoProto, const std::string& prefix)
{
CV_Assert(valueInfoProto.has_name());
CV_Assert(valueInfoProto.has_type());
const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
CV_Assert(typeProto.has_tensor_type());
const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
CV_Assert(tensor.has_shape());
const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
int dim_size = tensorShape.dim_size();
CV_CheckGE(dim_size, 0, "");
MatShape shape(dim_size);
for (int j = 0; j < dim_size; ++j)
{
const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
if (dimension.has_dim_param())
{
CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
}
// https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
if (dimension.has_denotation())
{
CV_LOG_INFO(NULL, "DNN/ONNX: " << prefix << "[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
}
shape[j] = dimension.dim_value();
}
CV_LOG_DEBUG(NULL, "DNN/ONNX: " << prefix << "[" << i << " as '" << valueInfoProto.name() << "'] shape=" << toString(shape));
}
static
void dumpTensorProto(int i, const opencv_onnx::TensorProto& tensorProto, const std::string& prefix)
{
if (utils::logging::getLogLevel() < utils::logging::LOG_LEVEL_VERBOSE)
return;
int dim_size = tensorProto.dims_size();
CV_CheckGE(dim_size, 0, "");
MatShape shape(dim_size);
for (int j = 0; j < dim_size; ++j)
{
int sz = static_cast<int>(tensorProto.dims(j));
shape[j] = sz;
}
CV_LOG_VERBOSE(NULL, 0, "DNN/ONNX: " << prefix << "[" << i << " as '" << tensorProto.name() << "'] shape=" << toString(shape) << " data_type=" << (int)tensorProto.data_type());
}
void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
{
if (!tensor_proto.raw_data().empty()) {
@ -282,21 +335,21 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
std::map<std::string, Mat> ONNXImporter::getGraphTensors(
const opencv_onnx::GraphProto& graph_proto)
{
opencv_onnx::TensorProto tensor_proto;
std::map<std::string, Mat> layers_weights;
std::map<std::string, Mat> layers_weights;
for (int i = 0; i < graph_proto.initializer_size(); i++)
{
tensor_proto = graph_proto.initializer(i);
Mat mat = getMatFromTensor(tensor_proto);
releaseONNXTensor(tensor_proto);
for (int i = 0; i < graph_proto.initializer_size(); i++)
{
const opencv_onnx::TensorProto& tensor_proto = graph_proto.initializer(i);
dumpTensorProto(i, tensor_proto, "initializer");
Mat mat = getMatFromTensor(tensor_proto);
releaseONNXTensor(const_cast<opencv_onnx::TensorProto&>(tensor_proto)); // drop already loaded data
if (DNN_DIAGNOSTICS_RUN && mat.empty())
continue;
if (DNN_DIAGNOSTICS_RUN && mat.empty())
continue;
layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
}
return layers_weights;
layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
}
return layers_weights;
}
static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
@ -562,10 +615,45 @@ void ONNXImporter::addNegation(const LayerParams& layerParams, opencv_onnx::Node
void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
{
CV_LOG_DEBUG(NULL, "DNN/ONNX: add constant '" << name << "' shape=" << toString(shape(blob)) << ": " << toString(blob));
constBlobs.insert(std::make_pair(name, blob));
outShapes.insert(std::make_pair(name, shape(blob)));
}
void ONNXImporter::parseOperatorSet()
{
int ir_version = model_proto.has_ir_version() ? static_cast<int>(model_proto.ir_version()) : -1;
if (ir_version < 3)
return;
int opset_size = model_proto.opset_import_size();
if (opset_size <= 0)
{
CV_LOG_INFO(NULL, "DNN/ONNX: missing opset information")
return;
}
for (int i = 0; i < opset_size; ++i)
{
const ::opencv_onnx::OperatorSetIdProto& opset_entry = model_proto.opset_import(i);
const std::string& domain = opset_entry.has_domain() ? opset_entry.domain() : std::string();
int version = opset_entry.has_version() ? opset_entry.version() : -1;
if (domain.empty() || domain == "ai.onnx")
{
// ONNX opset covered by specification: https://github.com/onnx/onnx/blob/master/docs/Operators.md
onnx_opset = std::max(onnx_opset, version);
}
else
{
// OpenCV don't know other opsets
// will fail later on unsupported node processing
CV_LOG_WARNING(NULL, "DNN/ONNX: unsupported opset[" << i << "]: domain='" << domain << "' version=" << version);
}
}
CV_LOG_INFO(NULL, "DNN/ONNX: ONNX opset version = " << onnx_opset);
}
void ONNXImporter::handleQuantizedNode(LayerParams& layerParams,
const opencv_onnx::NodeProto& node_proto)
{
@ -627,56 +715,79 @@ void ONNXImporter::populateNet()
<< " model produced by '" << framework_name << "'"
<< (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
<< ". Number of nodes = " << graph_proto.node_size()
<< ", initializers = " << graph_proto.initializer_size()
<< ", inputs = " << graph_proto.input_size()
<< ", outputs = " << graph_proto.output_size()
);
parseOperatorSet();
simplifySubgraphs(graph_proto);
const int layersSize = graph_proto.node_size();
CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");
constBlobs = getGraphTensors(graph_proto);
constBlobs = getGraphTensors(graph_proto); // scan GraphProto.initializer
std::vector<String> netInputs; // map with network inputs (without const blobs)
// Add all the inputs shapes. It includes as constant blobs as network's inputs shapes.
for (int i = 0; i < graph_proto.input_size(); ++i)
{
const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
CV_Assert(valueInfoProto.has_name());
const std::string& name = valueInfoProto.name();
CV_Assert(valueInfoProto.has_type());
opencv_onnx::TypeProto typeProto = valueInfoProto.type();
const opencv_onnx::TypeProto& typeProto = valueInfoProto.type();
CV_Assert(typeProto.has_tensor_type());
opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
const opencv_onnx::TypeProto::Tensor& tensor = typeProto.tensor_type();
CV_Assert(tensor.has_shape());
opencv_onnx::TensorShapeProto tensorShape = tensor.shape();
const opencv_onnx::TensorShapeProto& tensorShape = tensor.shape();
MatShape inpShape(tensorShape.dim_size());
for (int j = 0; j < inpShape.size(); ++j)
int dim_size = tensorShape.dim_size();
CV_CheckGE(dim_size, 0, ""); // some inputs are scalars (dims=0), e.g. in Test_ONNX_nets.Resnet34_kinetics test
MatShape inpShape(dim_size);
for (int j = 0; j < dim_size; ++j)
{
inpShape[j] = tensorShape.dim(j).dim_value();
const opencv_onnx::TensorShapeProto_Dimension& dimension = tensorShape.dim(j);
if (dimension.has_dim_param())
{
CV_LOG_DEBUG(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] = <" << dimension.dim_param() << "> (dynamic)");
}
// https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
if (dimension.has_denotation())
{
CV_LOG_INFO(NULL, "DNN/ONNX: input[" << i << "] dim[" << j << "] denotation is '" << dimension.denotation() << "'");
}
inpShape[j] = dimension.dim_value();
// NHW, NCHW(NHWC), NCDHW(NDHWC); do not set this flag if only N is dynamic
if (!tensorShape.dim(j).dim_param().empty() && !(j == 0 && inpShape.size() >= 3))
if (dimension.has_dim_param() && !(j == 0 && inpShape.size() >= 3))
{
hasDynamicShapes = true;
}
}
if (!inpShape.empty() && !hasDynamicShapes)
bool isInitialized = ((constBlobs.find(name) != constBlobs.end()));
CV_LOG_IF_DEBUG(NULL, !isInitialized, "DNN/ONNX: input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
CV_LOG_IF_VERBOSE(NULL, 0, isInitialized, "DNN/ONNX: pre-initialized input[" << i << " as '" << name << "'] shape=" << toString(inpShape));
if (dim_size > 0 && !hasDynamicShapes) // FIXIT result is not reliable for models with multiple inputs
{
inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
}
outShapes[valueInfoProto.name()] = inpShape;
}
// create map with network inputs (without const blobs)
// fill map: push layer name, layer id and output id
std::vector<String> netInputs;
for (int j = 0; j < graph_proto.input_size(); j++)
{
const std::string& name = graph_proto.input(j).name();
if (constBlobs.find(name) == constBlobs.end()) {
// fill map: push layer name, layer id and output id
if (!isInitialized)
{
netInputs.push_back(name);
layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
}
}
dstNet.setInputsNames(netInputs);
// dump outputs
for (int i = 0; i < graph_proto.output_size(); ++i)
{
dumpValueInfoProto(i, graph_proto.output(i), "output");
}
if (DNN_DIAGNOSTICS_RUN) {
CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!");
layerHandler->fillRegistry(graph_proto);
@ -696,6 +807,17 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto)
CV_Assert(node_proto.output_size() >= 1);
std::string name = node_proto.output(0);
const std::string& layer_type = node_proto.op_type();
const std::string& layer_type_domain = node_proto.has_domain() ? node_proto.domain() : std::string();
if (!layer_type_domain.empty() && layer_type_domain != "ai.onnx")
{
CV_LOG_WARNING(NULL, "DNN/ONNX: can't handle node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
<< cv::format("[%s@%s]:(%s)", layer_type.c_str(), layer_type_domain.c_str(), name.c_str())
);
if (DNN_DIAGNOSTICS_RUN)
return; // ignore error
CV_Error(Error::StsNotImplemented, cv::format("ONNX: unsupported domain: %s", layer_type_domain.c_str()));
}
CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
<< cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
);
@ -2341,11 +2463,24 @@ void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeP
CV_Assert(shapeIt != outShapes.end());
const MatShape& inpShape = shapeIt->second;
Mat shapeMat(inpShape.size(), 1, CV_32S);
for (int j = 0; j < inpShape.size(); ++j)
shapeMat.at<int>(j) = inpShape[j];
shapeMat.dims = 1;
int dims = static_cast<int>(inpShape.size());
Mat shapeMat(dims, 1, CV_32S);
bool isDynamicShape = false;
for (int j = 0; j < dims; ++j)
{
int sz = inpShape[j];
isDynamicShape |= (sz == 0);
shapeMat.at<int>(j) = sz;
}
shapeMat.dims = 1; // FIXIT Mat 1D
if (isDynamicShape)
{
CV_LOG_ERROR(NULL, "DNN/ONNX(Shape): dynamic 'zero' shapes are not supported, input " << toString(inpShape, node_proto.input(0)));
// FIXIT repair assertion
// Disabled to pass face detector tests from #20422
// CV_Assert(!isDynamicShape); // not supported
}
addConstant(layerParams.name, shapeMat);
}
@ -2542,6 +2677,7 @@ void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::Node
addLayer(layerParams, node_proto);
}
// https://github.com/onnx/onnx/blob/master/docs/Operators.md#Resize
void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto)
{
for (int i = 1; i < node_proto.input_size(); i++)
@ -2565,30 +2701,38 @@ void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::Node
if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
layerParams.set("mode", "opencv_linear");
// input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes]
int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1
: node_proto.input_size() > 2 ? 2 : 1;
// opset-10: input = [X, scales]
// opset-11: input = [X, roi, scales] or [x, roi, scales, sizes]
int scalesInputId = node_proto.input_size() == 2 ? 1 : 2;
Mat scales = getBlob(node_proto, foundScaleId);
if (scales.total() == 4)
Mat scales = getBlob(node_proto, scalesInputId);
if (!scales.empty())
{
CV_CheckEQ(scales.total(), (size_t)4, "HCHW layout is expected");
layerParams.set("zoom_factor_y", scales.at<float>(2));
layerParams.set("zoom_factor_x", scales.at<float>(3));
}
else
else if (node_proto.input_size() >= 4) // opset-11
{
const std::string& inputLast = node_proto.input(node_proto.input_size() - 1);
if (constBlobs.find(inputLast) != constBlobs.end())
const std::string& inputSizes = node_proto.input(3);
if (constBlobs.find(inputSizes) != constBlobs.end())
{
Mat shapes = getBlob(inputLast);
CV_CheckEQ(shapes.size[0], 4, "");
CV_CheckEQ(shapes.size[1], 1, "");
Mat shapes = getBlob(inputSizes);
CV_CheckEQ(shapes.total(), (size_t)4, "HCHW layout is expected");
CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
if (shapes.depth() == CV_32F)
shapes.convertTo(shapes, CV_32S);
layerParams.set("width", shapes.at<int>(3));
layerParams.set("height", shapes.at<int>(2));
}
else
{
CV_Error(Error::StsNotImplemented, cv::format("ONNX/Resize: doesn't support dynamic non-constant 'sizes' input: %s", inputSizes.c_str()));
}
}
else
{
CV_Error(Error::StsNotImplemented, "ONNX/Resize: can't find neither 'scale' nor destination sizes parameters");
}
replaceLayerParam(layerParams, "mode", "interpolation");
addLayer(layerParams, node_proto);

View File

@ -72,5 +72,6 @@
#include <opencv2/core/utils/trace.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include "dnn_common.hpp"

View File

@ -8,8 +8,6 @@
#ifndef __OPENCV_DNN_TF_SIMPLIFIER_HPP__
#define __OPENCV_DNN_TF_SIMPLIFIER_HPP__
#include "../precomp.hpp"
#ifdef HAVE_PROTOBUF
#include "tf_io.hpp"

View File

@ -1190,7 +1190,58 @@ TEST_P(Test_ONNX_layers, GatherMultiOutput)
testONNXModels("gather_multi_output");
}
TEST_P(Test_ONNX_layers, DynamicAxes)
TEST_P(Test_ONNX_layers, DynamicAxes_squeeze_and_conv)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("squeeze_and_conv_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_unsqueeze_and_conv)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("unsqueeze_and_conv_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_gather)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("gather_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_gather_scalar)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021040000)
// accuracy
@ -1211,18 +1262,112 @@ TEST_P(Test_ONNX_layers, DynamicAxes)
}
#endif
#endif
testONNXModels("squeeze_and_conv_dynamic_axes");
testONNXModels("unsqueeze_and_conv_dynamic_axes");
testONNXModels("gather_dynamic_axes");
testONNXModels("gather_scalar_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_slice)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("slice_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_slice_opset_11)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("slice_opset_11_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_resize_opset11_torch16)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("resize_opset11_torch1.6_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_average_pooling)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("average_pooling_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_maxpooling_sigmoid)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("maxpooling_sigmoid_dynamic_axes");
}
TEST_P(Test_ONNX_layers, DynamicAxes_dynamic_batch)
{
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
}
#if INF_ENGINE_VER_MAJOR_LT(2021000000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
{
if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
}
#endif
#endif
testONNXModels("dynamic_batch");
}
TEST_P(Test_ONNX_layers, MaxPool1d)
{
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000)
@ -1344,6 +1489,16 @@ TEST_P(Test_ONNX_layers, DivConst)
}
// FIXIT disabled due to non-standard ONNX model domains, need to add ONNX domains support
// Example:
// DNN/ONNX: unsupported opset[1]: domain='com.microsoft.experimental' version=1
// DNN/ONNX: unsupported opset[2]: domain='ai.onnx.preview.training' version=1
// DNN/ONNX: unsupported opset[3]: domain='com.microsoft.nchwc' version=1
// DNN/ONNX: unsupported opset[4]: domain='com.microsoft.mlfeaturizers' version=1
// DNN/ONNX: unsupported opset[5]: domain='ai.onnx.ml' version=2
// DNN/ONNX: unsupported opset[6]: domain='com.microsoft' version=1
// DNN/ONNX: unsupported opset[7]: domain='ai.onnx.training' version=1
#if 0
TEST_P(Test_ONNX_layers, Quantized_Convolution)
{
testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
@ -1449,6 +1604,7 @@ TEST_P(Test_ONNX_layers, Quantized_Constant)
{
testONNXModels("quantized_constant", npy, 0.002, 0.008);
}
#endif
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
@ -1593,7 +1749,8 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
}
TEST_P(Test_ONNX_nets, ResNet50_Int8)
// FIXIT missing ONNX domains support
TEST_P(Test_ONNX_nets, DISABLED_ResNet50_Int8)
{
testONNXModels("resnet50_int8", pb, default_l1, default_lInf, true);
}

View File

@ -1966,6 +1966,10 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
}
else if( event->type == GDK_SCROLL )
{
GdkEventScroll* event_scroll = (GdkEventScroll*)event;
pt32f.x = cvFloor(event_scroll->x);
pt32f.y = cvFloor(event_scroll->y);
#if defined(GTK_VERSION3_4)
// NOTE: in current implementation doesn't possible to put into callback function delta_x and delta_y separately
double delta = (event->scroll.delta_x + event->scroll.delta_y);
@ -2023,14 +2027,18 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
(unsigned)pt.y < (unsigned)(image_widget->original_image->height)
))
{
state &= gtk_accelerator_get_default_mod_mask();
flags |= BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) |
BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) |
BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) |
BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY) |
// handle non-keyboard (mouse) modifiers first
flags |=
BIT_MAP(state, GDK_BUTTON1_MASK, CV_EVENT_FLAG_LBUTTON) |
BIT_MAP(state, GDK_BUTTON2_MASK, CV_EVENT_FLAG_MBUTTON) |
BIT_MAP(state, GDK_BUTTON3_MASK, CV_EVENT_FLAG_RBUTTON);
// keyboard modifiers
state &= gtk_accelerator_get_default_mod_mask();
flags |=
BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) |
BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) |
BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) |
BIT_MAP(state, GDK_MOD2_MASK, CV_EVENT_FLAG_ALTKEY);
window->on_mouse( cv_event, pt.x, pt.y, flags, window->on_mouse_param );
}
}

View File

@ -50,12 +50,21 @@ if(HAVE_JASPER)
list(APPEND GRFMT_LIBS ${JASPER_LIBRARIES})
if(OPENCV_IO_FORCE_JASPER)
add_definitions(-DOPENCV_IMGCODECS_FORCE_JASPER=1)
else()
message(STATUS "imgcodecs: Jasper codec is disabled in runtime. Details: https://github.com/opencv/opencv/issues/14058")
endif()
endif()
if(HAVE_OPENEXR)
include_directories(SYSTEM ${OPENEXR_INCLUDE_PATHS})
list(APPEND GRFMT_LIBS ${OPENEXR_LIBRARIES})
if(OPENCV_IO_FORCE_OPENEXR
OR NOT BUILD_OPENEXR # external OpenEXR versions are not disabled
)
add_definitions(-DOPENCV_IMGCODECS_USE_OPENEXR=1)
else()
message(STATUS "imgcodecs: OpenEXR codec is disabled in runtime. Details: https://github.com/opencv/opencv/issues/21326")
endif()
endif()
if(HAVE_PNG OR HAVE_TIFF OR HAVE_OPENEXR)
@ -167,6 +176,9 @@ ocv_add_accuracy_tests()
if(TARGET opencv_test_imgcodecs AND HAVE_JASPER AND "$ENV{OPENCV_IO_ENABLE_JASPER}")
ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_JASPER_TESTS=1)
endif()
if(TARGET opencv_test_imgcodecs AND HAVE_OPENEXR AND "$ENV{OPENCV_IO_ENABLE_OPENEXR}")
ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS=1)
endif()
if(TARGET opencv_test_imgcodecs AND HAVE_PNG AND NOT (PNG_VERSION VERSION_LESS "1.6.31"))
# details: https://github.com/glennrp/libpng/commit/68cb0aaee3de6371b81a4613476d9b33e43e95b1
ocv_target_compile_definitions(opencv_test_imgcodecs PRIVATE OPENCV_IMGCODECS_PNG_WITH_EXIF=1)

View File

@ -44,6 +44,9 @@
#ifdef HAVE_OPENEXR
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/logger.hpp>
#if defined _MSC_VER && _MSC_VER >= 1200
# pragma warning( disable: 4100 4244 4267 )
#endif
@ -80,6 +83,27 @@
namespace cv
{
static bool isOpenEXREnabled()
{
static const bool PARAM_ENABLE_OPENEXR = utils::getConfigurationParameterBool("OPENCV_IO_ENABLE_OPENEXR",
#ifdef OPENCV_IMGCODECS_USE_OPENEXR
true
#else
false
#endif
);
return PARAM_ENABLE_OPENEXR;
}
static void initOpenEXR()
{
if (!isOpenEXREnabled())
{
const char* message = "imgcodecs: OpenEXR codec is disabled. You can enable it via 'OPENCV_IO_ENABLE_OPENEXR' option. Refer for details and cautions here: https://github.com/opencv/opencv/issues/21326";
CV_LOG_WARNING(NULL, message);
CV_Error(Error::StsNotImplemented, message);
}
}
/////////////////////// ExrDecoder ///////////////////
ExrDecoder::ExrDecoder()
@ -577,6 +601,7 @@ void ExrDecoder::RGBToGray( float *in, float *out )
ImageDecoder ExrDecoder::newDecoder() const
{
initOpenEXR();
return makePtr<ExrDecoder>();
}
@ -740,6 +765,7 @@ bool ExrEncoder::write( const Mat& img, const std::vector<int>& params )
ImageEncoder ExrEncoder::newEncoder() const
{
initOpenEXR();
return makePtr<ExrEncoder>();
}

View File

@ -53,6 +53,7 @@
#include <ImfInputFile.h>
#include <ImfChannelList.h>
#include <ImathBox.h>
#include <ImfRgbaFile.h>
#include "grfmt_base.hpp"
namespace cv

View File

@ -429,6 +429,6 @@ TEST(Imgcodecs, write_parameter_type)
}} // namespace
#ifdef HAVE_OPENEXR
#if defined(HAVE_OPENEXR) && defined(OPENCV_IMGCODECS_ENABLE_OPENEXR_TESTS)
#include "test_exr.impl.hpp"
#endif

View File

@ -372,6 +372,9 @@ struct IsRepresentableAsMatDataType<T, typename VoidType<typename cv::DataType<T
{
};
// https://github.com/opencv/opencv/issues/20930
template <> struct IsRepresentableAsMatDataType<cv::RotatedRect, void> : FalseType {};
} // namespace traits
template <typename Tp>

View File

@ -594,6 +594,18 @@ class Arguments(NewOpenCVTests):
self.assertEqual(ints.dtype, np.int32, "Vector of integers has wrong elements type")
self.assertEqual(ints.shape, expected_shape, "Vector of integers has wrong shape.")
def test_result_rotated_rect_issue_20930(self):
rr = cv.utils.testRotatedRect(10, 20, 100, 200, 45)
self.assertTrue(isinstance(rr, tuple), msg=type(rr))
self.assertEqual(len(rr), 3)
rrv = cv.utils.testRotatedRectVector(10, 20, 100, 200, 45)
self.assertTrue(isinstance(rrv, tuple), msg=type(rrv))
self.assertEqual(len(rrv), 10)
rr = rrv[0]
self.assertTrue(isinstance(rr, tuple), msg=type(rrv))
self.assertEqual(len(rr), 3)
class CanUsePurePythonModuleFunction(NewOpenCVTests):
def test_can_get_ocv_version(self):

View File

@ -86,6 +86,7 @@ def explore_match(win, img1, img2, kp_pairs, status = None, H = None):
if status is None:
status = np.ones(len(kp_pairs), np.bool_)
status = status.reshape((len(kp_pairs), 1))
p1, p2 = [], [] # python 2 / python 3 change of zip unpacking
for kpp in kp_pairs:
p1.append(np.int32(kpp[0].pt))