mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Summarize PnP pose computation on a single separate page.
This commit is contained in:
parent
4d9365990a
commit
9c61d80bc4
@ -1169,6 +1169,12 @@
|
|||||||
number = {7},
|
number = {7},
|
||||||
publisher = {Elsevier}
|
publisher = {Elsevier}
|
||||||
}
|
}
|
||||||
|
@inproceedings{Zuliani2014RANSACFD,
|
||||||
|
title={RANSAC for Dummies With examples using the RANSAC toolbox for Matlab \& Octave and more...},
|
||||||
|
author={Marco Zuliani},
|
||||||
|
year={2014},
|
||||||
|
url = {https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.475.1243&rep=rep1&type=pdf}
|
||||||
|
}
|
||||||
@inproceedings{arthur_kmeanspp_2007,
|
@inproceedings{arthur_kmeanspp_2007,
|
||||||
author = {Arthur, David and Vassilvitskii, Sergei},
|
author = {Arthur, David and Vassilvitskii, Sergei},
|
||||||
title = {k-means++: The advantages of careful seeding},
|
title = {k-means++: The advantages of careful seeding},
|
||||||
|
@ -40,12 +40,13 @@
|
|||||||
publisher={IEEE}
|
publisher={IEEE}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{Terzakis20,
|
@inproceedings{Terzakis2020SQPnP,
|
||||||
author = {Terzakis, George and Lourakis, Manolis},
|
title={A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem},
|
||||||
year = {2020},
|
author={George Terzakis and Manolis Lourakis},
|
||||||
month = {09},
|
booktitle={European Conference on Computer Vision},
|
||||||
pages = {},
|
pages={478--494},
|
||||||
title = {A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem}
|
year={2020},
|
||||||
|
publisher={Springer International Publishing}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{strobl2011iccv,
|
@inproceedings{strobl2011iccv,
|
||||||
@ -62,20 +63,25 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{kinectfusion,
|
@inproceedings{kinectfusion,
|
||||||
author = {Izadi, Shahram and Kim, David and Hilliges, Otmar and Molyneaux, David and Newcombe, Richard and Kohli, Pushmeet and Shotton, Jamie and Hodges, Steve and Freeman, Dustin and Davison, Andrew and Fitzgibbon, Andrew},
|
author = {Izadi, Shahram and Kim, David and Hilliges, Otmar and Molyneaux, David and Newcombe, Richard and Kohli, Pushmeet and Shotton, Jamie and Hodges, Steve and Freeman, Dustin and Davison, Andrew and Fitzgibbon, Andrew},
|
||||||
title = {KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera},
|
title = {KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera},
|
||||||
booktitle = {},
|
booktitle = {},
|
||||||
year = {2011},
|
year = {2011},
|
||||||
month = {October},
|
month = {October},
|
||||||
abstract = {
|
abstract = {
|
||||||
KinectFusion enables a user holding and moving a standard Kinect camera to rapidly create detailed 3D reconstructions of an indoor scene. Only the depth data from Kinect is used to track the 3D pose of the sensor and reconstruct, geometrically precise, 3D models of the physical scene in real-time. The capabilities of KinectFusion, as well as the novel GPU-based pipeline are described in full. We show uses of the core system for low-cost handheld scanning, and geometry-aware augmented reality and physics-based interactions. Novel extensions to the core GPU pipeline demonstrate object segmentation and user interaction directly in front of the sensor, without degrading camera tracking or reconstruction. These extensions are used to enable real-time multi-touch interactions anywhere, allowing any planar or non-planar reconstructed physical surface to be appropriated for touch.
|
KinectFusion enables a user holding and moving a standard Kinect camera to rapidly create detailed 3D reconstructions of an indoor scene.
|
||||||
},
|
Only the depth data from Kinect is used to track the 3D pose of the sensor and reconstruct, geometrically precise, 3D models of the physical scene in real-time.
|
||||||
publisher = {ACM},
|
The capabilities of KinectFusion, as well as the novel GPU-based pipeline are described in full. We show uses of the core system for low-cost handheld scanning,
|
||||||
url = {https://www.microsoft.com/en-us/research/publication/kinectfusion-real-time-3d-reconstruction-and-interaction-using-a-moving-depth-camera/},
|
and geometry-aware augmented reality and physics-based interactions. Novel extensions to the core GPU pipeline demonstrate object segmentation and user interaction directly
|
||||||
address = {},
|
in front of the sensor, without degrading camera tracking or reconstruction. These extensions are used to enable real-time multi-touch interactions anywhere,
|
||||||
pages = {559-568},
|
allowing any planar or non-planar reconstructed physical surface to be appropriated for touch.
|
||||||
journal = {},
|
},
|
||||||
volume = {},
|
publisher = {ACM},
|
||||||
chapter = {},
|
url = {https://www.microsoft.com/en-us/research/publication/kinectfusion-real-time-3d-reconstruction-and-interaction-using-a-moving-depth-camera/},
|
||||||
isbn = {978-1-4503-0716-1},
|
address = {},
|
||||||
|
pages = {559-568},
|
||||||
|
journal = {},
|
||||||
|
volume = {},
|
||||||
|
chapter = {},
|
||||||
|
isbn = {978-1-4503-0716-1},
|
||||||
}
|
}
|
176
modules/3d/doc/solvePnP.markdown
Normal file
176
modules/3d/doc/solvePnP.markdown
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
# Perspective-n-Point (PnP) pose computation {#calib3d_solvePnP}
|
||||||
|
|
||||||
|
## Pose computation overview
|
||||||
|
|
||||||
|
The pose computation problem @cite Marchand16 consists of solving for the rotation and translation that minimize the reprojection error computed from 3D-2D point correspondences.
|
||||||
|
|
||||||
|
The `solvePnP` and related functions estimate the object pose given a set of object points, their corresponding image projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below (more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward and the Z-axis forward).
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
|
||||||
|
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$ (also denoted \f$ \bf{K} \f$ in the literature):
|
||||||
|
|
||||||
|
\f[
|
||||||
|
\begin{align*}
|
||||||
|
\begin{bmatrix}
|
||||||
|
u \\
|
||||||
|
v \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} &=
|
||||||
|
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_{w} \\
|
||||||
|
Y_{w} \\
|
||||||
|
Z_{w} \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} \\
|
||||||
|
\begin{bmatrix}
|
||||||
|
u \\
|
||||||
|
v \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} &=
|
||||||
|
\begin{bmatrix}
|
||||||
|
f_x & 0 & c_x \\
|
||||||
|
0 & f_y & c_y \\
|
||||||
|
0 & 0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 0 & 0 & 0 \\
|
||||||
|
0 & 1 & 0 & 0 \\
|
||||||
|
0 & 0 & 1 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\begin{bmatrix}
|
||||||
|
r_{11} & r_{12} & r_{13} & t_x \\
|
||||||
|
r_{21} & r_{22} & r_{23} & t_y \\
|
||||||
|
r_{31} & r_{32} & r_{33} & t_z \\
|
||||||
|
0 & 0 & 0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_{w} \\
|
||||||
|
Y_{w} \\
|
||||||
|
Z_{w} \\
|
||||||
|
1
|
||||||
|
\end{bmatrix}
|
||||||
|
\end{align*}
|
||||||
|
\f]
|
||||||
|
|
||||||
|
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
|
||||||
|
a 3D point expressed in the world frame into the camera frame:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
\begin{align*}
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_c \\
|
||||||
|
Y_c \\
|
||||||
|
Z_c \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} &=
|
||||||
|
\hspace{0.2em} ^{c}\bf{T}_w
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_{w} \\
|
||||||
|
Y_{w} \\
|
||||||
|
Z_{w} \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} \\
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_c \\
|
||||||
|
Y_c \\
|
||||||
|
Z_c \\
|
||||||
|
1
|
||||||
|
\end{bmatrix} &=
|
||||||
|
\begin{bmatrix}
|
||||||
|
r_{11} & r_{12} & r_{13} & t_x \\
|
||||||
|
r_{21} & r_{22} & r_{23} & t_y \\
|
||||||
|
r_{31} & r_{32} & r_{33} & t_z \\
|
||||||
|
0 & 0 & 0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
\begin{bmatrix}
|
||||||
|
X_{w} \\
|
||||||
|
Y_{w} \\
|
||||||
|
Z_{w} \\
|
||||||
|
1
|
||||||
|
\end{bmatrix}
|
||||||
|
\end{align*}
|
||||||
|
\f]
|
||||||
|
|
||||||
|
## Pose computation methods
|
||||||
|
@anchor calib3d_solvePnP_flags
|
||||||
|
|
||||||
|
Refer to the cv::SolvePnPMethod enum documentation for the list of possible values. Some details about each method are described below:
|
||||||
|
|
||||||
|
- cv::SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
|
||||||
|
this case the function finds such a pose that minimizes reprojection error, that is the sum
|
||||||
|
of squared distances between the observed projections "imagePoints" and the projected (using
|
||||||
|
cv::projectPoints ) "objectPoints". Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm.
|
||||||
|
Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
|
||||||
|
- cv::SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
|
||||||
|
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
|
||||||
|
In this case the function requires exactly four object and image points.
|
||||||
|
- cv::SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
|
||||||
|
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
|
||||||
|
In this case the function requires exactly four object and image points.
|
||||||
|
- cv::SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
|
||||||
|
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
|
||||||
|
- cv::SOLVEPNP_DLS **Broken implementation. Using this flag will fall back to EPnP.** \n
|
||||||
|
Method is based on the paper of J. Hesch and S. Roumeliotis.
|
||||||
|
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
|
||||||
|
- cv::SOLVEPNP_UPNP **Broken implementation. Using this flag will fall back to EPnP.** \n
|
||||||
|
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
|
||||||
|
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
|
||||||
|
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
|
||||||
|
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
|
||||||
|
focal length.
|
||||||
|
- cv::SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
|
||||||
|
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
|
||||||
|
- cv::SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
|
||||||
|
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
|
||||||
|
It requires 4 coplanar object points defined in the following order:
|
||||||
|
- point 0: [-squareLength / 2, squareLength / 2, 0]
|
||||||
|
- point 1: [ squareLength / 2, squareLength / 2, 0]
|
||||||
|
- point 2: [ squareLength / 2, -squareLength / 2, 0]
|
||||||
|
- point 3: [-squareLength / 2, -squareLength / 2, 0]
|
||||||
|
- cv::SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
|
||||||
|
Perspective-n-Point Problem" by G. Terzakis and M. Lourakis (@cite Terzakis2020SQPnP). It requires 3 or more points.
|
||||||
|
|
||||||
|
## P3P
|
||||||
|
|
||||||
|
The cv::solveP3P() function computes an object pose from exactly three 3D-2D point correspondences. A P3P problem has up to 4 solutions.
|
||||||
|
|
||||||
|
@note The solutions are sorted by reprojection errors (lowest to highest).
|
||||||
|
|
||||||
|
## PnP
|
||||||
|
|
||||||
|
The cv::solvePnP() returns the rotation and the translation vectors that transform a 3D point expressed in the object
|
||||||
|
coordinate frame to the camera coordinate frame, using different methods:
|
||||||
|
- P3P methods (cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P): need 4 input points to return a unique solution.
|
||||||
|
- cv::SOLVEPNP_IPPE Input points must be >= 4 and object points must be coplanar.
|
||||||
|
- cv::SOLVEPNP_IPPE_SQUARE Special case suitable for marker pose estimation.
|
||||||
|
Number of input points must be 4. Object points must be defined in the following order:
|
||||||
|
- point 0: [-squareLength / 2, squareLength / 2, 0]
|
||||||
|
- point 1: [ squareLength / 2, squareLength / 2, 0]
|
||||||
|
- point 2: [ squareLength / 2, -squareLength / 2, 0]
|
||||||
|
- point 3: [-squareLength / 2, -squareLength / 2, 0]
|
||||||
|
- for all the other flags, number of input points must be >= 4 and object points can be in any configuration.
|
||||||
|
|
||||||
|
## Generic PnP
|
||||||
|
|
||||||
|
The cv::solvePnPGeneric() allows retrieving all the possible solutions.
|
||||||
|
|
||||||
|
Currently, only cv::SOLVEPNP_P3P, cv::SOLVEPNP_AP3P, cv::SOLVEPNP_IPPE, cv::SOLVEPNP_IPPE_SQUARE, cv::SOLVEPNP_SQPNP can return multiple solutions.
|
||||||
|
|
||||||
|
## RANSAC PnP
|
||||||
|
|
||||||
|
The cv::solvePnPRansac() function computes the object pose with respect to the camera frame using a RANSAC scheme to deal with outliers.
|
||||||
|
|
||||||
|
More information can be found in @cite Zuliani2014RANSACFD.
|
||||||
|
|
||||||
|
## Pose refinement
|
||||||
|
|
||||||
|
Pose refinement consists of estimating the rotation and translation that minimize the reprojection error using a non-linear minimization method, starting from an initial estimate of the solution. OpenCV provides cv::solvePnPRefineLM() and cv::solvePnPRefineVVS() for this problem.
|
||||||
|
|
||||||
|
cv::solvePnPRefineLM() uses a non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 and the current implementation computes the rotation update as a perturbation and not on SO(3).
|
||||||
|
|
||||||
|
cv::solvePnPRefineVVS() uses a Gauss-Newton non-linear minimization scheme @cite Marchand16, with the update of the rotation part computed using the exponential map.
|
||||||
|
|
||||||
|
@note At least three 3D-2D point correspondences are necessary.
|
@ -387,7 +387,9 @@ enum { LMEDS = 4, //!< least-median of squares algorithm
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum SolvePnPMethod {
|
enum SolvePnPMethod {
|
||||||
SOLVEPNP_ITERATIVE = 0,
|
SOLVEPNP_ITERATIVE = 0, //!< Pose refinement using non-linear Levenberg-Marquardt minimization scheme @cite Madsen04 @cite Eade13 \n
|
||||||
|
//!< Initial solution for non-planar "objectPoints" needs at least 6 points and uses the DLT algorithm. \n
|
||||||
|
//!< Initial solution for planar "objectPoints" needs at least 4 points and uses pose from homography decomposition.
|
||||||
SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
|
SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
|
||||||
SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
|
SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
|
||||||
SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
|
SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
|
||||||
@ -404,7 +406,7 @@ enum SolvePnPMethod {
|
|||||||
//!< - point 1: [ squareLength / 2, squareLength / 2, 0]
|
//!< - point 1: [ squareLength / 2, squareLength / 2, 0]
|
||||||
//!< - point 2: [ squareLength / 2, -squareLength / 2, 0]
|
//!< - point 2: [ squareLength / 2, -squareLength / 2, 0]
|
||||||
//!< - point 3: [-squareLength / 2, -squareLength / 2, 0]
|
//!< - point 3: [-squareLength / 2, -squareLength / 2, 0]
|
||||||
SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally OptimalSolution to the Perspective-n-Point Problem @cite Terzakis20
|
SOLVEPNP_SQPNP = 8, //!< SQPnP: A Consistently Fast and Globally Optimal Solution to the Perspective-n-Point Problem @cite Terzakis2020SQPnP
|
||||||
#ifndef CV_DOXYGEN
|
#ifndef CV_DOXYGEN
|
||||||
SOLVEPNP_MAX_COUNT //!< Used for count
|
SOLVEPNP_MAX_COUNT //!< Used for count
|
||||||
#endif
|
#endif
|
||||||
@ -779,6 +781,9 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/** @brief Finds an object pose from 3D-2D point correspondences.
|
/** @brief Finds an object pose from 3D-2D point correspondences.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
|
This function returns the rotation and the translation vectors that transform a 3D point expressed in the object
|
||||||
coordinate frame to the camera coordinate frame, using different methods:
|
coordinate frame to the camera coordinate frame, using different methods:
|
||||||
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
|
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): need 4 input points to return a unique solution.
|
||||||
@ -805,133 +810,9 @@ the model coordinate system to the camera coordinate system.
|
|||||||
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
|
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
|
||||||
the provided rvec and tvec values as initial approximations of the rotation and translation
|
the provided rvec and tvec values as initial approximations of the rotation and translation
|
||||||
vectors, respectively, and further optimizes them.
|
vectors, respectively, and further optimizes them.
|
||||||
@param flags Method for solving a PnP problem:
|
@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
|
||||||
- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
|
|
||||||
this case the function finds such a pose that minimizes reprojection error, that is the sum
|
|
||||||
of squared distances between the observed projections imagePoints and the projected (using
|
|
||||||
@ref projectPoints ) objectPoints .
|
|
||||||
- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
|
|
||||||
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
|
|
||||||
In this case the function requires exactly four object and image points.
|
|
||||||
- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
|
|
||||||
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
|
|
||||||
In this case the function requires exactly four object and image points.
|
|
||||||
- @ref SOLVEPNP_EPNP Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
|
|
||||||
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
|
|
||||||
- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fallback to EPnP.** \n
|
|
||||||
Method is based on the paper of J. Hesch and S. Roumeliotis.
|
|
||||||
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
|
|
||||||
- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fallback to EPnP.** \n
|
|
||||||
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
|
|
||||||
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
|
|
||||||
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
|
|
||||||
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
|
|
||||||
focal length.
|
|
||||||
- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
|
|
||||||
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
|
|
||||||
- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
|
|
||||||
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
|
|
||||||
It requires 4 coplanar object points defined in the following order:
|
|
||||||
- point 0: [-squareLength / 2, squareLength / 2, 0]
|
|
||||||
- point 1: [ squareLength / 2, squareLength / 2, 0]
|
|
||||||
- point 2: [ squareLength / 2, -squareLength / 2, 0]
|
|
||||||
- point 3: [-squareLength / 2, -squareLength / 2, 0]
|
|
||||||
- @ref SOLVEPNP_SQPNP Method is based on the paper "A Consistently Fast and Globally Optimal Solution to the
|
|
||||||
Perspective-n-Point Problem" by G. Terzakis and M.Lourakis (@cite Terzakis20). It requires 3 or more points.
|
|
||||||
|
|
||||||
|
More information about Perspective-n-Points is described in @ref calib3d_solvePnP
|
||||||
The function estimates the object pose given a set of object points, their corresponding image
|
|
||||||
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
|
|
||||||
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
|
|
||||||
and the Z-axis forward).
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
|
|
||||||
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
|
|
||||||
|
|
||||||
\f[
|
|
||||||
\begin{align*}
|
|
||||||
\begin{bmatrix}
|
|
||||||
u \\
|
|
||||||
v \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\begin{bmatrix}
|
|
||||||
u \\
|
|
||||||
v \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\begin{bmatrix}
|
|
||||||
f_x & 0 & c_x \\
|
|
||||||
0 & f_y & c_y \\
|
|
||||||
0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
1 & 0 & 0 & 0 \\
|
|
||||||
0 & 1 & 0 & 0 \\
|
|
||||||
0 & 0 & 1 & 0
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
r_{11} & r_{12} & r_{13} & t_x \\
|
|
||||||
r_{21} & r_{22} & r_{23} & t_y \\
|
|
||||||
r_{31} & r_{32} & r_{33} & t_z \\
|
|
||||||
0 & 0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix}
|
|
||||||
\end{align*}
|
|
||||||
\f]
|
|
||||||
|
|
||||||
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
|
|
||||||
a 3D point expressed in the world frame into the camera frame:
|
|
||||||
|
|
||||||
\f[
|
|
||||||
\begin{align*}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_c \\
|
|
||||||
Y_c \\
|
|
||||||
Z_c \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\hspace{0.2em} ^{c}\bf{T}_w
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_c \\
|
|
||||||
Y_c \\
|
|
||||||
Z_c \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\begin{bmatrix}
|
|
||||||
r_{11} & r_{12} & r_{13} & t_x \\
|
|
||||||
r_{21} & r_{22} & r_{23} & t_y \\
|
|
||||||
r_{31} & r_{32} & r_{33} & t_z \\
|
|
||||||
0 & 0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix}
|
|
||||||
\end{align*}
|
|
||||||
\f]
|
|
||||||
|
|
||||||
@note
|
@note
|
||||||
- An example of how to use solvePnP for planar augmented reality can be found at
|
- An example of how to use solvePnP for planar augmented reality can be found at
|
||||||
@ -967,10 +848,12 @@ a 3D point expressed in the world frame into the camera frame:
|
|||||||
CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
|
CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
|
||||||
InputArray cameraMatrix, InputArray distCoeffs,
|
InputArray cameraMatrix, InputArray distCoeffs,
|
||||||
OutputArray rvec, OutputArray tvec,
|
OutputArray rvec, OutputArray tvec,
|
||||||
bool useExtrinsicGuess = false, int flags = 0 );
|
bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE );
|
||||||
|
|
||||||
/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
|
/** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
|
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
|
||||||
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
|
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
|
||||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||||
@ -1015,7 +898,7 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
|
|||||||
OutputArray rvec, OutputArray tvec,
|
OutputArray rvec, OutputArray tvec,
|
||||||
bool useExtrinsicGuess = false, int iterationsCount = 100,
|
bool useExtrinsicGuess = false, int iterationsCount = 100,
|
||||||
float reprojectionError = 8.0, double confidence = 0.99,
|
float reprojectionError = 8.0, double confidence = 0.99,
|
||||||
OutputArray inliers = noArray(), int flags = 0 );
|
OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Finds rotation and translation vector.
|
Finds rotation and translation vector.
|
||||||
@ -1028,6 +911,8 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
|
|||||||
|
|
||||||
/** @brief Finds an object pose from 3 3D-2D point correspondences.
|
/** @brief Finds an object pose from 3 3D-2D point correspondences.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
|
@param objectPoints Array of object points in the object coordinate space, 3x3 1-channel or
|
||||||
1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
|
1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
|
||||||
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
|
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
|
||||||
@ -1059,6 +944,8 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
|
|||||||
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
|
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
|
||||||
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
|
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
|
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
|
||||||
where N is the number of points. vector\<Point3d\> can also be passed here.
|
where N is the number of points. vector\<Point3d\> can also be passed here.
|
||||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||||
@ -1087,6 +974,8 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi
|
|||||||
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
|
/** @brief Refine a pose (the translation and the rotation that transform a 3D point expressed in the object coordinate frame
|
||||||
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
|
to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
|
@param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
|
||||||
where N is the number of points. vector\<Point3d\> can also be passed here.
|
where N is the number of points. vector\<Point3d\> can also be passed here.
|
||||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||||
@ -1116,6 +1005,9 @@ CV_EXPORTS_W void solvePnPRefineVVS( InputArray objectPoints, InputArray imagePo
|
|||||||
double VVSlambda = 1);
|
double VVSlambda = 1);
|
||||||
|
|
||||||
/** @brief Finds an object pose from 3D-2D point correspondences.
|
/** @brief Finds an object pose from 3D-2D point correspondences.
|
||||||
|
|
||||||
|
@see @ref calib3d_solvePnP
|
||||||
|
|
||||||
This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector>
|
This function returns a list of all the possible solutions (a solution is a <rotation vector, translation vector>
|
||||||
couple), depending on the number of input points and the chosen method:
|
couple), depending on the number of input points and the chosen method:
|
||||||
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points.
|
- P3P methods (@ref SOLVEPNP_P3P, @ref SOLVEPNP_AP3P): 3 or 4 input points. Number of returned solutions can be between 0 and 4 with 3 input points.
|
||||||
@ -1143,37 +1035,7 @@ the model coordinate system to the camera coordinate system.
|
|||||||
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
|
@param useExtrinsicGuess Parameter used for #SOLVEPNP_ITERATIVE. If true (1), the function uses
|
||||||
the provided rvec and tvec values as initial approximations of the rotation and translation
|
the provided rvec and tvec values as initial approximations of the rotation and translation
|
||||||
vectors, respectively, and further optimizes them.
|
vectors, respectively, and further optimizes them.
|
||||||
@param flags Method for solving a PnP problem:
|
@param flags Method for solving a PnP problem: see @ref calib3d_solvePnP_flags
|
||||||
- @ref SOLVEPNP_ITERATIVE Iterative method is based on a Levenberg-Marquardt optimization. In
|
|
||||||
this case the function finds such a pose that minimizes reprojection error, that is the sum
|
|
||||||
of squared distances between the observed projections imagePoints and the projected (using
|
|
||||||
#projectPoints ) objectPoints .
|
|
||||||
- @ref SOLVEPNP_P3P Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
|
|
||||||
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
|
|
||||||
In this case the function requires exactly four object and image points.
|
|
||||||
- @ref SOLVEPNP_AP3P Method is based on the paper of T. Ke, S. Roumeliotis
|
|
||||||
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
|
|
||||||
In this case the function requires exactly four object and image points.
|
|
||||||
- @ref SOLVEPNP_EPNP Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
|
|
||||||
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
|
|
||||||
- @ref SOLVEPNP_DLS **Broken implementation. Using this flag will fall back to EPnP.** \n
|
|
||||||
Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
|
|
||||||
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
|
|
||||||
- @ref SOLVEPNP_UPNP **Broken implementation. Using this flag will fall back to EPnP.** \n
|
|
||||||
Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
|
|
||||||
F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
|
|
||||||
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
|
|
||||||
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
|
|
||||||
focal length.
|
|
||||||
- @ref SOLVEPNP_IPPE Method is based on the paper of T. Collins and A. Bartoli.
|
|
||||||
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method requires coplanar object points.
|
|
||||||
- @ref SOLVEPNP_IPPE_SQUARE Method is based on the paper of Toby Collins and Adrien Bartoli.
|
|
||||||
"Infinitesimal Plane-Based Pose Estimation" (@cite Collins14). This method is suitable for marker pose estimation.
|
|
||||||
It requires 4 coplanar object points defined in the following order:
|
|
||||||
- point 0: [-squareLength / 2, squareLength / 2, 0]
|
|
||||||
- point 1: [ squareLength / 2, squareLength / 2, 0]
|
|
||||||
- point 2: [ squareLength / 2, -squareLength / 2, 0]
|
|
||||||
- point 3: [-squareLength / 2, -squareLength / 2, 0]
|
|
||||||
@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
|
@param rvec Rotation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
|
||||||
and useExtrinsicGuess is set to true.
|
and useExtrinsicGuess is set to true.
|
||||||
@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
|
@param tvec Translation vector used to initialize an iterative PnP refinement algorithm, when flag is @ref SOLVEPNP_ITERATIVE
|
||||||
@ -1182,98 +1044,7 @@ and useExtrinsicGuess is set to true.
|
|||||||
(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
|
(\f$ \text{RMSE} = \sqrt{\frac{\sum_{i}^{N} \left ( \hat{y_i} - y_i \right )^2}{N}} \f$) between the input image points
|
||||||
and the 3D object points projected with the estimated pose.
|
and the 3D object points projected with the estimated pose.
|
||||||
|
|
||||||
The function estimates the object pose given a set of object points, their corresponding image
|
More information is described in @ref calib3d_solvePnP
|
||||||
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
|
|
||||||
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
|
|
||||||
and the Z-axis forward).
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
Points expressed in the world frame \f$ \bf{X}_w \f$ are projected into the image plane \f$ \left[ u, v \right] \f$
|
|
||||||
using the perspective projection model \f$ \Pi \f$ and the camera intrinsic parameters matrix \f$ \bf{A} \f$:
|
|
||||||
|
|
||||||
\f[
|
|
||||||
\begin{align*}
|
|
||||||
\begin{bmatrix}
|
|
||||||
u \\
|
|
||||||
v \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\begin{bmatrix}
|
|
||||||
u \\
|
|
||||||
v \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\begin{bmatrix}
|
|
||||||
f_x & 0 & c_x \\
|
|
||||||
0 & f_y & c_y \\
|
|
||||||
0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
1 & 0 & 0 & 0 \\
|
|
||||||
0 & 1 & 0 & 0 \\
|
|
||||||
0 & 0 & 1 & 0
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
r_{11} & r_{12} & r_{13} & t_x \\
|
|
||||||
r_{21} & r_{22} & r_{23} & t_y \\
|
|
||||||
r_{31} & r_{32} & r_{33} & t_z \\
|
|
||||||
0 & 0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix}
|
|
||||||
\end{align*}
|
|
||||||
\f]
|
|
||||||
|
|
||||||
The estimated pose is thus the rotation (`rvec`) and the translation (`tvec`) vectors that allow transforming
|
|
||||||
a 3D point expressed in the world frame into the camera frame:
|
|
||||||
|
|
||||||
\f[
|
|
||||||
\begin{align*}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_c \\
|
|
||||||
Y_c \\
|
|
||||||
Z_c \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\hspace{0.2em} ^{c}\bf{T}_w
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} \\
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_c \\
|
|
||||||
Y_c \\
|
|
||||||
Z_c \\
|
|
||||||
1
|
|
||||||
\end{bmatrix} &=
|
|
||||||
\begin{bmatrix}
|
|
||||||
r_{11} & r_{12} & r_{13} & t_x \\
|
|
||||||
r_{21} & r_{22} & r_{23} & t_y \\
|
|
||||||
r_{31} & r_{32} & r_{33} & t_z \\
|
|
||||||
0 & 0 & 0 & 1
|
|
||||||
\end{bmatrix}
|
|
||||||
\begin{bmatrix}
|
|
||||||
X_{w} \\
|
|
||||||
Y_{w} \\
|
|
||||||
Z_{w} \\
|
|
||||||
1
|
|
||||||
\end{bmatrix}
|
|
||||||
\end{align*}
|
|
||||||
\f]
|
|
||||||
|
|
||||||
@note
|
@note
|
||||||
- An example of how to use solvePnP for planar augmented reality can be found at
|
- An example of how to use solvePnP for planar augmented reality can be found at
|
||||||
@ -1309,7 +1080,7 @@ CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoint
|
|||||||
InputArray cameraMatrix, InputArray distCoeffs,
|
InputArray cameraMatrix, InputArray distCoeffs,
|
||||||
OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
|
OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
|
||||||
bool useExtrinsicGuess = false,
|
bool useExtrinsicGuess = false,
|
||||||
int flags = 0,
|
int flags = SOLVEPNP_ITERATIVE,
|
||||||
InputArray rvec = noArray(), InputArray tvec = noArray(),
|
InputArray rvec = noArray(), InputArray tvec = noArray(),
|
||||||
OutputArray reprojectionError = noArray() );
|
OutputArray reprojectionError = noArray() );
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user