Merge pull request #16028 from catree:improve_calib3d_doc
commit eb44e0a556
@@ -102,6 +102,14 @@
   publisher = {Elsevier},
   url = {https://www.cs.bgu.ac.il/~icbv161/wiki.files/Readings/1981-Ballard-Generalizing_the_Hough_Transform_to_Detect_Arbitrary_Shapes.pdf}
 }
+@techreport{blanco2010tutorial,
+  title = {A tutorial on SE(3) transformation parameterizations and on-manifold optimization},
+  author = {Blanco, Jose-Luis},
+  institution = {University of Malaga},
+  number = {012010},
+  year = {2010},
+  url = {http://ingmec.ual.es/~jlblanco/papers/jlblanco2010geometry3D_techrep.pdf}
+}
 @article{Borgefors86,
   author = {Borgefors, Gunilla},
   title = {Distance transformations in digital images},
@@ -277,6 +285,12 @@
   year = {2013},
   url = {http://ethaneade.com/optimization.pdf}
 }
+@misc{Eade17,
+  author = {Eade, Ethan},
+  title = {Lie Groups for 2D and 3D Transformation},
+  year = {2017},
+  url = {http://www.ethaneade.com/lie.pdf}
+}
 @inproceedings{EM11,
   author = {Gastal, Eduardo SL and Oliveira, Manuel M},
   title = {Domain transform for edge-aware image and video processing},
@@ -397,6 +411,14 @@
   year = {1999},
   url = {https://pdfs.semanticscholar.org/090d/25f94cb021bdd3400a2f547f989a6a5e07ec.pdf}
 }
+@article{Gallego2014ACF,
+  title = {A Compact Formula for the Derivative of a 3-D Rotation in Exponential Coordinates},
+  author = {Guillermo Gallego and Anthony J. Yezzi},
+  journal = {Journal of Mathematical Imaging and Vision},
+  year = {2014},
+  volume = {51},
+  pages = {378-384}
+}
 @article{taubin1991,
   abstract = {The author addresses the problem of parametric representation and estimation of complex planar curves in 2-D, surfaces in 3-D, and nonplanar space curves in 3-D. Curves and surfaces can be defined either parametrically or implicitly, with the latter representation used here. A planar curve is the set of zeros of a smooth function of two variables x-y, a surface is the set of zeros of a smooth function of three variables x-y-z, and a space curve is the intersection of two surfaces, which are the set of zeros of two linearly independent smooth functions of three variables x-y-z. For example, the surface of a complex object in 3-D can be represented as a subset of a single implicit surface, with similar results for planar and space curves. It is shown how this unified representation can be used for object recognition, object position estimation, and segmentation of objects into meaningful subobjects, that is, the detection of `interest regions' that are more complex than high curvature regions and, hence, more useful as features for object recognition},
   author = {Taubin, Gabriel},
@@ -915,6 +937,13 @@
   journal = {Retrieved on August},
   volume = {6}
 }
+@article{Sol2018AML,
+  title = {A micro Lie theory for state estimation in robotics},
+  author = {Joan Sol{\`a} and J{\'e}r{\'e}mie Deray and Dinesh Atchuthan},
+  journal = {ArXiv},
+  year = {2018},
+  volume = {abs/1812.01537}
+}
 @misc{SteweniusCFS,
   author = {Stewenius, Henrik},
   title = {Calibrated Fivepoint solver},
(Binary image file changed, not shown: 95 KiB before, 26 KiB after.)
@@ -60,24 +60,24 @@ or

 \f[s \vecthree{u}{v}{1} = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}
 \begin{bmatrix}
-r_{11} & r_{12} & r_{13} & t_1 \\
-r_{21} & r_{22} & r_{23} & t_2 \\
-r_{31} & r_{32} & r_{33} & t_3
+r_{11} & r_{12} & r_{13} & t_x \\
+r_{21} & r_{22} & r_{23} & t_y \\
+r_{31} & r_{32} & r_{33} & t_z
 \end{bmatrix}
 \begin{bmatrix}
-X \\
-Y \\
-Z \\
+X_w \\
+Y_w \\
+Z_w \\
 1
 \end{bmatrix}\f]

 where:

-- \f$(X, Y, Z)\f$ are the coordinates of a 3D point in the world coordinate space
+- \f$(X_w, Y_w, Z_w)\f$ are the coordinates of a 3D point in the world coordinate space
 - \f$(u, v)\f$ are the coordinates of the projection point in pixels
 - \f$A\f$ is a camera matrix, or a matrix of intrinsic parameters
-- \f$(cx, cy)\f$ is a principal point that is usually at the image center
-- \f$fx, fy\f$ are the focal lengths expressed in pixel units.
+- \f$(c_x, c_y)\f$ is a principal point that is usually at the image center
+- \f$f_x, f_y\f$ are the focal lengths expressed in pixel units.

 Thus, if an image from the camera is scaled by a factor, all of these parameters should be scaled
 (multiplied/divided, respectively) by the same factor. The matrix of intrinsic parameters does not
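As a side illustration of the matrix form above, the projection s*[u, v, 1]^T = A*[R|t]*[X_w, Y_w, Z_w, 1]^T can be evaluated directly with cv::Matx. All numeric values below (focal lengths, principal point, pose, world point) are made up for this sketch and are not part of the patch:

#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Intrinsic matrix A; f_x = f_y = 800, c_x = 320, c_y = 240 are illustrative values
    cv::Matx33d A(800,   0, 320,
                    0, 800, 240,
                    0,   0,   1);

    // Extrinsic [R|t]: identity rotation and a made-up translation (t_x, t_y, t_z)
    cv::Matx34d Rt(1, 0, 0, 0.1,
                   0, 1, 0, 0.0,
                   0, 0, 1, 5.0);

    // World point in homogeneous coordinates [X_w, Y_w, Z_w, 1]^T
    cv::Vec4d Pw(0.2, -0.3, 0.0, 1.0);

    // s * [u, v, 1]^T = A * [R|t] * [X_w, Y_w, Z_w, 1]^T
    cv::Vec3d suv = A * (Rt * Pw);
    double u = suv[0] / suv[2];   // divide by the scale factor s (equal to Z_c here)
    double v = suv[1] / suv[2];
    std::cout << "u = " << u << ", v = " << v << std::endl;
    return 0;
}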
@@ -85,15 +85,15 @@ depend on the scene viewed. So, once estimated, it can be re-used as long as the
 fixed (in case of zoom lens). The joint rotation-translation matrix \f$[R|t]\f$ is called a matrix of
 extrinsic parameters. It is used to describe the camera motion around a static scene, or vice versa,
 rigid motion of an object in front of a still camera. That is, \f$[R|t]\f$ translates coordinates of a
-point \f$(X, Y, Z)\f$ to a coordinate system, fixed with respect to the camera. The transformation above
-is equivalent to the following (when \f$z \ne 0\f$ ):
+world point \f$(X_w, Y_w, Z_w)\f$ to a coordinate system, fixed with respect to the camera.
+The transformation above is equivalent to the following (when \f$z \ne 0\f$ ):

 \f[\begin{array}{l}
-\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\
-x' = x/z \\
-y' = y/z \\
-u = f_x*x' + c_x \\
-v = f_y*y' + c_y
+\vecthree{X_c}{Y_c}{Z_c} = R \vecthree{X_w}{Y_w}{Z_w} + t \\
+x' = X_c/Z_c \\
+y' = Y_c/Z_c \\
+u = f_x \times x' + c_x \\
+v = f_y \times y' + c_y
 \end{array}\f]

 The following figure illustrates the pinhole camera model.
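The step-by-step form maps a world point into camera coordinates, normalizes by Z_c, and applies the focal lengths and principal point. A minimal sketch with made-up values, cross-checked against cv::projectPoints with zero distortion:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Made-up intrinsics and pose, for illustration only
    const double fx = 800, fy = 800, cx = 320, cy = 240;
    const cv::Matx33d R = cv::Matx33d::eye();     // identity rotation
    const cv::Vec3d   t(0.1, 0.0, 5.0);
    const cv::Vec3d   Pw(0.2, -0.3, 0.0);         // (X_w, Y_w, Z_w)

    // Step-by-step pinhole projection, following the equations above
    cv::Vec3d Pc = R * Pw + t;                    // (X_c, Y_c, Z_c)
    double xp = Pc[0] / Pc[2];                    // x' = X_c / Z_c
    double yp = Pc[1] / Pc[2];                    // y' = Y_c / Z_c
    double u  = fx * xp + cx;
    double v  = fy * yp + cy;
    std::cout << "manual:        u=" << u << " v=" << v << std::endl;

    // Cross-check with cv::projectPoints (rvec = (0,0,0) because R is the identity)
    cv::Matx33d A(fx, 0, cx,  0, fy, cy,  0, 0, 1);
    std::vector<cv::Point3d> obj{cv::Point3d(Pw[0], Pw[1], Pw[2])};
    std::vector<cv::Point2d> img;
    cv::projectPoints(obj, cv::Vec3d(0, 0, 0), t, A, cv::noArray(), img);
    std::cout << "projectPoints: u=" << img[0].x << " v=" << img[0].y << std::endl;
    return 0;
}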
@@ -104,14 +104,14 @@ Real lenses usually have some distortion, mostly radial distortion and slight ta
 So, the above model is extended as:

 \f[\begin{array}{l}
-\vecthree{x}{y}{z} = R \vecthree{X}{Y}{Z} + t \\
-x' = x/z \\
-y' = y/z \\
+\vecthree{X_c}{Y_c}{Z_c} = R \vecthree{X_w}{Y_w}{Z_w} + t \\
+x' = X_c/Z_c \\
+y' = Y_c/Z_c \\
 x'' = x' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + 2 p_1 x' y' + p_2(r^2 + 2 x'^2) + s_1 r^2 + s_2 r^4 \\
 y'' = y' \frac{1 + k_1 r^2 + k_2 r^4 + k_3 r^6}{1 + k_4 r^2 + k_5 r^4 + k_6 r^6} + p_1 (r^2 + 2 y'^2) + 2 p_2 x' y' + s_3 r^2 + s_4 r^4 \\
 \text{where} \quad r^2 = x'^2 + y'^2 \\
-u = f_x*x'' + c_x \\
-v = f_y*y'' + c_y
+u = f_x \times x'' + c_x \\
+v = f_y \times y'' + c_y
 \end{array}\f]

 \f$k_1\f$, \f$k_2\f$, \f$k_3\f$, \f$k_4\f$, \f$k_5\f$, and \f$k_6\f$ are radial distortion coefficients. \f$p_1\f$ and \f$p_2\f$ are
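The distortion step acts on the normalized coordinates (x', y') before the intrinsics are applied. A sketch with illustrative coefficients, restricted to the common 5-element subset (k_4..k_6 and s_1..s_4 assumed zero):

#include <iostream>

int main()
{
    // Normalized coordinates x', y' and illustrative distortion coefficients
    double xp = 0.02, yp = -0.06;
    double k1 = -0.28, k2 = 0.09, k3 = 0.0, p1 = 1e-3, p2 = -5e-4;

    double r2 = xp * xp + yp * yp;
    // Radial factor; the denominator of the rational model is 1 because k4..k6 = 0
    double radial = 1 + k1 * r2 + k2 * r2 * r2 + k3 * r2 * r2 * r2;

    // x'' and y'' from the extended model above (thin-prism terms s1..s4 omitted)
    double xpp = xp * radial + 2 * p1 * xp * yp + p2 * (r2 + 2 * xp * xp);
    double ypp = yp * radial + p1 * (r2 + 2 * yp * yp) + 2 * p2 * xp * yp;

    // Pixel coordinates, with made-up intrinsics
    double fx = 800, fy = 800, cx = 320, cy = 240;
    std::cout << "u = " << fx * xpp + cx << ", v = " << fy * ypp + cy << std::endl;
    return 0;
}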
@@ -133,8 +133,8 @@ s\vecthree{x'''}{y'''}{1} =
 \vecthreethree{R_{33}(\tau_x, \tau_y)}{0}{-R_{13}(\tau_x, \tau_y)}
 {0}{R_{33}(\tau_x, \tau_y)}{-R_{23}(\tau_x, \tau_y)}
 {0}{0}{1} R(\tau_x, \tau_y) \vecthree{x''}{y''}{1}\\
-u = f_x*x''' + c_x \\
-v = f_y*y''' + c_y
+u = f_x \times x''' + c_x \\
+v = f_y \times y''' + c_y
 \end{array}\f]

 where the matrix \f$R(\tau_x, \tau_y)\f$ is defined by two rotations with angular parameter \f$\tau_x\f$
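When the tilted-sensor terms are needed, the full 14-element distortion vector (k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6, s_1, s_2, s_3, s_4, tau_x, tau_y) can be passed as distCoeffs. The values below are placeholders, not a real calibration:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <vector>

int main()
{
    // Illustrative values only; element order follows the documentation above
    cv::Mat distCoeffs = (cv::Mat_<double>(1, 14) <<
        -0.28, 0.09, 1e-3, -5e-4, 0.0,       // k1, k2, p1, p2, k3
         0.0, 0.0, 0.0,                      // k4, k5, k6
         0.0, 0.0, 0.0, 0.0,                 // s1, s2, s3, s4
         0.01, -0.02);                       // tau_x, tau_y (tilted sensor)

    cv::Matx33d A(800, 0, 320,  0, 800, 240,  0, 0, 1);   // made-up intrinsics
    std::vector<cv::Point3d> obj{{0.2, -0.3, 0.0}};
    std::vector<cv::Point2d> img;
    cv::projectPoints(obj, cv::Vec3d(0, 0, 0), cv::Vec3d(0.1, 0, 5), A, distCoeffs, img);
    return 0;
}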
@@ -314,7 +314,7 @@ enum HandEyeCalibrationMethod
 @param jacobian Optional output Jacobian matrix, 3x9 or 9x3, which is a matrix of partial
 derivatives of the output array components with respect to the input array components.

-\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos{\theta} I + (1- \cos{\theta} ) r r^T + \sin{\theta} \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f]
+\f[\begin{array}{l} \theta \leftarrow norm(r) \\ r \leftarrow r/ \theta \\ R = \cos(\theta) I + (1- \cos{\theta} ) r r^T + \sin(\theta) \vecthreethree{0}{-r_z}{r_y}{r_z}{0}{-r_x}{-r_y}{r_x}{0} \end{array}\f]

 Inverse transformation can be also done easily, since

@@ -322,7 +322,16 @@ Inverse transformation can be also done easily, since

 A rotation vector is a convenient and most compact representation of a rotation matrix (since any
 rotation matrix has just 3 degrees of freedom). The representation is used in the global 3D geometry
-optimization procedures like calibrateCamera, stereoCalibrate, or solvePnP .
+optimization procedures like @ref calibrateCamera, @ref stereoCalibrate, or @ref solvePnP .
+
+@note More information about the computation of the derivative of a 3D rotation matrix with respect to its exponential coordinate
+can be found in:
+- A Compact Formula for the Derivative of a 3-D Rotation in Exponential Coordinates, Guillermo Gallego, Anthony J. Yezzi @cite Gallego2014ACF
+
+@note Useful information on SE(3) and Lie Groups can be found in:
+- A tutorial on SE(3) transformation parameterizations and on-manifold optimization, Jose-Luis Blanco @cite blanco2010tutorial
+- Lie Groups for 2D and 3D Transformation, Ethan Eade @cite Eade17
+- A micro Lie theory for state estimation in robotics, Joan Solà, Jérémie Deray, Dinesh Atchuthan @cite Sol2018AML
 */
 CV_EXPORTS_W void Rodrigues( InputArray src, OutputArray dst, OutputArray jacobian = noArray() );

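A small usage sketch for cv::Rodrigues, converting a rotation vector to a matrix (with the optional Jacobian) and back; the input vector is arbitrary:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>

int main()
{
    // Rotation vector: axis (0, 0, 1), angle 0.5 rad (values chosen for illustration)
    cv::Mat rvec = (cv::Mat_<double>(3, 1) << 0.0, 0.0, 0.5);

    // Rotation vector -> rotation matrix, with the Jacobian (3x9 or 9x3) of the output
    // with respect to the input
    cv::Mat R, jacobian;
    cv::Rodrigues(rvec, R, jacobian);
    std::cout << "R =\n" << R << "\njacobian size: " << jacobian.size() << std::endl;

    // Inverse transformation: rotation matrix -> rotation vector
    cv::Mat rvec_back;
    cv::Rodrigues(R, rvec_back);
    std::cout << "recovered rvec = " << rvec_back.t() << std::endl;
    return 0;
}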
@@ -564,10 +573,10 @@ Number of input points must be 4. Object points must be defined in the following
 - for all the other flags, number of input points must be >= 4 and object points can be in any configuration.

 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
-1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can be also passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+where N is the number of points. vector\<Point2d\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
@@ -625,7 +634,7 @@ using the perspective projection model \f$ \Pi \f$ and the camera intrinsic para
 v \\
 1
 \end{bmatrix} &=
-\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{M}_w
+\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
 \begin{bmatrix}
 X_{w} \\
 Y_{w} \\
@@ -673,7 +682,7 @@ a 3D point expressed in the world frame into the camera frame:
 Z_c \\
 1
 \end{bmatrix} &=
-\hspace{0.2em} ^{c}\bf{M}_w
+\hspace{0.2em} ^{c}\bf{T}_w
 \begin{bmatrix}
 X_{w} \\
 Y_{w} \\
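A minimal cv::solvePnP sketch; the object is a hypothetical 10 cm planar square and the image points are synthesized by projecting it with a known pose, so the recovered rvec/tvec should match that pose:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Hypothetical planar target: corners of a 10 cm square in the object frame (Z = 0)
    std::vector<cv::Point3d> objectPoints{
        {-0.05, -0.05, 0}, {0.05, -0.05, 0}, {0.05, 0.05, 0}, {-0.05, 0.05, 0}};

    cv::Matx33d cameraMatrix(800, 0, 320,  0, 800, 240,  0, 0, 1);  // made-up intrinsics
    cv::Mat distCoeffs;                                             // empty = zero distortion

    // Synthesize image points from a known pose, then recover that pose
    cv::Vec3d rvec_true(0.1, -0.2, 0.3), tvec_true(0.0, 0.0, 0.5);
    std::vector<cv::Point2d> imagePoints;
    cv::projectPoints(objectPoints, rvec_true, tvec_true, cameraMatrix, distCoeffs, imagePoints);

    cv::Mat rvec, tvec;
    bool ok = cv::solvePnP(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvec, tvec);
    if (ok)
        std::cout << "rvec = " << rvec.t() << "\ntvec = " << tvec.t() << std::endl;
    return 0;
}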
@@ -739,9 +748,9 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
 /** @brief Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.

 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
-1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can be also passed here.
+where N is the number of points. vector\<Point2d\> can be also passed here.
 @param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
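A cv::solvePnPRansac sketch on synthetic data with one deliberately corrupted correspondence; the default iteration count, reprojection threshold, and confidence are spelled out only for clarity:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Hypothetical 3D model points; all values are made up
    std::vector<cv::Point3d> objectPoints{
        {0, 0, 0}, {0.1, 0, 0}, {0.1, 0.1, 0}, {0, 0.1, 0}, {0.05, 0.05, 0.02}, {0.02, 0.08, -0.01}};
    cv::Matx33d cameraMatrix(800, 0, 320,  0, 800, 240,  0, 0, 1);
    cv::Mat distCoeffs;

    std::vector<cv::Point2d> imagePoints;
    cv::projectPoints(objectPoints, cv::Vec3d(0.1, -0.2, 0.3), cv::Vec3d(0, 0, 0.5),
                      cameraMatrix, distCoeffs, imagePoints);
    imagePoints[4] += cv::Point2d(25, -30);   // simulate a gross mismatch (outlier)

    cv::Mat rvec, tvec, inliers;
    bool ok = cv::solvePnPRansac(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                                 rvec, tvec, false, 100, 8.0, 0.99, inliers);
    if (ok)
        std::cout << inliers.rows << " inliers, tvec = " << tvec.t() << std::endl;
    return 0;
}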
@@ -791,7 +800,7 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
 1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
 @param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
 vector\<Point2f\> can be also passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
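A cv::solveP3P sketch with exactly three synthetic correspondences; up to four candidate poses may be returned, which would normally be disambiguated with an additional point:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Exactly 3 correspondences (values are made up)
    std::vector<cv::Point3d> objectPoints{{0, 0, 0}, {0.1, 0, 0}, {0, 0.1, 0}};
    cv::Matx33d cameraMatrix(800, 0, 320,  0, 800, 240,  0, 0, 1);
    cv::Mat distCoeffs;

    std::vector<cv::Point2d> imagePoints;
    cv::projectPoints(objectPoints, cv::Vec3d(0, 0, 0), cv::Vec3d(0, 0, 0.5),
                      cameraMatrix, distCoeffs, imagePoints);

    std::vector<cv::Mat> rvecs, tvecs;
    int n = cv::solveP3P(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                         rvecs, tvecs, cv::SOLVEPNP_P3P);
    std::cout << n << " candidate pose(s) returned" << std::endl;
    return 0;
}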
@@ -820,10 +829,10 @@ CV_EXPORTS_W int solveP3P( InputArray objectPoints, InputArray imagePoints,
 to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.

 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
-where N is the number of points. vector\<Point3f\> can also be passed here.
+where N is the number of points. vector\<Point3d\> can also be passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can also be passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+where N is the number of points. vector\<Point2d\> can also be passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
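A cv::solvePnPRefineLM sketch: a slightly perturbed pose is refined against synthetic correspondences, with rvec and tvec updated in place:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Hypothetical data (all values made up): synthesize measurements from a true pose
    std::vector<cv::Point3d> objectPoints{
        {0, 0, 0}, {0.1, 0, 0}, {0.1, 0.1, 0}, {0, 0.1, 0}, {0.05, 0.05, 0.03}};
    cv::Matx33d cameraMatrix(800, 0, 320,  0, 800, 240,  0, 0, 1);
    cv::Mat distCoeffs;

    cv::Vec3d rvec_true(0.1, -0.2, 0.3), tvec_true(0, 0, 0.5);
    std::vector<cv::Point2d> imagePoints;
    cv::projectPoints(objectPoints, rvec_true, tvec_true, cameraMatrix, distCoeffs, imagePoints);

    // Perturbed initial guess; the refinement minimizes the reprojection error
    cv::Mat rvec = (cv::Mat_<double>(3, 1) << 0.12, -0.18, 0.28);
    cv::Mat tvec = (cv::Mat_<double>(3, 1) << 0.01, -0.01, 0.52);
    cv::solvePnPRefineLM(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvec, tvec);
    std::cout << "refined tvec = " << tvec.t() << std::endl;
    return 0;
}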
@@ -848,10 +857,10 @@ CV_EXPORTS_W void solvePnPRefineLM( InputArray objectPoints, InputArray imagePoi
 to the camera coordinate frame) from a 3D-2D point correspondences and starting from an initial solution.

 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or 1xN/Nx1 3-channel,
-where N is the number of points. vector\<Point3f\> can also be passed here.
+where N is the number of points. vector\<Point3d\> can also be passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can also be passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+where N is the number of points. vector\<Point2d\> can also be passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
@@ -890,10 +899,10 @@ Number of input points must be 4 and 2 solutions are returned. Object points mus
 Only 1 solution is returned.

 @param objectPoints Array of object points in the object coordinate space, Nx3 1-channel or
-1xN/Nx1 3-channel, where N is the number of points. vector\<Point3f\> can be also passed here.
+1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
 @param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
-where N is the number of points. vector\<Point2f\> can be also passed here.
-@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
+where N is the number of points. vector\<Point2d\> can be also passed here.
+@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
 @param distCoeffs Input vector of distortion coefficients
 \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
 4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
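A cv::solvePnPGeneric sketch with a hypothetical planar 4-point square and the SOLVEPNP_IPPE flag, i.e. the case described in the hunk above where two candidate solutions are returned; the function name, flag, and values are used here only as an assumed illustration of the documented behavior:

#include <opencv2/core.hpp>
#include <opencv2/calib3d.hpp>
#include <iostream>
#include <vector>

int main()
{
    // Made-up planar square target (Z = 0) and synthetic measurements
    std::vector<cv::Point3d> objectPoints{
        {-0.05, -0.05, 0}, {0.05, -0.05, 0}, {0.05, 0.05, 0}, {-0.05, 0.05, 0}};
    cv::Matx33d cameraMatrix(800, 0, 320,  0, 800, 240,  0, 0, 1);
    cv::Mat distCoeffs;

    std::vector<cv::Point2d> imagePoints;
    cv::projectPoints(objectPoints, cv::Vec3d(0.1, -0.2, 0.3), cv::Vec3d(0, 0, 0.5),
                      cameraMatrix, distCoeffs, imagePoints);

    // All candidate poses are returned, one rvec/tvec pair per solution
    std::vector<cv::Mat> rvecs, tvecs;
    int n = cv::solvePnPGeneric(objectPoints, imagePoints, cameraMatrix, distCoeffs,
                                rvecs, tvecs, false, cv::SOLVEPNP_IPPE);
    std::cout << n << " solution(s) returned" << std::endl;
    return 0;
}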
@@ -958,7 +967,7 @@ using the perspective projection model \f$ \Pi \f$ and the camera intrinsic para
 v \\
 1
 \end{bmatrix} &=
-\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{M}_w
+\bf{A} \hspace{0.1em} \Pi \hspace{0.2em} ^{c}\bf{T}_w
 \begin{bmatrix}
 X_{w} \\
 Y_{w} \\
@@ -1006,7 +1015,7 @@ a 3D point expressed in the world frame into the camera frame:
 Z_c \\
 1
 \end{bmatrix} &=
-\hspace{0.2em} ^{c}\bf{M}_w
+\hspace{0.2em} ^{c}\bf{T}_w
 \begin{bmatrix}
 X_{w} \\
 Y_{w} \\