Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Alexander Alekhin 2020-08-26 15:51:25 +00:00
commit ba147d2be2
13 changed files with 771 additions and 174 deletions

View File

@ -9,6 +9,9 @@ MathJax.Hub.Config(
forkfour: ["\\left\\{ \\begin{array}{l l} #1 & \\mbox{#2}\\\\ #3 & \\mbox{#4}\\\\ #5 & \\mbox{#6}\\\\ #7 & \\mbox{#8}\\\\ \\end{array} \\right.", 8],
vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3],
vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9],
cameramatrix: ["#1 = \\begin{bmatrix} f_x & 0 & c_x\\\\ 0 & f_y & c_y\\\\ 0 & 0 & 1 \\end{bmatrix}", 1],
distcoeffs: ["(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \\tau_x, \\tau_y]]]]) \\text{ of 4, 5, 8, 12 or 14 elements}"],
distcoeffsfisheye: ["(k_1, k_2, k_3, k_4)"],
hdotsfor: ["\\dots", 1],
mathbbm: ["\\mathbb{#1}", 1],
bordermatrix: ["\\matrix{#1}", 1]

View File

@ -51,3 +51,20 @@
#7 & #8 & #9
\end{bmatrix}
}
\newcommand{\cameramatrix}[1]{
#1 =
\begin{bmatrix}
f_x & 0 & c_x\\
0 & f_y & c_y\\
0 & 0 & 1
\end{bmatrix}
}
\newcommand{\distcoeffs}{
(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]]) \text{ of 4, 5, 8, 12 or 14 elements}
}
\newcommand{\distcoeffsfisheye}{
(k_1, k_2, k_3, k_4)
}

View File

@ -36,18 +36,27 @@ class PatternMaker:
def make_circles_pattern(self):
spacing = self.square_size
r = spacing / self.radius_rate
for x in range(1, self.cols + 1):
for y in range(1, self.rows + 1):
dot = SVG("circle", cx=x * spacing, cy=y * spacing, r=r, fill="black", stroke="none")
pattern_width = ((self.cols - 1.0) * spacing) + (2.0 * r)
pattern_height = ((self.rows - 1.0) * spacing) + (2.0 * r)
x_spacing = (self.width - pattern_width) / 2.0
y_spacing = (self.height - pattern_height) / 2.0
for x in range(0, self.cols):
for y in range(0, self.rows):
dot = SVG("circle", cx=(x * spacing) + x_spacing + r,
cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none")
self.g.append(dot)
def make_acircles_pattern(self):
spacing = self.square_size
r = spacing / self.radius_rate
for i in range(0, self.rows):
for j in range(0, self.cols):
dot = SVG("circle", cx=((j * 2 + i % 2) * spacing) + spacing, cy=self.height - (i * spacing + spacing),
r=r, fill="black", stroke="none")
pattern_width = ((self.cols-1.0) * 2 * spacing) + spacing + (2.0 * r)
pattern_height = ((self.rows-1.0) * spacing) + (2.0 * r)
x_spacing = (self.width - pattern_width) / 2.0
y_spacing = (self.height - pattern_height) / 2.0
for x in range(0, self.cols):
for y in range(0, self.rows):
dot = SVG("circle", cx=(2 * x * spacing) + (y % 2)*spacing + x_spacing + r,
cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none")
self.g.append(dot)
def make_checkerboard_pattern(self):
@ -84,9 +93,9 @@ def main():
parser.add_argument("-R", "--radius_rate", help="circles_radius = square_size/radius_rate", default="5.0",
action="store", dest="radius_rate", type=float)
parser.add_argument("-w", "--page_width", help="page width in units", default="216", action="store",
dest="page_width", type=int)
dest="page_width", type=float)
parser.add_argument("-h", "--page_height", help="page height in units", default="279", action="store",
dest="page_width", type=int)
dest="page_width", type=float)
parser.add_argument("-a", "--page_size", help="page size, supersedes -h -w arguments", default="A4", action="store",
dest="page_size", choices=["A0", "A1", "A2", "A3", "A4", "A5"])
args = parser.parse_args()
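As a quick cross-check of the centering math introduced above, a minimal standalone Python sketch (grid and page sizes are made-up values, not the script's defaults):

# Sketch of the circle-grid centering used in make_circles_pattern (illustrative only).
square_size, radius_rate = 20.0, 5.0
cols, rows = 4, 3
page_width, page_height = 216.0, 279.0

spacing = square_size
r = spacing / radius_rate
pattern_width = (cols - 1.0) * spacing + 2.0 * r
pattern_height = (rows - 1.0) * spacing + 2.0 * r
x_spacing = (page_width - pattern_width) / 2.0    # left margin that centers the grid
y_spacing = (page_height - pattern_height) / 2.0  # top margin that centers the grid

centers = [(x * spacing + x_spacing + r, y * spacing + y_spacing + r)
           for x in range(cols) for y in range(rows)]
xs, ys = [c[0] for c in centers], [c[1] for c in centers]
# The bounding box of the dots ends up centered on the page.
assert abs((min(xs) - r) - (page_width - (max(xs) + r))) < 1e-9
assert abs((min(ys) - r) - (page_height - (max(ys) + r))) < 1e-9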

View File

@ -64,17 +64,17 @@ The distortion-free projective transformation given by a pinhole camera model i
\f[s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w,\f]
where \f$P_w\f$ is a 3D point expressed with respect to the world coordinate system,
\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the intrinsic camera matrix,
\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the camera intrinsic matrix,
\f$R\f$ and \f$t\f$ are the rotation and translation that describe the change of coordinates from
world to camera coordinate systems (or camera frame) and \f$s\f$ is the projective transformation's
arbitrary scaling and not part of the camera model.
The intrinsic camera matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated
The camera intrinsic matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated
as \f$K\f$) projects 3D points given in the camera coordinate system to 2D pixel coordinates, i.e.
\f[p = A P_c.\f]
The camera matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are
The camera intrinsic matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are
expressed in pixel units, and the principal point \f$(c_x, c_y)\f$, that is usually close to the
image center:
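As a quick numeric illustration of this projection model, a standalone Python sketch with made-up intrinsics:

import numpy as np

# Hypothetical intrinsics: focal lengths in pixels, principal point near the image center.
fx, fy, cx, cy = 800.0, 800.0, 320.0, 240.0
A = np.array([[fx, 0, cx],
              [0, fy, cy],
              [0,  0,  1]])                     # camera intrinsic matrix

# A 3D point already expressed in the camera coordinate system (Z pointing forward).
P_c = np.array([0.1, -0.05, 2.0])

p_h = A @ P_c                                   # p = A P_c (homogeneous)
u, v = p_h[:2] / p_h[2]                         # divide by the projective scale s
assert np.isclose(u, fx * P_c[0] / P_c[2] + cx)
assert np.isclose(v, fy * P_c[1] / P_c[2] + cy)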
@ -382,9 +382,9 @@ R & t \\
\end{bmatrix} P_{h_0}.\f]
@note
- Many functions in this module take a camera matrix as an input parameter. Although all
- Many functions in this module take a camera intrinsic matrix as an input parameter. Although all
functions assume the same structure of this parameter, they may name it differently. The
parameter's description, however, will be clear in that a camera matrix with the structure
parameter's description, however, will be clear in that a camera intrinsic matrix with the structure
shown above is required.
- A calibration sample for 3 cameras in a horizontal position can be found at
opencv_source_code/samples/cpp/3calibration.cpp
@ -457,8 +457,10 @@ enum SolvePnPMethod {
SOLVEPNP_ITERATIVE = 0,
SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct
SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
//!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct
SOLVEPNP_UPNP = 4, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
//!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17
SOLVEPNP_IPPE = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n
//!< Object points must be coplanar.
@ -752,10 +754,10 @@ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mt
OutputArray Qy = noArray(),
OutputArray Qz = noArray());
/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix.
/** @brief Decomposes a projection matrix into a rotation matrix and a camera intrinsic matrix.
@param projMatrix 3x4 input projection matrix P.
@param cameraMatrix Output 3x3 camera matrix K.
@param cameraMatrix Output 3x3 camera intrinsic matrix \f$\cameramatrix{A}\f$.
@param rotMatrix Output 3x3 external rotation matrix R.
@param transVect Output 4x1 translation vector T.
@param rotMatrixX Optional 3x3 rotation matrix around x-axis.
@ -840,10 +842,9 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1,
@param rvec The rotation vector (@ref Rodrigues) that, together with tvec, performs a change of
basis from world to camera coordinate system, see @ref calibrateCamera for details.
@param tvec The translation vector, see parameter description above.
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$ .
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
\f$\distcoeffs\f$ . If the vector is empty, the zero distortion coefficients are assumed.
@param imagePoints Output array of image points, 1xN/Nx1 2-channel, or
vector\<Point2f\> .
@param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image
@ -897,10 +898,9 @@ Number of input points must be 4. Object points must be defined in the following
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system.
@ -912,7 +912,7 @@ vectors, respectively, and further optimizes them.
- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections imagePoints and the projected (using
projectPoints ) objectPoints .
@ref projectPoints ) objectPoints .
- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
@ -921,9 +921,11 @@ In this case the function requires exactly four object and image points.
In this case the function requires exactly four object and image points.
- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis.
- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of J. Hesch and S. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
@ -939,7 +941,7 @@ It requires 4 coplanar object points defined in the following order:
- point 3: [-squareLength / 2, -squareLength / 2, 0]
The function estimates the object pose given a set of object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients, see the figure below
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
and the Z-axis forward).
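A minimal Python sketch of a typical solvePnP call on synthetic data (points, intrinsics and pose below are made up for illustration):

import numpy as np
import cv2 as cv

object_points = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0],
                          [0.5, 0.5, 0.5], [1, 0, 1]], dtype=np.float64)
camera_matrix = np.array([[800.0, 0, 320], [0, 800.0, 240], [0, 0, 1]])
dist_coeffs = np.zeros(5)

# Ground-truth pose, used here only to generate consistent image points.
rvec_gt = np.array([0.1, -0.2, 0.05])
tvec_gt = np.array([0.3, -0.1, 5.0])
image_points, _ = cv.projectPoints(object_points, rvec_gt, tvec_gt, camera_matrix, dist_coeffs)

ok, rvec, tvec = cv.solvePnP(object_points, image_points, camera_matrix, dist_coeffs,
                             flags=cv.SOLVEPNP_ITERATIVE)
assert ok
assert np.allclose(rvec.ravel(), rvec_gt, atol=1e-4)
assert np.allclose(tvec.ravel(), tvec_gt, atol=1e-4)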
@ -1072,10 +1074,9 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system.
@ -1092,7 +1093,7 @@ an inlier.
@param flags Method for solving a PnP problem (see @ref solvePnP ).
The function estimates an object pose given a set of object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients. This function finds such
projections, as well as the camera intrinsic matrix and the distortion coefficients. This function finds such
a pose that minimizes reprojection error, that is, the sum of squared distances between the observed
projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC
makes the function resistant to outliers.
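A hedged sketch of the RANSAC variant on synthetic data with one deliberately corrupted correspondence (illustration only):

import numpy as np
import cv2 as cv

rng = np.random.default_rng(0)
object_points = rng.uniform(-1.0, 1.0, (20, 3))
object_points[:, 2] += 4.0                         # keep the points in front of the camera
camera_matrix = np.array([[800.0, 0, 320], [0, 800.0, 240], [0, 0, 1]])
dist_coeffs = np.zeros(5)
rvec_gt = np.array([0.05, 0.1, -0.03])
tvec_gt = np.array([0.2, -0.1, 1.0])

image_points, _ = cv.projectPoints(object_points, rvec_gt, tvec_gt, camera_matrix, dist_coeffs)
image_points = image_points.reshape(-1, 2)
image_points[0] += 50.0                            # inject a gross outlier

ok, rvec, tvec, inliers = cv.solvePnPRansac(object_points, image_points,
                                            camera_matrix, dist_coeffs,
                                            reprojectionError=2.0)
assert ok
assert 0 not in inliers.ravel()                    # the corrupted correspondence is rejected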
@ -1131,10 +1132,9 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
vector\<Point2f\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from
the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
@ -1146,7 +1146,7 @@ the model coordinate system to the camera coordinate system. A P3P problem has u
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).
The function estimates the object pose given 3 object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients.
projections, as well as the camera intrinsic matrix and the distortion coefficients.
@note
The solutions are sorted by reprojection errors (lowest to highest).
@ -1163,10 +1163,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting
where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can also be passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
@ -1175,7 +1174,7 @@ the model coordinate system to the camera coordinate system. Input values are us
The function refines the object pose given at least 3 object points, their corresponding image
projections, an initial solution for the rotation and translation vector,
as well as the camera matrix and the distortion coefficients.
as well as the camera intrinsic matrix and the distortion coefficients.
The function minimizes the projection error with respect to the rotation and the translation vectors, according
to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process.
*/
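A small sketch of how the refinement is typically used, starting from a perturbed initial pose (synthetic values for illustration):

import numpy as np
import cv2 as cv

object_points = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0],
                          [0.5, 0.5, 1.0]], dtype=np.float64)
camera_matrix = np.array([[800.0, 0, 320], [0, 800.0, 240], [0, 0, 1]])
dist_coeffs = np.zeros(5)
rvec_gt = np.array([[0.1], [-0.2], [0.05]])
tvec_gt = np.array([[0.3], [-0.1], [5.0]])
image_points, _ = cv.projectPoints(object_points, rvec_gt, tvec_gt, camera_matrix, dist_coeffs)

# Start from a slightly perturbed pose and let the Levenberg-Marquardt step pull it back.
rvec0 = rvec_gt + 0.05
tvec0 = tvec_gt + 0.1
rvec, tvec = cv.solvePnPRefineLM(object_points, image_points, camera_matrix, dist_coeffs,
                                 rvec0, tvec0)
assert np.allclose(rvec, rvec_gt, atol=1e-3)
assert np.allclose(tvec, tvec_gt, atol=1e-3)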
@ -1191,10 +1190,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting
where N is the number of points. vector\<Point3d\> can also be passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can also be passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
@ -1205,7 +1203,7 @@ gain in the Damped Gauss-Newton formulation.
The function refines the object pose given at least 3 object points, their corresponding image
projections, an initial solution for the rotation and translation vector,
as well as the camera matrix and the distortion coefficients.
as well as the camera intrinsic matrix and the distortion coefficients.
The function minimizes the projection error with respect to the rotation and the translation vectors, using a
virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme.
*/
@ -1233,10 +1231,9 @@ Only 1 solution is returned.
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from
the model coordinate system to the camera coordinate system.
@ -1257,9 +1254,11 @@ In this case the function requires exactly four object and image points.
In this case the function requires exactly four object and image points.
- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
@ -1282,7 +1281,7 @@ and useExtrinsicGuess is set to true.
and the 3D object points projected with the estimated pose.
The function estimates the object pose given a set of object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients, see the figure below
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
and the Z-axis forward).
@ -1411,7 +1410,7 @@ CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoint
InputArray rvec = noArray(), InputArray tvec = noArray(),
OutputArray reprojectionError = noArray() );
/** @brief Finds an initial camera matrix from 3D-2D point correspondences.
/** @brief Finds an initial camera intrinsic matrix from 3D-2D point correspondences.
@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern
coordinate space. In the old interface all the per-view vectors are concatenated. See
@ -1422,7 +1421,7 @@ old interface all the per-view vectors are concatenated.
@param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently.
Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ .
The function estimates and returns an initial camera matrix for the camera calibration process.
The function estimates and returns an initial camera intrinsic matrix for the camera calibration process.
Currently, the function only supports planar calibration patterns, which are patterns where each
object point has z-coordinate =0.
*/
@ -1601,10 +1600,9 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz
@param image Input/output image. It must have 1 or 3 channels. The number of channels is not altered.
@param cameraMatrix Input 3x3 floating-point matrix of camera intrinsic parameters.
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
\f$\cameramatrix{A}\f$
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
\f$\distcoeffs\f$. If the vector is empty, the zero distortion coefficients are assumed.
@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system.
@param tvec Translation vector.
@ -1707,14 +1705,13 @@ pattern points (e.g. std::vector<std::vector<cv::Vec2f>>). imagePoints.size() an
objectPoints.size(), and imagePoints[i].size() and objectPoints[i].size() for each i, must be equal,
respectively. In the old interface all the vectors of object points from different views are
concatenated together.
@param imageSize Size of the image used only to initialize the intrinsic camera matrix.
@param cameraMatrix Input/output 3x3 floating-point camera matrix
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
@param imageSize Size of the image used only to initialize the camera intrinsic matrix.
@param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix
\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
initialized before calling the function.
@param distCoeffs Input/output vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements.
\f$\distcoeffs\f$.
@param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view
(e.g. std::vector<cv::Mat>>). That is, each i-th rotation vector together with the corresponding
i-th translation vector (see the next output parameter description) brings the calibration pattern
@ -1904,9 +1901,9 @@ CV_EXPORTS_W double calibrateCameraRO( InputArrayOfArrays objectPoints,
int flags = 0, TermCriteria criteria = TermCriteria(
TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
/** @brief Computes useful camera characteristics from the camera matrix.
/** @brief Computes useful camera characteristics from the camera intrinsic matrix.
@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or
@param cameraMatrix Input camera intrinsic matrix that can be estimated by calibrateCamera or
stereoCalibrate .
@param imageSize Input image size in pixels.
@param apertureWidth Physical width in mm of the sensor.
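As a usage sketch, calling it with a made-up intrinsic matrix and sensor size (illustrative values only):

import math
import numpy as np
import cv2 as cv

camera_matrix = np.array([[800.0, 0, 320], [0, 800.0, 240], [0, 0, 1]])
image_size = (640, 480)              # pixels
aperture_w, aperture_h = 6.4, 4.8    # hypothetical sensor dimensions in mm

fovx, fovy, focal_mm, principal_pt, aspect = cv.calibrationMatrixValues(
    camera_matrix, image_size, aperture_w, aperture_h)

# With the principal point at the image center, the horizontal FOV reduces to 2*atan(w / (2*fx)).
assert math.isclose(fovx, math.degrees(2 * math.atan(640 / (2 * 800.0))), rel_tol=1e-6)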
@ -1942,15 +1939,15 @@ be equal for each i.
observed by the first camera. The same structure as in @ref calibrateCamera.
@param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
observed by the second camera. The same structure as in @ref calibrateCamera.
@param cameraMatrix1 Input/output camera matrix for the first camera, the same as in
@param cameraMatrix1 Input/output camera intrinsic matrix for the first camera, the same as in
@ref calibrateCamera. Furthermore, for the stereo case, additional flags may be used, see below.
@param distCoeffs1 Input/output vector of distortion coefficients, the same as in
@ref calibrateCamera.
@param cameraMatrix2 Input/output second camera matrix for the second camera. See description for
@param cameraMatrix2 Input/output second camera intrinsic matrix for the second camera. See description for
cameraMatrix1.
@param distCoeffs2 Input/output lens distortion coefficients for the second camera. See
description for distCoeffs1.
@param imageSize Size of the image used only to initialize the intrinsic camera matrices.
@param imageSize Size of the image used only to initialize the camera intrinsic matrices.
@param R Output rotation matrix. Together with the translation vector T, this matrix brings
points given in the first camera's coordinate system to points in the second camera's
coordinate system. In more technical terms, the tuple of R and T performs a change of basis
@ -2071,9 +2068,9 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
/** @brief Computes rectification transforms for each head of a calibrated stereo camera.
@param cameraMatrix1 First camera matrix.
@param cameraMatrix1 First camera intrinsic matrix.
@param distCoeffs1 First camera distortion parameters.
@param cameraMatrix2 Second camera matrix.
@param cameraMatrix2 Second camera intrinsic matrix.
@param distCoeffs2 Second camera distortion parameters.
@param imageSize Size of the image used for stereo calibration.
@param R Rotation matrix from the coordinate system of the first camera to the second camera,
@ -2229,12 +2226,11 @@ CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distC
OutputArray Q, double alpha, Size newImgSize,
CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags );
/** @brief Returns the new camera matrix based on the free scaling parameter.
/** @brief Returns the new camera intrinsic matrix based on the free scaling parameter.
@param cameraMatrix Input camera matrix.
@param cameraMatrix Input camera intrinsic matrix.
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param imageSize Original image size.
@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are
@ -2243,17 +2239,17 @@ stereoRectify for details.
@param newImgSize Image size after rectification. By default, it is set to imageSize .
@param validPixROI Optional output rectangle that outlines all-good-pixels region in the
undistorted image. See roi1, roi2 description in stereoRectify .
@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the
@param centerPrincipalPoint Optional flag that indicates whether in the new camera intrinsic matrix the
principal point should be at the image center or not. By default, the principal point is chosen to
best fit a subset of the source image (determined by alpha) to the corrected image.
@return new_camera_matrix Output new camera matrix.
@return new_camera_matrix Output new camera intrinsic matrix.
The function computes and returns the optimal new camera matrix based on the free scaling parameter.
The function computes and returns the optimal new camera intrinsic matrix based on the free scaling parameter.
By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original
image pixels if there is valuable information in the corners alpha=1 , or get something in between.
When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to
"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion
coefficients, the computed new camera matrix, and newImageSize should be passed to
"virtual" pixels outside of the captured distorted image. The original camera intrinsic matrix, distortion
coefficients, the computed new camera intrinsic matrix, and newImageSize should be passed to
initUndistortRectifyMap to produce the maps for remap .
*/
CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs,
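The paragraph above describes the usual undistortion pipeline end to end; a minimal Python sketch of it (intrinsics, distortion coefficients and the input frame are made up):

import numpy as np
import cv2 as cv

img = np.full((480, 640, 3), 127, np.uint8)            # stand-in for a captured frame
K = np.array([[800.0, 0, 320], [0, 800.0, 240], [0, 0, 1]])
dist = np.array([-0.3, 0.1, 0.001, 0.001, 0.0])        # hypothetical distortion coefficients

h, w = img.shape[:2]
new_K, roi = cv.getOptimalNewCameraMatrix(K, dist, (w, h), alpha=1, newImgSize=(w, h))
map1, map2 = cv.initUndistortRectifyMap(K, dist, None, new_K, (w, h), cv.CV_16SC2)
undistorted = cv.remap(img, map1, map2, cv.INTER_LINEAR)

x, y, rw, rh = roi                                     # crop away the "virtual" black border
undistorted = undistorted[y:y + rh, x:x + rw]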
@ -2265,23 +2261,23 @@ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray
@param[in] R_gripper2base Rotation part extracted from the homogeneous matrix that transforms a point
expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
from gripper frame to robot base frame.
This is a vector (`vector<Mat>`) that contains the rotations, as `(3x3)` rotation matrices or `(3x1)` rotation vectors,
for all the transformations from gripper frame to robot base frame.
@param[in] t_gripper2base Translation part extracted from the homogeneous matrix that transforms a point
expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
This is a vector (`vector<Mat>`) that contains the `(3x1)` translation vectors for all the transformations
from gripper frame to robot base frame.
@param[in] R_target2cam Rotation part extracted from the homogeneous matrix that transforms a point
expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
from calibration target frame to camera frame.
This is a vector (`vector<Mat>`) that contains the rotations, as `(3x3)` rotation matrices or `(3x1)` rotation vectors,
for all the transformations from calibration target frame to camera frame.
@param[in] t_target2cam Translation part extracted from the homogeneous matrix that transforms a point
expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
This is a vector (`vector<Mat>`) that contains the `(3x1)` translation vectors for all the transformations
from calibration target frame to camera frame.
@param[out] R_cam2gripper Estimated rotation part extracted from the homogeneous matrix that transforms a point
@param[out] R_cam2gripper Estimated `(3x3)` rotation part extracted from the homogeneous matrix that transforms a point
expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
@param[out] t_cam2gripper Estimated translation part extracted from the homogeneous matrix that transforms a point
@param[out] t_cam2gripper Estimated `(3x1)` translation part extracted from the homogeneous matrix that transforms a point
expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
@param[in] method One of the implemented Hand-Eye calibration methods, see cv::HandEyeCalibrationMethod
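A hedged Python sketch of the eye-in-hand geometry these parameters describe, on synthetic poses (all transforms below are invented so the estimate can be checked against them):

import numpy as np
import cv2 as cv

def to_h(rvec, t):
    # Build a 4x4 homogeneous transform from a rotation vector and a translation.
    T = np.eye(4)
    T[:3, :3] = cv.Rodrigues(np.asarray(rvec, dtype=np.float64))[0]
    T[:3, 3] = t
    return T

rng = np.random.default_rng(1)
X = to_h([0.2, -0.1, 0.3], [0.05, 0.02, 0.10])           # unknown cam->gripper transform to recover
T_target2base = to_h([0.0, 0.5, 0.0], [1.0, 0.0, 0.5])   # fixed target pose in the robot base frame

R_g2b, t_g2b, R_t2c, t_t2c = [], [], [], []
for _ in range(10):
    T_g2b = to_h(rng.uniform(-0.5, 0.5, 3), rng.uniform(-0.5, 0.5, 3))
    # target->cam = inv(cam->gripper) * inv(gripper->base) * target->base
    T_t2c = np.linalg.inv(X) @ np.linalg.inv(T_g2b) @ T_target2base
    R_g2b.append(T_g2b[:3, :3]); t_g2b.append(T_g2b[:3, 3].reshape(3, 1))
    R_t2c.append(T_t2c[:3, :3]); t_t2c.append(T_t2c[:3, 3].reshape(3, 1))

R_est, t_est = cv.calibrateHandEye(R_g2b, t_g2b, R_t2c, t_t2c, method=cv.CALIB_HAND_EYE_TSAI)
assert np.allclose(R_est, X[:3, :3], atol=1e-6)
assert np.allclose(t_est.ravel(), X[:3, 3], atol=1e-6)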
@ -2502,11 +2498,11 @@ CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should
be floating-point (single or double precision).
@param points2 Array of the second image points of the same size and format as points1 .
@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera matrix. If this assumption does not hold for your use case, use
same camera intrinsic matrix. If this assumption does not hold for your use case, use
`undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points
to normalized image coordinates, which are valid for the identity camera matrix. When
to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When
passing these coordinates, pass the identity matrix for this parameter.
@param method Method for computing an essential matrix.
- **RANSAC** for the RANSAC algorithm.
@ -2553,10 +2549,10 @@ confidence (probability) that the estimated matrix is correct.
@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1
for the other points. The array is computed only in the RANSAC and LMedS methods.
This function differs from the one above that it computes camera matrix from focal length and
This function differs from the one above in that it computes the camera intrinsic matrix from the focal length and
principal point:
\f[K =
\f[A =
\begin{bmatrix}
f & 0 & x_{pp} \\
0 & f & y_{pp} \\
@ -2653,9 +2649,9 @@ inliers that pass the check.
@param points1 Array of N 2D points from the first image. The point coordinates should be
floating-point (single or double precision).
@param points2 Array of the second image points of the same size and format as points1 .
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera matrix.
same camera intrinsic matrix.
@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
that performs a change of basis from the first camera's coordinate system to the second camera's
coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
@ -2718,7 +2714,7 @@ are feature points from cameras with same focal length and principal point.
inliers in points1 and points2 for the given essential matrix E. Only these inliers will be used to
recover pose. In the output mask, only inliers which pass the cheirality check are marked.
This function differs from the one above that it computes camera matrix from focal length and
This function differs from the one above in that it computes the camera intrinsic matrix from the focal length and
principal point:
\f[A =
@ -2738,9 +2734,9 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point
@param points1 Array of N 2D points from the first image. The point coordinates should be
floating-point (single or double precision).
@param points2 Array of the second image points of the same size and format as points1.
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
Note that this function assumes that points1 and points2 are feature points from cameras with the
same camera matrix.
same camera intrinsic matrix.
@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
that performs a change of basis from the first camera's coordinate system to the second camera's
coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
@ -3150,7 +3146,7 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details.
/** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s).
@param H The input homography matrix between two images.
@param K The input intrinsic camera calibration matrix.
@param K The input camera intrinsic matrix.
@param rotations Array of rotation matrices.
@param translations Array of translation matrices.
@param normals Array of plane normal matrices.
@ -3611,8 +3607,8 @@ namespace fisheye
@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or
vector\<Point2f\>.
@param affine
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param alpha The skew coefficient.
@param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect
to components of the focal lengths, coordinates of the principal point, distortion coefficients,
@ -3635,12 +3631,12 @@ namespace fisheye
@param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is
the number of points in the view.
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param alpha The skew coefficient.
@param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
Note that the function assumes the camera matrix of the undistorted points to be identity.
Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity.
This means if you want to transform back points undistorted with undistortPoints() you have to
multiply them with \f$P^{-1}\f$.
*/
@ -3650,11 +3646,11 @@ namespace fisheye
@param distorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is the
number of points in the view.
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
1-channel or 1x1 3-channel
@param P New camera matrix (3x3) or new projection matrix (3x4)
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
@param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
*/
CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted,
@ -3663,11 +3659,11 @@ namespace fisheye
/** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero
distortion is used; if R or P is empty, identity matrices are used.
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
1-channel or 1x1 3-channel
@param P New camera matrix (3x3) or new projection matrix (3x4)
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
@param size Undistorted image size.
@param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
for details.
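A hedged sketch of the map-building pipeline these fisheye functions are used for (K, D and the input frame are made up):

import numpy as np
import cv2 as cv

img = np.zeros((480, 640, 3), np.uint8)                 # stand-in for a fisheye frame
K = np.array([[300.0, 0, 320], [0, 300.0, 240], [0, 0, 1]])
D = np.array([0.05, 0.01, -0.01, 0.002])                # hypothetical (k1, k2, k3, k4)

size = (640, 480)
new_K = cv.fisheye.estimateNewCameraMatrixForUndistortRectify(K, D, size, np.eye(3), balance=0.5)
map1, map2 = cv.fisheye.initUndistortRectifyMap(K, D, np.eye(3), new_K, size, cv.CV_16SC2)
undistorted = cv.remap(img, map1, map2, cv.INTER_LINEAR)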
@ -3681,9 +3677,9 @@ namespace fisheye
@param distorted image with fisheye lens distortion.
@param undistorted Output image with compensated fisheye lens distortion.
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param Knew Camera intrinsic matrix of the distorted image. By default, it is the identity matrix but you
may additionally scale and shift the result by using a different matrix.
@param new_size the new size
@ -3708,14 +3704,14 @@ namespace fisheye
CV_EXPORTS_W void undistortImage(InputArray distorted, OutputArray undistorted,
InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size());
/** @brief Estimates new camera matrix for undistortion or rectification.
/** @brief Estimates new camera intrinsic matrix for undistortion or rectification.
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param image_size Size of the image
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
1-channel or 1x1 3-channel
@param P New camera matrix (3x3) or new projection matrix (3x4)
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
@param balance Sets the new focal length in range between the min focal length and the max focal
length. Balance is in range of [0, 1].
@param new_size the new size
@ -3731,12 +3727,12 @@ namespace fisheye
@param imagePoints vector of vectors of the projections of calibration pattern points.
imagePoints.size() and objectPoints.size() and imagePoints[i].size() must be equal to
objectPoints[i].size() for each i.
@param image_size Size of the image used only to initialize the intrinsic camera matrix.
@param K Output 3x3 floating-point camera matrix
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If
@param image_size Size of the image used only to initialize the camera intrinsic matrix.
@param K Output 3x3 floating-point camera intrinsic matrix
\f$\cameramatrix{A}\f$ . If
fisheye::CALIB_USE_INTRINSIC_GUESS is specified, some or all of fx, fy, cx, cy must be
initialized before calling the function.
@param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
@param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view.
That is, each k-th rotation vector together with the corresponding k-th translation vector (see
the next output parameter description) brings the calibration pattern from the model coordinate
@ -3763,9 +3759,9 @@ optimization. It stays at the center or at a different location specified when C
/** @brief Stereo rectification for fisheye camera model
@param K1 First camera matrix.
@param K1 First camera intrinsic matrix.
@param D1 First camera distortion parameters.
@param K2 Second camera matrix.
@param K2 Second camera intrinsic matrix.
@param D2 Second camera distortion parameters.
@param imageSize Size of the image used for stereo calibration.
@param R Rotation matrix between the coordinate systems of the first and the second
@ -3802,15 +3798,15 @@ optimization. It stays at the center or at a different location specified when C
observed by the first camera.
@param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
observed by the second camera.
@param K1 Input/output first camera matrix:
@param K1 Input/output first camera intrinsic matrix:
\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified,
some or all of the matrix components must be initialized.
@param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements.
@param K2 Input/output second camera matrix. The parameter is similar to K1 .
@param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements.
@param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 .
@param D2 Input/output lens distortion coefficients for the second camera. The parameter is
similar to D1 .
@param imageSize Size of the image used only to initialize intrinsic camera matrix.
@param imageSize Size of the image used only to initialize camera intrinsic matrix.
@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
@param T Output translation vector between the coordinate systems of the cameras.
@param flags Different flags that may be zero or a combination of the following values:

View File

@ -29,6 +29,7 @@ static Mat homogeneousInverse(const Mat& T)
// q = sin(theta/2) * v
// theta - rotation angle
// v - unit rotation axis, |v| = 1
// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
static Mat rot2quatMinimal(const Mat& R)
{
CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3);
@ -44,7 +45,7 @@ static Mat rot2quatMinimal(const Mat& R)
qx = (m21 - m12) / S;
qy = (m02 - m20) / S;
qz = (m10 - m01) / S;
} else if ((m00 > m11)&(m00 > m22)) {
} else if (m00 > m11 && m00 > m22) {
double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx
qx = 0.25 * S;
qy = (m01 + m10) / S;
@ -98,6 +99,7 @@ static Mat quatMinimal2rot(const Mat& q)
//
// q - 4x1 unit quaternion <qw, qx, qy, qz>
// R - 3x3 rotation matrix
// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
static Mat rot2quat(const Mat& R)
{
CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3);
@ -114,7 +116,7 @@ static Mat rot2quat(const Mat& R)
qx = (m21 - m12) / S;
qy = (m02 - m20) / S;
qz = (m10 - m01) / S;
} else if ((m00 > m11)&(m00 > m22)) {
} else if (m00 > m11 && m00 > m22) {
double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx
qw = (m21 - m12) / S;
qx = 0.25 * S;
@ -572,7 +574,11 @@ static void calibrateHandEyeAndreff(const std::vector<Mat>& Hg, const std::vecto
R = R.reshape(1, 2, newSize);
//Eq 15
double det = determinant(R);
R = pow(sign_double(det) / abs(det), 1.0/3.0) * R;
if (std::fabs(det) < FLT_EPSILON)
{
CV_Error(Error::StsNoConv, "calibrateHandEye() with CALIB_HAND_EYE_ANDREFF method: determinant(R) is null");
}
R = cubeRoot(static_cast<float>(sign_double(det) / abs(det))) * R;
Mat w, u, vt;
SVDecomp(R, w, u, vt);
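The matrix-to-quaternion branch added above follows the referenced euclideanspace.com formulas; a small Python cross-check of the trace > 0 branch against cv::Rodrigues (illustrative only):

import numpy as np
import cv2 as cv

rvec = np.array([0.3, -0.2, 0.1])          # axis-angle; |rvec| is the rotation angle
R, _ = cv.Rodrigues(rvec)

tr = np.trace(R)
assert tr > 0                              # only the first branch is sketched here
S = np.sqrt(tr + 1.0) * 2                  # S = 4 * qw
qw = 0.25 * S
qx = (R[2, 1] - R[1, 2]) / S
qy = (R[0, 2] - R[2, 0]) / S
qz = (R[1, 0] - R[0, 1]) / S

theta = np.linalg.norm(rvec)
assert np.isclose(qw, np.cos(theta / 2))
assert np.allclose([qx, qy, qz], np.sin(theta / 2) * rvec / theta)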

View File

@ -51,6 +51,8 @@
#include "usac.hpp"
#include <opencv2/core/utils/logger.hpp>
namespace cv
{
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
@ -809,6 +811,15 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
vector<Mat> vec_rvecs, vec_tvecs;
if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP)
{
if (flags == SOLVEPNP_DLS)
{
CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_DLS. Fallback to EPnP.");
}
else if (flags == SOLVEPNP_UPNP)
{
CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_UPNP. Fallback to EPnP.");
}
Mat undistortedPoints;
undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
epnp PnP(cameraMatrix, opoints, undistortedPoints);

View File

@ -7,6 +7,38 @@
namespace opencv_test { namespace {
static std::string getMethodName(HandEyeCalibrationMethod method)
{
std::string method_name = "";
switch (method)
{
case CALIB_HAND_EYE_TSAI:
method_name = "Tsai";
break;
case CALIB_HAND_EYE_PARK:
method_name = "Park";
break;
case CALIB_HAND_EYE_HORAUD:
method_name = "Horaud";
break;
case CALIB_HAND_EYE_ANDREFF:
method_name = "Andreff";
break;
case CALIB_HAND_EYE_DANIILIDIS:
method_name = "Daniilidis";
break;
default:
break;
}
return method_name;
}
class CV_CalibrateHandEyeTest : public cvtest::BaseTest
{
public:
@ -48,7 +80,6 @@ protected:
std::vector<Mat> &R_target2cam, std::vector<Mat> &t_target2cam,
bool noise, Mat& R_cam2gripper, Mat& t_cam2gripper);
Mat homogeneousInverse(const Mat& T);
std::string getMethodName(HandEyeCalibrationMethod method);
double sign_double(double val);
double eps_rvec[5];
@ -340,38 +371,6 @@ Mat CV_CalibrateHandEyeTest::homogeneousInverse(const Mat& T)
return Tinv;
}
std::string CV_CalibrateHandEyeTest::getMethodName(HandEyeCalibrationMethod method)
{
std::string method_name = "";
switch (method)
{
case CALIB_HAND_EYE_TSAI:
method_name = "Tsai";
break;
case CALIB_HAND_EYE_PARK:
method_name = "Park";
break;
case CALIB_HAND_EYE_HORAUD:
method_name = "Horaud";
break;
case CALIB_HAND_EYE_ANDREFF:
method_name = "Andreff";
break;
case CALIB_HAND_EYE_DANIILIDIS:
method_name = "Daniilidis";
break;
default:
break;
}
return method_name;
}
double CV_CalibrateHandEyeTest::sign_double(double val)
{
return (0 < val) - (val < 0);
@ -381,4 +380,86 @@ double CV_CalibrateHandEyeTest::sign_double(double val)
TEST(Calib3d_CalibrateHandEye, regression) { CV_CalibrateHandEyeTest test; test.safe_run(); }
TEST(Calib3d_CalibrateHandEye, regression_17986)
{
const std::string camera_poses_filename = findDataFile("cv/hand_eye_calibration/cali.txt");
const std::string end_effector_poses = findDataFile("cv/hand_eye_calibration/robot_cali.txt");
std::vector<Mat> R_target2cam;
std::vector<Mat> t_target2cam;
// Parse camera poses
{
std::ifstream file(camera_poses_filename.c_str());
ASSERT_TRUE(file.is_open());
int ndata = 0;
file >> ndata;
R_target2cam.reserve(ndata);
t_target2cam.reserve(ndata);
std::string image_name;
Matx33d cameraMatrix;
Matx33d R;
Matx31d t;
Matx16d distCoeffs;
Matx13d distCoeffs2;
while (file >> image_name >>
cameraMatrix(0,0) >> cameraMatrix(0,1) >> cameraMatrix(0,2) >>
cameraMatrix(1,0) >> cameraMatrix(1,1) >> cameraMatrix(1,2) >>
cameraMatrix(2,0) >> cameraMatrix(2,1) >> cameraMatrix(2,2) >>
R(0,0) >> R(0,1) >> R(0,2) >>
R(1,0) >> R(1,1) >> R(1,2) >>
R(2,0) >> R(2,1) >> R(2,2) >>
t(0) >> t(1) >> t(2) >>
distCoeffs(0) >> distCoeffs(1) >> distCoeffs(2) >> distCoeffs(3) >> distCoeffs(4) >>
distCoeffs2(0) >> distCoeffs2(1) >> distCoeffs2(2)) {
R_target2cam.push_back(Mat(R));
t_target2cam.push_back(Mat(t));
}
}
std::vector<Mat> R_gripper2base;
std::vector<Mat> t_gripper2base;
// Parse end-effector poses
{
std::ifstream file(end_effector_poses.c_str());
ASSERT_TRUE(file.is_open());
int ndata = 0;
file >> ndata;
R_gripper2base.reserve(ndata);
t_gripper2base.reserve(ndata);
Matx33d R;
Matx31d t;
Matx14d last_row;
while (file >>
R(0,0) >> R(0,1) >> R(0,2) >> t(0) >>
R(1,0) >> R(1,1) >> R(1,2) >> t(1) >>
R(2,0) >> R(2,1) >> R(2,2) >> t(2) >>
last_row(0) >> last_row(1) >> last_row(2) >> last_row(3)) {
R_gripper2base.push_back(Mat(R));
t_gripper2base.push_back(Mat(t));
}
}
std::vector<HandEyeCalibrationMethod> methods;
methods.push_back(CALIB_HAND_EYE_TSAI);
methods.push_back(CALIB_HAND_EYE_PARK);
methods.push_back(CALIB_HAND_EYE_HORAUD);
methods.push_back(CALIB_HAND_EYE_ANDREFF);
methods.push_back(CALIB_HAND_EYE_DANIILIDIS);
for (size_t idx = 0; idx < methods.size(); idx++) {
SCOPED_TRACE(cv::format("method=%s", getMethodName(methods[idx]).c_str()));
Matx33d R_cam2gripper_est;
Matx31d t_cam2gripper_est;
calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper_est, t_cam2gripper_est, methods[idx]);
EXPECT_TRUE(checkRange(R_cam2gripper_est));
EXPECT_TRUE(checkRange(t_cam2gripper_est));
}
}
}} // namespace

View File

@ -16,6 +16,7 @@ Implementation of Scale layer.
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#ifdef HAVE_CUDA
@ -389,7 +390,7 @@ public:
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert_N(inputs.size() == 1, blobs.size() == 3);
CV_Assert_N(blobs[0].total() == 1, blobs[1].total() == total(inputs[0], 1),
CV_Assert_N(blobs[0].total() == 1,
blobs[2].total() == inputs[0][1]);
outputs.assign(1, inputs[0]);
@ -412,15 +413,20 @@ public:
float* outData = outputs[0].ptr<float>();
Mat data_mean_cpu = blobs[1].clone();
Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3);
Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr<float>(0));
resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2]));
int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows};
Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size);
Mat data_mean_per_channel_cpu = blobs[2].clone();
const int numWeights = data_mean_cpu.total();
const int numWeights = data_mean_cpu_resize.total();
CV_Assert(numWeights != 0);
++num_iter;
if (num_iter <= recompute_mean)
{
data_mean_cpu *= (num_iter - 1);
data_mean_cpu_resize *= (num_iter - 1);
const int batch = inputs[0].size[0];
float alpha = 1.0 / batch;
@ -429,15 +435,15 @@ public:
Mat inpSlice(1, numWeights, CV_32F, inpData);
inpSlice = alpha * inpSlice;
add(data_mean_cpu.reshape(1, 1), inpSlice, data_mean_cpu.reshape(1, 1));
add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1));
inpData += numWeights;
}
data_mean_cpu *= (1.0 / num_iter);
data_mean_cpu_resize *= (1.0 / num_iter);
int newsize[] = {blobs[1].size[1], (int)blobs[1].total(2)};
reduce(data_mean_cpu.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)};
reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
int area = blobs[1].total(2);
int area = inputs[0].total(2);
data_mean_per_channel_cpu *= (1.0 / area);
}
@ -452,7 +458,7 @@ public:
Mat inpSlice(1, numWeights, CV_32F, inpData);
Mat outSlice(1, numWeights, CV_32F, outData);
add(inpSlice, (-1) * data_mean_cpu, outSlice);
add(inpSlice, (-1) * data_mean_cpu_resize, outSlice);
inpData += numWeights;
outData += numWeights;
}
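In NumPy terms, the mean handling that the diff above switches to is roughly the following (shapes and variable names are illustrative, not the layer's actual buffers):

import numpy as np
import cv2 as cv

inp = np.random.rand(2, 3, 8, 8).astype(np.float32)         # N x C x H x W input batch
data_mean = np.random.rand(1, 3, 4, 4).astype(np.float32)   # stored mean with its own H x W

# Resize the stored mean to the input's spatial size (the layer uses cv::resize on an HWC view).
mean_hwc = data_mean[0].transpose(1, 2, 0)                   # C,H,W -> H,W,C
mean_resized = cv.resize(mean_hwc, (inp.shape[3], inp.shape[2])).transpose(2, 0, 1)

# Running-mean update, as in the num_iter <= recompute_mean branch.
num_iter = 1
mean_resized = (mean_resized * (num_iter - 1) + inp.mean(axis=0)) / num_iter

# Per-channel mean: reduce over the spatial dimensions and divide by their area.
mean_per_channel = mean_resized.reshape(inp.shape[1], -1).sum(axis=1) / (inp.shape[2] * inp.shape[3])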

View File

@ -712,6 +712,19 @@ void ONNXImporter::populateNet(Net dstNet)
layerParams.set("bias_term", true);
}
}
else if (layer_type == "Pow")
{
if (layer_id.find(node_proto.input(1)) != layer_id.end())
CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");
Mat blob = getBlob(node_proto, constBlobs, 1);
if (blob.total() != 1)
CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");
blob.convertTo(blob, CV_32F);
layerParams.type = "Power";
layerParams.set("power", blob.at<float>(0));
}
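A hedged end-to-end sketch of the new Pow path (requires the `onnx` Python package; the scalar exponent is stored as an initializer, which is the constant form this handler expects, and the file name and shapes are made up):

import numpy as np
import onnx
from onnx import helper, TensorProto
import cv2 as cv

# Y = X ** 2 with the exponent as a constant scalar initializer.
exponent = helper.make_tensor("exponent", TensorProto.FLOAT, dims=[], vals=[2.0])
node = helper.make_node("Pow", inputs=["X", "exponent"], outputs=["Y"])
graph = helper.make_graph(
    [node], "pow2",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 4, 4])],
    initializer=[exponent])
onnx.save(helper.make_model(graph), "pow2.onnx")

net = cv.dnn.readNetFromONNX("pow2.onnx")                   # imported as a Power layer
x = np.random.rand(1, 3, 4, 4).astype(np.float32)
net.setInput(x)
assert np.allclose(net.forward(), x ** 2, atol=1e-5)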
else if (layer_type == "Max")
{
layerParams.type = "Eltwise";
@ -933,6 +946,19 @@ void ONNXImporter::populateNet(Net dstNet)
Mat bias = getBlob(node_proto, constBlobs, 2);
layerParams.blobs.push_back(bias);
}
if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
{
Mat inputBuf = getBlob(node_proto, constBlobs, 0);
LayerParams constParams;
constParams.name = node_proto.input(0);
constParams.type = "Const";
constParams.blobs.push_back(inputBuf);
opencv_onnx::NodeProto proto;
proto.add_output(constParams.name);
addLayer(dstNet, constParams, proto, layer_id, outShapes);
}
layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
layerParams.set("bias_term", node_proto.input_size() == 3);

View File

@ -657,6 +657,8 @@ TEST_P(Test_Caffe_layers, DataAugmentation)
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
testLayerUsingCaffeModels("data_augmentation", true, false);
testLayerUsingCaffeModels("data_augmentation_2x1", true, false);
testLayerUsingCaffeModels("data_augmentation_8x6", true, false);
}
TEST_P(Test_Caffe_layers, Resample)

View File

@ -301,6 +301,11 @@ TEST_P(Test_ONNX_layers, Cast)
testONNXModels("cast");
}
TEST_P(Test_ONNX_layers, Power)
{
testONNXModels("pow2", npy, 0, 0, false, false);
}
TEST_P(Test_ONNX_layers, Concatenation)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@ -639,6 +644,26 @@ TEST_P(Test_ONNX_layers, Pad2d_Unfused)
testONNXModels("ZeroPad2d");
}
TEST_P(Test_ONNX_layers, LinearWithConstant)
{
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif
testONNXModels("lin_with_constant");
}
TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
{
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif
testONNXModels("matmul_with_two_inputs");
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());
class Test_ONNX_nets : public Test_ONNX_layers

View File

@ -5,7 +5,7 @@ Original paper: https://arxiv.org/abs/1612.01925.
Original repo: https://github.com/lmb-freiburg/flownet2.
Download the converted .caffemodel model from https://drive.google.com/open?id=16qvE9VNmU39NttpZwZs81Ga8VYQJDaWZ
and .prototxt from https://drive.google.com/open?id=19bo6SWU2p8ZKvjXqMKiCPdK8mghwDy9b.
and .prototxt from https://drive.google.com/file/d/1RyNIUsan1ZOh2hpYIH36A-jofAvJlT6a/view?usp=sharing.
Otherwise download original model from https://lmb.informatik.uni-freiburg.de/resources/binaries/flownet2/flownet2-models.tar.gz,
convert .h5 model to .caffemodel and modify original .prototxt using .prototxt from link above.
'''
@ -18,7 +18,7 @@ import cv2 as cv
class OpticalFlow(object):
def __init__(self, proto, model, height, width):
self.net = cv.dnn.readNet(proto, model)
self.net = cv.dnn.readNetFromCaffe(proto, model)
self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
self.height = height
self.width = width
@ -62,9 +62,9 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Use this script to calculate optical flow using FlowNetv2',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-input', '-i', required=True, help='Path to input video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--height', default=320, help='Input height')
parser.add_argument('--width', default=448, help='Input width')
parser.add_argument('--proto', '-p', default='FlowNet2_deploy.prototxt', help='Path to prototxt.')
parser.add_argument('--height', default=320, type=int, help='Input height')
parser.add_argument('--width', default=448, type=int, help='Input width')
parser.add_argument('--proto', '-p', default='FlowNet2_deploy_anysize.prototxt', help='Path to prototxt.')
parser.add_argument('--model', '-m', default='FlowNet2_weights.caffemodel', help='Path to caffemodel.')
args, _ = parser.parse_known_args()
@ -75,7 +75,25 @@ if __name__ == '__main__':
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
hasFrame, first_frame = cap.read()
opt_flow = OpticalFlow(args.proto, args.model, args.height, args.width)
divisor = 64.
var = {}
var['ADAPTED_WIDTH'] = int(np.ceil(args.width/divisor) * divisor)
var['ADAPTED_HEIGHT'] = int(np.ceil(args.height/divisor) * divisor)
var['SCALE_WIDTH'] = args.width / float(var['ADAPTED_WIDTH'])
var['SCALE_HEIGHT'] = args.height / float(var['ADAPTED_HEIGHT'])
config = ''
proto = open(args.proto).readlines()
for line in proto:
for key, value in var.items():
tag = "$%s$" % key
line = line.replace(tag, str(value))
config += line
caffemodel = open(args.model, 'rb').read()
opt_flow = OpticalFlow(bytearray(config.encode()), caffemodel, var['ADAPTED_HEIGHT'], var['ADAPTED_WIDTH'])
while cv.waitKey(1) < 0:
hasFrame, second_frame = cap.read()
if not hasFrame:

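For reference, the placeholders substituted above ($ADAPTED_WIDTH$, $ADAPTED_HEIGHT$, $SCALE_WIDTH$, $SCALE_HEIGHT$) round the requested size up to the nearest multiple of 64, which FlowNet2 requires, and record the factor for scaling the flow back. A quick worked example of that arithmetic (standalone; the values only illustrate the script's defaults):

import numpy as np

divisor = 64.
width, height = 448, 320                                   # the script's default input size
adapted_width = int(np.ceil(width / divisor) * divisor)    # 448, already a multiple of 64
adapted_height = int(np.ceil(height / divisor) * divisor)  # 320
scale_width = width / float(adapted_width)                 # 1.0
# a non-multiple such as width=500 would give adapted_width=512 and scale_width=500/512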
397 samples/dnn/siamrpnpp.py Normal file
View File

@ -0,0 +1,397 @@
import argparse
import cv2 as cv
import numpy as np
import os
"""
Link to original paper: https://arxiv.org/abs/1812.11703
Link to original repo: https://github.com/STVIR/pysot
You can download the pre-trained weights of the Tracker Model from https://drive.google.com/file/d/11bwgPFVkps9AH2NOD1zBDdpF_tQghAB-/view?usp=sharing
You can download the target net (target branch of SiamRPN++) from https://drive.google.com/file/d/1dw_Ne3UMcCnFsaD6xkZepwE4GEpqq7U_/view?usp=sharing
You can download the search net (search branch of SiamRPN++) from https://drive.google.com/file/d/1Lt4oE43ZSucJvze3Y-Z87CVDreO-Afwl/view?usp=sharing
You can download the head model (RPN Head) from https://drive.google.com/file/d/1zT1yu12mtj3JQEkkfKFJWiZ71fJ-dQTi/view?usp=sharing
"""
class ModelBuilder():
""" This class generates the SiamRPN++ Tracker Model by using Imported ONNX Nets
"""
def __init__(self, target_net, search_net, rpn_head):
super(ModelBuilder, self).__init__()
# Build the target branch
self.target_net = target_net
# Build the search branch
self.search_net = search_net
# Build RPN_Head
self.rpn_head = rpn_head
def template(self, z):
""" Takes the template of size (1, 1, 127, 127) as an input to generate kernel
"""
self.target_net.setInput(z)
outNames = self.target_net.getUnconnectedOutLayersNames()
self.zfs_1, self.zfs_2, self.zfs_3 = self.target_net.forward(outNames)
def track(self, x):
""" Takes the search of size (1, 1, 255, 255) as an input to generate classification score and bounding box regression
"""
self.search_net.setInput(x)
outNames = self.search_net.getUnconnectedOutLayersNames()
xfs_1, xfs_2, xfs_3 = self.search_net.forward(outNames)
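# stack the three feature maps from each branch so the RPN head receives a single blob per input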
self.rpn_head.setInput(np.stack([self.zfs_1, self.zfs_2, self.zfs_3]), 'input_1')
self.rpn_head.setInput(np.stack([xfs_1, xfs_2, xfs_3]), 'input_2')
outNames = self.rpn_head.getUnconnectedOutLayersNames()
cls, loc = self.rpn_head.forward(outNames)
return {'cls': cls, 'loc': loc}
class Anchors:
""" This class generate anchors.
"""
def __init__(self, stride, ratios, scales, image_center=0, size=0):
self.stride = stride
self.ratios = ratios
self.scales = scales
self.image_center = image_center
self.size = size
self.anchor_num = len(self.scales) * len(self.ratios)
self.anchors = self.generate_anchors()
def generate_anchors(self):
"""
generate anchors based on predefined configuration
"""
anchors = np.zeros((self.anchor_num, 4), dtype=np.float32)
size = self.stride**2
count = 0
for r in self.ratios:
ws = int(np.sqrt(size * 1. / r))
hs = int(ws * r)
for s in self.scales:
w = ws * s
h = hs * s
anchors[count][:] = [-w * 0.5, -h * 0.5, w * 0.5, h * 0.5][:]
count += 1
return anchors
class SiamRPNTracker:
def __init__(self, model):
super(SiamRPNTracker, self).__init__()
self.anchor_stride = 8
self.anchor_ratios = [0.33, 0.5, 1, 2, 3]
self.anchor_scales = [8]
self.track_base_size = 8
self.track_context_amount = 0.5
self.track_exemplar_size = 127
self.track_instance_size = 255
self.track_lr = 0.4
self.track_penalty_k = 0.04
self.track_window_influence = 0.44
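# side of the RPN score map: (instance_size - exemplar_size) // stride + 1 + base_size = (255 - 127) // 8 + 1 + 8 = 25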
self.score_size = (self.track_instance_size - self.track_exemplar_size) // \
self.anchor_stride + 1 + self.track_base_size
self.anchor_num = len(self.anchor_ratios) * len(self.anchor_scales)
hanning = np.hanning(self.score_size)
window = np.outer(hanning, hanning)
self.window = np.tile(window.flatten(), self.anchor_num)
self.anchors = self.generate_anchor(self.score_size)
self.model = model
def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
"""
Args:
im: bgr based input image frame
pos: position of the center of the frame
model_sz: exemplar / target image size
original_sz: original / search image size
avg_chans: channel average
Return:
im_patch: sub_windows for the given image input
"""
if isinstance(pos, float):
pos = [pos, pos]
sz = original_sz
im_h, im_w, im_d = im.shape
c = (original_sz + 1) / 2
cx, cy = pos
context_xmin = np.floor(cx - c + 0.5)
context_xmax = context_xmin + sz - 1
context_ymin = np.floor(cy - c + 0.5)
context_ymax = context_ymin + sz - 1
left_pad = int(max(0., -context_xmin))
top_pad = int(max(0., -context_ymin))
right_pad = int(max(0., context_xmax - im_w + 1))
bottom_pad = int(max(0., context_ymax - im_h + 1))
context_xmin += left_pad
context_xmax += left_pad
context_ymin += top_pad
context_ymax += top_pad
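# if the crop extends beyond the frame, paste the image into a larger canvas padded with the channel average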
if any([top_pad, bottom_pad, left_pad, right_pad]):
size = (im_h + top_pad + bottom_pad, im_w + left_pad + right_pad, im_d)
te_im = np.zeros(size, np.uint8)
te_im[top_pad:top_pad + im_h, left_pad:left_pad + im_w, :] = im
if top_pad:
te_im[0:top_pad, left_pad:left_pad + im_w, :] = avg_chans
if bottom_pad:
te_im[im_h + top_pad:, left_pad:left_pad + im_w, :] = avg_chans
if left_pad:
te_im[:, 0:left_pad, :] = avg_chans
if right_pad:
te_im[:, im_w + left_pad:, :] = avg_chans
im_patch = te_im[int(context_ymin):int(context_ymax + 1),
int(context_xmin):int(context_xmax + 1), :]
else:
im_patch = im[int(context_ymin):int(context_ymax + 1),
int(context_xmin):int(context_xmax + 1), :]
if not np.array_equal(model_sz, original_sz):
im_patch = cv.resize(im_patch, (model_sz, model_sz))
im_patch = im_patch.transpose(2, 0, 1)
im_patch = im_patch[np.newaxis, :, :, :]
im_patch = im_patch.astype(np.float32)
return im_patch
def generate_anchor(self, score_size):
"""
Args:
score_size: number of anchor positions per side of the score map
Return:
anchor: anchors for pre-determined values of stride, ratio, and scale
"""
anchors = Anchors(self.anchor_stride, self.anchor_ratios, self.anchor_scales)
anchor = anchors.anchors
x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3]
anchor = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)
total_stride = anchors.stride
anchor_num = anchors.anchor_num
anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))
ori = - (score_size // 2) * total_stride
xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)],
[ori + total_stride * dy for dy in range(score_size)])
xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \
np.tile(yy.flatten(), (anchor_num, 1)).flatten()
anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
return anchor
def _convert_bbox(self, delta, anchor):
"""
Args:
delta: raw localisation regression output of the RPN head
anchor: anchors of pre-determined stride, ratio, and scale
Return:
delta: decoded bounding boxes as (cx, cy, w, h) for every anchor
"""
delta_transpose = np.transpose(delta, (1, 2, 3, 0))
delta_contig = np.ascontiguousarray(delta_transpose)
delta = delta_contig.reshape(4, -1)
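# decode the (dx, dy, dw, dh) regression offsets against the anchors into absolute (cx, cy, w, h)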
delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0]
delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1]
delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2]
delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3]
return delta
def _softmax(self, x):
"""
Softmax in the direction of the depth of the layer
"""
x = x.astype(dtype=np.float32)
x_max = x.max(axis=1)[:, np.newaxis]
e_x = np.exp(x-x_max)
div = np.sum(e_x, axis=1)[:, np.newaxis]
y = e_x / div
return y
def _convert_score(self, score):
"""
Args:
score: raw classification scores from the RPN head
Return:
score: softmax probability of the foreground class for each anchor
"""
score_transpose = np.transpose(score, (1, 2, 3, 0))
score_con = np.ascontiguousarray(score_transpose)
score_view = score_con.reshape(2, -1)
score = np.transpose(score_view, (1, 0))
score = self._softmax(score)
return score[:,1]
def _bbox_clip(self, cx, cy, width, height, boundary):
"""
Clip the bounding box center to the image and clamp its width and height to the frame (minimum 10 px)
"""
bbox_h, bbox_w = boundary
cx = max(0, min(cx, bbox_w))
cy = max(0, min(cy, bbox_h))
width = max(10, min(width, bbox_w))
height = max(10, min(height, bbox_h))
return cx, cy, width, height
def init(self, img, bbox):
"""
Args:
img(np.ndarray): bgr based input image frame
bbox: (x,y,w,h): bounding box
"""
x,y,h,w = bbox
self.center_pos = np.array([x + (h - 1) / 2, y + (w - 1) / 2])
self.h = h
self.w = w
w_z = self.w + self.track_context_amount * np.add(h, w)
h_z = self.h + self.track_context_amount * np.add(h, w)
s_z = round(np.sqrt(w_z * h_z))
self.channel_average = np.mean(img, axis=(0, 1))
z_crop = self.get_subwindow(img, self.center_pos, self.track_exemplar_size, s_z, self.channel_average)
self.model.template(z_crop)
def track(self, img):
"""
Args:
img(np.ndarray): BGR image
Return:
bbox(list):[x, y, width, height]
"""
w_z = self.w + self.track_context_amount * np.add(self.w, self.h)
h_z = self.h + self.track_context_amount * np.add(self.w, self.h)
s_z = np.sqrt(w_z * h_z)
scale_z = self.track_exemplar_size / s_z
s_x = s_z * (self.track_instance_size / self.track_exemplar_size)
x_crop = self.get_subwindow(img, self.center_pos, self.track_instance_size, round(s_x), self.channel_average)
outputs = self.model.track(x_crop)
score = self._convert_score(outputs['cls'])
pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)
def change(r):
return np.maximum(r, 1. / r)
def sz(w, h):
pad = (w + h) * 0.5
return np.sqrt((w + pad) * (h + pad))
# scale penalty
s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
(sz(self.w * scale_z, self.h * scale_z)))
# aspect ratio penalty
r_c = change((self.w / self.h) /
(pred_bbox[2, :] / pred_bbox[3, :]))
penalty = np.exp(-(r_c * s_c - 1) * self.track_penalty_k)
pscore = penalty * score
# window penalty
pscore = pscore * (1 - self.track_window_influence) + \
self.window * self.track_window_influence
best_idx = np.argmax(pscore)
bbox = pred_bbox[:, best_idx] / scale_z
lr = penalty[best_idx] * score[best_idx] * self.track_lr
cpx, cpy = self.center_pos
x,y,w,h = bbox
cx = x + cpx
cy = y + cpy
# smooth bbox
width = self.w * (1 - lr) + w * lr
height = self.h * (1 - lr) + h * lr
# clip boundary
cx, cy, width, height = self._bbox_clip(cx, cy, width, height, img.shape[:2])
# update state
self.center_pos = np.array([cx, cy])
self.w = width
self.h = height
bbox = [cx - width / 2, cy - height / 2, width, height]
best_score = score[best_idx]
return {'bbox': bbox, 'best_score': best_score}
def get_frames(video_name):
"""
Args:
video_name: path to the input video, or an empty value to capture frames from a camera
Return:
frame: frames read from the video / camera, yielded one at a time
"""
cap = cv.VideoCapture(video_name if video_name else 0)
while True:
ret, frame = cap.read()
if ret:
yield frame
else:
break
def main():
""" Sample SiamRPN Tracker
"""
# Computation backends supported by layers
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
# Target Devices for computation
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
parser = argparse.ArgumentParser(description='Use this script to run SiamRPN++ Visual Tracker',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--input_video', type=str, help='Path to input video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--target_net', type=str, default='target_net.onnx', help='Path to part of SiamRPN++ ran on target frame.')
parser.add_argument('--search_net', type=str, default='search_net.onnx', help='Path to part of SiamRPN++ ran on search frame.')
parser.add_argument('--rpn_head', type=str, default='rpn_head.onnx', help='Path to RPN Head ONNX model.')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help='Select a computation backend: '
"%d: automatically (by default) "
"%d: Halide"
"%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)"
"%d: OpenCV Implementation" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
help='Select a target device: '
"%d: CPU target (by default)"
"%d: OpenCL"
"%d: OpenCL FP16"
"%d: Myriad" % targets)
args, _ = parser.parse_known_args()
if args.input_video and not os.path.isfile(args.input_video):
raise OSError("Input video file does not exist")
if not os.path.isfile(args.target_net):
raise OSError("Target Net does not exist")
if not os.path.isfile(args.search_net):
raise OSError("Search Net does not exist")
if not os.path.isfile(args.rpn_head):
raise OSError("RPN Head Net does not exist")
#Load the Networks
target_net = cv.dnn.readNetFromONNX(args.target_net)
target_net.setPreferableBackend(args.backend)
target_net.setPreferableTarget(args.target)
search_net = cv.dnn.readNetFromONNX(args.search_net)
search_net.setPreferableBackend(args.backend)
search_net.setPreferableTarget(args.target)
rpn_head = cv.dnn.readNetFromONNX(args.rpn_head)
rpn_head.setPreferableBackend(args.backend)
rpn_head.setPreferableTarget(args.target)
model = ModelBuilder(target_net, search_net, rpn_head)
tracker = SiamRPNTracker(model)
first_frame = True
cv.namedWindow('SiamRPN++ Tracker', cv.WINDOW_AUTOSIZE)
for frame in get_frames(args.input_video):
if first_frame:
try:
init_rect = cv.selectROI('SiamRPN++ Tracker', frame, False, False)
except:
exit()
tracker.init(frame, init_rect)
first_frame = False
else:
outputs = tracker.track(frame)
bbox = list(map(int, outputs['bbox']))
x,y,w,h = bbox
cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
cv.imshow('SiamRPN++ Tracker', frame)
key = cv.waitKey(1)
if key == ord("q"):
break
if __name__ == '__main__':
main()