Merge remote-tracking branch 'upstream/3.4' into merge-3.4
Commit: ba147d2be2
@@ -9,6 +9,9 @@ MathJax.Hub.Config(
  forkfour: ["\\left\\{ \\begin{array}{l l} #1 & \\mbox{#2}\\\\ #3 & \\mbox{#4}\\\\ #5 & \\mbox{#6}\\\\ #7 & \\mbox{#8}\\\\ \\end{array} \\right.", 8],
  vecthree: ["\\begin{bmatrix} #1\\\\ #2\\\\ #3 \\end{bmatrix}", 3],
  vecthreethree: ["\\begin{bmatrix} #1 & #2 & #3\\\\ #4 & #5 & #6\\\\ #7 & #8 & #9 \\end{bmatrix}", 9],
  cameramatrix: ["#1 = \\begin{bmatrix} f_x & 0 & c_x\\\\ 0 & f_y & c_y\\\\ 0 & 0 & 1 \\end{bmatrix}", 1],
  distcoeffs: ["(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \\tau_x, \\tau_y]]]]) \\text{ of 4, 5, 8, 12 or 14 elements}"],
  distcoeffsfisheye: ["(k_1, k_2, k_3, k_4)"],
  hdotsfor: ["\\dots", 1],
  mathbbm: ["\\mathbb{#1}", 1],
  bordermatrix: ["\\matrix{#1}", 1]
@@ -51,3 +51,20 @@
#7 & #8 & #9
\end{bmatrix}
}

\newcommand{\cameramatrix}[1]{
#1 =
\begin{bmatrix}
f_x & 0 & c_x\\
0 & f_y & c_y\\
0 & 0 & 1
\end{bmatrix}
}

\newcommand{\distcoeffs}[]{
(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]]) \text{ of 4, 5, 8, 12 or 14 elements}
}

\newcommand{\distcoeffsfisheye}[]{
(k_1, k_2, k_3, k_4)
}
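These TeX macros let the long inline matrices and coefficient lists in the calib3d documentation be written compactly. For example, a parameter description further down in this same diff now reads:

    @param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
    @param distCoeffs Input vector of distortion coefficients
    \f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are assumed.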
@@ -36,18 +36,27 @@ class PatternMaker:
    def make_circles_pattern(self):
        spacing = self.square_size
        r = spacing / self.radius_rate
        for x in range(1, self.cols + 1):
            for y in range(1, self.rows + 1):
                dot = SVG("circle", cx=x * spacing, cy=y * spacing, r=r, fill="black", stroke="none")
        pattern_width = ((self.cols - 1.0) * spacing) + (2.0 * r)
        pattern_height = ((self.rows - 1.0) * spacing) + (2.0 * r)
        x_spacing = (self.width - pattern_width) / 2.0
        y_spacing = (self.height - pattern_height) / 2.0
        for x in range(0, self.cols):
            for y in range(0, self.rows):
                dot = SVG("circle", cx=(x * spacing) + x_spacing + r,
                          cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none")
                self.g.append(dot)

    def make_acircles_pattern(self):
        spacing = self.square_size
        r = spacing / self.radius_rate
        for i in range(0, self.rows):
            for j in range(0, self.cols):
                dot = SVG("circle", cx=((j * 2 + i % 2) * spacing) + spacing, cy=self.height - (i * spacing + spacing),
                          r=r, fill="black", stroke="none")
        pattern_width = ((self.cols-1.0) * 2 * spacing) + spacing + (2.0 * r)
        pattern_height = ((self.rows-1.0) * spacing) + (2.0 * r)
        x_spacing = (self.width - pattern_width) / 2.0
        y_spacing = (self.height - pattern_height) / 2.0
        for x in range(0, self.cols):
            for y in range(0, self.rows):
                dot = SVG("circle", cx=(2 * x * spacing) + (y % 2)*spacing + x_spacing + r,
                          cy=(y * spacing) + y_spacing + r, r=r, fill="black", stroke="none")
                self.g.append(dot)

    def make_checkerboard_pattern(self):
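For clarity, a minimal standalone sketch of the centering arithmetic introduced above (plain Python, no svgfig dependency; the helper function itself is illustrative, only the formulas mirror the patch):

    def circle_centers(cols, rows, square_size, radius_rate, page_w, page_h):
        # Measure the circle grid, then offset it so it sits centered on the page.
        spacing = square_size
        r = spacing / radius_rate
        pattern_width = (cols - 1.0) * spacing + 2.0 * r
        pattern_height = (rows - 1.0) * spacing + 2.0 * r
        x_spacing = (page_w - pattern_width) / 2.0
        y_spacing = (page_h - pattern_height) / 2.0
        return [(x * spacing + x_spacing + r, y * spacing + y_spacing + r)
                for x in range(cols) for y in range(rows)]

    print(circle_centers(4, 3, 20.0, 5.0, 216, 279)[:3])   # first three centers, page units as in the script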
@@ -84,9 +93,9 @@ def main():
    parser.add_argument("-R", "--radius_rate", help="circles_radius = square_size/radius_rate", default="5.0",
                        action="store", dest="radius_rate", type=float)
    parser.add_argument("-w", "--page_width", help="page width in units", default="216", action="store",
                        dest="page_width", type=int)
                        dest="page_width", type=float)
    parser.add_argument("-h", "--page_height", help="page height in units", default="279", action="store",
                        dest="page_width", type=int)
                        dest="page_width", type=float)
    parser.add_argument("-a", "--page_size", help="page size, supersedes -h -w arguments", default="A4", action="store",
                        dest="page_size", choices=["A0", "A1", "A2", "A3", "A4", "A5"])
    args = parser.parse_args()
@@ -64,17 +64,17 @@ The distortion-free projective transformation given by a pinhole camera model is
\f[s \; p = A \begin{bmatrix} R|t \end{bmatrix} P_w,\f]

where \f$P_w\f$ is a 3D point expressed with respect to the world coordinate system,
\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the intrinsic camera matrix,
\f$p\f$ is a 2D pixel in the image plane, \f$A\f$ is the camera intrinsic matrix,
\f$R\f$ and \f$t\f$ are the rotation and translation that describe the change of coordinates from
world to camera coordinate systems (or camera frame) and \f$s\f$ is the projective transformation's
arbitrary scaling and not part of the camera model.

The intrinsic camera matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated
The camera intrinsic matrix \f$A\f$ (notation used as in @cite Zhang2000 and also generally notated
as \f$K\f$) projects 3D points given in the camera coordinate system to 2D pixel coordinates, i.e.

\f[p = A P_c.\f]

The camera matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are
The camera intrinsic matrix \f$A\f$ is composed of the focal lengths \f$f_x\f$ and \f$f_y\f$, which are
expressed in pixel units, and the principal point \f$(c_x, c_y)\f$, that is usually close to the
image center:
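To make the projection formula above concrete, a minimal NumPy sketch (all values are illustrative and chosen by the editor, not taken from the patch):

    import numpy as np

    # Camera intrinsic matrix A, a world point P_w, and a world->camera pose (R, t).
    A = np.array([[800.0,   0.0, 320.0],
                  [  0.0, 800.0, 240.0],
                  [  0.0,   0.0,   1.0]])
    R = np.eye(3)
    t = np.array([[0.0], [0.0], [4.0]])
    P_w = np.array([[0.1], [0.2], [1.0]])

    P_c = R @ P_w + t               # change of basis: world frame -> camera frame
    p_h = A @ P_c                   # s * p = A * P_c  (homogeneous pixel coordinates)
    p = (p_h[:2] / p_h[2]).ravel()  # divide by the scale s
    print(p)                        # [336. 272.]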
@@ -382,9 +382,9 @@ R & t \\
\end{bmatrix} P_{h_0}.\f]

@note
- Many functions in this module take a camera matrix as an input parameter. Although all
- Many functions in this module take a camera intrinsic matrix as an input parameter. Although all
functions assume the same structure of this parameter, they may name it differently. The
parameter's description, however, will be clear in that a camera matrix with the structure
parameter's description, however, will be clear in that a camera intrinsic matrix with the structure
shown above is required.
- A calibration sample for 3 cameras in a horizontal position can be found at
opencv_source_code/samples/cpp/3calibration.cpp
@@ -457,8 +457,10 @@ enum SolvePnPMethod {
    SOLVEPNP_ITERATIVE = 0,
    SOLVEPNP_EPNP = 1, //!< EPnP: Efficient Perspective-n-Point Camera Pose Estimation @cite lepetit2009epnp
    SOLVEPNP_P3P = 2, //!< Complete Solution Classification for the Perspective-Three-Point Problem @cite gao2003complete
    SOLVEPNP_DLS = 3, //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct
    SOLVEPNP_UPNP = 4, //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
    SOLVEPNP_DLS = 3, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
                      //!< A Direct Least-Squares (DLS) Method for PnP @cite hesch2011direct
    SOLVEPNP_UPNP = 4, //!< **Broken implementation. Using this flag will fallback to EPnP.** \n
                      //!< Exhaustive Linearization for Robust Camera Pose and Focal Length Estimation @cite penate2013exhaustive
    SOLVEPNP_AP3P = 5, //!< An Efficient Algebraic Solution to the Perspective-Three-Point Problem @cite Ke17
    SOLVEPNP_IPPE = 6, //!< Infinitesimal Plane-Based Pose Estimation @cite Collins14 \n
                      //!< Object points must be coplanar.
@@ -752,10 +754,10 @@ CV_EXPORTS_W Vec3d RQDecomp3x3( InputArray src, OutputArray mtxR, OutputArray mt
                                OutputArray Qy = noArray(),
                                OutputArray Qz = noArray());

/** @brief Decomposes a projection matrix into a rotation matrix and a camera matrix.
/** @brief Decomposes a projection matrix into a rotation matrix and a camera intrinsic matrix.

@param projMatrix 3x4 input projection matrix P.
@param cameraMatrix Output 3x3 camera matrix K.
@param cameraMatrix Output 3x3 camera intrinsic matrix \f$\cameramatrix{A}\f$.
@param rotMatrix Output 3x3 external rotation matrix R.
@param transVect Output 4x1 translation vector T.
@param rotMatrixX Optional 3x3 rotation matrix around x-axis.
@@ -840,10 +842,9 @@ CV_EXPORTS_W void composeRT( InputArray rvec1, InputArray tvec1,
@param rvec The rotation vector (@ref Rodrigues) that, together with tvec, performs a change of
basis from world to camera coordinate system, see @ref calibrateCamera for details.
@param tvec The translation vector, see parameter description above.
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$ .
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
\f$\distcoeffs\f$ . If the vector is empty, the zero distortion coefficients are assumed.
@param imagePoints Output array of image points, 1xN/Nx1 2-channel, or
vector\<Point2f\> .
@param jacobian Optional output 2Nx(10+\<numDistCoeffs\>) jacobian matrix of derivatives of image
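A short usage sketch of projectPoints through the Python bindings (illustrative values; passing None for distCoeffs corresponds to the empty vector / zero-distortion case described above):

    import numpy as np
    import cv2 as cv

    objpts = np.array([[0.0, 0.0, 0.0], [0.1, 0.0, 0.0], [0.0, 0.1, 0.0]])
    rvec = np.zeros(3)                      # Rodrigues rotation vector, world -> camera
    tvec = np.array([0.0, 0.0, 1.0])        # translation, world -> camera
    A = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]])

    imgpts, jacobian = cv.projectPoints(objpts, rvec, tvec, A, None)
    print(imgpts.reshape(-1, 2))            # pixel coordinates of the three points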
@@ -897,10 +898,9 @@ Number of input points must be 4. Object points must be defined in the following
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system.
@@ -912,7 +912,7 @@ vectors, respectively, and further optimizes them.
- **SOLVEPNP_ITERATIVE** Iterative method is based on a Levenberg-Marquardt optimization. In
this case the function finds such a pose that minimizes reprojection error, that is the sum
of squared distances between the observed projections imagePoints and the projected (using
projectPoints ) objectPoints .
@ref projectPoints ) objectPoints .
- **SOLVEPNP_P3P** Method is based on the paper of X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang
"Complete Solution Classification for the Perspective-Three-Point Problem" (@cite gao2003complete).
In this case the function requires exactly four object and image points.
@@ -921,9 +921,11 @@ In this case the function requires exactly four object and image points.
In this case the function requires exactly four object and image points.
- **SOLVEPNP_EPNP** Method has been introduced by F. Moreno-Noguer, V. Lepetit and P. Fua in the
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
- **SOLVEPNP_DLS** Method is based on the paper of J. Hesch and S. Roumeliotis.
- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of J. Hesch and S. Roumeliotis.
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
- **SOLVEPNP_UPNP** Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
Method is based on the paper of A. Penate-Sanchez, J. Andrade-Cetto,
F. Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
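A hedged sketch of picking one of these flags from Python (points and intrinsics are made up; note that, per this change, requesting SOLVEPNP_DLS or SOLVEPNP_UPNP currently runs EPnP instead):

    import numpy as np
    import cv2 as cv

    # Four coplanar object points (z = 0) and their observed projections.
    obj = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 1.0, 0.0], [0.0, 1.0, 0.0]])
    img = np.array([[320.0, 240.0], [420.0, 240.0], [420.0, 340.0], [320.0, 340.0]])
    A = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]])

    ok, rvec, tvec = cv.solvePnP(obj, img, A, None, flags=cv.SOLVEPNP_IPPE)
    # cv.SOLVEPNP_DLS and cv.SOLVEPNP_UPNP are still accepted but fall back to EPnP.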
@@ -939,7 +941,7 @@ It requires 4 coplanar object points defined in the following order:
- point 3: [-squareLength / 2, -squareLength / 2, 0]

The function estimates the object pose given a set of object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients, see the figure below
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
and the Z-axis forward).
@@ -1072,10 +1074,9 @@ CV_EXPORTS_W bool solvePnP( InputArray objectPoints, InputArray imagePoints,
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
where N is the number of points. vector\<Point2d\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvec Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
the model coordinate system to the camera coordinate system.
@@ -1092,7 +1093,7 @@ an inlier.
@param flags Method for solving a PnP problem (see @ref solvePnP ).

The function estimates an object pose given a set of object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients. This function finds such
projections, as well as the camera intrinsic matrix and the distortion coefficients. This function finds such
a pose that minimizes reprojection error, that is, the sum of squared distances between the observed
projections imagePoints and the projected (using @ref projectPoints ) objectPoints. The use of RANSAC
makes the function resistant to outliers.
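The RANSAC variant, sketched the same way with a few deliberately corrupted correspondences (all values illustrative):

    import numpy as np
    import cv2 as cv

    rng = np.random.default_rng(0)
    obj = rng.uniform(-1.0, 1.0, (50, 3))                    # 50 random 3D points
    A = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]])
    img, _ = cv.projectPoints(obj, np.zeros(3), np.array([0.0, 0.0, 10.0]), A, None)
    img = img.reshape(-1, 2)
    img[:5] += 50.0                                          # inject gross outliers

    ok, rvec, tvec, inliers = cv.solvePnPRansac(obj, img, A, None, reprojectionError=8.0)
    # 'inliers' indexes the correspondences that survived RANSAC; the corrupted ones are rejected.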
@@ -1131,10 +1132,9 @@ CV_EXPORTS_W bool solvePnPRansac( InputArray objectPoints, InputArray imagePoint
1x3/3x1 3-channel. vector\<Point3f\> can be also passed here.
@param imagePoints Array of corresponding image points, 3x2 1-channel or 1x3/3x1 2-channel.
vector\<Point2f\> can be also passed here.
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
@param distCoeffs Input vector of distortion coefficients
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
assumed.
@param rvecs Output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from
the model coordinate system to the camera coordinate system. A P3P problem has up to 4 solutions.
@@ -1146,7 +1146,7 @@ the model coordinate system to the camera coordinate system. A P3P problem has u
"An Efficient Algebraic Solution to the Perspective-Three-Point Problem" (@cite Ke17).

The function estimates the object pose given 3 object points, their corresponding image
projections, as well as the camera matrix and the distortion coefficients.
projections, as well as the camera intrinsic matrix and the distortion coefficients.

@note
The solutions are sorted by reprojection errors (lowest to highest).
@ -1163,10 +1163,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting
|
||||
where N is the number of points. vector\<Point3d\> can also be passed here.
|
||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||
where N is the number of points. vector\<Point2d\> can also be passed here.
|
||||
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
@param distCoeffs Input vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
assumed.
|
||||
@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
|
||||
the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
|
||||
@ -1175,7 +1174,7 @@ the model coordinate system to the camera coordinate system. Input values are us
|
||||
|
||||
The function refines the object pose given at least 3 object points, their corresponding image
|
||||
projections, an initial solution for the rotation and translation vector,
|
||||
as well as the camera matrix and the distortion coefficients.
|
||||
as well as the camera intrinsic matrix and the distortion coefficients.
|
||||
The function minimizes the projection error with respect to the rotation and the translation vectors, according
|
||||
to a Levenberg-Marquardt iterative minimization @cite Madsen04 @cite Eade13 process.
|
||||
*/
|
||||
@ -1191,10 +1190,9 @@ to the camera coordinate frame) from a 3D-2D point correspondences and starting
|
||||
where N is the number of points. vector\<Point3d\> can also be passed here.
|
||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||
where N is the number of points. vector\<Point2d\> can also be passed here.
|
||||
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
@param distCoeffs Input vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
assumed.
|
||||
@param rvec Input/Output rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
|
||||
the model coordinate system to the camera coordinate system. Input values are used as an initial solution.
|
||||
@ -1205,7 +1203,7 @@ gain in the Damped Gauss-Newton formulation.
|
||||
|
||||
The function refines the object pose given at least 3 object points, their corresponding image
|
||||
projections, an initial solution for the rotation and translation vector,
|
||||
as well as the camera matrix and the distortion coefficients.
|
||||
as well as the camera intrinsic matrix and the distortion coefficients.
|
||||
The function minimizes the projection error with respect to the rotation and the translation vectors, using a
|
||||
virtual visual servoing (VVS) @cite Chaumette06 @cite Marchand16 scheme.
|
||||
*/
|
||||
@ -1233,10 +1231,9 @@ Only 1 solution is returned.
|
||||
1xN/Nx1 3-channel, where N is the number of points. vector\<Point3d\> can be also passed here.
|
||||
@param imagePoints Array of corresponding image points, Nx2 1-channel or 1xN/Nx1 2-channel,
|
||||
where N is the number of points. vector\<Point2d\> can be also passed here.
|
||||
@param cameraMatrix Input camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Input camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
@param distCoeffs Input vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
assumed.
|
||||
@param rvecs Vector of output rotation vectors (see @ref Rodrigues ) that, together with tvecs, brings points from
|
||||
the model coordinate system to the camera coordinate system.
|
||||
@ -1257,9 +1254,11 @@ In this case the function requires exactly four object and image points.
|
||||
In this case the function requires exactly four object and image points.
|
||||
- **SOLVEPNP_EPNP** Method has been introduced by F.Moreno-Noguer, V.Lepetit and P.Fua in the
|
||||
paper "EPnP: Efficient Perspective-n-Point Camera Pose Estimation" (@cite lepetit2009epnp).
|
||||
- **SOLVEPNP_DLS** Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
|
||||
- **SOLVEPNP_DLS** **Broken implementation. Using this flag will fallback to EPnP.** \n
|
||||
Method is based on the paper of Joel A. Hesch and Stergios I. Roumeliotis.
|
||||
"A Direct Least-Squares (DLS) Method for PnP" (@cite hesch2011direct).
|
||||
- **SOLVEPNP_UPNP** Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
|
||||
- **SOLVEPNP_UPNP** **Broken implementation. Using this flag will fallback to EPnP.** \n
|
||||
Method is based on the paper of A.Penate-Sanchez, J.Andrade-Cetto,
|
||||
F.Moreno-Noguer. "Exhaustive Linearization for Robust Camera Pose and Focal Length
|
||||
Estimation" (@cite penate2013exhaustive). In this case the function also estimates the parameters \f$f_x\f$ and \f$f_y\f$
|
||||
assuming that both have the same value. Then the cameraMatrix is updated with the estimated
|
||||
@ -1282,7 +1281,7 @@ and useExtrinsicGuess is set to true.
|
||||
and the 3D object points projected with the estimated pose.
|
||||
|
||||
The function estimates the object pose given a set of object points, their corresponding image
|
||||
projections, as well as the camera matrix and the distortion coefficients, see the figure below
|
||||
projections, as well as the camera intrinsic matrix and the distortion coefficients, see the figure below
|
||||
(more precisely, the X-axis of the camera frame is pointing to the right, the Y-axis downward
|
||||
and the Z-axis forward).
|
||||
|
||||
@ -1411,7 +1410,7 @@ CV_EXPORTS_W int solvePnPGeneric( InputArray objectPoints, InputArray imagePoint
|
||||
InputArray rvec = noArray(), InputArray tvec = noArray(),
|
||||
OutputArray reprojectionError = noArray() );
|
||||
|
||||
/** @brief Finds an initial camera matrix from 3D-2D point correspondences.
|
||||
/** @brief Finds an initial camera intrinsic matrix from 3D-2D point correspondences.
|
||||
|
||||
@param objectPoints Vector of vectors of the calibration pattern points in the calibration pattern
|
||||
coordinate space. In the old interface all the per-view vectors are concatenated. See
|
||||
@ -1422,7 +1421,7 @@ old interface all the per-view vectors are concatenated.
|
||||
@param aspectRatio If it is zero or negative, both \f$f_x\f$ and \f$f_y\f$ are estimated independently.
|
||||
Otherwise, \f$f_x = f_y * \texttt{aspectRatio}\f$ .
|
||||
|
||||
The function estimates and returns an initial camera matrix for the camera calibration process.
|
||||
The function estimates and returns an initial camera intrinsic matrix for the camera calibration process.
|
||||
Currently, the function only supports planar calibration patterns, which are patterns where each
|
||||
object point has z-coordinate =0.
|
||||
*/
|
||||
@ -1601,10 +1600,9 @@ CV_EXPORTS_W void drawChessboardCorners( InputOutputArray image, Size patternSiz
|
||||
|
||||
@param image Input/output image. It must have 1 or 3 channels. The number of channels is not altered.
|
||||
@param cameraMatrix Input 3x3 floating-point matrix of camera intrinsic parameters.
|
||||
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
|
||||
\f$\cameramatrix{A}\f$
|
||||
@param distCoeffs Input vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements. If the vector is empty, the zero distortion coefficients are assumed.
|
||||
\f$\distcoeffs\f$. If the vector is empty, the zero distortion coefficients are assumed.
|
||||
@param rvec Rotation vector (see @ref Rodrigues ) that, together with tvec, brings points from
|
||||
the model coordinate system to the camera coordinate system.
|
||||
@param tvec Translation vector.
|
||||
@ -1707,14 +1705,13 @@ pattern points (e.g. std::vector<std::vector<cv::Vec2f>>). imagePoints.size() an
|
||||
objectPoints.size(), and imagePoints[i].size() and objectPoints[i].size() for each i, must be equal,
|
||||
respectively. In the old interface all the vectors of object points from different views are
|
||||
concatenated together.
|
||||
@param imageSize Size of the image used only to initialize the intrinsic camera matrix.
|
||||
@param cameraMatrix Input/output 3x3 floating-point camera matrix
|
||||
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
|
||||
@param imageSize Size of the image used only to initialize the camera intrinsic matrix.
|
||||
@param cameraMatrix Input/output 3x3 floating-point camera intrinsic matrix
|
||||
\f$\cameramatrix{A}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
|
||||
and/or CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
|
||||
initialized before calling the function.
|
||||
@param distCoeffs Input/output vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements.
|
||||
\f$\distcoeffs\f$.
|
||||
@param rvecs Output vector of rotation vectors (@ref Rodrigues ) estimated for each pattern view
|
||||
(e.g. std::vector<cv::Mat>>). That is, each i-th rotation vector together with the corresponding
|
||||
i-th translation vector (see the next output parameter description) brings the calibration pattern
|
||||
@ -1904,9 +1901,9 @@ CV_EXPORTS_W double calibrateCameraRO( InputArrayOfArrays objectPoints,
|
||||
int flags = 0, TermCriteria criteria = TermCriteria(
|
||||
TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON) );
|
||||
|
||||
/** @brief Computes useful camera characteristics from the camera matrix.
|
||||
/** @brief Computes useful camera characteristics from the camera intrinsic matrix.
|
||||
|
||||
@param cameraMatrix Input camera matrix that can be estimated by calibrateCamera or
|
||||
@param cameraMatrix Input camera intrinsic matrix that can be estimated by calibrateCamera or
|
||||
stereoCalibrate .
|
||||
@param imageSize Input image size in pixels.
|
||||
@param apertureWidth Physical width in mm of the sensor.
|
||||
@ -1942,15 +1939,15 @@ be equal for each i.
|
||||
observed by the first camera. The same structure as in @ref calibrateCamera.
|
||||
@param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
|
||||
observed by the second camera. The same structure as in @ref calibrateCamera.
|
||||
@param cameraMatrix1 Input/output camera matrix for the first camera, the same as in
|
||||
@param cameraMatrix1 Input/output camera intrinsic matrix for the first camera, the same as in
|
||||
@ref calibrateCamera. Furthermore, for the stereo case, additional flags may be used, see below.
|
||||
@param distCoeffs1 Input/output vector of distortion coefficients, the same as in
|
||||
@ref calibrateCamera.
|
||||
@param cameraMatrix2 Input/output second camera matrix for the second camera. See description for
|
||||
@param cameraMatrix2 Input/output second camera intrinsic matrix for the second camera. See description for
|
||||
cameraMatrix1.
|
||||
@param distCoeffs2 Input/output lens distortion coefficients for the second camera. See
|
||||
description for distCoeffs1.
|
||||
@param imageSize Size of the image used only to initialize the intrinsic camera matrices.
|
||||
@param imageSize Size of the image used only to initialize the camera intrinsic matrices.
|
||||
@param R Output rotation matrix. Together with the translation vector T, this matrix brings
|
||||
points given in the first camera's coordinate system to points in the second camera's
|
||||
coordinate system. In more technical terms, the tuple of R and T performs a change of basis
|
||||
@ -2071,9 +2068,9 @@ CV_EXPORTS_W double stereoCalibrate( InputArrayOfArrays objectPoints,
|
||||
|
||||
/** @brief Computes rectification transforms for each head of a calibrated stereo camera.
|
||||
|
||||
@param cameraMatrix1 First camera matrix.
|
||||
@param cameraMatrix1 First camera intrinsic matrix.
|
||||
@param distCoeffs1 First camera distortion parameters.
|
||||
@param cameraMatrix2 Second camera matrix.
|
||||
@param cameraMatrix2 Second camera intrinsic matrix.
|
||||
@param distCoeffs2 Second camera distortion parameters.
|
||||
@param imageSize Size of the image used for stereo calibration.
|
||||
@param R Rotation matrix from the coordinate system of the first camera to the second camera,
|
||||
@ -2229,12 +2226,11 @@ CV_EXPORTS_W float rectify3Collinear( InputArray cameraMatrix1, InputArray distC
|
||||
OutputArray Q, double alpha, Size newImgSize,
|
||||
CV_OUT Rect* roi1, CV_OUT Rect* roi2, int flags );
|
||||
|
||||
/** @brief Returns the new camera matrix based on the free scaling parameter.
|
||||
/** @brief Returns the new camera intrinsic matrix based on the free scaling parameter.
|
||||
|
||||
@param cameraMatrix Input camera matrix.
|
||||
@param cameraMatrix Input camera intrinsic matrix.
|
||||
@param distCoeffs Input vector of distortion coefficients
|
||||
\f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6 [, s_1, s_2, s_3, s_4[, \tau_x, \tau_y]]]])\f$ of
|
||||
4, 5, 8, 12 or 14 elements. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
\f$\distcoeffs\f$. If the vector is NULL/empty, the zero distortion coefficients are
|
||||
assumed.
|
||||
@param imageSize Original image size.
|
||||
@param alpha Free scaling parameter between 0 (when all the pixels in the undistorted image are
|
||||
@ -2243,17 +2239,17 @@ stereoRectify for details.
|
||||
@param newImgSize Image size after rectification. By default, it is set to imageSize .
|
||||
@param validPixROI Optional output rectangle that outlines all-good-pixels region in the
|
||||
undistorted image. See roi1, roi2 description in stereoRectify .
|
||||
@param centerPrincipalPoint Optional flag that indicates whether in the new camera matrix the
|
||||
@param centerPrincipalPoint Optional flag that indicates whether in the new camera intrinsic matrix the
|
||||
principal point should be at the image center or not. By default, the principal point is chosen to
|
||||
best fit a subset of the source image (determined by alpha) to the corrected image.
|
||||
@return new_camera_matrix Output new camera matrix.
|
||||
@return new_camera_matrix Output new camera intrinsic matrix.
|
||||
|
||||
The function computes and returns the optimal new camera matrix based on the free scaling parameter.
|
||||
The function computes and returns the optimal new camera intrinsic matrix based on the free scaling parameter.
|
||||
By varying this parameter, you may retrieve only sensible pixels alpha=0 , keep all the original
|
||||
image pixels if there is valuable information in the corners alpha=1 , or get something in between.
|
||||
When alpha\>0 , the undistorted result is likely to have some black pixels corresponding to
|
||||
"virtual" pixels outside of the captured distorted image. The original camera matrix, distortion
|
||||
coefficients, the computed new camera matrix, and newImageSize should be passed to
|
||||
"virtual" pixels outside of the captured distorted image. The original camera intrinsic matrix, distortion
|
||||
coefficients, the computed new camera intrinsic matrix, and newImageSize should be passed to
|
||||
initUndistortRectifyMap to produce the maps for remap .
|
||||
*/
|
||||
CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray distCoeffs,
|
||||
@@ -2265,23 +2261,23 @@ CV_EXPORTS_W Mat getOptimalNewCameraMatrix( InputArray cameraMatrix, InputArray

@param[in] R_gripper2base Rotation part extracted from the homogeneous matrix that transforms a point
expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
from gripper frame to robot base frame.
This is a vector (`vector<Mat>`) that contains the rotation, `(3x3)` rotation matrices or `(3x1)` rotation vectors,
for all the transformations from gripper frame to robot base frame.
@param[in] t_gripper2base Translation part extracted from the homogeneous matrix that transforms a point
expressed in the gripper frame to the robot base frame (\f$_{}^{b}\textrm{T}_g\f$).
This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
This is a vector (`vector<Mat>`) that contains the `(3x1)` translation vectors for all the transformations
from gripper frame to robot base frame.
@param[in] R_target2cam Rotation part extracted from the homogeneous matrix that transforms a point
expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
This is a vector (`vector<Mat>`) that contains the rotation matrices for all the transformations
from calibration target frame to camera frame.
This is a vector (`vector<Mat>`) that contains the rotation, `(3x3)` rotation matrices or `(3x1)` rotation vectors,
for all the transformations from calibration target frame to camera frame.
@param[in] t_target2cam Rotation part extracted from the homogeneous matrix that transforms a point
expressed in the target frame to the camera frame (\f$_{}^{c}\textrm{T}_t\f$).
This is a vector (`vector<Mat>`) that contains the translation vectors for all the transformations
This is a vector (`vector<Mat>`) that contains the `(3x1)` translation vectors for all the transformations
from calibration target frame to camera frame.
@param[out] R_cam2gripper Estimated rotation part extracted from the homogeneous matrix that transforms a point
@param[out] R_cam2gripper Estimated `(3x3)` rotation part extracted from the homogeneous matrix that transforms a point
expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
@param[out] t_cam2gripper Estimated translation part extracted from the homogeneous matrix that transforms a point
@param[out] t_cam2gripper Estimated `(3x1)` translation part extracted from the homogeneous matrix that transforms a point
expressed in the camera frame to the gripper frame (\f$_{}^{g}\textrm{T}_c\f$).
@param[in] method One of the implemented Hand-Eye calibration method, see cv::HandEyeCalibrationMethod
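A synthetic end-to-end sketch of how these parameters map onto the Python binding (the poses are fabricated so that a consistent hand-eye transform exists; in practice R_target2cam/t_target2cam would come from e.g. solvePnP on a calibration board and R_gripper2base/t_gripper2base from the robot controller):

    import numpy as np
    import cv2 as cv

    def T(rvec, t):
        # Build a 4x4 homogeneous transform from a rotation vector and a translation.
        M = np.eye(4)
        M[:3, :3], _ = cv.Rodrigues(np.asarray(rvec, dtype=float))
        M[:3, 3] = t
        return M

    rng = np.random.default_rng(0)
    X_true = T([0.1, -0.2, 0.3], [0.05, 0.0, 0.1])   # ground-truth camera -> gripper
    t2b = T([0.4, 0.1, -0.3], [0.5, 0.2, 0.4])       # fixed target -> base transform

    Rg, tg, Rt, tt = [], [], [], []
    for _ in range(10):
        g2b = T(rng.uniform(-1.0, 1.0, 3), rng.uniform(-0.5, 0.5, 3))   # gripper -> base pose
        t2c = np.linalg.inv(g2b @ X_true) @ t2b                         # implied target -> camera pose
        Rg.append(g2b[:3, :3]); tg.append(g2b[:3, 3:])
        Rt.append(t2c[:3, :3]); tt.append(t2c[:3, 3:])

    R_cam2gripper, t_cam2gripper = cv.calibrateHandEye(Rg, tg, Rt, tt, method=cv.CALIB_HAND_EYE_TSAI)
    # Up to numerical noise this recovers X_true's rotation and translation.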
@ -2502,11 +2498,11 @@ CV_EXPORTS_W Mat findFundamentalMat( InputArray points1, InputArray points2,
|
||||
@param points1 Array of N (N \>= 5) 2D points from the first image. The point coordinates should
|
||||
be floating-point (single or double precision).
|
||||
@param points2 Array of the second image points of the same size and format as points1 .
|
||||
@param cameraMatrix Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
Note that this function assumes that points1 and points2 are feature points from cameras with the
|
||||
same camera matrix. If this assumption does not hold for your use case, use
|
||||
same camera intrinsic matrix. If this assumption does not hold for your use case, use
|
||||
`undistortPoints()` with `P = cv::NoArray()` for both cameras to transform image points
|
||||
to normalized image coordinates, which are valid for the identity camera matrix. When
|
||||
to normalized image coordinates, which are valid for the identity camera intrinsic matrix. When
|
||||
passing these coordinates, pass the identity matrix for this parameter.
|
||||
@param method Method for computing an essential matrix.
|
||||
- **RANSAC** for the RANSAC algorithm.
|
||||
@ -2553,10 +2549,10 @@ confidence (probability) that the estimated matrix is correct.
|
||||
@param mask Output array of N elements, every element of which is set to 0 for outliers and to 1
|
||||
for the other points. The array is computed only in the RANSAC and LMedS methods.
|
||||
|
||||
This function differs from the one above that it computes camera matrix from focal length and
|
||||
This function differs from the one above that it computes camera intrinsic matrix from focal length and
|
||||
principal point:
|
||||
|
||||
\f[K =
|
||||
\f[A =
|
||||
\begin{bmatrix}
|
||||
f & 0 & x_{pp} \\
|
||||
0 & f & y_{pp} \\
|
||||
@ -2653,9 +2649,9 @@ inliers that pass the check.
|
||||
@param points1 Array of N 2D points from the first image. The point coordinates should be
|
||||
floating-point (single or double precision).
|
||||
@param points2 Array of the second image points of the same size and format as points1 .
|
||||
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
Note that this function assumes that points1 and points2 are feature points from cameras with the
|
||||
same camera matrix.
|
||||
same camera intrinsic matrix.
|
||||
@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
|
||||
that performs a change of basis from the first camera's coordinate system to the second camera's
|
||||
coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
|
||||
@ -2718,7 +2714,7 @@ are feature points from cameras with same focal length and principal point.
|
||||
inliers in points1 and points2 for then given essential matrix E. Only these inliers will be used to
|
||||
recover pose. In the output mask only inliers which pass the cheirality check.
|
||||
|
||||
This function differs from the one above that it computes camera matrix from focal length and
|
||||
This function differs from the one above that it computes camera intrinsic matrix from focal length and
|
||||
principal point:
|
||||
|
||||
\f[A =
|
||||
@ -2738,9 +2734,9 @@ CV_EXPORTS_W int recoverPose( InputArray E, InputArray points1, InputArray point
|
||||
@param points1 Array of N 2D points from the first image. The point coordinates should be
|
||||
floating-point (single or double precision).
|
||||
@param points2 Array of the second image points of the same size and format as points1.
|
||||
@param cameraMatrix Camera matrix \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ .
|
||||
@param cameraMatrix Camera intrinsic matrix \f$\cameramatrix{A}\f$ .
|
||||
Note that this function assumes that points1 and points2 are feature points from cameras with the
|
||||
same camera matrix.
|
||||
same camera intrinsic matrix.
|
||||
@param R Output rotation matrix. Together with the translation vector, this matrix makes up a tuple
|
||||
that performs a change of basis from the first camera's coordinate system to the second camera's
|
||||
coordinate system. Note that, in general, t can not be used for this tuple, see the parameter
|
||||
@ -3150,7 +3146,7 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details.
|
||||
/** @brief Decompose a homography matrix to rotation(s), translation(s) and plane normal(s).
|
||||
|
||||
@param H The input homography matrix between two images.
|
||||
@param K The input intrinsic camera calibration matrix.
|
||||
@param K The input camera intrinsic matrix.
|
||||
@param rotations Array of rotation matrices.
|
||||
@param translations Array of translation matrices.
|
||||
@param normals Array of plane normal matrices.
|
||||
@ -3611,8 +3607,8 @@ namespace fisheye
|
||||
@param imagePoints Output array of image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, or
|
||||
vector\<Point2f\>.
|
||||
@param affine
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param alpha The skew coefficient.
|
||||
@param jacobian Optional output 2Nx15 jacobian matrix of derivatives of image points with respect
|
||||
to components of the focal lengths, coordinates of the principal point, distortion coefficients,
|
||||
@ -3635,12 +3631,12 @@ namespace fisheye
|
||||
|
||||
@param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is
|
||||
the number of points in the view.
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param alpha The skew coefficient.
|
||||
@param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
|
||||
|
||||
Note that the function assumes the camera matrix of the undistorted points to be identity.
|
||||
Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity.
|
||||
This means if you want to transform back points undistorted with undistortPoints() you have to
|
||||
multiply them with \f$P^{-1}\f$.
|
||||
*/
|
||||
@ -3650,11 +3646,11 @@ namespace fisheye
|
||||
|
||||
@param distorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is the
|
||||
number of points in the view.
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
|
||||
1-channel or 1x1 3-channel
|
||||
@param P New camera matrix (3x3) or new projection matrix (3x4)
|
||||
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
|
||||
@param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
|
||||
*/
|
||||
CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted,
|
||||
@ -3663,11 +3659,11 @@ namespace fisheye
|
||||
/** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero
|
||||
distortion is used, if R or P is empty identity matrixes are used.
|
||||
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
|
||||
1-channel or 1x1 3-channel
|
||||
@param P New camera matrix (3x3) or new projection matrix (3x4)
|
||||
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
|
||||
@param size Undistorted image size.
|
||||
@param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
|
||||
for details.
|
||||
@ -3681,9 +3677,9 @@ namespace fisheye
|
||||
|
||||
@param distorted image with fisheye lens distortion.
|
||||
@param undistorted Output image with compensated fisheye lens distortion.
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param Knew Camera matrix of the distorted image. By default, it is the identity matrix but you
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param Knew Camera intrinsic matrix of the distorted image. By default, it is the identity matrix but you
|
||||
may additionally scale and shift the result by using a different matrix.
|
||||
@param new_size the new size
|
||||
|
||||
@ -3708,14 +3704,14 @@ namespace fisheye
|
||||
CV_EXPORTS_W void undistortImage(InputArray distorted, OutputArray undistorted,
|
||||
InputArray K, InputArray D, InputArray Knew = cv::noArray(), const Size& new_size = Size());
|
||||
|
||||
/** @brief Estimates new camera matrix for undistortion or rectification.
|
||||
/** @brief Estimates new camera intrinsic matrix for undistortion or rectification.
|
||||
|
||||
@param K Camera matrix \f$K = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
|
||||
@param K Camera intrinsic matrix \f$cameramatrix{K}\f$.
|
||||
@param image_size Size of the image
|
||||
@param D Input vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
|
||||
1-channel or 1x1 3-channel
|
||||
@param P New camera matrix (3x3) or new projection matrix (3x4)
|
||||
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
|
||||
@param balance Sets the new focal length in range between the min focal length and the max focal
|
||||
length. Balance is in range of [0, 1].
|
||||
@param new_size the new size
|
||||
@ -3731,12 +3727,12 @@ namespace fisheye
|
||||
@param imagePoints vector of vectors of the projections of calibration pattern points.
|
||||
imagePoints.size() and objectPoints.size() and imagePoints[i].size() must be equal to
|
||||
objectPoints[i].size() for each i.
|
||||
@param image_size Size of the image used only to initialize the intrinsic camera matrix.
|
||||
@param K Output 3x3 floating-point camera matrix
|
||||
\f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If
|
||||
@param image_size Size of the image used only to initialize the camera intrinsic matrix.
|
||||
@param K Output 3x3 floating-point camera intrinsic matrix
|
||||
\f$\cameramatrix{A}\f$ . If
|
||||
fisheye::CALIB_USE_INTRINSIC_GUESS/ is specified, some or all of fx, fy, cx, cy must be
|
||||
initialized before calling the function.
|
||||
@param D Output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$.
|
||||
@param D Output vector of distortion coefficients \f$\distcoeffsfisheye\f$.
|
||||
@param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each pattern view.
|
||||
That is, each k-th rotation vector together with the corresponding k-th translation vector (see
|
||||
the next output parameter description) brings the calibration pattern from the model coordinate
|
||||
@ -3763,9 +3759,9 @@ optimization. It stays at the center or at a different location specified when C
|
||||
|
||||
/** @brief Stereo rectification for fisheye camera model
|
||||
|
||||
@param K1 First camera matrix.
|
||||
@param K1 First camera intrinsic matrix.
|
||||
@param D1 First camera distortion parameters.
|
||||
@param K2 Second camera matrix.
|
||||
@param K2 Second camera intrinsic matrix.
|
||||
@param D2 Second camera distortion parameters.
|
||||
@param imageSize Size of the image used for stereo calibration.
|
||||
@param R Rotation matrix between the coordinate systems of the first and the second
|
||||
@ -3802,15 +3798,15 @@ optimization. It stays at the center or at a different location specified when C
|
||||
observed by the first camera.
|
||||
@param imagePoints2 Vector of vectors of the projections of the calibration pattern points,
|
||||
observed by the second camera.
|
||||
@param K1 Input/output first camera matrix:
|
||||
@param K1 Input/output first camera intrinsic matrix:
|
||||
\f$\vecthreethree{f_x^{(j)}}{0}{c_x^{(j)}}{0}{f_y^{(j)}}{c_y^{(j)}}{0}{0}{1}\f$ , \f$j = 0,\, 1\f$ . If
|
||||
any of fisheye::CALIB_USE_INTRINSIC_GUESS , fisheye::CALIB_FIX_INTRINSIC are specified,
|
||||
some or all of the matrix components must be initialized.
|
||||
@param D1 Input/output vector of distortion coefficients \f$(k_1, k_2, k_3, k_4)\f$ of 4 elements.
|
||||
@param K2 Input/output second camera matrix. The parameter is similar to K1 .
|
||||
@param D1 Input/output vector of distortion coefficients \f$\distcoeffsfisheye\f$ of 4 elements.
|
||||
@param K2 Input/output second camera intrinsic matrix. The parameter is similar to K1 .
|
||||
@param D2 Input/output lens distortion coefficients for the second camera. The parameter is
|
||||
similar to D1 .
|
||||
@param imageSize Size of the image used only to initialize intrinsic camera matrix.
|
||||
@param imageSize Size of the image used only to initialize camera intrinsic matrix.
|
||||
@param R Output rotation matrix between the 1st and the 2nd camera coordinate systems.
|
||||
@param T Output translation vector between the coordinate systems of the cameras.
|
||||
@param flags Different flags that may be zero or a combination of the following values:
|
||||
|
@@ -29,6 +29,7 @@ static Mat homogeneousInverse(const Mat& T)
// q = sin(theta/2) * v
// theta - rotation angle
// v - unit rotation axis, |v| = 1
// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
static Mat rot2quatMinimal(const Mat& R)
{
    CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3);
@@ -44,7 +45,7 @@ static Mat rot2quatMinimal(const Mat& R)
        qx = (m21 - m12) / S;
        qy = (m02 - m20) / S;
        qz = (m10 - m01) / S;
    } else if ((m00 > m11)&(m00 > m22)) {
    } else if (m00 > m11 && m00 > m22) {
        double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx
        qx = 0.25 * S;
        qy = (m01 + m10) / S;
@@ -98,6 +99,7 @@ static Mat quatMinimal2rot(const Mat& q)
//
// q - 4x1 unit quaternion <qw, qx, qy, qz>
// R - 3x3 rotation matrix
// Reference: http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
static Mat rot2quat(const Mat& R)
{
    CV_Assert(R.type() == CV_64FC1 && R.rows >= 3 && R.cols >= 3);
@@ -114,7 +116,7 @@ static Mat rot2quat(const Mat& R)
        qx = (m21 - m12) / S;
        qy = (m02 - m20) / S;
        qz = (m10 - m01) / S;
    } else if ((m00 > m11)&(m00 > m22)) {
    } else if (m00 > m11 && m00 > m22) {
        double S = sqrt(1.0 + m00 - m11 - m22) * 2; // S=4*qx
        qw = (m21 - m12) / S;
        qx = 0.25 * S;
@@ -572,7 +574,11 @@ static void calibrateHandEyeAndreff(const std::vector<Mat>& Hg, const std::vecto
    R = R.reshape(1, 2, newSize);
    //Eq 15
    double det = determinant(R);
    R = pow(sign_double(det) / abs(det), 1.0/3.0) * R;
    if (std::fabs(det) < FLT_EPSILON)
    {
        CV_Error(Error::StsNoConv, "calibrateHandEye() with CALIB_HAND_EYE_ANDREFF method: determinant(R) is null");
    }
    R = cubeRoot(static_cast<float>(sign_double(det) / abs(det))) * R;

    Mat w, u, vt;
    SVDecomp(R, w, u, vt);
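The motivation for this change: std::pow() with a negative base and a fractional exponent yields NaN, while cv::cubeRoot() is a sign-preserving cube root, and a near-zero determinant is now reported instead of silently divided by. A quick illustrative check in Python:

    import math

    det = -8.0
    x = math.copysign(1.0, det) / abs(det)      # sign(det)/|det| = -0.125

    # Old expression: pow(x, 1/3) is not a real number for negative x
    # (std::pow returns NaN in C++; Python's math.pow raises instead).
    try:
        old = math.pow(x, 1.0 / 3.0)
    except ValueError:
        old = float("nan")

    # New expression: a sign-preserving cube root, which is what cv::cubeRoot() computes.
    new = math.copysign(abs(x) ** (1.0 / 3.0), x)
    print(old, new)                             # nan -0.5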
@ -51,6 +51,8 @@
|
||||
|
||||
#include "usac.hpp"
|
||||
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
#if defined _DEBUG || defined CV_STATIC_ANALYSIS
|
||||
@@ -809,6 +811,15 @@ int solvePnPGeneric( InputArray _opoints, InputArray _ipoints,
    vector<Mat> vec_rvecs, vec_tvecs;
    if (flags == SOLVEPNP_EPNP || flags == SOLVEPNP_DLS || flags == SOLVEPNP_UPNP)
    {
        if (flags == SOLVEPNP_DLS)
        {
            CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_DLS. Fallback to EPnP.");
        }
        else if (flags == SOLVEPNP_UPNP)
        {
            CV_LOG_DEBUG(NULL, "Broken implementation for SOLVEPNP_UPNP. Fallback to EPnP.");
        }

        Mat undistortedPoints;
        undistortPoints(ipoints, undistortedPoints, cameraMatrix, distCoeffs);
        epnp PnP(cameraMatrix, opoints, undistortedPoints);
@ -7,6 +7,38 @@

namespace opencv_test { namespace {

static std::string getMethodName(HandEyeCalibrationMethod method)
{
    std::string method_name = "";
    switch (method)
    {
    case CALIB_HAND_EYE_TSAI:
        method_name = "Tsai";
        break;

    case CALIB_HAND_EYE_PARK:
        method_name = "Park";
        break;

    case CALIB_HAND_EYE_HORAUD:
        method_name = "Horaud";
        break;

    case CALIB_HAND_EYE_ANDREFF:
        method_name = "Andreff";
        break;

    case CALIB_HAND_EYE_DANIILIDIS:
        method_name = "Daniilidis";
        break;

    default:
        break;
    }

    return method_name;
}

class CV_CalibrateHandEyeTest : public cvtest::BaseTest
{
public:
@ -48,7 +80,6 @@ protected:
                             std::vector<Mat> &R_target2cam, std::vector<Mat> &t_target2cam,
                             bool noise, Mat& R_cam2gripper, Mat& t_cam2gripper);
    Mat homogeneousInverse(const Mat& T);
    std::string getMethodName(HandEyeCalibrationMethod method);
    double sign_double(double val);

    double eps_rvec[5];
@ -340,38 +371,6 @@ Mat CV_CalibrateHandEyeTest::homogeneousInverse(const Mat& T)
    return Tinv;
}

std::string CV_CalibrateHandEyeTest::getMethodName(HandEyeCalibrationMethod method)
{
    std::string method_name = "";
    switch (method)
    {
    case CALIB_HAND_EYE_TSAI:
        method_name = "Tsai";
        break;

    case CALIB_HAND_EYE_PARK:
        method_name = "Park";
        break;

    case CALIB_HAND_EYE_HORAUD:
        method_name = "Horaud";
        break;

    case CALIB_HAND_EYE_ANDREFF:
        method_name = "Andreff";
        break;

    case CALIB_HAND_EYE_DANIILIDIS:
        method_name = "Daniilidis";
        break;

    default:
        break;
    }

    return method_name;
}

double CV_CalibrateHandEyeTest::sign_double(double val)
{
    return (0 < val) - (val < 0);
@ -381,4 +380,86 @@ double CV_CalibrateHandEyeTest::sign_double(double val)

TEST(Calib3d_CalibrateHandEye, regression) { CV_CalibrateHandEyeTest test; test.safe_run(); }

TEST(Calib3d_CalibrateHandEye, regression_17986)
{
    const std::string camera_poses_filename = findDataFile("cv/hand_eye_calibration/cali.txt");
    const std::string end_effector_poses = findDataFile("cv/hand_eye_calibration/robot_cali.txt");

    std::vector<Mat> R_target2cam;
    std::vector<Mat> t_target2cam;
    // Parse camera poses
    {
        std::ifstream file(camera_poses_filename.c_str());
        ASSERT_TRUE(file.is_open());

        int ndata = 0;
        file >> ndata;
        R_target2cam.reserve(ndata);
        t_target2cam.reserve(ndata);

        std::string image_name;
        Matx33d cameraMatrix;
        Matx33d R;
        Matx31d t;
        Matx16d distCoeffs;
        Matx13d distCoeffs2;
        while (file >> image_name >>
               cameraMatrix(0,0) >> cameraMatrix(0,1) >> cameraMatrix(0,2) >>
               cameraMatrix(1,0) >> cameraMatrix(1,1) >> cameraMatrix(1,2) >>
               cameraMatrix(2,0) >> cameraMatrix(2,1) >> cameraMatrix(2,2) >>
               R(0,0) >> R(0,1) >> R(0,2) >>
               R(1,0) >> R(1,1) >> R(1,2) >>
               R(2,0) >> R(2,1) >> R(2,2) >>
               t(0) >> t(1) >> t(2) >>
               distCoeffs(0) >> distCoeffs(1) >> distCoeffs(2) >> distCoeffs(3) >> distCoeffs(4) >>
               distCoeffs2(0) >> distCoeffs2(1) >> distCoeffs2(2)) {
            R_target2cam.push_back(Mat(R));
            t_target2cam.push_back(Mat(t));
        }
    }

    std::vector<Mat> R_gripper2base;
    std::vector<Mat> t_gripper2base;
    // Parse end-effector poses
    {
        std::ifstream file(end_effector_poses.c_str());
        ASSERT_TRUE(file.is_open());

        int ndata = 0;
        file >> ndata;
        R_gripper2base.reserve(ndata);
        t_gripper2base.reserve(ndata);

        Matx33d R;
        Matx31d t;
        Matx14d last_row;
        while (file >>
               R(0,0) >> R(0,1) >> R(0,2) >> t(0) >>
               R(1,0) >> R(1,1) >> R(1,2) >> t(1) >>
               R(2,0) >> R(2,1) >> R(2,2) >> t(2) >>
               last_row(0) >> last_row(1) >> last_row(2) >> last_row(3)) {
            R_gripper2base.push_back(Mat(R));
            t_gripper2base.push_back(Mat(t));
        }
    }

    std::vector<HandEyeCalibrationMethod> methods;
    methods.push_back(CALIB_HAND_EYE_TSAI);
    methods.push_back(CALIB_HAND_EYE_PARK);
    methods.push_back(CALIB_HAND_EYE_HORAUD);
    methods.push_back(CALIB_HAND_EYE_ANDREFF);
    methods.push_back(CALIB_HAND_EYE_DANIILIDIS);

    for (size_t idx = 0; idx < methods.size(); idx++) {
        SCOPED_TRACE(cv::format("method=%s", getMethodName(methods[idx]).c_str()));

        Matx33d R_cam2gripper_est;
        Matx31d t_cam2gripper_est;
        calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper_est, t_cam2gripper_est, methods[idx]);

        EXPECT_TRUE(checkRange(R_cam2gripper_est));
        EXPECT_TRUE(checkRange(t_cam2gripper_est));
    }
}

}} // namespace
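The regression test above drives cv::calibrateHandEye with every available method. A minimal Python sketch of the same call on synthetic, self-consistent poses (every number below is made up; real data would come from robot forward kinematics and from solvePnP on a calibration board) could look like:

import numpy as np
import cv2 as cv

def to_T(R, t):
    # Pack a rotation matrix and a translation vector into a 4x4 homogeneous matrix.
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = t.ravel()
    return T

rng = np.random.default_rng(0)

# Ground-truth camera->gripper transform that the call should recover.
R_true, _ = cv.Rodrigues(np.array([0.1, -0.2, 0.3]))
t_true = np.array([[0.05], [0.10], [0.15]])
T_gripper_cam = to_T(R_true, t_true)

# A fixed calibration target expressed in the robot base frame.
R_bt, _ = cv.Rodrigues(np.array([0.3, 0.1, -0.2]))
T_base_target = to_T(R_bt, np.array([[1.0], [0.5], [0.2]]))

R_gripper2base, t_gripper2base, R_target2cam, t_target2cam = [], [], [], []
for _ in range(10):
    R_g, _ = cv.Rodrigues(rng.normal(size=3) * 0.5)
    t_g = rng.normal(size=(3, 1))
    T_base_gripper = to_T(R_g, t_g)
    # Target pose seen by the camera, consistent with the fixed chain above.
    T_cam_target = np.linalg.inv(T_gripper_cam) @ np.linalg.inv(T_base_gripper) @ T_base_target
    R_gripper2base.append(R_g)
    t_gripper2base.append(t_g)
    R_target2cam.append(T_cam_target[:3, :3])
    t_target2cam.append(T_cam_target[:3, 3].reshape(3, 1))

R_est, t_est = cv.calibrateHandEye(R_gripper2base, t_gripper2base,
                                   R_target2cam, t_target2cam,
                                   method=cv.CALIB_HAND_EYE_TSAI)
print(np.allclose(R_est, R_true, atol=1e-5), t_est.ravel())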
@ -16,6 +16,7 @@ Implementation of Scale layer.
#include "../op_inf_engine.hpp"
#include "../ie_ngraph.hpp"

#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/shape_utils.hpp>

#ifdef HAVE_CUDA
@ -389,7 +390,7 @@ public:
                             std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert_N(inputs.size() == 1, blobs.size() == 3);
        CV_Assert_N(blobs[0].total() == 1, blobs[1].total() == total(inputs[0], 1),
        CV_Assert_N(blobs[0].total() == 1,
                    blobs[2].total() == inputs[0][1]);

        outputs.assign(1, inputs[0]);
@ -412,15 +413,20 @@ public:
        float* outData = outputs[0].ptr<float>();

        Mat data_mean_cpu = blobs[1].clone();
        Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3);
        Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr<float>(0));
        resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2]));
        int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows};
        Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size);
        Mat data_mean_per_channel_cpu = blobs[2].clone();

        const int numWeights = data_mean_cpu.total();
        const int numWeights = data_mean_cpu_resize.total();
        CV_Assert(numWeights != 0);

        ++num_iter;
        if (num_iter <= recompute_mean)
        {
            data_mean_cpu *= (num_iter - 1);
            data_mean_cpu_resize *= (num_iter - 1);
            const int batch = inputs[0].size[0];
            float alpha = 1.0 / batch;

@ -429,15 +435,15 @@ public:
                Mat inpSlice(1, numWeights, CV_32F, inpData);
                inpSlice = alpha * inpSlice;

                add(data_mean_cpu.reshape(1, 1), inpSlice, data_mean_cpu.reshape(1, 1));
                add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1));
                inpData += numWeights;
            }
            data_mean_cpu *= (1.0 / num_iter);
            data_mean_cpu_resize *= (1.0 / num_iter);

            int newsize[] = {blobs[1].size[1], (int)blobs[1].total(2)};
            reduce(data_mean_cpu.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
            int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)};
            reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);

            int area = blobs[1].total(2);
            int area = inputs[0].total(2);
            data_mean_per_channel_cpu *= (1.0 / area);
        }

@ -452,7 +458,7 @@ public:
            Mat inpSlice(1, numWeights, CV_32F, inpData);
            Mat outSlice(1, numWeights, CV_32F, outData);

            add(inpSlice, (-1) * data_mean_cpu, outSlice);
            add(inpSlice, (-1) * data_mean_cpu_resize, outSlice);
            inpData += numWeights;
            outData += numWeights;
        }
@ -712,6 +712,19 @@ void ONNXImporter::populateNet(Net dstNet)
                layerParams.set("bias_term", true);
            }
        }
        else if (layer_type == "Pow")
        {
            if (layer_id.find(node_proto.input(1)) != layer_id.end())
                CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");

            Mat blob = getBlob(node_proto, constBlobs, 1);
            if (blob.total() != 1)
                CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");

            blob.convertTo(blob, CV_32F);
            layerParams.type = "Power";
            layerParams.set("power", blob.at<float>(0));
        }
        else if (layer_type == "Max")
        {
            layerParams.type = "Eltwise";
@ -933,6 +946,19 @@ void ONNXImporter::populateNet(Net dstNet)
                Mat bias = getBlob(node_proto, constBlobs, 2);
                layerParams.blobs.push_back(bias);
            }
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inputBuf = getBlob(node_proto, constBlobs, 0);

                LayerParams constParams;
                constParams.name = node_proto.input(0);
                constParams.type = "Const";
                constParams.blobs.push_back(inputBuf);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(dstNet, constParams, proto, layer_id, outShapes);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
@ -657,6 +657,8 @@ TEST_P(Test_Caffe_layers, DataAugmentation)
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    testLayerUsingCaffeModels("data_augmentation", true, false);
    testLayerUsingCaffeModels("data_augmentation_2x1", true, false);
    testLayerUsingCaffeModels("data_augmentation_8x6", true, false);
}

TEST_P(Test_Caffe_layers, Resample)
@ -301,6 +301,11 @@ TEST_P(Test_ONNX_layers, Cast)
    testONNXModels("cast");
}

TEST_P(Test_ONNX_layers, Power)
{
    testONNXModels("pow2", npy, 0, 0, false, false);
}

TEST_P(Test_ONNX_layers, Concatenation)
{
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
@ -639,6 +644,26 @@ TEST_P(Test_ONNX_layers, Pad2d_Unfused)
    testONNXModels("ZeroPad2d");
}

TEST_P(Test_ONNX_layers, LinearWithConstant)
{
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif
    testONNXModels("lin_with_constant");
}

TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
{
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2020040000)
    applyTestTag(CV_TEST_TAG_DNN_SKIP_IE);
#endif
    testONNXModels("matmul_with_two_inputs");
}

INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

class Test_ONNX_nets : public Test_ONNX_layers
@ -5,7 +5,7 @@ Original paper: https://arxiv.org/abs/1612.01925.
Original repo: https://github.com/lmb-freiburg/flownet2.

Download the converted .caffemodel model from https://drive.google.com/open?id=16qvE9VNmU39NttpZwZs81Ga8VYQJDaWZ
and .prototxt from https://drive.google.com/open?id=19bo6SWU2p8ZKvjXqMKiCPdK8mghwDy9b.
and .prototxt from https://drive.google.com/file/d/1RyNIUsan1ZOh2hpYIH36A-jofAvJlT6a/view?usp=sharing.
Otherwise download original model from https://lmb.informatik.uni-freiburg.de/resources/binaries/flownet2/flownet2-models.tar.gz,
convert .h5 model to .caffemodel and modify original .prototxt using .prototxt from link above.
'''
@ -18,7 +18,7 @@ import cv2 as cv

class OpticalFlow(object):
    def __init__(self, proto, model, height, width):
        self.net = cv.dnn.readNet(proto, model)
        self.net = cv.dnn.readNetFromCaffe(proto, model)
        self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        self.height = height
        self.width = width
@ -62,9 +62,9 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to calculate optical flow using FlowNetv2',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-input', '-i', required=True, help='Path to input video file. Skip this argument to capture frames from a camera.')
    parser.add_argument('--height', default=320, help='Input height')
    parser.add_argument('--width', default=448, help='Input width')
    parser.add_argument('--proto', '-p', default='FlowNet2_deploy.prototxt', help='Path to prototxt.')
    parser.add_argument('--height', default=320, type=int, help='Input height')
    parser.add_argument('--width', default=448, type=int, help='Input width')
    parser.add_argument('--proto', '-p', default='FlowNet2_deploy_anysize.prototxt', help='Path to prototxt.')
    parser.add_argument('--model', '-m', default='FlowNet2_weights.caffemodel', help='Path to caffemodel.')
    args, _ = parser.parse_known_args()

@ -75,7 +75,25 @@ if __name__ == '__main__':
    cv.namedWindow(winName, cv.WINDOW_NORMAL)
    cap = cv.VideoCapture(args.input if args.input else 0)
    hasFrame, first_frame = cap.read()
    opt_flow = OpticalFlow(args.proto, args.model, args.height, args.width)

    divisor = 64.
    var = {}
    var['ADAPTED_WIDTH'] = int(np.ceil(args.width/divisor) * divisor)
    var['ADAPTED_HEIGHT'] = int(np.ceil(args.height/divisor) * divisor)
    var['SCALE_WIDTH'] = args.width / float(var['ADAPTED_WIDTH'])
    var['SCALE_HEIGHT'] = args.height / float(var['ADAPTED_HEIGHT'])

    config = ''
    proto = open(args.proto).readlines()
    for line in proto:
        for key, value in var.items():
            tag = "$%s$" % key
            line = line.replace(tag, str(value))
        config += line

    caffemodel = open(args.model, 'rb').read()

    opt_flow = OpticalFlow(bytearray(config.encode()), caffemodel, var['ADAPTED_HEIGHT'], var['ADAPTED_WIDTH'])
    while cv.waitKey(1) < 0:
        hasFrame, second_frame = cap.read()
        if not hasFrame:
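The change above switches the sample to cv.dnn.readNetFromCaffe so that the prototxt can be patched in memory before the network is built. A standalone Python sketch of that pattern, mirroring the loop above (the file names are the sample's defaults and are assumed to exist locally):

import cv2 as cv

# Hypothetical substitutions for the $ADAPTED_WIDTH$ / $ADAPTED_HEIGHT$ placeholders
# used by the FlowNet2 "anysize" prototxt referenced by the sample above.
substitutions = {'ADAPTED_WIDTH': 448, 'ADAPTED_HEIGHT': 320}

config = ''
with open('FlowNet2_deploy_anysize.prototxt') as f:
    for line in f:
        for key, value in substitutions.items():
            line = line.replace('$%s$' % key, str(value))
        config += line

with open('FlowNet2_weights.caffemodel', 'rb') as f:
    weights = f.read()

# readNetFromCaffe accepts in-memory buffers as well as file paths.
net = cv.dnn.readNetFromCaffe(bytearray(config.encode()), weights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
print(net.getLayerNames()[:5])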
397 samples/dnn/siamrpnpp.py Normal file
@ -0,0 +1,397 @@
import argparse
import cv2 as cv
import numpy as np
import os

"""
Link to original paper : https://arxiv.org/abs/1812.11703
Link to original repo : https://github.com/STVIR/pysot

You can download the pre-trained weights of the Tracker Model from https://drive.google.com/file/d/11bwgPFVkps9AH2NOD1zBDdpF_tQghAB-/view?usp=sharing
You can download the target net (target branch of SiamRPN++) from https://drive.google.com/file/d/1dw_Ne3UMcCnFsaD6xkZepwE4GEpqq7U_/view?usp=sharing
You can download the search net (search branch of SiamRPN++) from https://drive.google.com/file/d/1Lt4oE43ZSucJvze3Y-Z87CVDreO-Afwl/view?usp=sharing
You can download the head model (RPN Head) from https://drive.google.com/file/d/1zT1yu12mtj3JQEkkfKFJWiZ71fJ-dQTi/view?usp=sharing
"""

class ModelBuilder():
    """ This class generates the SiamRPN++ Tracker Model by using Imported ONNX Nets
    """
    def __init__(self, target_net, search_net, rpn_head):
        super(ModelBuilder, self).__init__()
        # Build the target branch
        self.target_net = target_net
        # Build the search branch
        self.search_net = search_net
        # Build RPN_Head
        self.rpn_head = rpn_head

    def template(self, z):
        """ Takes the template of size (1, 1, 127, 127) as an input to generate kernel
        """
        self.target_net.setInput(z)
        outNames = self.target_net.getUnconnectedOutLayersNames()
        self.zfs_1, self.zfs_2, self.zfs_3 = self.target_net.forward(outNames)

    def track(self, x):
        """ Takes the search of size (1, 1, 255, 255) as an input to generate classification score and bounding box regression
        """
        self.search_net.setInput(x)
        outNames = self.search_net.getUnconnectedOutLayersNames()
        xfs_1, xfs_2, xfs_3 = self.search_net.forward(outNames)
        self.rpn_head.setInput(np.stack([self.zfs_1, self.zfs_2, self.zfs_3]), 'input_1')
        self.rpn_head.setInput(np.stack([xfs_1, xfs_2, xfs_3]), 'input_2')
        outNames = self.rpn_head.getUnconnectedOutLayersNames()
        cls, loc = self.rpn_head.forward(outNames)
        return {'cls': cls, 'loc': loc}

class Anchors:
    """ This class generate anchors.
    """
    def __init__(self, stride, ratios, scales, image_center=0, size=0):
        self.stride = stride
        self.ratios = ratios
        self.scales = scales
        self.image_center = image_center
        self.size = size
        self.anchor_num = len(self.scales) * len(self.ratios)
        self.anchors = self.generate_anchors()

    def generate_anchors(self):
        """
        generate anchors based on predefined configuration
        """
        anchors = np.zeros((self.anchor_num, 4), dtype=np.float32)
        size = self.stride**2
        count = 0
        for r in self.ratios:
            ws = int(np.sqrt(size * 1. / r))
            hs = int(ws * r)

            for s in self.scales:
                w = ws * s
                h = hs * s
                anchors[count][:] = [-w * 0.5, -h * 0.5, w * 0.5, h * 0.5][:]
                count += 1
        return anchors

class SiamRPNTracker:
    def __init__(self, model):
        super(SiamRPNTracker, self).__init__()
        self.anchor_stride = 8
        self.anchor_ratios = [0.33, 0.5, 1, 2, 3]
        self.anchor_scales = [8]
        self.track_base_size = 8
        self.track_context_amount = 0.5
        self.track_exemplar_size = 127
        self.track_instance_size = 255
        self.track_lr = 0.4
        self.track_penalty_k = 0.04
        self.track_window_influence = 0.44
        self.score_size = (self.track_instance_size - self.track_exemplar_size) // \
                          self.anchor_stride + 1 + self.track_base_size
        self.anchor_num = len(self.anchor_ratios) * len(self.anchor_scales)
        hanning = np.hanning(self.score_size)
        window = np.outer(hanning, hanning)
        self.window = np.tile(window.flatten(), self.anchor_num)
        self.anchors = self.generate_anchor(self.score_size)
        self.model = model

    def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
        """
        Args:
            im: bgr based input image frame
            pos: position of the center of the frame
            model_sz: exemplar / target image size
            s_z: original / search image size
            avg_chans: channel average
        Return:
            im_patch: sub_windows for the given image input
        """
        if isinstance(pos, float):
            pos = [pos, pos]
        sz = original_sz
        im_h, im_w, im_d = im.shape
        c = (original_sz + 1) / 2
        cx, cy = pos
        context_xmin = np.floor(cx - c + 0.5)
        context_xmax = context_xmin + sz - 1
        context_ymin = np.floor(cy - c + 0.5)
        context_ymax = context_ymin + sz - 1
        left_pad = int(max(0., -context_xmin))
        top_pad = int(max(0., -context_ymin))
        right_pad = int(max(0., context_xmax - im_w + 1))
        bottom_pad = int(max(0., context_ymax - im_h + 1))
        context_xmin += left_pad
        context_xmax += left_pad
        context_ymin += top_pad
        context_ymax += top_pad

        if any([top_pad, bottom_pad, left_pad, right_pad]):
            size = (im_h + top_pad + bottom_pad, im_w + left_pad + right_pad, im_d)
            te_im = np.zeros(size, np.uint8)
            te_im[top_pad:top_pad + im_h, left_pad:left_pad + im_w, :] = im
            if top_pad:
                te_im[0:top_pad, left_pad:left_pad + im_w, :] = avg_chans
            if bottom_pad:
                te_im[im_h + top_pad:, left_pad:left_pad + im_w, :] = avg_chans
            if left_pad:
                te_im[:, 0:left_pad, :] = avg_chans
            if right_pad:
                te_im[:, im_w + left_pad:, :] = avg_chans
            im_patch = te_im[int(context_ymin):int(context_ymax + 1),
                             int(context_xmin):int(context_xmax + 1), :]
        else:
            im_patch = im[int(context_ymin):int(context_ymax + 1),
                          int(context_xmin):int(context_xmax + 1), :]

        if not np.array_equal(model_sz, original_sz):
            im_patch = cv.resize(im_patch, (model_sz, model_sz))
        im_patch = im_patch.transpose(2, 0, 1)
        im_patch = im_patch[np.newaxis, :, :, :]
        im_patch = im_patch.astype(np.float32)
        return im_patch

    def generate_anchor(self, score_size):
        """
        Args:
            score_size: side length of the score map (number of anchor positions per side)
        Return:
            anchor: anchors for pre-determined values of stride, ratio, and scale
        """
        anchors = Anchors(self.anchor_stride, self.anchor_ratios, self.anchor_scales)
        anchor = anchors.anchors
        x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3]
        anchor = np.stack([(x1 + x2) * 0.5, (y1 + y2) * 0.5, x2 - x1, y2 - y1], 1)
        total_stride = anchors.stride
        anchor_num = anchors.anchor_num
        anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))
        ori = - (score_size // 2) * total_stride
        xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)],
                             [ori + total_stride * dy for dy in range(score_size)])
        xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \
                 np.tile(yy.flatten(), (anchor_num, 1)).flatten()
        anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
        return anchor

    def _convert_bbox(self, delta, anchor):
        """
        Args:
            delta: localisation
            anchor: anchor of pre-determined anchor size
        Return:
            delta: prediction of bounding box
        """
        delta_transpose = np.transpose(delta, (1, 2, 3, 0))
        delta_contig = np.ascontiguousarray(delta_transpose)
        delta = delta_contig.reshape(4, -1)
        delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0]
        delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3]
        return delta

    def _softmax(self, x):
        """
        Softmax in the direction of the depth of the layer
        """
        x = x.astype(dtype=np.float32)
        x_max = x.max(axis=1)[:, np.newaxis]
        e_x = np.exp(x-x_max)
        div = np.sum(e_x, axis=1)[:, np.newaxis]
        y = e_x / div
        return y

    def _convert_score(self, score):
        """
        Args:
            score: raw classification output from the RPN head
        Return:
            cls: score for cls
        """
        score_transpose = np.transpose(score, (1, 2, 3, 0))
        score_con = np.ascontiguousarray(score_transpose)
        score_view = score_con.reshape(2, -1)
        score = np.transpose(score_view, (1, 0))
        score = self._softmax(score)
        return score[:,1]

    def _bbox_clip(self, cx, cy, width, height, boundary):
        """
        Adjusting the bounding box
        """
        bbox_h, bbox_w = boundary
        cx = max(0, min(cx, bbox_w))
        cy = max(0, min(cy, bbox_h))
        width = max(10, min(width, bbox_w))
        height = max(10, min(height, bbox_h))
        return cx, cy, width, height

    def init(self, img, bbox):
        """
        Args:
            img(np.ndarray): bgr based input image frame
            bbox: (x,y,w,h): bounding box
        """
        x,y,h,w = bbox
        self.center_pos = np.array([x + (h - 1) / 2, y + (w - 1) / 2])
        self.h = h
        self.w = w
        w_z = self.w + self.track_context_amount * np.add(h, w)
        h_z = self.h + self.track_context_amount * np.add(h, w)
        s_z = round(np.sqrt(w_z * h_z))
        self.channel_average = np.mean(img, axis=(0, 1))
        z_crop = self.get_subwindow(img, self.center_pos, self.track_exemplar_size, s_z, self.channel_average)
        self.model.template(z_crop)

    def track(self, img):
        """
        Args:
            img(np.ndarray): BGR image
        Return:
            bbox(list):[x, y, width, height]
        """
        w_z = self.w + self.track_context_amount * np.add(self.w, self.h)
        h_z = self.h + self.track_context_amount * np.add(self.w, self.h)
        s_z = np.sqrt(w_z * h_z)
        scale_z = self.track_exemplar_size / s_z
        s_x = s_z * (self.track_instance_size / self.track_exemplar_size)
        x_crop = self.get_subwindow(img, self.center_pos, self.track_instance_size, round(s_x), self.channel_average)
        outputs = self.model.track(x_crop)
        score = self._convert_score(outputs['cls'])
        pred_bbox = self._convert_bbox(outputs['loc'], self.anchors)

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        # scale penalty
        s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                     (sz(self.w * scale_z, self.h * scale_z)))

        # aspect ratio penalty
        r_c = change((self.w / self.h) /
                     (pred_bbox[2, :] / pred_bbox[3, :]))
        penalty = np.exp(-(r_c * s_c - 1) * self.track_penalty_k)
        pscore = penalty * score

        # window penalty
        pscore = pscore * (1 - self.track_window_influence) + \
                 self.window * self.track_window_influence
        best_idx = np.argmax(pscore)
        bbox = pred_bbox[:, best_idx] / scale_z
        lr = penalty[best_idx] * score[best_idx] * self.track_lr

        cpx, cpy = self.center_pos
        x,y,w,h = bbox
        cx = x + cpx
        cy = y + cpy

        # smooth bbox
        width = self.w * (1 - lr) + w * lr
        height = self.h * (1 - lr) + h * lr

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width, height, img.shape[:2])

        # update state
        self.center_pos = np.array([cx, cy])
        self.w = width
        self.h = height
        bbox = [cx - width / 2, cy - height / 2, width, height]
        best_score = score[best_idx]
        return {'bbox': bbox, 'best_score': best_score}

def get_frames(video_name):
    """
    Args:
        Path to input video frame
    Return:
        Frame
    """
    cap = cv.VideoCapture(video_name if video_name else 0)
    while True:
        ret, frame = cap.read()
        if ret:
            yield frame
        else:
            break

def main():
    """ Sample SiamRPN Tracker
    """
    # Computation backends supported by layers
    backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
    # Target Devices for computation
    targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)

    parser = argparse.ArgumentParser(description='Use this script to run SiamRPN++ Visual Tracker',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input_video', type=str, help='Path to input video file. Skip this argument to capture frames from a camera.')
    parser.add_argument('--target_net', type=str, default='target_net.onnx', help='Path to part of SiamRPN++ ran on target frame.')
    parser.add_argument('--search_net', type=str, default='search_net.onnx', help='Path to part of SiamRPN++ ran on search frame.')
    parser.add_argument('--rpn_head', type=str, default='rpn_head.onnx', help='Path to RPN Head ONNX model.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help='Select a computation backend: '
                        "%d: automatically (by default) "
                        "%d: Halide"
                        "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit)"
                        "%d: OpenCV Implementation" % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Select a target device: '
                        "%d: CPU target (by default)"
                        "%d: OpenCL"
                        "%d: OpenCL FP16"
                        "%d: Myriad" % targets)
    args, _ = parser.parse_known_args()

    if args.input_video and not os.path.isfile(args.input_video):
        raise OSError("Input video file does not exist")
    if not os.path.isfile(args.target_net):
        raise OSError("Target Net does not exist")
    if not os.path.isfile(args.search_net):
        raise OSError("Search Net does not exist")
    if not os.path.isfile(args.rpn_head):
        raise OSError("RPN Head Net does not exist")

    #Load the Networks
    target_net = cv.dnn.readNetFromONNX(args.target_net)
    target_net.setPreferableBackend(args.backend)
    target_net.setPreferableTarget(args.target)
    search_net = cv.dnn.readNetFromONNX(args.search_net)
    search_net.setPreferableBackend(args.backend)
    search_net.setPreferableTarget(args.target)
    rpn_head = cv.dnn.readNetFromONNX(args.rpn_head)
    rpn_head.setPreferableBackend(args.backend)
    rpn_head.setPreferableTarget(args.target)
    model = ModelBuilder(target_net, search_net, rpn_head)
    tracker = SiamRPNTracker(model)

    first_frame = True
    cv.namedWindow('SiamRPN++ Tracker', cv.WINDOW_AUTOSIZE)
    for frame in get_frames(args.input_video):
        if first_frame:
            try:
                init_rect = cv.selectROI('SiamRPN++ Tracker', frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            bbox = list(map(int, outputs['bbox']))
            x,y,w,h = bbox
            cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv.imshow('SiamRPN++ Tracker', frame)
            key = cv.waitKey(1)
            if key == ord("q"):
                break

if __name__ == '__main__':
    main()
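The classes in this sample can also be driven headlessly, without the selectROI/imshow loop in main(). A minimal sketch (the ONNX file names are the parser defaults above; the video path and the initial box are made up for illustration):

import cv2 as cv
from siamrpnpp import ModelBuilder, SiamRPNTracker

# The three ONNX files are the ones linked in the module docstring; the names here
# are just the defaults used by the argument parser above.
target_net = cv.dnn.readNetFromONNX('target_net.onnx')
search_net = cv.dnn.readNetFromONNX('search_net.onnx')
rpn_head = cv.dnn.readNetFromONNX('rpn_head.onnx')
tracker = SiamRPNTracker(ModelBuilder(target_net, search_net, rpn_head))

cap = cv.VideoCapture('vtest.avi')                 # illustrative video path
ok, first_frame = cap.read()
tracker.init(first_frame, (300, 200, 100, 100))    # hand-picked initial box instead of cv.selectROI
ok, next_frame = cap.read()
result = tracker.track(next_frame)
print(result['bbox'], result['best_score'])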