Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Alexander Alekhin 2022-06-26 14:21:40 +00:00
commit 2a4926f417
12 changed files with 316 additions and 66 deletions

View File

@@ -86,6 +86,9 @@ int main( int argc, const char** argv )
"{ image i | | (required) path to reference image }"
"{ model m | | (required) path to cascade xml file }"
"{ data d | | (optional) path to video output folder }"
"{ ext | avi | (optional) output video file extension e.g. avi (default) or mp4 }"
"{ fourcc | XVID | (optional) output video file's 4-character codec e.g. XVID (default) or H264 }"
"{ fps | 15 | (optional) output video file's frames-per-second rate }"
);
// Read in the input arguments
if (parser.has("help")){
@@ -96,7 +99,9 @@ int main( int argc, const char** argv )
string model(parser.get<string>("model"));
string output_folder(parser.get<string>("data"));
string image_ref = (parser.get<string>("image"));
if (model.empty() || image_ref.empty()){
string fourcc = (parser.get<string>("fourcc"));
int fps = parser.get<int>("fps");
if (model.empty() || image_ref.empty() || fourcc.size()!=4 || fps<1){
parser.printMessage();
printLimits();
return -1;
@@ -166,11 +171,19 @@ int main( int argc, const char** argv )
// each stage, containing all weak classifiers for that stage.
bool draw_planes = false;
stringstream output_video;
output_video << output_folder << "model_visualization.avi";
output_video << output_folder << "model_visualization." << parser.get<string>("ext");
VideoWriter result_video;
if( output_folder.compare("") != 0 ){
draw_planes = true;
result_video.open(output_video.str(), VideoWriter::fourcc('X','V','I','D'), 15, Size(reference_image.cols * resize_factor, reference_image.rows * resize_factor), false);
result_video.open(output_video.str(), VideoWriter::fourcc(fourcc[0],fourcc[1],fourcc[2],fourcc[3]), fps, visualization.size(), false);
if (!result_video.isOpened()){
cerr << "the output video '" << output_video.str() << "' could not be opened."
<< " fourcc=" << fourcc
<< " fps=" << fps
<< " frameSize=" << visualization.size()
<< endl;
return -1;
}
}
if(haar){

View File

@@ -543,7 +543,7 @@
title = {Multiple view geometry in computer vision},
year = {2003},
publisher = {Cambridge university press},
url = {http://cds.cern.ch/record/1598612/files/0521540518_TOC.pdf}
url = {https://www.robots.ox.ac.uk/~vgg/hzbook/}
}
@article{Horaud95,
author = {Horaud, Radu and Dornaika, Fadi},
@@ -753,10 +753,17 @@
isbn = {0387008934},
publisher = {Springer}
}
@article{Malis,
author = {Malis, Ezio and Vargas, Manuel and others},
title = {Deeper understanding of the homography decomposition for vision-based control},
year = {2007}
@article{Malis2007,
author = {Malis, Ezio and Vargas, Manuel},
title = {{Deeper understanding of the homography decomposition for vision-based control}},
year = {2007},
url = {https://hal.inria.fr/inria-00174036},
type = {Research Report},
number = {RR-6303},
pages = {90},
institution = {{INRIA}},
keywords = {Visual servoing ; planar objects ; homography ; decomposition ; camera calibration errors ; structure from motion ; Euclidean},
pdf = {https://hal.inria.fr/inria-00174036v3/file/RR-6303.pdf},
}
@article{Marchand16,
author = {Marchand, Eric and Uchiyama, Hideaki and Spindler, Fabien},
@@ -920,7 +927,8 @@
author = {Szeliski, Richard},
title = {Computer vision: algorithms and applications},
year = {2010},
publisher = {Springer}
publisher = {Springer},
url = {https://szeliski.org/Book/}
}
@article{Rafael12,
author = {von Gioi, Rafael Grompone and Jakubowicz, J{\'e}r{\'e}mie and Morel, Jean-Michel and Randall, Gregory},

View File

@@ -16,9 +16,11 @@ Introduction {#tutorial_homography_Introduction}
This tutorial will demonstrate the basic concepts of the homography with some code.
For detailed explanations about the theory, please refer to a computer vision course or a computer vision book, e.g.:
* Multiple View Geometry in Computer Vision, @cite HartleyZ00.
* An Invitation to 3-D Vision: From Images to Geometric Models, @cite Ma:2003:IVI
* Computer Vision: Algorithms and Applications, @cite RS10
* Multiple View Geometry in Computer Vision, Richard Hartley and Andrew Zisserman, @cite HartleyZ00 (some sample chapters are available [here](https://www.robots.ox.ac.uk/~vgg/hzbook/), CVPR Tutorials are available [here](https://www.robots.ox.ac.uk/~az/tutorials/))
* An Invitation to 3-D Vision: From Images to Geometric Models, Yi Ma, Stefano Soatto, Jana Kosecka, and S. Shankar Sastry, @cite Ma:2003:IVI (a computer vision book handout is available [here](https://cs.gmu.edu/%7Ekosecka/cs685/VisionBookHandout.pdf))
* Computer Vision: Algorithms and Applications, Richard Szeliski, @cite RS10 (an electronic version is available [here](https://szeliski.org/Book/))
* Deeper understanding of the homography decomposition for vision-based control, Ezio Malis, Manuel Vargas, @cite Malis2007 (open access [here](https://hal.inria.fr/inria-00174036))
* Pose Estimation for Augmented Reality: A Hands-On Survey, Eric Marchand, Hideaki Uchiyama, Fabien Spindler, @cite Marchand16 (open access [here](https://hal.inria.fr/hal-01246370))
The tutorial code can be found here [C++](https://github.com/opencv/opencv/tree/4.x/samples/cpp/tutorial_code/features2D/Homography),
[Python](https://github.com/opencv/opencv/tree/4.x/samples/python/tutorial_code/features2D/Homography),
@@ -38,7 +40,7 @@ Briefly, the planar homography relates the transformation between two planes (up
x^{'} \\
y^{'} \\
1
\end{bmatrix} = H
\end{bmatrix} = \mathbf{H}
\begin{bmatrix}
x \\
y \\
@@ -129,22 +131,22 @@ A quick solution to retrieve the pose from the homography matrix is (see \ref po
\f[
\begin{align*}
\boldsymbol{X} &= \left( X, Y, 0, 1 \right ) \\
\boldsymbol{x} &= \boldsymbol{P}\boldsymbol{X} \\
&= \boldsymbol{K} \left[ \boldsymbol{r_1} \hspace{0.5em} \boldsymbol{r_2} \hspace{0.5em} \boldsymbol{r_3} \hspace{0.5em} \boldsymbol{t} \right ]
\mathbf{X} &= \left( X, Y, 0, 1 \right ) \\
\mathbf{x} &= \mathbf{P}\mathbf{X} \\
&= \mathbf{K} \left[ \mathbf{r_1} \hspace{0.5em} \mathbf{r_2} \hspace{0.5em} \mathbf{r_3} \hspace{0.5em} \mathbf{t} \right ]
\begin{pmatrix}
X \\
Y \\
0 \\
1
\end{pmatrix} \\
&= \boldsymbol{K} \left[ \boldsymbol{r_1} \hspace{0.5em} \boldsymbol{r_2} \hspace{0.5em} \boldsymbol{t} \right ]
&= \mathbf{K} \left[ \mathbf{r_1} \hspace{0.5em} \mathbf{r_2} \hspace{0.5em} \mathbf{t} \right ]
\begin{pmatrix}
X \\
Y \\
1
\end{pmatrix} \\
&= \boldsymbol{H}
&= \mathbf{H}
\begin{pmatrix}
X \\
Y \\
@@ -155,16 +157,16 @@ A quick solution to retrieve the pose from the homography matrix is (see \ref po
\f[
\begin{align*}
\boldsymbol{H} &= \lambda \boldsymbol{K} \left[ \boldsymbol{r_1} \hspace{0.5em} \boldsymbol{r_2} \hspace{0.5em} \boldsymbol{t} \right ] \\
\boldsymbol{K}^{-1} \boldsymbol{H} &= \lambda \left[ \boldsymbol{r_1} \hspace{0.5em} \boldsymbol{r_2} \hspace{0.5em} \boldsymbol{t} \right ] \\
\boldsymbol{P} &= \boldsymbol{K} \left[ \boldsymbol{r_1} \hspace{0.5em} \boldsymbol{r_2} \hspace{0.5em} \left( \boldsymbol{r_1} \times \boldsymbol{r_2} \right ) \hspace{0.5em} \boldsymbol{t} \right ]
\mathbf{H} &= \lambda \mathbf{K} \left[ \mathbf{r_1} \hspace{0.5em} \mathbf{r_2} \hspace{0.5em} \mathbf{t} \right ] \\
\mathbf{K}^{-1} \mathbf{H} &= \lambda \left[ \mathbf{r_1} \hspace{0.5em} \mathbf{r_2} \hspace{0.5em} \mathbf{t} \right ] \\
\mathbf{P} &= \mathbf{K} \left[ \mathbf{r_1} \hspace{0.5em} \mathbf{r_2} \hspace{0.5em} \left( \mathbf{r_1} \times \mathbf{r_2} \right ) \hspace{0.5em} \mathbf{t} \right ]
\end{align*}
\f]
This is a quick solution (see also \ref projective_transformations "2") as it does not ensure that the resulting rotation matrix is orthogonal, and the scale is estimated roughly by normalizing the first column to 1.
A solution to have a proper rotation matrix (with the properties of a rotation matrix) consists to apply a polar decomposition
(see \ref polar_decomposition "6" or \ref polar_decomposition_svd "7" for some information):
A way to obtain a proper rotation matrix (one with the properties of a rotation matrix) is to apply a polar decomposition, or an orthogonalization of the rotation matrix
(see \ref polar_decomposition "6" or \ref polar_decomposition_svd "7" or \ref polar_decomposition_svd_2 "8" or \ref Kabsch_algorithm "9" for some information):
@snippet pose_from_homography.cpp polar-decomposition-of-the-rotation-matrix
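To make the equations above concrete, here is a minimal sketch (assuming `H` and `K` are already available as `CV_64F` `cv::Mat` variables; an illustration only, not the tutorial's snippet):

```.cpp
cv::Mat Hn = K.inv() * H;                    // remove the intrinsics: K^{-1} H
double lambda = 1.0 / cv::norm(Hn.col(0));   // rough scale: normalize the first column to 1
Hn *= lambda;
cv::Mat r1 = Hn.col(0).clone(), r2 = Hn.col(1).clone(), t = Hn.col(2).clone();
cv::Mat r3 = r1.cross(r2);                   // complete the rotation: r3 = r1 x r2
cv::Mat R(3, 3, CV_64F);
r1.copyTo(R.col(0)); r2.copyTo(R.col(1)); r3.copyTo(R.col(2));
// R is only approximately a rotation matrix, hence the polar decomposition above.
```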
@@ -245,7 +247,7 @@ To check the correctness of the calculation, the matching lines are displayed:
### Demo 3: Homography from the camera displacement {#tutorial_homography_Demo3}
The homography relates the transformation between two planes and it is possible to retrieve the corresponding camera displacement that allows to go from the first to the second plane view (see @cite Malis for more information).
The homography relates the transformation between two planes, and it is possible to retrieve the corresponding camera displacement that goes from the first plane view to the second (see @cite Malis2007 for more information).
Before going into the details of computing the homography from the camera displacement, here is a short refresher on camera pose and homogeneous transformations.
The function @ref cv::solvePnP computes the camera pose from correspondences between 3D object points (points expressed in the object frame) and their projected 2D image points (the object points viewed in the image).
@@ -275,7 +277,7 @@ The intrinsic parameters and the distortion coefficients are required (see the c
Z_o \\
1
\end{bmatrix} \\
&= \boldsymbol{K} \hspace{0.2em} ^{c}\textrm{M}_o
&= \mathbf{K} \hspace{0.2em} ^{c}\mathbf{M}_o
\begin{bmatrix}
X_o \\
Y_o \\
@@ -285,9 +287,9 @@ The intrinsic parameters and the distortion coefficients are required (see the c
\end{align*}
\f]
\f$ \boldsymbol{K} \f$ is the intrinsic matrix and \f$ ^{c}\textrm{M}_o \f$ is the camera pose. The output of @ref cv::solvePnP is exactly this: `rvec` is the Rodrigues rotation vector and `tvec` the translation vector.
\f$ \mathbf{K} \f$ is the intrinsic matrix and \f$ ^{c}\mathbf{M}_o \f$ is the camera pose. The output of @ref cv::solvePnP is exactly this: `rvec` is the Rodrigues rotation vector and `tvec` the translation vector.
\f$ ^{c}\textrm{M}_o \f$ can be represented in a homogeneous form and allows to transform a point expressed in the object frame into the camera frame:
\f$ ^{c}\mathbf{M}_o \f$ can be represented in homogeneous form; it transforms a point expressed in the object frame into the camera frame:
\f[
\begin{align*}
@@ -297,7 +299,7 @@ The intrinsic parameters and the distortion coefficients are required (see the c
Z_c \\
1
\end{bmatrix} &=
\hspace{0.2em} ^{c}\textrm{M}_o
\hspace{0.2em} ^{c}\mathbf{M}_o
\begin{bmatrix}
X_o \\
Y_o \\
@@ -306,7 +308,7 @@ The intrinsic parameters and the distortion coefficients are required (see the c
\end{bmatrix} \\
&=
\begin{bmatrix}
^{c}\textrm{R}_o & ^{c}\textrm{t}_o \\
^{c}\mathbf{R}_o & ^{c}\mathbf{t}_o \\
0_{1\times3} & 1
\end{bmatrix}
\begin{bmatrix}
@@ -333,19 +335,19 @@ The intrinsic parameters and the distortion coefficients are required (see the c
Transforming a point expressed in one frame into another frame can easily be done with matrix multiplication:
* \f$ ^{c_1}\textrm{M}_o \f$ is the camera pose for the camera 1
* \f$ ^{c_2}\textrm{M}_o \f$ is the camera pose for the camera 2
* \f$ ^{c_1}\mathbf{M}_o \f$ is the camera pose for the camera 1
* \f$ ^{c_2}\mathbf{M}_o \f$ is the camera pose for the camera 2
To transform a 3D point expressed in the camera 1 frame to the camera 2 frame:
\f[
^{c_2}\textrm{M}_{c_1} = \hspace{0.2em} ^{c_2}\textrm{M}_{o} \cdot \hspace{0.1em} ^{o}\textrm{M}_{c_1} = \hspace{0.2em} ^{c_2}\textrm{M}_{o} \cdot \hspace{0.1em} \left( ^{c_1}\textrm{M}_{o} \right )^{-1} =
^{c_2}\mathbf{M}_{c_1} = \hspace{0.2em} ^{c_2}\mathbf{M}_{o} \cdot \hspace{0.1em} ^{o}\mathbf{M}_{c_1} = \hspace{0.2em} ^{c_2}\mathbf{M}_{o} \cdot \hspace{0.1em} \left( ^{c_1}\mathbf{M}_{o} \right )^{-1} =
\begin{bmatrix}
^{c_2}\textrm{R}_{o} & ^{c_2}\textrm{t}_{o} \\
^{c_2}\mathbf{R}_{o} & ^{c_2}\mathbf{t}_{o} \\
0_{3 \times 1} & 1
\end{bmatrix} \cdot
\begin{bmatrix}
^{c_1}\textrm{R}_{o}^T & - \hspace{0.2em} ^{c_1}\textrm{R}_{o}^T \cdot \hspace{0.2em} ^{c_1}\textrm{t}_{o} \\
^{c_1}\mathbf{R}_{o}^T & - \hspace{0.2em} ^{c_1}\mathbf{R}_{o}^T \cdot \hspace{0.2em} ^{c_1}\mathbf{t}_{o} \\
0_{1 \times 3} & 1
\end{bmatrix}
\f]
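As a small illustration, the composition in code, with hypothetical 4x4 `cv::Mat` poses (in the tutorial they are built from the `rvec`/`tvec` values returned by @ref cv::solvePnP):

```.cpp
cv::Mat c1Mo = cv::Mat::eye(4, 4, CV_64F); // hypothetical pose of camera 1
cv::Mat c2Mo = cv::Mat::eye(4, 4, CV_64F); // hypothetical pose of camera 2
cv::Mat c2Mc1 = c2Mo * c1Mo.inv();         // ^{c2}M_{c1} = ^{c2}M_o (^{c1}M_o)^{-1}
```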
@@ -368,11 +370,11 @@ On this figure, `n` is the normal vector of the plane and `d` the distance betwe
The [equation](https://en.wikipedia.org/wiki/Homography_(computer_vision)#3D_plane_to_plane_equation) to compute the homography from the camera displacement is:
\f[
^{2}\textrm{H}_{1} = \hspace{0.2em} ^{2}\textrm{R}_{1} - \hspace{0.1em} \frac{^{2}\textrm{t}_{1} \cdot n^T}{d}
^{2}\mathbf{H}_{1} = \hspace{0.2em} ^{2}\mathbf{R}_{1} - \hspace{0.1em} \frac{^{2}\mathbf{t}_{1} \cdot \hspace{0.1em} ^{1}\mathbf{n}^\top}{^1d}
\f]
Where \f$ ^{2}\textrm{H}_{1} \f$ is the homography matrix that maps the points in the first camera frame to the corresponding points in the second camera frame, \f$ ^{2}\textrm{R}_{1} = \hspace{0.2em} ^{c_2}\textrm{R}_{o} \cdot \hspace{0.1em} ^{c_1}\textrm{R}_{o}^{T} \f$
is the rotation matrix that represents the rotation between the two camera frames and \f$ ^{2}\textrm{t}_{1} = \hspace{0.2em} ^{c_2}\textrm{R}_{o} \cdot \left( - \hspace{0.1em} ^{c_1}\textrm{R}_{o}^{T} \cdot \hspace{0.1em} ^{c_1}\textrm{t}_{o} \right ) + \hspace{0.1em} ^{c_2}\textrm{t}_{o} \f$
Where \f$ ^{2}\mathbf{H}_{1} \f$ is the homography matrix that maps the points in the first camera frame to the corresponding points in the second camera frame, \f$ ^{2}\mathbf{R}_{1} = \hspace{0.2em} ^{c_2}\mathbf{R}_{o} \cdot \hspace{0.1em} ^{c_1}\mathbf{R}_{o}^{\top} \f$
is the rotation matrix that represents the rotation between the two camera frames and \f$ ^{2}\mathbf{t}_{1} = \hspace{0.2em} ^{c_2}\mathbf{R}_{o} \cdot \left( - \hspace{0.1em} ^{c_1}\mathbf{R}_{o}^{\top} \cdot \hspace{0.1em} ^{c_1}\mathbf{t}_{o} \right ) + \hspace{0.1em} ^{c_2}\mathbf{t}_{o} \f$
the translation vector between the two camera frames.
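In code, this is roughly the following sketch, where the hypothetical `R1`, `t1`, `R2`, `t2` hold the rotation/translation parts of the two poses, and `n`, `d` are computed as described next:

```.cpp
cv::Mat R_2to1 = R2 * R1.t();              // ^2 R_1
cv::Mat t_2to1 = t2 - R_2to1 * t1;         // ^2 t_1
cv::Mat H21 = R_2to1 - t_2to1 * n.t() / d; // Euclidean homography ^2 H_1 (watch the sign, see below)
```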
Here the normal vector `n` is the plane normal expressed in camera frame 1. It can be computed as the cross product of 2 vectors (using 3 non-collinear points that lie on the plane) or, in our case, directly with:
@@ -383,7 +385,7 @@ The distance `d` can be computed as the dot product between the plane normal and
@snippet homography_from_camera_displacement.cpp compute-plane-distance-to-the-camera-frame-1
The projective homography matrix \f$ \textbf{G} \f$ can be computed from the Euclidean homography \f$ \textbf{H} \f$ using the intrinsic matrix \f$ \textbf{K} \f$ (see @cite Malis), here assuming the same camera between the two plane views:
The projective homography matrix \f$ \textbf{G} \f$ can be computed from the Euclidean homography \f$ \textbf{H} \f$ using the intrinsic matrix \f$ \textbf{K} \f$ (see @cite Malis2007), here assuming the same camera between the two plane views:
\f[
\textbf{G} = \gamma \textbf{K} \textbf{H} \textbf{K}^{-1}
@@ -394,7 +396,7 @@ The projective homography matrix \f$ \textbf{G} \f$ can be computed from the Euc
In our case, the Z-axis of the chessboard goes inside the object whereas in the homography figure it goes outside. This is just a matter of sign:
\f[
^{2}\textrm{H}_{1} = \hspace{0.2em} ^{2}\textrm{R}_{1} + \hspace{0.1em} \frac{^{2}\textrm{t}_{1} \cdot n^T}{d}
^{2}\mathbf{H}_{1} = \hspace{0.2em} ^{2}\mathbf{R}_{1} + \hspace{0.1em} \frac{^{2}\mathbf{t}_{1} \cdot \hspace{0.1em} ^{1}\mathbf{n}^\top}{^1d}
\f]
@snippet homography_from_camera_displacement.cpp compute-homography-from-camera-displacement
@@ -472,8 +474,8 @@ As you can see, there is one solution that matches almost perfectly with the com
At least two of the solutions may further be invalidated if point correspondences are available by applying positive depth constraint (all points must be in front of the camera).
```
As the result of the decomposition is a camera displacement, if we have the initial camera pose \f$ ^{c_1}\textrm{M}_{o} \f$, we can compute the current camera pose
\f$ ^{c_2}\textrm{M}_{o} = \hspace{0.2em} ^{c_2}\textrm{M}_{c_1} \cdot \hspace{0.1em} ^{c_1}\textrm{M}_{o} \f$ and test if the 3D object points that belong to the plane are projected in front of the camera or not.
As the result of the decomposition is a camera displacement, if we have the initial camera pose \f$ ^{c_1}\mathbf{M}_{o} \f$, we can compute the current camera pose
\f$ ^{c_2}\mathbf{M}_{o} = \hspace{0.2em} ^{c_2}\mathbf{M}_{c_1} \cdot \hspace{0.1em} ^{c_1}\mathbf{M}_{o} \f$ and test if the 3D object points that belong to the plane are projected in front of the camera or not.
Another solution could be to retain the solution with the closest normal if we know the plane normal expressed at the camera 1 pose.
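A sketch of the closest-normal idea (hypothetical variable names; `expected_normal` stands for the plane normal, assumed known in the camera 1 frame):

```.cpp
std::vector<cv::Mat> Rs, ts, normals;
int nb_solutions = cv::decomposeHomographyMat(H, K, Rs, ts, normals);
int best = 0;
double best_score = -1.0;
for (int i = 0; i < nb_solutions; i++)
{
    double score = std::abs(normals[i].dot(expected_normal)); // |cos| of the angle between normals
    if (score > best_score) { best_score = score; best = i; }
}
// Rs[best] / ts[best] is the retained camera displacement.
```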
The same thing can be done with the homography matrix estimated with @ref cv::findHomography:
@@ -522,7 +524,7 @@ The [stitching module](@ref stitching) provides a complete pipeline to stitch im
The homography transformation applies only to planar structures. But in the case of a rotating camera (pure rotation around the camera axis of projection, no translation), an arbitrary world can be considered
([see previously](@ref tutorial_homography_What_is_the_homography_matrix)).
The homography can then be computed using the rotation transformation and the camera intrinsic parameters as (see for instance \ref homography_course "8"):
The homography can then be computed using the rotation transformation and the camera intrinsic parameters as (see for instance \ref homography_course "10"):
\f[
s
@@ -540,7 +542,7 @@ The homography can then be computed using the rotation transformation and the ca
\f]
To illustrate, we used Blender, a free and open-source 3D computer graphics application, to generate two camera views with only a rotation transformation between them.
More information about how to retrieve the camera intrinsic parameters and the `3x4` extrinsic matrix with respect to the world can be found in \ref answer_blender "9" (an additional transformation
More information about how to retrieve the camera intrinsic parameters and the `3x4` extrinsic matrix with respect to the world can be found in \ref answer_blender "11" (an additional transformation
is needed to get the transformation between the camera and the object frames) with Blender.
The figure below shows the two generated views of the Suzanne model, with only a rotation transformation:
@@ -609,11 +611,13 @@ Additional references {#tutorial_homography_Additional_references}
---------------------
* \anchor lecture_16 1. [Lecture 16: Planar Homographies](http://www.cse.psu.edu/~rtc12/CSE486/lecture16.pdf), Robert Collins
* \anchor projective_transformations 2. [2D projective transformations (homographies)](https://ags.cs.uni-kl.de/fileadmin/inf_ags/3dcv-ws11-12/3DCV_WS11-12_lec04.pdf), Christiano Gava, Gabriele Bleser
* \anchor szeliski 3. [Computer Vision: Algorithms and Applications](http://szeliski.org/Book/drafts/SzeliskiBook_20100903_draft.pdf), Richard Szeliski
* \anchor projective_transformations 2. [2D projective transformations (homographies)](https://web.archive.org/web/20171226115739/https://ags.cs.uni-kl.de/fileadmin/inf_ags/3dcv-ws11-12/3DCV_WS11-12_lec04.pdf), Christiano Gava, Gabriele Bleser
* \anchor szeliski 3. [Computer Vision: Algorithms and Applications](https://szeliski.org/Book/), Richard Szeliski
* \anchor answer_dsp 4. [Step by Step Camera Pose Estimation for Visual Tracking and Planar Markers](https://dsp.stackexchange.com/a/2737)
* \anchor pose_ar 5. [Pose from homography estimation](https://team.inria.fr/lagadic/camera_localization/tutorial-pose-dlt-planar-opencv.html)
* \anchor pose_ar 5. [Pose from homography estimation](https://visp-doc.inria.fr/doxygen/camera_localization/tutorial-pose-dlt-planar-opencv.html)
* \anchor polar_decomposition 6. [Polar Decomposition (in Continuum Mechanics)](http://www.continuummechanics.org/polardecomposition.html)
* \anchor polar_decomposition_svd 7. [A Personal Interview with the Singular Value Decomposition](https://web.stanford.edu/~gavish/documents/SVD_ans_you.pdf), Matan Gavish
* \anchor homography_course 8. [Homography](http://people.scs.carleton.ca/~c_shu/Courses/comp4900d/notes/homography.pdf), Dr. Gerhard Roth
* \anchor answer_blender 9. [3x4 camera matrix from blender camera](https://blender.stackexchange.com/a/38210)
* \anchor polar_decomposition_svd 7. [Chapter 3 - 3.1.2 From matrices to rotations - Theorem 3.1 (Least-squares estimation of a rotation from a matrix K)](https://www-sop.inria.fr/asclepios/cours/MVA/Rotations.pdf)
* \anchor polar_decomposition_svd_2 8. [A Personal Interview with the Singular Value Decomposition](https://web.stanford.edu/~gavish/documents/SVD_ans_you.pdf), Matan Gavish
* \anchor Kabsch_algorithm 9. [Kabsch algorithm, Computation of the optimal rotation matrix](https://en.wikipedia.org/wiki/Kabsch_algorithm#Computation_of_the_optimal_rotation_matrix)
* \anchor homography_course 10. [Homography](http://people.scs.carleton.ca/~c_shu/Courses/comp4900d/notes/homography.pdf), Dr. Gerhard Roth
* \anchor answer_blender 11. [3x4 camera matrix from blender camera](https://blender.stackexchange.com/a/38210)

View File

@@ -3230,7 +3230,7 @@ Check @ref tutorial_homography "the corresponding tutorial" for more details.
This function extracts relative camera motion between two views of a planar object and returns up to
four mathematical solution tuples of rotation, translation, and plane normal. The decomposition of
the homography matrix H is described in detail in @cite Malis.
the homography matrix H is described in detail in @cite Malis2007.
If the homography H, induced by the plane, gives the constraint
\f[s_i \vecthree{x'_i}{y'_i}{1} \sim H \vecthree{x_i}{y_i}{1}\f] on the source image points
@@ -3258,7 +3258,7 @@ CV_EXPORTS_W int decomposeHomographyMat(InputArray H,
@param pointsMask optional Mat/Vector of 8u type representing the mask for the inliers as given by the #findHomography function
This function is intended to filter the output of the #decomposeHomographyMat based on additional
information as described in @cite Malis . The summary of the method: the #decomposeHomographyMat function
information as described in @cite Malis2007 . The summary of the method: the #decomposeHomographyMat function
returns 2 unique solutions and their "opposites" for a total of 4 solutions. If we have access to the
sets of points visible in the camera frame before and after the homography transformation is applied,
we can determine which are the true potential solutions and which are the opposites by verifying which

View File

@@ -71,7 +71,7 @@ data sharing. A destructor decrements the reference counter associated with the
The buffer is deallocated if and only if the reference counter reaches zero, that is, when no other
structures refer to the same buffer. Similarly, when a Mat instance is copied, no actual data is
really copied. Instead, the reference counter is incremented to memorize that there is another owner
of the same data. There is also the Mat::clone method that creates a full copy of the matrix data.
of the same data. There is also the cv::Mat::clone method that creates a full copy of the matrix data.
See the example below:
```.cpp
// create a big 8Mb matrix
@@ -159,11 +159,11 @@ grayscale conversion. Note that frame and edges are allocated only once during t
of the loop body since all the next video frames have the same resolution. If you somehow change the
video resolution, the arrays are automatically reallocated.
The key component of this technology is the Mat::create method. It takes the desired array size and
type. If the array already has the specified size and type, the method does nothing. Otherwise, it
releases the previously allocated data, if any (this part involves decrementing the reference
The key component of this technology is the cv::Mat::create method. It takes the desired array size
and type. If the array already has the specified size and type, the method does nothing. Otherwise,
it releases the previously allocated data, if any (this part involves decrementing the reference
counter and comparing it with zero), and then allocates a new buffer of the required size. Most
functions call the Mat::create method for each output array, and so the automatic output data
functions call the cv::Mat::create method for each output array, and so the automatic output data
allocation is implemented.
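A minimal sketch of this pattern (hypothetical input file; cv::cvtColor calls Mat::create on `edges` internally):
```.cpp
Mat frame = imread("frame.png"); // hypothetical input image
Mat edges;                       // empty header; no data allocated yet
for (int i = 0; i < 100; i++)
    cvtColor(frame, edges, COLOR_BGR2GRAY); // allocates on the first call, reuses the buffer afterwards
```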
Some notable exceptions from this scheme are cv::mixChannels, cv::RNG::fill, and a few other
@@ -247,9 +247,9 @@ Examples:
// matrix (10-element complex vector)
Mat img(Size(1920, 1080), CV_8UC3); // make a 3-channel (color) image
// of 1920 columns and 1080 rows.
Mat grayscale(image.size(), CV_MAKETYPE(image.depth(), 1)); // make a 1-channel image of
// the same size and same
// channel type as img
Mat grayscale(img.size(), CV_MAKETYPE(img.depth(), 1)); // make a 1-channel image of
// the same size and same
// channel type as img
```
Arrays with more complex elements cannot be constructed or processed using OpenCV. Furthermore, each
function or method can handle only a subset of all possible array types. Usually, the more complex
@@ -269,14 +269,14 @@ extended in future based on user requests.
### InputArray and OutputArray
Many OpenCV functions process dense 2-dimensional or multi-dimensional numerical arrays. Usually,
such functions take cppMat as parameters, but in some cases it's more convenient to use
such functions take `cv::Mat` as parameters, but in some cases it's more convenient to use
`std::vector<>` (for a point set, for example) or `cv::Matx<>` (for 3x3 homography matrix and such). To
avoid many duplicates in the API, special "proxy" classes have been introduced. The base "proxy"
class is cv::InputArray. It is used for passing read-only arrays as function input. cv::OutputArray, derived
from InputArray, is used to specify an output array for a function. Normally, you should
not care about those intermediate types (and you should not declare variables of those types
explicitly) - it will all just work automatically. You can assume that instead of
InputArray/OutputArray you can always use `Mat`, `std::vector<>`, `cv::Matx<>`, `cv::Vec<>` or `cv::Scalar`. When a
InputArray/OutputArray you can always use `cv::Mat`, `std::vector<>`, `cv::Matx<>`, `cv::Vec<>` or `cv::Scalar`. When a
function has an optional input or output array, and you do not have or do not want one, pass
cv::noArray().
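For illustration, a hypothetical function written against the proxy types could look like the sketch below (not an actual OpenCV function):
```.cpp
void scaleBy(InputArray src, double alpha, OutputArray dst)
{
    Mat m = src.getMat();           // works whether the caller passed Mat, std::vector, Matx, ...
    dst.create(m.size(), m.type()); // allocate (or reuse) the output buffer
    Mat d = dst.getMat();
    m.convertTo(d, m.type(), alpha);
}
```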
@@ -288,7 +288,7 @@ the optimization algorithm did not converge), it returns a special error code (t
boolean variable).
The exceptions can be instances of the cv::Exception class or its derivatives. In its turn,
cv::Exception is a derivative of std::exception. So it can be gracefully handled in the code using
cv::Exception is a derivative of `std::exception`. So it can be gracefully handled in the code using
other standard C++ library components.
The exception is typically thrown either using the `#CV_Error(errcode, description)` macro, or its
@@ -297,7 +297,7 @@ printf-like `#CV_Error_(errcode, (printf-spec, printf-args))` variant, or using
satisfied. For performance-critical code, there is #CV_DbgAssert(condition) that is only retained in
the Debug configuration. Due to the automatic memory management, all the intermediate buffers are
automatically deallocated in case of a sudden error. You only need to add a try statement to catch
exceptions, if needed: :
exceptions, if needed:
```.cpp
try
{

View File

@@ -67,7 +67,7 @@
#if defined(__clang__) && defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define CV_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \
__msan_unpoison(adresse, size)
__msan_unpoison(address, size)
#endif
#endif
#ifndef CV_ANNOTATE_MEMORY_IS_INITIALIZED

View File

@@ -90,6 +90,7 @@ enum ImwriteFlags {
IMWRITE_JPEG_RST_INTERVAL = 4, //!< JPEG restart interval, 0 - 65535, default is 0 - no restart.
IMWRITE_JPEG_LUMA_QUALITY = 5, //!< Separate luma quality level, 0 - 100, default is -1 - don't use.
IMWRITE_JPEG_CHROMA_QUALITY = 6, //!< Separate chroma quality level, 0 - 100, default is -1 - don't use.
IMWRITE_JPEG_SAMPLING_FACTOR = 7, //!< For JPEG, set sampling factor. See cv::ImwriteJPEGSamplingFactorParams.
IMWRITE_PNG_COMPRESSION = 16, //!< For PNG, it can be the compression level from 0 to 9. A higher value means a smaller size and longer compression time. If specified, strategy is changed to IMWRITE_PNG_STRATEGY_DEFAULT (Z_DEFAULT_STRATEGY). Default value is 1 (best speed setting).
IMWRITE_PNG_STRATEGY = 17, //!< One of cv::ImwritePNGFlags, default is IMWRITE_PNG_STRATEGY_RLE.
IMWRITE_PNG_BILEVEL = 18, //!< Binary level PNG, 0 or 1, default is 0.
@@ -105,6 +106,15 @@ enum ImwriteFlags {
IMWRITE_JPEG2000_COMPRESSION_X1000 = 272 //!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000.
};
enum ImwriteJPEGSamplingFactorParams {
IMWRITE_JPEG_SAMPLING_FACTOR_411 = 0x411111, //!< 4x1,1x1,1x1
IMWRITE_JPEG_SAMPLING_FACTOR_420 = 0x221111, //!< 2x2,1x1,1x1(Default)
IMWRITE_JPEG_SAMPLING_FACTOR_422 = 0x211111, //!< 2x1,1x1,1x1
IMWRITE_JPEG_SAMPLING_FACTOR_440 = 0x121111, //!< 1x2,1x1,1x1
IMWRITE_JPEG_SAMPLING_FACTOR_444 = 0x111111 //!< 1x1,1x1,1x1(No subsampling)
};
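// Usage sketch (hypothetical call site): request 4:4:4, i.e. no chroma subsampling, when writing a JPEG:
//   std::vector<int> params = { IMWRITE_JPEG_SAMPLING_FACTOR, IMWRITE_JPEG_SAMPLING_FACTOR_444 };
//   cv::imwrite("out.jpg", img, params);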
enum ImwriteEXRTypeFlags {
/*IMWRITE_EXR_TYPE_UNIT = 0, //!< not supported */
IMWRITE_EXR_TYPE_HALF = 1, //!< store as HALF (FP16)

View File

@@ -44,6 +44,8 @@
#ifdef HAVE_JPEG
#include <opencv2/core/utils/logger.hpp>
#ifdef _MSC_VER
//interaction between '_setjmp' and C++ object destruction is non-portable
#pragma warning(disable: 4611)
@@ -640,6 +642,7 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
int rst_interval = 0;
int luma_quality = -1;
int chroma_quality = -1;
uint32_t sampling_factor = 0; // 0 = not specified; the encoder default is then used (equivalent to 0x221111)
for( size_t i = 0; i < params.size(); i += 2 )
{
@@ -687,6 +690,27 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
rst_interval = params[i+1];
rst_interval = MIN(MAX(rst_interval, 0), 65535L);
}
if( params[i] == IMWRITE_JPEG_SAMPLING_FACTOR )
{
sampling_factor = static_cast<uint32_t>(params[i+1]);
switch ( sampling_factor )
{
case IMWRITE_JPEG_SAMPLING_FACTOR_411:
case IMWRITE_JPEG_SAMPLING_FACTOR_420:
case IMWRITE_JPEG_SAMPLING_FACTOR_422:
case IMWRITE_JPEG_SAMPLING_FACTOR_440:
case IMWRITE_JPEG_SAMPLING_FACTOR_444:
// OK.
break;
default:
CV_LOG_WARNING(NULL, cv::format("Unknown value for IMWRITE_JPEG_SAMPLING_FACTOR: 0x%06x", sampling_factor ) );
sampling_factor = 0;
break;
}
}
}
jpeg_set_defaults( &cinfo );
@@ -699,6 +723,14 @@ bool JpegEncoder::write( const Mat& img, const std::vector<int>& params )
if( optimize )
cinfo.optimize_coding = TRUE;
if( (channels > 1) && ( sampling_factor != 0 ) )
{
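// Note: sampling_factor packs the factors as nibbles 0x(H0 V0 H1 V1 H2 V2),
// e.g. 0x221111 = comp0 2x2, comp1 1x1, comp2 1x1 (4:2:0); only comp0 varies here.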
cinfo.comp_info[0].v_samp_factor = (sampling_factor >> 16 ) & 0xF;
cinfo.comp_info[0].h_samp_factor = (sampling_factor >> 20 ) & 0xF;
cinfo.comp_info[1].v_samp_factor = 1;
cinfo.comp_info[1].h_samp_factor = 1;
}
#if JPEG_LIB_VERSION >= 70
if (luma_quality >= 0 && chroma_quality >= 0)
{

View File

@@ -178,6 +178,98 @@ TEST(Imgcodecs_Jpeg, encode_decode_rst_jpeg)
EXPECT_EQ(0, remove(output_normal.c_str()));
}
//==================================================================================================
static const uint32_t default_sampling_factor = static_cast<uint32_t>(0x221111);
static uint32_t test_jpeg_subsampling( const Mat src, const vector<int> param )
{
vector<uint8_t> jpeg;
if ( cv::imencode(".jpg", src, jpeg, param ) == false )
{
return 0;
}
if ( src.channels() != 3 )
{
return 0;
}
// Find SOF Marker(FFC0)
int sof_offset = 0; // not found.
int jpeg_size = static_cast<int>( jpeg.size() );
for ( int i = 0 ; i < jpeg_size - 1; i++ )
{
if ( (jpeg[i] == 0xff ) && ( jpeg[i+1] == 0xC0 ) )
{
sof_offset = i;
break;
}
}
if ( sof_offset == 0 )
{
return 0;
}
// Extract Subsampling Factor from SOF.
return ( jpeg[sof_offset + 0x0A + 3 * 0 + 1] << 16 ) +
( jpeg[sof_offset + 0x0A + 3 * 1 + 1] << 8 ) +
( jpeg[sof_offset + 0x0A + 3 * 2 + 1] ) ;
}
TEST(Imgcodecs_Jpeg, encode_subsamplingfactor_default)
{
vector<int> param;
Mat src( 48, 64, CV_8UC3, cv::Scalar::all(0) );
EXPECT_EQ( default_sampling_factor, test_jpeg_subsampling(src, param) );
}
TEST(Imgcodecs_Jpeg, encode_subsamplingfactor_usersetting_valid)
{
Mat src( 48, 64, CV_8UC3, cv::Scalar::all(0) );
const uint32_t sampling_factor_list[] = {
IMWRITE_JPEG_SAMPLING_FACTOR_411,
IMWRITE_JPEG_SAMPLING_FACTOR_420,
IMWRITE_JPEG_SAMPLING_FACTOR_422,
IMWRITE_JPEG_SAMPLING_FACTOR_440,
IMWRITE_JPEG_SAMPLING_FACTOR_444,
};
const int sampling_factor_list_num = 5;
for ( int i = 0 ; i < sampling_factor_list_num; i ++ )
{
vector<int> param;
param.push_back( IMWRITE_JPEG_SAMPLING_FACTOR );
param.push_back( sampling_factor_list[i] );
EXPECT_EQ( sampling_factor_list[i], test_jpeg_subsampling(src, param) );
}
}
TEST(Imgcodecs_Jpeg, encode_subsamplingfactor_usersetting_invalid)
{
Mat src( 48, 64, CV_8UC3, cv::Scalar::all(0) );
const uint32_t sampling_factor_list[] = { // Invalid list
0x111112,
0x000000,
0x001111,
0xFF1111,
0x141111, // 1x4,1x1,1x1 - unknown
0x241111, // 2x4,1x1,1x1 - unknown
0x421111, // 4x2,1x1,1x1 - unknown
0x441111, // 4x4,1x1,1x1 - 410(libjpeg cannot handle it)
};
const int sampling_factor_list_num = 8;
for ( int i = 0 ; i < sampling_factor_list_num; i ++ )
{
vector<int> param;
param.push_back( IMWRITE_JPEG_SAMPLING_FACTOR );
param.push_back( sampling_factor_list[i] );
EXPECT_EQ( default_sampling_factor, test_jpeg_subsampling(src, param) );
}
}
#endif // HAVE_JPEG
}} // namespace

View File

@@ -1633,7 +1633,7 @@ bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step,
img_convert_ctx,
sw_picture->data,
sw_picture->linesize,
0, context->coded_height,
0, sw_picture->height,
rgb_picture.data,
rgb_picture.linesize
);

View File

@@ -0,0 +1,82 @@
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>
#include <vector>
using namespace std;
using namespace cv;
int main(int /*argc*/, const char** /* argv */ )
{
Mat framebuffer( 160 * 2, 160 * 5, CV_8UC3, cv::Scalar::all(255) );
Mat img( 160, 160, CV_8UC3, cv::Scalar::all(255) );
// Create test image.
{
const Point center( img.cols / 2, img.rows / 2 );
for( int radius = 5; radius < img.rows ; radius += 3 )
{
cv::circle( img, center, radius, Scalar(255,0,255) );
}
cv::rectangle( img, Point(0,0), Point(img.cols-1, img.rows-1), Scalar::all(0), 2 );
}
// Draw original image(s).
int top = 0; // Upper images
{
for( int left = 0 ; left < img.cols * 5 ; left += img.cols ){
Mat roi = framebuffer( Rect( left, top, img.cols, img.rows ) );
img.copyTo(roi);
cv::putText( roi, "original", Point(5,15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar::all(0), 2, 4, false );
}
}
// Draw lossy images
top += img.rows; // Lower images
{
struct test_config{
string comment;
uint32_t sampling_factor;
} config [] = {
{ "411", IMWRITE_JPEG_SAMPLING_FACTOR_411 },
{ "420", IMWRITE_JPEG_SAMPLING_FACTOR_420 },
{ "422", IMWRITE_JPEG_SAMPLING_FACTOR_422 },
{ "440", IMWRITE_JPEG_SAMPLING_FACTOR_440 },
{ "444", IMWRITE_JPEG_SAMPLING_FACTOR_444 },
};
const int config_num = 5;
int left = 0;
for ( int i = 0 ; i < config_num; i++ )
{
// Compress images with sampling factor parameter.
vector<int> param;
param.push_back( IMWRITE_JPEG_SAMPLING_FACTOR );
param.push_back( config[i].sampling_factor );
vector<uint8_t> jpeg;
(void) imencode(".jpg", img, jpeg, param );
// Decompress it.
Mat jpegMat(jpeg);
Mat lossy_img = imdecode(jpegMat, -1);
// Copy into framebuffer and comment
Mat roi = framebuffer( Rect( left, top, lossy_img.cols, lossy_img.rows ) );
lossy_img.copyTo(roi);
cv::putText( roi, config[i].comment, Point(5,155), FONT_HERSHEY_SIMPLEX, 0.5, Scalar::all(0), 2, 4, false );
left += lossy_img.cols;
}
}
// Output framebuffer(as lossless).
imwrite( "imgcodecs_jpeg_samplingfactor_result.png", framebuffer );
return 0;
}

View File

@@ -109,9 +109,18 @@ void poseEstimationFromCoplanarPoints(const string &imgPath, const string &intri
//! [polar-decomposition-of-the-rotation-matrix]
cout << "R (before polar decomposition):\n" << R << "\ndet(R): " << determinant(R) << endl;
Mat W, U, Vt;
Mat_<double> W, U, Vt;
SVDecomp(R, W, U, Vt);
R = U*Vt;
double det = determinant(R);
if (det < 0)
{
Vt.at<double>(2,0) *= -1;
Vt.at<double>(2,1) *= -1;
Vt.at<double>(2,2) *= -1;
R = U*Vt;
}
cout << "R (after polar decomposition):\n" << R << "\ndet(R): " << determinant(R) << endl;
//! [polar-decomposition-of-the-rotation-matrix]