mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Fixed several mistakes in documentation
This commit is contained in:
parent
5f8715c8b4
commit
b1eba01afb
@ -312,7 +312,7 @@ def process_module(module, path):
|
||||
if namespace:
|
||||
name = name[len(namespace) + 1:]
|
||||
#print namespace, parent, name, fn[0]
|
||||
if not namespace and not parent and not name.startswith("cv") and not name.startswith("CV_"):
|
||||
if not namespace and not parent and not name.startswith("cv") and not name.startswith("icv") and not name.startswith("CV_"):
|
||||
logerror(ERROR_004_MISSEDNAMESPACE, "function " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
|
||||
else:
|
||||
fdescr = (namespace, parent, name, fn)
|
||||
|
@ -7,7 +7,7 @@ applyColorMap
|
||||
Applies a GNU Octave/MATLAB equivalent colormap on a given image.
|
||||
|
||||
.. ocv:function:: void applyColorMap(InputArray src, OutputArray dst, int colormap)
|
||||
|
||||
|
||||
:param src: The source image, grayscale or colored does not matter.
|
||||
:param dst: The result is the colormapped source image. Note: :ocv:func:`Mat::create` is called on dst.
|
||||
:param colormap: The colormap to apply, see the list of available colormaps below.
|
||||
@ -55,10 +55,10 @@ In OpenCV 2.4 you only need :ocv:func:`applyColorMap` to apply a colormap on a g
|
||||
if (argc > 1) {
|
||||
filename = string(argv[1]);
|
||||
}
|
||||
// The following lines show how to apply a colormap on a given image
|
||||
// and show it with cv::imshow example with an image. An exception is
|
||||
// thrown if the path to the image is invalid.
|
||||
if(!filename.empty()) {
|
||||
// The following lines show how to apply a colormap on a given image
|
||||
// and show it with cv::imshow example with an image. An exception is
|
||||
// thrown if the path to the image is invalid.
|
||||
if(!filename.empty()) {
|
||||
Mat img0 = imread(filename);
|
||||
// Throw an exception, if the image can't be read:
|
||||
if(img0.empty()) {
|
||||
@ -71,9 +71,9 @@ In OpenCV 2.4 you only need :ocv:func:`applyColorMap` to apply a colormap on a g
|
||||
// Show the result:
|
||||
imshow("cm_img0", cm_img0);
|
||||
waitKey(0);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
And here are the color scales for each of the available colormaps:
|
||||
|
@ -6,7 +6,7 @@ FaceRecognizer
|
||||
FaceRecognizer
|
||||
--------------
|
||||
|
||||
.. ocv:class:: FaceRecognizer
|
||||
.. ocv:class:: FaceRecognizer : public Algorithm
|
||||
|
||||
All face recognition models in OpenCV are derived from the abstract base class :ocv:class:`FaceRecognizer`, which provides
|
||||
a unified access to all face recognition algorithms in OpenCV. ::
|
||||
@ -143,14 +143,8 @@ And finally train it on the given dataset (the face images and labels):
|
||||
FaceRecognizer::predict
|
||||
-----------------------
|
||||
|
||||
.. ocv:function:: int FaceRecognizer::predict(InputArray src) const
|
||||
|
||||
Predicts a label for a given input image.
|
||||
|
||||
:param src: Sample image to get a prediction from.
|
||||
|
||||
|
||||
.. ocv:function:: void predict(InputArray src, int &label, double &confidence) const
|
||||
.. ocv:function:: int FaceRecognizer::predict( InputArray src ) const = 0
|
||||
.. ocv:function:: void FaceRecognizer::predict( InputArray src, int & label, double & confidence ) const = 0
|
||||
|
||||
Predicts a label and associated confidence (e.g. distance) for a given input image.
|
||||
|
||||
@ -219,8 +213,8 @@ FaceRecognizer::load
|
||||
|
||||
Loads a :ocv:class:`FaceRecognizer` and its model state.
|
||||
|
||||
.. ocv:function:: void FaceRecognizer::load(const string& filename)
|
||||
.. ocv:function:: void FaceRecognizer::load(FileStorage& fs)
|
||||
.. ocv:function:: void FaceRecognizer::load( const string& filename )
|
||||
.. ocv:function:: void FaceRecognizer::load( const FileStorage& fs ) = 0
|
||||
|
||||
Loads a persisted model and state from a given XML or YAML file. Every
|
||||
:ocv:class:`FaceRecognizer` has to override ``FaceRecognizer::load(FileStorage& fs)``
|
||||
|
@ -12,7 +12,7 @@ Introduction
|
||||
OpenCV 2.4 now comes with the very new :ocv:class:`FaceRecognizer` class for face recognition, so you can start experimenting with face recognition right away. This document is the guide I wished for when I was getting started with face recognition. It shows you how to perform face recognition with :ocv:class:`FaceRecognizer` in OpenCV (with full source code listings) and gives you an introduction to the algorithms behind it. I'll also show how to create the visualizations you can find in many publications, because a lot of people asked for them.
|
||||
|
||||
The currently available algorithms are:
|
||||
|
||||
|
||||
* Eigenfaces (see :ocv:func:`createEigenFaceRecognizer`)
|
||||
* Fisherfaces (see :ocv:func:`createFisherFaceRecognizer`)
|
||||
* Local Binary Patterns Histograms (see :ocv:func:`createLBPHFaceRecognizer`)
|
||||
@ -28,7 +28,7 @@ Face recognition is an easy task for humans. Experiments in [Tu06]_ have shown,
|
||||
|
||||
Face recognition based on the geometric features of a face is probably the most intuitive approach to face recognition. One of the first automated face recognition systems was described in [Kanade73]_: marker points (position of eyes, ears, nose, ...) were used to build a feature vector (distance between the points, angle between them, ...). The recognition was performed by calculating the Euclidean distance between feature vectors of a probe and reference image. Such a method is robust against changes in illumination by its nature, but has a huge drawback: the accurate registration of the marker points is complicated, even with state of the art algorithms. Some of the latest work on geometric face recognition was carried out in [Bru92]_. A 22-dimensional feature vector was used and experiments on large datasets have shown, that geometrical features alone may not carry enough information for face recognition.
|
||||
|
||||
The Eigenfaces method described in [TP91]_ took a holistic approach to face recognition: A facial image is a point from a high-dimensional image space and a lower-dimensional representation is found, where classification becomes easy. The lower-dimensional subspace is found with Principal Component Analysis, which identifies the axes with maximum variance. While this kind of transformation is optimal from a reconstruction standpoint, it doesn't take any class labels into account. Imagine a situation where the variance is generated from external sources, let it be light. The axes with maximum variance do not necessarily contain any discriminative information at all, hence a classification becomes impossible. So a class-specific projection with a Linear Discriminant Analysis was applied to face recognition in [BHK97]_. The basic idea is to minimize the variance within a class, while maximizing the variance between the classes at the same time.
|
||||
The Eigenfaces method described in [TP91]_ took a holistic approach to face recognition: A facial image is a point from a high-dimensional image space and a lower-dimensional representation is found, where classification becomes easy. The lower-dimensional subspace is found with Principal Component Analysis, which identifies the axes with maximum variance. While this kind of transformation is optimal from a reconstruction standpoint, it doesn't take any class labels into account. Imagine a situation where the variance is generated from external sources, let it be light. The axes with maximum variance do not necessarily contain any discriminative information at all, hence a classification becomes impossible. So a class-specific projection with a Linear Discriminant Analysis was applied to face recognition in [BHK97]_. The basic idea is to minimize the variance within a class, while maximizing the variance between the classes at the same time.
|
||||
|
||||
Recently various methods for a local feature extraction emerged. To avoid the high-dimensionality of the input data only local regions of an image are described, the extracted features are (hopefully) more robust against partial occlusion, illumination and small sample size. Algorithms used for a local feature extraction are Gabor Wavelets ([Wiskott97]_), Discrete Cosine Transform ([Messer06]_) and Local Binary Patterns ([AHP04]_). It's still an open research question what's the best way to preserve spatial information when applying a local feature extraction, because spatial information is potentially useful information.
|
||||
|
||||
@ -38,25 +38,25 @@ Face Database
|
||||
Let's get some data to experiment with first. I don't want to do a toy example here. We are doing face recognition, so you'll need some face images! You can either create your own dataset or start with one of the available face databases, `http://face-rec.org/databases/ <http://face-rec.org/databases>`_ gives you an up-to-date overview. Three interesting databases are (parts of the description are quoted from `http://face-rec.org <http://face-rec.org>`_):
|
||||
|
||||
* `AT&T Facedatabase <http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html>`_ The AT&T Facedatabase, sometimes also referred to as *ORL Database of Faces*, contains ten different images of each of 40 distinct subjects. For some subjects, the images were taken at different times, varying the lighting, facial expressions (open / closed eyes, smiling / not smiling) and facial details (glasses / no glasses). All the images were taken against a dark homogeneous background with the subjects in an upright, frontal position (with tolerance for some side movement).
|
||||
|
||||
* `Yale Facedatabase A <http://cvc.yale.edu/projects/yalefaces/yalefaces.html>`_ The AT&T Facedatabase is good for initial tests, but it's a fairly easy database. The Eigenfaces method already has a 97% recognition rate, so you won't see any improvements with other algorithms. The Yale Facedatabase A is a more appropriate dataset for initial experiments, because the recognition problem is harder. The database consists of 15 people (14 male, 1 female) each with 11 grayscale images sized :math:`320 \times 243` pixel. There are changes in the light conditions (center light, left light, right light), facial expressions (happy, normal, sad, sleepy, surprised, wink) and glasses (glasses, no-glasses).
|
||||
|
||||
* `Yale Facedatabase A <http://cvc.yale.edu/projects/yalefaces/yalefaces.html>`_ The AT&T Facedatabase is good for initial tests, but it's a fairly easy database. The Eigenfaces method already has a 97% recognition rate, so you won't see any improvements with other algorithms. The Yale Facedatabase A is a more appropriate dataset for initial experiments, because the recognition problem is harder. The database consists of 15 people (14 male, 1 female) each with 11 grayscale images sized :math:`320 \times 243` pixel. There are changes in the light conditions (center light, left light, right light), facial expressions (happy, normal, sad, sleepy, surprised, wink) and glasses (glasses, no-glasses).
|
||||
|
||||
Bad news is it's not available for public download anymore, because the original server seems to be down. You can find some sites mirroring it (`like the MIT <http://vismod.media.mit.edu/vismod/classes/mas622-00/datasets/>`_), but I can't make guarantees about the integrity. If you need to crop and align the images yourself, read my notes at `bytefish.de/blog/fisherfaces <http://bytefish.de/blog/fisherfaces>`_.
|
||||
|
||||
|
||||
* `Extended Yale Facedatabase B <http://vision.ucsd.edu/~leekc/ExtYaleDatabase/ExtYaleB.html>`_ The Extended Yale Facedatabase B contains 2414 images of 38 different people in its cropped version. The focus of this database is set on extracting features that are robust to illumination, the images have almost no variation in emotion/occlusion/... . I personally think, that this dataset is too large for the experiments I perform in this document. You better use the `AT&T Facedatabase <http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html>`_ for initial testing. A first version of the Yale Facedatabase B was used in [BHK97]_ to see how the Eigenfaces and Fisherfaces method perform under heavy illumination changes. [Lee05]_ used the same setup to take 16128 images of 28 people. The Extended Yale Facedatabase B is the merge of the two databases, which is now known as Extended Yalefacedatabase B.
|
||||
|
||||
Preparing the data
|
||||
-------------------
|
||||
|
||||
Once we have acquired some data, we'll need to read it in our program. In the demo I have decided to read the images from a very simple CSV file. Why? Because it's the simplest platform-independent approach I can think of. However, if you know a simpler solution please ping me about it. Basically all the CSV file needs to contain are lines composed of a ``filename`` followed by a ``;`` followed by the ``label`` (as *integer number*), making up a line like this:
|
||||
Once we have acquired some data, we'll need to read it in our program. In the demo I have decided to read the images from a very simple CSV file. Why? Because it's the simplest platform-independent approach I can think of. However, if you know a simpler solution please ping me about it. Basically all the CSV file needs to contain are lines composed of a ``filename`` followed by a ``;`` followed by the ``label`` (as *integer number*), making up a line like this:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
/path/to/image.ext;0
|
||||
|
||||
|
||||
Let's dissect the line. ``/path/to/image.ext`` is the path to an image, probably something like this if you are in Windows: ``C:/faces/person0/image0.jpg``. Then there is the separator ``;`` and finally we assign the label ``0`` to the image. Think of the label as the subject (the person) this image belongs to, so same subjects (persons) should have the same label.
|
||||
|
||||
Download the AT&T Facedatabase from AT&T Facedatabase and the corresponding CSV file from at.txt, which looks like this (file is without ... of course):
|
||||
Download the AT&T Facedatabase from AT&T Facedatabase and the corresponding CSV file from at.txt, which looks like this (file is without ... of course):
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
@ -69,12 +69,12 @@ Download the AT&T Facedatabase from AT&T Facedatabase and the corresponding CSV
|
||||
./at/s40/1.pgm;39
|
||||
./at/s40/2.pgm;39
|
||||
|
||||
Imagine I have extracted the files to ``D:/data/at`` and have downloaded the CSV file to ``D:/data/at.txt``. Then you would simply need to Search & Replace ``./`` with ``D:/data/``. You can do that in an editor of your choice, every sufficiently advanced editor can do this. Once you have a CSV file with valid filenames and labels, you can run any of the demos by passing the path to the CSV file as parameter:
|
||||
Imagine I have extracted the files to ``D:/data/at`` and have downloaded the CSV file to ``D:/data/at.txt``. Then you would simply need to Search & Replace ``./`` with ``D:/data/``. You can do that in an editor of your choice, every sufficiently advanced editor can do this. Once you have a CSV file with valid filenames and labels, you can run any of the demos by passing the path to the CSV file as parameter:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
facerec_demo.exe D:/data/at.txt
|
||||
|
||||
|
||||
Creating the CSV File
|
||||
+++++++++++++++++++++
|
||||
|
||||
@ -97,8 +97,8 @@ You don't really want to create the CSV file by hand. I have prepared you a litt
|
||||
| |-- 1.pgm
|
||||
| |-- ...
|
||||
| |-- 10.pgm
|
||||
|
||||
|
||||
|
||||
|
||||
Then simply call create_csv.py with the path to the folder, just like this and you could save the output:
|
||||
|
||||
.. code-block:: none
|
||||
@ -137,38 +137,38 @@ Let :math:`X = \{ x_{1}, x_{2}, \ldots, x_{n} \}` be a random vector with observ
|
||||
1. Compute the mean :math:`\mu`
|
||||
|
||||
.. math::
|
||||
|
||||
|
||||
\mu = \frac{1}{n} \sum_{i=1}^{n} x_{i}
|
||||
|
||||
|
||||
2. Compute the covariance matrix :math:`S`
|
||||
|
||||
.. math::
|
||||
|
||||
|
||||
S = \frac{1}{n} \sum_{i=1}^{n} (x_{i} - \mu) (x_{i} - \mu)^{T}
|
||||
|
||||
|
||||
3. Compute the eigenvalues :math:`\lambda_{i}` and eigenvectors :math:`v_{i}` of :math:`S`
|
||||
|
||||
.. math::
|
||||
|
||||
.. math::
|
||||
|
||||
S v_{i} = \lambda_{i} v_{i}, i=1,2,\ldots,n
|
||||
|
||||
|
||||
4. Order the eigenvectors descending by their eigenvalue. The :math:`k` principal components are the eigenvectors corresponding to the :math:`k` largest eigenvalues.
|
||||
|
||||
The :math:`k` principal components of the observed vector :math:`x` are then given by:
|
||||
|
||||
.. math::
|
||||
|
||||
y = W^{T} (x - \mu)
|
||||
y = W^{T} (x - \mu)
|
||||
|
||||
|
||||
where :math:`W = (v_{1}, v_{2}, \ldots, v_{k})`.
|
||||
where :math:`W = (v_{1}, v_{2}, \ldots, v_{k})`.
|
||||
|
||||
The reconstruction from the PCA basis is given by:
|
||||
|
||||
.. math::
|
||||
|
||||
x = W y + \mu
|
||||
|
||||
x = W y + \mu
|
||||
|
||||
where :math:`W = (v_{1}, v_{2}, \ldots, v_{k})`.
|
||||
|
||||
|
||||
@ -182,14 +182,14 @@ Still there's one problem left to solve. Imagine we are given :math:`400` images
|
||||
|
||||
.. math::
|
||||
|
||||
X^{T} X v_{i} = \lambda_{i} v_{i}
|
||||
X^{T} X v_{i} = \lambda_{i} v_{i}
|
||||
|
||||
|
||||
and get the original eigenvectors of :math:`S = X X^{T}` with a left multiplication of the data matrix:
|
||||
|
||||
.. math::
|
||||
|
||||
X X^{T} (X v_{i}) = \lambda_{i} (X v_{i})
|
||||
X X^{T} (X v_{i}) = \lambda_{i} (X v_{i})
|
||||
|
||||
The resulting eigenvectors are orthogonal; to get orthonormal eigenvectors they need to be normalized to unit length. I don't want to turn this into a publication, so please look into [Duda01]_ for the derivation and proof of the equations.
|
||||
|
||||
@ -206,7 +206,7 @@ I've used the jet colormap, so you can see how the grayscale values are distribu
|
||||
|
||||
.. image:: img/eigenfaces_opencv.png
|
||||
:align: center
|
||||
|
||||
|
||||
We've already seen, that we can reconstruct a face from its lower dimensional approximation. So let's see how many Eigenfaces are needed for a good reconstruction. I'll do a subplot with :math:`10,30,\ldots,310` Eigenfaces:
|
||||
|
||||
.. code-block:: cpp
|
||||
@ -238,7 +238,7 @@ Fisherfaces
|
||||
|
||||
The Principal Component Analysis (PCA), which is the core of the Eigenfaces method, finds a linear combination of features that maximizes the total variance in data. While this is clearly a powerful way to represent data, it doesn't consider any classes and so a lot of discriminative information *may* be lost when throwing components away. Imagine a situation where the variance in your data is generated by an external source, let it be the light. The components identified by a PCA do not necessarily contain any discriminative information at all, so the projected samples are smeared together and a classification becomes impossible (see `http://www.bytefish.de/wiki/pca_lda_with_gnu_octave <http://www.bytefish.de/wiki/pca_lda_with_gnu_octave>`_ for an example).
|
||||
|
||||
The Linear Discriminant Analysis performs a class-specific dimensionality reduction and was invented by the great statistician `Sir R. A. Fisher <http://en.wikipedia.org/wiki/Ronald_Fisher>`_. He successfully used it for classifying flowers in his 1936 paper *The use of multiple measurements in taxonomic problems* [Fisher36]_. In order to find the combination of features that separates best between classes the Linear Discriminant Analysis maximizes the ratio of between-classes to within-classes scatter, instead of maximizing the overall scatter. The idea is simple: same classes should cluster tightly together, while different classes are as far away as possible from each other in the lower-dimensional representation. This was also recognized by `Belhumeur <http://www.cs.columbia.edu/~belhumeur/>`_, `Hespanha <http://www.ece.ucsb.edu/~hespanha/>`_ and `Kriegman <http://cseweb.ucsd.edu/~kriegman/>`_ and so they applied a Discriminant Analysis to face recognition in [BHK97]_.
|
||||
The Linear Discriminant Analysis performs a class-specific dimensionality reduction and was invented by the great statistician `Sir R. A. Fisher <http://en.wikipedia.org/wiki/Ronald_Fisher>`_. He successfully used it for classifying flowers in his 1936 paper *The use of multiple measurements in taxonomic problems* [Fisher36]_. In order to find the combination of features that separates best between classes the Linear Discriminant Analysis maximizes the ratio of between-classes to within-classes scatter, instead of maximizing the overall scatter. The idea is simple: same classes should cluster tightly together, while different classes are as far away as possible from each other in the lower-dimensional representation. This was also recognized by `Belhumeur <http://www.cs.columbia.edu/~belhumeur/>`_, `Hespanha <http://www.ece.ucsb.edu/~hespanha/>`_ and `Kriegman <http://cseweb.ucsd.edu/~kriegman/>`_ and so they applied a Discriminant Analysis to face recognition in [BHK97]_.
|
||||
|
||||
Algorithmic Description
|
||||
-----------------------
|
||||
@ -288,7 +288,7 @@ Following [BHK97]_, a solution for this optimization problem is given by solving
|
||||
|
||||
.. math::
|
||||
:nowrap:
|
||||
|
||||
|
||||
\begin{align*}
|
||||
S_{B} v_{i} & = & \lambda_{i} S_w v_{i} \nonumber \\
|
||||
S_{W}^{-1} S_{B} v_{i} & = & \lambda_{i} v_{i}
|
||||
@ -323,7 +323,7 @@ For this example I am going to use the Yale Facedatabase A, just because the plo
|
||||
|
||||
.. image:: img/fisherfaces_opencv.png
|
||||
:align: center
|
||||
|
||||
|
||||
The Fisherfaces method learns a class-specific transformation matrix, so they do not capture illumination as obviously as the Eigenfaces method. The Discriminant Analysis instead finds the facial features to discriminate between the persons. It's important to mention, that the performance of the Fisherfaces heavily depends on the input data as well. Practically said: if you learn the Fisherfaces for well-illuminated pictures only and you try to recognize faces in bad-illuminated scenes, then the method is likely to find the wrong components (just because those features may not be predominant on bad illuminated images). This is somewhat logical, since the method had no chance to learn the illumination.
|
||||
|
||||
The Fisherfaces allow a reconstruction of the projected image, just like the Eigenfaces did. But since we only identified the features to distinguish between subjects, you can't expect a nice reconstruction of the original image. For the Fisherfaces method we'll project the sample image onto each of the Fisherfaces instead. So you'll have a nice visualization, which feature each of the Fisherfaces describes:
|
||||
@ -354,7 +354,7 @@ The differences may be subtle for the human eyes, but you should be able to see
|
||||
|
||||
Local Binary Patterns Histograms
|
||||
================================
|
||||
|
||||
|
||||
Eigenfaces and Fisherfaces take a somewhat holistic approach to face recognition. You treat your data as a vector somewhere in a high-dimensional image space. We all know high-dimensionality is bad, so a lower-dimensional subspace is identified, where (probably) useful information is preserved. The Eigenfaces approach maximizes the total scatter, which can lead to problems if the variance is generated by an external source, because components with a maximum variance over all classes aren't necessarily useful for classification (see `http://www.bytefish.de/wiki/pca_lda_with_gnu_octave <http://www.bytefish.de/wiki/pca_lda_with_gnu_octave>`_). So to preserve some discriminative information we applied a Linear Discriminant Analysis and optimized as described in the Fisherfaces method. The Fisherfaces method worked great... at least for the constrained scenario we've assumed in our model.
|
||||
|
||||
Now real life isn't perfect. You simply can't guarantee perfect light settings in your images or 10 different images of a person. So what if there's only one image for each person? Our covariance estimates for the subspace *may* be horribly wrong, so will the recognition. Remember the Eigenfaces method had a 96% recognition rate on the AT&T Facedatabase? How many images do we actually need to get such useful estimates? Here are the Rank-1 recognition rates of the Eigenfaces and Fisherfaces method on the AT&T Facedatabase, which is a fairly easy image database:
|
||||
@ -362,10 +362,10 @@ Now real life isn't perfect. You simply can't guarantee perfect light settings i
|
||||
.. image:: img/at_database_small_sample_size.png
|
||||
:scale: 60%
|
||||
:align: center
|
||||
|
||||
|
||||
So in order to get good recognition rates you'll need at least 8(+-1) images for each person and the Fisherfaces method doesn't really help here. The above experiment is a 10-fold cross validated result carried out with the facerec framework at: `https://github.com/bytefish/facerec <https://github.com/bytefish/facerec>`_. This is not a publication, so I won't back these figures with a deep mathematical analysis. Please have a look into [KM01]_ for a detailed analysis of both methods, when it comes to small training datasets.
|
||||
|
||||
So some research concentrated on extracting local features from images. The idea is to not look at the whole image as a high-dimensional vector, but describe only local features of an object. The features you extract this way will have a low-dimensionality implicitly. A fine idea! But you'll soon observe the image representation we are given doesn't only suffer from illumination variations. Think of things like scale, translation or rotation in images - your local description has to be at least a bit robust against those things. Just like :ocv:class:`SIFT`, the Local Binary Patterns methodology has its roots in 2D texture analysis. The basic idea of Local Binary Patterns is to summarize the local structure in an image by comparing each pixel with its neighborhood. Take a pixel as center and threshold its neighbors against it. If the intensity of the center pixel is greater than or equal to that of its neighbor, then denote it with 1 and 0 if not. You'll end up with a binary number for each pixel, just like 11001111. So with 8 surrounding pixels you'll end up with 2^8 possible combinations, called *Local Binary Patterns* or sometimes referred to as *LBP codes*. The first LBP operator described in literature actually used a fixed 3 x 3 neighborhood just like this:
|
||||
So some research concentrated on extracting local features from images. The idea is to not look at the whole image as a high-dimensional vector, but describe only local features of an object. The features you extract this way will have a low-dimensionality implicitly. A fine idea! But you'll soon observe the image representation we are given doesn't only suffer from illumination variations. Think of things like scale, translation or rotation in images - your local description has to be at least a bit robust against those things. Just like :ocv:class:`SIFT`, the Local Binary Patterns methodology has its roots in 2D texture analysis. The basic idea of Local Binary Patterns is to summarize the local structure in an image by comparing each pixel with its neighborhood. Take a pixel as center and threshold its neighbors against it. If the intensity of the center pixel is greater than or equal to that of its neighbor, then denote it with 1 and 0 if not. You'll end up with a binary number for each pixel, just like 11001111. So with 8 surrounding pixels you'll end up with 2^8 possible combinations, called *Local Binary Patterns* or sometimes referred to as *LBP codes*. The first LBP operator described in literature actually used a fixed 3 x 3 neighborhood just like this:
|
||||
|
||||
.. image:: img/lbp/lbp.png
|
||||
:scale: 80%
|
||||
@ -384,16 +384,16 @@ A more formal description of the LBP operator can be given as:
|
||||
|
||||
.. math::
|
||||
:nowrap:
|
||||
|
||||
|
||||
\begin{equation}
|
||||
s(x) =
|
||||
\begin{cases}
|
||||
s(x) =
|
||||
\begin{cases}
|
||||
1 & \text{if $x \geq 0$}\\
|
||||
0 & \text{else}
|
||||
\end{cases}
|
||||
\end{equation}
|
||||
|
||||
This description enables you to capture very fine grained details in images. In fact the authors were able to compete with state of the art results for texture classification. Soon after the operator was published it was noted, that a fixed neighborhood fails to encode details differing in scale. So the operator was extended to use a variable neighborhood in [AHP04]_. The idea is to align an arbitrary number of neighbors on a circle with a variable radius, which makes it possible to capture the following neighborhoods:
|
||||
This description enables you to capture very fine grained details in images. In fact the authors were able to compete with state of the art results for texture classification. Soon after the operator was published it was noted, that a fixed neighborhood fails to encode details differing in scale. So the operator was extended to use a variable neighborhood in [AHP04]_. The idea is to align an arbitrary number of neighbors on a circle with a variable radius, which makes it possible to capture the following neighborhoods:
|
||||
|
||||
.. image:: img/lbp/patterns.png
|
||||
:scale: 80%
|
||||
@ -407,7 +407,7 @@ For a given Point :math:`(x_c,y_c)` the position of the neighbor :math:`(x_p,y_p
|
||||
\begin{align*}
|
||||
x_{p} & = & x_c + R \cos({\frac{2\pi p}{P}})\\
|
||||
y_{p} & = & y_c - R \sin({\frac{2\pi p}{P}})
|
||||
\end{align*}
|
||||
\end{align*}
|
||||
|
||||
Where :math:`R` is the radius of the circle and :math:`P` is the number of sample points.
|
||||
|
||||
@ -415,14 +415,14 @@ The operator is an extension to the original LBP codes, so it's sometimes called
|
||||
|
||||
.. math::
|
||||
:nowrap:
|
||||
|
||||
|
||||
\begin{align*}
|
||||
f(x,y) \approx \begin{bmatrix}
|
||||
1-x & x \end{bmatrix} \begin{bmatrix}
|
||||
f(0,0) & f(0,1) \\
|
||||
f(1,0) & f(1,1) \end{bmatrix} \begin{bmatrix}
|
||||
1-y \\
|
||||
y \end{bmatrix}.
|
||||
1-x & x \end{bmatrix} \begin{bmatrix}
|
||||
f(0,0) & f(0,1) \\
|
||||
f(1,0) & f(1,1) \end{bmatrix} \begin{bmatrix}
|
||||
1-y \\
|
||||
y \end{bmatrix}.
|
||||
\end{align*}
|
||||
|
||||
By definition the LBP operator is robust against monotonic gray scale transformations. We can easily verify this by looking at the LBP image of an artificially modified image (so you see what an LBP image looks like!):
|
||||
@ -448,7 +448,7 @@ You've learned how to use the new :ocv:class:`FaceRecognizer` in real applicatio
|
||||
Credits
|
||||
=======
|
||||
|
||||
This document wouldn't be possible without the kind permission to use the face images of the *AT&T Database of Faces* and the *Yale Facedatabase A/B*.
|
||||
This document wouldn't be possible without the kind permission to use the face images of the *AT&T Database of Faces* and the *Yale Facedatabase A/B*.
|
||||
|
||||
The Database of Faces
|
||||
---------------------
|
||||
@ -477,7 +477,7 @@ Yale Facedatabase B
|
||||
|
||||
The extended Yale Face Database B contains 16128 images of 28 human subjects under 9 poses and 64 illumination conditions. The data format of this database is the same as the Yale Face Database B. Please refer to the homepage of the Yale Face Database B (or one copy of this page) for more detailed information of the data format.
|
||||
|
||||
You are free to use the extended Yale Face Database B for research purposes. All publications which use this database should acknowledge the use of "the Extended Yale Face Database B" and reference Athinodoros Georghiades, Peter Belhumeur, and David Kriegman's paper, "From Few to Many: Illumination Cone Models for Face Recognition under Variable Lighting and Pose", PAMI, 2001, `[bibtex] <http://vision.ucsd.edu/~leekc/ExtYaleDatabase/athosref.html>`_.
|
||||
You are free to use the extended Yale Face Database B for research purposes. All publications which use this database should acknowledge the use of "the Extended Yale Face Database B" and reference Athinodoros Georghiades, Peter Belhumeur, and David Kriegman's paper, "From Few to Many: Illumination Cone Models for Face Recognition under Variable Lighting and Pose", PAMI, 2001, `[bibtex] <http://vision.ucsd.edu/~leekc/ExtYaleDatabase/athosref.html>`_.
|
||||
|
||||
The extended database, as opposed to the original Yale Face Database B with 10 subjects, was first reported by Kuang-Chih Lee, Jeffrey Ho, and David Kriegman in "Acquiring Linear Subspaces for Face Recognition under Variable Lighting", PAMI, May, 2005 `[pdf] <http://vision.ucsd.edu/~leekc/papers/9pltsIEEE.pdf>`_. All test image data used in the experiments are manually aligned, cropped, and then re-sized to 168x192 images. If you publish your experimental results with the cropped images, please reference the PAMI2005 paper as well. (Source: `http://vision.ucsd.edu/~leekc/ExtYaleDatabase/ExtYaleB.html <http://vision.ucsd.edu/~leekc/ExtYaleDatabase/ExtYaleB.html>`_)
|
||||
|
||||
@ -543,8 +543,8 @@ You don't really want to create the CSV file by hand. I have prepared you a litt
|
||||
| |-- 1.pgm
|
||||
| |-- ...
|
||||
| |-- 10.pgm
|
||||
|
||||
|
||||
|
||||
|
||||
Then simply call ``create_csv.py`` with the path to the folder, just like this, and you can save the output:
|
||||
|
||||
.. code-block:: none
|
||||
@ -573,7 +573,7 @@ Here is the script, if you can't find it:
|
||||
.. literalinclude:: ./src/create_csv.py
|
||||
:language: python
|
||||
:linenos:
|
||||
|
||||
|
||||
Aligning Face Images
|
||||
---------------------
|
||||
|
||||
@ -590,9 +590,9 @@ If you are using the same *offset_pct* and *dest_sz* for your images, they are a
|
||||
:language: python
|
||||
:linenos:
|
||||
|
||||
Imagine we are given `this photo of Arnold Schwarzenegger <http://en.wikipedia.org/wiki/File:Arnold_Schwarzenegger_edit%28ws%29.jpg>`_, which is under a Public Domain license. The (x,y)-position of the eyes is approximately *(252,364)* for the left and *(420,366)* for the right eye. Now you only need to define the horizontal offset, vertical offset and the size your scaled, rotated & cropped face should have.
|
||||
Imagine we are given `this photo of Arnold Schwarzenegger <http://en.wikipedia.org/wiki/File:Arnold_Schwarzenegger_edit%28ws%29.jpg>`_, which is under a Public Domain license. The (x,y)-position of the eyes is approximately *(252,364)* for the left and *(420,366)* for the right eye. Now you only need to define the horizontal offset, vertical offset and the size your scaled, rotated & cropped face should have.
|
||||
|
||||
Here are some examples:
|
||||
Here are some examples:
|
||||
|
||||
+---------------------------------+----------------------------------------------------------------------------+
|
||||
| Configuration | Cropped, Scaled, Rotated Face |
|
||||
@ -612,5 +612,5 @@ CSV for the AT&T Facedatabase
|
||||
.. literalinclude:: etc/at.txt
|
||||
:language: none
|
||||
:linenos:
|
||||
|
||||
|
||||
|
||||
|
@ -931,7 +931,7 @@ namespace cv
|
||||
virtual int predict(InputArray src) const = 0;
|
||||
|
||||
// Predicts the label and confidence for a given sample.
|
||||
CV_WRAP virtual void predict(InputArray src, CV_OUT int &label, CV_OUT double &dist) const = 0;
|
||||
CV_WRAP virtual void predict(InputArray src, CV_OUT int &label, CV_OUT double &confidence) const = 0;
|
||||
|
||||
// Serializes this object to a given filename.
|
||||
CV_WRAP virtual void save(const string& filename) const;
|
||||
@ -970,7 +970,7 @@ namespace cv
|
||||
|
||||
CV_EXPORTS_W void applyColorMap(InputArray src, OutputArray dst, int colormap);
|
||||
|
||||
CV_EXPORTS_W bool initModule_contrib();
|
||||
CV_EXPORTS bool initModule_contrib();
|
||||
}
|
||||
|
||||
#include "opencv2/contrib/retina.hpp"
|
||||
|
@ -968,7 +968,7 @@ Smoothes an image using a Gaussian filter.
|
||||
|
||||
.. ocv:function:: void GaussianBlur( InputArray src, OutputArray dst, Size ksize, double sigmaX, double sigmaY=0, int borderType=BORDER_DEFAULT )
|
||||
|
||||
.. ocv:pyfunction:: cv2.GaussianBlur(src, ksize, sigma1[, dst[, sigma2[, borderType]]]) -> dst
|
||||
.. ocv:pyfunction:: cv2.GaussianBlur(src, ksize, sigmaX[, dst[, sigmaY[, borderType]]]) -> dst
|
||||
|
||||
:param src: Source image. The image can have any number of channels, which are processed independently. The depth should be ``CV_8U``, ``CV_16U``, ``CV_16S``, ``CV_32F`` or ``CV_64F``.
|
||||
|
||||
|
@ -47,6 +47,8 @@ params_mapping = {
|
||||
}
|
||||
}
|
||||
|
||||
known_text_sections_names = ["Appendix", "Results", "Prerequisites", "Introduction", "Description"]
|
||||
|
||||
class DeclarationParser(object):
|
||||
def __init__(self, line=None):
|
||||
if line is None:
|
||||
@ -130,7 +132,7 @@ class RstParser(object):
|
||||
for root, dirs, files in os.walk(os.path.join(module_path,"doc")):
|
||||
for filename in fnmatch.filter(files, "*.rst"):
|
||||
doclist.append(os.path.join(root, filename))
|
||||
|
||||
|
||||
for doc in doclist:
|
||||
self.parse_rst_file(module_name, doc)
|
||||
|
||||
@ -146,7 +148,7 @@ class RstParser(object):
|
||||
self.sections_total += 1
|
||||
# skip sections having whitespace in name
|
||||
#if section_name.find(" ") >= 0 and section_name.find("::operator") < 0:
|
||||
if section_name.find(" ") >= 0 and not bool(re.match(r"(\w+::)*operator\s*(\w+|>>|<<|\(\)|->|\+\+|--|=|==|\+=|-=)", section_name)):
|
||||
if (section_name.find(" ") >= 0 and not bool(re.match(r"(\w+::)*operator\s*(\w+|>>|<<|\(\)|->|\+\+|--|=|==|\+=|-=)", section_name)) ) or section_name.endswith(":"):
|
||||
if show_errors:
|
||||
print >> sys.stderr, "RST parser warning W%03d: SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno)
|
||||
self.sections_skipped += 1
|
||||
@ -306,7 +308,11 @@ class RstParser(object):
|
||||
if verbose:
|
||||
self.print_info(func)
|
||||
elif func:
|
||||
if show_errors:
|
||||
if func["name"] in known_text_sections_names:
|
||||
if show_errors:
|
||||
print >> sys.stderr, "RST parser warning W%03d: SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno)
|
||||
self.sections_skipped += 1
|
||||
elif show_errors:
|
||||
self.print_info(func, True, sys.stderr)
|
||||
|
||||
def parse_rst_file(self, module_name, doc):
|
||||
@ -336,7 +342,7 @@ class RstParser(object):
|
||||
continue
|
||||
|
||||
ll = l.rstrip()
|
||||
if len(prev_line) > 0 and len(ll) >= len(prev_line) and ll == "-" * len(ll):
|
||||
if len(prev_line) > 0 and len(ll) >= len(prev_line) and (ll == "-" * len(ll) or ll == "+" * len(ll)):
|
||||
# new function candidate
|
||||
if len(lines) > 1:
|
||||
self.parse_section_safe(module_name, fname, doc, flineno, lines[:len(lines)-1])
|
||||
|
Loading…
Reference in New Issue
Block a user