From 4ccbd445591303bde1fbf77ca13ab4cbeeb2f05d Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 1 Dec 2014 16:40:06 +0300 Subject: [PATCH] User guide converted to doxygen --- doc/CMakeLists.txt | 3 +- doc/opencv.bib | 8 + doc/user_guide/ug_features2d.markdown | 110 ++++++++ doc/user_guide/ug_highgui.markdown | 141 +++++++++++ doc/user_guide/ug_intelperc.markdown | 85 +++++++ doc/user_guide/ug_mat.markdown | 180 +++++++++++++ doc/user_guide/ug_traincascade.markdown | 323 ++++++++++++++++++++++++ doc/user_guide/user_guide.markdown | 8 + 8 files changed, 857 insertions(+), 1 deletion(-) create mode 100644 doc/user_guide/ug_features2d.markdown create mode 100644 doc/user_guide/ug_highgui.markdown create mode 100644 doc/user_guide/ug_intelperc.markdown create mode 100644 doc/user_guide/ug_mat.markdown create mode 100644 doc/user_guide/ug_traincascade.markdown create mode 100644 doc/user_guide/user_guide.markdown diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index f4b2f1765d..ebad5454ce 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -198,7 +198,8 @@ if(BUILD_DOCS AND HAVE_DOXYGEN) set(bibfile "${CMAKE_CURRENT_SOURCE_DIR}/opencv.bib") set(tutorial_path "${CMAKE_CURRENT_SOURCE_DIR}/tutorials") set(tutorial_py_path "${CMAKE_CURRENT_SOURCE_DIR}/py_tutorials") - string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${paths_include} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path}") + set(user_guide_path "${CMAKE_CURRENT_SOURCE_DIR}/user_guide") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${paths_include} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${user_guide_path}") string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path}") string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${CMAKE_SOURCE_DIR}/samples ; ${paths_doc}") set(CMAKE_DOXYGEN_LAYOUT "${CMAKE_CURRENT_SOURCE_DIR}/DoxygenLayout.xml") diff --git a/doc/opencv.bib b/doc/opencv.bib index 52e5dc1b2d..067a1aa005 100644 --- a/doc/opencv.bib +++ b/doc/opencv.bib @@ -832,3 +832,11 @@ year={2013}, organization={Springer} } +@incollection{Liao2007, + title={Learning multi-scale block local binary patterns for face recognition}, + author={Liao, Shengcai and Zhu, Xiangxin and Lei, Zhen and Zhang, Lun and Li, Stan Z}, + booktitle={Advances in Biometrics}, + pages={828--837}, + year={2007}, + publisher={Springer} +} diff --git a/doc/user_guide/ug_features2d.markdown b/doc/user_guide/ug_features2d.markdown new file mode 100644 index 0000000000..25ec20ab66 --- /dev/null +++ b/doc/user_guide/ug_features2d.markdown @@ -0,0 +1,110 @@ +Features2d {#tutorial_ug_features2d} +========== + +Detectors +--------- + +Descriptors +----------- + +Matching keypoints +------------------ + +### The code + +We will start with a short sample \`opencv/samples/cpp/matcher_simple.cpp\`: + +@code{.cpp} + Mat img1 = imread(argv[1], IMREAD_GRAYSCALE); + Mat img2 = imread(argv[2], IMREAD_GRAYSCALE); + if(img1.empty() || img2.empty()) + { + printf("Can't read one of the images\n"); + return -1; + } + + // detecting keypoints + SurfFeatureDetector detector(400); + vector keypoints1, keypoints2; + detector.detect(img1, keypoints1); + detector.detect(img2, keypoints2); + + // computing descriptors + SurfDescriptorExtractor extractor; + Mat descriptors1, descriptors2; + extractor.compute(img1, keypoints1, descriptors1); + extractor.compute(img2, keypoints2, descriptors2); + + // matching descriptors + BruteForceMatcher > matcher; + vector matches; + 
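    // find the best match for each descriptor by exhaustively comparing the two descriptor sets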
    matcher.match(descriptors1, descriptors2, matches);

    // drawing the results
    namedWindow("matches", 1);
    Mat img_matches;
    drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches);
    imshow("matches", img_matches);
    waitKey(0);
@endcode

### The code explained

Let us break the code down.
@code{.cpp}
    Mat img1 = imread(argv[1], IMREAD_GRAYSCALE);
    Mat img2 = imread(argv[2], IMREAD_GRAYSCALE);
    if(img1.empty() || img2.empty())
    {
        printf("Can't read one of the images\n");
        return -1;
    }
@endcode
We load two images and check whether they are loaded correctly.
@code{.cpp}
    // detecting keypoints
    Ptr<FeatureDetector> detector = FastFeatureDetector::create(15);
    vector<KeyPoint> keypoints1, keypoints2;
    detector->detect(img1, keypoints1);
    detector->detect(img2, keypoints2);
@endcode
First, we create an instance of a keypoint detector. All detectors inherit the abstract
FeatureDetector interface, but the constructors are algorithm-dependent. The first argument to each
detector usually controls the balance between the number of keypoints and their stability. The
range of values differs between detectors (for instance, the *FAST* threshold is a pixel intensity
difference and usually lies in the range *[0,40]*, while the *SURF* threshold is applied to the
Hessian of an image and usually takes values larger than *100*), so use the defaults in case of
doubt.
@code{.cpp}
    // computing descriptors
    Ptr<DescriptorExtractor> extractor = SURF::create();
    Mat descriptors1, descriptors2;
    extractor->compute(img1, keypoints1, descriptors1);
    extractor->compute(img2, keypoints2, descriptors2);
@endcode
We create an instance of a descriptor extractor. Most OpenCV descriptors inherit the
DescriptorExtractor abstract interface. Then we compute descriptors for each of the keypoints. The
output Mat of the DescriptorExtractor::compute method contains a descriptor in row *i* for the
*i*-th keypoint. Note that the method can modify the keypoints vector by removing keypoints for
which a descriptor is not defined (usually these are the keypoints near the image border). The
method makes sure that the output keypoints and descriptors are consistent with each other (so that
the number of keypoints is equal to the descriptor row count).
@code{.cpp}
    // matching descriptors
    BruteForceMatcher<L2<float> > matcher;
    vector<DMatch> matches;
    matcher.match(descriptors1, descriptors2, matches);
@endcode
Now that we have descriptors for both images, we can match them. First, we create a matcher that
for each descriptor from image 2 does an exhaustive search for the nearest descriptor in image 1
using the Euclidean metric. Manhattan distance is also implemented, as well as Hamming distance for
the BRIEF descriptor. The output vector matches contains the indices of corresponding point pairs.
@code{.cpp}
    // drawing the results
    namedWindow("matches", 1);
    Mat img_matches;
    drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches);
    imshow("matches", img_matches);
    waitKey(0);
@endcode
The final part of the sample visualizes the matching results.

diff --git a/doc/user_guide/ug_highgui.markdown b/doc/user_guide/ug_highgui.markdown new file mode 100644 index 0000000000..3213627f87 --- /dev/null +++ b/doc/user_guide/ug_highgui.markdown @@ -0,0 +1,141 @@
HighGUI {#tutorial_ug_highgui}
=======

Using Kinect and other OpenNI compatible depth sensors
------------------------------------------------------

Depth sensors compatible with OpenNI (Kinect, XtionPRO, ...)
are supported through VideoCapture +class. Depth map, RGB image and some other formats of output can be retrieved by using familiar +interface of VideoCapture. + +In order to use depth sensor with OpenCV you should do the following preliminary steps: + +-# Install OpenNI library (from here ) and PrimeSensor Module + for OpenNI (from here ). The installation should be done + to default folders listed in the instructions of these products, e.g.: + @code{.text} + OpenNI: + Linux & MacOSX: + Libs into: /usr/lib + Includes into: /usr/include/ni + Windows: + Libs into: c:/Program Files/OpenNI/Lib + Includes into: c:/Program Files/OpenNI/Include + PrimeSensor Module: + Linux & MacOSX: + Bins into: /usr/bin + Windows: + Bins into: c:/Program Files/Prime Sense/Sensor/Bin + @endcode + If one or both products were installed to the other folders, the user should change + corresponding CMake variables OPENNI_LIB_DIR, OPENNI_INCLUDE_DIR or/and + OPENNI_PRIME_SENSOR_MODULE_BIN_DIR. + +-# Configure OpenCV with OpenNI support by setting WITH_OPENNI flag in CMake. If OpenNI is found + in install folders OpenCV will be built with OpenNI library (see a status OpenNI in CMake log) + whereas PrimeSensor Modules can not be found (see a status OpenNI PrimeSensor Modules in CMake + log). Without PrimeSensor module OpenCV will be successfully compiled with OpenNI library, but + VideoCapture object will not grab data from Kinect sensor. + +-# Build OpenCV. + +VideoCapture can retrieve the following data: + +-# data given from depth generator: + - CAP_OPENNI_DEPTH_MAP - depth values in mm (CV_16UC1) + - CAP_OPENNI_POINT_CLOUD_MAP - XYZ in meters (CV_32FC3) + - CAP_OPENNI_DISPARITY_MAP - disparity in pixels (CV_8UC1) + - CAP_OPENNI_DISPARITY_MAP_32F - disparity in pixels (CV_32FC1) + - CAP_OPENNI_VALID_DEPTH_MASK - mask of valid pixels (not ocluded, not shaded etc.) + (CV_8UC1) + +-# data given from RGB image generator: + - CAP_OPENNI_BGR_IMAGE - color image (CV_8UC3) + - CAP_OPENNI_GRAY_IMAGE - gray image (CV_8UC1) + +In order to get depth map from depth sensor use VideoCapture::operator \>\>, e. g. : +@code{.cpp} + VideoCapture capture( CAP_OPENNI ); + for(;;) + { + Mat depthMap; + capture >> depthMap; + + if( waitKey( 30 ) >= 0 ) + break; + } +@endcode +For getting several data maps use VideoCapture::grab and VideoCapture::retrieve, e.g. : +@code{.cpp} + VideoCapture capture(0); // or CAP_OPENNI + for(;;) + { + Mat depthMap; + Mat bgrImage; + + capture.grab(); + + capture.retrieve( depthMap, CAP_OPENNI_DEPTH_MAP ); + capture.retrieve( bgrImage, CAP_OPENNI_BGR_IMAGE ); + + if( waitKey( 30 ) >= 0 ) + break; + } +@endcode +For setting and getting some property of sensor\` data generators use VideoCapture::set and +VideoCapture::get methods respectively, e.g. : +@code{.cpp} + VideoCapture capture( CAP_OPENNI ); + capture.set( CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE, CAP_OPENNI_VGA_30HZ ); + cout << "FPS " << capture.get( CAP_OPENNI_IMAGE_GENERATOR+CAP_PROP_FPS ) << endl; +@endcode +Since two types of sensor's data generators are supported (image generator and depth generator), +there are two flags that should be used to set/get property of the needed generator: + +- CAP_OPENNI_IMAGE_GENERATOR -- A flag for access to the image generator properties. +- CAP_OPENNI_DEPTH_GENERATOR -- A flag for access to the depth generator properties. This flag + value is assumed by default if neither of the two possible values of the property is not set. + +Some depth sensors (for example XtionPRO) do not have image generator. 
In order to check it you can +get CAP_OPENNI_IMAGE_GENERATOR_PRESENT property. +@code{.cpp} +bool isImageGeneratorPresent = capture.get( CAP_PROP_OPENNI_IMAGE_GENERATOR_PRESENT ) != 0; // or == 1 +@endcode +Flags specifing the needed generator type must be used in combination with particular generator +property. The following properties of cameras available through OpenNI interfaces are supported: + +- For image generator: + + - CAP_PROP_OPENNI_OUTPUT_MODE -- Three output modes are supported: CAP_OPENNI_VGA_30HZ + used by default (image generator returns images in VGA resolution with 30 FPS), + CAP_OPENNI_SXGA_15HZ (image generator returns images in SXGA resolution with 15 FPS) and + CAP_OPENNI_SXGA_30HZ (image generator returns images in SXGA resolution with 30 FPS, the + mode is supported by XtionPRO Live); depth generator's maps are always in VGA resolution. + +- For depth generator: + + - CAP_PROP_OPENNI_REGISTRATION -- Flag that registers the remapping depth map to image map + by changing depth generator's view point (if the flag is "on") or sets this view point to + its normal one (if the flag is "off"). The registration process’s resulting images are + pixel-aligned,which means that every pixel in the image is aligned to a pixel in the depth + image. + + Next properties are available for getting only: + + - CAP_PROP_OPENNI_FRAME_MAX_DEPTH -- A maximum supported depth of Kinect in mm. + - CAP_PROP_OPENNI_BASELINE -- Baseline value in mm. + - CAP_PROP_OPENNI_FOCAL_LENGTH -- A focal length in pixels. + - CAP_PROP_FRAME_WIDTH -- Frame width in pixels. + - CAP_PROP_FRAME_HEIGHT -- Frame height in pixels. + - CAP_PROP_FPS -- Frame rate in FPS. + +- Some typical flags combinations "generator type + property" are defined as single flags: + + - CAP_OPENNI_IMAGE_GENERATOR_OUTPUT_MODE = CAP_OPENNI_IMAGE_GENERATOR + CAP_PROP_OPENNI_OUTPUT_MODE + - CAP_OPENNI_DEPTH_GENERATOR_BASELINE = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_BASELINE + - CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_FOCAL_LENGTH + - CAP_OPENNI_DEPTH_GENERATOR_REGISTRATION = CAP_OPENNI_DEPTH_GENERATOR + CAP_PROP_OPENNI_REGISTRATION + +For more information please refer to the example of usage +[openniccaptureccpp](https://github.com/Itseez/opencv/tree/master/samples/cpp/openni_capture.cpp) in +opencv/samples/cpp folder. diff --git a/doc/user_guide/ug_intelperc.markdown b/doc/user_guide/ug_intelperc.markdown new file mode 100644 index 0000000000..da81e1de0c --- /dev/null +++ b/doc/user_guide/ug_intelperc.markdown @@ -0,0 +1,85 @@ +HighGUI {#tutorial_ug_intelperc} +======= + +Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors +--------------------------------------------------------------------------------------- + +Depth sensors compatible with Intel Perceptual Computing SDK are supported through VideoCapture +class. Depth map, RGB image and some other formats of output can be retrieved by using familiar +interface of VideoCapture. + +In order to use depth sensor with OpenCV you should do the following preliminary steps: + +-# Install Intel Perceptual Computing SDK (from here ). + +-# Configure OpenCV with Intel Perceptual Computing SDK support by setting WITH_INTELPERC flag in + CMake. If Intel Perceptual Computing SDK is found in install folders OpenCV will be built with + Intel Perceptual Computing SDK library (see a status INTELPERC in CMake log). 
If CMake process + doesn't find Intel Perceptual Computing SDK installation folder automatically, the user should + change corresponding CMake variables INTELPERC_LIB_DIR and INTELPERC_INCLUDE_DIR to the + proper value. + +-# Build OpenCV. + +VideoCapture can retrieve the following data: + +-# data given from depth generator: + - CAP_INTELPERC_DEPTH_MAP - each pixel is a 16-bit integer. The value indicates the + distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1) + - CAP_INTELPERC_UVDEPTH_MAP - each pixel contains two 32-bit floating point values in + the range of 0-1, representing the mapping of depth coordinates to the color + coordinates. (CV_32FC2) + - CAP_INTELPERC_IR_MAP - each pixel is a 16-bit integer. The value indicates the + intensity of the reflected laser beam. (CV_16UC1) + +-# data given from RGB image generator: + - CAP_INTELPERC_IMAGE - color image. (CV_8UC3) + +In order to get depth map from depth sensor use VideoCapture::operator \>\>, e. g. : +@code{.cpp} + VideoCapture capture( CAP_INTELPERC ); + for(;;) + { + Mat depthMap; + capture >> depthMap; + + if( waitKey( 30 ) >= 0 ) + break; + } +@endcode +For getting several data maps use VideoCapture::grab and VideoCapture::retrieve, e.g. : +@code{.cpp} + VideoCapture capture(CAP_INTELPERC); + for(;;) + { + Mat depthMap; + Mat image; + Mat irImage; + + capture.grab(); + + capture.retrieve( depthMap, CAP_INTELPERC_DEPTH_MAP ); + capture.retrieve( image, CAP_INTELPERC_IMAGE ); + capture.retrieve( irImage, CAP_INTELPERC_IR_MAP); + + if( waitKey( 30 ) >= 0 ) + break; + } +@endcode +For setting and getting some property of sensor\` data generators use VideoCapture::set and +VideoCapture::get methods respectively, e.g. : +@code{.cpp} + VideoCapture capture( CAP_INTELPERC ); + capture.set( CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, 0 ); + cout << "FPS " << capture.get( CAP_INTELPERC_DEPTH_GENERATOR+CAP_PROP_FPS ) << endl; +@endcode +Since two types of sensor's data generators are supported (image generator and depth generator), +there are two flags that should be used to set/get property of the needed generator: + +- CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties. +- CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This + flag value is assumed by default if neither of the two possible values of the property is set. + +For more information please refer to the example of usage +[intelpercccaptureccpp](https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp) +in opencv/samples/cpp folder. diff --git a/doc/user_guide/ug_mat.markdown b/doc/user_guide/ug_mat.markdown new file mode 100644 index 0000000000..d3994a8ea3 --- /dev/null +++ b/doc/user_guide/ug_mat.markdown @@ -0,0 +1,180 @@ +Operations with images {#tutorial_ug_mat} +====================== + +Input/Output +------------ + +### Images + +Load an image from a file: +@code{.cpp} + Mat img = imread(filename) +@endcode + +If you read a jpg file, a 3 channel image is created by default. If you need a grayscale image, use: + +@code{.cpp} + Mat img = imread(filename, 0); +@endcode + +@note format of the file is determined by its content (first few bytes) Save an image to a file: + +@code{.cpp} + imwrite(filename, img); +@endcode + +@note format of the file is determined by its extension. + +@note use imdecode and imencode to read and write image from/to memory rather than a file. 
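For instance, a minimal sketch of such an in-memory round trip (the ".jpg" extension and the buffer
handling here are illustrative) could look like this:
@code{.cpp}
    // encode an image into a memory buffer, then decode it back (illustrative sketch)
    Mat img = imread("image.jpg");
    std::vector<uchar> buf;
    imencode(".jpg", img, buf);                  // compress into the in-memory buffer
    Mat decoded = imdecode(buf, IMREAD_COLOR);   // decode the buffer back into a Mat
@endcode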
XML/YAML
--------

TBD

Basic operations with images
----------------------------

### Accessing pixel intensity values

In order to get a pixel intensity value, you have to know the type of an image and the number of
channels. Here is an example for a single channel grey scale image (type 8UC1) and pixel
coordinates x and y:
@code{.cpp}
    Scalar intensity = img.at<uchar>(y, x);
@endcode
intensity.val[0] contains a value from 0 to 255. Note the ordering of x and y. Since in OpenCV
images are represented by the same structure as matrices, we use the same convention for both
cases - the 0-based row index (or y-coordinate) goes first and the 0-based column index (or
x-coordinate) follows it. Alternatively, you can use the following notation:
@code{.cpp}
    Scalar intensity = img.at<uchar>(Point(x, y));
@endcode
Now let us consider a 3 channel image with BGR color ordering (the default format returned by
imread):
@code{.cpp}
    Vec3b intensity = img.at<Vec3b>(y, x);
    uchar blue = intensity.val[0];
    uchar green = intensity.val[1];
    uchar red = intensity.val[2];
@endcode
You can use the same method for floating-point images (for example, you can get such an image by
running Sobel on a 3 channel image):
@code{.cpp}
    Vec3f intensity = img.at<Vec3f>(y, x);
    float blue = intensity.val[0];
    float green = intensity.val[1];
    float red = intensity.val[2];
@endcode
The same method can be used to change pixel intensities:
@code{.cpp}
    img.at<uchar>(y, x) = 128;
@endcode
There are functions in OpenCV, especially from the calib3d module, such as projectPoints, that take
an array of 2D or 3D points in the form of Mat. The matrix should contain exactly one column, each
row corresponds to a point, and the matrix type should be 32FC2 or 32FC3 correspondingly. Such a
matrix can be easily constructed from `std::vector`:
@code{.cpp}
    vector<Point2f> points;
    //... fill the array
    Mat pointsMat = Mat(points);
@endcode
One can access a point in this matrix using the same method Mat::at :
@code{.cpp}
    Point2f point = pointsMat.at<Point2f>(i, 0);
@endcode

### Memory management and reference counting

Mat is a structure that keeps matrix/image characteristics (the number of rows and columns, data
type etc.) and a pointer to data. So nothing prevents us from having several instances of Mat
corresponding to the same data. A Mat keeps a reference count that tells if data has to be
deallocated when a particular instance of Mat is destroyed. Here is an example of creating two
matrices without copying data:
@code{.cpp}
    std::vector<Point3f> points;
    // .. fill the array
    Mat pointsMat = Mat(points).reshape(1);
@endcode
As a result we get a 32FC1 matrix with 3 columns instead of a 32FC3 matrix with 1 column. pointsMat
uses data from points and will not deallocate the memory when destroyed. In this particular
instance, however, the developer has to make sure that the lifetime of points is longer than that
of pointsMat. If we need to copy the data, this is done using, for example, cv::Mat::copyTo or
cv::Mat::clone:
@code{.cpp}
    Mat img = imread("image.jpg");
    Mat img1 = img.clone();
@endcode
In contrast to the C API, where an output image had to be created by the developer, an empty output
Mat can be supplied to each function. Each implementation calls Mat::create for a destination
matrix. This method allocates data for a matrix if it is empty. If it is not empty and has the
correct size and type, the method does nothing. If, however, size or type are different from the
input arguments, the data is deallocated (and lost) and new data is allocated.
For example: +@code{.cpp} + Mat img = imread("image.jpg"); + Mat sobelx; + Sobel(img, sobelx, CV_32F, 1, 0); +@endcode + +### Primitive operations + +There is a number of convenient operators defined on a matrix. For example, here is how we can make +a black image from an existing greyscale image \`img\`: +@code{.cpp} + img = Scalar(0); +@endcode +Selecting a region of interest: +@code{.cpp} + Rect r(10, 10, 100, 100); + Mat smallImg = img(r); +@endcode +A convertion from Mat to C API data structures: +@code{.cpp} + Mat img = imread("image.jpg"); + IplImage img1 = img; + CvMat m = img; +@endcode + +Note that there is no data copying here. + +Conversion from color to grey scale: +@code{.cpp} + Mat img = imread("image.jpg"); // loading a 8UC3 image + Mat grey; + cvtColor(img, grey, COLOR_BGR2GRAY); +@endcode +Change image type from 8UC1 to 32FC1: +@code{.cpp} + src.convertTo(dst, CV_32F); +@endcode + +### Visualizing images + +It is very useful to see intermediate results of your algorithm during development process. OpenCV +provides a convenient way of visualizing images. A 8U image can be shown using: +@code{.cpp} + Mat img = imread("image.jpg"); + + namedWindow("image", WINDOW_AUTOSIZE); + imshow("image", img); + waitKey(); +@endcode + +A call to waitKey() starts a message passing cycle that waits for a key stroke in the "image" +window. A 32F image needs to be converted to 8U type. For example: +@code{.cpp} + Mat img = imread("image.jpg"); + Mat grey; + cvtColor(img, grey, COLOR_BGR2GRAY); + + Mat sobelx; + Sobel(grey, sobelx, CV_32F, 1, 0); + + double minVal, maxVal; + minMaxLoc(sobelx, &minVal, &maxVal); //find minimum and maximum intensities + Mat draw; + sobelx.convertTo(draw, CV_8U, 255.0/(maxVal - minVal), -minVal * 255.0/(maxVal - minVal)); + + namedWindow("image", WINDOW_AUTOSIZE); + imshow("image", draw); + waitKey(); +@endcode diff --git a/doc/user_guide/ug_traincascade.markdown b/doc/user_guide/ug_traincascade.markdown new file mode 100644 index 0000000000..059d25e8a2 --- /dev/null +++ b/doc/user_guide/ug_traincascade.markdown @@ -0,0 +1,323 @@ +Cascade Classifier Training {#tutorial_ug_traincascade} +=========================== + +Introduction +------------ + +The work with a cascade classifier inlcudes two major stages: training and detection. Detection +stage is described in a documentation of objdetect module of general OpenCV documentation. +Documentation gives some basic information about cascade classifier. Current guide is describing how +to train a cascade classifier: preparation of a training data and running the training application. + +### Important notes + +There are two applications in OpenCV to train cascade classifier: opencv_haartraining and +opencv_traincascade. opencv_traincascade is a newer version, written in C++ in accordance to +OpenCV 2.x API. But the main difference between this two applications is that opencv_traincascade +supports both Haar @cite Viola01 and @cite Liao2007 (Local Binary Patterns) features. LBP features +are integer in contrast to Haar features, so both training and detection with LBP are several times +faster then with Haar features. Regarding the LBP and Haar detection quality, it depends on +training: the quality of training dataset first of all and training parameters too. It's possible to +train a LBP-based classifier that will provide almost the same quality as Haar-based one. + +opencv_traincascade and opencv_haartraining store the trained classifier in different file +formats. 
Note that the newer cascade detection interface (see the CascadeClassifier class in the objdetect
module) supports both formats. opencv_traincascade can save (export) a trained cascade in the older
format. But opencv_traincascade and opencv_haartraining can not load (import) a classifier in the
other format to continue training after an interruption.

Note that the opencv_traincascade application can use TBB for multi-threading. To use it in
multicore mode, OpenCV must be built with TBB.

There are also some auxiliary utilities related to the training:

- opencv_createsamples is used to prepare a training dataset of positive and test samples.
  opencv_createsamples produces a dataset of positive samples in a format that is supported by
  both the opencv_haartraining and opencv_traincascade applications. The output is a file
  with \*.vec extension; it is a binary format which contains images.
- opencv_performance may be used to evaluate the quality of classifiers, but only of classifiers
  trained by opencv_haartraining. It takes a collection of marked up images, runs the classifier
  and reports the performance, i.e. number of found objects, number of missed objects, number of
  false alarms and other information.

Since opencv_haartraining is an obsolete application, only opencv_traincascade will be described
further. The opencv_createsamples utility is needed to prepare the training data for
opencv_traincascade, so it will be described too.

Training data preparation
-------------------------

For training we need a set of samples. There are two types of samples: negative and positive.
Negative samples correspond to non-object images. Positive samples correspond to images with
detected objects. The set of negative samples must be prepared manually, whereas the set of
positive samples is created using the opencv_createsamples utility.

### Negative Samples

Negative samples are taken from arbitrary images. These images must not contain detected objects.
Negative samples are enumerated in a special file. It is a text file in which each line contains an
image filename (relative to the directory of the description file) of a negative sample image. This
file must be created manually. Note that negative samples and sample images are also called
background samples or background sample images, and are used interchangeably in this document.
Described images may be of different sizes. But each image should be (though not necessarily)
larger than the training window size, because these images are used to subsample a negative image
to the training size.

An example of a description file:

Directory structure:
@code{.text}
/img
  img1.jpg
  img2.jpg
bg.txt
@endcode
File bg.txt:
@code{.text}
img/img1.jpg
img/img2.jpg
@endcode
### Positive Samples

Positive samples are created by the opencv_createsamples utility. They may be created from a single
image with an object or from a collection of previously marked up images.

Please note that you need a large dataset of positive samples before you give it to the mentioned
utility, because it only applies perspective transformation. For example, you may need only one
positive sample for an absolutely rigid object like an OpenCV logo, but you definitely need
hundreds and even thousands of positive samples for faces. In the case of faces you should consider
all the race and age groups, emotions and perhaps beard styles.

So, a single object image may contain a company logo.
Then a large set of positive samples is created from the given object image by randomly rotating
it, changing the logo intensity as well as placing the logo on an arbitrary background. The amount
and range of randomness can be controlled by command line arguments of the opencv_createsamples
utility.

Command line arguments:

- -vec \<vec_file_name\>

    Name of the output file containing the positive samples for training.

- -img \<image_file_name\>

    Source object image (e.g., a company logo).

- -bg \<background_file_name\>

    Background description file; contains a list of images which are used as a background for
    randomly distorted versions of the object.

- -num \<number_of_samples\>

    Number of positive samples to generate.

- -bgcolor \<background_color\>

    Background color (currently grayscale images are assumed); the background color denotes the
    transparent color. Since there might be compression artifacts, the amount of color tolerance
    can be specified by -bgthresh. All pixels within the bgcolor-bgthresh and bgcolor+bgthresh
    range are interpreted as transparent.

- -bgthresh \<background_color_threshold\>
- -inv

    If specified, colors will be inverted.

- -randinv

    If specified, colors will be inverted randomly.

- -maxidev \<max_intensity_deviation\>

    Maximal intensity deviation of pixels in foreground samples.

- -maxxangle \<max_x_rotation_angle\>
- -maxyangle \<max_y_rotation_angle\>
- -maxzangle \<max_z_rotation_angle\>

    Maximum rotation angles must be given in radians.

- -show

    Useful debugging option. If specified, each sample will be shown. Pressing Esc will continue
    the samples creation process without showing each sample.

- -w \<sample_width\>

    Width (in pixels) of the output samples.

- -h \<sample_height\>

    Height (in pixels) of the output samples.

The following procedure is used to create a sample object instance: the source image is rotated
randomly around all three axes. The chosen angle is limited by -max?angle. Then pixels having an
intensity in the [bg_color-bg_color_threshold; bg_color+bg_color_threshold] range are interpreted
as transparent. White noise is added to the intensities of the foreground. If the -inv key is
specified, foreground pixel intensities are inverted. If the -randinv key is specified, the
algorithm randomly selects whether inversion should be applied to this sample. Finally, the
obtained image is placed onto an arbitrary background from the background description file, resized
to the desired size specified by -w and -h, and stored to the vec-file specified by the -vec
command line option.

Positive samples may also be obtained from a collection of previously marked up images. This
collection is described by a text file similar to the background description file. Each line of
this file corresponds to an image. The first element of the line is the filename. It is followed by
the number of object instances. The following numbers are the coordinates of the objects' bounding
rectangles (x, y, width, height).

An example of a description file:

Directory structure:
@code{.text}
/img
  img1.jpg
  img2.jpg
info.dat
@endcode
File info.dat:
@code{.text}
img/img1.jpg 1 140 100 45 45
img/img2.jpg 2 100 200 50 50 50 30 25 25
@endcode
Image img1.jpg contains a single object instance with the following coordinates of the bounding
rectangle: (140, 100, 45, 45). Image img2.jpg contains two object instances.

In order to create positive samples from such a collection, the -info argument should be specified
instead of \`-img\`:

- -info \<collection_file_name\>

    Description file of the marked up images collection.

The scheme of samples creation in this case is as follows. The object instances are taken from
images.
Then they are resized to target samples size and stored in output vec-file. No distortion is +applied, so the only affecting arguments are -w, -h, -show and -num. + +opencv_createsamples utility may be used for examining samples stored in positive samples file. In +order to do this only -vec, -w and -h parameters should be specified. + +Note that for training, it does not matter how vec-files with positive samples are generated. But +opencv_createsamples utility is the only one way to collect/create a vector file of positive +samples, provided by OpenCV. + +Example of vec-file is available here opencv/data/vec_files/trainingfaces_24-24.vec. It can be +used to train a face detector with the following window size: -w 24 -h 24. + +Cascade Training +---------------- + +The next step is the training of classifier. As mentioned above opencv_traincascade or +opencv_haartraining may be used to train a cascade classifier, but only the newer +opencv_traincascade will be described futher. + +Command line arguments of opencv_traincascade application grouped by purposes: + +-# Common arguments: + + - -data \ + + Where the trained classifier should be stored. + + - -vec \ + + vec-file with positive samples (created by opencv_createsamples utility). + + - -bg \ + + Background description file. + + - -numPos \ + - -numNeg \ + + Number of positive/negative samples used in training for every classifier stage. + + - -numStages \ + + Number of cascade stages to be trained. + + - -precalcValBufSize \ + + Size of buffer for precalculated feature values (in Mb). + + - -precalcIdxBufSize \ + + Size of buffer for precalculated feature indices (in Mb). The more memory you have the + faster the training process. + + - -baseFormatSave + + This argument is actual in case of Haar-like features. If it is specified, the cascade will + be saved in the old format. + + - -numThreads \ + + Maximum number of threads to use during training. Notice that the actual number of used + threads may be lower, depending on your machine and compilation options. + +-# Cascade parameters: + + - -stageType \ + + Type of stages. Only boosted classifier are supported as a stage type at the moment. + + - -featureType\<{HAAR(default), LBP}\> + + Type of features: HAAR - Haar-like features, LBP - local binary patterns. + + - -w \ + - -h \ + + Size of training samples (in pixels). Must have exactly the same values as used during + training samples creation (opencv_createsamples utility). + +-# Boosted classifer parameters: + + - -bt \<{DAB, RAB, LB, GAB(default)}\> + + Type of boosted classifiers: DAB - Discrete AdaBoost, RAB - Real AdaBoost, LB - LogitBoost, + GAB - Gentle AdaBoost. + + - -minHitRate \ + + Minimal desired hit rate for each stage of the classifier. Overall hit rate may be estimated + as (min_hit_rate\^number_of_stages). + + - -maxFalseAlarmRate \ + + Maximal desired false alarm rate for each stage of the classifier. Overall false alarm rate + may be estimated as (max_false_alarm_rate\^number_of_stages). + + - -weightTrimRate \ + + Specifies whether trimming should be used and its weight. A decent choice is 0.95. + + - -maxDepth \ + + Maximal depth of a weak tree. A decent choice is 1, that is case of stumps. + + - -maxWeakCount \ + + Maximal count of weak trees for every cascade stage. The boosted classifier (stage) will + have so many weak trees (\<=maxWeakCount), as needed to achieve the + given -maxFalseAlarmRate. + +-# Haar-like feature parameters: + + - -mode \ + + Selects the type of Haar features set used in training. 
BASIC uses only upright features, while ALL uses the full set of upright and 45 degree rotated
        features. See @cite Lienhart02 for more details.

-# Local Binary Patterns parameters:

    Local Binary Patterns don't have parameters.

After the opencv_traincascade application has finished its work, the trained cascade will be saved
in the file cascade.xml in the folder which was passed as the -data parameter. Other files in this
folder are created for the case of interrupted training, so you may delete them after completion of
training.

Training is finished and you can test your cascade classifier!

diff --git a/doc/user_guide/user_guide.markdown b/doc/user_guide/user_guide.markdown new file mode 100644 index 0000000000..f940bf866e --- /dev/null +++ b/doc/user_guide/user_guide.markdown @@ -0,0 +1,8 @@
+OpenCV User Guide {#tutorial_user_guide}
+=================
+
+- @subpage tutorial_ug_mat
+- @subpage tutorial_ug_features2d
+- @subpage tutorial_ug_highgui
+- @subpage tutorial_ug_traincascade
+- @subpage tutorial_ug_intelperc