mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Class-specific Extremal Region Filter algorithm as proposed in :
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012. High-level C++ interface and implementation of algorithm is in the objdetect module. C++ example, a test image, and the default classifiers in xml files.
This commit is contained in:
parent
d81d3fc830
commit
5abe3b59f5
@ -1,2 +1,2 @@
|
||||
set(the_description "Object Detection")
|
||||
ocv_define_module(objdetect opencv_core opencv_imgproc OPTIONAL opencv_highgui)
|
||||
ocv_define_module(objdetect opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_highgui)
|
||||
|
@ -394,5 +394,6 @@ CV_EXPORTS_W void drawDataMatrixCodes(InputOutputArray image,
|
||||
}
|
||||
|
||||
#include "opencv2/objdetect/linemod.hpp"
|
||||
#include "opencv2/objdetect/erfilter.hpp"
|
||||
|
||||
#endif
|
||||
|
202
modules/objdetect/include/opencv2/objdetect/erfilter.hpp
Normal file
202
modules/objdetect/include/opencv2/objdetect/erfilter.hpp
Normal file
@ -0,0 +1,202 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_OBJDETECT_ERFILTER_HPP__
|
||||
#define __OPENCV_OBJDETECT_ERFILTER_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
/*!
|
||||
Extremal Region Stat structure
|
||||
|
||||
The ERStat structure represents a class-specific Extremal Region (ER).
|
||||
|
||||
An ER is a 4-connected set of pixels with all its grey-level values smaller than the values
|
||||
in its outer boundary. A class-specific ER is selected (using a classifier) from all the ER's
|
||||
in the component tree of the image.
|
||||
*/
|
||||
struct CV_EXPORTS ERStat
|
||||
{
|
||||
public:
|
||||
//! Constructor
|
||||
ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
|
||||
//! Destructor
|
||||
~ERStat(){};
|
||||
|
||||
//! seed point and the threshold (max grey-level value)
|
||||
int pixel;
|
||||
int level;
|
||||
|
||||
//! incrementally computable features
|
||||
int area;
|
||||
int perimeter;
|
||||
int euler; //!< euler number
|
||||
int bbox[4];
|
||||
double raw_moments[2]; //!< order 1 raw moments to derive the centroid
|
||||
double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
|
||||
std::deque<int> *crossings;//!< horizontal crossings
|
||||
|
||||
//! 1st stage features
|
||||
float aspect_ratio;
|
||||
float compactness;
|
||||
float num_holes;
|
||||
float med_crossings;
|
||||
|
||||
//! 2nd stage features
|
||||
float hole_area_ratio;
|
||||
float convex_hull_ratio;
|
||||
float num_inflexion_points;
|
||||
|
||||
// TODO Other features can be added (average color, standard deviation, and such)
|
||||
|
||||
|
||||
// TODO shall we include the pixel list whenever available (i.e. after 2nd stage) ?
|
||||
std::vector<int> *pixels;
|
||||
|
||||
//! probability that the ER belongs to the class we are looking for
|
||||
double probability;
|
||||
|
||||
//! pointers preserving the tree structure of the component tree
|
||||
ERStat* parent;
|
||||
ERStat* child;
|
||||
ERStat* next;
|
||||
ERStat* prev;
|
||||
|
||||
//! wenever the regions is a local maxima of the probability
|
||||
bool local_maxima;
|
||||
ERStat* max_probability_ancestor;
|
||||
ERStat* min_probability_ancestor;
|
||||
};
|
||||
|
||||
/*!
|
||||
Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithms
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
|
||||
*/
|
||||
class CV_EXPORTS ERFilter : public cv::Algorithm
|
||||
{
|
||||
public:
|
||||
|
||||
//! callback with the classifier is made a class. By doing it we hide SVM, Boost etc.
|
||||
class CV_EXPORTS Callback
|
||||
{
|
||||
public:
|
||||
virtual ~Callback(){};
|
||||
//! The classifier must return probability measure for the region.
|
||||
virtual double eval(const ERStat& stat) = 0; //const = 0; //TODO why cannot use const = 0 here?
|
||||
};
|
||||
|
||||
/*!
|
||||
the key method. Takes image on input and returns the selected regions in a vector of ERStat
|
||||
only distinctive ERs which correspond to characters are selected by a sequential classifier
|
||||
\param image is the input image
|
||||
\param regions is output for the first stage, input/output for the second one.
|
||||
*/
|
||||
virtual void run( cv::InputArray image, std::vector<ERStat>& regions ) = 0;
|
||||
|
||||
|
||||
//! set/get methods to set the algorithm properties,
|
||||
virtual void setCallback(const cv::Ptr<ERFilter::Callback>& cb) = 0;
|
||||
virtual void setThresholdDelta(int thresholdDelta) = 0;
|
||||
virtual void setMinArea(float minArea) = 0;
|
||||
virtual void setMaxArea(float maxArea) = 0;
|
||||
virtual void setMinProbability(float minProbability) = 0;
|
||||
virtual void setMinProbabilityDiff(float minProbabilityDiff) = 0;
|
||||
virtual void setNonMaxSuppression(bool nonMaxSuppression) = 0;
|
||||
virtual int getNumRejected() = 0;
|
||||
};
|
||||
|
||||
|
||||
/*!
|
||||
Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
The component tree of the image is extracted by a threshold increased step by step
|
||||
from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
|
||||
number of holes, and number of horizontal crossings) are computed for each ER
|
||||
and used as features for a classifier which estimates the class-conditional
|
||||
probability P(er|character). The value of P(er|character) is tracked using the inclusion
|
||||
relation of ER across all thresholds and only the ERs which correspond to local maximum
|
||||
of the probability P(er|character) are selected (if the local maximum of the
|
||||
probability is above a global limit pmin and the difference between local maximum and
|
||||
local minimum is greater than minProbabilityDiff).
|
||||
|
||||
\param cb Callback with the classifier.
|
||||
if omitted tries to load a default classifier from file trained_classifierNM1.xml
|
||||
\param thresholdDelta Threshold step in subsequent thresholds when extracting the component tree
|
||||
\param minArea The minimum area (% of image size) allowed for retreived ER's
|
||||
\param minArea The maximum area (% of image size) allowed for retreived ER's
|
||||
\param minProbability The minimum probability P(er|character) allowed for retreived ER's
|
||||
\param nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
|
||||
\param minProbability The minimum probability difference between local maxima and local minima ERs
|
||||
*/
|
||||
CV_EXPORTS cv::Ptr<ERFilter> createERFilterNM1(const cv::Ptr<ERFilter::Callback>& cb = NULL,
|
||||
int thresholdDelta = 1, float minArea = 0.000025,
|
||||
float maxArea = 0.13, float minProbability = 0.2,
|
||||
bool nonMaxSuppression = true,
|
||||
float minProbabilityDiff = 0.1);
|
||||
|
||||
/*!
|
||||
Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm
|
||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
|
||||
In the second stage, the ERs that passed the first stage are classified into character
|
||||
and non-character classes using more informative but also more computationally expensive
|
||||
features. The classifier uses all the features calculated in the first stage and the following
|
||||
additional features: hole area ratio, convex hull ratio, and number of outer inflexion points.
|
||||
|
||||
\param cb Callback with the classifier
|
||||
if omitted tries to load a default classifier from file trained_classifierNM2.xml
|
||||
\param minProbability The minimum probability P(er|character) allowed for retreived ER's
|
||||
*/
|
||||
CV_EXPORTS cv::Ptr<ERFilter> createERFilterNM2(const cv::Ptr<ERFilter::Callback>& cb = NULL,
|
||||
float minProbability = 0.85);
|
||||
|
||||
}
|
||||
#endif // __OPENCV_OBJDETECT_ERFILTER_HPP__
|
1124
modules/objdetect/src/erfilter.cpp
Normal file
1124
modules/objdetect/src/erfilter.cpp
Normal file
File diff suppressed because it is too large
Load Diff
120
samples/cpp/erfilter.cpp
Normal file
120
samples/cpp/erfilter.cpp
Normal file
@ -0,0 +1,120 @@
|
||||
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
// A demo program of the Extremal Region Filter algorithm described in
|
||||
// Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
void er_draw(Mat &src, Mat &dst, ERStat& er);

// Marks the pixels of one extremal region in the floodFill mask `dst`.
//   src : image the region was extracted from (only read, because of FLOODFILL_MASK_ONLY)
//   dst : floodFill mask; the callers allocate it 2 pixels wider and taller than src
//   er  : region to draw; its seed is decoded from the linear index er.pixel
void er_draw(Mat &src, Mat &dst, ERStat& er)
{
    // The root region (the one without a parent) is skipped.
    if (er.parent == NULL)
        return;

    const int connectivity = 4;
    const int maskFillValue = 255;
    // floodFill expects the value written into the mask in bits 8-15 of the flags.
    const int fillFlags = connectivity + (maskFillValue << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;

    const Point seed(er.pixel % src.cols, er.pixel / src.cols);
    floodFill(src, dst, seed, Scalar(255), 0, Scalar(er.level), Scalar(0), fillFlags);
}
|
||||
|
||||
int main(int argc, const char * argv[])
|
||||
{
|
||||
|
||||
|
||||
vector<ERStat> regions;
|
||||
|
||||
if (argc < 2) {
|
||||
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
|
||||
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
|
||||
cout << " Usage: " << argv[0] << " input_image <optional_groundtruth_image>" << endl;
|
||||
cout << " Default classifier files (trained_classifierNM*.xml) should be in ./" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
Mat original = imread(argv[1]);
|
||||
Mat gt;
|
||||
if (argc > 2)
|
||||
{
|
||||
gt = imread(argv[2]);
|
||||
cvtColor(gt, gt, COLOR_RGB2GRAY);
|
||||
threshold(gt, gt, 254, 255, THRESH_BINARY);
|
||||
}
|
||||
Mat grey(original.size(),CV_8UC1);
|
||||
cvtColor(original,grey,COLOR_RGB2GRAY);
|
||||
|
||||
double t = (double)getTickCount();
|
||||
|
||||
// Build ER tree and filter with the 1st stage default classifier
|
||||
Ptr<ERFilter> er_filter1 = createERFilterNM1();
|
||||
|
||||
er_filter1->run(grey, regions);
|
||||
|
||||
t = (double)getTickCount() - t;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl;
|
||||
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl;
|
||||
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
|
||||
er_filter1.release();
|
||||
|
||||
// draw regions
|
||||
Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1);
|
||||
for (int r=0; r<(int)regions.size(); r++)
|
||||
er_draw(grey, mask, regions.at(r));
|
||||
mask = 255-mask;
|
||||
imwrite("out_first_stage.jpg", mask);
|
||||
|
||||
if (argc > 2)
|
||||
{
|
||||
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
|
||||
cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
|
||||
}
|
||||
|
||||
t = (double)getTickCount();
|
||||
|
||||
// Default second stage classifier
|
||||
Ptr<ERFilter> er_filter2 = createERFilterNM2();
|
||||
er_filter2->run(grey, regions);
|
||||
|
||||
t = (double)getTickCount() - t;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." << endl;
|
||||
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
|
||||
er_filter2.release();
|
||||
|
||||
// draw regions
|
||||
mask = mask*0;
|
||||
for (int r=0; r<(int)regions.size(); r++)
|
||||
er_draw(grey, mask, regions.at(r));
|
||||
mask = 255-mask;
|
||||
imwrite("out_second_stage.jpg", mask);
|
||||
|
||||
if (argc > 2)
|
||||
{
|
||||
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
|
||||
cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
|
||||
}
|
||||
|
||||
regions.clear();
|
||||
|
||||
}
|
BIN
samples/cpp/scenetext.jpg
Normal file
BIN
samples/cpp/scenetext.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 83 KiB |
BIN
samples/cpp/scenetext_GT.png
Normal file
BIN
samples/cpp/scenetext_GT.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.1 KiB |
4046
samples/cpp/trained_classifierNM1.xml
Normal file
4046
samples/cpp/trained_classifierNM1.xml
Normal file
File diff suppressed because it is too large
Load Diff
4046
samples/cpp/trained_classifierNM2.xml
Normal file
4046
samples/cpp/trained_classifierNM2.xml
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user