mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 03:30:34 +08:00
Added erGrouping function: Find groups of Extremal Regions that are organized as text blocks. Updated sample/cpp to use the complete text detection pipeline
This commit is contained in:
parent
3fa3722641
commit
2837bfd9fa
@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0,
|
||||
*/
|
||||
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
|
||||
|
||||
|
||||
/*!
|
||||
Find groups of Extremal Regions that are organized as text blocks. This function implements
|
||||
the grouping algorithm described in:
|
||||
Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
|
||||
Notice that this implementation constrains the results to horizontally-aligned text and
|
||||
latin script (since ERFilter classifiers are trained only for latin script detection).
|
||||
|
||||
The algorithm combines two different clustering techniques in a single parameter-free procedure
|
||||
to detect groups of regions organized as text. The maximally meaningful groups are fist detected
|
||||
in several feature spaces, where each feature space is a combination of proximity information
|
||||
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
|
||||
thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to
|
||||
combine all these hypotheses to get the final estimate. Each of the resulting groups are finally
|
||||
heuristically validated in order to assest if they form a valid horizontally-aligned text block.
|
||||
|
||||
\param src Vector of sinle channel images CV_8UC1 from wich the regions were extracted.
|
||||
\param regions Vector of ER's retreived from the ERFilter algorithm from each channel
|
||||
\param groups The output of the algorithm are stored in this parameter as list of rectangles.
|
||||
*/
|
||||
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > ®ions,
|
||||
std::vector<Rect> &groups);
|
||||
|
||||
}
|
||||
#endif // _OPENCV_ERFILTER_HPP_
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,9 +16,83 @@
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
void er_draw(Mat &src, Mat &dst, ERStat& er);
|
||||
void show_help_and_exit(const char *cmd);
|
||||
void groups_draw(Mat &src, vector<Rect> &groups);
|
||||
void er_draw(Mat &src, Mat &dst, ERStat& er);
|
||||
|
||||
void er_draw(Mat &src, Mat &dst, ERStat& er)
|
||||
int main(int argc, const char * argv[])
|
||||
{
|
||||
|
||||
if (argc < 2) show_help_and_exit(argv[0]);
|
||||
|
||||
Mat src = imread(argv[1]);
|
||||
|
||||
// Extract channels to be processed individually
|
||||
vector<Mat> channels;
|
||||
computeNMChannels(src, channels);
|
||||
|
||||
int cn = (int)channels.size();
|
||||
// Append negative channels to detect ER- (bright regions over dark background)
|
||||
for (int c = 0; c < cn-1; c++)
|
||||
channels.push_back(255-channels[c]);
|
||||
|
||||
// Create ERFilter objects with the 1st and 2nd stage default classifiers
|
||||
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1);
|
||||
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3);
|
||||
|
||||
vector<vector<ERStat> > regions(channels.size());
|
||||
// Apply the default cascade classifier to each independent channel (could be done in parallel)
|
||||
for (int c=0; c<(int)channels.size(); c++)
|
||||
{
|
||||
er_filter1->run(channels[c], regions[c]);
|
||||
er_filter2->run(channels[c], regions[c]);
|
||||
}
|
||||
|
||||
// Detect character groups
|
||||
vector<Rect> groups;
|
||||
erGrouping(channels, regions, groups);
|
||||
|
||||
// draw groups
|
||||
groups_draw(src, groups);
|
||||
imshow("grouping",src);
|
||||
waitKey(-1);
|
||||
|
||||
// memory clean-up
|
||||
er_filter1.release();
|
||||
er_filter2.release();
|
||||
regions.clear();
|
||||
if (!groups.empty())
|
||||
{
|
||||
groups.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// helper functions
|
||||
|
||||
void show_help_and_exit(const char *cmd)
|
||||
{
|
||||
cout << endl << cmd << endl << endl;
|
||||
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
|
||||
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
|
||||
cout << " Usage: " << cmd << " <input_image> " << endl;
|
||||
cout << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// Draws every rectangle in groups onto src (modified in place), walking the
// vector from its last element to its first. The outline is 3 pixels thick with
// line type 8; its colour is Scalar(0,255,255) when src is CV_8UC3 and
// Scalar(255) for any other image type.
void groups_draw(Mat &src, vector<Rect> &groups)
{
    // The image type does not change while drawing, so the colour is chosen once.
    const Scalar outline = (src.type() == CV_8UC3) ? Scalar( 0, 255, 255 ) : Scalar( 255 );

    for (vector<Rect>::reverse_iterator box = groups.rbegin(); box != groups.rend(); ++box)
        rectangle(src, box->tl(), box->br(), outline, 3, 8 );
}
|
||||
|
||||
void er_draw(Mat &src, Mat &dst, ERStat& er)
|
||||
{
|
||||
|
||||
if (er.parent != NULL) // deprecate the root region
|
||||
@ -29,92 +103,3 @@ void er_draw(Mat &src, Mat &dst, ERStat& er)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, const char * argv[])
|
||||
{
|
||||
|
||||
|
||||
vector<ERStat> regions;
|
||||
|
||||
if (argc < 2) {
|
||||
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
|
||||
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
|
||||
cout << " Usage: " << argv[0] << " input_image <optional_groundtruth_image>" << endl;
|
||||
cout << " Default classifier files (trained_classifierNM*.xml) should be in ./" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
Mat original = imread(argv[1]);
|
||||
Mat gt;
|
||||
if (argc > 2)
|
||||
{
|
||||
gt = imread(argv[2]);
|
||||
cvtColor(gt, gt, COLOR_RGB2GRAY);
|
||||
threshold(gt, gt, 254, 255, THRESH_BINARY);
|
||||
}
|
||||
Mat grey(original.size(),CV_8UC1);
|
||||
cvtColor(original,grey,COLOR_RGB2GRAY);
|
||||
|
||||
double t = (double)getTickCount();
|
||||
|
||||
// Build ER tree and filter with the 1st stage default classifier
|
||||
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"));
|
||||
|
||||
er_filter1->run(grey, regions);
|
||||
|
||||
t = (double)getTickCount() - t;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl;
|
||||
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl;
|
||||
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
|
||||
er_filter1.release();
|
||||
|
||||
// draw regions
|
||||
Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1);
|
||||
for (int r=0; r<(int)regions.size(); r++)
|
||||
er_draw(grey, mask, regions.at(r));
|
||||
mask = 255-mask;
|
||||
imwrite("out_first_stage.jpg", mask);
|
||||
|
||||
if (argc > 2)
|
||||
{
|
||||
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
|
||||
cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
|
||||
}
|
||||
|
||||
t = (double)getTickCount();
|
||||
|
||||
// Default second stage classifier
|
||||
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"));
|
||||
er_filter2->run(grey, regions);
|
||||
|
||||
t = (double)getTickCount() - t;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." << endl;
|
||||
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
|
||||
cout << " --------------------------------------------------------------------------------------------------" << endl;
|
||||
|
||||
er_filter2.release();
|
||||
|
||||
// draw regions
|
||||
mask = mask*0;
|
||||
for (int r=0; r<(int)regions.size(); r++)
|
||||
er_draw(grey, mask, regions.at(r));
|
||||
mask = 255-mask;
|
||||
imwrite("out_second_stage.jpg", mask);
|
||||
|
||||
if (argc > 2)
|
||||
{
|
||||
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
|
||||
cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
|
||||
}
|
||||
|
||||
regions.clear();
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user