Added erGrouping function: Find groups of Extremal Regions that are organized as text blocks. Updated sample/cpp to use the complete text detection pipeline

This commit is contained in:
lluis 2013-09-17 23:45:23 +02:00
parent 3fa3722641
commit 2837bfd9fa
3 changed files with 2026 additions and 91 deletions

View File

@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0,
*/
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
/*!
Find groups of Extremal Regions that are organized as text blocks. This function implements
the grouping algorithm described in:
Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
Notice that this implementation constrains the results to horizontally-aligned text and
Latin script (since ERFilter classifiers are trained only for Latin script detection).
The algorithm combines two different clustering techniques in a single parameter-free procedure
to detect groups of regions organized as text. The maximally meaningful groups are first detected
in several feature spaces, where each feature space is a combination of proximity information
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
thus providing a set of hypotheses of text groups. An Evidence Accumulation framework is used to
combine all these hypotheses to get the final estimate. Each of the resulting groups is finally
heuristically validated in order to assess whether it forms a valid horizontally-aligned text block.
\param src Vector of single-channel CV_8UC1 images from which the regions were extracted.
\param regions Vector of ERs retrieved from the ERFilter algorithm from each channel.
\param groups The output of the algorithm is stored in this parameter as a list of rectangles.
*/
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions,
std::vector<Rect> &groups);
}
#endif // _OPENCV_ERFILTER_HPP_

File diff suppressed because it is too large Load Diff

View File

@ -16,9 +16,83 @@
using namespace std;
using namespace cv;
void er_draw(Mat &src, Mat &dst, ERStat& er);
// Forward declarations of the helper functions defined later in this file.
void show_help_and_exit(const char *cmd);
void groups_draw(Mat &src, vector<Rect> &groups);
void er_draw(Mat &src, Mat &dst, ERStat& er);
void er_draw(Mat &src, Mat &dst, ERStat& er)
int main(int argc, const char * argv[])
{
if (argc < 2) show_help_and_exit(argv[0]);
Mat src = imread(argv[1]);
// Extract channels to be processed individually
vector<Mat> channels;
computeNMChannels(src, channels);
int cn = (int)channels.size();
// Append negative channels to detect ER- (bright regions over dark background)
for (int c = 0; c < cn-1; c++)
channels.push_back(255-channels[c]);
// Create ERFilter objects with the 1st and 2nd stage default classifiers
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1);
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3);
vector<vector<ERStat> > regions(channels.size());
// Apply the default cascade classifier to each independent channel (could be done in parallel)
for (int c=0; c<(int)channels.size(); c++)
{
er_filter1->run(channels[c], regions[c]);
er_filter2->run(channels[c], regions[c]);
}
// Detect character groups
vector<Rect> groups;
erGrouping(channels, regions, groups);
// draw groups
groups_draw(src, groups);
imshow("grouping",src);
waitKey(-1);
// memory clean-up
er_filter1.release();
er_filter2.release();
regions.clear();
if (!groups.empty())
{
groups.clear();
}
}
// helper functions
// Prints the program name, a short description of the demo and its usage line to
// standard output, then terminates the process with exit status -1.
// \param cmd Name of the executable; echoed in the banner and in the usage line.
void show_help_and_exit(const char *cmd)
{
    cout << endl << cmd << endl << endl
         << "Demo program of the Extremal Region Filter algorithm described in " << endl
         << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl
         << " Usage: " << cmd << " <input_image> " << endl
         << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;

    exit(-1);
}
// Draws the outline of every rectangle in groups onto src (src is modified in place).
// \param src    Image to draw on. A CV_8UC3 image gets a Scalar(0,255,255) outline,
//               any other type a Scalar(255) outline.
// \param groups Rectangles to draw; they are visited from the last element to the first.
void groups_draw(Mat &src, vector<Rect> &groups)
{
    // The image type does not change while drawing, so pick the colour once.
    const Scalar outline = (src.type() == CV_8UC3) ? Scalar( 0, 255, 255 ) : Scalar( 255 );
    const int thickness = 3;
    const int line_type = 8;

    for (vector<Rect>::reverse_iterator box = groups.rbegin(); box != groups.rend(); ++box)
        rectangle(src, box->tl(), box->br(), outline, thickness, line_type);
}
void er_draw(Mat &src, Mat &dst, ERStat& er)
{
if (er.parent != NULL) // deprecate the root region
@ -29,92 +103,3 @@ void er_draw(Mat &src, Mat &dst, ERStat& er)
}
}
int main(int argc, const char * argv[])
{
vector<ERStat> regions;
if (argc < 2) {
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
cout << " Usage: " << argv[0] << " input_image <optional_groundtruth_image>" << endl;
cout << " Default classifier files (trained_classifierNM*.xml) should be in ./" << endl;
return -1;
}
Mat original = imread(argv[1]);
Mat gt;
if (argc > 2)
{
gt = imread(argv[2]);
cvtColor(gt, gt, COLOR_RGB2GRAY);
threshold(gt, gt, 254, 255, THRESH_BINARY);
}
Mat grey(original.size(),CV_8UC1);
cvtColor(original,grey,COLOR_RGB2GRAY);
double t = (double)getTickCount();
// Build ER tree and filter with the 1st stage default classifier
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"));
er_filter1->run(grey, regions);
t = (double)getTickCount() - t;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl;
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl;
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
er_filter1.release();
// draw regions
Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1);
for (int r=0; r<(int)regions.size(); r++)
er_draw(grey, mask, regions.at(r));
mask = 255-mask;
imwrite("out_first_stage.jpg", mask);
if (argc > 2)
{
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
}
t = (double)getTickCount();
// Default second stage classifier
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"));
er_filter2->run(grey, regions);
t = (double)getTickCount() - t;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." << endl;
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
er_filter2.release();
// draw regions
mask = mask*0;
for (int r=0; r<(int)regions.size(); r++)
er_draw(grey, mask, regions.at(r));
mask = 255-mask;
imwrite("out_second_stage.jpg", mask);
if (argc > 2)
{
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2))));
cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
}
regions.clear();
}