Major refactor of control.cpp to enable line recognition

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1147 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2014-08-11 23:23:06 +00:00
parent e249d7bcb2
commit dbf6197471
34 changed files with 931 additions and 560 deletions

View File

@ -790,6 +790,10 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns NULL on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
@ -797,11 +801,11 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* TessBaseAPI::AnalyseLayout() {
PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
if (FindLines() == 0) {
if (block_list_->empty())
return NULL; // The page was empty.
page_res_ = new PAGE_RES(block_list_, NULL);
page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
DetectParagraphs(false);
return new PageIterator(
page_res_, tesseract_, thresholder_->GetScaleFactor(),
@ -823,18 +827,22 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
if (page_res_ != NULL)
delete page_res_;
if (block_list_->empty()) {
page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_);
page_res_ = new PAGE_RES(false, block_list_,
&tesseract_->prev_word_best_choice_);
return 0; // Empty page.
}
tesseract_->SetBlackAndWhitelist();
recognition_done_ = true;
if (tesseract_->tessedit_resegment_from_line_boxes)
if (tesseract_->tessedit_resegment_from_line_boxes) {
page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
else if (tesseract_->tessedit_resegment_from_boxes)
} else if (tesseract_->tessedit_resegment_from_boxes) {
page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
else
page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_);
} else {
// TODO(rays) LSTM here.
page_res_ = new PAGE_RES(false,
block_list_, &tesseract_->prev_word_best_choice_);
}
if (tesseract_->tessedit_make_boxes_from_boxes) {
tesseract_->CorrectClassifyWords(page_res_);
return 0;
@ -900,7 +908,8 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
recognition_done_ = true;
page_res_ = new PAGE_RES(block_list_, &(tesseract_->prev_word_best_choice_));
page_res_ = new PAGE_RES(false, block_list_,
&(tesseract_->prev_word_best_choice_));
PAGE_RES_IT page_res_it(page_res_);
@ -1977,7 +1986,10 @@ void TessBaseAPI::Threshold(Pix** pix) {
// than over-estimate resolution.
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
thresholder_->ThresholdToPix(pix);
PageSegMode pageseg_mode =
static_cast<PageSegMode>(
static_cast<int>(tesseract_->tessedit_pageseg_mode));
thresholder_->ThresholdToPix(pageseg_mode, pix);
thresholder_->GetImageSizes(&rect_left_, &rect_top_,
&rect_width_, &rect_height_,
&image_width_, &image_height_);
@ -2332,7 +2344,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
PAGE_RES *page_res = new PAGE_RES(block_list,
PAGE_RES *page_res = new PAGE_RES(false, block_list,
&(tesseract_->prev_word_best_choice_));
tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
return page_res;
@ -2341,7 +2353,7 @@ PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result) {
if (!pass1_result)
pass1_result = new PAGE_RES(block_list,
pass1_result = new PAGE_RES(false, block_list,
&(tesseract_->prev_word_best_choice_));
tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
return pass1_result;

View File

@ -484,14 +484,21 @@ class TESS_API TessBaseAPI {
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* Returns NULL on error.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns NULL on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator* AnalyseLayout();
PageIterator* AnalyseLayout() {
return AnalyseLayout(false);
}
PageIterator* AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract

View File

@ -110,30 +110,20 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
bool find_segmentation,
BLOCK_LIST *block_list) {
int box_count = 0;
int box_failures = 0;
FILE* box_file = OpenBoxFile(fname);
TBOX box;
GenericVector<TBOX> boxes;
GenericVector<STRING> texts, full_texts;
bool found_box = true;
while (found_box) {
int line_number = 0; // Line number of the box file.
STRING text, full_text;
found_box = ReadNextBox(applybox_page, &line_number, box_file, &text, &box);
if (found_box) {
++box_count;
MakeBoxFileStr(text.string(), box, applybox_page, &full_text);
} else {
full_text = "";
}
boxes.push_back(box);
texts.push_back(text);
full_texts.push_back(full_text);
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
NULL)) {
return NULL; // Can't do it.
}
int box_count = boxes.size();
int box_failures = 0;
// Add an empty everything to the end.
boxes.push_back(TBOX());
texts.push_back(STRING());
full_texts.push_back(STRING());
// In word mode, we use the boxes to make a word for each box, but
// in blob mode we use the existing words and maximally chop them first.
PAGE_RES* page_res = find_segmentation ?
@ -239,7 +229,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
}
}
}
PAGE_RES* page_res = new PAGE_RES(block_list, NULL);
PAGE_RES* page_res = new PAGE_RES(false, block_list, NULL);
PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res;
while ((word_res = pr_it.word()) != NULL) {

View File

@ -69,16 +69,11 @@ const double kMinRefitXHeightFraction = 0.5;
namespace tesseract {
void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
TBOX &selection_box) {
WERD *word;
ROW *pseudo_row; // row of word
BLOCK *pseudo_block; // block of word
word = make_pseudo_word(page_res, selection_box,
pseudo_block, pseudo_row);
if (word != NULL) {
WERD_RES word_res(word);
recog_interactive(pseudo_block, pseudo_row, &word_res);
delete word;
PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box);
if (it != NULL) {
recog_interactive(it);
it->DeleteCurrentWord();
delete it;
}
}
@ -92,19 +87,22 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
* @param row row of word
* @param word_res word to recognise
*/
BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) {
BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
inT16 char_qual;
inT16 good_char_qual;
WordData word_data(block, row, word_res);
WordData word_data(*pr_it);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, pr_it,
&word_data);
if (tessedit_debug_quality_metrics) {
word_char_quality(word_res, row, &char_qual, &good_char_qual);
tprintf
("\n%d chars; word_blob_quality: %d; outline_errs: %d; char_quality: %d; good_char_quality: %d\n",
word_res->reject_map.length(), word_blob_quality(word_res, row),
word_outline_errs(word_res), char_qual, good_char_qual);
WERD_RES* word_res = pr_it->word();
word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual);
tprintf("\n%d chars; word_blob_quality: %d; outline_errs: %d; "
"char_quality: %d; good_char_quality: %d\n",
word_res->reject_map.length(),
word_blob_quality(word_res, pr_it->row()->row),
word_outline_errs(word_res), char_qual, good_char_qual);
}
return TRUE;
}
@ -163,8 +161,6 @@ void Tesseract::SetupAllWordsPassN(int pass_n,
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
if (pass_n == 1)
page_res_it.word()->SetupFake(unicharset);
if (target_word_box == NULL ||
ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 1)) {
@ -180,33 +176,29 @@ void Tesseract::SetupAllWordsPassN(int pass_n,
// Sets up the single word ready for whichever engine is to be run.
void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
if (pass_n == 1 || !word->word->done || tessedit_training_tess) {
if (pass_n == 2) {
if (pass_n == 1 || !word->word->done) {
if (pass_n == 1) {
word->word->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
word->row, word->block);
} else if (pass_n == 2) {
// TODO(rays) Should we do this on pass1 too?
word->word->caps_height = 0.0;
if (word->word->x_height == 0.0f)
word->word->x_height = word->row->x_height();
}
// Cube doesn't get setup for pass2.
if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
word->word->SetupForRecognition(
unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, word->row, word->block);
}
}
if (!sub_langs_.empty()) {
if (word->lang_words.size() != sub_langs_.size()) {
// Setup the words for all the sub-languages now.
WERD_RES empty;
word->lang_words.init_to_size(sub_langs_.size(), empty);
}
for (int s = 0; s < sub_langs_.size(); ++s) {
Tesseract* lang_t = sub_langs_[s];
if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&
(!word->lang_words[s].done || lang_t->tessedit_training_tess))) {
word->lang_words[s].InitForRetryRecognition(*word->word);
word->lang_words[s].SetupForRecognition(
for (int s = 0; s <= sub_langs_.size(); ++s) {
// The sub_langs_.size() entry is for the master language.
Tesseract* lang_t = s < sub_langs_.size() ? sub_langs_[s] : this;
WERD_RES* word_res = new WERD_RES;
word_res->InitForRetryRecognition(*word->word);
word->lang_words.push_back(word_res);
// Cube doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
word_res->SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL,
lang_t->classify_bln_numeric_mode,
@ -217,17 +209,19 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
}
}
// Runs word recognition on all the words.
bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
PAGE_RES_IT* pr_it,
GenericVector<WordData>* words) {
// TODO(rays) Before this loop can be parallelized (it would yield a massive
// speed-up) all remaining member globals need to be converted to local/heap
// (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be
// added. The results will be significantly different with adaption on, and
// deterioration will need investigation.
pr_it->restart_page();
for (int w = 0; w < words->size(); ++w) {
WordData* word = &(*words)[w];
if (w > 0) word->prev_word = &(*words)[w - 1];
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
if (pass_n == 1)
@ -244,16 +238,26 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
return false;
}
}
if (word->word->tess_failed) continue;
if (word->word->tess_failed) {
int s;
for (s = 0; s < word->lang_words.size() &&
word->lang_words[s]->tess_failed; ++s) {}
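// If all have failed, skip it. Image words are skipped by this test.
// NOTE(review): the loop above exits with s <= lang_words.size(), so the
// `s > size()` test below can never be true and nothing is skipped;
// `s >= size()` looks like the intended condition -- confirm.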
if (s > word->lang_words.size()) continue;
}
// Sync pr_it with the w-th WordData: advance until it points at this word.
while (pr_it->word() != NULL && pr_it->word() != word->word)
pr_it->forward();
ASSERT_HOST(pr_it->word() != NULL);
WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1
: &Tesseract::classify_word_pass2;
classify_word_and_language(recognizer, word);
classify_word_and_language(recognizer, pr_it, word);
if (tessedit_dump_choices) {
word_dumper(NULL, word->row, word->word);
tprintf("Pass%d: %s [%s]\n", pass_n,
word->word->best_choice->unichar_string().string(),
word->word->best_choice->debug_string().string());
}
pr_it->forward();
}
return true;
}
@ -326,12 +330,12 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
most_recently_used_ = this;
// Run pass 1 word recognition.
if (!RecogAllWordsPassN(1, monitor, &words)) return false;
if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words)) return false;
// Pass 1 post-processing.
while (page_res_it.word() != NULL) {
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
if (page_res_it.word()->word->flag(W_REP_CHAR)) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
@ -346,15 +350,14 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
page_res->misadaption_log.push_back(
page_res_it.word()->blamer_bundle->misadaption_debug());
}
page_res_it.forward();
}
}
if (dopasses == 1) return true;
// ****************** Pass 2 *******************
if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {
if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
tessedit_ocr_engine_mode != OEM_CUBE_ONLY ) {
page_res_it.restart_page();
GenericVector<WordData> words;
SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
@ -363,17 +366,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
}
most_recently_used_ = this;
// Run pass 2 word recognition.
if (!RecogAllWordsPassN(2, monitor, &words)) return false;
// Pass 2 post-processing.
while (page_res_it.word() != NULL) {
WERD_RES* word = page_res_it.word();
if (word->word->flag(W_REP_CHAR) && !word->done) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
page_res_it.forward();
}
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
}
// The next passes can only be run if tesseract has been used, as cube
@ -407,8 +400,8 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
// ****************** Pass 9 *******************
// Check the correctness of the final results.
blamer_pass(page_res);
script_pos_pass(page_res);
}
script_pos_pass(page_res);
// Write results pass.
set_global_loc_code(LOC_WRITE_RESULTS);
@ -745,166 +738,232 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
}
}
// Helper returns true if the new_word is better than the word, using a
// simple test of better certainty AND rating (to reduce false positives
// from cube) or a dictionary vs non-dictionary word.
static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word,
double rating_ratio,
double certainty_margin) {
if (new_word.best_choice == NULL) {
return false; // New one no good.
// Factored helper considers the indexed word and updates all the
// pointed-to values.
static void EvaluateWord(const PointerVector<WERD_RES>& words, int index,
float* rating, float* certainty, bool* bad,
bool* valid_permuter, int* right, int* next_left) {
*right = -MAX_INT32;
*next_left = MAX_INT32;
if (index < words.size()) {
WERD_CHOICE* choice = words[index]->best_choice;
if (choice == NULL) {
*bad = true;
} else {
*rating += choice->rating();
*certainty = MIN(*certainty, choice->certainty());
if (!Dict::valid_word_permuter(choice->permuter(), false))
*valid_permuter = false;
}
*right = words[index]->word->bounding_box().right();
if (index + 1 < words.size())
*next_left = words[index + 1]->word->bounding_box().left();
} else {
*valid_permuter = false;
*bad = true;
}
if (word.best_choice == NULL) {
return true; // Old one no good.
}
// Helper chooses the best combination of words, transferring good ones from
// new_words to best_words. To win, a new word must have (better rating and
// certainty) or (better permuter status and rating within rating ratio and
// certainty within certainty margin) than current best.
// All the new_words are consumed (moved to best_words or deleted.)
// The return value is the number of new_words used minus the number of
// best_words that remain in the output.
static int SelectBestWords(double rating_ratio,
double certainty_margin,
bool debug,
PointerVector<WERD_RES>* new_words,
PointerVector<WERD_RES>* best_words) {
// Process the smallest groups of words that have an overlapping word
// boundary at the end.
GenericVector<WERD_RES*> out_words;
// Index into each word vector (best, new).
int b = 0, n = 0;
int num_best = 0, num_new = 0;
while (b < best_words->size() || n < new_words->size()) {
// Start of the current run in each.
int start_b = b, start_n = n;
// Rating of the current run in each.
float b_rating = 0.0f, n_rating = 0.0f;
// Certainty of the current run in each.
float b_certainty = 0.0f, n_certainty = 0.0f;
// True if any word is missing its best choice.
bool b_bad = false, n_bad = false;
// True if all words have a valid permuter.
bool b_valid_permuter = true, n_valid_permuter = true;
while (b < best_words->size() || n < new_words->size()) {
int b_right = -MAX_INT32;
int next_b_left = MAX_INT32;
EvaluateWord(*best_words, b, &b_rating, &b_certainty, &b_bad,
&b_valid_permuter, &b_right, &next_b_left);
int n_right = -MAX_INT32;
int next_n_left = MAX_INT32;
EvaluateWord(*new_words, n, &n_rating, &n_certainty, &n_bad,
&n_valid_permuter, &n_right, &next_n_left);
if (MAX(b_right, n_right) < MIN(next_b_left, next_n_left)) {
// The word breaks overlap. [start_b,b] and [start_n, n] match.
break;
}
// Keep searching for the matching word break.
if ((b_right < n_right && b < best_words->size()) ||
n == new_words->size())
++b;
else
++n;
}
bool new_better = false;
if (!n_bad && (b_bad || (n_certainty > b_certainty &&
n_rating < b_rating) ||
(!b_valid_permuter && n_valid_permuter &&
n_rating < b_rating * rating_ratio &&
n_certainty > b_certainty - certainty_margin))) {
// New is better.
for (int i = start_n; i <= n; ++i) {
out_words.push_back((*new_words)[i]);
(*new_words)[i] = NULL;
++num_new;
}
new_better = true;
} else if (!b_bad) {
// Current best is better.
for (int i = start_b; i <= b; ++i) {
out_words.push_back((*best_words)[i]);
(*best_words)[i] = NULL;
++num_best;
}
}
int end_b = b < best_words->size() ? b + 1 : b;
int end_n = n < new_words->size() ? n + 1 : n;
if (debug) {
tprintf("%d new words %s than %d old words: r: %g v %g c: %g v %g"
" valid dict: %d v %d\n",
end_n - start_n, new_better ? "better" : "worse",
end_b - start_b, n_rating, b_rating,
n_certainty, b_certainty, n_valid_permuter, b_valid_permuter);
}
// Move on to the next group.
b = end_b;
n = end_n;
}
if (new_word.best_choice->certainty() > word.best_choice->certainty() &&
new_word.best_choice->rating() < word.best_choice->rating()) {
return true; // New word has better confidence.
}
if (!Dict::valid_word_permuter(word.best_choice->permuter(), false) &&
Dict::valid_word_permuter(new_word.best_choice->permuter(), false) &&
new_word.best_choice->rating() <
word.best_choice->rating() * rating_ratio &&
new_word.best_choice->certainty() >
word.best_choice->certainty() - certainty_margin) {
return true; // New word is from a dictionary.
}
return false; // New word is no better.
// Transfer from out_words to best_words.
best_words->clear();
for (int i = 0; i < out_words.size(); ++i)
best_words->push_back(out_words[i]);
return num_new - num_best;
}
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,
WordData* word_data, WERD_RES* word,
WordRecognizer recognizer) {
if (classify_debug_level || cube_debug_level) {
tprintf("Retrying word using lang %s, oem %d\n",
// Returns positive if this recognizer found more new best words than the
// number kept from best_words.
int Tesseract::RetryWithLanguage(const WordData& word_data,
WordRecognizer recognizer,
WERD_RES** in_word,
PointerVector<WERD_RES>* best_words) {
bool debug = classify_debug_level || cube_debug_level;
if (debug) {
tprintf("Trying word using lang %s, oem %d\n",
lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
}
// Run the recognizer on the word.
PointerVector<WERD_RES> new_words;
(this->*recognizer)(word_data, in_word, &new_words);
if (new_words.empty()) {
// Transfer input word to new_words, as the classifier must have put
// the result back in the input.
new_words.push_back(*in_word);
*in_word = NULL;
}
if (debug) {
for (int i = 0; i < new_words.size(); ++i)
new_words[i]->DebugTopChoice("Lang result");
}
// Initial version is a bit of a hack based on better certainty and rating
// (to reduce false positives from cube) or a dictionary vs non-dictionary
// word.
(this->*recognizer)(word_data, word);
bool new_is_better = NewWordBetter(best_word, *word,
classify_max_rating_ratio,
classify_max_certainty_margin);
if (classify_debug_level || cube_debug_level) {
if (word->best_choice == NULL) {
tprintf("NULL result %s better!\n",
new_is_better ? "IS" : "NOT");
} else {
tprintf("New result %s better:%s, r=%g, c=%g\n",
new_is_better ? "IS" : "NOT",
word->best_choice->unichar_string().string(),
word->best_choice->rating(),
word->best_choice->certainty());
}
return SelectBestWords(classify_max_rating_ratio,
classify_max_certainty_margin,
debug, &new_words, best_words);
}
// Helper returns true if all the words are acceptable.
static bool WordsAcceptable(const PointerVector<WERD_RES>& words) {
for (int w = 0; w < words.size(); ++w) {
if (words[w]->tess_failed || !words[w]->tess_accepted) return false;
}
return new_is_better;
return true;
}
// Generic function for classifying a word. Can be used either for pass1 or
// pass2 according to the function passed to recognizer.
// word block and row are the current location in the document's PAGE_RES.
// word_data holds the word to be recognized, and its block and row, and
// pr_it points to the word as well, in case we are running LSTM and it wants
// to output multiple words.
// Recognizes in the current language, and if successful that is all.
// If recognition was not successful, tries all available languages until
// it gets a successful result or runs out of languages. Keeps the best result.
void Tesseract::classify_word_and_language(WordRecognizer recognizer,
PAGE_RES_IT* pr_it,
WordData* word_data) {
// Best result so far.
PointerVector<WERD_RES> best_words;
// Points to the best result. May be word or in lang_words.
WERD_RES* word = word_data->word;
clock_t start_t = clock();
if (classify_debug_level || cube_debug_level) {
tprintf("Processing word with lang %s at:",
tprintf("%s word with lang %s at:",
word->done ? "Already done" : "Processing",
most_recently_used_->lang.string());
word->word->bounding_box().print();
}
const char* result_type = "Initial";
bool initially_done = !word->tess_failed && word->done;
if (initially_done) {
if (word->done) {
// If done on pass1, leave it as-is.
most_recently_used_ = word->tesseract;
result_type = "Already done";
} else {
if (most_recently_used_ != this) {
// Point to the word for most_recently_used_.
for (int s = 0; s < sub_langs_.size(); ++s) {
if (most_recently_used_ == sub_langs_[s]) {
word = &word_data->lang_words[s];
break;
}
}
}
(most_recently_used_->*recognizer)(word_data, word);
if (!word->tess_failed && word->tess_accepted)
result_type = "Accepted";
if (!word->tess_failed)
most_recently_used_ = word->tesseract;
return;
}
if (classify_debug_level || cube_debug_level) {
tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"
" xht=[%g,%g]\n",
result_type,
word->best_choice->unichar_string().string(),
word->best_choice->rating(),
word->best_choice->certainty(),
word->tess_accepted, word->tess_would_adapt,
word->best_choice->min_x_height(),
word->best_choice->max_x_height());
int sub = sub_langs_.size();
if (most_recently_used_ != this) {
// Get the index of the most_recently_used_.
for (sub = 0; sub < sub_langs_.size() &&
most_recently_used_ != sub_langs_[sub]; ++sub) {}
}
if (word->tess_failed || !word->tess_accepted) {
most_recently_used_->RetryWithLanguage(
*word_data, recognizer, &word_data->lang_words[sub], &best_words);
Tesseract* best_lang_tess = most_recently_used_;
if (!WordsAcceptable(best_words)) {
// Try all the other languages to see if they are any better.
Tesseract* previous_used = most_recently_used_;
if (most_recently_used_ != this) {
if (classify_debug_level) {
tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());
}
if (word_data->word->tesseract == this) {
// This is pass1, and we are trying the main language.
if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {
most_recently_used_ = this;
word = word_data->word;
}
} else {
// This is pass2, and we are trying the main language again, but it
// has no word allocated to it, so we must re-initialize it.
WERD_RES main_word(*word_data->word);
main_word.InitForRetryRecognition(*word_data->word);
main_word.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
word_data->row, word_data->block);
if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {
most_recently_used_ = this;
word_data->word->ConsumeWordResults(&main_word);
word = word_data->word;
}
}
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
if (most_recently_used_ != this &&
this->RetryWithLanguage(*word_data, recognizer,
&word_data->lang_words[sub_langs_.size()],
&best_words) > 0) {
best_lang_tess = this;
}
for (int i = 0; i < sub_langs_.size(); ++i) {
if (sub_langs_[i] != previous_used) {
if (classify_debug_level) {
tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",
i, sub_langs_[i]->lang.string());
}
if (sub_langs_[i]->RetryWithLanguage(*word, word_data,
&word_data->lang_words[i],
recognizer)) {
most_recently_used_ = sub_langs_[i];
word = &word_data->lang_words[i];
if (!word->tess_failed && word->tess_accepted)
break; // No need to look at the others.
}
for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size();
++i) {
if (most_recently_used_ != sub_langs_[i] &&
sub_langs_[i]->RetryWithLanguage(*word_data, recognizer,
&word_data->lang_words[i],
&best_words) > 0) {
best_lang_tess = sub_langs_[i];
}
}
}
if (word != word_data->word) {
// Move the result for the best language to the main word.
word_data->word->ConsumeWordResults(word);
most_recently_used_ = best_lang_tess;
if (!best_words.empty()) {
if (best_words.size() == 1 && !best_words[0]->combination) {
// Move the best single result to the main word.
word_data->word->ConsumeWordResults(best_words[0]);
} else {
// Words came from LSTM, and must be moved to the PAGE_RES properly.
word_data->word = best_words.back();
pr_it->ReplaceCurrentWord(&best_words);
}
ASSERT_HOST(word_data->word->box_word != NULL);
} else {
tprintf("no best words!!\n");
}
clock_t ocr_t = clock();
if (tessedit_timing_debug) {
@ -920,16 +979,19 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
* Baseline normalize the word and pass it to Tess.
*/
void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
void Tesseract::classify_word_pass1(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words) {
ROW* row = word_data.row;
BLOCK* block = word_data.block;
prev_word_best_choice_ = word_data.prev_word != NULL
? word_data.prev_word->word->best_choice : NULL;
// If we only intend to run cube - run it and return.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
cube_word_pass1(block, row, word);
cube_word_pass1(block, row, *in_word);
return;
}
WERD_RES* word = *in_word;
match_word_pass_n(1, word, row, block);
if (!word->tess_failed && !word->word->flag(W_REP_CHAR)) {
word->tess_would_adapt = AdaptableWord(word);
@ -1027,19 +1089,23 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
* Control what to do with the word in pass 2
*/
void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {
void Tesseract::classify_word_pass2(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words) {
// Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED &&
word_data.word->best_choice != NULL)
return;
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
ROW* row = word_data.row;
BLOCK* block = word_data.block;
WERD_RES* word = *in_word;
prev_word_best_choice_ = word_data.prev_word != NULL
? word_data.prev_word->word->best_choice : NULL;
set_global_subloc_code(SUBLOC_NORM);
check_debug_pt(word, 30);
if (!word->done || tessedit_training_tess) {
if (!word->done) {
word->caps_height = 0.0;
if (word->x_height == 0.0f)
word->x_height = row->x_height();
@ -1161,11 +1227,9 @@ void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) {
const WERD_CHOICE &word = *(word_res->best_choice);
// Find the frequency of each unique character in the word.
UNICHAR_ID space = word_res->uch_set->unichar_to_id(" ");
SortHelper<UNICHAR_ID> rep_ch(word.length());
for (int i = 0; i < word.length(); ++i) {
if (word.unichar_id(i) != space)
rep_ch.Add(word.unichar_id(i), 1);
rep_ch.Add(word.unichar_id(i), 1);
}
// Find the most frequent result.
@ -1194,51 +1258,9 @@ void Tesseract::fix_rep_char(PAGE_RES_IT* page_res_it) {
++gap_count;
prev_blob = blob;
}
if (total_gap > word_res->x_height * gap_count * kRepcharGapThreshold) {
// Needs spaces between.
ExplodeRepeatedWord(best_choice, page_res_it);
} else {
// Just correct existing classification.
CorrectRepcharChoices(best_choice, word_res);
word_res->reject_map.initialise(word.length());
}
}
// Explode the word at the given iterator location into individual words
// of a single given unichar_id defined by best_choice.
// The original word is deleted, and the replacements copy most of their
// fields from the original.
void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice,
PAGE_RES_IT* page_res_it) {
WERD_RES *word_res = page_res_it->word();
ASSERT_HOST(best_choice != NULL);
// Make a new word for each blob in the original.
WERD* werd = word_res->word;
C_BLOB_IT blob_it(werd->cblob_list());
for (; !blob_it.empty(); blob_it.forward()) {
bool first_blob = blob_it.at_first();
bool last_blob = blob_it.at_last();
WERD* blob_word = werd->ConstructFromSingleBlob(first_blob, last_blob,
blob_it.extract());
// Note that blamer_bundle (truth information) is not copied, which is
// desirable, since the newly inserted words would not have the original
// bounding box corresponding to the one recorded in truth fields.
WERD_RES* rep_word =
page_res_it->InsertSimpleCloneWord(*word_res, blob_word);
// Setup the single char WERD_RES
if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),
tessedit_ocr_engine_mode, NULL, false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
page_res_it->row()->row,
page_res_it->block()->block)) {
rep_word->CloneChoppedToRebuild();
BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice);
rep_word->FakeClassifyWord(1, &blob_choice);
}
}
page_res_it->DeleteCurrentWord();
// Just correct existing classification.
CorrectRepcharChoices(best_choice, word_res);
word_res->reject_map.initialise(word.length());
}
ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(
@ -1405,16 +1427,19 @@ BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) {
show_map_detail = TRUE;
break;
}
tprintf(" \"%s\" ",
word->best_choice->unichar_string().string());
word->reject_map.print (debug_fp);
tprintf ("\n");
if (show_map_detail) {
tprintf ("\"%s\"\n", word->best_choice->unichar_string().string());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf ("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
if (word->best_choice != NULL) {
tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
word->reject_map.print(debug_fp);
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}
}
} else {
tprintf("null best choice\n");
}
tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");

View File

@ -205,7 +205,8 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
if ((!word->part_of_combo) && (word->box_word == NULL)) {
WordData word_data(block, row, word);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, NULL,
&word_data);
}
prev_word_best_choice_ = word->best_choice;
}

View File

@ -30,15 +30,12 @@ namespace tesseract {
void Tesseract::process_selected_words(
PAGE_RES* page_res, // blocks to check
TBOX & selection_box,
BOOL8(tesseract::Tesseract::*word_processor)( // function to call
BLOCK* block, ROW* row, WERD_RES* word_res)) {
BOOL8(tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it)) {
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD* word = page_res_it.word()->word;
if (word->bounding_box().overlap(selection_box)) {
if (!((this->*word_processor)(page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word())))
if (!(this->*word_processor)(&page_res_it))
return;
}
}

View File

@ -39,13 +39,11 @@ void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
for (int w = 0; w < words.size(); ++w) {
if (words[w].word->ratings != NULL &&
words[w].word->ratings->get(0, 0) == NULL) {
for (int b = 0; b < words[w].word->chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, this, *words[w].word));
}
for (int s = 0; s < words[w].lang_words.size(); ++s) {
const WERD_RES& word = words[w].lang_words[s];
Tesseract* sub = s < sub_langs_.size() ? sub_langs_[s] : this;
const WERD_RES& word = *words[w].lang_words[s];
for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
blobs.push_back(BlobData(b, sub_langs_[s], word));
blobs.push_back(BlobData(b, sub, word));
}
}
}

View File

@ -306,10 +306,7 @@ SVMenuNode *Tesseract::build_menu_new() {
* Redisplay page
*/
void Tesseract::do_re_display(
BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block,
ROW* row,
WERD_RES* word_res)) {
PAGE_RES_IT pr_it(current_page_res);
BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it)) {
int block_count = 1;
image_win->Clear();
@ -317,8 +314,9 @@ void Tesseract::do_re_display(
image_win->Image(pix_binary_, 0, 0);
}
PAGE_RES_IT pr_it(current_page_res);
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
(this->*word_painter)(pr_it.block()->block, pr_it.row()->row, word);
(this->*word_painter)(&pr_it);
if (display_baselines && pr_it.row() != pr_it.prev_row())
pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
if (display_blocks && pr_it.block() != pr_it.prev_block())
@ -714,11 +712,10 @@ void show_point(PAGE_RES* page_res, float x, float y) {
#endif // GRAPHICS_DISABLED
namespace tesseract {
#ifndef GRAPHICS_DISABLED
BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
WERD_RES* word_res) {
word_res->word->bounding_box().plot(image_win, ScrollView::BLACK,
ScrollView::BLACK);
return word_set_display(block, row, word_res);
BOOL8 Tesseract:: word_blank_and_set_display(PAGE_RES_IT* pr_it) {
pr_it->word()->word->bounding_box().plot(image_win, ScrollView::BLACK,
ScrollView::BLACK);
return word_set_display(pr_it);
}
@ -727,7 +724,8 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
*
* Normalize word and display in word window
*/
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
BOOL8 Tesseract::word_bln_display(PAGE_RES_IT* pr_it) {
WERD_RES* word_res = pr_it->word();
if (word_res->chopped_word == NULL) {
// Setup word normalization parameters.
word_res->SetupForRecognition(unicharset, this, BestPix(),
@ -735,7 +733,7 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
pr_it->row()->row, pr_it->block()->block);
}
bln_word_window_handle()->Clear();
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
@ -758,7 +756,8 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Display a word according to its display modes
*/
BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
BOOL8 Tesseract::word_display(PAGE_RES_IT* pr_it) {
WERD_RES* word_res = pr_it->word();
WERD* word = word_res->word;
TBOX word_bb; // word bounding box
int word_height; // ht of word BB
@ -918,14 +917,15 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Dump members to the debug window
*/
BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
if (block != NULL) {
BOOL8 Tesseract::word_dumper(PAGE_RES_IT* pr_it) {
if (pr_it->block()->block != NULL) {
tprintf("\nBlock data...\n");
block->print(NULL, FALSE);
pr_it->block()->block->print(NULL, FALSE);
}
tprintf("\nRow data...\n");
row->print(NULL);
pr_it->row()->row->print(NULL);
tprintf("\nWord data...\n");
WERD_RES* word_res = pr_it->word();
word_res->word->print();
if (word_res->blamer_bundle != NULL && wordrec_debug_blamer &&
word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) {
@ -941,8 +941,8 @@ BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
*
* Display word according to current display mode settings
*/
BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
WERD* word = word_res->word;
BOOL8 Tesseract::word_set_display(PAGE_RES_IT* pr_it) {
WERD* word = pr_it->word()->word;
word->set_display_flag(DF_BOX, word_display_mode.bit(DF_BOX));
word->set_display_flag(DF_TEXT, word_display_mode.bit(DF_TEXT));
word->set_display_flag(DF_POLYGONAL, word_display_mode.bit(DF_POLYGONAL));
@ -950,26 +950,24 @@ BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
word->set_display_flag(DF_BN_POLYGONAL,
word_display_mode.bit(DF_BN_POLYGONAL));
word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER));
return word_display(block, row, word_res);
return word_display(pr_it);
}
// page_res is non-const because the iterator doesn't know if you are going
// to change the items it points to! Really a const here though.
void Tesseract::blob_feature_display(PAGE_RES* page_res,
const TBOX& selection_box) {
ROW* row; // row of word
BLOCK* block; // block of word
WERD* word = make_pseudo_word(page_res, selection_box, block, row);
if (word != NULL) {
WERD_RES word_res(word);
word_res.x_height = row->x_height();
word_res.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
TWERD* bln_word = word_res.chopped_word;
PAGE_RES_IT* it = make_pseudo_word(page_res, selection_box);
if (it != NULL) {
WERD_RES* word_res = it->word();
word_res->x_height = it->row()->row->x_height();
word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
it->row()->row, it->block()->block);
TWERD* bln_word = word_res->chopped_word;
TBLOB* bln_blob = bln_word->blobs[0];
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
@ -989,7 +987,8 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
cn_win->Update();
delete word;
it->DeleteCurrentWord();
delete it;
}
}

View File

@ -51,15 +51,11 @@ FILE *Tesseract::init_recog_training(const STRING &fname) {
// Copies the bounding box from page_res_it->word() to the given TBOX.
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
while (page_res_it->block() != NULL) {
if (page_res_it->word() != NULL)
break;
while (page_res_it->block() != NULL && page_res_it->word() == NULL)
page_res_it->forward();
}
if (page_res_it->word() != NULL) {
*tbox = page_res_it->word()->word->bounding_box();
page_res_it->forward();
// If tbox->left() is negative, the training image has vertical text and
// all the coordinates of bounding boxes of page_res are rotated by 90
@ -109,26 +105,34 @@ void Tesseract::recog_training_segmented(const STRING &fname,
// Align bottom left points of the TBOXes.
while (keep_going &&
!NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
keep_going = (bbox.bottom() < tbox.bottom()) ?
read_t(&page_res_it, &tbox) :
ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
if (bbox.bottom() < tbox.bottom()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
while (keep_going &&
!NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
keep_going = (bbox.left() > tbox.left()) ? read_t(&page_res_it, &tbox) :
ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
if (bbox.left() > tbox.left()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label,
&bbox);
}
}
// OCR the word if top right points of the TBOXes are similar.
if (keep_going &&
NearlyEqual<int>(tbox.right(), bbox.right(), kMaxBoxEdgeDiff) &&
NearlyEqual<int>(tbox.top(), bbox.top(), kMaxBoxEdgeDiff)) {
ambigs_classify_and_output(page_res_it.prev_word(),
page_res_it.prev_row(),
page_res_it.prev_block(),
label.string(), output_file);
ambigs_classify_and_output(label.string(), &page_res_it, output_file);
examined_words++;
}
page_res_it.forward();
} while (keep_going);
fclose(box_file);
// Set up scripts on all of the words that did not get sent to
// ambigs_classify_and_output. They all should have, but if all the
@ -196,16 +200,16 @@ static void PrintMatrixPaths(int col, int dim,
// raw choice as a result of the classification. For words labeled with a
// single unichar also outputs all alternatives from blob_choices of the
// best choice.
void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
ROW_RES *row_res,
BLOCK_RES *block_res,
const char *label,
void Tesseract::ambigs_classify_and_output(const char *label,
PAGE_RES_IT* pr_it,
FILE *output_file) {
// Classify word.
fflush(stdout);
WordData word_data(block_res->block, row_res->row, werd_res);
WordData word_data(*pr_it);
SetupWordPassN(1, &word_data);
classify_word_pass1(&word_data, werd_res);
classify_word_and_language(&Tesseract::classify_word_pass1,
pr_it, &word_data);
WERD_RES* werd_res = word_data.word;
WERD_CHOICE *best_choice = werd_res->best_choice;
ASSERT_HOST(best_choice != NULL);

View File

@ -96,8 +96,6 @@ Tesseract::Tesseract()
" whose outlines overlap horizontally.", this->params()),
BOOL_MEMBER(tessedit_display_outwords, false,
"Draw output words", this->params()),
BOOL_MEMBER(tessedit_training_tess, false,
"Call Tess to learn blobs", this->params()),
BOOL_MEMBER(tessedit_dump_choices, false,
"Dump char choices", this->params()),
BOOL_MEMBER(tessedit_timing_debug, false, "Print timing stats",
@ -315,16 +313,6 @@ Tesseract::Tesseract()
"Write .html hOCR output file", this->params()),
BOOL_MEMBER(tessedit_create_pdf, false,
"Write .pdf output file", this->params()),
INT_MEMBER(tessedit_pdf_compression, 0,
"Type of image compression in pdf output: "
"0 - autoselection (default); "
"1 - jpeg; "
"2 - G4; "
"3 - flate",
this->params()),
INT_MEMBER(tessedit_pdf_jpg_quality, 85,
"Quality level of jpeg image compression in pdf output",
this->params()),
STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),

View File

@ -31,20 +31,20 @@
#include "textord.h"
#include "wordrec.h"
class PAGE_RES;
class PAGE_RES_IT;
class BLOB_CHOICE_LIST_CLIST;
class BLOCK_LIST;
class CharSamp;
class TO_BLOCK_LIST;
class WERD_RES;
class ROW;
class TBOX;
class SVMenuNode;
struct Pix;
class WERD_CHOICE;
class WERD;
class BLOB_CHOICE_LIST_CLIST;
struct OSResults;
class PAGE_RES;
class PAGE_RES_IT;
struct Pix;
class ROW;
class SVMenuNode;
class TBOX;
class TO_BLOCK_LIST;
class WERD;
class WERD_CHOICE;
class WERD_RES;
// Top-level class for all tesseract global instance data.
@ -144,10 +144,19 @@ struct WordData {
ROW* row;
BLOCK* block;
WordData* prev_word;
GenericVector<WERD_RES> lang_words;
PointerVector<WERD_RES> lang_words;
};
typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
// Definition of a Tesseract WordRecognizer. The WordData provides the context
// of row/block, in_word holds an initialized, possibly pre-classified word,
// that the recognizer may or may not consume (but if so it sets *in_word=NULL)
// and produces one or more output words in out_words, which may be the
// consumed in_word, or may be generated independently.
// This api allows either a conventional tesseract classifier to work, or a
// line-level classifier that generates multiple words from a merged input.
typedef void (Tesseract::*WordRecognizer)(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
class Tesseract : public Wordrec {
public:
@ -279,6 +288,7 @@ class Tesseract : public Wordrec {
void SetupWordPassN(int pass_n, WordData* word);
// Runs word recognition on all the words.
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
PAGE_RES_IT* pr_it,
GenericVector<WordData>* words);
bool recog_all_words(PAGE_RES* page_res,
ETEXT_DESC* monitor,
@ -294,28 +304,35 @@ class Tesseract : public Wordrec {
// Sets script positions and detects smallcaps on all output words.
void script_pos_pass(PAGE_RES* page_res);
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
WERD_RES* word, WordRecognizer recognizer);
// Returns positive if this recognizer found more new best words than the
// number kept from best_words.
int RetryWithLanguage(const WordData& word_data,
WordRecognizer recognizer,
WERD_RES** in_word,
PointerVector<WERD_RES>* best_words);
void classify_word_and_language(WordRecognizer recognizer,
PAGE_RES_IT* pr_it,
WordData* word_data);
void classify_word_pass1(WordData* word_data, WERD_RES* word);
void classify_word_pass1(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
void recog_pseudo_word(PAGE_RES* page_res, // blocks to check
TBOX &selection_box);
void fix_rep_char(PAGE_RES_IT* page_res_it);
void ExplodeRepeatedWord(BLOB_CHOICE* best_choice, PAGE_RES_IT* page_res_it);
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET& char_set,
const char *s,
const char *lengths);
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
void classify_word_pass2(WordData* word_data, WERD_RES* word);
void classify_word_pass2(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words);
void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
WERD_RES* word, WERD_RES* new_word);
bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
bool TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row);
BOOL8 recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 recog_interactive(PAGE_RES_IT* pr_it);
// Set fonts of this word.
void set_word_fonts(WERD_RES *word);
@ -473,15 +490,13 @@ class Tesseract : public Wordrec {
);
void debug_word(PAGE_RES* page_res, const TBOX &selection_box);
void do_re_display(
BOOL8 (tesseract::Tesseract::*word_painter)(BLOCK* block,
ROW* row,
WERD_RES* word_res));
BOOL8 word_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_blank_and_set_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 (tesseract::Tesseract::*word_painter)(PAGE_RES_IT* pr_it));
BOOL8 word_display(PAGE_RES_IT* pr_it);
BOOL8 word_bln_display(PAGE_RES_IT* pr_it);
BOOL8 word_blank_and_set_display(PAGE_RES_IT* pr_its);
BOOL8 word_set_display(PAGE_RES_IT* pr_it);
// #ifndef GRAPHICS_DISABLED
BOOL8 word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res);
BOOL8 word_dumper(PAGE_RES_IT* pr_it);
// #endif // GRAPHICS_DISABLED
void blob_feature_display(PAGE_RES* page_res, const TBOX& selection_box);
//// reject.h //////////////////////////////////////////////////////////
@ -537,10 +552,7 @@ class Tesseract : public Wordrec {
void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
void fix_fuzzy_space_list( //space explorer
WERD_RES_LIST &best_perm,
ROW *row,
BLOCK* block);
void fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
void fix_fuzzy_spaces( //find fuzzy words
ETEXT_DESC *monitor, //progress monitor
@ -583,9 +595,7 @@ class Tesseract : public Wordrec {
PAGE_RES* page_res, // blocks to check
//function to call
TBOX & selection_box,
BOOL8 (tesseract::Tesseract::*word_processor) (BLOCK* block,
ROW* row,
WERD_RES* word_res));
BOOL8 (tesseract::Tesseract::*word_processor)(PAGE_RES_IT* pr_it));
//// tessbox.cpp ///////////////////////////////////////////////////////
void tess_add_doc_word( //test acceptability
WERD_CHOICE *word_choice //after context
@ -752,7 +762,6 @@ class Tesseract : public Wordrec {
"Each bounding box is assumed to contain ngrams. Only"
" learn the ngrams whose outlines overlap horizontally.");
BOOL_VAR_H(tessedit_display_outwords, false, "Draw output words");
BOOL_VAR_H(tessedit_training_tess, false, "Call Tess to learn blobs");
BOOL_VAR_H(tessedit_dump_choices, false, "Dump char choices");
BOOL_VAR_H(tessedit_timing_debug, false, "Print timing stats");
BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
@ -908,13 +917,6 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
INT_VAR_H(tessedit_pdf_compression, 0, "Type of image encoding in pdf output:"
"0 - autoselection (default); "
"1 - jpeg; "
"2 - G4; "
"3 - flate");
INT_VAR_H(tessedit_pdf_jpg_quality, 85, "Quality level of jpeg image "
"compression in pdf output");
STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level");
@ -1046,10 +1048,8 @@ class Tesseract : public Wordrec {
PAGE_RES *page_res,
volatile ETEXT_DESC *monitor,
FILE *output_file);
void ambigs_classify_and_output(WERD_RES *werd_res,
ROW_RES *row_res,
BLOCK_RES *block_res,
const char *label,
void ambigs_classify_and_output(const char *label,
PAGE_RES_IT* pr_it,
FILE *output_file);
inline CubeRecoContext *GetCubeRecoContext() { return cube_cntxt_; }

View File

@ -171,7 +171,7 @@ void ImageThresholder::SetImage(const Pix* pix) {
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
void ImageThresholder::ThresholdToPix(Pix** pix) {
void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
if (pix_channels_ == 0) {
// We have a binary image, so it just has to be cloned.
*pix = GetPixRect();

View File

@ -20,7 +20,8 @@
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
#define TESSERACT_CCMAIN_THRESHOLDER_H__
#include "platform.h"
#include "platform.h"
#include "publictypes.h"
struct Pix;
@ -116,7 +117,7 @@ class TESS_API ImageThresholder {
/// Threshold the source image as efficiently as possible to the output Pix.
/// Creates a Pix and sets pix to point to the resulting pointer.
/// Caller must use pixDestroy to free the created Pix.
virtual void ThresholdToPix(Pix** pix);
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that

View File

@ -23,17 +23,15 @@
* make_pseudo_word
*
* Make all the blobs inside a selection into a single word.
* The word is always a copy and needs to be deleted.
* The returned PAGE_RES_IT* (it) points to the new word. After use, call
* it->DeleteCurrentWord() to delete the fake word, and then
* delete it to get rid of the iterator itself.
**********************************************************************/
WERD *make_pseudo_word(PAGE_RES* page_res, // Blocks to check.
const TBOX &selection_box,
BLOCK *&pseudo_block,
ROW *&pseudo_row) { // Row of selection.
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box) {
PAGE_RES_IT pr_it(page_res);
C_BLOB_LIST new_blobs; // list of gathered blobs
C_BLOB_IT new_blob_it = &new_blobs; // iterator
WERD *pseudo_word; // fabricated word
for (WERD_RES* word_res = pr_it.word(); word_res != NULL;
word_res = pr_it.forward()) {
@ -45,15 +43,17 @@ WERD *make_pseudo_word(PAGE_RES* page_res, // Blocks to check.
C_BLOB* blob = blob_it.data();
if (blob->bounding_box().overlap(selection_box)) {
new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
pseudo_row = pr_it.row()->row;
pseudo_block = pr_it.block()->block;
}
}
if (!new_blobs.empty()) {
WERD* pseudo_word = new WERD(&new_blobs, 1, NULL);
word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
PAGE_RES_IT* it = new PAGE_RES_IT(page_res);
while (it->word() != word_res && it->word() != NULL) it->forward();
ASSERT_HOST(it->word() == word_res);
return it;
}
}
}
if (!new_blobs.empty())
pseudo_word = new WERD(&new_blobs, 1, NULL);
else
pseudo_word = NULL;
return pseudo_word;
return NULL;
}

View File

@ -22,9 +22,6 @@
#include "pageres.h"
WERD *make_pseudo_word(PAGE_RES* page_res, // blocks to check
const TBOX &selection_box,
BLOCK *&pseudo_block,
ROW *&pseudo_row);
PAGE_RES_IT* make_pseudo_word(PAGE_RES* page_res, const TBOX& selection_box);
#endif

View File

@ -157,6 +157,13 @@ void BoxWord::InsertBox(int index, const TBOX& box) {
ComputeBoundingBox();
}
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
// index: position in boxes_ of the entry to overwrite.
// box: the replacement box, copied into boxes_[index].
// NOTE(review): no range check on index is done here; presumably the caller
// must pass an index of an existing box - confirm against boxes_ operator[].
void BoxWord::ChangeBox(int index, const TBOX& box) {
boxes_[index] = box;
// The overall bounding box depends on every entry, so refresh it.
ComputeBoundingBox();
}
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void BoxWord::DeleteBox(int index) {

View File

@ -63,6 +63,10 @@ class BoxWord {
// Recomputes the bounding box.
void InsertBox(int index, const TBOX& box);
// Changes the box at the given index to the new box.
// Recomputes the bounding box.
void ChangeBox(int index, const TBOX& box);
// Deletes the box with the given index, and shuffles up the rest.
// Recomputes the bounding box.
void DeleteBox(int index);

View File

@ -34,6 +34,13 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
static const double kStopperAmbiguityThresholdOffset = 1.5;
// Max number of broken pieces to associate.
const int kWordrecMaxNumJoinChunks = 4;
// Max ratio of word box height to line size to allow it to be processed as
// a line with other words.
const double kMaxWordSizeRatio = 1.25;
// Max ratio of line box height to line size to allow a new word to be added.
const double kMaxLineSizeRatio = 1.25;
// Max ratio of word gap to line size to allow a new word to be added.
const double kMaxWordGapRatio = 2.0;
// Computes and returns a threshold of certainty difference used to determine
// which words to keep, based on the adjustment factors of the two words.
@ -49,6 +56,7 @@ static double StopperAmbigThreshold(double f1, double f2) {
* Constructor for page results
*************************************************************************/
PAGE_RES::PAGE_RES(
bool merge_similar_words,
BLOCK_LIST *the_block_list,
WERD_CHOICE **prev_word_best_choice_ptr) {
Init();
@ -56,7 +64,8 @@ PAGE_RES::PAGE_RES(
BLOCK_RES_IT block_res_it(&block_res_list);
for (block_it.mark_cycle_pt();
!block_it.cycled_list(); block_it.forward()) {
block_res_it.add_to_end(new BLOCK_RES(block_it.data()));
block_res_it.add_to_end(new BLOCK_RES(merge_similar_words,
block_it.data()));
}
prev_word_best_choice = prev_word_best_choice_ptr;
}
@ -67,7 +76,7 @@ PAGE_RES::PAGE_RES(
* Constructor for BLOCK results
*************************************************************************/
BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
BLOCK_RES::BLOCK_RES(bool merge_similar_words, BLOCK *the_block) {
ROW_IT row_it (the_block->row_list ());
ROW_RES_IT row_res_it(&row_res_list);
@ -83,22 +92,20 @@ BLOCK_RES::BLOCK_RES(BLOCK *the_block) {
block = the_block;
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row_res_it.add_to_end(new ROW_RES(row_it.data()));
row_res_it.add_to_end(new ROW_RES(merge_similar_words, row_it.data()));
}
}
/*************************************************************************
* ROW_RES::ROW_RES
*
* Constructor for ROW results
*************************************************************************/
ROW_RES::ROW_RES(ROW *the_row) {
ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) {
WERD_IT word_it(the_row->word_list());
WERD_RES_IT word_res_it(&word_res_list);
WERD_RES *combo = NULL; // current combination of fuzzies
WERD_RES *word_res; // current word
WERD *copy_word;
char_count = 0;
@ -106,20 +113,48 @@ ROW_RES::ROW_RES(ROW *the_row) {
whole_word_rej_count = 0;
row = the_row;
bool add_next_word = false;
TBOX union_box;
float line_height = the_row->x_height() + the_row->ascenders() -
the_row->descenders();
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word_res = new WERD_RES(word_it.data());
WERD_RES* word_res = new WERD_RES(word_it.data());
word_res->x_height = the_row->x_height();
if (word_res->word->flag(W_FUZZY_NON)) {
if (add_next_word) {
ASSERT_HOST(combo != NULL);
// We are adding this word to the combination.
word_res->part_of_combo = TRUE;
combo->copy_on(word_res);
} else if (merge_similar_words) {
union_box = word_res->word->bounding_box();
add_next_word = !word_res->word->flag(W_REP_CHAR) &&
union_box.height() <= line_height * kMaxWordSizeRatio;
word_res->odd_size = !add_next_word;
}
if (word_it.data_relative(1)->flag(W_FUZZY_NON)) {
WERD* next_word = word_it.data_relative(1);
if (merge_similar_words) {
if (add_next_word && !next_word->flag(W_REP_CHAR)) {
// Next word will be added on if all of the following are true:
// Not a rep char.
// Box height small enough.
// Union box height small enough.
// Horizontal gap small enough.
TBOX next_box = next_word->bounding_box();
int prev_right = union_box.right();
union_box += next_box;
if (next_box.height() > line_height * kMaxWordSizeRatio ||
union_box.height() > line_height * kMaxLineSizeRatio ||
next_box.left() > prev_right + line_height * kMaxWordGapRatio) {
add_next_word = false;
}
}
} else {
add_next_word = next_word->flag(W_FUZZY_NON);
}
if (add_next_word) {
if (combo == NULL) {
copy_word = new WERD;
//deep copy
*copy_word = *(word_it.data());
*copy_word = *(word_it.data()); // deep copy
combo = new WERD_RES(copy_word);
combo->x_height = the_row->x_height();
combo->combination = TRUE;
@ -208,6 +243,7 @@ void WERD_RES::CopySimpleFields(const WERD_RES& source) {
done = source.done;
unlv_crunch_mode = source.unlv_crunch_mode;
small_caps = source.small_caps;
odd_size = source.odd_size;
italic = source.italic;
bold = source.bold;
fontinfo = source.fontinfo;
@ -318,8 +354,7 @@ void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX box = b_it.data()->bounding_box();
box_word->InsertBox(box_word->length(), box);
fake_choices[blob_id++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
-1, -1, -1, 0, 0, 0, BCC_FAKE);
fake_choices[blob_id++] = new BLOB_CHOICE;
}
FakeClassifyWord(blob_count, fake_choices);
delete [] fake_choices;
@ -446,6 +481,13 @@ void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) {
}
}
// Prints the top choice along with the accepted/done flags.
void WERD_RES::DebugTopChoice(const char* msg) const {
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
tess_accepted, tess_would_adapt, done);
best_choice->print(msg);
}
// Removes from best_choices all choices which are not within a reasonable
// range of the best choice.
// TODO(rays) incorporate the information used here into the params training
@ -830,6 +872,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
}
FakeWordFromRatings();
reject_map.initialise(blob_count);
done = true;
}
// Creates a WERD_CHOICE for the word using the top choices from the leading
@ -1038,6 +1081,7 @@ void WERD_RES::InitNonPointers() {
done = FALSE;
unlv_crunch_mode = CR_NONE;
small_caps = false;
odd_size = false;
italic = FALSE;
bold = FALSE;
// The fontinfos and tesseract count as non-pointers as they point to
@ -1239,6 +1283,159 @@ WERD_RES* PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES& clone_res,
return new_res;
}
// Helper computes the boundaries between blobs in the word. The blob bounds
// are likely very poor, if they come from LSTM, where it only outputs the
// character at one pixel within it, so we find the midpoints between them.
// word: supplies best_state (number of list blobs per output blob) and the
//   blob list word.word->cblob_list() that is walked in order.
// next_word_blobs: blob list of the following word, or NULL if there is none.
//   It is only consulted for the end of the last group of blobs in word.
// blob_ends: output. Exactly one x-coordinate is pushed per best_state entry:
//   the midpoint between the right edge of that group's union box and the
//   left edge of the blob that follows it, or MAX_INT32 when nothing follows.
static void ComputeBlobEnds(const WERD_RES& word, C_BLOB_LIST* next_word_blobs,
GenericVector<int>* blob_ends) {
C_BLOB_IT blob_it(word.word->cblob_list());
for (int i = 0; i < word.best_state.size(); ++i) {
// Number of consecutive list blobs that make up output blob i.
int length = word.best_state[i];
// Get the bounding box of the fake blobs
TBOX blob_box = blob_it.data()->bounding_box();
blob_it.forward();
for (int b = 1; b < length; ++b) {
blob_box += blob_it.data()->bounding_box();
blob_it.forward();
}
// The extent of blob_box is unreliable, so for now we are only looking for
// the boundaries between consecutive blobs.
// Default: no following blob, so the end is left unbounded.
int blob_end = MAX_INT32;
// NOTE(review): at_first() after the forward() calls presumably means the
// iterator wrapped round, i.e. this word has no more blobs - confirm.
if (!blob_it.at_first() || next_word_blobs != NULL) {
// Out of blobs in this word: continue with the first blob of the next word.
if (blob_it.at_first())
blob_it.set_to_list(next_word_blobs);
// Midpoint between this group's right edge and the next blob's left edge.
blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2;
}
blob_ends->push_back(blob_end);
}
}
// Replaces the current WERD/WERD_RES with the given words. The given words
// contain fake blobs that indicate the position of the characters. These are
// replaced with real blobs from the current word as much as possible.
// words must hold at least one entry, since (*words)[0] and words->back() are
// used unconditionally. On return every entry has been handed over to the
// row's word_res_list and words is empty. The word that was current on entry
// is deleted, and ResetWordIterator() is called before returning.
void PAGE_RES_IT::ReplaceCurrentWord(
tesseract::PointerVector<WERD_RES>* words) {
WERD_RES* input_word = word();
// Set the BOL/EOL flags on the words from the input word.
if (input_word->word->flag(W_BOL)) {
(*words)[0]->word->set_flag(W_BOL, true);
} else {
// Not at the beginning of a line: the first replacement gets 1 blank.
(*words)[0]->word->set_blanks(1);
}
words->back()->word->set_flag(W_EOL, input_word->word->flag(W_EOL));
// Move the blobs from the input word to the new set of words.
// If the input word_res is a combination, then the replacements will also be
// combinations, and will own their own words. If the input word_res is not a
// combination, then the final replacements will not be either, (although it
// is allowed for the input words to be combinations) and their words
// will get put on the row list. This maintains the ownership rules.
WERD_IT w_it(row()->row->word_list());
if (!input_word->combination) {
// Linear search of the row's WERD list for the input word's WERD.
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD* word = w_it.data();
if (word == input_word->word)
break;
}
// w_it is now set to the input_word's word.
ASSERT_HOST(!w_it.cycled_list());
}
// Insert into the appropriate place in the ROW_RES.
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
WERD_RES* word = wr_it.data();
if (word == input_word)
break;
}
// The input word must be present in the row's word_res_list.
ASSERT_HOST(!wr_it.cycled_list());
// Since we only have an estimate of the bounds between blobs, use the blob
// x-middle as the determiner of where to put the blobs
C_BLOB_IT src_b_it(input_word->word->cblob_list());
src_b_it.sort(&C_BLOB::SortByXMiddle);
C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
rej_b_it.sort(&C_BLOB::SortByXMiddle);
// Both source lists are consumed left to right across all replacement words,
// so src_b_it and rej_b_it are deliberately not reset inside the loop.
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word_w = (*words)[w];
// Compute blob boundaries.
GenericVector<int> blob_ends;
C_BLOB_LIST* next_word_blobs =
w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : NULL;
ComputeBlobEnds(*word_w, next_word_blobs, &blob_ends);
// Delete the fake blobs on the current word.
word_w->word->cblob_list()->clear();
C_BLOB_IT dest_it(word_w->word->cblob_list());
// Build the box word as we move the blobs.
tesseract::BoxWord* box_word = new tesseract::BoxWord;
for (int i = 0; i < blob_ends.size(); ++i) {
int end_x = blob_ends[i];
TBOX blob_box;
// Add the blobs up to end_x.
while (!src_b_it.empty() &&
src_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += src_b_it.data()->bounding_box();
dest_it.add_after_then_move(src_b_it.extract());
src_b_it.forward();
}
// Blobs from the input word's rej_cblob_list are moved into the same
// destination list (the replacement word's cblob_list).
while (!rej_b_it.empty() &&
rej_b_it.data()->bounding_box().x_middle() < end_x) {
blob_box += rej_b_it.data()->bounding_box();
dest_it.add_after_then_move(rej_b_it.extract());
rej_b_it.forward();
}
// Clip to the previously computed bounds. Although imperfectly accurate,
// it is good enough, and much more complicated to determine where else
// to clip.
if (i > 0 && blob_box.left() < blob_ends[i - 1])
blob_box.set_left(blob_ends[i - 1]);
if (blob_box.right() > end_x)
blob_box.set_right(end_x);
box_word->InsertBox(i, blob_box);
}
// Fix empty boxes. If a very joined blob sits over multiple characters,
// then we will have some empty boxes from using the middle, so look for
// overlaps.
for (int i = 0; i < box_word->length(); ++i) {
TBOX box = box_word->BlobBox(i);
if (box.null_box()) {
// Nothing has its middle in the bounds of this blob, so use anything
// that overlaps.
for (dest_it.mark_cycle_pt(); !dest_it.cycled_list();
dest_it.forward()) {
TBOX blob_box = dest_it.data()->bounding_box();
// Accept the first moved blob that reaches into the x-range between
// blob_ends[i - 1] and blob_ends[i], and use its box clipped to
// that range.
if (blob_box.left() < blob_ends[i] &&
(i == 0 || blob_box.right() >= blob_ends[i - 1])) {
if (i > 0 && blob_box.left() < blob_ends[i - 1])
blob_box.set_left(blob_ends[i - 1]);
if (blob_box.right() > blob_ends[i])
blob_box.set_right(blob_ends[i]);
box_word->ChangeBox(i, blob_box);
break;
}
}
}
}
// Swap in the freshly built box word for whatever the replacement had.
delete word_w->box_word;
word_w->box_word = box_word;
if (!input_word->combination) {
// Insert word_w->word into the ROW. It doesn't own its word, so the
// ROW needs to own it.
w_it.add_before_stay_put(word_w->word);
word_w->combination = false;
}
(*words)[w] = NULL; // We are taking ownership.
wr_it.add_before_stay_put(word_w);
}
// We have taken ownership of the words.
words->clear();
// Delete the current word, which has been replaced. We could just call
// DeleteCurrentWord, but that would iterate both lists again, and we know
// we are already in the right place.
if (!input_word->combination)
delete w_it.extract();
delete wr_it.extract();
ResetWordIterator();
}
// Deletes the current WERD_RES and its underlying WERD.
void PAGE_RES_IT::DeleteCurrentWord() {
// Check that this word is as we expect. part_of_combos are NEVER iterated
@ -1298,18 +1495,30 @@ WERD_RES *PAGE_RES_IT::start_page(bool empty_ok) {
// Resets the word_res_it so that it is one past the next_word_res, as
// it should be after internal_forward. If next_row_res != row_res,
// then the next_word_res is in the next row, so there is no need to do
// anything, since operations on the current word will not have disturbed
// the word_res_it.
// anything to word_res_it, but it is still a good idea to reset the pointers
// word_res and prev_word_res, which are still in the current row.
void PAGE_RES_IT::ResetWordIterator() {
if (row_res == next_row_res) {
// Reset the member iterator so it can move forward and detect the
// cycled_list state correctly.
word_res_it.move_to_first();
word_res_it.mark_cycle_pt();
while (!word_res_it.cycled_list() && word_res_it.data() != next_word_res)
while (!word_res_it.cycled_list() && word_res_it.data() != next_word_res) {
if (prev_row_res == row_res)
prev_word_res = word_res;
word_res = word_res_it.data();
word_res_it.forward();
}
ASSERT_HOST(!word_res_it.cycled_list());
word_res_it.forward();
} else {
// word_res_it is OK, but reset word_res and prev_word_res if needed.
WERD_RES_IT wr_it(&row_res->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
if (prev_row_res == row_res)
prev_word_res = word_res;
word_res = wr_it.data();
}
}
}

View File

@ -82,7 +82,8 @@ class PAGE_RES { // page result
PAGE_RES() { Init(); } // empty constructor
PAGE_RES(BLOCK_LIST *block_list, // real blocks
PAGE_RES(bool merge_similar_words,
BLOCK_LIST *block_list, // real blocks
WERD_CHOICE **prev_word_best_choice_ptr);
~PAGE_RES () { // destructor
@ -111,7 +112,7 @@ class BLOCK_RES:public ELIST_LINK {
BLOCK_RES() {
} // empty constructor
BLOCK_RES(BLOCK *the_block); // real block
BLOCK_RES(bool merge_similar_words, BLOCK *the_block); // real block
~BLOCK_RES () { // destructor
}
@ -132,7 +133,7 @@ class ROW_RES:public ELIST_LINK {
ROW_RES() {
} // empty constructor
ROW_RES(ROW *the_row); // real row
ROW_RES(bool merge_similar_words, ROW *the_row); // real row
~ROW_RES() { // destructor
}
@ -279,7 +280,8 @@ class WERD_RES : public ELIST_LINK {
BOOL8 tess_accepted; // Tess thinks its ok?
BOOL8 tess_would_adapt; // Tess would adapt?
BOOL8 done; // ready for output?
bool small_caps; // word appears to be small caps
bool small_caps; // word appears to be small caps
bool odd_size; // word is bigger than line or leader dots.
inT8 italic;
inT8 bold;
// The fontinfos are pointers to data owned by the classifier.
@ -486,6 +488,9 @@ class WERD_RES : public ELIST_LINK {
// the word_to_debug.
void DebugWordChoices(bool debug, const char* word_to_debug);
// Prints the top choice along with the accepted/done flags.
void DebugTopChoice(const char* msg) const;
// Removes from best_choices all choices which are not within a reasonable
// range of the best choice.
void FilterWordChoices(int debug_level);
@ -694,6 +699,11 @@ class PAGE_RES_IT {
// the resulting WERD_RES is returned for further setup with best_choice etc.
WERD_RES* InsertSimpleCloneWord(const WERD_RES& clone_res, WERD* new_word);
// Replaces the current WERD/WERD_RES with the given words. The given words
// contain fake blobs that indicate the position of the characters. These are
// replaced with real blobs from the current word as much as possible.
void ReplaceCurrentWord(tesseract::PointerVector<WERD_RES>* words);
// Deletes the current WERD_RES and its underlying WERD.
void DeleteCurrentWord();

View File

@ -164,28 +164,37 @@ enum PageSegMode {
PSM_SINGLE_CHAR, ///< Treat the image as a single character.
PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD, ///< Sparse text with orientation and script det.
PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
};
/**
* Macros that act on a PageSegMode to determine whether components of
* Inline functions that act on a PageSegMode to determine whether components of
* layout analysis are enabled.
* *Depend critically on the order of elements of PageSegMode.*
* NOTE that arg is an int for compatibility with INT_PARAM.
*/
#define PSM_OSD_ENABLED(pageseg_mode) ((pageseg_mode) <= PSM_AUTO_OSD || \
(pageseg_mode) == PSM_SPARSE_TEXT_OSD)
#define PSM_COL_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_AUTO)
#define PSM_SPARSE(pageseg_mode) \
((pageseg_mode) == PSM_SPARSE_TEXT || (pageseg_mode) == PSM_SPARSE_TEXT_OSD)
#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_COLUMN)
#define PSM_LINE_FIND_ENABLED(pageseg_mode) \
((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_BLOCK)
#define PSM_WORD_FIND_ENABLED(pageseg_mode) \
(((pageseg_mode) >= PSM_AUTO_OSD && (pageseg_mode) <= PSM_SINGLE_LINE) || \
(pageseg_mode) == PSM_SPARSE_TEXT || (pageseg_mode) == PSM_SPARSE_TEXT_OSD)
/// True when pageseg_mode is PSM_SPARSE_TEXT_OSD, or is no greater than
/// PSM_AUTO_OSD in enum order.
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) return true;
  return pageseg_mode <= PSM_AUTO_OSD;
}
/// True when pageseg_mode lies in the closed range [PSM_AUTO_OSD, PSM_AUTO].
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) return false;
  return pageseg_mode <= PSM_AUTO;
}
/// True for the two sparse-text modes: PSM_SPARSE_TEXT and
/// PSM_SPARSE_TEXT_OSD.
inline bool PSM_SPARSE(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT) return true;
  return pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
/// True when pageseg_mode lies in the closed range
/// [PSM_AUTO_OSD, PSM_SINGLE_COLUMN].
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) return false;
  return pageseg_mode <= PSM_SINGLE_COLUMN;
}
/// True when pageseg_mode lies in the closed range
/// [PSM_AUTO_OSD, PSM_SINGLE_BLOCK].
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) return false;
  return pageseg_mode <= PSM_SINGLE_BLOCK;
}
/// True for either sparse-text mode, or when pageseg_mode lies in the closed
/// range [PSM_AUTO_OSD, PSM_SINGLE_LINE].
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD)
    return true;
  return PSM_AUTO_OSD <= pageseg_mode && pageseg_mode <= PSM_SINGLE_LINE;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator

View File

@ -48,11 +48,11 @@ class BLOB_CHOICE: public ELIST_LINK
{
public:
BLOB_CHOICE() {
unichar_id_ = INVALID_UNICHAR_ID;
unichar_id_ = UNICHAR_SPACE;
fontinfo_id_ = -1;
fontinfo_id2_ = -1;
rating_ = MAX_FLOAT32;
certainty_ = -MAX_FLOAT32;
rating_ = 10.0;
certainty_ = -1.0;
script_id_ = -1;
xgap_before_ = 0;
xgap_after_ = 0;

View File

@ -78,6 +78,12 @@ class DLLSYM TBOX { // bounding box
void set_right(int x) {
top_right.set_x(x);
}
int x_middle() const {
  // Integer midpoint of the left and right x-coordinates.
  const int x_sum = bot_left.x() + top_right.x();
  return x_sum / 2;
}
int y_middle() const {
  // Integer midpoint of the bottom and top y-coordinates.
  const int y_sum = bot_left.y() + top_right.y();
  return y_sum / 2;
}
const ICOORD &botleft() const { // access function
return bot_left;

View File

@ -247,10 +247,11 @@ C_BLOB* C_BLOB::FakeBlob(const TBOX& box) {
* Return the bounding box of the blob.
**********************************************************************/
TBOX C_BLOB::bounding_box() { //bounding box
C_OUTLINE *outline; //current outline
C_OUTLINE_IT it = &outlines; //outlines of blob
TBOX box; //bounding box
TBOX C_BLOB::bounding_box() const { // bounding box
C_OUTLINE *outline; // current outline
// This is a read-only iteration of the outlines.
C_OUTLINE_IT it = const_cast<C_OUTLINE_LIST*>(&outlines);
TBOX box; // bounding box
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
outline = it.data ();

View File

@ -65,7 +65,7 @@ class C_BLOB:public ELIST_LINK
return &outlines;
}
TBOX bounding_box(); //compute bounding box
TBOX bounding_box() const; // compute bounding box
inT32 area(); //compute area
inT32 perimeter(); // Total perimeter of outlines and 1st level children.
inT32 outer_area(); //compute area
@ -116,6 +116,14 @@ class C_BLOB:public ELIST_LINK
return blob;
}
static int SortByXMiddle(const void *v1, const void *v2) {
const C_BLOB* blob1 = *reinterpret_cast<const C_BLOB* const *>(v1);
const C_BLOB* blob2 = *reinterpret_cast<const C_BLOB* const *>(v2);
return blob1->bounding_box().x_middle() -
blob2->bounding_box().x_middle();
}
private:
C_OUTLINE_LIST outlines; //master elements
};

View File

@ -17,15 +17,17 @@
//
///////////////////////////////////////////////////////////////////////
#include "unicharset.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "params.h"
#include "serialis.h"
#include "tesscallback.h"
#include "tprintf.h"
#include "unichar.h"
#include "unicharset.h"
#include "params.h"
// Special character used in representing character fragments.
static const char kSeparator = '|';
@ -448,11 +450,19 @@ void UNICHARSET::ExpandRangesFromOther(const UNICHARSET& src) {
}
}
// Makes this a copy of src. Clears this completely first, so the automattic
// ids will not be present in this if not in src.
// Makes this a copy of src. Clears this completely first, so the automatic
// ids will not be present in this if not in src. Does NOT reorder the set!
void UNICHARSET::CopyFrom(const UNICHARSET& src) {
clear();
AppendOtherUnicharset(src);
for (int ch = 0; ch < src.size_used; ++ch) {
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
const char* utf8 = src.id_to_unichar(ch);
unichar_insert(utf8);
unichars[ch].properties.ExpandRangesFrom(src_props);
}
// Set properties, including mirror and other_case, WITHOUT reordering
// the unicharset.
PartialSetPropertiesFromOther(0, src);
}
// For each id in src, if it does not occur in this, add it, as in
@ -689,8 +699,11 @@ bool UNICHARSET::eq(UNICHAR_ID unichar_id,
return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0;
}
bool UNICHARSET::save_to_file(FILE *file) const {
fprintf(file, "%d\n", this->size());
bool UNICHARSET::save_to_string(STRING *str) const {
const int kFileBufSize = 1024;
char buffer[kFileBufSize + 1];
snprintf(buffer, kFileBufSize, "%d\n", this->size());
*str = buffer;
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
int min_bottom, max_bottom, min_top, max_top;
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
@ -702,11 +715,11 @@ bool UNICHARSET::save_to_file(FILE *file) const {
get_advance_range(id, &min_advance, &max_advance);
unsigned int properties = this->get_properties(id);
if (strcmp(this->id_to_unichar(id), " ") == 0) {
fprintf(file, "%s %x %s %d\n", "NULL", properties,
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
this->get_script_from_script_id(this->get_script(id)),
this->get_other_case(id));
} else {
fprintf(file,
snprintf(buffer, kFileBufSize,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
this->id_to_unichar(id), properties,
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
@ -716,10 +729,12 @@ bool UNICHARSET::save_to_file(FILE *file) const {
this->get_mirror(id), this->get_normed_unichar(id),
this->debug_str(id).string());
}
*str += buffer;
}
return true;
}
// TODO(rays) Replace with TFile everywhere.
class InMemoryFilePointer {
public:
InMemoryFilePointer(const char *memory, int mem_size)
@ -776,6 +791,14 @@ bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) {
return success;
}
// Loads the unicharset from a TFile by wrapping TFile::FGets in a callback
// and handing it to load_via_fgets. Returns what load_via_fgets returns.
bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) {
  TessResultCallback2<char *, char *, int> *line_reader =
      NewPermanentTessCallback(file, &tesseract::TFile::FGets);
  const bool loaded = load_via_fgets(line_reader, skip_fragments);
  // The callback was created here, so it is released here.
  delete line_reader;
  return loaded;
}
bool UNICHARSET::load_via_fgets(
TessResultCallback2<char *, char *, int> *fgets_cb,
bool skip_fragments) {

View File

@ -23,6 +23,7 @@
#include "errcode.h"
#include "genericvector.h"
#include "helpers.h"
#include "serialis.h"
#include "strngs.h"
#include "tesscallback.h"
#include "unichar.h"
@ -317,7 +318,22 @@ class UNICHARSET {
// Saves the content of the UNICHARSET to the given file.
// Returns true if the operation is successful.
bool save_to_file(FILE *file) const;
bool save_to_file(FILE *file) const {
STRING str;
if (!save_to_string(&str)) return false;
if (fwrite(&str[0], str.length(), 1, file) != 1) return false;
return true;
}
bool save_to_file(tesseract::TFile *file) const {
  // Serialize into memory first, then emit the text with a single FWrite of
  // one item; anything other than one item written counts as failure.
  STRING serialized;
  const bool written =
      save_to_string(&serialized) &&
      file->FWrite(&serialized[0], serialized.length(), 1) == 1;
  return written;
}
// Saves the content of the UNICHARSET to the given STRING.
// Returns true if the operation is successful.
bool save_to_string(STRING *str) const;
// Load a unicharset from a unicharset file that has been loaded into
// the given memory buffer.
@ -348,6 +364,8 @@ class UNICHARSET {
// Returns true if the operation is successful.
bool load_from_file(FILE *file, bool skip_fragments);
bool load_from_file(FILE *file) { return load_from_file(file, false); }
bool load_from_file(tesseract::TFile *file, bool skip_fragments);
// Sets up internal data after loading the file, based on the char
// properties. Called from load_from_file, but also needs to be run

View File

@ -161,7 +161,8 @@ float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, TO_ROW_IT* row_it) {
* only a single blob, it makes 2 rows, in case the top-level blob
* is a container of the real blobs to recognize.
*/
float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks) {
float make_single_row(ICOORD page_tr, bool allow_sub_blobs,
TO_BLOCK* block, TO_BLOCK_LIST* blocks) {
BLOBNBOX_IT blob_it = &block->blobs;
TO_ROW_IT row_it = block->get_rows();
@ -169,11 +170,17 @@ float make_single_row(ICOORD page_tr, TO_BLOCK* block, TO_BLOCK_LIST* blocks) {
blob_it.add_list_after(&block->small_blobs);
blob_it.add_list_after(&block->noise_blobs);
blob_it.add_list_after(&block->large_blobs);
if (block->blobs.singleton()) {
if (block->blobs.singleton() && allow_sub_blobs) {
blob_it.move_to_first();
float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
if (size > block->line_size)
block->line_size = size;
} else if (block->blobs.empty()) {
// Make a fake blob.
C_BLOB* blob = C_BLOB::FakeBlob(block->block->bounding_box());
// The blobnbox owns the blob.
BLOBNBOX* bblob = new BLOBNBOX(blob);
blob_it.add_after_then_move(bblob);
}
MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
// Fit an LMS line to the rows.

View File

@ -133,7 +133,7 @@ inline bool within_error_margin(float test, float num, float margin) {
void fill_heights(TO_ROW *row, float gradient, int min_height,
int max_height, STATS *heights, STATS *floating_heights);
float make_single_row(ICOORD page_tr, TO_BLOCK* block,
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
TO_BLOCK_LIST* blocks);
float make_rows(ICOORD page_tr, // top right
TO_BLOCK_LIST *port_blocks);

View File

@ -317,8 +317,9 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
gradient = make_rows(page_tr_, to_blocks);
} else if (!PSM_SPARSE(pageseg_mode)) {
// SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, to_block, to_blocks);
// RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
to_block, to_blocks);
}
BaselineDetect baseline_detector(textord_baseline_debug,
reskew, to_blocks);
@ -339,7 +340,8 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
to_block->get_rows(), to_block->block->row_list());
}
cleanup_blocks(blocks); // Remove empties.
cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
// Remove empties.
// Compute the margins for each row in the block, to be used later for
// paragraph detection.

View File

@ -206,7 +206,7 @@ class Textord {
// Must have at least one WERD.
// WERDs contain a fake blob.
void cleanup_nontext_block(BLOCK* block);
void cleanup_blocks(BLOCK_LIST *blocks);
void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
BOOL8 clean_noise_from_row(ROW *row);
void clean_noise_from_words(ROW *row);
// Remove outlines that are a tiny fraction in either width or height

View File

@ -360,9 +360,11 @@ void Textord::cleanup_nontext_block(BLOCK* block) {
// Non-text blocks must contain at least one row.
ROW_IT row_it(block->row_list());
if (row_it.empty()) {
float height = block->bounding_box().height();
inT32 zero = 0;
ROW* row = new ROW(0, &zero, NULL, height / 2.0f, height / 4.0f,
TBOX box = block->bounding_box();
float height = box.height();
inT32 xstarts[2] = {box.left(), box.right()};
double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())};
ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f,
height / 4.0f, 0, 1);
row_it.add_after_then_move(row);
}
@ -398,9 +400,7 @@ void Textord::cleanup_nontext_block(BLOCK* block) {
* Delete empty blocks, rows from the page.
**********************************************************************/
void Textord::cleanup_blocks( //remove empties
BLOCK_LIST *blocks //list
) {
void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks) {
BLOCK_IT block_it = blocks; //iterator
ROW_IT row_it; //row iterator
@ -417,22 +417,24 @@ void Textord::cleanup_blocks( //remove empties
}
num_rows = 0;
num_rows_all = 0;
row_it.set_to_list(block->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
++num_rows_all;
clean_small_noise_from_words(row_it.data());
if ((textord_noise_rejrows && !row_it.data()->word_list()->empty() &&
clean_noise_from_row(row_it.data())) ||
row_it.data()->word_list()->empty()) {
delete row_it.extract(); // lose empty row.
} else {
if (textord_noise_rejwords)
clean_noise_from_words(row_it.data());
if (textord_blshift_maxshift >= 0)
tweak_row_baseline(row_it.data(),
textord_blshift_maxshift,
textord_blshift_xfraction);
++num_rows;
if (clean_noise) {
row_it.set_to_list(block->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
++num_rows_all;
clean_small_noise_from_words(row_it.data());
if ((textord_noise_rejrows && !row_it.data()->word_list()->empty() &&
clean_noise_from_row(row_it.data())) ||
row_it.data()->word_list()->empty()) {
delete row_it.extract(); // lose empty row.
} else {
if (textord_noise_rejwords)
clean_noise_from_words(row_it.data());
if (textord_blshift_maxshift >= 0)
tweak_row_baseline(row_it.data(),
textord_blshift_maxshift,
textord_blshift_xfraction);
++num_rows;
}
}
}
if (block->row_list()->empty()) {

View File

@ -299,7 +299,7 @@ bool LanguageModel::UpdateState(
//if (!curr_list->singleton() && c_it.data()->unichar_id() == 0) continue;
UNICHAR_ID unichar_id = choice->unichar_id();
if (unicharset.get_fragment(unichar_id)) {
continue; // skip fragments
continue; // Skip fragments.
}
// Set top choice flags.
LanguageModelFlagsType blob_choice_flags = kXhtConsistentFlag;
@ -651,6 +651,8 @@ bool LanguageModel::AddViterbiStateEntry(
ngram_info, (language_model_debug_level > 0) ?
dict_->getUnicharset().id_to_unichar(b->unichar_id()) : NULL);
new_vse->cost = ComputeAdjustedPathCost(new_vse);
if (language_model_debug_level >= 3)
tprintf("Adjusted cost = %g\n", new_vse->cost);
// Invoke Top Choice language model component to make the final adjustments
// to new_vse->top_choice_flags.
@ -1311,7 +1313,7 @@ void LanguageModel::UpdateBestChoice(
vse->dawg_info != NULL && vse->top_choice_flags);
}
}
if (wordrec_display_segmentations) {
if (wordrec_display_segmentations && word_res->chopped_word != NULL) {
word->DisplaySegmentation(word_res->chopped_word);
}
}

View File

@ -37,52 +37,16 @@ void Wordrec::DoSegSearch(WERD_RES* word_res) {
void Wordrec::SegSearch(WERD_RES* word_res,
BestChoiceBundle* best_choice_bundle,
BlamerBundle* blamer_bundle) {
if (segsearch_debug_level > 0) {
tprintf("Starting SegSearch on ratings matrix%s:\n",
wordrec_enable_assoc ? " (with assoc)" : "");
word_res->ratings->print(getDict().getUnicharset());
}
LMPainPoints pain_points(segsearch_max_pain_points,
segsearch_max_char_wh_ratio,
assume_fixed_pitch_char_segment,
&getDict(), segsearch_debug_level);
pain_points.GenerateInitial(word_res);
// Compute scaling factor that will help us recover blob outline length
// from classifier rating and certainty for the blob.
float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
language_model_->InitForWord(prev_word_best_choice_,
assume_fixed_pitch_char_segment,
segsearch_max_char_wh_ratio, rating_cert_scale);
// Initialize blamer-related information: map character boxes recorded in
// blamer_bundle->norm_truth_word to the corresponding i,j indices in the
// ratings matrix. We expect this step to succeed, since when running the
// chopper we checked that the correct chops are present.
if (blamer_bundle != NULL) {
blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
wordrec_debug_blamer);
}
MATRIX_COORD pain_point;
float pain_point_priority;
// pending[col] tells whether there is update work to do to combine
// best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
// As the language model state is updated, pending entries are modified to
// minimize duplication of work. It is important that during the update the
// children are considered in the non-decreasing order of their column, since
// this guarantees that all the parents would be up to date before an update
// of a child is done.
GenericVector<SegSearchPending> pending;
pending.init_to_size(word_res->ratings->dimension(), SegSearchPending());
// Search the ratings matrix for the initial best path.
pending[0].SetColumnClassified();
UpdateSegSearchNodes(rating_cert_scale, 0, &pending, word_res,
&pain_points, best_choice_bundle, blamer_bundle);
InitialSegSearch(word_res, &pain_points, &pending, best_choice_bundle,
blamer_bundle);
if (!SegSearchDone(0)) { // find a better choice
if (chop_enable && word_res->chopped_word != NULL) {
@ -98,6 +62,9 @@ void Wordrec::SegSearch(WERD_RES* word_res,
}
}
// Keep trying to find a better path by fixing the "pain points".
MATRIX_COORD pain_point;
float pain_point_priority;
int num_futile_classifications = 0;
STRING blamer_debug;
while (wordrec_enable_assoc &&
@ -159,6 +126,72 @@ void Wordrec::SegSearch(WERD_RES* word_res,
}
}
// Runs only the initial segmentation search over the ratings matrix already
// held by word_res; no further chopping or joining of blobs is attempted.
// The pain points, pending list and best-choice bundle are local scratch
// objects, and no blamer bundle is supplied.
void Wordrec::WordSearch(WERD_RES* word_res) {
  LMPainPoints scratch_pain_points(segsearch_max_pain_points,
                                   segsearch_max_char_wh_ratio,
                                   assume_fixed_pitch_char_segment,
                                   &getDict(), segsearch_debug_level);
  GenericVector<SegSearchPending> scratch_pending;
  BestChoiceBundle scratch_bundle(word_res->ratings->dimension());
  // Run Segmentation Search.
  InitialSegSearch(word_res, &scratch_pain_points, &scratch_pending,
                   &scratch_bundle, NULL);
  if (segsearch_debug_level <= 0) return;
  // Debug output only: dump the matrix as it stands after the search.
  const char* assoc_note = wordrec_enable_assoc ? " (with assoc)" : "";
  tprintf("Ending ratings matrix%s:\n", assoc_note);
  word_res->ratings->print(getDict().getUnicharset());
}
// Setup and run just the initial segsearch on an established matrix,
// without doing any additional chopping or joining.
// (Internal factored version that can be used as part of the main SegSearch.)
// Arguments:
//   word_res: supplies the ratings matrix (and chopped_word for the blamer).
//   pain_points: GenerateInitial(word_res) is called on it here.
//   pending: resized here to one default SegSearchPending per ratings
//     dimension, with column 0 marked as classified.
//   best_choice_bundle: passed through to UpdateSegSearchNodes.
//   blamer_bundle: may be NULL, in which case blamer setup is skipped.
// NOTE(review): (*pending)[0] is accessed unconditionally, so this assumes
// word_res->ratings->dimension() is at least 1 - confirm with callers.
void Wordrec::InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points,
GenericVector<SegSearchPending>* pending,
BestChoiceBundle* best_choice_bundle,
BlamerBundle* blamer_bundle) {
if (segsearch_debug_level > 0) {
tprintf("Starting SegSearch on ratings matrix%s:\n",
wordrec_enable_assoc ? " (with assoc)" : "");
word_res->ratings->print(getDict().getUnicharset());
}
pain_points->GenerateInitial(word_res);
// Compute scaling factor that will help us recover blob outline length
// from classifier rating and certainty for the blob.
float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
language_model_->InitForWord(prev_word_best_choice_,
assume_fixed_pitch_char_segment,
segsearch_max_char_wh_ratio, rating_cert_scale);
// Initialize blamer-related information: map character boxes recorded in
// blamer_bundle->norm_truth_word to the corresponding i,j indices in the
// ratings matrix. We expect this step to succeed, since when running the
// chopper we checked that the correct chops are present.
if (blamer_bundle != NULL) {
blamer_bundle->SetupCorrectSegmentation(word_res->chopped_word,
wordrec_debug_blamer);
}
// pending[col] tells whether there is update work to do to combine
// best_choice_bundle->beam[col - 1] with some BLOB_CHOICEs in matrix[col, *].
// As the language model state is updated, pending entries are modified to
// minimize duplication of work. It is important that during the update the
// children are considered in the non-decreasing order of their column, since
// this guarantees that all the parents would be up to date before an update
// of a child is done.
pending->init_to_size(word_res->ratings->dimension(), SegSearchPending());
// Search the ratings matrix for the initial best path.
(*pending)[0].SetColumnClassified();
// Start the update from column 0.
UpdateSegSearchNodes(rating_cert_scale, 0, pending, word_res,
pain_points, best_choice_bundle, blamer_bundle);
}
void Wordrec::UpdateSegSearchNodes(
float rating_cert_scale,
int starting_col,

View File

@ -266,11 +266,22 @@ class Wordrec : public Classify {
// to combine blobs. Segmentation search will run only one "iteration"
// on the classifications already recorded in chunks_record.ratings.
//
// Note: this function assumes that word, output_best_state,
// best_char_choices and fixpt arguments are not NULL.
// Note: this function assumes that word_res, best_choice_bundle arguments
// are not NULL.
void SegSearch(WERD_RES* word_res,
BestChoiceBundle* best_choice_bundle,
BlamerBundle* blamer_bundle);
// Setup and run just the initial segsearch on an established matrix,
// without doing any additional chopping or joining.
void WordSearch(WERD_RES* word_res);
// Setup and run just the initial segsearch on an established matrix,
// without doing any additional chopping or joining.
// (Internal factored version that can be used as part of the main SegSearch.)
void InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points,
GenericVector<SegSearchPending>* pending,
BestChoiceBundle* best_choice_bundle,
BlamerBundle* blamer_bundle);
// Runs SegSearch() function (above) without needing a best_choice_bundle
// or blamer_bundle. Used for testing.