/////////////////////////////////////////////////////////////////////// // File: osdetect.cpp // Description: Orientation and script detection. // Author: Samuel Charron // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #include "osdetect.h" #include "strngs.h" #include "blobbox.h" #include "blread.h" #include "tordmain.h" #include "ratngs.h" #include "oldlist.h" #include "adaptmatch.h" #include "tstruct.h" #include "expandblob.h" #include "tesseractclass.h" #include "qrsequence.h" extern IMAGE page_image; const int kMinCharactersToTry = 50; const int kMaxCharactersToTry = 5 * kMinCharactersToTry; const float kSizeRatioToReject = 2.0; const float kOrientationAcceptRatio = 1.3; const float kScriptAcceptRatio = 1.3; const float kHanRatioInKorean = 0.7; const float kHanRatioInJapanese = 0.3; const float kLatinRationInFraktur = 0.7; const float kNonAmbiguousMargin = 1.0; // General scripts static const char* han_script = "Han"; static const char* latin_script = "Latin"; static const char* katakana_script = "Katakana"; static const char* hiragana_script = "Hiragana"; static const char* hangul_script = "Hangul"; // Pseudo-scripts Name const char* ScriptDetector::korean_script_ = "Korean"; const char* ScriptDetector::japanese_script_ = "Japanese"; const char* ScriptDetector::fraktur_script_ = "Fraktur"; CLISTIZEH(BLOBNBOX); CLISTIZE(BLOBNBOX); // Find connected components in the page and process a subset until finished or // a stopping criterion is met. // Returns true if the page was successfully processed. bool orientation_and_script_detection(STRING& filename, OSResults* osr, tesseract::Tesseract* tess) { STRING name = filename; //truncated name const char *lastdot; //of name TO_BLOCK_LIST land_blocks, port_blocks; BLOCK_LIST blocks; TBOX page_box; lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) name[lastdot-name.string()] = '\0'; if (!read_unlv_file(name, page_image.get_xsize(), page_image.get_ysize(), &blocks)) FullPageBlock(page_image.get_xsize(), page_image.get_ysize(), &blocks); find_components(&blocks, &land_blocks, &port_blocks, &page_box); return os_detect(&port_blocks, osr, tess); } // Filter and sample the blobs. // Returns true if the page was successfully processed, or false if the page had // too few characters to be reliable bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, tesseract::Tesseract* tess) { int blobs_total = 0; OSResults osr_; if (osr == NULL) osr = &osr_; osr->unicharset = &tess->unicharset; OrientationDetector o(osr); ScriptDetector s(osr, tess); TO_BLOCK_IT block_it; block_it.set_to_list(port_blocks); BLOBNBOX_CLIST filtered_list; BLOBNBOX_C_IT filtered_it(&filtered_list); for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward ()) { TO_BLOCK* block = block_it.data(); BLOBNBOX_IT bbox_it; bbox_it.set_to_list(&block->blobs); for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list (); bbox_it.forward ()) { BLOBNBOX* bbox = bbox_it.data(); C_BLOB* blob = bbox->cblob(); TBOX box = blob->bounding_box(); ++blobs_total; float y_x = fabs((box.height() * 1.0) / box.width()); float x_y = 1.0f / y_x; // Select a >= 1.0 ratio float ratio = x_y > y_x ? x_y : y_x; // Blob is ambiguous if (ratio > kSizeRatioToReject) continue; if (box.height() < 10) continue; filtered_it.add_to_end(bbox); } } if (filtered_it.length() > 0) filtered_it.move_to_first(); int real_max = MIN(filtered_it.length(), kMaxCharactersToTry); printf("Total blobs found = %d\n", blobs_total); printf("Number of blobs post-filtering = %d\n", filtered_it.length()); printf("Number of blobs to try = %d\n", real_max); // If there are too few characters, skip this page entirely. if (real_max < kMinCharactersToTry / 2) { printf("Too few characters. Skipping this page\n"); return false; } BLOBNBOX** blobs = new BLOBNBOX*[filtered_it.length()]; int number_of_blobs = 0; for (filtered_it.mark_cycle_pt (); !filtered_it.cycled_list (); filtered_it.forward ()) { blobs[number_of_blobs++] = (BLOBNBOX*)filtered_it.data(); } QRSequenceGenerator sequence(number_of_blobs); for (int i = 0; i < real_max; ++i) { if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > kMinCharactersToTry) { break; } } delete [] blobs; // Make sure the best_result is up-to-date int orientation = o.get_orientation(); s.update_best_script(orientation); return true; } // Processes a single blob to estimate script and orientation. // Return true if estimate of orientation and script satisfies stopping // criteria. bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, OSResults* osr, tesseract::Tesseract* tess) { C_BLOB* blob = bbox->cblob(); TBOX box = blob->bounding_box(); int x_mid = (box.left() + box.right()) / 2.0f; int y_mid = (box.bottom() + box.top()) / 2.0f; PBLOB pblob(blob, box.height()); BLOB_CHOICE_LIST ratings[4]; // Test the 4 orientations for (int i = 0; i < 4; ++i) { // normalize the blob pblob.move(FCOORD(-x_mid, -box.bottom())); pblob.scale(static_cast(bln_x_height) / box.height()); pblob.move(FCOORD(0.0f, bln_baseline_offset)); { // List of choices given by the classifier TBLOB *tessblob; //converted blob TEXTROW tessrow; //dummy row tess_cn_matching.set_value(true); // turn it on tess_bn_matching.set_value(false); //convert blob tessblob = make_tess_blob (&pblob, TRUE); //make dummy row make_tess_row(NULL, &tessrow); //classify tess->AdaptiveClassifier (tessblob, NULL, &tessrow, ratings + i, NULL); free_blob(tessblob); } // undo normalize pblob.move(FCOORD(0.0f, -bln_baseline_offset)); pblob.scale(1.0f / (static_cast(bln_x_height) / box.height())); pblob.move(FCOORD(x_mid, box.bottom())); // center the blob pblob.move(FCOORD(-x_mid, -y_mid)); // Rotate it pblob.rotate(); // Re-compute the mid box = pblob.bounding_box(); x_mid = (box.left() + box.right()) / 2; y_mid = (box.top() + box.bottom()) / 2; // re-center in the new mid pblob.move(FCOORD(x_mid, y_mid)); } bool stop = o->detect_blob(ratings); s->detect_blob(ratings); int orientation = o->get_orientation(); stop = s->must_stop(orientation) && stop; return stop; } OrientationDetector::OrientationDetector(OSResults* osr) { osr_ = osr; } // Score the given blob and return true if it is now sure of the orientation // after adding this block. bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST* scores) { for (int i = 0; i < 4; ++i) { BLOB_CHOICE_IT choice_it; choice_it.set_to_list(scores + i); if (!choice_it.empty()) { osr_->orientations[i] += (100 + choice_it.data()->certainty()); } } float first = -1; float second = -1; int idx = -1; for (int i = 0; i < 4; ++i) { if (osr_->orientations[i] > first) { idx = i; second = first; first = osr_->orientations[i]; } else if (osr_->orientations[i] > second) { second = osr_->orientations[i]; } } return first / second > kOrientationAcceptRatio; } void OrientationDetector::update_best_orientation() { float first = osr_->orientations[0]; float second = osr_->orientations[1]; if (first < second) { second = first; first = osr_->orientations[1]; } osr_->best_result.orientation = 0; osr_->best_result.oconfidence = 0; for (int i = 0; i < 4; ++i) { if (osr_->orientations[i] > first) { second = first; first = osr_->orientations[i]; osr_->best_result.orientation = i; } else if (osr_->orientations[i] > second) { second = osr_->orientations[i]; } } osr_->best_result.oconfidence = (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); } int OrientationDetector::get_orientation() { update_best_orientation(); return osr_->best_result.orientation; } ScriptDetector::ScriptDetector(OSResults* osr, tesseract::Tesseract* tess) { osr_ = osr; tess_ = tess; katakana_id_ = tess_->unicharset.add_script(katakana_script); hiragana_id_ = tess_->unicharset.add_script(hiragana_script); han_id_ = tess_->unicharset.add_script(han_script); hangul_id_ = tess_->unicharset.add_script(hangul_script); japanese_id_ = tess_->unicharset.add_script(japanese_script_); korean_id_ = tess_->unicharset.add_script(korean_script_); latin_id_ = tess_->unicharset.add_script(latin_script); fraktur_id_ = tess_->unicharset.add_script(fraktur_script_); } // Score the given blob and return true if it is now sure of the script after // adding this blob. void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) { bool done[kMaxNumberOfScripts]; for (int i = 0; i < 4; ++i) { for (int j = 0; j < kMaxNumberOfScripts; ++j) done[j] = false; BLOB_CHOICE_IT choice_it; choice_it.set_to_list(scores + i); float prev_score = -1; int script_count = 0; int prev_id = -1; int prev_script; int prev_class_id = -1; int prev_config = -1; const char* prev_unichar = ""; const char* unichar = ""; float next_best_score = -1.0; int next_best_script_id = -1; const char* next_best_unichar = ""; for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) { BLOB_CHOICE* choice = choice_it.data(); int id = choice->script_id(); // Script already processed before. if (done[id]) continue; done[id] = true; unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); // Save data from the first match if (prev_score < 0) { prev_score = -choice->certainty(); script_count = 1; prev_id = id; prev_script = choice->script_id(); prev_unichar = unichar; prev_class_id = choice->unichar_id(); prev_config = choice->config(); } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) { script_count++; next_best_score = -choice->certainty(); next_best_script_id = choice->script_id(); next_best_unichar = tess_->unicharset.id_to_unichar(choice->unichar_id()); } if (strlen(prev_unichar) == 1) if (unichar[0] >= '0' && unichar[0] <= '9') break; // if script_count is >= 2, character is ambiguous, skip other matches // since they are useless. if (script_count >= 2) break; } // Character is non ambiguous if (script_count == 1) { // Update the score of the winning script osr_->scripts_na[i][prev_id] += 1; // Workaround for Fraktur if (prev_id == latin_id_) { int font_set_id = tess_->PreTrainedTemplates-> Class[prev_class_id]->font_set_id; if (font_set_id >= 0 && prev_config >= 0) { FontInfo fi = tess_->get_fontinfo_table().get( tess_->get_fontset_table().get(font_set_id).configs[prev_config]); //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, // fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(), // fi.is_serif(), fi.is_fraktur(), // prev_unichar); if (fi.is_fraktur()) { osr_->scripts_na[i][prev_id] -= 1; osr_->scripts_na[i][fraktur_id_] += 1; } } } // Update Japanese / Korean pseudo-scripts if (prev_id == katakana_id_) osr_->scripts_na[i][japanese_id_] += 1; if (prev_id == hiragana_id_) osr_->scripts_na[i][japanese_id_] += 1; if (prev_id == hangul_id_) osr_->scripts_na[i][korean_id_] += 1; if (prev_id == han_id_) osr_->scripts_na[i][korean_id_] += kHanRatioInKorean; if (prev_id == han_id_) osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese; } } // iterate over each orientation } bool ScriptDetector::must_stop(int orientation) { update_best_script(orientation); return osr_->best_result.sconfidence > 1; } void ScriptDetector::update_best_script(int orientation) { float first = -1; float second = -1; // i = 1 -> ignore Common scripts for (int i = 1; i < kMaxNumberOfScripts; ++i) { if (osr_->scripts_na[orientation][i] > first) { osr_->best_result.script = tess_->unicharset.get_script_from_script_id(i); second = first; first = osr_->scripts_na[orientation][i]; } else if (osr_->scripts_na[orientation][i] > second) { second = osr_->scripts_na[orientation][i]; } } osr_->best_result.sconfidence = (first / second - 1.0) / (kOrientationAcceptRatio - 1.0); }