/////////////////////////////////////////////////////////////////////// // File: osdetect.h // Description: Orientation and script detection. // Author: Samuel Charron // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifndef TESSERACT_CCMAIN_OSDETECT_H__ #define TESSERACT_CCMAIN_OSDETECT_H__ #include "strngs.h" #include "unicharset.h" class TO_BLOCK_LIST; class BLOBNBOX; class BLOB_CHOICE_LIST; namespace tesseract { class Tesseract; } // Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; struct OSBestResult { int orientation; const char* script; float sconfidence; float oconfidence; }; struct OSResults { OSResults() { for (int i = 0; i < 4; ++i) { for (int j = 0; j < kMaxNumberOfScripts; ++j) scripts_na[i][j] = 0; orientations[i] = 0; } } float orientations[4]; float scripts_na[4][kMaxNumberOfScripts]; UNICHARSET* unicharset; OSBestResult best_result; }; class OrientationDetector { public: OrientationDetector(OSResults*); bool detect_blob(BLOB_CHOICE_LIST* scores); void update_best_orientation(); int get_orientation(); private: OSResults* osr_; }; class ScriptDetector { public: ScriptDetector(OSResults*, tesseract::Tesseract* tess); void detect_blob(BLOB_CHOICE_LIST* scores); void update_best_script(int); void get_script() ; bool must_stop(int orientation); private: OSResults* osr_; static const char* korean_script_; static const char* japanese_script_; static const char* fraktur_script_; int korean_id_; int japanese_id_; int katakana_id_; int hiragana_id_; int han_id_; int hangul_id_; int latin_id_; int fraktur_id_; tesseract::Tesseract* tess_; }; bool orientation_and_script_detection(STRING& filename, OSResults*, tesseract::Tesseract*); bool os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, tesseract::Tesseract* tess); bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, OSResults*, tesseract::Tesseract* tess); #endif // TESSERACT_CCMAIN_OSDETECT_H__