/////////////////////////////////////////////////////////////////////// // File: tesseractclass.h // Description: An instance of Tesseract. For thread safety, *every* // global variable goes in here, directly, or indirectly. // Author: Ray Smith // Created: Fri Mar 07 08:17:01 PST 2008 // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__ #define TESSERACT_CCMAIN_TESSERACTCLASS_H__ #include "varable.h" #include "wordrec.h" #include "ocrclass.h" #include "control.h" #include "docqual.h" class CHAR_SAMPLES_LIST; class CHAR_SAMPLE_LIST; class PAGE_RES; class PAGE_RES_IT; class BLOCK_LIST; class TO_BLOCK_LIST; class IMAGE; class WERD_RES; class ROW; class TBOX; class SVMenuNode; struct Pix; class WERD_CHOICE; class WERD; class BLOB_CHOICE_LIST_CLIST; // Top-level class for all tesseract global instance data. // This class either holds or points to all data used by an instance // of Tesseract, including the memory allocator. When this is // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT! // // NOTE to developers: Do not create cyclic dependencies through this class! // The directory dependency tree must remain a tree! To keep this clean, // lower-level code (eg in ccutil, the bottom level) must never need to // know about the content of a higher-level directory. 
// The following scheme will grant the easiest access to lower-level // global members without creating a cyclic dependency: // ccmain inherits wordrec, includes textord as a member // wordrec inherits classify // classify inherits ccstruct, includes dict as a member // ccstruct inherits c_util, includes image as a member // c_util inherits cc_util // textord has a pointer to ccstruct, but doesn't own it. // dict has a pointer to ccstruct, but doesn't own it. // // NOTE: that each level contains members that correspond to global // data that is defined (and used) at that level, not necessarily where // the type is defined so for instance: // BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs"); // goes inside the Textord class, not the cc_util class. namespace tesseract { class Tesseract : public Wordrec { public: Tesseract(); ~Tesseract(); void Clear(); // Simple accessors. const FCOORD& reskew() const { return reskew_; } // Destroy any existing pix and return a pointer to the pointer. Pix** mutable_pix_binary() { Clear(); return &pix_binary_; } Pix* pix_binary() const { return pix_binary_; } void SetBlackAndWhitelist(); int SegmentPage(const STRING* input_file, IMAGE* image, BLOCK_LIST* blocks); int AutoPageSeg(int width, int height, int resolution, bool single_column, IMAGE* image, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); //// control.h ///////////////////////////////////////////////////////// void recog_all_words( //process words PAGE_RES *page_res, //page structure //progress monitor volatile ETEXT_DESC *monitor, TBOX *target_word_box=0L, inT16 dopasses=0 ); void classify_word_pass1( //recog one word WERD_RES *word, //word to do ROW *row, BLOCK* block, BOOL8 cluster_adapt, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); void recog_pseudo_word( //recognize blobs BLOCK_LIST *block_list, //blocks to check TBOX &selection_box); // This method returns all the blobs in the specified blocks. 
// It's the caller's responsibility to destroy the returned list. C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at. ); // This method can be used to perform word-level training using box files. // TODO: this can be modified to perform training in general case too. void train_word_level_with_boxes( const STRING& box_file, // File with boxes. const STRING& out_file, // Output file. BLOCK_LIST* blocks // Blocks to use. ); void fix_rep_char(WERD_RES *word); void fix_quotes( //make double quotes WERD_CHOICE *choice, //choice to fix WERD *word, //word to do //char choices BLOB_CHOICE_LIST_CLIST *blob_choices); ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s, const char *lengths); void match_word_pass2( //recog one word WERD_RES *word, //word to do ROW *row, BLOCK* block, float x_height); void classify_word_pass2( //word to do WERD_RES *word, BLOCK* block, ROW *row); BOOL8 recog_interactive( //recognize blobs BLOCK *block, //block ROW *row, //row of word WERD *word //word to recognize ); void fix_hyphens( //crunch double hyphens WERD_CHOICE *choice, //choice to fix WERD *word, //word to do //char choices BLOB_CHOICE_LIST_CLIST *blob_choices); void set_word_fonts( WERD_RES *word, // word to adapt to BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results void font_recognition_pass( //good chars in word PAGE_RES_IT &page_res_it); //// output.h ////////////////////////////////////////////////////////// void output_pass( //Tess output pass //send to api PAGE_RES_IT &page_res_it, BOOL8 write_to_shm, TBOX *target_word_box); FILE *open_outfile( //open .map & .unlv file const char *extension); void write_results( //output a word PAGE_RES_IT &page_res_it, //full info char newline_type, //type of newline BOOL8 force_eol, //override tilde crunch? BOOL8 write_to_shm //send to api ); void set_unlv_suspects(WERD_RES *word); UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? 
BOOL8 acceptable_number_string(const char *s, const char *lengths); inT16 count_alphanums(const WERD_CHOICE &word); inT16 count_alphas(const WERD_CHOICE &word); //// tessedit.h //////////////////////////////////////////////////////// void read_config_file(const char *filename, bool global_only); int init_tesseract(const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only); int init_tesseract_lm(const char *arg0, const char *textbase, const char *language); // Initializes the tesseract classifier without loading language models. int init_tesseract_classifier(const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only); void recognize_page(STRING& image_name); void end_tesseract(); bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only); //// pgedit.h ////////////////////////////////////////////////////////// SVMenuNode *build_menu_new(); void pgeditor_main(BLOCK_LIST *blocks); void process_image_event( // action in image win const SVEvent &event); void pgeditor_read_file( // of serialised file STRING &filename, BLOCK_LIST *blocks // block list to add to ); void do_new_source( // serialise ); BOOL8 process_cmd_win_event( // UI command semantics inT32 cmd_event, // which menu item? char *new_value // any prompt data ); //// reject.h ////////////////////////////////////////////////////////// const char *char_ambiguities(char c); void make_reject_map( //make rej map for wd //detailed results WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, ROW *row, inT16 pass //1st or 2nd? 
); BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map); inT16 first_alphanum_index(const char *word, const char *word_lengths); inT16 first_alphanum_offset(const char *word, const char *word_lengths); inT16 alpha_count(const char *word, const char *word_lengths); BOOL8 word_contains_non_1_digit(const char *word, const char *word_lengths); void dont_allow_1Il(WERD_RES *word); inT16 count_alphanums( //how many alphanums WERD_RES *word); BOOL8 repeated_ch_string(const char *rep_ch_str, const char *lengths); void flip_0O(WERD_RES *word); BOOL8 non_0_digit(UNICHAR_ID unichar_id); BOOL8 non_O_upper(UNICHAR_ID unichar_id); BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row); void nn_match_word( //Match a word WERD_RES *word, ROW *row); void nn_recover_rejects(WERD_RES *word, ROW *row); BOOL8 test_ambig_word( //test for ambiguity WERD_RES *word); void set_done( //set done flag WERD_RES *word, inT16 pass); inT16 safe_dict_word(const WERD_CHOICE &word); void flip_hyphens(WERD_RES *word); //// adaptions.h /////////////////////////////////////////////////////// void adapt_to_good_ems(WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); void adapt_to_good_samples(WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); BOOL8 word_adaptable( //should we adapt? 
WERD_RES *word, uinT16 mode); void reject_suspect_ems(WERD_RES *word); void collect_ems_for_adaption(WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); void collect_characters_for_adaption(WERD_RES *word, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, CHAR_SAMPLE *sample, CHAR_SAMPLES *best_cluster); void cluster_sample(CHAR_SAMPLE *sample, CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); void complete_clustering(CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting); //// tfacepp.cpp /////////////////////////////////////////////////////// WERD_CHOICE *recog_word_recursive( //recog one owrd WERD *word, //word to do DENORM *denorm, //de-normaliser //matcher function POLY_MATCHER matcher, //tester function POLY_TESTER tester, //trainer function POLY_TESTER trainer, BOOL8 testing, //true if answer driven //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); WERD_CHOICE *recog_word( //recog one owrd WERD *word, //word to do DENORM *denorm, //de-normaliser POLY_MATCHER matcher, //matcher function POLY_TESTER tester, //tester function POLY_TESTER trainer, //trainer function BOOL8 testing, //true if answer driven WERD_CHOICE *&raw_choice, //raw result //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); WERD_CHOICE *split_and_recog_word( //recog one owrd WERD *word, //word to do DENORM *denorm, //de-normaliser //matcher function POLY_MATCHER matcher, //tester function POLY_TESTER tester, //trainer function POLY_TESTER trainer, BOOL8 testing, //true if answer driven //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); //// fixspace.cpp /////////////////////////////////////////////////////// BOOL8 digit_or_numeric_punct(WERD_RES *word, 
int char_position); inT16 eval_word_spacing(WERD_RES_LIST &word_res_list); void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block); inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list); void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); void fix_fuzzy_space_list( //space explorer WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); void fix_fuzzy_spaces( //find fuzzy words volatile ETEXT_DESC *monitor, //progress monitor inT32 word_count, //count of words in doc PAGE_RES *page_res); //// docqual.cpp //////////////////////////////////////////////////////// GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); BOOL8 potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word); void tilde_crunch(PAGE_RES_IT &page_res_it); void unrej_good_quality_words( //unreject potential PAGE_RES_IT &page_res_it); void doc_and_block_rejection( //reject big chunks PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc); void quality_based_rejection(PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc); void convert_bad_unlv_chs(WERD_RES *word_res); void merge_tess_fails(WERD_RES *word_res); void tilde_delete(PAGE_RES_IT &page_res_it); void insert_rej_cblobs(WERD_RES *word); //// pagewalk.cpp /////////////////////////////////////////////////////// void process_selected_words ( BLOCK_LIST * block_list, //blocks to check //function to call TBOX & selection_box, BOOL8 (tesseract::Tesseract::*word_processor) ( BLOCK *, ROW *, WERD *)); //// tessbox.cpp /////////////////////////////////////////////////////// void tess_add_doc_word( //test acceptability WERD_CHOICE *word_choice //after context ); void tess_adapter( //adapt to word WERD *word, //bln word DENORM *denorm, //de-normalise const WERD_CHOICE& choice, //string for word const WERD_CHOICE& raw_choice, //before context const char *rejmap //reject map ); WERD_CHOICE *test_segment_pass2( //recog one 
word WERD *word, //bln word to do DENORM *denorm, //de-normaliser POLY_MATCHER matcher, //matcher function POLY_TESTER tester, //tester function //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); WERD_CHOICE *tess_segment_pass1( //recog one word WERD *word, //bln word to do DENORM *denorm, //de-normaliser POLY_MATCHER matcher, //matcher function //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); WERD_CHOICE *tess_segment_pass2( //recog one word WERD *word, //bln word to do DENORM *denorm, //de-normaliser POLY_MATCHER matcher, //matcher function //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); WERD_CHOICE *correct_segment_pass2( //recog one word WERD *word, //bln word to do DENORM *denorm, //de-normaliser POLY_MATCHER matcher, //matcher function POLY_TESTER tester, //tester function //raw result WERD_CHOICE *&raw_choice, //list of blob lists BLOB_CHOICE_LIST_CLIST *blob_choices, WERD *&outword //bln word output ); void tess_default_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST *ratings, //list of results const char* script ); void tess_bn_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST *ratings //list of results ); void tess_cn_matcher( //call tess PBLOB *pblob, //previous blob PBLOB *blob, //blob to match PBLOB *nblob, //next blob WERD *word, //word it came from DENORM *denorm, //de-normaliser BLOB_CHOICE_LIST *ratings, //list of results // Sorted array of CP_RESULT_STRUCT from class pruner. 
CLASS_PRUNER_RESULTS cpresults ); BOOL8 tess_adaptable_word( //test adaptability WERD *word, //word to test WERD_CHOICE *word_choice, //after context WERD_CHOICE *raw_choice //before context ); BOOL8 tess_acceptable_word( //test acceptability WERD_CHOICE *word_choice, //after context WERD_CHOICE *raw_choice //before context ); //// applybox.cpp ////////////////////////////////////////////////////// void apply_box_testing(BLOCK_LIST *block_list); void apply_boxes(const STRING& fname, BLOCK_LIST *block_list //real blocks ); // converts an array of boxes to a block list int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list, bool right2left); //// blobcmp.cpp /////////////////////////////////////////////////////// float compare_tess_blobs(TBLOB *blob1, TEXTROW *row1, TBLOB *blob2, TEXTROW *row2); //// paircmp.cpp /////////////////////////////////////////////////////// float compare_bln_blobs( //match 2 blobs PBLOB *blob1, //first blob DENORM *denorm1, PBLOB *blob2, //other blob DENORM *denorm2); float compare_blobs( //match 2 blobs PBLOB *blob1, //first blob ROW *row1, //row it came from PBLOB *blob2, //other blob ROW *row2); BOOL8 compare_blob_pairs( //blob processor BLOCK *, ROW *row, //row it came from WERD *, PBLOB *blob //blob to compare ); //// fixxht.cpp /////////////////////////////////////////////////////// void check_block_occ(WERD_RES *word_res); //// Data members /////////////////////////////////////////////////////// BOOL_VAR_H(tessedit_resegment_from_boxes, false, "Take segmentation and labeling from box file"); BOOL_VAR_H(tessedit_train_from_boxes, false, "Generate training data from boxed chars"); BOOL_VAR_H(tessedit_dump_pageseg_images, false, "Dump itermediate images made during page segmentation"); INT_VAR_H(tessedit_pageseg_mode, 2, "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" " (Values from PageSegMode enum in baseapi.h)"); INT_VAR_H(tessedit_accuracyvspeed, 0, "Accuracy V Speed tradeoff: 0 fastest, 100 most 
accurate" " (Values from AccuracyVSpeed enum in baseapi.h)"); BOOL_VAR_H(tessedit_train_from_boxes_word_level, false, "Generate training data from boxed chars at word level."); STRING_VAR_H(tessedit_char_blacklist, "", "Blacklist of chars not to recognize"); STRING_VAR_H(tessedit_char_whitelist, "", "Whitelist of chars to recognize"); BOOL_VAR_H(global_tessedit_ambigs_training, false, "Perform training for ambiguities"); //// ambigsrecog.cpp ///////////////////////////////////////////////////////// FILE *init_ambigs_training(const STRING &fname); void ambigs_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file); void ambigs_classify_and_output(PAGE_RES_IT *page_res_it, const char *label, FILE *output_file); private: Pix* pix_binary_; FCOORD deskew_; FCOORD reskew_; bool hindi_image_; }; } // namespace tesseract #endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__