tesseract/ccmain/tesseractclass.h

///////////////////////////////////////////////////////////////////////
// File:        tesseractclass.h
// Description: An instance of Tesseract. For thread safety, *every*
//              global variable goes in here, directly, or indirectly.
// Author:      Ray Smith
// Created:     Fri Mar 07 08:17:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__
#define TESSERACT_CCMAIN_TESSERACTCLASS_H__

#include "varable.h"
#include "wordrec.h"
#include "ocrclass.h"
#include "control.h"
#include "docqual.h"

// Forward declarations. The Tesseract class below refers to these types
// only through pointers and references, so incomplete types suffice here.
// Listed alphabetically, classes first, then structs.
class BLOB_CHOICE_LIST_CLIST;
class BLOCK_LIST;
class CHAR_SAMPLES_LIST;
class CHAR_SAMPLE_LIST;
class IMAGE;
class PAGE_RES;
class PAGE_RES_IT;
class ROW;
class SVMenuNode;
class TBOX;
class TO_BLOCK_LIST;
class WERD;
class WERD_CHOICE;
class WERD_RES;

struct Pix;


// Top-level class for all tesseract global instance data.
// This class either holds or points to all data used by an instance
// of Tesseract, including the memory allocator. When this is
// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!
//
// NOTE to developers: Do not create cyclic dependencies through this class!
// The directory dependency tree must remain a tree! To keep this clean,
// lower-level code (eg in ccutil, the bottom level) must never need to
// know about the content of a higher-level directory.
// The following scheme will grant the easiest access to lower-level
// global members without creating a cyclic dependency:
// ccmain inherits wordrec, includes textord as a member
// wordrec inherits classify
// classify inherits ccstruct, includes dict as a member
// ccstruct inherits c_util, includes image as a member
// c_util inherits cc_util
// textord has a pointer to ccstruct, but doesn't own it.
// dict has a pointer to ccstruct, but doesn't own it.
//
// NOTE that each level contains members that correspond to global
// data that is defined (and used) at that level, not necessarily where
// the type is defined, so for instance:
// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
// goes inside the Textord class, not the cc_util class.

namespace tesseract {

class Tesseract : public Wordrec {
 public:
  Tesseract();
  ~Tesseract();

  void Clear();

  // Simple accessors.
  const FCOORD& reskew() const {
    return reskew_;
  }
  // Destroy any existing pix and return a pointer to the pointer.
  Pix** mutable_pix_binary() {
    Clear();
    return &pix_binary_;
  }
  Pix* pix_binary() const {
    return pix_binary_;
  }

  void SetBlackAndWhitelist();
  int SegmentPage(const STRING* input_file,
                  IMAGE* image, BLOCK_LIST* blocks);
  int AutoPageSeg(int width, int height, int resolution,
                  bool single_column, IMAGE* image,
                  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);

  //// control.h /////////////////////////////////////////////////////////
  void recog_all_words(                                //process words
                                  PAGE_RES *page_res,  //page structure
                                                       //progress monitor
                                  volatile ETEXT_DESC *monitor,
                                  TBOX *target_word_box=0L,
                                  inT16 dopasses=0
                                 );
  void classify_word_pass1(                 //recog one word
                           WERD_RES *word,  //word to do
                           ROW *row,
                           BLOCK* block,
                           BOOL8 cluster_adapt,
                           CHAR_SAMPLES_LIST *char_clusters,
                           CHAR_SAMPLE_LIST *chars_waiting);
  void recog_pseudo_word(                         //recognize blobs
                         BLOCK_LIST *block_list,  //blocks to check
                         TBOX &selection_box);

  // This method returns all the blobs in the specified blocks.
  // It's the caller's responsibility to destroy the returned list.
  C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks  // blocks to look at.
                                    );

  // This method can be used to perform word-level training using box files.
  // TODO: this can be modified to perform training in general case too.
  void train_word_level_with_boxes(
                                   const STRING& box_file,  // File with boxes.
                                   const STRING& out_file,  // Output file.
                                   BLOCK_LIST* blocks       // Blocks to use.
                                  );
  void fix_rep_char(WERD_RES *word);
  void fix_quotes(               //make double quotes
                  WERD_CHOICE *choice,  //choice to fix
                  WERD *word,    //word to do //char choices
                  BLOB_CHOICE_LIST_CLIST *blob_choices);
  ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s,
                                              const char *lengths);
  void match_word_pass2(                 //recog one word
                        WERD_RES *word,  //word to do
                        ROW *row,
                        BLOCK* block,
                        float x_height);
  void classify_word_pass2(  //word to do
                           WERD_RES *word,
                           BLOCK* block,
                           ROW *row);
  BOOL8 recog_interactive(            //recognize blobs
                          BLOCK *block,    //block
                          ROW *row,   //row of word
                          WERD *word  //word to recognize
                         );
  void fix_hyphens(               //crunch double hyphens
                   WERD_CHOICE *choice,  //choice to fix
                   WERD *word,    //word to do //char choices
                   BLOB_CHOICE_LIST_CLIST *blob_choices);
  void set_word_fonts(
      WERD_RES *word,  // word to adapt to
      BLOB_CHOICE_LIST_CLIST *blob_choices);  // detailed results
  void font_recognition_pass(  //good chars in word
                             PAGE_RES_IT &page_res_it);

  //// output.h //////////////////////////////////////////////////////////

  void output_pass(  //Tess output pass //send to api
                            PAGE_RES_IT &page_res_it,
                            BOOL8 write_to_shm,
                            TBOX *target_word_box);
  FILE *open_outfile(  //open .map & .unlv file
                                const char *extension);
  void write_results(                           //output a word
                     PAGE_RES_IT &page_res_it,  //full info
                     char newline_type,         //type of newline
                     BOOL8 force_eol,           //override tilde crunch?
                     BOOL8 write_to_shm         //send to api
                    );
  void set_unlv_suspects(WERD_RES *word);
  UNICHAR_ID get_rep_char(WERD_RES *word);  // what char is repeated?
  BOOL8 acceptable_number_string(const char *s,
                                 const char *lengths);
  inT16 count_alphanums(const WERD_CHOICE &word);
  inT16 count_alphas(const WERD_CHOICE &word);
  //// tessedit.h ////////////////////////////////////////////////////////
  void read_config_file(const char *filename, bool global_only);
  int init_tesseract(const char *arg0,
                     const char *textbase,
                     const char *language,
                     char **configs,
                     int configs_size,
                     bool configs_global_only);

  int init_tesseract_lm(const char *arg0,
                        const char *textbase,
                        const char *language);

  // Initializes the tesseract classifier without loading language models.
  int init_tesseract_classifier(const char *arg0,
                                const char *textbase,
                                const char *language,
                                char **configs,
                                int configs_size,
                                bool configs_global_only);

  void recognize_page(STRING& image_name);
  void end_tesseract();

  bool init_tesseract_lang_data(const char *arg0,
                                const char *textbase,
                                const char *language,
                                char **configs,
                                int configs_size,
                                bool configs_global_only);

  //// pgedit.h //////////////////////////////////////////////////////////
  SVMenuNode *build_menu_new();
  void pgeditor_main(BLOCK_LIST *blocks);
  void process_image_event( // action in image win
                           const SVEvent &event);
  void pgeditor_read_file(                   // of serialised file
                          STRING &filename,
                          BLOCK_LIST *blocks  // block list to add to
                         );
  void do_new_source(           // serialise
                    );
  BOOL8 process_cmd_win_event(                 // UI command semantics
                              inT32 cmd_event,  // which menu item?
                              char *new_value   // any prompt data
                             );
  //// reject.h //////////////////////////////////////////////////////////
  const char *char_ambiguities(char c);
  void make_reject_map(            //make rej map for wd //detailed results
                       WERD_RES *word,
                       BLOB_CHOICE_LIST_CLIST *blob_choices,
                       ROW *row,
                       inT16 pass  //1st or 2nd?
                      );
  BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
  inT16 first_alphanum_index(const char *word,
                             const char *word_lengths);
  inT16 first_alphanum_offset(const char *word,
                              const char *word_lengths);
  inT16 alpha_count(const char *word,
                    const char *word_lengths);
  BOOL8 word_contains_non_1_digit(const char *word,
                                  const char *word_lengths);
  void dont_allow_1Il(WERD_RES *word);
  inT16 count_alphanums(  //how many alphanums
                        WERD_RES *word);
  BOOL8 repeated_ch_string(const char *rep_ch_str,
                           const char *lengths);
  void flip_0O(WERD_RES *word);
  BOOL8 non_0_digit(UNICHAR_ID unichar_id);
  BOOL8 non_O_upper(UNICHAR_ID unichar_id);
  BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
  void nn_match_word(  //Match a word
                     WERD_RES *word,
                     ROW *row);
  void nn_recover_rejects(WERD_RES *word, ROW *row);
  BOOL8 test_ambig_word(  //test for ambiguity
                        WERD_RES *word);
  void set_done(  //set done flag
                WERD_RES *word,
                inT16 pass);
  inT16 safe_dict_word(const WERD_CHOICE  &word);
  void flip_hyphens(WERD_RES *word);
  //// adaptions.h ///////////////////////////////////////////////////////
  void adapt_to_good_ems(WERD_RES *word,
                         CHAR_SAMPLES_LIST *char_clusters,
                         CHAR_SAMPLE_LIST *chars_waiting);
  void adapt_to_good_samples(WERD_RES *word,
                             CHAR_SAMPLES_LIST *char_clusters,
                             CHAR_SAMPLE_LIST *chars_waiting);
  BOOL8 word_adaptable(  //should we adapt?
                       WERD_RES *word,
                       uinT16 mode);
  void reject_suspect_ems(WERD_RES *word);
  void collect_ems_for_adaption(WERD_RES *word,
                                CHAR_SAMPLES_LIST *char_clusters,
                                CHAR_SAMPLE_LIST *chars_waiting);
  void collect_characters_for_adaption(WERD_RES *word,
                                       CHAR_SAMPLES_LIST *char_clusters,
                                       CHAR_SAMPLE_LIST *chars_waiting);
  void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
                       CHAR_SAMPLE *sample,
                       CHAR_SAMPLES *best_cluster);
  void cluster_sample(CHAR_SAMPLE *sample,
                      CHAR_SAMPLES_LIST *char_clusters,
                      CHAR_SAMPLE_LIST *chars_waiting);
  void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
                           CHAR_SAMPLE_LIST *chars_waiting);

  //// tfacepp.cpp ///////////////////////////////////////////////////////
  WERD_CHOICE *recog_word_recursive(                    //recog one owrd
                                    WERD *word,         //word to do
                                    DENORM *denorm,     //de-normaliser
                                                        //matcher function
                                    POLY_MATCHER matcher,
                                                        //tester function
                                    POLY_TESTER tester,
                                                        //trainer function
                                    POLY_TESTER trainer,
                                    BOOL8 testing,      //true if answer driven
                                                        //raw result
                                    WERD_CHOICE *&raw_choice,
                                                        //list of blob lists
                                    BLOB_CHOICE_LIST_CLIST *blob_choices,
                                    WERD *&outword      //bln word output
                                   );
  WERD_CHOICE *recog_word(                           //recog one owrd
                          WERD *word,                //word to do
                          DENORM *denorm,            //de-normaliser
                          POLY_MATCHER matcher,      //matcher function
                          POLY_TESTER tester,        //tester function
                          POLY_TESTER trainer,       //trainer function
                          BOOL8 testing,             //true if answer driven
                          WERD_CHOICE *&raw_choice,  //raw result
                                                     //list of blob lists
                          BLOB_CHOICE_LIST_CLIST *blob_choices,
                          WERD *&outword             //bln word output
                         );
  WERD_CHOICE *split_and_recog_word(                    //recog one owrd
                                    WERD *word,         //word to do
                                    DENORM *denorm,     //de-normaliser
                                                        //matcher function
                                    POLY_MATCHER matcher,
                                                        //tester function
                                    POLY_TESTER tester,
                                                        //trainer function
                                    POLY_TESTER trainer,
                                    BOOL8 testing,      //true if answer driven
                                                        //raw result
                                    WERD_CHOICE *&raw_choice,
                                                        //list of blob lists
                                    BLOB_CHOICE_LIST_CLIST *blob_choices,
                                    WERD *&outword      //bln word output
                                   );
  //// fixspace.cpp ///////////////////////////////////////////////////////
  BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position);
  inT16 eval_word_spacing(WERD_RES_LIST &word_res_list);
  void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block);
  inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
  void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block);
  void fix_fuzzy_space_list(  //space explorer
                            WERD_RES_LIST &best_perm,
                            ROW *row,
                            BLOCK* block);
  void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block);
  void fix_fuzzy_spaces(                               //find fuzzy words
                        volatile ETEXT_DESC *monitor,  //progress monitor
                        inT32 word_count,              //count of words in doc
                        PAGE_RES *page_res);
  //// docqual.cpp ////////////////////////////////////////////////////////
  GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);
  BOOL8 potential_word_crunch(WERD_RES *word,
                              GARBAGE_LEVEL garbage_level,
                              BOOL8 ok_dict_word);
  void tilde_crunch(PAGE_RES_IT &page_res_it);
  void unrej_good_quality_words(  //unreject potential
                                PAGE_RES_IT &page_res_it);
  void doc_and_block_rejection(  //reject big chunks
                               PAGE_RES_IT &page_res_it,
                               BOOL8 good_quality_doc);
  void quality_based_rejection(PAGE_RES_IT &page_res_it,
                               BOOL8 good_quality_doc);
  void convert_bad_unlv_chs(WERD_RES *word_res);
  void merge_tess_fails(WERD_RES *word_res);
  void tilde_delete(PAGE_RES_IT &page_res_it);
  void insert_rej_cblobs(WERD_RES *word);
  //// pagewalk.cpp ///////////////////////////////////////////////////////
  void
  process_selected_words (
      BLOCK_LIST * block_list, //blocks to check
      //function to call
      TBOX & selection_box,
      BOOL8 (tesseract::Tesseract::*word_processor) (
          BLOCK *,
          ROW *,
          WERD *));
  //// tessbox.cpp ///////////////////////////////////////////////////////
  void tess_add_doc_word(                          //test acceptability
                         WERD_CHOICE *word_choice  //after context
                        );
  void tess_adapter(                         //adapt to word
                    WERD *word,              //bln word
                    DENORM *denorm,          //de-normalise
                    const WERD_CHOICE& choice,      //string for word
                    const WERD_CHOICE& raw_choice,  //before context
                    const char *rejmap       //reject map
                   );
  WERD_CHOICE *test_segment_pass2(                        //recog one word
                                  WERD *word,             //bln word to do
                                  DENORM *denorm,         //de-normaliser
                                  POLY_MATCHER matcher,   //matcher function
                                  POLY_TESTER tester,     //tester function
                                                          //raw result
                                  WERD_CHOICE *&raw_choice,
                                                          //list of blob lists
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,
                                  WERD *&outword          //bln word output
                                 );
  WERD_CHOICE *tess_segment_pass1(                        //recog one word
                                  WERD *word,             //bln word to do
                                  DENORM *denorm,         //de-normaliser
                                  POLY_MATCHER matcher,   //matcher function
                                                          //raw result
                                  WERD_CHOICE *&raw_choice,
                                                          //list of blob lists
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,
                                  WERD *&outword             //bln word output
                                 );
  WERD_CHOICE *tess_segment_pass2(                        //recog one word
                                  WERD *word,             //bln word to do
                                  DENORM *denorm,         //de-normaliser
                                  POLY_MATCHER matcher,   //matcher function
                                                          //raw result
                                  WERD_CHOICE *&raw_choice,
                                                          //list of blob lists
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,
                                  WERD *&outword          //bln word output
                                 );
  WERD_CHOICE *correct_segment_pass2(                       //recog one word
                                     WERD *word,            //bln word to do
                                     DENORM *denorm,        //de-normaliser
                                     POLY_MATCHER matcher,  //matcher function
                                     POLY_TESTER tester,    //tester function
                                                            //raw result
                                     WERD_CHOICE *&raw_choice,
                                                            //list of blob lists
                                     BLOB_CHOICE_LIST_CLIST *blob_choices,
                                     WERD *&outword         //bln word output
                                    );
  void tess_default_matcher(                            //call tess
                            PBLOB *pblob,               //previous blob
                            PBLOB *blob,                //blob to match
                            PBLOB *nblob,               //next blob
                            WERD *word,                 //word it came from
                            DENORM *denorm,             //de-normaliser
                            BLOB_CHOICE_LIST *ratings,  //list of results
                            const char* script
                            );
  void tess_bn_matcher(                           //call tess
                       PBLOB *pblob,              //previous blob
                       PBLOB *blob,               //blob to match
                       PBLOB *nblob,              //next blob
                       WERD *word,                //word it came from
                       DENORM *denorm,            //de-normaliser
                       BLOB_CHOICE_LIST *ratings  //list of results
                      );
  void tess_cn_matcher(                           //call tess
                       PBLOB *pblob,              //previous blob
                       PBLOB *blob,               //blob to match
                       PBLOB *nblob,              //next blob
                       WERD *word,                //word it came from
                       DENORM *denorm,            //de-normaliser
                       BLOB_CHOICE_LIST *ratings,  //list of results
                       // Sorted array of CP_RESULT_STRUCT from class pruner.
                       CLASS_PRUNER_RESULTS cpresults
                      );
  BOOL8 tess_adaptable_word(                           //test adaptability
                            WERD *word,                //word to test
                            WERD_CHOICE *word_choice,  //after context
                            WERD_CHOICE *raw_choice    //before context
                           );
  BOOL8 tess_acceptable_word(                           //test acceptability
                             WERD_CHOICE *word_choice,  //after context
                             WERD_CHOICE *raw_choice    //before context
                            );
  //// applybox.cpp //////////////////////////////////////////////////////
  void apply_box_testing(BLOCK_LIST *block_list);
  void apply_boxes(const STRING& fname,
                   BLOCK_LIST *block_list    //real blocks
                  );
  // converts an array of boxes to a block list
  int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list,
                      bool right2left);
  //// blobcmp.cpp ///////////////////////////////////////////////////////
  float compare_tess_blobs(TBLOB *blob1,
                           TEXTROW *row1,
                           TBLOB *blob2,
                           TEXTROW *row2);
  //// paircmp.cpp ///////////////////////////////////////////////////////
  float compare_bln_blobs(               //match 2 blobs
                          PBLOB *blob1,  //first blob
                          DENORM *denorm1,
                          PBLOB *blob2,  //other blob
                          DENORM *denorm2);
  float compare_blobs(               //match 2 blobs
                      PBLOB *blob1,  //first blob
                      ROW *row1,     //row it came from
                      PBLOB *blob2,  //other blob
                      ROW *row2);
  BOOL8 compare_blob_pairs(             //blob processor
                           BLOCK *,
                           ROW *row,    //row it came from
                           WERD *,
                           PBLOB *blob  //blob to compare
                          );
  //// fixxht.cpp ///////////////////////////////////////////////////////
  void check_block_occ(WERD_RES *word_res);

  //// Data members ///////////////////////////////////////////////////////
  BOOL_VAR_H(tessedit_resegment_from_boxes, false,
             "Take segmentation and labeling from box file");
  BOOL_VAR_H(tessedit_train_from_boxes, false,
             "Generate training data from boxed chars");
  BOOL_VAR_H(tessedit_dump_pageseg_images, false,
             "Dump itermediate images made during page segmentation");
  INT_VAR_H(tessedit_pageseg_mode, 2,
            "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
            " (Values from PageSegMode enum in baseapi.h)");
  INT_VAR_H(tessedit_accuracyvspeed, 0,
            "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
            " (Values from AccuracyVSpeed enum in baseapi.h)");
  BOOL_VAR_H(tessedit_train_from_boxes_word_level, false,
             "Generate training data from boxed chars at word level.");
  STRING_VAR_H(tessedit_char_blacklist, "",
               "Blacklist of chars not to recognize");
  STRING_VAR_H(tessedit_char_whitelist, "",
               "Whitelist of chars to recognize");
  BOOL_VAR_H(global_tessedit_ambigs_training, false,
             "Perform training for ambiguities");
  //// ambigsrecog.cpp /////////////////////////////////////////////////////////
  FILE *init_ambigs_training(const STRING &fname);
  void ambigs_training_segmented(const STRING &fname,
                                 PAGE_RES *page_res,
                                 volatile ETEXT_DESC *monitor,
                                 FILE *output_file);
  void ambigs_classify_and_output(PAGE_RES_IT *page_res_it,
                                  const char *label,
                                  FILE *output_file);
 private:
  Pix* pix_binary_;
  FCOORD deskew_;
  FCOORD reskew_;
  bool hindi_image_;
};

}  // namespace tesseract


#endif  // TESSERACT_CCMAIN_TESSERACTCLASS_H__
More changes to ccmain for 3.00 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@287 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2009-07-11 10:07:25 +08:00			`///////////////////////////////////////////////////////////////////////`
			`// File: tesseractclass.h`
			`// Description: An instance of Tesseract. For thread safety, every`
			`// global variable goes in here, directly, or indirectly.`
			`// Author: Ray Smith`
			`// Created: Fri Mar 07 08:17:01 PST 2008`
			`//`
			`// (C) Copyright 2008, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`//`
			`///////////////////////////////////////////////////////////////////////`

			`#ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__`
			`#define TESSERACT_CCMAIN_TESSERACTCLASS_H__`

			`#include "varable.h"`
			`#include "wordrec.h"`
			`#include "ocrclass.h"`
			`#include "control.h"`
			`#include "docqual.h"`

			`class CHAR_SAMPLES_LIST;`
			`class CHAR_SAMPLE_LIST;`
			`class PAGE_RES;`
			`class PAGE_RES_IT;`
			`class BLOCK_LIST;`
			`class TO_BLOCK_LIST;`
			`class IMAGE;`
			`class WERD_RES;`
			`class ROW;`
			`class TBOX;`
			`class SVMenuNode;`
			`struct Pix;`
			`class WERD_CHOICE;`
			`class WERD;`
			`class BLOB_CHOICE_LIST_CLIST;`


			`// Top-level class for all tesseract global instance data.`
			`// This class either holds or points to all data used by an instance`
			`// of Tesseract, including the memory allocator. When this is`
			`// complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT!`
			`//`
			`// NOTE to developers: Do not create cyclic dependencies through this class!`
			`// The directory dependency tree must remain a tree! To keep this clean,`
			`// lower-level code (eg in ccutil, the bottom level) must never need to`
			`// know about the content of a higher-level directory.`
			`// The following scheme will grant the easiest access to lower-level`
			`// global members without creating a cyclic dependency:`
			`// ccmain inherits wordrec, includes textord as a member`
			`// wordrec inherits classify`
			`// classify inherits ccstruct, includes dict as a member`
			`// ccstruct inherits c_util, includes image as a member`
			`// c_util inherits cc_util`
			`// textord has a pointer to ccstruct, but doesn't own it.`
			`// dict has a pointer to ccstruct, but doesn't own it.`
			`//`
			`// NOTE: that each level contains members that correspond to global`
			`// data that is defined (and used) at that level, not necessarily where`
			`// the type is defined so for instance:`
			`// BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");`
			`// goes inside the Textord class, not the cc_util class.`

			`namespace tesseract {`

			`class Tesseract : public Wordrec {`
			`public:`
			`Tesseract();`
			`~Tesseract();`

			`void Clear();`

			`// Simple accessors.`
			`const FCOORD& reskew() const {`
			`return reskew_;`
			`}`
			`// Destroy any existing pix and return a pointer to the pointer.`
			`Pix** mutable_pix_binary() {`
			`Clear();`
			`return &pix_binary_;`
			`}`
			`Pix* pix_binary() const {`
			`return pix_binary_;`
			`}`

			`void SetBlackAndWhitelist();`
			`int SegmentPage(const STRING* input_file,`
			`IMAGE* image, BLOCK_LIST* blocks);`
			`int AutoPageSeg(int width, int height, int resolution,`
			`bool single_column, IMAGE* image,`
			`BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);`

			`//// control.h /////////////////////////////////////////////////////////`
			`void recog_all_words( //process words`
			`PAGE_RES *page_res, //page structure`
			`//progress monitor`
			`volatile ETEXT_DESC *monitor,`
			`TBOX *target_word_box=0L,`
			`inT16 dopasses=0`
			`);`
			`void classify_word_pass1( //recog one word`
			`WERD_RES *word, //word to do`
			`ROW *row,`
			`BLOCK* block,`
			`BOOL8 cluster_adapt,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`void recog_pseudo_word( //recognize blobs`
			`BLOCK_LIST *block_list, //blocks to check`
			`TBOX &selection_box);`

			`// This method returns all the blobs in the specified blocks.`
			`// It's the caller's responsibility to destroy the returned list.`
			`C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at.`
			`);`

			`// This method can be used to perform word-level training using box files.`
			`// TODO: this can be modified to perform training in general case too.`
			`void train_word_level_with_boxes(`
			`const STRING& box_file, // File with boxes.`
			`const STRING& out_file, // Output file.`
			`BLOCK_LIST* blocks // Blocks to use.`
			`);`
			`void fix_rep_char(WERD_RES *word);`
			`void fix_quotes( //make double quotes`
			`WERD_CHOICE *choice, //choice to fix`
			`WERD *word, //word to do //char choices`
			`BLOB_CHOICE_LIST_CLIST *blob_choices);`
			`ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s,`
			`const char *lengths);`
			`void match_word_pass2( //recog one word`
			`WERD_RES *word, //word to do`
			`ROW *row,`
			`BLOCK* block,`
			`float x_height);`
			`void classify_word_pass2( //word to do`
			`WERD_RES *word,`
			`BLOCK* block,`
			`ROW *row);`
			`BOOL8 recog_interactive( //recognize blobs`
			`BLOCK *block, //block`
			`ROW *row, //row of word`
			`WERD *word //word to recognize`
			`);`
			`void fix_hyphens( //crunch double hyphens`
			`WERD_CHOICE *choice, //choice to fix`
			`WERD *word, //word to do //char choices`
			`BLOB_CHOICE_LIST_CLIST *blob_choices);`
			`void set_word_fonts(`
			`WERD_RES *word, // word to adapt to`
			`BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results`
			`void font_recognition_pass( //good chars in word`
			`PAGE_RES_IT &page_res_it);`

			`//// output.h //////////////////////////////////////////////////////////`

			`void output_pass( //Tess output pass //send to api`
			`PAGE_RES_IT &page_res_it,`
			`BOOL8 write_to_shm,`
			`TBOX *target_word_box);`
			`FILE *open_outfile( //open .map & .unlv file`
			`const char *extension);`
			`void write_results( //output a word`
			`PAGE_RES_IT &page_res_it, //full info`
			`char newline_type, //type of newline`
			`BOOL8 force_eol, //override tilde crunch?`
			`BOOL8 write_to_shm //send to api`
			`);`
			`void set_unlv_suspects(WERD_RES *word);`
			`UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated?`
			`BOOL8 acceptable_number_string(const char *s,`
			`const char *lengths);`
			`inT16 count_alphanums(const WERD_CHOICE &word);`
			`inT16 count_alphas(const WERD_CHOICE &word);`
			`//// tessedit.h ////////////////////////////////////////////////////////`
			`void read_config_file(const char *filename, bool global_only);`
			`int init_tesseract(const char *arg0,`
			`const char *textbase,`
			`const char *language,`
			`char **configs,`
			`int configs_size,`
			`bool configs_global_only);`

			`int init_tesseract_lm(const char *arg0,`
			`const char *textbase,`
			`const char *language);`

			`// Initializes the tesseract classifier without loading language models.`
			`int init_tesseract_classifier(const char *arg0,`
			`const char *textbase,`
			`const char *language,`
			`char **configs,`
			`int configs_size,`
			`bool configs_global_only);`

			`void recognize_page(STRING& image_name);`
			`void end_tesseract();`

			`bool init_tesseract_lang_data(const char *arg0,`
			`const char *textbase,`
			`const char *language,`
			`char **configs,`
			`int configs_size,`
			`bool configs_global_only);`

			`//// pgedit.h //////////////////////////////////////////////////////////`
			`SVMenuNode *build_menu_new();`
			`void pgeditor_main(BLOCK_LIST *blocks);`
			`void process_image_event( // action in image win`
			`const SVEvent &event);`
			`void pgeditor_read_file( // of serialised file`
			`STRING &filename,`
			`BLOCK_LIST *blocks // block list to add to`
			`);`
			`void do_new_source( // serialise`
			`);`
			`BOOL8 process_cmd_win_event( // UI command semantics`
			`inT32 cmd_event, // which menu item?`
			`char *new_value // any prompt data`
			`);`
			`//// reject.h //////////////////////////////////////////////////////////`
			`const char *char_ambiguities(char c);`
			`void make_reject_map( //make rej map for wd //detailed results`
			`WERD_RES *word,`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`ROW *row,`
			`inT16 pass //1st or 2nd?`
			`);`
			`BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);`
			`inT16 first_alphanum_index(const char *word,`
			`const char *word_lengths);`
			`inT16 first_alphanum_offset(const char *word,`
			`const char *word_lengths);`
			`inT16 alpha_count(const char *word,`
			`const char *word_lengths);`
			`BOOL8 word_contains_non_1_digit(const char *word,`
			`const char *word_lengths);`
			`void dont_allow_1Il(WERD_RES *word);`
			`inT16 count_alphanums( //how many alphanums`
			`WERD_RES *word);`
			`BOOL8 repeated_ch_string(const char *rep_ch_str,`
			`const char *lengths);`
			`void flip_0O(WERD_RES *word);`
			`BOOL8 non_0_digit(UNICHAR_ID unichar_id);`
			`BOOL8 non_O_upper(UNICHAR_ID unichar_id);`
			`BOOL8 repeated_nonalphanum_wd(WERD_RES word, ROW row);`
			`void nn_match_word( //Match a word`
			`WERD_RES *word,`
			`ROW *row);`
			`void nn_recover_rejects(WERD_RES word, ROW row);`
			`BOOL8 test_ambig_word( //test for ambiguity`
			`WERD_RES *word);`
			`void set_done( //set done flag`
			`WERD_RES *word,`
			`inT16 pass);`
			`inT16 safe_dict_word(const WERD_CHOICE &word);`
			`void flip_hyphens(WERD_RES *word);`
			`//// adaptions.h ///////////////////////////////////////////////////////`
			`void adapt_to_good_ems(WERD_RES *word,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`void adapt_to_good_samples(WERD_RES *word,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`BOOL8 word_adaptable( //should we adapt?`
			`WERD_RES *word,`
			`uinT16 mode);`
			`void reject_suspect_ems(WERD_RES *word);`
			`void collect_ems_for_adaption(WERD_RES *word,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`void collect_characters_for_adaption(WERD_RES *word,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,`
			`CHAR_SAMPLE *sample,`
			`CHAR_SAMPLES *best_cluster);`
			`void cluster_sample(CHAR_SAMPLE *sample,`
			`CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`
			`void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,`
			`CHAR_SAMPLE_LIST *chars_waiting);`

			`//// tfacepp.cpp ///////////////////////////////////////////////////////`
			`WERD_CHOICE *recog_word_recursive( //recog one owrd`
			`WERD *word, //word to do`
			`DENORM *denorm, //de-normaliser`
			`//matcher function`
			`POLY_MATCHER matcher,`
			`//tester function`
			`POLY_TESTER tester,`
			`//trainer function`
			`POLY_TESTER trainer,`
			`BOOL8 testing, //true if answer driven`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`WERD_CHOICE *recog_word( //recog one owrd`
			`WERD *word, //word to do`
			`DENORM *denorm, //de-normaliser`
			`POLY_MATCHER matcher, //matcher function`
			`POLY_TESTER tester, //tester function`
			`POLY_TESTER trainer, //trainer function`
			`BOOL8 testing, //true if answer driven`
			`WERD_CHOICE *&raw_choice, //raw result`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`WERD_CHOICE *split_and_recog_word( //recog one owrd`
			`WERD *word, //word to do`
			`DENORM *denorm, //de-normaliser`
			`//matcher function`
			`POLY_MATCHER matcher,`
			`//tester function`
			`POLY_TESTER tester,`
			`//trainer function`
			`POLY_TESTER trainer,`
			`BOOL8 testing, //true if answer driven`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`//// fixspace.cpp ///////////////////////////////////////////////////////`
			`BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position);`
			`inT16 eval_word_spacing(WERD_RES_LIST &word_res_list);`
			`void match_current_words(WERD_RES_LIST &words, ROW row, BLOCK block);`
			`inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);`
			`void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW row, BLOCK block);`
			`void fix_fuzzy_space_list( //space explorer`
			`WERD_RES_LIST &best_perm,`
			`ROW *row,`
			`BLOCK* block);`
			`void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW row, BLOCK block);`
			`void fix_fuzzy_spaces( //find fuzzy words`
			`volatile ETEXT_DESC *monitor, //progress monitor`
			`inT32 word_count, //count of words in doc`
			`PAGE_RES *page_res);`
			`//// docqual.cpp ////////////////////////////////////////////////////////`
			`GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word);`
			`BOOL8 potential_word_crunch(WERD_RES *word,`
			`GARBAGE_LEVEL garbage_level,`
			`BOOL8 ok_dict_word);`
			`void tilde_crunch(PAGE_RES_IT &page_res_it);`
			`void unrej_good_quality_words( //unreject potential`
			`PAGE_RES_IT &page_res_it);`
			`void doc_and_block_rejection( //reject big chunks`
			`PAGE_RES_IT &page_res_it,`
			`BOOL8 good_quality_doc);`
			`void quality_based_rejection(PAGE_RES_IT &page_res_it,`
			`BOOL8 good_quality_doc);`
			`void convert_bad_unlv_chs(WERD_RES *word_res);`
			`void merge_tess_fails(WERD_RES *word_res);`
			`void tilde_delete(PAGE_RES_IT &page_res_it);`
			`void insert_rej_cblobs(WERD_RES *word);`
			`//// pagewalk.cpp ///////////////////////////////////////////////////////`
			`void`
			`process_selected_words (`
			`BLOCK_LIST * block_list, //blocks to check`
			`//function to call`
			`TBOX & selection_box,`
			`BOOL8 (tesseract::Tesseract::*word_processor) (`
			`BLOCK *,`
			`ROW *,`
			`WERD *));`
			`//// tessbox.cpp ///////////////////////////////////////////////////////`
			`void tess_add_doc_word( //test acceptability`
			`WERD_CHOICE *word_choice //after context`
			`);`
			`void tess_adapter( //adapt to word`
			`WERD *word, //bln word`
			`DENORM *denorm, //de-normalise`
			`const WERD_CHOICE& choice, //string for word`
			`const WERD_CHOICE& raw_choice, //before context`
			`const char *rejmap //reject map`
			`);`
			`WERD_CHOICE *test_segment_pass2( //recog one word`
			`WERD *word, //bln word to do`
			`DENORM *denorm, //de-normaliser`
			`POLY_MATCHER matcher, //matcher function`
			`POLY_TESTER tester, //tester function`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`WERD_CHOICE *tess_segment_pass1( //recog one word`
			`WERD *word, //bln word to do`
			`DENORM *denorm, //de-normaliser`
			`POLY_MATCHER matcher, //matcher function`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`WERD_CHOICE *tess_segment_pass2( //recog one word`
			`WERD *word, //bln word to do`
			`DENORM *denorm, //de-normaliser`
			`POLY_MATCHER matcher, //matcher function`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`WERD_CHOICE *correct_segment_pass2( //recog one word`
			`WERD *word, //bln word to do`
			`DENORM *denorm, //de-normaliser`
			`POLY_MATCHER matcher, //matcher function`
			`POLY_TESTER tester, //tester function`
			`//raw result`
			`WERD_CHOICE *&raw_choice,`
			`//list of blob lists`
			`BLOB_CHOICE_LIST_CLIST *blob_choices,`
			`WERD *&outword //bln word output`
			`);`
			`void tess_default_matcher( //call tess`
			`PBLOB *pblob, //previous blob`
			`PBLOB *blob, //blob to match`
			`PBLOB *nblob, //next blob`
			`WERD *word, //word it came from`
			`DENORM *denorm, //de-normaliser`
			`BLOB_CHOICE_LIST *ratings, //list of results`
			`const char* script`
			`);`
			`void tess_bn_matcher( //call tess`
			`PBLOB *pblob, //previous blob`
			`PBLOB *blob, //blob to match`
			`PBLOB *nblob, //next blob`
			`WERD *word, //word it came from`
			`DENORM *denorm, //de-normaliser`
			`BLOB_CHOICE_LIST *ratings //list of results`
			`);`
			`void tess_cn_matcher( //call tess`
			`PBLOB *pblob, //previous blob`
			`PBLOB *blob, //blob to match`
			`PBLOB *nblob, //next blob`
			`WERD *word, //word it came from`
			`DENORM *denorm, //de-normaliser`
			`BLOB_CHOICE_LIST *ratings, //list of results`
			`// Sorted array of CP_RESULT_STRUCT from class pruner.`
			`CLASS_PRUNER_RESULTS cpresults`
			`);`
			`BOOL8 tess_adaptable_word( //test adaptability`
			`WERD *word, //word to test`
			`WERD_CHOICE *word_choice, //after context`
			`WERD_CHOICE *raw_choice //before context`
			`);`
			`BOOL8 tess_acceptable_word( //test acceptability`
			`WERD_CHOICE *word_choice, //after context`
			`WERD_CHOICE *raw_choice //before context`
			`);`
			`//// applybox.cpp //////////////////////////////////////////////////////`
			`void apply_box_testing(BLOCK_LIST *block_list);`
			`void apply_boxes(const STRING& fname,`
			`BLOCK_LIST *block_list //real blocks`
			`);`
			`// converts an array of boxes to a block list`
			`int Boxes2BlockList(int box_cnt, TBOX boxes, BLOCK_LIST block_list,`
			`bool right2left);`
			`//// blobcmp.cpp ///////////////////////////////////////////////////////`
			`float compare_tess_blobs(TBLOB *blob1,`
			`TEXTROW *row1,`
			`TBLOB *blob2,`
			`TEXTROW *row2);`
			`//// paircmp.cpp ///////////////////////////////////////////////////////`
			`float compare_bln_blobs( //match 2 blobs`
			`PBLOB *blob1, //first blob`
			`DENORM *denorm1,`
			`PBLOB *blob2, //other blob`
			`DENORM *denorm2);`
			`float compare_blobs( //match 2 blobs`
			`PBLOB *blob1, //first blob`
			`ROW *row1, //row it came from`
			`PBLOB *blob2, //other blob`
			`ROW *row2);`
			`BOOL8 compare_blob_pairs( //blob processor`
			`BLOCK *,`
			`ROW *row, //row it came from`
			`WERD *,`
			`PBLOB *blob //blob to compare`
			`);`
			`//// fixxht.cpp ///////////////////////////////////////////////////////`
			`void check_block_occ(WERD_RES *word_res);`

			`//// Data members ///////////////////////////////////////////////////////`
			`BOOL_VAR_H(tessedit_resegment_from_boxes, false,`
			`"Take segmentation and labeling from box file");`
			`BOOL_VAR_H(tessedit_train_from_boxes, false,`
			`"Generate training data from boxed chars");`
			`BOOL_VAR_H(tessedit_dump_pageseg_images, false,`
			`"Dump itermediate images made during page segmentation");`
			`INT_VAR_H(tessedit_pageseg_mode, 2,`
			`"Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"`
			`" (Values from PageSegMode enum in baseapi.h)");`
			`INT_VAR_H(tessedit_accuracyvspeed, 0,`
			`"Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"`
			`" (Values from AccuracyVSpeed enum in baseapi.h)");`
			`BOOL_VAR_H(tessedit_train_from_boxes_word_level, false,`
			`"Generate training data from boxed chars at word level.");`
			`STRING_VAR_H(tessedit_char_blacklist, "",`
			`"Blacklist of chars not to recognize");`
			`STRING_VAR_H(tessedit_char_whitelist, "",`
			`"Whitelist of chars to recognize");`
			`BOOL_VAR_H(global_tessedit_ambigs_training, false,`
			`"Perform training for ambiguities");`
			`//// ambigsrecog.cpp /////////////////////////////////////////////////////////`
			`FILE *init_ambigs_training(const STRING &fname);`
			`void ambigs_training_segmented(const STRING &fname,`
			`PAGE_RES *page_res,`
			`volatile ETEXT_DESC *monitor,`
			`FILE *output_file);`
			`void ambigs_classify_and_output(PAGE_RES_IT *page_res_it,`
			`const char *label,`
			`FILE *output_file);`
			`private:`
			`Pix* pix_binary_;`
			`FCOORD deskew_;`
			`FCOORD reskew_;`
			`bool hindi_image_;`
			`};`

			`} // namespace tesseract`


			`#endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__`