/********************************************************************** * File: tface.c (Formerly tface.c) * Description: C side of the Tess/tessedit C/C++ interface. * Author: Ray Smith * Created: Mon Apr 27 11:57:06 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #include "bestfirst.h" #include "callcpp.h" #include "chop.h" #include "chopper.h" #include "danerror.h" #include "fxdefs.h" #include "globals.h" #include "gradechop.h" #include "matchtab.h" #include "pageres.h" #include "permute.h" #include "wordclass.h" #include "wordrec.h" #include "featdefs.h" #include #ifdef __UNIX__ #include #endif namespace tesseract { /** * @name program_editup * * Initialize all the things in the program that need to be initialized. * init_permute determines whether to initialize the permute functions * and Dawg models. */ void Wordrec::program_editup(const char *textbase, bool init_classifier, bool init_dict) { if (textbase != NULL) imagefile = textbase; InitFeatureDefs(&feature_defs_); SetupExtractors(&feature_defs_); InitAdaptiveClassifier(init_classifier); if (init_dict) getDict().Load(); pass2_ok_split = chop_ok_split; pass2_seg_states = wordrec_num_seg_states; } /** * @name end_recog * * Cleanup and exit the recog program. */ int Wordrec::end_recog() { program_editdown (0); return (0); } /** * @name program_editdown * * This function holds any nessessary post processing for the Wise Owl * program. */ void Wordrec::program_editdown(inT32 elasped_time) { EndAdaptiveClassifier(); blob_match_table.end_match_table(); getDict().InitChoiceAccum(); getDict().End(); } /** * @name set_pass1 * * Get ready to do some pass 1 stuff. */ void Wordrec::set_pass1() { chop_ok_split.set_value(70.0); wordrec_num_seg_states.set_value(15); SettupPass1(); } /** * @name set_pass2 * * Get ready to do some pass 2 stuff. */ void Wordrec::set_pass2() { chop_ok_split.set_value(pass2_ok_split); wordrec_num_seg_states.set_value(pass2_seg_states); SettupPass2(); } /** * @name cc_recog * * Recognize a word. */ BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(WERD_RES *word) { getDict().InitChoiceAccum(); getDict().reset_hyphen_vars(word->word->flag(W_EOL)); blob_match_table.init_match_table(); BLOB_CHOICE_LIST_VECTOR *results = chop_word_main(word); getDict().DebugWordChoices(); return results; } /** * @name dict_word() * * Test the dictionaries, returning NO_PERM (0) if not found, or one * of the PermuterType values if found, according to the dictionary. */ int Wordrec::dict_word(const WERD_CHOICE &word) { return getDict().valid_word(word); } /** * @name call_matcher * * Called from Tess with a blob in tess form. * The blob may need rotating to the correct orientation for classification. */ BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *tessblob) { TBLOB* rotated_blob = NULL; // If necessary, copy the blob and rotate it. if (denorm_.block() != NULL && denorm_.block()->classify_rotation().y() != 0.0) { TBOX box = tessblob->bounding_box(); int src_width = box.width(); int src_height = box.height(); src_width = static_cast(src_width / denorm_.scale() + 0.5); src_height = static_cast(src_height / denorm_.scale() + 0.5); int x_middle = (box.left() + box.right()) / 2; int y_middle = (box.top() + box.bottom()) / 2; rotated_blob = new TBLOB(*tessblob); rotated_blob->Move(ICOORD(-x_middle, -y_middle)); rotated_blob->Rotate(denorm_.block()->classify_rotation()); tessblob = rotated_blob; ICOORD median_size = denorm_.block()->median_size(); int tolerance = median_size.x() / 8; // TODO(dsl/rays) find a better normalization solution. In the mean time // make it work for CJK by normalizing for Cap height in the same way // as is applied in compute_block_xheight when the row is presumed to // be ALLCAPS, i.e. the x-height is the fixed fraction // blob height * CCStruct::kXHeightFraction / // (CCStruct::kXHeightFraction + CCStruct::kXAscenderFraction) if (NearlyEqual(src_width, static_cast(median_size.x()), tolerance) && NearlyEqual(src_height, static_cast(median_size.y()), tolerance)) { float target_height = kBlnXHeight * (CCStruct::kXHeightFraction + CCStruct::kAscenderFraction) / CCStruct::kXHeightFraction; rotated_blob->Scale(target_height / box.width()); rotated_blob->Move(ICOORD(0, kBlnBaselineOffset - rotated_blob->bounding_box().bottom())); } } BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result AdaptiveClassifier(tessblob, ratings, NULL); if (rotated_blob != NULL) delete rotated_blob; return ratings; } } // namespace tesseract