Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen
closes #14

Commits:
6317305  doxygen
9f42f69  doxygen
0fc4d52  doxygen
37b4b55  fix typo
bded8f1  some more doxy
020eb00  slight tweak
524666d  doxygenify
2a36a3e  doxygenify
229d218  doxygenify
7fd28ae  doxygenify
a8c64bc  doxygenify
f5d21b6  fix
5d8ede8  doxygenify
a58a4e0  language_model.cpp
fa85709  lm_pain_points.cpp lm_state.cpp
6418da3  merge
06190ba  Merge branch 'old_doxygen_merge' into more-doxygen
84acf08  Merge branch 'master' into more-doxygen
50fe1ff  pagewalk.cpp cube_reco_context.cpp
2982583  change to relative
192a24a  applybox.cpp, take one
8eeb053  delete docs for obsolete params
52e4c77  modernise classify/ocrfeatures.cpp
2a1cba6  modernise cutil/emalloc.cpp
773e006  silence doxygen warning
aeb1731  silence doxygen warning
f18387f  silence doxygen; new params are unused?
15ad6bd  doxygenify cutil/efio.cpp
c8b5dad  doxygenify cutil/danerror.cpp
784450f  the globals and exceptions parts are obsolete; remove
8bca324  doxygen classify/normfeat.cpp
9bcbe16  doxygen classify/normmatch.cpp
aa9a971  doxygen ccmain/cube_control.cpp
c083ff2  doxygen ccmain/cube_reco_context.cpp
f842850  params changed
5c94f12  doxygen ccmain/cubeclassifier.cpp
15ba750  case sensitive
f5c71d4  case sensitive
f85655b  doxygen classify/intproto.cpp
4bbc7aa  partial doxygen classify/mfx.cpp
dbb6041  partial doxygen classify/intproto.cpp
2aa72db  finish doxygen classify/intproto.cpp
0b8de99  doxygen training/mftraining.cpp
0b5b35c  partial doxygen ccstruct/coutln.cpp
b81c766  partial doxygen ccstruct/coutln.cpp
40fc415  finished? doxygen ccstruct/coutln.cpp
6e4165c  doxygen classify/clusttool.cpp
0267dec  doxygen classify/cutoffs.cpp
7f0c70c  doxygen classify/fpoint.cpp
512f3bd  ignore ~ files
5668a52  doxygen classify/intmatcher.cpp
84788d4  doxygen classify/kdtree.cpp
29f36ca  doxygen classify/mfoutline.cpp
40b94b1  silence doxygen warnings
6c511b9  doxygen classify/mfx.cpp
f9b4080  doxygen classify/outfeat.cpp
aa1df05  doxygen classify/picofeat.cpp
cc5f466  doxygen training/cntraining.cpp
cce044f  doxygen training/commontraining.cpp
167e216  missing param
9498383  renamed params
37eeac2  renamed param
d87b5dd  case
c8ee174  renamed params
b858db8  typo
4c2a838  h2 context?
81a2c0c  fix some param names; add some missing params, no docs
bcf8a4c  add some missing params, no docs
af77f86  add some missing params, no docs; fix some param names
01df24e  fix some params
6161056  fix some params
68508b6  fix some params
285aeb6  doxygen complains here no matter what
529bcfa  rm some missing params, typos
cd21226  rm some missing params, add some new ones
48a4bc2  fix params
c844628  missing param
312ce37  missing param; rename one
ec2fdec  missing param
05e15e0  missing params
d515858  change "<" to &lt; to make doxygen happy
b476a28  wrong place
This commit is contained in:
Jim O'Regan 2014-09-12 21:41:19 +01:00
parent 541408763d
commit 524a61452d
47 changed files with 2897 additions and 3111 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*~
# Windows # Windows
*.user *.user
*.log *.log

View File

@ -36,18 +36,22 @@
#include "tesseractclass.h" #include "tesseractclass.h"
#include "genericvector.h" #include "genericvector.h"
// Max number of blobs to classify together in FindSegmentation. /** Max number of blobs to classify together in FindSegmentation. */
const int kMaxGroupSize = 4; const int kMaxGroupSize = 4;
// Max fraction of median allowed as deviation in xheight before switching /// Max fraction of median allowed as deviation in xheight before switching
// to median. /// to median.
const double kMaxXHeightDeviationFraction = 0.125; const double kMaxXHeightDeviationFraction = 0.125;
/************************************************************************* /**
* The box file is assumed to contain box definitions, one per line, of the * The box file is assumed to contain box definitions, one per line, of the
* following format for blob-level boxes: * following format for blob-level boxes:
* @verbatim
* <UTF8 str> <left> <bottom> <right> <top> <page id> * <UTF8 str> <left> <bottom> <right> <top> <page id>
* @endverbatim
* and for word/line-level boxes: * and for word/line-level boxes:
* @verbatim
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str> * WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
* @endverbatim
* NOTES: * NOTES:
* The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT. * The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
* *
@ -62,13 +66,16 @@ const double kMaxXHeightDeviationFraction = 0.125;
* units in the word/line are listed after the # at the end of the line and * units in the word/line are listed after the # at the end of the line and
* are space delimited, ignoring any original spaces on the line. * are space delimited, ignoring any original spaces on the line.
* Eg. * Eg.
* @verbatim
* word -> #w o r d * word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e * multi word line -> #m u l t i w o r d l i n e
* @endverbatim
* The recognizable units must be space-delimited in order to allow multiple * The recognizable units must be space-delimited in order to allow multiple
* unicodes to be used for a single recognizable unit, eg Hindi. * unicodes to be used for a single recognizable unit, eg Hindi.
*
* In this mode, the classifier must have been pre-trained with the desired * In this mode, the classifier must have been pre-trained with the desired
* character set, or it will not be able to find the character segmentations. * character set, or it will not be able to find the character segmentations.
*************************************************************************/ */
namespace tesseract { namespace tesseract {
@ -181,8 +188,8 @@ static double MedianXHeight(BLOCK_LIST *block_list) {
return xheights.median(); return xheights.median();
} }
// Any row xheight that is significantly different from the median is set /// Any row xheight that is significantly different from the median is set
// to the median. /// to the median.
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
double median_xheight = MedianXHeight(block_list); double median_xheight = MedianXHeight(block_list);
double max_deviation = kMaxXHeightDeviationFraction * median_xheight; double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
@ -205,8 +212,8 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
} }
} }
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: /// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
// All fuzzy spaces are removed, and all the words are maximally chopped. /// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes, PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
BLOCK_LIST *block_list) { BLOCK_LIST *block_list) {
PreenXHeights(block_list); PreenXHeights(block_list);
@ -240,9 +247,9 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
return page_res; return page_res;
} }
// Tests the chopper by exhaustively running chop_one_blob. /// Tests the chopper by exhaustively running chop_one_blob.
// The word_res will contain filled chopped_word, seam_array, denorm, /// The word_res will contain filled chopped_word, seam_array, denorm,
// box_word and best_state for the maximally chopped word. /// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes, void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
BLOCK* block, ROW* row, BLOCK* block, ROW* row,
WERD_RES* word_res) { WERD_RES* word_res) {
@ -300,17 +307,17 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]); word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]);
} }
// Helper to compute the dispute resolution metric. /// Helper to compute the dispute resolution metric.
// Disputed blob resolution. The aim is to give the blob to the most /// Disputed blob resolution. The aim is to give the blob to the most
// appropriate boxfile box. Most of the time it is obvious, but if /// appropriate boxfile box. Most of the time it is obvious, but if
// two boxfile boxes overlap significantly it is not. If a small boxfile /// two boxfile boxes overlap significantly it is not. If a small boxfile
// box takes most of the blob, and a large boxfile box does too, then /// box takes most of the blob, and a large boxfile box does too, then
// we want the small boxfile box to get it, but if the small box /// we want the small boxfile box to get it, but if the small box
// is much smaller than the blob, we don't want it to get it. /// is much smaller than the blob, we don't want it to get it.
// Details of the disputed blob resolution: /// Details of the disputed blob resolution:
// Given a box with area A, and a blob with area B, with overlap area C, /// Given a box with area A, and a blob with area B, with overlap area C,
// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum /// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
// miss metric gets the blob. /// miss metric gets the blob.
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) { static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
int overlap_area = box1.intersection(box2).area(); int overlap_area = box1.intersection(box2).area();
double miss_metric = box1.area()- overlap_area; double miss_metric = box1.area()- overlap_area;
@ -320,14 +327,16 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
return miss_metric; return miss_metric;
} }
// Gather consecutive blobs that match the given box into the best_state /// Gather consecutive blobs that match the given box into the best_state
// and corresponding correct_text. /// and corresponding correct_text.
// Fights over which box owns which blobs are settled by pre-chopping and ///
// applying the blobs to box or next_box with the least non-overlap. /// Fights over which box owns which blobs are settled by pre-chopping and
// Returns false if the box was in error, which can only be caused by /// applying the blobs to box or next_box with the least non-overlap.
// failing to find an appropriate blob for a box. /// @return false if the box was in error, which can only be caused by
// This means that occasionally, blobs may be incorrectly segmented if the /// failing to find an appropriate blob for a box.
// chopper fails to find a suitable chop point. ///
/// This means that occasionally, blobs may be incorrectly segmented if the
/// chopper fails to find a suitable chop point.
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box, bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
const TBOX& box, const TBOX& next_box, const TBOX& box, const TBOX& next_box,
const char* correct_text) { const char* correct_text) {
@ -420,12 +429,12 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
return false; // Failure. return false; // Failure.
} }
// Consume all source blobs that strongly overlap the given box, /// Consume all source blobs that strongly overlap the given box,
// putting them into a new word, with the correct_text label. /// putting them into a new word, with the correct_text label.
// Fights over which box owns which blobs are settled by /// Fights over which box owns which blobs are settled by
// applying the blobs to box or next_box with the least non-overlap. /// applying the blobs to box or next_box with the least non-overlap.
// Returns false if the box was in error, which can only be caused by /// @return false if the box was in error, which can only be caused by
// failing to find an overlapping blob for a box. /// failing to find an overlapping blob for a box.
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
const TBOX& box, const TBOX& next_box, const TBOX& box, const TBOX& next_box,
const char* correct_text) { const char* correct_text) {
@ -495,8 +504,8 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
return new_word != NULL; return new_word != NULL;
} }
// Resegments the words by running the classifier in an attempt to find the /// Resegments the words by running the classifier in an attempt to find the
// correct segmentation that produces the required string. /// correct segmentation that produces the required string.
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) { void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res); PAGE_RES_IT pr_it(page_res);
WERD_RES* word_res; WERD_RES* word_res;
@ -521,8 +530,8 @@ void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
} }
} }
// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID. /// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
// Returns false if an invalid UNICHAR_ID is encountered. /// @return false if an invalid UNICHAR_ID is encountered.
bool Tesseract::ConvertStringToUnichars(const char* utf8, bool Tesseract::ConvertStringToUnichars(const char* utf8,
GenericVector<UNICHAR_ID>* class_ids) { GenericVector<UNICHAR_ID>* class_ids) {
for (int step = 0; *utf8 != '\0'; utf8 += step) { for (int step = 0; *utf8 != '\0'; utf8 += step) {
@ -541,12 +550,12 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8,
return true; return true;
} }
// Resegments the word to achieve the target_text from the classifier. /// Resegments the word to achieve the target_text from the classifier.
// Returns false if the re-segmentation fails. /// Returns false if the re-segmentation fails.
// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and /// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
// applies a full search on the classifier results to find the best classified /// applies a full search on the classifier results to find the best classified
// segmentation. As a compromise to obtain better recall, 1-1 ambiguity /// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
// substitutions ARE used. /// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text, bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
WERD_RES* word_res) { WERD_RES* word_res) {
// Classify all required combinations of blobs and save results in choices. // Classify all required combinations of blobs and save results in choices.
@ -603,12 +612,20 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
return true; return true;
} }
// Recursive helper to find a match to the target_text (from text_index /// Recursive helper to find a match to the target_text (from text_index
// position) in the choices (from choices_pos position). /// position) in the choices (from choices_pos position).
// Choices is an array of GenericVectors, of length choices_length, with each /// @param choices is an array of GenericVectors, of length choices_length,
// element representing a starting position in the word, and the /// with each element representing a starting position in the word, and the
// GenericVector holding classification results for a sequence of consecutive /// #GenericVector holding classification results for a sequence of consecutive
// blobs, with index 0 being a single blob, index 1 being 2 blobs etc. /// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
/// @param choices_pos
/// @param choices_length
/// @param target_text
/// @param text_index
/// @param rating
/// @param segmentation
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices, void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
int choices_pos, int choices_length, int choices_pos, int choices_length,
const GenericVector<UNICHAR_ID>& target_text, const GenericVector<UNICHAR_ID>& target_text,
@ -682,10 +699,10 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
} }
} }
// Counts up the labelled words and the blobs within. /// - Counts up the labelled words and the blobs within.
// Deletes all unused or emptied words, counting the unused ones. /// - Deletes all unused or emptied words, counting the unused ones.
// Resets W_BOL and W_EOL flags correctly. /// - Resets W_BOL and W_EOL flags correctly.
// Builds the rebuild_word and rebuilds the box_word and the best_choice. /// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
void Tesseract::TidyUp(PAGE_RES* page_res) { void Tesseract::TidyUp(PAGE_RES* page_res) {
int ok_blob_count = 0; int ok_blob_count = 0;
int bad_blob_count = 0; int bad_blob_count = 0;
@ -743,7 +760,7 @@ void Tesseract::TidyUp(PAGE_RES* page_res) {
} }
} }
// Logs a bad box by line in the box file and box coords. /** Logs a bad box by line in the box file and box coords.*/
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
const char *box_ch, const char *err_msg) { const char *box_ch, const char *err_msg) {
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n", tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n",
@ -751,7 +768,7 @@ void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
box.left(), box.bottom(), box.right(), box.top(), err_msg); box.left(), box.bottom(), box.right(), box.top(), err_msg);
} }
// Creates a fake best_choice entry in each WERD_RES with the correct text. /** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) { void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res); PAGE_RES_IT pr_it(page_res);
for (WERD_RES *word_res = pr_it.word(); word_res != NULL; for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
@ -774,8 +791,8 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
} }
} }
// Calls LearnWord to extract features for labelled blobs within each word. /// Calls #LearnWord to extract features for labelled blobs within each word.
// Features are stored in an internal buffer. /// Features are stored in an internal buffer.
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) { void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
PAGE_RES_IT pr_it(page_res); PAGE_RES_IT pr_it(page_res);
int word_count = 0; int word_count = 0;

View File

@ -59,8 +59,6 @@ const double kMinRefitXHeightFraction = 0.5;
/** /**
* recog_pseudo_word
*
* Make a word from the selected blobs and run Tess on them. * Make a word from the selected blobs and run Tess on them.
* *
* @param page_res recognise blobs * @param page_res recognise blobs
@ -79,13 +77,9 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
/** /**
* recog_interactive
*
* Recognize a single word in interactive mode. * Recognize a single word in interactive mode.
* *
* @param block block * @param pr_it the page results iterator
* @param row row of word
* @param word_res word to recognise
*/ */
BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) { BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
inT16 char_qual; inT16 char_qual;
@ -150,7 +144,7 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
return true; return true;
} }
// If tesseract is to be run, sets the words up ready for it. /** If tesseract is to be run, sets the words up ready for it. */
void Tesseract::SetupAllWordsPassN(int pass_n, void Tesseract::SetupAllWordsPassN(int pass_n,
const TBOX* target_word_box, const TBOX* target_word_box,
const char* word_config, const char* word_config,

View File

@ -21,24 +21,24 @@
namespace tesseract { namespace tesseract {
/********************************************************************** /**
* convert_prob_to_tess_certainty * @name convert_prob_to_tess_certainty
* *
* Normalize a probability in the range [0.0, 1.0] to a tesseract * Normalize a probability in the range [0.0, 1.0] to a tesseract
* certainty in the range [-20.0, 0.0] * certainty in the range [-20.0, 0.0]
**********************************************************************/ */
static float convert_prob_to_tess_certainty(float prob) { static float convert_prob_to_tess_certainty(float prob) {
return (prob - 1.0) * 20.0; return (prob - 1.0) * 20.0;
} }
/********************************************************************** /**
* char_box_to_tbox * @name char_box_to_tbox
* *
* Create a TBOX from a character bounding box. If nonzero, the * Create a TBOX from a character bounding box. If nonzero, the
* x_offset accounts for any additional padding of the word box that * x_offset accounts for any additional padding of the word box that
* should be taken into account. * should be taken into account.
* *
**********************************************************************/ */
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) { TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
l_int32 left; l_int32 left;
l_int32 top; l_int32 top;
@ -55,13 +55,13 @@ TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
return TBOX(left, bottom, right, top); return TBOX(left, bottom, right, top);
} }
/********************************************************************** /**
* extract_cube_state * @name extract_cube_state
* *
* Extract CharSamp objects and character bounding boxes from the * Extract CharSamp objects and character bounding boxes from the
* CubeObject's state. The caller should free both structures. * CubeObject's state. The caller should free both structures.
* *
**********************************************************************/ */
bool Tesseract::extract_cube_state(CubeObject* cube_obj, bool Tesseract::extract_cube_state(CubeObject* cube_obj,
int* num_chars, int* num_chars,
Boxa** char_boxes, Boxa** char_boxes,
@ -104,15 +104,15 @@ bool Tesseract::extract_cube_state(CubeObject* cube_obj,
return true; return true;
} }
/********************************************************************** /**
* create_cube_box_word * @name create_cube_box_word
* *
* Fill the given BoxWord with boxes from character bounding * Fill the given BoxWord with boxes from character bounding
* boxes. The char_boxes have local coordinates w.r.t. the * boxes. The char_boxes have local coordinates w.r.t. the
* word bounding box, i.e., the left-most character bbox of each word * word bounding box, i.e., the left-most character bbox of each word
* has (0,0) left-top coord, but the BoxWord must be defined in page * has (0,0) left-top coord, but the BoxWord must be defined in page
* coordinates. * coordinates.
**********************************************************************/ */
bool Tesseract::create_cube_box_word(Boxa *char_boxes, bool Tesseract::create_cube_box_word(Boxa *char_boxes,
int num_chars, int num_chars,
TBOX word_box, TBOX word_box,
@ -144,13 +144,13 @@ bool Tesseract::create_cube_box_word(Boxa *char_boxes,
return true; return true;
} }
/********************************************************************** /**
* init_cube_objects * @name init_cube_objects
* *
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner. * Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
* Returns false if cube context could not be created or if load_combiner is * Returns false if cube context could not be created or if load_combiner is
* true, but the combiner could not be loaded. * true, but the combiner could not be loaded.
**********************************************************************/ */
bool Tesseract::init_cube_objects(bool load_combiner, bool Tesseract::init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager) { TessdataManager *tessdata_manager) {
ASSERT_HOST(cube_cntxt_ == NULL); ASSERT_HOST(cube_cntxt_ == NULL);
@ -184,12 +184,12 @@ bool Tesseract::init_cube_objects(bool load_combiner,
return true; return true;
} }
/********************************************************************** /**
* run_cube_combiner * @name run_cube_combiner
* *
* Iterates through tesseract's results and calls cube on each word, * Iterates through tesseract's results and calls cube on each word,
* combining the results with the existing tesseract result. * combining the results with the existing tesseract result.
**********************************************************************/ */
void Tesseract::run_cube_combiner(PAGE_RES *page_res) { void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
if (page_res == NULL || tess_cube_combiner_ == NULL) if (page_res == NULL || tess_cube_combiner_ == NULL)
return; return;
@ -226,23 +226,23 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
} }
} }
/********************************************************************** /**
* cube_word_pass1 * @name cube_word_pass1
* *
* Recognizes a single word using (only) cube. Compatible with * Recognizes a single word using (only) cube. Compatible with
* Tesseract's classify_word_pass1/classify_word_pass2. * Tesseract's classify_word_pass1/classify_word_pass2.
**********************************************************************/ */
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) { void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
CubeObject *cube_obj = cube_recognize_word(block, word); CubeObject *cube_obj = cube_recognize_word(block, word);
delete cube_obj; delete cube_obj;
} }
/********************************************************************** /**
* cube_recognize_word * @name cube_recognize_word
* *
* Cube recognizer to recognize a single word as with classify_word_pass1 * Cube recognizer to recognize a single word as with classify_word_pass1
* but also returns the cube object in case the combiner is needed. * but also returns the cube object in case the combiner is needed.
**********************************************************************/ */
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) { CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
if (!cube_binary_ || !cube_cntxt_) { if (!cube_binary_ || !cube_cntxt_) {
if (cube_debug_level > 0 && !cube_binary_) if (cube_debug_level > 0 && !cube_binary_)
@ -274,12 +274,12 @@ CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
return cube_obj; return cube_obj;
} }
/********************************************************************** /**
* cube_combine_word * @name cube_combine_word
* *
* Combines the cube and tesseract results for a single word, leaving the * Combines the cube and tesseract results for a single word, leaving the
* result in tess_word. * result in tess_word.
**********************************************************************/ */
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word, void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
WERD_RES* tess_word) { WERD_RES* tess_word) {
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word, float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
@ -317,12 +317,12 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
tess_word->ConsumeWordResults(cube_word); tess_word->ConsumeWordResults(cube_word);
} }
/********************************************************************** /**
* cube_recognize * @name cube_recognize
* *
* Call cube on the current word, and write the result to word. * Call cube on the current word, and write the result to word.
* Sets up a fake result and returns false if something goes wrong. * Sets up a fake result and returns false if something goes wrong.
**********************************************************************/ */
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block, bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
WERD_RES *word) { WERD_RES *word) {
// Run cube // Run cube
@ -404,12 +404,12 @@ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
return true; return true;
} }
/********************************************************************** /**
* fill_werd_res * @name fill_werd_res
* *
* Fill Tesseract's word result fields with cube's. * Fill Tesseract's word result fields with cube's.
* *
**********************************************************************/ */
void Tesseract::fill_werd_res(const BoxWord& cube_box_word, void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
const char* cube_best_str, const char* cube_best_str,
WERD_RES* tess_werd_res) { WERD_RES* tess_werd_res) {

View File

@ -32,11 +32,13 @@
namespace tesseract { namespace tesseract {
// Instantiate a CubeRecoContext object using a Tesseract object. /**
// CubeRecoContext will not take ownership of tess_obj, but will * Instantiate a CubeRecoContext object using a Tesseract object.
// record the pointer to it and will make use of various Tesseract * CubeRecoContext will not take ownership of tess_obj, but will
// components (language model, flags, etc). Thus the caller should * record the pointer to it and will make use of various Tesseract
// keep tess_obj alive so long as the instantiated CubeRecoContext is used. * components (language model, flags, etc). Thus the caller should
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
*/
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) { CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
tess_obj_ = tess_obj; tess_obj_ = tess_obj;
lang_ = ""; lang_ = "";
@ -89,23 +91,27 @@ CubeRecoContext::~CubeRecoContext() {
} }
} }
// Returns the path of the data files by looking up the TESSDATA_PREFIX /**
// environment variable and appending a "tessdata" directory to it * Returns the path of the data files by looking up the TESSDATA_PREFIX
* environment variable and appending a "tessdata" directory to it
*/
bool CubeRecoContext::GetDataFilePath(string *path) const { bool CubeRecoContext::GetDataFilePath(string *path) const {
*path = tess_obj_->datadir.string(); *path = tess_obj_->datadir.string();
return true; return true;
} }
// The object initialization function that loads all the necessary /**
// components of a RecoContext. TessdataManager is used to load the * The object initialization function that loads all the necessary
// data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET * components of a RecoContext. TessdataManager is used to load the
// component is present, Cube will be instantiated with the unicharset * data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
// specified in this component and the corresponding dictionary * component is present, Cube will be instantiated with the unicharset
// (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to * specified in this component and the corresponding dictionary
// Tesseract's. Otherwise, TessdataManager will assume that Cube will * (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
// be using Tesseract's unicharset and dawgs, and will load the * Tesseract's. Otherwise, TessdataManager will assume that Cube will
// unicharset from the TESSDATA_UNICHARSET component and will load the * be using Tesseract's unicharset and dawgs, and will load the
// dawgs from TESSDATA_*_DAWG components. * unicharset from the TESSDATA_UNICHARSET component and will load the
* dawgs from TESSDATA_*_DAWG components.
*/
bool CubeRecoContext::Load(TessdataManager *tessdata_manager, bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset) { UNICHARSET *tess_unicharset) {
ASSERT_HOST(tess_obj_ != NULL); ASSERT_HOST(tess_obj_ != NULL);
@ -178,7 +184,7 @@ bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
return true; return true;
} }
// Creates a CubeRecoContext object using a tesseract object /** Creates a CubeRecoContext object using a tesseract object */
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj, CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
TessdataManager *tessdata_manager, TessdataManager *tessdata_manager,
UNICHARSET *tess_unicharset) { UNICHARSET *tess_unicharset) {

View File

@ -39,8 +39,8 @@ CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
CubeClassifier::~CubeClassifier() { CubeClassifier::~CubeClassifier() {
} }
// Classifies the given [training] sample, writing to results. /// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description. /// See ShapeClassifier for a full description.
int CubeClassifier::UnicharClassifySample( int CubeClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug, const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) { UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
@ -70,7 +70,7 @@ int CubeClassifier::UnicharClassifySample(
return results->size(); return results->size();
} }
// Provides access to the ShapeTable that this classifier works with. /** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeClassifier::GetShapeTable() const { const ShapeTable* CubeClassifier::GetShapeTable() const {
return &shape_table_; return &shape_table_;
} }
@ -84,8 +84,8 @@ CubeTessClassifier::~CubeTessClassifier() {
delete pruner_; delete pruner_;
} }
// Classifies the given [training] sample, writing to results. /// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description. /// See ShapeClassifier for a full description.
int CubeTessClassifier::UnicharClassifySample( int CubeTessClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug, const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) { UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
@ -123,7 +123,7 @@ int CubeTessClassifier::UnicharClassifySample(
return results->size(); return results->size();
} }
// Provides access to the ShapeTable that this classifier works with. /** Provides access to the ShapeTable that this classifier works with. */
const ShapeTable* CubeTessClassifier::GetShapeTable() const { const ShapeTable* CubeTessClassifier::GetShapeTable() const {
return &shape_table_; return &shape_table_;
} }

View File

@ -20,13 +20,13 @@
#include "pageres.h" #include "pageres.h"
#include "tesseractclass.h" #include "tesseractclass.h"
namespace tesseract {
/** /**
* process_selected_words() * @name process_selected_words()
* *
* Walk the current block list applying the specified word processor function * Walk the current block list applying the specified word processor function
* to each word that overlaps the selection_box. * to each word that overlaps the selection_box.
*/ */
namespace tesseract {
void Tesseract::process_selected_words( void Tesseract::process_selected_words(
PAGE_RES* page_res, // blocks to check PAGE_RES* page_res, // blocks to check
TBOX & selection_box, TBOX & selection_box,

View File

@ -38,18 +38,19 @@ ICOORD C_OUTLINE::step_coords[4] = {
ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1) ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1)
}; };
/********************************************************************** /**
* C_OUTLINE::C_OUTLINE * @name C_OUTLINE::C_OUTLINE
* *
* Constructor to build a C_OUTLINE from a CRACKEDGE LOOP. * Constructor to build a C_OUTLINE from a CRACKEDGE LOOP.
**********************************************************************/ * @param startpt outline to convert
* @param bot_left bounding box
* @param top_right bounding box
* @param length length of loop
*/
C_OUTLINE::C_OUTLINE ( C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
//constructor ICOORD top_right, inT16 length)
CRACKEDGE * startpt, //outline to convert : box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
ICOORD bot_left, //bounding box
ICOORD top_right, inT16 length //length of loop
):box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
inT16 stepindex; //index to step inT16 stepindex; //index to step
CRACKEDGE *edgept; //current point CRACKEDGE *edgept; //current point
@ -71,11 +72,11 @@ ICOORD top_right, inT16 length //length of loop
} }
/********************************************************************** /**
* C_OUTLINE::C_OUTLINE * @name C_OUTLINE::C_OUTLINE
* *
* Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG. * Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG.
**********************************************************************/ */
C_OUTLINE::C_OUTLINE ( C_OUTLINE::C_OUTLINE (
//constructor //constructor
//steps to copy //steps to copy
@ -130,16 +131,15 @@ inT16 length //length of loop
ASSERT_HOST (stepcount >= 4); ASSERT_HOST (stepcount >= 4);
} }
/********************************************************************** /**
* C_OUTLINE::C_OUTLINE * @name C_OUTLINE::C_OUTLINE
* *
* Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE. * Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE.
**********************************************************************/ * @param srcline outline to rotate
* @param rotation rotate to coord
*/
C_OUTLINE::C_OUTLINE( //constructor C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) {
C_OUTLINE *srcline, //outline to
FCOORD rotation //rotate
) : offsets(NULL) {
TBOX new_box; //easy bounding TBOX new_box; //easy bounding
inT16 stepindex; //index to step inT16 stepindex; //index to step
inT16 dirdiff; //direction change inT16 dirdiff; //direction change
@ -247,11 +247,11 @@ void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) {
ol_it.add_to_end(outline); ol_it.add_to_end(outline);
} }
/********************************************************************** /**
* C_OUTLINE::area * @name C_OUTLINE::area
* *
* Compute the area of the outline. * Compute the area of the outline.
**********************************************************************/ */
inT32 C_OUTLINE::area() const { inT32 C_OUTLINE::area() const {
int stepindex; //current step int stepindex; //current step
@ -281,11 +281,11 @@ inT32 C_OUTLINE::area() const {
return total; return total;
} }
/********************************************************************** /**
* C_OUTLINE::perimeter * @name C_OUTLINE::perimeter
* *
* Compute the perimeter of the outline and its first level children. * Compute the perimeter of the outline and its first level children.
**********************************************************************/ */
inT32 C_OUTLINE::perimeter() const { inT32 C_OUTLINE::perimeter() const {
inT32 total_steps; // Return value. inT32 total_steps; // Return value.
@ -301,11 +301,11 @@ inT32 C_OUTLINE::perimeter() const {
} }
/********************************************************************** /**
* C_OUTLINE::outer_area * @name C_OUTLINE::outer_area
* *
* Compute the area of the outline. * Compute the area of the outline.
**********************************************************************/ */
inT32 C_OUTLINE::outer_area() const { inT32 C_OUTLINE::outer_area() const {
int stepindex; //current step int stepindex; //current step
@ -333,15 +333,14 @@ inT32 C_OUTLINE::outer_area() const {
} }
/********************************************************************** /**
* C_OUTLINE::count_transitions * @name C_OUTLINE::count_transitions
* *
* Compute the number of x and y maxes and mins in the outline. * Compute the number of x and y maxes and mins in the outline.
**********************************************************************/ * @param threshold winding number on size
*/
inT32 C_OUTLINE::count_transitions( //winding number inT32 C_OUTLINE::count_transitions(inT32 threshold) {
inT32 threshold //on size
) {
BOOL8 first_was_max_x; //what was first BOOL8 first_was_max_x; //what was first
BOOL8 first_was_max_y; BOOL8 first_was_max_y;
BOOL8 looking_for_max_x; //what is next BOOL8 looking_for_max_x; //what is next
@ -461,16 +460,15 @@ inT32 C_OUTLINE::count_transitions( //winding number
} }
/********************************************************************** /**
* C_OUTLINE::operator< * @name C_OUTLINE::operator<
* *
* Return TRUE if the left operand is inside the right one. * @return TRUE if the left operand is inside the right one.
**********************************************************************/ * @param other other outline
*/
BOOL8 BOOL8
C_OUTLINE::operator< ( //winding number C_OUTLINE::operator< (const C_OUTLINE & other) const
const C_OUTLINE & other //other outline
) const
{ {
inT16 count = 0; //winding count inT16 count = 0; //winding count
ICOORD pos; //position of point ICOORD pos; //position of point
@ -498,15 +496,14 @@ const C_OUTLINE & other //other outline
} }
/********************************************************************** /**
* C_OUTLINE::winding_number * @name C_OUTLINE::winding_number
* *
* Return the winding number of the outline around the given point. * @return the winding number of the outline around the given point.
**********************************************************************/ * @param point point to wind around
*/
inT16 C_OUTLINE::winding_number( //winding number inT16 C_OUTLINE::winding_number(ICOORD point) const {
ICOORD point //point to wind around
) const {
inT16 stepindex; //index to cstep inT16 stepindex; //index to cstep
inT16 count; //winding count inT16 count; //winding count
ICOORD vec; //to current point ICOORD vec; //to current point
@ -538,11 +535,11 @@ inT16 C_OUTLINE::winding_number( //winding number
} }
/********************************************************************** /**
* C_OUTLINE::turn_direction * C_OUTLINE::turn_direction
* *
* Return the sum direction delta of the outline. * @return the sum direction delta of the outline.
**********************************************************************/ */
inT16 C_OUTLINE::turn_direction() const { //winding number inT16 C_OUTLINE::turn_direction() const { //winding number
DIR128 prevdir; //previous direction DIR128 prevdir; //previous direction
@ -567,11 +564,11 @@ inT16 C_OUTLINE::turn_direction() const { //winding number
} }
/********************************************************************** /**
* C_OUTLINE::reverse * @name C_OUTLINE::reverse
* *
* Reverse the direction of an outline. * Reverse the direction of an outline.
**********************************************************************/ */
void C_OUTLINE::reverse() { //reverse drection void C_OUTLINE::reverse() { //reverse drection
DIR128 halfturn = MODULUS / 2; //amount to shift DIR128 halfturn = MODULUS / 2; //amount to shift
@ -590,15 +587,14 @@ void C_OUTLINE::reverse() { //reverse drection
} }
/********************************************************************** /**
* C_OUTLINE::move * @name C_OUTLINE::move
* *
* Move C_OUTLINE by vector * Move C_OUTLINE by vector
**********************************************************************/ * @param vec vector to reposition OUTLINE by
*/
void C_OUTLINE::move( // reposition OUTLINE void C_OUTLINE::move(const ICOORD vec) {
const ICOORD vec // by vector
) {
C_OUTLINE_IT it(&children); // iterator C_OUTLINE_IT it(&children); // iterator
box.move (vec); box.move (vec);
@ -608,10 +604,12 @@ void C_OUTLINE::move( // reposition OUTLINE
it.data ()->move (vec); // move child outlines it.data ()->move (vec); // move child outlines
} }
// Returns true if *this and its children are legally nested. /**
// The outer area of a child should have the opposite sign to the * Returns true if *this and its children are legally nested.
// parent. If not, it means we have discarded an outline in between * The outer area of a child should have the opposite sign to the
// (probably due to excessive length). * parent. If not, it means we have discarded an outline in between
* (probably due to excessive length).
*/
bool C_OUTLINE::IsLegallyNested() const { bool C_OUTLINE::IsLegallyNested() const {
if (stepcount == 0) return true; if (stepcount == 0) return true;
int parent_area = outer_area(); int parent_area = outer_area();
@ -626,11 +624,15 @@ bool C_OUTLINE::IsLegallyNested() const {
return true; return true;
} }
// If this outline is smaller than the given min_size, delete this and /**
// remove from its list, via *it, after checking that *it points to this. * If this outline is smaller than the given min_size, delete this and
// Otherwise, if any children of this are too small, delete them. * remove from its list, via *it, after checking that *it points to this.
// On entry, *it must be an iterator pointing to this. If this gets deleted * Otherwise, if any children of this are too small, delete them.
// then this is extracted from *it, so an iteration can continue. * On entry, *it must be an iterator pointing to this. If this gets deleted
* then this is extracted from *it, so an iteration can continue.
* @param min_size minimum size for outline
* @param it outline iterator
*/
void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) { void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
if (box.width() < min_size || box.height() < min_size) { if (box.width() < min_size || box.height() < min_size) {
ASSERT_HOST(this == it->data()); ASSERT_HOST(this == it->data());
@ -650,9 +652,11 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
// on data from an 8-bit Pix, and assume that any input x and/or y are already // on data from an 8-bit Pix, and assume that any input x and/or y are already
// constrained to be legal Pix coordinates. // constrained to be legal Pix coordinates.
// Helper computes the local 2-D gradient (dx, dy) from the 2x2 cell centered /**
// on the given (x,y). If the cell would go outside the image, it is padded * Helper computes the local 2-D gradient (dx, dy) from the 2x2 cell centered
// with white. * on the given (x,y). If the cell would go outside the image, it is padded
* with white.
*/
static void ComputeGradient(const l_uint32* data, int wpl, static void ComputeGradient(const l_uint32* data, int wpl,
int x, int y, int width, int height, int x, int y, int width, int height,
ICOORD* gradient) { ICOORD* gradient) {
@ -669,9 +673,11 @@ static void ComputeGradient(const l_uint32* data, int wpl,
gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y)); gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
} }
// Helper evaluates a vertical difference, (x,y) - (x,y-1), returning true if /**
// the difference, matches diff_sign and updating the best_diff, best_sum, * Helper evaluates a vertical difference, (x,y) - (x,y-1), returning true if
// best_y if a new max. * the difference, matches diff_sign and updating the best_diff, best_sum,
* best_y if a new max.
*/
static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
int x, int y, int height, int x, int y, int height,
int* best_diff, int* best_sum, int* best_y) { int* best_diff, int* best_sum, int* best_y) {
@ -689,9 +695,11 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
return diff > 0; return diff > 0;
} }
// Helper evaluates a horizontal difference, (x,y) - (x-1,y), where y is implied /**
// by the input image line, returning true if the difference matches diff_sign * Helper evaluates a horizontal difference, (x,y) - (x-1,y), where y is implied
// and updating the best_diff, best_sum, best_x if a new max. * by the input image line, returning true if the difference matches diff_sign
* and updating the best_diff, best_sum, best_x if a new max.
*/
static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
int x, int width, int x, int width,
int* best_diff, int* best_sum, int* best_x) { int* best_diff, int* best_sum, int* best_x) {
@ -708,17 +716,21 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
return diff > 0; return diff > 0;
} }
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied /**
// pix is 8-bit. Does nothing otherwise. * Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
// Operation: Consider the following near-horizontal line: * pix is 8-bit. Does nothing otherwise.
// _________ * Operation: Consider the following near-horizontal line:
// |________ * @verbatim
// |________ * _________
// At *every* position along this line, the gradient direction will be close * |________
// to vertical. Extrapolation/interpolation of the position of the threshold * |________
// that was used to binarize the image gives a more precise vertical position * @endverbatim
// for each horizontal step, and the conflict in step direction and gradient * At *every* position along this line, the gradient direction will be close
// direction can be used to ignore the vertical steps. * to vertical. Extrapolation/interpolation of the position of the threshold
* that was used to binarize the image gives a more precise vertical position
* for each horizontal step, and the conflict in step direction and gradient
* direction can be used to ignore the vertical steps.
*/
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) { void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) {
if (pixGetDepth(pix) != 8) return; if (pixGetDepth(pix) != 8) return;
const l_uint32* data = pixGetData(pix); const l_uint32* data = pixGetData(pix);
@ -807,30 +819,35 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) {
} }
} }
// Adds sub-pixel resolution EdgeOffsets for the outline using only /**
// a binary image source. * Adds sub-pixel resolution EdgeOffsets for the outline using only
// Runs a sliding window of 5 edge steps over the outline, maintaining a count * a binary image source.
// of the number of steps in each of the 4 directions in the window, and a *
// sum of the x or y position of each step (as appropriate to its direction.) * Runs a sliding window of 5 edge steps over the outline, maintaining a count
// Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the * of the number of steps in each of the 4 directions in the window, and a
// perpendicular direction. Eg * sum of the x or y position of each step (as appropriate to its direction.)
// ___ ___ Chain code from the left: * Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the
// |___ ___ ___| 222122212223221232223000 * perpendicular direction. Eg
// |___| |_| Corresponding counts of each direction: * @verbatim
// 0 00000000000000000123 * ___ ___ Chain code from the left:
// 1 11121111001111100000 * |___ ___ ___| 222122212223221232223000
// 2 44434443443333343321 * |___| |_| Corresponding counts of each direction:
// 3 00000001111111112111 * 0 00000000000000000123
// Count of direction at center 41434143413313143313 * 1 11121111001111100000
// Step gets used? YNYYYNYYYNYYNYNYYYyY (y= U-turn exception) * 2 44434443443333343321
// Path redrawn showing only the used points: * 3 00000001111111112111
// ___ ___ * Count of direction at center 41434143413313143313
// ___ ___ ___| * Step gets used? YNYYYNYYYNYYNYNYYYyY (y= U-turn exception)
// ___ _ * Path redrawn showing only the used points:
// Sub-pixel edge position cannot be shown well with ASCII-art, but each * ___ ___
// horizontal step's y position is the mean of the y positions of the steps * ___ ___ ___|
// in the same direction in the sliding window, which makes a much smoother * ___ _
// outline, without losing important detail. * @endverbatim
* Sub-pixel edge position cannot be shown well with ASCII-art, but each
* horizontal step's y position is the mean of the y positions of the steps
* in the same direction in the sliding window, which makes a much smoother
* outline, without losing important detail.
*/
void C_OUTLINE::ComputeBinaryOffsets() { void C_OUTLINE::ComputeBinaryOffsets() {
delete [] offsets; delete [] offsets;
offsets = new EdgeOffset[stepcount]; offsets = new EdgeOffset[stepcount];
@ -885,8 +902,10 @@ void C_OUTLINE::ComputeBinaryOffsets() {
} }
} }
// Renders the outline to the given pix, with left and top being /**
// the coords of the upper-left corner of the pix. * Renders the outline to the given pix, with left and top being
* the coords of the upper-left corner of the pix.
*/
void C_OUTLINE::render(int left, int top, Pix* pix) const { void C_OUTLINE::render(int left, int top, Pix* pix) const {
ICOORD pos = start; ICOORD pos = start;
for (int stepindex = 0; stepindex < stepcount; ++stepindex) { for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
@ -902,8 +921,13 @@ void C_OUTLINE::render(int left, int top, Pix* pix) const {
} }
} }
// Renders just the outline to the given pix (no fill), with left and top /**
// being the coords of the upper-left corner of the pix. * Renders just the outline to the given pix (no fill), with left and top
* being the coords of the upper-left corner of the pix.
* @param left coord
* @param top coord
* @param pix the pix to outline
*/
void C_OUTLINE::render_outline(int left, int top, Pix* pix) const { void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
ICOORD pos = start; ICOORD pos = start;
for (int stepindex = 0; stepindex < stepcount; ++stepindex) { for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
@ -921,17 +945,17 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
} }
} }
/********************************************************************** /**
* C_OUTLINE::plot * @name C_OUTLINE::plot
* *
* Draw the outline in the given colour. * Draw the outline in the given colour.
**********************************************************************/ * @param window window to draw in
* @param colour colour to draw in
*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void C_OUTLINE::plot( //draw it void C_OUTLINE::plot(ScrollView* window,
ScrollView* window, // window to draw in ScrollView::Color colour) const {
ScrollView::Color colour // colour to draw in
) const {
inT16 stepindex; // index to cstep inT16 stepindex; // index to cstep
ICOORD pos; // current position ICOORD pos; // current position
DIR128 stepdir; // direction of step DIR128 stepdir; // direction of step
@ -958,8 +982,11 @@ void C_OUTLINE::plot( //draw it
window->DrawTo(pos.x(), pos.y()); window->DrawTo(pos.x(), pos.y());
} }
} }
// Draws the outline in the given colour, normalized using the given denorm,
// making use of sub-pixel accurate information if available. /**
* Draws the outline in the given colour, normalized using the given denorm,
* making use of sub-pixel accurate information if available.
*/
void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
ScrollView* window) const { ScrollView* window) const {
window->Pen(colour); window->Pen(colour);
@ -990,16 +1017,14 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
#endif #endif
/********************************************************************** /**
* C_OUTLINE::operator= * @name C_OUTLINE::operator=
* *
* Assignment - deep copy data * Assignment - deep copy data
**********************************************************************/ * @param source assign from this
*/
//assignment C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) {
C_OUTLINE & C_OUTLINE::operator= (
const C_OUTLINE & source //from this
) {
box = source.box; box = source.box;
start = source.start; start = source.start;
if (steps != NULL) if (steps != NULL)
@ -1020,10 +1045,12 @@ const C_OUTLINE & source //from this
return *this; return *this;
} }
// Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals /**
// by the step, increment, and vertical step ? x : y position * increment * Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
// at step s Mod stepcount respectively. Used to add or subtract the * by the step, increment, and vertical step ? x : y position * increment
// direction and position to/from accumulators of a small neighbourhood. * at step s Mod stepcount respectively. Used to add or subtract the
* direction and position to/from accumulators of a small neighbourhood.
*/
void C_OUTLINE::increment_step(int s, int increment, ICOORD* pos, void C_OUTLINE::increment_step(int s, int increment, ICOORD* pos,
int* dir_counts, int* pos_totals) const { int* dir_counts, int* pos_totals) const {
int step_index = Modulo(s, stepcount); int step_index = Modulo(s, stepcount);

View File

@ -144,7 +144,7 @@ class TessdataManager {
/** /**
* Opens the given data file and reads the offset table. * Opens the given data file and reads the offset table.
* Returns true on success. * @return true on success.
*/ */
bool Init(const char *data_file_name, int debug_level); bool Init(const char *data_file_name, int debug_level);

View File

@ -24,13 +24,13 @@ namespace tesseract {
extern const char *kUTF8LineSeparator; extern const char *kUTF8LineSeparator;
extern const char *kUTF8ParagraphSeparator; extern const char *kUTF8ParagraphSeparator;
extern const char *kLRM; // Left-to-Right Mark extern const char *kLRM; //< Left-to-Right Mark
extern const char *kRLM; // Right-to-Left Mark extern const char *kRLM; //< Right-to-Left Mark
extern const char *kRLE; // Right-to-Left Embedding extern const char *kRLE; //< Right-to-Left Embedding
extern const char *kPDF; // Pop Directional Formatting extern const char *kPDF; //< Pop Directional Formatting
// The following are confusable internal word punctuation symbols /// The following are confusable internal word punctuation symbols
// which we normalize to the first variant when matching in dawgs. /// which we normalize to the first variant when matching in dawgs.
extern const char *kHyphenLikeUTF8[]; extern const char *kHyphenLikeUTF8[];
extern const char *kApostropheLikeUTF8[]; extern const char *kApostropheLikeUTF8[];

View File

@ -841,8 +841,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob,
* *
* Globals: none * Globals: none
* *
* @param Word current word * @param word current word
* @param BestChoiceWord best overall choice for word with context
* *
* @return TRUE or FALSE * @return TRUE or FALSE
* @note Exceptions: none * @note Exceptions: none
@ -1007,7 +1006,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
/*---------------------------------------------------------------------------*/
/** /**
* This routine adds the result of a classification into * This routine adds the result of a classification into
* Results. If the new rating is much worse than the current * Results. If the new rating is much worse than the current
@ -1022,14 +1020,8 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
* Globals: * Globals:
* - #matcher_bad_match_pad defines limits of an acceptable match * - #matcher_bad_match_pad defines limits of an acceptable match
* *
* @param new_result new result to add
* @param[out] results results to add new result to * @param[out] results results to add new result to
* @param class_id class of new result
* @param shape_id shape index
* @param rating rating of new result
* @param adapted adapted match or not
* @param config config id of new result
* @param fontinfo_id font information of the new result
* @param fontinfo_id2 font information of the 2nd choice result
* *
* @note Exceptions: none * @note Exceptions: none
* @note History: Tue Mar 12 18:19:29 1991, DSJ, Created. * @note History: Tue Mar 12 18:19:29 1991, DSJ, Created.
@ -1077,11 +1069,13 @@ void Classify::AddNewResult(const UnicharRating& new_result,
* - #AllProtosOn mask that enables all protos * - #AllProtosOn mask that enables all protos
* - #AllConfigsOn mask that enables all configs * - #AllConfigsOn mask that enables all configs
* *
* @param Blob blob to be classified * @param blob blob to be classified
* @param Templates built-in templates to classify against * @param templates built-in templates to classify against
* @param Classes adapted class templates * @param classes adapted class templates
* @param Ambiguities array of class id's to match against * @param ambiguities array of unichar id's to match against
* @param[out] Results place to put match results * @param[out] results place to put match results
* @param int_features
* @param fx_info
* *
* @note Exceptions: none * @note Exceptions: none
* @note History: Tue Mar 12 19:40:36 1991, DSJ, Created. * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
@ -1301,6 +1295,8 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
* @param Blob blob to be classified * @param Blob blob to be classified
* @param Templates current set of adapted templates * @param Templates current set of adapted templates
* @param Results place to put match results * @param Results place to put match results
* @param int_features
* @param fx_info
* *
* @return Array of possible ambiguous chars that should be checked. * @return Array of possible ambiguous chars that should be checked.
* @note Exceptions: none * @note Exceptions: none
@ -1343,9 +1339,9 @@ UNICHAR_ID *Classify::BaselineClassifier(
* specified set of templates. The classes which match * specified set of templates. The classes which match
* are added to Results. * are added to Results.
* *
* @param Blob blob to be classified * @param blob blob to be classified
* @param Templates templates to classify unknown against * @param sample templates to classify unknown against
* @param Results place to put match results * @param adapt_results place to put match results
* *
* Globals: * Globals:
* - CharNormCutoffs expected num features for each class * - CharNormCutoffs expected num features for each class
@ -1438,7 +1434,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
* blob. NOTE: assumes that the blob length has already been * blob. NOTE: assumes that the blob length has already been
* computed and placed into Results. * computed and placed into Results.
* *
* @param Results results to add noise classification to * @param results results to add noise classification to
* *
* Globals: * Globals:
* - matcher_avg_noise_size avg. length of a noise blob * - matcher_avg_noise_size avg. length of a noise blob
@ -1539,7 +1535,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
/** /**
* *
* @param Blob blob whose classification is being debugged * @param blob blob whose classification is being debugged
* @param Results results of match being debugged * @param Results results of match being debugged
* *
* Globals: none * Globals: none
@ -1716,13 +1712,11 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
* It then copies the char norm features into the IntFeatures * It then copies the char norm features into the IntFeatures
* array provided by the caller. * array provided by the caller.
* *
* @param Blob blob to extract features from * @param templates used to compute char norm adjustments
* @param Templates used to compute char norm adjustments * @param pruner_norm_array Array of factors from blob normalization
* @param IntFeatures array to fill with integer features
* @param PrunerNormArray Array of factors from blob normalization
* process * process
* @param CharNormArray array to fill with dummy char norm adjustments * @param char_norm_array array to fill with dummy char norm adjustments
* @param BlobLength length of blob in baseline-normalized units * @param fx_info
* *
* Globals: * Globals:
* *
@ -2072,8 +2066,7 @@ namespace tesseract {
/** /**
* This routine writes the matches in Results to File. * This routine writes the matches in Results to File.
* *
* @param File open text file to write Results to * @param results match results to write to File
* @param Results match results to write to File
* *
* Globals: none * Globals: none
* *

File diff suppressed because it is too large Load Diff

View File

@ -26,23 +26,20 @@
#include <math.h> #include <math.h>
//---------------Global Data Definitions and Declarations-------------------- //---------------Global Data Definitions and Declarations--------------------
#define TOKENSIZE 80 //max size of tokens read from an input file #define TOKENSIZE 80 //< max size of tokens read from an input file
#define MAXSAMPLESIZE 65535 //max num of dimensions in feature space #define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space
//#define MAXBLOCKSIZE 65535 //max num of samples in a character (block size) //#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size)
/*--------------------------------------------------------------------------- /**
Public Code * This routine reads a single integer from the specified
-----------------------------------------------------------------------------*/ * file and checks to ensure that it is between 0 and
/** ReadSampleSize *********************************************************** * MAXSAMPLESIZE.
Parameters: File open text file to read sample size from * @param File open text file to read sample size from
Globals: None * @return Sample size
Operation: This routine reads a single integer from the specified * @note Globals: None
file and checks to ensure that it is between 0 and * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range
MAXSAMPLESIZE. * @note History: 6/6/89, DSJ, Created.
Return: Sample size */
Exceptions: ILLEGALSAMPLESIZE illegal format or range
History: 6/6/89, DSJ, Created.
******************************************************************************/
uinT16 ReadSampleSize(FILE *File) { uinT16 ReadSampleSize(FILE *File) {
int SampleSize; int SampleSize;
@ -50,21 +47,22 @@ uinT16 ReadSampleSize(FILE *File) {
(SampleSize < 0) || (SampleSize > MAXSAMPLESIZE)) (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
DoError (ILLEGALSAMPLESIZE, "Illegal sample size"); DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
return (SampleSize); return (SampleSize);
} // ReadSampleSize }
/**
/** ReadParamDesc ************************************************************* * This routine reads textual descriptions of sets of parameters
Parameters: File open text file to read N parameter descriptions from * which describe the characteristics of feature dimensions.
N number of parameter descriptions to read *
Globals: None * Exceptions:
Operation: This routine reads textual descriptions of sets of parameters * - ILLEGALCIRCULARSPEC
which describe the characteristics of feature dimensions. * - ILLEGALESSENTIALSPEC
Return: Pointer to an array of parameter descriptors. * - ILLEGALMINMAXSPEC
Exceptions: ILLEGALCIRCULARSPEC * @param File open text file to read N parameter descriptions from
ILLEGALESSENTIALSPEC * @param N number of parameter descriptions to read
ILLEGALMINMAXSPEC * @return Pointer to an array of parameter descriptors.
History: 6/6/89, DSJ, Created. * @note Globals: None
******************************************************************************/ * @note History: 6/6/89, DSJ, Created.
*/
PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) { PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
int i; int i;
PARAM_DESC *ParamDesc; PARAM_DESC *ParamDesc;
@ -94,23 +92,24 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2; ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
} }
return (ParamDesc); return (ParamDesc);
} // ReadParamDesc }
/**
/** ReadPrototype ************************************************************* * This routine reads a textual description of a prototype from
Parameters: File open text file to read prototype from * the specified file.
N number of dimensions used in prototype *
Globals: None * Exceptions:
Operation: This routine reads a textual description of a prototype from * - ILLEGALSIGNIFICANCESPEC
the specified file. * - ILLEGALSAMPLECOUNT
Return: List of prototypes * - ILLEGALMEANSPEC
Exceptions: ILLEGALSIGNIFICANCESPEC * - ILLEGALVARIANCESPEC
ILLEGALSAMPLECOUNT * - ILLEGALDISTRIBUTION
ILLEGALMEANSPEC * @param File open text file to read prototype from
ILLEGALVARIANCESPEC * @param N number of dimensions used in prototype
ILLEGALDISTRIBUTION * @return List of prototypes
History: 6/6/89, DSJ, Created. * @note Globals: None
******************************************************************************/ * @note History: 6/6/89, DSJ, Created.
*/
PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) { PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
char Token[TOKENSIZE]; char Token[TOKENSIZE];
int Status; int Status;
@ -228,18 +227,17 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification"); DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
return (NULL); return (NULL);
} }
} // ReadPrototype }
/**
/* ReadProtoStyle ************************************************************* * This routine reads a single token from the specified
Parameters: File open text file to read prototype style from * text file and interprets it as a prototype specification.
Globals: None * @param File open text file to read prototype style from
Operation: This routine reads a single token from the specified * @return Prototype style read from text file
text file and interprets it as a prototype specification. * @note Globals: None
Return: Prototype style read from text file * @note Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
Exceptions: ILLEGALSTYLESPEC illegal prototype style specification * @note History: 6/8/89, DSJ, Created.
History: 6/8/89, DSJ, Created. */
*******************************************************************************/
PROTOSTYLE ReadProtoStyle(FILE *File) { PROTOSTYLE ReadProtoStyle(FILE *File) {
char Token[TOKENSIZE]; char Token[TOKENSIZE];
PROTOSTYLE Style; PROTOSTYLE Style;
@ -264,23 +262,22 @@ PROTOSTYLE ReadProtoStyle(FILE *File) {
DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
} }
return (Style); return (Style);
} // ReadProtoStyle }
/**
/** ReadNFloats ************************************************************* * This routine reads N floats from the specified text file
Parameters: File open text file to read floats from * and places them into Buffer. If Buffer is NULL, a buffer
N number of floats to read * is created and passed back to the caller. If EOF is
Buffer pointer to buffer to place floats into * encountered before any floats can be read, NULL is
Globals: None * returned.
Operation: This routine reads N floats from the specified text file * @param File open text file to read floats from
and places them into Buffer. If Buffer is NULL, a buffer * @param N number of floats to read
is created and passed back to the caller. If EOF is * @param Buffer pointer to buffer to place floats into
encountered before any floats can be read, NULL is * @return Pointer to buffer holding floats or NULL if EOF
returned. * @note Globals: None
Return: Pointer to buffer holding floats or NULL if EOF * @note Exceptions: ILLEGALFLOAT
Exceptions: ILLEGALFLOAT * @note History: 6/6/89, DSJ, Created.
History: 6/6/89, DSJ, Created. */
******************************************************************************/
FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
int i; int i;
int NumFloatsRead; int NumFloatsRead;
@ -300,20 +297,19 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
} }
} }
return Buffer; return Buffer;
} // ReadNFloats }
/**
/** WriteParamDesc ************************************************************ * This routine writes an array of dimension descriptors to
Parameters: File open text file to write param descriptors to * the specified text file.
N number of param descriptors to write * @param File open text file to write param descriptors to
ParamDesc array of param descriptors to write * @param N number of param descriptors to write
Globals: None * @param ParamDesc array of param descriptors to write
Operation: This routine writes an array of dimension descriptors to * @return None
the specified text file. * @note Globals: None
Return: None * @note Exceptions: None
Exceptions: None * @note History: 6/6/89, DSJ, Created.
History: 6/6/89, DSJ, Created. */
******************************************************************************/
void void
WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) { WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
int i; int i;
@ -331,20 +327,19 @@ WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
} }
} // WriteParamDesc }
/**
/** WritePrototype ************************************************************ * This routine writes a textual description of a prototype
Parameters: File open text file to write prototype to * to the specified text file.
N number of dimensions in feature space * @param File open text file to write prototype to
Proto prototype to write out * @param N number of dimensions in feature space
Globals: None * @param Proto prototype to write out
Operation: This routine writes a textual description of a prototype * @return None
to the specified text file. * @note Globals: None
Return: None * @note Exceptions: None
Exceptions: None * @note History: 6/12/89, DSJ, Created.
History: 6/12/89, DSJ, Created. */
*******************************************************************************/
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) { void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
int i; int i;
@ -382,38 +377,36 @@ void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
fprintf (File, "\n\t"); fprintf (File, "\n\t");
WriteNFloats (File, N, Proto->Variance.Elliptical); WriteNFloats (File, N, Proto->Variance.Elliptical);
} }
} // WritePrototype }
/**
/** WriteNFloats *********************************************************** * This routine writes a text representation of N floats from
Parameters: File open text file to write N floats to * an array to a file. All of the floats are placed on one line.
N number of floats to write * @param File open text file to write N floats to
Array array of floats to write * @param N number of floats to write
Globals: None * @param Array array of floats to write
Operation: This routine writes a text representation of N floats from * @return None
an array to a file. All of the floats are placed on one line. * @note Globals: None
Return: None * @note Exceptions: None
Exceptions: None * @note History: 6/6/89, DSJ, Created.
History: 6/6/89, DSJ, Created. */
****************************************************************************/
void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) { void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
for (int i = 0; i < N; i++) for (int i = 0; i < N; i++)
fprintf(File, " %9.6f", Array[i]); fprintf(File, " %9.6f", Array[i]);
fprintf(File, "\n"); fprintf(File, "\n");
} // WriteNFloats }
/**
/** WriteProtoStyle ********************************************************** * This routine writes to the specified text file a word
Parameters: File open text file to write prototype style to * which represents the ProtoStyle. It does not append
ProtoStyle prototype style to write * a carriage return to the end.
Globals: None * @param File open text file to write prototype style to
Operation: This routine writes to the specified text file a word * @param ProtoStyle prototype style to write
which represents the ProtoStyle. It does not append * @return None
a carriage return to the end. * @note Globals: None
Return: None * @note Exceptions: None
Exceptions: None * @note History: 6/8/89, DSJ, Created.
History: 6/8/89, DSJ, Created. */
****************************************************************************/
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
switch (ProtoStyle) { switch (ProtoStyle) {
case spherical: case spherical:
@ -429,9 +422,25 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
fprintf (File, "automatic"); fprintf (File, "automatic");
break; break;
} }
} // WriteProtoStyle }
/**
* This routine writes a textual description of each prototype
* in the prototype list to the specified file. It also
* writes a file header which includes the number of dimensions
* in feature space and the descriptions for each dimension.
* @param File open text file to write prototypes to
* @param N number of dimensions in feature space
* @param ParamDesc descriptions for each dimension
* @param ProtoList list of prototypes to be written
* @param WriteSigProtos TRUE to write out significant prototypes
* @param WriteInsigProtos TRUE to write out insignificants
* @note Globals: None
* @return None
* @note Exceptions: None
* @note History: 6/12/89, DSJ, Created.
*/
/*---------------------------------------------------------------------------*/
void WriteProtoList( void WriteProtoList(
FILE *File, FILE *File,
uinT16 N, uinT16 N,
@ -439,30 +448,6 @@ void WriteProtoList(
LIST ProtoList, LIST ProtoList,
BOOL8 WriteSigProtos, BOOL8 WriteSigProtos,
BOOL8 WriteInsigProtos) BOOL8 WriteInsigProtos)
/*
** Parameters:
** File open text file to write prototypes to
** N number of dimensions in feature space
** ParamDesc descriptions for each dimension
** ProtoList list of prototypes to be written
** WriteSigProtos TRUE to write out significant prototypes
** WriteInsigProtos TRUE to write out insignificants
** Globals:
** None
** Operation:
** This routine writes a textual description of each prototype
** in the prototype list to the specified file. It also
** writes a file header which includes the number of dimensions
** in feature space and the descriptions for each dimension.
** Return:
** None
** Exceptions:
** None
** History:
** 6/12/89, DSJ, Created.
*/
{ {
PROTOTYPE *Proto; PROTOTYPE *Proto;
@ -478,5 +463,4 @@ void WriteProtoList(
( ! Proto->Significant && WriteInsigProtos ) ) ( ! Proto->Significant && WriteInsigProtos ) )
WritePrototype( File, N, Proto ); WritePrototype( File, N, Proto );
} }
} /* WriteProtoList */ }

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "cutoffs.h" #include "cutoffs.h"
#include <stdio.h> #include <stdio.h>
@ -34,26 +34,23 @@
#define MAX_CUTOFF 1000 #define MAX_CUTOFF 1000
/**----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
/**
* Open Filename, read in all of the class-id/cutoff pairs
* and insert them into the Cutoffs array. Cutoffs are
* indexed in the array by class id. Unused entries in the
* array are set to an arbitrarily high cutoff value.
* @param CutoffFile file stream containing cutoff definitions
* @param Cutoffs array to put cutoffs into
* @param swap
* @param end_offset
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Wed Feb 20 09:38:26 1991, DSJ, Created.
*/
void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
CLASS_CUTOFF_ARRAY Cutoffs) { CLASS_CUTOFF_ARRAY Cutoffs) {
/*
** Parameters:
** Filename name of file containing cutoff definitions
** Cutoffs array to put cutoffs into
** Globals: none
** Operation: Open Filename, read in all of the class-id/cutoff pairs
** and insert them into the Cutoffs array. Cutoffs are
** indexed in the array by class id. Unused entries in the
** array are set to an arbitrarily high cutoff value.
** Return: none
** Exceptions: none
** History: Wed Feb 20 09:38:26 1991, DSJ, Created.
*/
char Class[UNICHAR_LEN + 1]; char Class[UNICHAR_LEN + 1];
CLASS_ID ClassId; CLASS_ID ClassId;
int Cutoff; int Cutoff;
@ -78,6 +75,6 @@ void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
Cutoffs[ClassId] = Cutoff; Cutoffs[ClassId] = Cutoff;
SkipNewline(CutoffFile); SkipNewline(CutoffFile);
} }
} /* ReadNewCutoffs */ }
} // namespace tesseract } // namespace tesseract

View File

@ -15,18 +15,17 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "const.h" #include "const.h"
#include "fpoint.h" #include "fpoint.h"
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
FLOAT32 DistanceBetween(FPOINT A, FPOINT B) { FLOAT32 DistanceBetween(FPOINT A, FPOINT B) {
double xd = XDelta(A, B); double xd = XDelta(A, B);
@ -34,23 +33,21 @@ FLOAT32 DistanceBetween(FPOINT A, FPOINT B) {
return sqrt(static_cast<double>(xd * xd + yd * yd)); return sqrt(static_cast<double>(xd * xd + yd * yd));
} }
/**
* Return the angle from Point1 to Point2 normalized to
* lie in the range 0 to FullScale (where FullScale corresponds
* to 2*pi or 360 degrees).
* @param Point1 points to compute angle between
* @param Point2 points to compute angle between
* @param FullScale value to associate with 2*pi
* @return Angle from Point1 to Point2, normalized to the range 0 to FullScale
* @note Globals: none
* @note Exceptions: none
* @note History: Wed Mar 28 14:27:25 1990, DSJ, Created.
*/
FLOAT32 NormalizedAngleFrom(FPOINT *Point1, FLOAT32 NormalizedAngleFrom(FPOINT *Point1,
FPOINT *Point2, FPOINT *Point2,
FLOAT32 FullScale) { FLOAT32 FullScale) {
/*
** Parameters:
** Point1, Point2 points to compute angle between
** FullScale value to associate with 2*pi
** Globals: none
** Operation: Return the angle from Point1 to Point2 normalized to
** lie in the range 0 to FullScale (where FullScale corresponds
** to 2*pi or 360 degrees).
** Return: none
** Exceptions: none
** History: Wed Mar 28 14:27:25 1990, DSJ, Created.
*/
FLOAT32 Angle; FLOAT32 Angle;
FLOAT32 NumRadsInCircle = 2.0 * PI; FLOAT32 NumRadsInCircle = 2.0 * PI;
@ -62,4 +59,4 @@ FLOAT32 NormalizedAngleFrom(FPOINT *Point1,
Angle = 0.0; Angle = 0.0;
return (Angle); return (Angle);
} /* NormalizedAngleFrom */ }

View File

@ -135,8 +135,8 @@ class ClassPruner {
delete []sort_index_; delete []sort_index_;
} }
// Computes the scores for every class in the character set, by summing the /// Computes the scores for every class in the character set, by summing the
// weights for each feature and stores the sums internally in class_count_. /// weights for each feature and stores the sums internally in class_count_.
void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates, void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
int num_features, const INT_FEATURE_STRUCT* features) { int num_features, const INT_FEATURE_STRUCT* features) {
num_features_ = num_features; num_features_ = num_features;
@ -203,11 +203,11 @@ class ClassPruner {
} }
} }
// Adjusts the scores according to the number of expected features. Used /// Adjusts the scores according to the number of expected features. Used
// in lieu of a constant bias, this penalizes classes that expect more /// in lieu of a constant bias, this penalizes classes that expect more
// features than there are present. Thus an actual c will score higher for c /// features than there are present. Thus an actual c will score higher for c
// than e, even though almost all the features match e as well as c, because /// than e, even though almost all the features match e as well as c, because
// e expects more features to be present. /// e expects more features to be present.
void AdjustForExpectedNumFeatures(const uinT16* expected_num_features, void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
int cutoff_strength) { int cutoff_strength) {
for (int class_id = 0; class_id < max_classes_; ++class_id) { for (int class_id = 0; class_id < max_classes_; ++class_id) {
@ -219,8 +219,8 @@ class ClassPruner {
} }
} }
// Zeros the scores for classes disabled in the unicharset. /// Zeros the scores for classes disabled in the unicharset.
// Implements the black-list to recognize a subset of the character set. /// Implements the black-list to recognize a subset of the character set.
void DisableDisabledClasses(const UNICHARSET& unicharset) { void DisableDisabledClasses(const UNICHARSET& unicharset) {
for (int class_id = 0; class_id < max_classes_; ++class_id) { for (int class_id = 0; class_id < max_classes_; ++class_id) {
if (!unicharset.get_enabled(class_id)) if (!unicharset.get_enabled(class_id))
@ -228,7 +228,7 @@ class ClassPruner {
} }
} }
// Zeros the scores of fragments. /** Zeros the scores of fragments. */
void DisableFragments(const UNICHARSET& unicharset) { void DisableFragments(const UNICHARSET& unicharset) {
for (int class_id = 0; class_id < max_classes_; ++class_id) { for (int class_id = 0; class_id < max_classes_; ++class_id) {
// Do not include character fragments in the class pruner // Do not include character fragments in the class pruner
@ -239,10 +239,10 @@ class ClassPruner {
} }
} }
// Normalizes the counts for xheight, putting the normalized result in /// Normalizes the counts for xheight, putting the normalized result in
// norm_count_. Applies a simple subtractive penalty for incorrect vertical /// norm_count_. Applies a simple subtractive penalty for incorrect vertical
// position provided by the normalization_factors array, indexed by /// position provided by the normalization_factors array, indexed by
// character class, and scaled by the norm_multiplier. /// character class, and scaled by the norm_multiplier.
void NormalizeForXheight(int norm_multiplier, void NormalizeForXheight(int norm_multiplier,
const uinT8* normalization_factors) { const uinT8* normalization_factors) {
for (int class_id = 0; class_id < max_classes_; class_id++) { for (int class_id = 0; class_id < max_classes_; class_id++) {
@ -251,16 +251,16 @@ class ClassPruner {
} }
} }
// The nop normalization copies the class_count_ array to norm_count_. /** The nop normalization copies the class_count_ array to norm_count_. */
void NoNormalization() { void NoNormalization() {
for (int class_id = 0; class_id < max_classes_; class_id++) { for (int class_id = 0; class_id < max_classes_; class_id++) {
norm_count_[class_id] = class_count_[class_id]; norm_count_[class_id] = class_count_[class_id];
} }
} }
// Prunes the classes using <the maximum count> * pruning_factor/256 as a /// Prunes the classes using &lt;the maximum count> * pruning_factor/256 as a
// threshold for keeping classes. If max_of_non_fragments, then ignore /// threshold for keeping classes. If max_of_non_fragments, then ignore
// fragments in computing the maximum count. /// fragments in computing the maximum count.
void PruneAndSort(int pruning_factor, int keep_this, void PruneAndSort(int pruning_factor, int keep_this,
bool max_of_non_fragments, const UNICHARSET& unicharset) { bool max_of_non_fragments, const UNICHARSET& unicharset) {
int max_count = 0; int max_count = 0;
@ -295,7 +295,7 @@ class ClassPruner {
HeapSort(num_classes_, sort_key_, sort_index_); HeapSort(num_classes_, sort_key_, sort_index_);
} }
// Prints debug info on the class pruner matches for the pruned classes only. /** Prints debug info on the class pruner matches for the pruned classes only. */
void DebugMatch(const Classify& classify, void DebugMatch(const Classify& classify,
const INT_TEMPLATES_STRUCT* int_templates, const INT_TEMPLATES_STRUCT* int_templates,
const INT_FEATURE_STRUCT* features) const { const INT_FEATURE_STRUCT* features) const {
@ -332,7 +332,7 @@ class ClassPruner {
} }
} }
// Prints a summary of the pruner result. /** Prints a summary of the pruner result. */
void SummarizeResult(const Classify& classify, void SummarizeResult(const Classify& classify,
const INT_TEMPLATES_STRUCT* int_templates, const INT_TEMPLATES_STRUCT* int_templates,
const uinT16* expected_num_features, const uinT16* expected_num_features,
@ -354,8 +354,8 @@ class ClassPruner {
} }
} }
// Copies the pruned, sorted classes into the output results and returns /// Copies the pruned, sorted classes into the output results and returns
// the number of classes. /// the number of classes.
int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const { int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
CP_RESULT_STRUCT empty; CP_RESULT_STRUCT empty;
results->init_to_size(num_classes_, empty); results->init_to_size(num_classes_, empty);
@ -368,57 +368,50 @@ class ClassPruner {
} }
private: private:
// Array[rounded_classes_] of initial counts for each class. /** Array[rounded_classes_] of initial counts for each class. */
int *class_count_; int *class_count_;
// Array[rounded_classes_] of modified counts for each class after normalizing /// Array[rounded_classes_] of modified counts for each class after normalizing
// for expected number of features, disabled classes, fragments, and xheights. /// for expected number of features, disabled classes, fragments, and xheights.
int *norm_count_; int *norm_count_;
// Array[rounded_classes_ +1] of pruned counts that gets sorted /** Array[rounded_classes_ +1] of pruned counts that gets sorted */
int *sort_key_; int *sort_key_;
// Array[rounded_classes_ +1] of classes corresponding to sort_key_. /** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */
int *sort_index_; int *sort_index_;
// Number of classes in this class pruner. /** Number of classes in this class pruner. */
int max_classes_; int max_classes_;
// Rounded up number of classes used for array sizes. /** Rounded up number of classes used for array sizes. */
int rounded_classes_; int rounded_classes_;
// Threshold count applied to prune classes. /** Threshold count applied to prune classes. */
int pruning_threshold_; int pruning_threshold_;
// The number of features used to compute the scores. /** The number of features used to compute the scores. */
int num_features_; int num_features_;
// Final number of pruned classes. /** Final number of pruned classes. */
int num_classes_; int num_classes_;
}; };
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------*/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
// Runs the class pruner from int_templates on the given features, returning * Runs the class pruner from int_templates on the given features, returning
// the number of classes output in results. * the number of classes output in results.
// int_templates Class pruner tables * @param int_templates Class pruner tables
// num_features Number of features in blob * @param num_features Number of features in blob
// features Array of features * @param features Array of features
// normalization_factors Array of fudge factors from blob * @param normalization_factors Array of fudge factors from blob
// normalization process (by CLASS_INDEX) * normalization process (by CLASS_INDEX)
// expected_num_features Array of expected number of features * @param expected_num_features Array of expected number of features
// for each class (by CLASS_INDEX) * for each class (by CLASS_INDEX)
// results Sorted Array of pruned classes. Must be an array * @param results Sorted Array of pruned classes. Must be an array
// of size at least int_templates->NumClasses. * of size at least int_templates->NumClasses.
* @param keep_this
*/
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
int num_features, int keep_this, int num_features, int keep_this,
const INT_FEATURE_STRUCT* features, const INT_FEATURE_STRUCT* features,
const uinT8* normalization_factors, const uinT8* normalization_factors,
const uinT16* expected_num_features, const uinT16* expected_num_features,
GenericVector<CP_RESULT_STRUCT>* results) { GenericVector<CP_RESULT_STRUCT>* results) {
/*
** Operation:
** Prunes the classes using a modified fast match table.
** Returns a sorted list of classes along with the number
** of pruned classes in that list.
** Return: Number of pruned classes.
** Exceptions: none
** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
*/
ClassPruner pruner(int_templates->NumClasses); ClassPruner pruner(int_templates->NumClasses);
// Compute initial match scores for all classes. // Compute initial match scores for all classes.
pruner.ComputeScores(int_templates, num_features, features); pruner.ComputeScores(int_templates, num_features, features);
@ -457,7 +450,25 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
} // namespace tesseract } // namespace tesseract
/*---------------------------------------------------------------------------*/ /**
* IntegerMatcher returns the best configuration and rating
* for a single class. The class matched against is determined
* by the uniqueness of the ClassTemplate parameter. The
* best rating and its associated configuration are returned.
*
* Globals:
* - local_matcher_multiplier_ Normalization factor multiplier
* param ClassTemplate Prototypes & tables for a class
* param BlobLength Length of unnormalized blob
* param NumFeatures Number of features in blob
* param Features Array of features
* param NormalizationFactor Fudge factor from blob normalization process
* param Result Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good
* param Debug Debugger flag: 1=debugger on
* @return none
* @note Exceptions: none
* @note History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
*/
void IntegerMatcher::Match(INT_CLASS ClassTemplate, void IntegerMatcher::Match(INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
BIT_VECTOR ConfigMask, BIT_VECTOR ConfigMask,
@ -467,28 +478,6 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate,
int AdaptFeatureThreshold, int AdaptFeatureThreshold,
int Debug, int Debug,
bool SeparateDebugWindows) { bool SeparateDebugWindows) {
/*
** Parameters:
** ClassTemplate Prototypes & tables for a class
** BlobLength Length of unormalized blob
** NumFeatures Number of features in blob
** Features Array of features
** NormalizationFactor Fudge factor from blob
** normalization process
** Result Class rating & configuration:
** (0.0 -> 1.0), 0=bad, 1=good
** Debug Debugger flag: 1=debugger on
** Globals:
** local_matcher_multiplier_ Normalization factor multiplier
** Operation:
** IntegerMatcher returns the best configuration and rating
** for a single class. The class matched against is determined
** by the uniqueness of the ClassTemplate parameter. The
** best rating and its associated configuration are returned.
** Return:
** Exceptions: none
** History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
*/
ScratchEvidence *tables = new ScratchEvidence(); ScratchEvidence *tables = new ScratchEvidence();
int Feature; int Feature;
int BestMatch; int BestMatch;
@ -542,8 +531,26 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate,
delete tables; delete tables;
} }
/**
/*---------------------------------------------------------------------------*/ * FindGoodProtos finds all protos whose normalized proto-evidence
* exceeds classify_adapt_proto_thresh. The list is ordered by increasing
* proto id number.
*
* Globals:
* - local_matcher_multiplier_ Normalization factor multiplier
* param ClassTemplate Prototypes & tables for a class
* param ProtoMask AND Mask for proto word
* param ConfigMask AND Mask for config word
* param BlobLength Length of unnormalized blob
* param NumFeatures Number of features in blob
* param Features Array of features
* param ProtoArray Array of good protos
* param AdaptProtoThreshold Threshold for good protos
* param Debug Debugger flag: 1=debugger on
* @return Number of good protos in ProtoArray.
* @note Exceptions: none
* @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
*/
int IntegerMatcher::FindGoodProtos( int IntegerMatcher::FindGoodProtos(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
@ -554,28 +561,6 @@ int IntegerMatcher::FindGoodProtos(
PROTO_ID *ProtoArray, PROTO_ID *ProtoArray,
int AdaptProtoThreshold, int AdaptProtoThreshold,
int Debug) { int Debug) {
/*
** Parameters:
** ClassTemplate Prototypes & tables for a class
** ProtoMask AND Mask for proto word
** ConfigMask AND Mask for config word
** BlobLength Length of unormalized blob
** NumFeatures Number of features in blob
** Features Array of features
** ProtoArray Array of good protos
** AdaptProtoThreshold Threshold for good protos
** Debug Debugger flag: 1=debugger on
** Globals:
** local_matcher_multiplier_ Normalization factor multiplier
** Operation:
** FindGoodProtos finds all protos whose normalized proto-evidence
** exceed classify_adapt_proto_thresh. The list is ordered by increasing
** proto id number.
** Return:
** Number of good protos in ProtoArray.
** Exceptions: none
** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
*/
ScratchEvidence *tables = new ScratchEvidence(); ScratchEvidence *tables = new ScratchEvidence();
int NumGoodProtos = 0; int NumGoodProtos = 0;
@ -622,7 +607,21 @@ int IntegerMatcher::FindGoodProtos(
} }
/*---------------------------------------------------------------------------*/ /**
* FindBadFeatures finds all features with maximum feature-evidence <
* AdaptFeatureThresh. The list is ordered by increasing feature number.
* @param ClassTemplate Prototypes & tables for a class
* @param ProtoMask AND Mask for proto word
* @param ConfigMask AND Mask for config word
* @param BlobLength Length of unnormalized blob
* @param NumFeatures Number of features in blob
* @param Features Array of features
* @param FeatureArray Array of bad features
* @param AdaptFeatureThreshold Threshold for bad features
* @param Debug Debugger flag: 1=debugger on
* @return Number of bad features in FeatureArray.
* @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
*/
int IntegerMatcher::FindBadFeatures( int IntegerMatcher::FindBadFeatures(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
@ -633,24 +632,6 @@ int IntegerMatcher::FindBadFeatures(
FEATURE_ID *FeatureArray, FEATURE_ID *FeatureArray,
int AdaptFeatureThreshold, int AdaptFeatureThreshold,
int Debug) { int Debug) {
/*
** Parameters:
** ClassTemplate Prototypes & tables for a class
** ProtoMask AND Mask for proto word
** ConfigMask AND Mask for config word
** BlobLength Length of unormalized blob
** NumFeatures Number of features in blob
** Features Array of features
** FeatureArray Array of bad features
** AdaptFeatureThreshold Threshold for bad features
** Debug Debugger flag: 1=debugger on
** Operation:
** FindBadFeatures finds all features with maximum feature-evidence <
** AdaptFeatureThresh. The list is ordered by increasing feature number.
** Return:
** Number of bad features in FeatureArray.
** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
*/
ScratchEvidence *tables = new ScratchEvidence(); ScratchEvidence *tables = new ScratchEvidence();
int NumBadFeatures = 0; int NumBadFeatures = 0;
@ -693,7 +674,6 @@ int IntegerMatcher::FindBadFeatures(
} }
/*---------------------------------------------------------------------------*/
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
classify_debug_level_ = classify_debug_level; classify_debug_level_ = classify_debug_level;
@ -722,9 +702,9 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
} }
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
void ScratchEvidence::Clear(const INT_CLASS class_template) { void ScratchEvidence::Clear(const INT_CLASS class_template) {
memset(sum_feature_evidence_, 0, memset(sum_feature_evidence_, 0,
class_template->NumConfigs * sizeof(sum_feature_evidence_[0])); class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
@ -739,21 +719,17 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
/*---------------------------------------------------------------------------*/ /**
* Print debugging information for Configurations
* @return none
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
void IMDebugConfiguration(int FeatureNum, void IMDebugConfiguration(int FeatureNum,
uinT16 ActualProtoNum, uinT16 ActualProtoNum,
uinT8 Evidence, uinT8 Evidence,
BIT_VECTOR ConfigMask, BIT_VECTOR ConfigMask,
uinT32 ConfigWord) { uinT32 ConfigWord) {
/*
** Parameters:
** Globals:
** Operation:
** Print debugging information for Configuations
** Return:
** Exceptions: none
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
cprintf ("F = %3d, P = %3d, E = %3d, Configs = ", cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
FeatureNum, (int) ActualProtoNum, (int) Evidence); FeatureNum, (int) ActualProtoNum, (int) Evidence);
while (ConfigWord) { while (ConfigWord) {
@ -767,19 +743,15 @@ void IMDebugConfiguration(int FeatureNum,
} }
/*---------------------------------------------------------------------------*/ /**
* Print debugging information for Configurations
* @return none
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
void IMDebugConfigurationSum(int FeatureNum, void IMDebugConfigurationSum(int FeatureNum,
uinT8 *FeatureEvidence, uinT8 *FeatureEvidence,
inT32 ConfigCount) { inT32 ConfigCount) {
/*
** Parameters:
** Globals:
** Operation:
** Print debugging information for Configuations
** Return:
** Exceptions: none
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
cprintf("F=%3d, C=", FeatureNum); cprintf("F=%3d, C=", FeatureNum);
for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) { for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
cprintf("%4d", FeatureEvidence[ConfigNum]); cprintf("%4d", FeatureEvidence[ConfigNum]);
@ -787,9 +759,17 @@ void IMDebugConfigurationSum(int FeatureNum,
cprintf("\n"); cprintf("\n");
} }
/**
* For the given feature: prune protos, compute evidence,
/*---------------------------------------------------------------------------*/ * update Feature Evidence, Proto Evidence, and Sum of Feature
* Evidence tables.
* @param ClassTemplate Prototypes & tables for a class
* @param FeatureNum Current feature number (for DEBUG only)
* @param Feature Pointer to a feature struct
* @param tables Evidence tables
* @param Debug Debugger flag: 1=debugger on
* @return none
*/
int IntegerMatcher::UpdateTablesForFeature( int IntegerMatcher::UpdateTablesForFeature(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
@ -798,19 +778,6 @@ int IntegerMatcher::UpdateTablesForFeature(
const INT_FEATURE_STRUCT* Feature, const INT_FEATURE_STRUCT* Feature,
ScratchEvidence *tables, ScratchEvidence *tables,
int Debug) { int Debug) {
/*
** Parameters:
** ClassTemplate Prototypes & tables for a class
** FeatureNum Current feature number (for DEBUG only)
** Feature Pointer to a feature struct
** tables Evidence tables
** Debug Debugger flag: 1=debugger on
** Operation:
** For the given feature: prune protos, compute evidence,
** update Feature Evidence, Proto Evidence, and Sum of Feature
** Evidence tables.
** Return:
*/
register uinT32 ConfigWord; register uinT32 ConfigWord;
register uinT32 ProtoWord; register uinT32 ProtoWord;
register uinT32 ProtoNum; register uinT32 ProtoNum;
@ -950,7 +917,12 @@ int IntegerMatcher::UpdateTablesForFeature(
} }
/*---------------------------------------------------------------------------*/ /**
* Print debugging information for Configurations
* @return none
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void IntegerMatcher::DebugFeatureProtoError( void IntegerMatcher::DebugFeatureProtoError(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
@ -959,15 +931,6 @@ void IntegerMatcher::DebugFeatureProtoError(
const ScratchEvidence& tables, const ScratchEvidence& tables,
inT16 NumFeatures, inT16 NumFeatures,
int Debug) { int Debug) {
/*
** Parameters:
** Globals:
** Operation:
** Print debugging information for Configuations
** Return:
** Exceptions: none
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS]; FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
int ConfigNum; int ConfigNum;
uinT32 ConfigWord; uinT32 ConfigWord;
@ -1076,8 +1039,6 @@ void IntegerMatcher::DebugFeatureProtoError(
} }
/*---------------------------------------------------------------------------*/
void IntegerMatcher::DisplayProtoDebugInfo( void IntegerMatcher::DisplayProtoDebugInfo(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
@ -1119,7 +1080,6 @@ void IntegerMatcher::DisplayProtoDebugInfo(
} }
/*---------------------------------------------------------------------------*/
void IntegerMatcher::DisplayFeatureDebugInfo( void IntegerMatcher::DisplayFeatureDebugInfo(
INT_CLASS ClassTemplate, INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask, BIT_VECTOR ProtoMask,
@ -1165,8 +1125,9 @@ void IntegerMatcher::DisplayFeatureDebugInfo(
} }
#endif #endif
/*---------------------------------------------------------------------------*/ /**
// Add sum of Proto Evidences into Sum Of Feature Evidence Array * Add sum of Proto Evidences into Sum Of Feature Evidence Array
*/
void ScratchEvidence::UpdateSumOfProtoEvidences( void ScratchEvidence::UpdateSumOfProtoEvidences(
INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) { INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
@ -1206,9 +1167,10 @@ void ScratchEvidence::UpdateSumOfProtoEvidences(
/*---------------------------------------------------------------------------*/ /**
// Normalize Sum of Proto and Feature Evidence by dividing by the sum of * Normalize Sum of Proto and Feature Evidence by dividing by the sum of
// the Feature Lengths and the Proto Lengths for each configuration. * the Feature Lengths and the Proto Lengths for each configuration.
*/
void ScratchEvidence::NormalizeSums( void ScratchEvidence::NormalizeSums(
INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) { INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
@ -1219,22 +1181,17 @@ void ScratchEvidence::NormalizeSums(
} }
/*---------------------------------------------------------------------------*/ /**
* Find the best match for the current class and update the Result
* with the configuration and match rating.
* @return The best normalized sum of evidences
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
int IntegerMatcher::FindBestMatch( int IntegerMatcher::FindBestMatch(
INT_CLASS class_template, INT_CLASS class_template,
const ScratchEvidence &tables, const ScratchEvidence &tables,
UnicharRating* result) { UnicharRating* result) {
/*
** Parameters:
** Globals:
** Operation:
** Find the best match for the current class and update the Result
** with the configuration and match rating.
** Return:
** The best normalized sum of evidences
** Exceptions: none
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
int best_match = 0; int best_match = 0;
result->config = 0; result->config = 0;
result->fonts.truncate(0); result->fonts.truncate(0);
@ -1258,8 +1215,10 @@ int IntegerMatcher::FindBestMatch(
return best_match; return best_match;
} }
// Applies the CN normalization factor to the given rating and returns /**
// the modified rating. * Applies the CN normalization factor to the given rating and returns
* the modified rating.
*/
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
int normalization_factor, int normalization_factor,
int matcher_multiplier) { int matcher_multiplier) {
@ -1268,23 +1227,19 @@ float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
(blob_length + matcher_multiplier); (blob_length + matcher_multiplier);
} }
/*---------------------------------------------------------------------------*/ /**
* Sort Key array in ascending order using heap sort
* algorithm. Also sort Index array that is tied to
* the key array.
* @param n Number of elements to sort
* @param ra Key array [1..n]
* @param rb Index array [1..n]
* @return none
* @note Exceptions: none
* @note History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
*/
void void
HeapSort (int n, register int ra[], register int rb[]) { HeapSort (int n, register int ra[], register int rb[]) {
/*
** Parameters:
** n Number of elements to sort
** ra Key array [1..n]
** rb Index array [1..n]
** Globals:
** Operation:
** Sort Key array in ascending order using heap sort
** algorithm. Also sort Index array that is tied to
** the key array.
** Return:
** Exceptions: none
** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
*/
register int i, rra, rrb; register int i, rra, rrb;
int l, j, ir; int l, j, ir;

View File

@ -207,15 +207,15 @@ double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad");
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
Public Code Public Code
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
// Builds a feature from an FCOORD for position with all the necessary /// Builds a feature from an FCOORD for position with all the necessary
// clipping and rounding. /// clipping and rounding.
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta) INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta)
: X(ClipToRange<inT16>(static_cast<inT16>(pos.x() + 0.5), 0, 255)), : X(ClipToRange<inT16>(static_cast<inT16>(pos.x() + 0.5), 0, 255)),
Y(ClipToRange<inT16>(static_cast<inT16>(pos.y() + 0.5), 0, 255)), Y(ClipToRange<inT16>(static_cast<inT16>(pos.y() + 0.5), 0, 255)),
Theta(theta), Theta(theta),
CP_misses(0) { CP_misses(0) {
} }
// Builds a feature from ints with all the necessary clipping and casting. /** Builds a feature from ints with all the necessary clipping and casting. */
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta) INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
: X(static_cast<uinT8>(ClipToRange(x, 0, MAX_UINT8))), : X(static_cast<uinT8>(ClipToRange(x, 0, MAX_UINT8))),
Y(static_cast<uinT8>(ClipToRange(y, 0, MAX_UINT8))), Y(static_cast<uinT8>(ClipToRange(y, 0, MAX_UINT8))),
@ -223,7 +223,6 @@ INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
CP_misses(0) { CP_misses(0) {
} }
/*---------------------------------------------------------------------------*/
/** /**
* This routine adds a new class structure to a set of * This routine adds a new class structure to a set of
* templates. Classes have to be added to Templates in * templates. Classes have to be added to Templates in
@ -258,7 +257,6 @@ void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) {
} /* AddIntClass */ } /* AddIntClass */
/*---------------------------------------------------------------------------*/
/** /**
* This routine returns the index of the next free config * This routine returns the index of the next free config
* in Class. * in Class.
@ -282,7 +280,6 @@ int AddIntConfig(INT_CLASS Class) {
} /* AddIntConfig */ } /* AddIntConfig */
/*---------------------------------------------------------------------------*/
/** /**
* This routine allocates the next free proto in Class and * This routine allocates the next free proto in Class and
* returns its index. * returns its index.
@ -330,25 +327,24 @@ int AddIntProto(INT_CLASS Class) {
return (Index); return (Index);
} /* AddIntProto */ }
/*---------------------------------------------------------------------------*/ /**
* This routine adds Proto to the class pruning tables
* for the specified class in Templates.
*
* Globals:
* - classify_num_cp_levels number of levels used in the class pruner
* @param Proto floating-pt proto to add to class pruner
* @param ClassId class id corresponding to Proto
* @param Templates set of templates containing class pruner
* @return none
* @note Exceptions: none
* @note History: Wed Feb 13 08:49:54 1991, DSJ, Created.
*/
void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
INT_TEMPLATES Templates) INT_TEMPLATES Templates)
/*
** Parameters:
** Proto floating-pt proto to add to class pruner
** ClassId class id corresponding to Proto
** Templates set of templates containing class pruner
** Globals:
** classify_num_cp_levels number of levels used in the class pruner
** Operation: This routine adds Proto to the class pruning tables
** for the specified class in Templates.
** Return: none
** Exceptions: none
** History: Wed Feb 13 08:49:54 1991, DSJ, Created.
*/
#define MAX_LEVEL 2 #define MAX_LEVEL 2
{ {
CLASS_PRUNER_STRUCT* Pruner; CLASS_PRUNER_STRUCT* Pruner;
@ -377,22 +373,21 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
} /* AddProtoToClassPruner */ } /* AddProtoToClassPruner */
/*---------------------------------------------------------------------------*/ /**
* This routine updates the proto pruner lookup tables
* for Class to include a new proto identified by ProtoId
* and described by Proto.
* @param Proto floating-pt proto to be added to proto pruner
* @param ProtoId id of proto
* @param Class integer class that contains desired proto pruner
* @param debug debug flag
* @note Globals: none
* @return none
* @note Exceptions: none
* @note History: Fri Feb 8 13:07:19 1991, DSJ, Created.
*/
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
INT_CLASS Class, bool debug) { INT_CLASS Class, bool debug) {
/*
** Parameters:
** Proto floating-pt proto to be added to proto pruner
** ProtoId id of proto
** Class integer class that contains desired proto pruner
** Globals: none
** Operation: This routine updates the proto pruner lookup tables
** for Class to include a new proto identified by ProtoId
** and described by Proto.
** Return: none
** Exceptions: none
** History: Fri Feb 8 13:07:19 1991, DSJ, Created.
*/
FLOAT32 Angle, X, Y, Length; FLOAT32 Angle, X, Y, Length;
FLOAT32 Pad; FLOAT32 Pad;
int Index; int Index;
@ -438,10 +433,11 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
} /* AddProtoToProtoPruner */ } /* AddProtoToProtoPruner */
/*---------------------------------------------------------------------------*/ /**
// Returns a quantized bucket for the given param shifted by offset, * Returns a quantized bucket for the given param shifted by offset,
// notionally (param + offset) * num_buckets, but clipped and casted to the * notionally (param + offset) * num_buckets, but clipped and casted to the
// appropriate type. * appropriate type.
*/
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets) { uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets) {
int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
return static_cast<uinT8>(ClipToRange(bucket, 0, num_buckets - 1)); return static_cast<uinT8>(ClipToRange(bucket, 0, num_buckets - 1));
@ -451,52 +447,50 @@ uinT16 Bucket16For(FLOAT32 param, FLOAT32 offset, int num_buckets) {
return static_cast<uinT16>(ClipToRange(bucket, 0, num_buckets - 1)); return static_cast<uinT16>(ClipToRange(bucket, 0, num_buckets - 1));
} }
/*---------------------------------------------------------------------------*/ /**
// Returns a quantized bucket for the given circular param shifted by offset, * Returns a quantized bucket for the given circular param shifted by offset,
// notionally (param + offset) * num_buckets, but modded and casted to the * notionally (param + offset) * num_buckets, but modded and casted to the
// appropriate type. * appropriate type.
*/
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets) { uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets) {
int bucket = IntCastRounded(MapParam(param, offset, num_buckets)); int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
return static_cast<uinT8>(Modulo(bucket, num_buckets)); return static_cast<uinT8>(Modulo(bucket, num_buckets));
} /* CircBucketFor */ } /* CircBucketFor */
/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void UpdateMatchDisplay() { /**
/* * This routine clears the global feature and proto
** Parameters: none * display lists.
** Globals: *
** FeatureShapes display list for features * Globals:
** ProtoShapes display list for protos * - FeatureShapes display list for features
** Operation: This routine clears the global feature and proto * - ProtoShapes display list for protos
** display lists. * @return none
** Return: none * @note Exceptions: none
** Exceptions: none * @note History: Thu Mar 21 15:40:19 1991, DSJ, Created.
** History: Thu Mar 21 15:40:19 1991, DSJ, Created.
*/ */
void UpdateMatchDisplay() {
if (IntMatchWindow != NULL) if (IntMatchWindow != NULL)
IntMatchWindow->Update(); IntMatchWindow->Update();
} /* ClearMatchDisplay */ } /* ClearMatchDisplay */
#endif #endif
/*---------------------------------------------------------------------------*/ /**
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { * This operation updates the config vectors of all protos
/* * in Class to indicate that the protos with 1's in Config
** Parameters: * belong to a new configuration identified by ConfigId.
** Config config to be added to class * It is assumed that the length of the Config bit vector is
** ConfigId id to be used for new config * equal to the number of protos in Class.
** Class class to add new config to * @param Config config to be added to class
** Globals: none * @param ConfigId id to be used for new config
** Operation: This operation updates the config vectors of all protos * @param Class class to add new config to
** in Class to indicate that the protos with 1's in Config * @return none
** belong to a new configuration identified by ConfigId. * @note Globals: none
** It is assumed that the length of the Config bit vector is * @note Exceptions: none
** equal to the number of protos in Class. * @note History: Mon Feb 11 14:57:31 1991, DSJ, Created.
** Return: none
** Exceptions: none
** History: Mon Feb 11 14:57:31 1991, DSJ, Created.
*/ */
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
int ProtoId; int ProtoId;
INT_PROTO Proto; INT_PROTO Proto;
int TotalLength; int TotalLength;
@ -514,20 +508,18 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
namespace tesseract { namespace tesseract {
/*---------------------------------------------------------------------------*/ /**
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { * This routine converts Proto to integer format and
/* * installs it as ProtoId in Class.
** Parameters: * @param Proto floating-pt proto to be converted to integer format
** Proto floating-pt proto to be converted to integer format * @param ProtoId id of proto
** ProtoId id of proto * @param Class integer class to add converted proto to
** Class integer class to add converted proto to * @return none
** Globals: none * @note Globals: none
** Operation: This routine converts Proto to integer format and * @note Exceptions: none
** installs it as ProtoId in Class. * @note History: Fri Feb 8 11:22:43 1991, DSJ, Created.
** Return: none
** Exceptions: none
** History: Fri Feb 8 11:22:43 1991, DSJ, Created.
*/ */
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
INT_PROTO P; INT_PROTO P;
FLOAT32 Param; FLOAT32 Param;
@ -559,20 +551,19 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
} /* ConvertProto */ } /* ConvertProto */
/*---------------------------------------------------------------------------*/ /**
* This routine converts from the old floating point format
* to the new integer format.
* @param FloatProtos prototypes in old floating pt format
* @param target_unicharset the UNICHARSET to use
* @return New set of training templates in integer format.
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Feb 7 14:40:42 1991, DSJ, Created.
*/
INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
const UNICHARSET& const UNICHARSET&
target_unicharset) { target_unicharset) {
/*
** Parameters:
** FloatProtos prototypes in old floating pt format
** Globals: none
** Operation: This routine converts from the old floating point format
** to the new integer format.
** Return: New set of training templates in integer format.
** Exceptions: none
** History: Thu Feb 7 14:40:42 1991, DSJ, Created.
*/
INT_TEMPLATES IntTemplates; INT_TEMPLATES IntTemplates;
CLASS_TYPE FClass; CLASS_TYPE FClass;
INT_CLASS IClass; INT_CLASS IClass;
@ -623,21 +614,20 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
} // namespace tesseract } // namespace tesseract
/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { /**
/* * This routine renders the specified feature into a
** Parameters: * global display list.
** Feature pico-feature to be displayed *
** Evidence best evidence for this feature (0-1) * Globals:
** Globals: * - FeatureShapes global display list for features
** FeatureShapes global display list for features * @param Feature pico-feature to be displayed
** Operation: This routine renders the specified feature into a * @param Evidence best evidence for this feature (0-1)
** global display list. * @return none
** Return: none * @note Exceptions: none
** Exceptions: none * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created.
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
*/ */
void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
ScrollView::Color color = GetMatchColorFor(Evidence); ScrollView::Color color = GetMatchColorFor(Evidence);
RenderIntFeature(IntMatchWindow, Feature, color); RenderIntFeature(IntMatchWindow, Feature, color);
if (FeatureDisplayWindow) { if (FeatureDisplayWindow) {
@ -646,21 +636,20 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
} /* DisplayIntFeature */ } /* DisplayIntFeature */
/*---------------------------------------------------------------------------*/ /**
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) { * This routine renders the specified proto into a
/* * global display list.
** Parameters: *
** Class class to take proto from * Globals:
** ProtoId id of proto in Class to be displayed * - ProtoShapes global display list for protos
** Evidence total evidence for proto (0-1) * @param Class class to take proto from
** Globals: * @param ProtoId id of proto in Class to be displayed
** ProtoShapes global display list for protos * @param Evidence total evidence for proto (0-1)
** Operation: This routine renders the specified proto into a * @return none
** global display list. * @note Exceptions: none
** Return: none * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created.
** Exceptions: none
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
*/ */
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
ScrollView::Color color = GetMatchColorFor(Evidence); ScrollView::Color color = GetMatchColorFor(Evidence);
RenderIntProto(IntMatchWindow, Class, ProtoId, color); RenderIntProto(IntMatchWindow, Class, ProtoId, color);
if (ProtoDisplayWindow) { if (ProtoDisplayWindow) {
@ -669,20 +658,18 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
} /* DisplayIntProto */ } /* DisplayIntProto */
#endif #endif
/*---------------------------------------------------------------------------*/ /**
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { * This routine creates a new integer class data structure
/* * and returns it. Sufficient space is allocated
** Parameters: * to handle the specified number of protos and configs.
** MaxNumProtos number of protos to allocate space for * @param MaxNumProtos number of protos to allocate space for
** MaxNumConfigs number of configs to allocate space for * @param MaxNumConfigs number of configs to allocate space for
** Globals: none * @return New class created.
** Operation: This routine creates a new integer class data structure * @note Globals: none
** and returns it. Sufficient space is allocated * @note Exceptions: none
** to handle the specified number of protos and configs. * @note History: Fri Feb 8 10:51:23 1991, DSJ, Created.
** Return: New class created.
** Exceptions: none
** History: Fri Feb 8 10:51:23 1991, DSJ, Created.
*/ */
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
INT_CLASS Class; INT_CLASS Class;
PROTO_SET ProtoSet; PROTO_SET ProtoSet;
int i; int i;
@ -721,7 +708,6 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
} /* NewIntClass */ } /* NewIntClass */
/*-------------------------------------------------------------------------*/
void free_int_class(INT_CLASS int_class) { void free_int_class(INT_CLASS int_class) {
int i; int i;
@ -735,17 +721,15 @@ void free_int_class(INT_CLASS int_class) {
} }
/*---------------------------------------------------------------------------*/ /**
INT_TEMPLATES NewIntTemplates() { * This routine allocates a new set of integer templates
/* * initialized to hold 0 classes.
** Parameters: none * @return The integer templates created.
** Globals: none * @note Globals: none
** Operation: This routine allocates a new set of integer templates * @note Exceptions: none
** initialized to hold 0 classes. * @note History: Fri Feb 8 08:38:51 1991, DSJ, Created.
** Return: The integer templates created.
** Exceptions: none
** History: Fri Feb 8 08:38:51 1991, DSJ, Created.
*/ */
INT_TEMPLATES NewIntTemplates() {
INT_TEMPLATES T; INT_TEMPLATES T;
int i; int i;
@ -773,18 +757,17 @@ void free_int_templates(INT_TEMPLATES templates) {
namespace tesseract { namespace tesseract {
INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) { /**
/* * This routine reads a set of integer templates from
** Parameters: * File. File must already be open and must be in the
** File open file to read templates from * correct binary format.
** Globals: none * @param File open file to read templates from
** Operation: This routine reads a set of integer templates from * @return Pointer to integer templates read from File.
** File. File must already be open and must be in the * @note Globals: none
** correct binary format. * @note Exceptions: none
** Return: Pointer to integer templates read from File. * @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
** Exceptions: none
** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
*/ */
INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
int i, j, w, x, y, z; int i, j, w, x, y, z;
BOOL8 swap; BOOL8 swap;
int nread; int nread;
@ -1081,20 +1064,19 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
} /* ReadIntTemplates */ } /* ReadIntTemplates */
/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
void Classify::ShowMatchDisplay() { /**
/* * This routine sends the shapes in the global display
** Parameters: none * lists to the match debugger window.
** Globals: *
** FeatureShapes display list containing feature matches * Globals:
** ProtoShapes display list containing proto matches * - FeatureShapes display list containing feature matches
** Operation: This routine sends the shapes in the global display * - ProtoShapes display list containing proto matches
** lists to the match debugger window. * @return none
** Return: none * @note Exceptions: none
** Exceptions: none * @note History: Thu Mar 21 15:47:33 1991, DSJ, Created.
** History: Thu Mar 21 15:47:33 1991, DSJ, Created.
*/ */
void Classify::ShowMatchDisplay() {
InitIntMatchWindowIfReqd(); InitIntMatchWindowIfReqd();
if (ProtoDisplayWindow) { if (ProtoDisplayWindow) {
ProtoDisplayWindow->Clear(); ProtoDisplayWindow->Clear();
@ -1117,8 +1099,8 @@ void Classify::ShowMatchDisplay() {
} }
} /* ShowMatchDisplay */ } /* ShowMatchDisplay */
// Clears the given window and draws the featurespace guides for the /// Clears the given window and draws the featurespace guides for the
// appropriate normalization method. /// appropriate normalization method.
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) { void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
window->Clear(); window->Clear();
@ -1141,21 +1123,20 @@ void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
} }
#endif #endif
/*---------------------------------------------------------------------------*/ /**
* This routine writes Templates to File. The format
* is an efficient binary format. File must already be open
* for writing.
* @param File open file to write templates to
* @param Templates templates to save into File
* @param target_unicharset the UNICHARSET to use
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
*/
void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
const UNICHARSET& target_unicharset) { const UNICHARSET& target_unicharset) {
/*
** Parameters:
** File open file to write templates to
** Templates templates to save into File
** Globals: none
** Operation: This routine writes Templates to File. The format
** is an efficient binary format. File must already be open
** for writing.
** Return: none
** Exceptions: none
** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
*/
int i, j; int i, j;
INT_CLASS Class; INT_CLASS Class;
int unicharset_size = target_unicharset.size(); int unicharset_size = target_unicharset.size();
@ -1219,68 +1200,62 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
Private Code Private Code
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { * This routine returns the parameter value which
/* * corresponds to the beginning of the specified bucket.
** Parameters: * The bucket number should have been generated using the
** Bucket bucket whose start is to be computed * BucketFor() function with parameters Offset and NumBuckets.
** Offset offset used to map params to buckets * @param Bucket bucket whose start is to be computed
** NumBuckets total number of buckets * @param Offset offset used to map params to buckets
** Globals: none * @param NumBuckets total number of buckets
** Operation: This routine returns the parameter value which * @return Param value corresponding to start position of Bucket.
** corresponds to the beginning of the specified bucket. * @note Globals: none
** The bucket number should have been generated using the * @note Exceptions: none
** BucketFor() function with parameters Offset and NumBuckets. * @note History: Thu Feb 14 13:24:33 1991, DSJ, Created.
** Return: Param value corresponding to start position of Bucket.
** Exceptions: none
** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
*/ */
FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) {
return (((FLOAT32) Bucket / NumBuckets) - Offset); return (((FLOAT32) Bucket / NumBuckets) - Offset);
} /* BucketStart */ } /* BucketStart */
/*---------------------------------------------------------------------------*/ /**
FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { * This routine returns the parameter value which
/* * corresponds to the end of the specified bucket.
** Parameters: * The bucket number should have been generated using the
** Bucket bucket whose end is to be computed * BucketFor() function with parameters Offset and NumBuckets.
** Offset offset used to map params to buckets * @param Bucket bucket whose end is to be computed
** NumBuckets total number of buckets * @param Offset offset used to map params to buckets
** Globals: none * @param NumBuckets total number of buckets
** Operation: This routine returns the parameter value which * @return Param value corresponding to end position of Bucket.
** corresponds to the end of the specified bucket. * @note Globals: none
** The bucket number should have been generated using the * @note Exceptions: none
** BucketFor() function with parameters Offset and NumBuckets. * @note History: Thu Feb 14 13:24:33 1991, DSJ, Created.
** Return: Param value corresponding to end position of Bucket.
** Exceptions: none
** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
*/ */
FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) {
return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset); return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset);
} /* BucketEnd */ } /* BucketEnd */
/*---------------------------------------------------------------------------*/ /**
* This routine fills in the section of a class pruner
* corresponding to a single x value for a single proto of
* a class.
* @param FillSpec specifies which bits to fill in pruner
* @param Pruner class pruner to be filled
* @param ClassMask indicates which bits to change in each word
* @param ClassCount indicates what to change bits to
* @param WordIndex indicates which word to change
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Feb 19 11:11:29 1991, DSJ, Created.
*/
void DoFill(FILL_SPEC *FillSpec, void DoFill(FILL_SPEC *FillSpec,
CLASS_PRUNER_STRUCT* Pruner, CLASS_PRUNER_STRUCT* Pruner,
register uinT32 ClassMask, register uinT32 ClassMask,
register uinT32 ClassCount, register uinT32 ClassCount,
register uinT32 WordIndex) { register uinT32 WordIndex) {
/*
** Parameters:
** FillSpec specifies which bits to fill in pruner
** Pruner class pruner to be filled
** ClassMask indicates which bits to change in each word
** ClassCount indicates what to change bits to
** WordIndex indicates which word to change
** Globals: none
** Operation: This routine fills in the section of a class pruner
** corresponding to a single x value for a single proto of
** a class.
** Return: none
** Exceptions: none
** History: Tue Feb 19 11:11:29 1991, DSJ, Created.
*/
register int X, Y, Angle; register int X, Y, Angle;
register uinT32 OldWord; register uinT32 OldWord;
@ -1310,18 +1285,16 @@ void DoFill(FILL_SPEC *FillSpec,
} /* DoFill */ } /* DoFill */
/*---------------------------------------------------------------------------*/ /**
BOOL8 FillerDone(TABLE_FILLER *Filler) { * Return TRUE if the specified table filler is done, i.e.
/* * if it has no more lines to fill.
** Parameters: * @param Filler table filler to check if done
** Filler table filler to check if done * @return TRUE if no more lines to fill, FALSE otherwise.
** Globals: none * @note Globals: none
** Operation: Return TRUE if the specified table filler is done, i.e. * @note Exceptions: none
** if it has no more lines to fill. * @note History: Tue Feb 19 10:08:05 1991, DSJ, Created.
** Return: TRUE if no more lines to fill, FALSE otherwise.
** Exceptions: none
** History: Tue Feb 19 10:08:05 1991, DSJ, Created.
*/ */
BOOL8 FillerDone(TABLE_FILLER *Filler) {
FILL_SWITCH *Next; FILL_SWITCH *Next;
Next = &(Filler->Switch[Filler->NextSwitch]); Next = &(Filler->Switch[Filler->NextSwitch]);
@ -1334,26 +1307,25 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) {
} /* FillerDone */ } /* FillerDone */
/*---------------------------------------------------------------------------*/ /**
* This routine sets Bit in each bit vector whose
* bucket lies within the range Center +- Spread. The fill
* is done for a circular dimension, i.e. bucket 0 is adjacent
* to the last bucket. It is assumed that Center and Spread
* are expressed in a circular coordinate system whose range
* is 0 to 1.
* @param ParamTable table of bit vectors, one per param bucket
* @param Bit bit position in vectors to be filled
* @param Center center of filled area
* @param Spread spread of filled area
* @param debug debug flag
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Oct 16 09:26:54 1990, DSJ, Created.
*/
void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) {
/*
** Parameters:
** ParamTable table of bit vectors, one per param bucket
** Bit bit position in vectors to be filled
** Center center of filled area
** Spread spread of filled area
** Globals: none
** Operation: This routine sets Bit in each bit vector whose
** bucket lies within the range Center +- Spread. The fill
** is done for a circular dimension, i.e. bucket 0 is adjacent
** to the last bucket. It is assumed that Center and Spread
** are expressed in a circular coordinate system whose range
** is 0 to 1.
** Return: none
** Exceptions: none
** History: Tue Oct 16 09:26:54 1990, DSJ, Created.
*/
int i, FirstBucket, LastBucket; int i, FirstBucket, LastBucket;
if (Spread > 0.5) if (Spread > 0.5)
@ -1378,27 +1350,26 @@ void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
} /* FillPPCircularBits */ } /* FillPPCircularBits */
/*---------------------------------------------------------------------------*/ /**
* This routine sets Bit in each bit vector whose
* bucket lies within the range Center +- Spread. The fill
* is done for a linear dimension, i.e. there is no wrap-around
* for this dimension. It is assumed that Center and Spread
* are expressed in a linear coordinate system whose range
* is approximately 0 to 1. Values outside this range will
* be clipped.
* @param ParamTable table of bit vectors, one per param bucket
* @param Bit bit number being filled
* @param Center center of filled area
* @param Spread spread of filled area
* @param debug debug flag
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Oct 16 09:26:54 1990, DSJ, Created.
*/
void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) { int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) {
/*
** Parameters:
** ParamTable table of bit vectors, one per param bucket
** Bit bit number being filled
** Center center of filled area
** Spread spread of filled area
** Globals: none
** Operation: This routine sets Bit in each bit vector whose
** bucket lies within the range Center +- Spread. The fill
** is done for a linear dimension, i.e. there is no wrap-around
** for this dimension. It is assumed that Center and Spread
** are expressed in a linear coordinate system whose range
** is approximately 0 to 1. Values outside this range will
** be clipped.
** Return: none
** Exceptions: none
** History: Tue Oct 16 09:26:54 1990, DSJ, Created.
*/
int i, FirstBucket, LastBucket; int i, FirstBucket, LastBucket;
FirstBucket = (int) floor ((Center - Spread) * NUM_PP_BUCKETS); FirstBucket = (int) floor ((Center - Spread) * NUM_PP_BUCKETS);
@ -1419,18 +1390,20 @@ void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
namespace tesseract { namespace tesseract {
/**
* This routine prompts the user with Prompt and waits
* for the user to enter something in the debug window.
* @param Prompt prompt to print while waiting for input from window
* @param adaptive_on
* @param pretrained_on
* @param shape_id
* @return Character entered in the debug window.
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 21 16:55:13 1991, DSJ, Created.
*/
CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on, CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
bool* pretrained_on, int* shape_id) { bool* pretrained_on, int* shape_id) {
/*
** Parameters:
** Prompt prompt to print while waiting for input from window
** Globals: none
** Operation: This routine prompts the user with Prompt and waits
** for the user to enter something in the debug window.
** Return: Character entered in the debug window.
** Exceptions: none
** History: Thu Mar 21 16:55:13 1991, DSJ, Created.
*/
tprintf("%s\n", Prompt); tprintf("%s\n", Prompt);
SVEvent* ev; SVEvent* ev;
SVEventType ev_type; SVEventType ev_type;
@ -1494,27 +1467,25 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
} // namespace tesseract } // namespace tesseract
#endif #endif
/*---------------------------------------------------------------------------*/ /**
* This routine copies the appropriate global pad variables
* into EndPad, SidePad, and AnglePad. This is a kludge used
* to get around the fact that global control variables cannot
* be arrays. If the specified level is illegal, the tightest
* possible pads are returned.
* @param Level "tightness" level to return pads for
* @param EndPad place to put end pad for Level
* @param SidePad place to put side pad for Level
* @param AnglePad place to put angle pad for Level
* @return none (results are returned in EndPad, SidePad, and AnglePad).
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Feb 14 08:26:49 1991, DSJ, Created.
*/
void GetCPPadsForLevel(int Level, void GetCPPadsForLevel(int Level,
FLOAT32 *EndPad, FLOAT32 *EndPad,
FLOAT32 *SidePad, FLOAT32 *SidePad,
FLOAT32 *AnglePad) { FLOAT32 *AnglePad) {
/*
** Parameters:
** Level "tightness" level to return pads for
** EndPad place to put end pad for Level
** SidePad place to put side pad for Level
** AnglePad place to put angle pad for Level
** Globals: none
** Operation: This routine copies the appropriate global pad variables
** into EndPad, SidePad, and AnglePad. This is a kludge used
** to get around the fact that global control variables cannot
** be arrays. If the specified level is illegal, the tightest
** possible pads are returned.
** Return: none (results are returned in EndPad, SidePad, and AnglePad.
** Exceptions: none
** History: Thu Feb 14 08:26:49 1991, DSJ, Created.
*/
switch (Level) { switch (Level) {
case 0: case 0:
*EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength ();
@ -1546,18 +1517,14 @@ void GetCPPadsForLevel(int Level,
} /* GetCPPadsForLevel */ } /* GetCPPadsForLevel */
/*---------------------------------------------------------------------------*/ /**
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { * @param Evidence evidence value to return color for
/* * @return Color which corresponds to specified Evidence value.
** Parameters: * @note Globals: none
** Evidence evidence value to return color for * @note Exceptions: none
** Globals: none * @note History: Thu Mar 21 15:24:52 1991, DSJ, Created.
** Operation:
** Return: Color which corresponds to specified Evidence value.
** Exceptions: none
** History: Thu Mar 21 15:24:52 1991, DSJ, Created.
*/ */
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
assert (Evidence >= 0.0); assert (Evidence >= 0.0);
assert (Evidence <= 1.0); assert (Evidence <= 1.0);
@ -1572,21 +1539,19 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
} /* GetMatchColorFor */ } /* GetMatchColorFor */
/*---------------------------------------------------------------------------*/ /**
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { * This routine returns (in Fill) the specification of
/* * the next line to be filled from Filler. FillerDone() should
** Parameters: * always be called before GetNextFill() to ensure that we
** Filler filler to get next fill spec from * do not run past the end of the fill table.
** Fill place to put spec for next fill * @param Filler filler to get next fill spec from
** Globals: none * @param Fill place to put spec for next fill
** Operation: This routine returns (in Fill) the specification of * @return none (results are returned in Fill)
** the next line to be filled from Filler. FillerDone() should * @note Globals: none
** always be called before GetNextFill() to ensure that we * @note Exceptions: none
** do not run past the end of the fill table. * @note History: Tue Feb 19 10:17:42 1991, DSJ, Created.
** Return: none (results are returned in Fill)
** Exceptions: none
** History: Tue Feb 19 10:17:42 1991, DSJ, Created.
*/ */
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
FILL_SWITCH *Next; FILL_SWITCH *Next;
/* compute the fill assuming no switches will be encountered */ /* compute the fill assuming no switches will be encountered */
@ -1625,7 +1590,6 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
} /* GetNextFill */ } /* GetNextFill */
/*---------------------------------------------------------------------------*/
/** /**
* This routine computes a data structure (Filler) * This routine computes a data structure (Filler)
* which can be used to fill in a rectangle surrounding * which can be used to fill in a rectangle surrounding
@ -1635,9 +1599,8 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
* @param Proto proto to create a filler for * @param Proto proto to create a filler for
* @param Filler place to put table filler * @param Filler place to put table filler
* *
* Globals: none
*
* @return none (results are returned in Filler) * @return none (results are returned in Filler)
* @note Globals: none
* @note Exceptions: none * @note Exceptions: none
* @note History: Thu Feb 14 09:27:05 1991, DSJ, Created. * @note History: Thu Feb 14 09:27:05 1991, DSJ, Created.
*/ */
@ -1794,14 +1757,13 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
/* /**
* Parameters: * This routine renders the specified feature into window.
* ShapeList shape list to add feature rendering to * @param window window to add feature rendering to
* Feature feature to be rendered * @param Feature feature to be rendered
* Color color to use for feature rendering * @param color color to use for feature rendering
* Globals: none * @return none
* Operation: This routine renders the specified feature into ShapeList. * @note Globals: none
* Return: New shape list with rendering of Feature added.
* @note Exceptions: none * @note Exceptions: none
* @note History: Thu Mar 21 14:57:41 1991, DSJ, Created. * @note History: Thu Mar 21 14:57:41 1991, DSJ, Created.
*/ */
@ -1826,12 +1788,12 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
} /* RenderIntFeature */ } /* RenderIntFeature */
/*---------------------------------------------------------------------------*/ /**
/*
* This routine extracts the parameters of the specified * This routine extracts the parameters of the specified
* proto from the class description and adds a rendering of * proto from the class description and adds a rendering of
* the proto onto the ShapeList. * the proto onto the ShapeList.
* *
* @param window ScrollView instance
* @param Class class that proto is contained in * @param Class class that proto is contained in
* @param ProtoId id of proto to be rendered * @param ProtoId id of proto to be rendered
* @param color color to render proto in * @param color color to render proto in
@ -1894,7 +1856,6 @@ void RenderIntProto(ScrollView *window,
} /* RenderIntProto */ } /* RenderIntProto */
#endif #endif
/*---------------------------------------------------------------------------*/
/** /**
* This routine truncates Param to lie within the range * This routine truncates Param to lie within the range
* of Min-Max inclusive. If a truncation is performed, and * of Min-Max inclusive. If a truncation is performed, and
@ -1926,7 +1887,6 @@ int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) {
} /* TruncateParam */ } /* TruncateParam */
/*---------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
/** /**
* Initializes the int matcher window if it is not already * Initializes the int matcher window if it is not already
@ -1971,8 +1931,8 @@ void InitFeatureDisplayWindowIfReqd() {
} }
} }
// Creates a window of the appropriate size for displaying elements /// Creates a window of the appropriate size for displaying elements
// in feature space. /// in feature space.
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) { ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) {
return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true); return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true);
} }

View File

@ -48,7 +48,7 @@ static int NextLevel(KDTREE *tree, int level) {
} }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Store the k smallest-keyed key-value pairs. /** Store the k smallest-keyed key-value pairs. */
template<typename Key, typename Value> template<typename Key, typename Value>
class MinK { class MinK {
public: public:
@ -70,11 +70,11 @@ class MinK {
const Element* elements() { return elements_; } const Element* elements() { return elements_; }
private: private:
const Key max_key_; // the maximum possible Key const Key max_key_; ///< the maximum possible Key
Element* elements_; // unsorted array of elements Element* elements_; ///< unsorted array of elements
int elements_count_; // the number of results collected so far int elements_count_; ///< the number of results collected so far
int k_; // the number of results we want from the search int k_; ///< the number of results we want from the search
int max_index_; // the index of the result with the largest key int max_index_; ///< the index of the result with the largest key
}; };
template<typename Key, typename Value> template<typename Key, typename Value>
@ -117,13 +117,13 @@ bool MinK<Key, Value>::insert(Key key, Value value) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Helper class for searching for the k closest points to query_point in tree. /** Helper class for searching for the k closest points to query_point in tree. */
class KDTreeSearch { class KDTreeSearch {
public: public:
KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest); KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest);
~KDTreeSearch(); ~KDTreeSearch();
// Return the k nearest points' data. /** Return the k nearest points' data. */
void Search(int *result_count, FLOAT32 *distances, void **results); void Search(int *result_count, FLOAT32 *distances, void **results);
private: private:
@ -133,8 +133,8 @@ class KDTreeSearch {
KDTREE *tree_; KDTREE *tree_;
FLOAT32 *query_point_; FLOAT32 *query_point_;
MinK<FLOAT32, void *>* results_; MinK<FLOAT32, void *>* results_;
FLOAT32 *sb_min_; // search box minimum FLOAT32 *sb_min_; ///< search box minimum
FLOAT32 *sb_max_; // search box maximum FLOAT32 *sb_max_; ///< search box maximum
}; };
KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) : KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) :
@ -151,8 +151,8 @@ KDTreeSearch::~KDTreeSearch() {
delete[] sb_max_; delete[] sb_max_;
} }
// Locate the k_closest points to query_point_, and return their distances and /// Locate the k_closest points to query_point_, and return their distances and
// data into the given buffers. /// data into the given buffers.
void KDTreeSearch::Search(int *result_count, void KDTreeSearch::Search(int *result_count,
FLOAT32 *distances, FLOAT32 *distances,
void **results) { void **results) {
@ -176,11 +176,9 @@ void KDTreeSearch::Search(int *result_count,
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
Public Code Public Code
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /// @return a new KDTREE based on the specified parameters.
/// Return a new KDTREE based on the specified parameters. /// @param KeySize # of dimensions in the K-D tree
/// Parameters: /// @param KeyDesc array of params to describe key dimensions
/// KeySize # of dimensions in the K-D tree
/// KeyDesc array of params to describe key dimensions
KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) { KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) {
KDTREE *KDTree = (KDTREE *) Emalloc( KDTREE *KDTree = (KDTREE *) Emalloc(
sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC)); sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC));
@ -205,8 +203,6 @@ KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) {
} }
/*---------------------------------------------------------------------------*/
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
/** /**
* This routine stores Data in the K-D tree specified by Tree * This routine stores Data in the K-D tree specified by Tree
* using Key as an access key. * using Key as an access key.
@ -219,6 +215,7 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
* @note History: 3/10/89, DSJ, Created. * @note History: 3/10/89, DSJ, Created.
* 7/13/89, DSJ, Changed return to void. * 7/13/89, DSJ, Changed return to void.
*/ */
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
int Level; int Level;
KDNODE *Node; KDNODE *Node;
KDNODE **PtrToNode; KDNODE **PtrToNode;
@ -245,7 +242,6 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
} /* KDStore */ } /* KDStore */
/*---------------------------------------------------------------------------*/
/** /**
* This routine deletes a node from Tree. The node to be * This routine deletes a node from Tree. The node to be
* deleted is specified by the Key for the node and the Data * deleted is specified by the Key for the node and the Data
@ -303,39 +299,36 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
} /* KDDelete */ } /* KDDelete */
/*---------------------------------------------------------------------------*/ /**
* This routine searches the K-D tree specified by Tree and
* finds the QuerySize nearest neighbors of Query. All neighbors
* must be within MaxDistance of Query. The data contents of
* the nearest neighbors
* are placed in NBuffer and their distances from Query are
* placed in DBuffer.
* @param Tree ptr to K-D tree to be searched
* @param Query ptr to query key (point in D-space)
* @param QuerySize number of nearest neighbors to be found
* @param MaxDistance all neighbors must be within this distance
* @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
* @param DBuffer ptr to QuerySize buffer to hold distances
* from nearest neighbor to query point
* @param[out] NumberOfResults Number of nearest neighbors actually found
* @note Exceptions: none
* @note History:
* - 3/10/89, DSJ, Created.
* - 7/13/89, DSJ, Return contents of node instead of node itself.
*/
void KDNearestNeighborSearch( void KDNearestNeighborSearch(
KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance, KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance,
int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) { int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) {
/*
** Parameters:
** Tree ptr to K-D tree to be searched
** Query ptr to query key (point in D-space)
** QuerySize number of nearest neighbors to be found
** MaxDistance all neighbors must be within this distance
** NBuffer ptr to QuerySize buffer to hold nearest neighbors
** DBuffer ptr to QuerySize buffer to hold distances
** from nearest neighbor to query point
** Operation:
** This routine searches the K-D tree specified by Tree and
** finds the QuerySize nearest neighbors of Query. All neighbors
** must be within MaxDistance of Query. The data contents of
** the nearest neighbors
** are placed in NBuffer and their distances from Query are
** placed in DBuffer.
** Return: Number of nearest neighbors actually found
** Exceptions: none
** History:
** 3/10/89, DSJ, Created.
** 7/13/89, DSJ, Return contents of node instead of node itself.
*/
KDTreeSearch search(Tree, Query, QuerySize); KDTreeSearch search(Tree, Query, QuerySize);
search.Search(NumberOfResults, DBuffer, NBuffer); search.Search(NumberOfResults, DBuffer, NBuffer);
} }
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Walk a given Tree with action. /** Walk a given Tree with action. */
void KDWalk(KDTREE *Tree, void_proc action, void *context) { void KDWalk(KDTREE *Tree, void_proc action, void *context) {
if (Tree->Root.Left != NULL) if (Tree->Root.Left != NULL)
Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
@ -343,22 +336,19 @@ void KDWalk(KDTREE *Tree, void_proc action, void *context) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeKDTree(KDTREE *Tree) { /**
/* * This routine frees all memory which is allocated to the
** Parameters: * specified KD-tree. This includes the data structure for
** Tree tree data structure to be released * the kd-tree itself plus the data structures for each node
** Operation: * in the tree. It does not include the Key and Data items
** This routine frees all memory which is allocated to the * which are pointed to by the nodes. This memory is left
** specified KD-tree. This includes the data structure for * untouched.
** the kd-tree itself plus the data structures for each node * @param Tree tree data structure to be released
** in the tree. It does not include the Key and Data items * @return none
** which are pointed to by the nodes. This memory is left * @note Exceptions: none
** untouched. * @note History: 5/26/89, DSJ, Created.
** Return: none
** Exceptions: none
** History:
** 5/26/89, DSJ, Created.
*/ */
void FreeKDTree(KDTREE *Tree) {
FreeSubTree(Tree->Root.Left); FreeSubTree(Tree->Root.Left);
memfree(Tree); memfree(Tree);
} /* FreeKDTree */ } /* FreeKDTree */
@ -368,25 +358,20 @@ void FreeKDTree(KDTREE *Tree) {
Private Code Private Code
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) { /**
/* * This routine allocates memory for a new K-D tree node
** Parameters: * and places the specified Key and Data into it. The
** tree The tree to create the node for * left and right subtree pointers for the node are
** Key Access key for new node in KD tree * initialized to empty subtrees.
** Data ptr to data to be stored in new node * @param tree The tree to create the node for
** Index index of Key to branch on * @param Key Access key for new node in KD tree
** Operation: * @param Data ptr to data to be stored in new node
** This routine allocates memory for a new K-D tree node * @param Index index of Key to branch on
** and places the specified Key and Data into it. The * @return pointer to new K-D tree node
** left and right subtree pointers for the node are * @note Exceptions: None
** initialized to empty subtrees. * @note History: 3/11/89, DSJ, Created.
** Return:
** pointer to new K-D tree node
** Exceptions:
** None
** History:
** 3/11/89, DSJ, Created.
*/ */
KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) {
KDNODE *NewNode; KDNODE *NewNode;
NewNode = (KDNODE *) Emalloc (sizeof (KDNODE)); NewNode = (KDNODE *) Emalloc (sizeof (KDNODE));
@ -410,10 +395,11 @@ void FreeKDNode(KDNODE *Node) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Recursively accumulate the k_closest points to query_point_ into results_. /**
// Parameters: * Recursively accumulate the k_closest points to query_point_ into results_.
// Level level in tree of sub-tree to be searched * @param level level in tree of sub-tree to be searched
// SubTree sub-tree to be searched * @param sub_tree sub-tree to be searched
*/
void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
if (level >= tree_->KeySize) if (level >= tree_->KeySize)
level = 0; level = 0;
@ -456,12 +442,13 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Returns the Euclidean distance squared between p1 and p2 for all essential /**
// dimensions. * Returns the Euclidean distance squared between p1 and p2 for all essential
// Parameters: * dimensions.
// k keys are in k-space * @param k keys are in k-space
// dim dimension descriptions (essential, circular, etc) * @param dim dimension descriptions (essential, circular, etc)
// p1,p2 two different points in K-D space * @param p1,p2 two different points in K-D space
*/
FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) { FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
FLOAT32 total_distance = 0; FLOAT32 total_distance = 0;
@ -488,10 +475,10 @@ FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
} }
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Return whether the query region (the smallest known circle about /// Return whether the query region (the smallest known circle about
// query_point_ containing results->k_ points) intersects the box specified /// query_point_ containing results->k_ points) intersects the box specified
// between lower and upper. For circular dimensions, we also check the point /// between lower and upper. For circular dimensions, we also check the point
// one wrap distance away from the query. /// one wrap distance away from the query.
bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) { bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
FLOAT32 *query = query_point_; FLOAT32 *query = query_point_;
FLOAT64 total_distance = 0.0; FLOAT64 total_distance = 0.0;
@ -530,20 +517,21 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Walk a tree, calling action once on each node. /**
// * Walk a tree, calling action once on each node.
// Parameters: *
// tree root of the tree being walked. * Operation:
// action action to be performed at every node * This routine walks thru the specified sub_tree and invokes action
// context action's context * at each node as follows:
// sub_tree ptr to root of subtree to be walked * action(context, data, level)
// level current level in the tree for this node * data the data contents of the node being visited,
// Operation: * level is the level of the node in the tree with the root being level 0.
// This routine walks thru the specified sub_tree and invokes action * @param tree root of the tree being walked.
// action at each node as follows: * @param action action to be performed at every node
// action(context, data, level) * @param context action's context
// data the data contents of the node being visited, * @param sub_tree ptr to root of subtree to be walked
// level is the level of the node in the tree with the root being level 0. * @param level current level in the tree for this node
*/
void Walk(KDTREE *tree, void_proc action, void *context, void Walk(KDTREE *tree, void_proc action, void *context,
KDNODE *sub_tree, inT32 level) { KDNODE *sub_tree, inT32 level) {
(*action)(context, sub_tree->Data, level); (*action)(context, sub_tree->Data, level);
@ -554,7 +542,7 @@ void Walk(KDTREE *tree, void_proc action, void *context,
} }
// Given a subtree nodes, insert all of its elements into tree. /** Given a subtree nodes, insert all of its elements into tree. */
void InsertNodes(KDTREE *tree, KDNODE *nodes) { void InsertNodes(KDTREE *tree, KDNODE *nodes) {
if (nodes == NULL) if (nodes == NULL)
return; return;
@ -564,11 +552,11 @@ void InsertNodes(KDTREE *tree, KDNODE *nodes) {
InsertNodes(tree, nodes->Right); InsertNodes(tree, nodes->Right);
} }
// Free all of the nodes of a sub tree. /** Free all of the nodes of a sub tree. */
void FreeSubTree(KDNODE *sub_tree) { void FreeSubTree(KDNODE *sub_tree) {
if (sub_tree != NULL) { if (sub_tree != NULL) {
FreeSubTree(sub_tree->Left); FreeSubTree(sub_tree->Left);
FreeSubTree(sub_tree->Right); FreeSubTree(sub_tree->Right);
memfree(sub_tree); memfree(sub_tree);
} }
} /* FreeSubTree */ }

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "mf.h" #include "mf.h"
#include "featdefs.h" #include "featdefs.h"
@ -28,24 +28,25 @@
/**---------------------------------------------------------------------------- /**----------------------------------------------------------------------------
Global Data Definitions and Declarations Global Data Definitions and Declarations
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) { * Call the old micro-feature extractor and then copy
/* * the features into the new format. Then deallocate the
** Parameters: * old micro-features.
** Blob blob to extract micro-features from * @param Blob blob to extract micro-features from
** denorm control parameter to feature extractor. * @param bl_denorm currently unused
** Globals: none * @param cn_denorm control parameter to feature extractor.
** Operation: Call the old micro-feature extractor and then copy * @param fx_info currently unused
** the features into the new format. Then deallocate the * @return Micro-features for Blob.
** old micro-features. * @note Exceptions: none
** Return: Micro-features for Blob. * @note History: Wed May 23 18:06:38 1990, DSJ, Created.
** Exceptions: none
** History: Wed May 23 18:06:38 1990, DSJ, Created.
*/ */
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
int NumFeatures; int NumFeatures;
MICROFEATURES Features, OldFeatures; MICROFEATURES Features, OldFeatures;
FEATURE_SET FeatureSet; FEATURE_SET FeatureSet;

View File

@ -15,44 +15,36 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
#include "mfdefs.h" #include "mfdefs.h"
#include "emalloc.h" #include "emalloc.h"
#include <math.h> #include <math.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
MICROFEATURE NewMicroFeature() { /**
/* * This routine allocates and returns a new micro-feature
** Parameters: none * data structure.
** Globals: none * @return New MICROFEATURE
** Operation: * @note History: 7/27/89, DSJ, Created.
** This routine allocates and returns a new micro-feature
** data structure.
** Return: New micro-feature.
** Exceptions: none
** History: 7/27/89, DSJ, Created.
*/ */
MICROFEATURE NewMicroFeature() {
return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK))); return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK)));
} /* NewMicroFeature */ } /* NewMicroFeature */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeMicroFeatures(MICROFEATURES MicroFeatures) { /**
/* * This routine deallocates all of the memory consumed by
** Parameters: * a list of micro-features.
** MicroFeatures list of micro-features to be freed * @param MicroFeatures list of micro-features to be freed
** Globals: none * @return none
** Operation: * @note History: 7/27/89, DSJ, Created.
** This routine deallocates all of the memory consumed by
** a list of micro-features.
** Return: none
** Exceptions: none
** History: 7/27/89, DSJ, Created.
*/ */
void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
destroy_nodes(MicroFeatures, Efree); destroy_nodes(MicroFeatures, Efree);
} /* FreeMicroFeatures */ } /* FreeMicroFeatures */

View File

@ -35,7 +35,7 @@
----------------------------------------------------------------------------*/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Convert a blob into a list of MFOUTLINEs (float-based microfeature format). /** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */
LIST ConvertBlob(TBLOB *blob) { LIST ConvertBlob(TBLOB *blob) {
LIST outlines = NIL_LIST; LIST outlines = NIL_LIST;
return (blob == NULL) return (blob == NULL)
@ -45,7 +45,7 @@ LIST ConvertBlob(TBLOB *blob) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. /** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
MFOUTLINE ConvertOutline(TESSLINE *outline) { MFOUTLINE ConvertOutline(TESSLINE *outline) {
MFEDGEPT *NewPoint; MFEDGEPT *NewPoint;
MFOUTLINE MFOutline = NIL_LIST; MFOUTLINE MFOutline = NIL_LIST;
@ -81,12 +81,13 @@ MFOUTLINE ConvertOutline(TESSLINE *outline) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). /**
// * Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
// Parameters: *
// outline first outline to be converted * @param outline first outline to be converted
// mf_outlines list to add converted outlines to * @param mf_outlines list to add converted outlines to
// outline_type are the outlines outer or holes? * @param outline_type are the outlines outer or holes?
*/
LIST ConvertOutlines(TESSLINE *outline, LIST ConvertOutlines(TESSLINE *outline,
LIST mf_outlines, LIST mf_outlines,
OUTLINETYPE outline_type) { OUTLINETYPE outline_type) {
@ -102,26 +103,23 @@ LIST ConvertOutlines(TESSLINE *outline,
} }
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine searches thru the specified outline, computes
* a slope for each vector in the outline, and marks each
* vector as having one of the following directions:
* N, S, E, W, NE, NW, SE, SW
* This information is then stored in the outline and the
* outline is returned.
* @param Outline micro-feature outline to analyze
* @param MinSlope controls "snapping" of segments to horizontal
* @param MaxSlope controls "snapping" of segments to vertical
* @return none
* @note Exceptions: none
* @note History: 7/21/89, DSJ, Created.
*/
void FindDirectionChanges(MFOUTLINE Outline, void FindDirectionChanges(MFOUTLINE Outline,
FLOAT32 MinSlope, FLOAT32 MinSlope,
FLOAT32 MaxSlope) { FLOAT32 MaxSlope) {
/*
** Parameters:
** Outline micro-feature outline to analyze
** MinSlope controls "snapping" of segments to horizontal
** MaxSlope controls "snapping" of segments to vertical
** Globals: none
** Operation:
** This routine searches thru the specified outline, computes
** a slope for each vector in the outline, and marks each
** vector as having one of the following directions:
** N, S, E, W, NE, NW, SE, SW
** This information is then stored in the outline and the
** outline is returned.
** Return: none
** Exceptions: none
** History: 7/21/89, DSJ, Created.
*/
MFEDGEPT *Current; MFEDGEPT *Current;
MFEDGEPT *Last; MFEDGEPT *Last;
MFOUTLINE EdgePoint; MFOUTLINE EdgePoint;
@ -145,18 +143,15 @@ void FindDirectionChanges(MFOUTLINE Outline,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeMFOutline(void *arg) { //MFOUTLINE Outline) /**
/* * This routine deallocates all of the memory consumed by
** Parameters: * a micro-feature outline.
** Outline micro-feature outline to be freed * @param arg micro-feature outline to be freed
** Globals: none * @return none
** Operation: * @note Exceptions: none
** This routine deallocates all of the memory consumed by * @note History: 7/27/89, DSJ, Created.
** a micro-feature outline.
** Return: none
** Exceptions: none
** History: 7/27/89, DSJ, Created.
*/ */
void FreeMFOutline(void *arg) { //MFOUTLINE Outline)
MFOUTLINE Start; MFOUTLINE Start;
MFOUTLINE Outline = (MFOUTLINE) arg; MFOUTLINE Outline = (MFOUTLINE) arg;
@ -172,39 +167,35 @@ void FreeMFOutline(void *arg) { //MFOUTLINE Outline
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeOutlines(LIST Outlines) { /**
/* * Release all memory consumed by the specified list
** Parameters: * of outlines.
** Outlines list of mf-outlines to be freed * @param Outlines list of mf-outlines to be freed
** Globals: none * @return none
** Operation: Release all memory consumed by the specified list * @note Exceptions: none
** of outlines. * @note History: Thu Dec 13 16:14:50 1990, DSJ, Created.
** Return: none
** Exceptions: none
** History: Thu Dec 13 16:14:50 1990, DSJ, Created.
*/ */
void FreeOutlines(LIST Outlines) {
destroy_nodes(Outlines, FreeMFOutline); destroy_nodes(Outlines, FreeMFOutline);
} /* FreeOutlines */ } /* FreeOutlines */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void MarkDirectionChanges(MFOUTLINE Outline) { /**
/* * This routine searches thru the specified outline and finds
** Parameters: * the points at which the outline changes direction. These
** Outline micro-feature outline to analyze * points are then marked as "extremities". This routine is
** Globals: none * used as an alternative to FindExtremities(). It forces the
** Operation: * endpoints of the microfeatures to be at the direction
** This routine searches thru the specified outline and finds * changes rather than at the midpoint between direction
** the points at which the outline changes direction. These * changes.
** points are then marked as "extremities". This routine is * @param Outline micro-feature outline to analyze
** used as an alternative to FindExtremities(). It forces the * @return none
** endpoints of the microfeatures to be at the direction * @note Globals: none
** changes rather than at the midpoint between direction * @note Exceptions: none
** changes. * @note History: 6/29/90, DSJ, Created.
** Return: none
** Exceptions: none
** History: 6/29/90, DSJ, Created.
*/ */
void MarkDirectionChanges(MFOUTLINE Outline) {
MFOUTLINE Current; MFOUTLINE Current;
MFOUTLINE Last; MFOUTLINE Last;
MFOUTLINE First; MFOUTLINE First;
@ -225,28 +216,26 @@ void MarkDirectionChanges(MFOUTLINE Outline) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Return a new edge point for a micro-feature outline. /** Return a new edge point for a micro-feature outline. */
MFEDGEPT *NewEdgePoint() { MFEDGEPT *NewEdgePoint() {
return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT")); return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT"));
} }
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { /**
/* * This routine returns the next point in the micro-feature
** Parameters: * outline that is an extremity. The search starts after
** EdgePoint start search from this point * EdgePoint. The routine assumes that the outline being
** Globals: none * searched is not a degenerate outline (i.e. it must have
** Operation: * 2 or more edge points).
** This routine returns the next point in the micro-feature * @param EdgePoint start search from this point
** outline that is an extremity. The search starts after * @return Next extremity in the outline after EdgePoint.
** EdgePoint. The routine assumes that the outline being * @note Globals: none
** searched is not a degenerate outline (i.e. it must have * @note Exceptions: none
** 2 or more edge points). * @note History: 7/26/89, DSJ, Created.
** Return: Next extremity in the outline after EdgePoint.
** Exceptions: none
** History: 7/26/89, DSJ, Created.
*/ */
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
EdgePoint = NextPointAfter(EdgePoint); EdgePoint = NextPointAfter(EdgePoint);
while (!PointAt(EdgePoint)->ExtremityMark) while (!PointAt(EdgePoint)->ExtremityMark)
EdgePoint = NextPointAfter(EdgePoint); EdgePoint = NextPointAfter(EdgePoint);
@ -257,25 +246,23 @@ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine normalizes the coordinates of the specified
* outline so that the outline is deskewed down to the
* baseline, translated so that x=0 is at XOrigin, and scaled
* so that the height of a character cell from descender to
* ascender is 1. Of this height, 0.25 is for the descender,
* 0.25 for the ascender, and 0.5 for the x-height. The
* y coordinate of the baseline is 0.
* @param Outline outline to be normalized
* @param XOrigin x-origin of text
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: 8/2/89, DSJ, Created.
*/
void NormalizeOutline(MFOUTLINE Outline, void NormalizeOutline(MFOUTLINE Outline,
FLOAT32 XOrigin) { FLOAT32 XOrigin) {
/*
** Parameters:
** Outline outline to be normalized
** XOrigin x-origin of text
** Globals: none
** Operation:
** This routine normalizes the coordinates of the specified
** outline so that the outline is deskewed down to the
** baseline, translated so that x=0 is at XOrigin, and scaled
** so that the height of a character cell from descender to
** ascender is 1. Of this height, 0.25 is for the descender,
** 0.25 for the ascender, and 0.5 for the x-height. The
** y coordinate of the baseline is 0.
** Return: none
** Exceptions: none
** History: 8/2/89, DSJ, Created.
*/
if (Outline == NIL_LIST) if (Outline == NIL_LIST)
return; return;
@ -292,27 +279,27 @@ void NormalizeOutline(MFOUTLINE Outline,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
/**
* This routine normalizes every outline in Outlines
* according to the currently selected normalization method.
* It also returns the scale factors that it used to do this
* scaling. The scale factors returned represent the x and
* y sizes in the normalized coordinate system that correspond
* to 1 pixel in the original coordinate system.
*
* Globals:
* - classify_norm_method method being used for normalization
* - classify_char_norm_range map radius of gyration to this value
* @param Outlines list of outlines to be normalized
* @param XScale x-direction scale factor used by routine
* @param YScale y-direction scale factor used by routine
* @return none (Outlines are changed and XScale and YScale are updated)
* @note Exceptions: none
* @note History: Fri Dec 14 08:14:55 1990, DSJ, Created.
*/
void Classify::NormalizeOutlines(LIST Outlines, void Classify::NormalizeOutlines(LIST Outlines,
FLOAT32 *XScale, FLOAT32 *XScale,
FLOAT32 *YScale) { FLOAT32 *YScale) {
/*
** Parameters:
** Outlines list of outlines to be normalized
** XScale x-direction scale factor used by routine
** YScale y-direction scale factor used by routine
** Globals:
** classify_norm_method method being used for normalization
** classify_char_norm_range map radius of gyration to this value
** Operation: This routine normalizes every outline in Outlines
** according to the currently selected normalization method.
** It also returns the scale factors that it used to do this
** scaling. The scale factors returned represent the x and
** y sizes in the normalized coordinate system that correspond
** to 1 pixel in the original coordinate system.
** Return: none (Outlines are changed and XScale and YScale are updated)
** Exceptions: none
** History: Fri Dec 14 08:14:55 1990, DSJ, Created.
*/
MFOUTLINE Outline; MFOUTLINE Outline;
switch (classify_norm_method) { switch (classify_norm_method) {
@ -331,25 +318,23 @@ void Classify::NormalizeOutlines(LIST Outlines,
} /* NormalizeOutlines */ } /* NormalizeOutlines */
} // namespace tesseract } // namespace tesseract
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { * Change the direction of every vector in the specified
/* * outline segment to Direction. The segment to be changed
** Parameters: * starts at Start and ends at End. Note that the previous
** Start, End defines segment of outline to be modified * direction of End must also be changed to reflect the
** Direction new direction to assign to segment * change in direction of the point before it.
** Globals: none * @param Start,End defines segment of outline to be modified
** Operation: Change the direction of every vector in the specified * @param Direction new direction to assign to segment
** outline segment to Direction. The segment to be changed * @return none
** starts at Start and ends at End. Note that the previous * @note Globals: none
** direction of End must also be changed to reflect the * @note Exceptions: none
** change in direction of the point before it. * @note History: Fri May 4 10:42:04 1990, DSJ, Created.
** Return: none
** Exceptions: none
** History: Fri May 4 10:42:04 1990, DSJ, Created.
*/ */
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
MFOUTLINE Current; MFOUTLINE Current;
for (Current = Start; Current != End; Current = NextPointAfter (Current)) for (Current = Start; Current != End; Current = NextPointAfter (Current))
@ -360,21 +345,18 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
} /* ChangeDirection */ } /* ChangeDirection */
/*---------------------------------------------------------------------------*/ /**
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { * This routine normalizes each point in Outline by
/* * translating it to the specified center and scaling it
** Parameters: * anisotropically according to the given scale factors.
** Outline outline to be character normalized * @param Outline outline to be character normalized
** XCenter, YCenter center point for normalization * @param cn_denorm
** XScale, YScale scale factors for normalization * @return none
** Globals: none * @note Globals: none
** Operation: This routine normalizes each point in Outline by * @note Exceptions: none
** translating it to the specified center and scaling it * @note History: Fri Dec 14 10:27:11 1990, DSJ, Created.
** anisotropically according to the given scale factors.
** Return: none
** Exceptions: none
** History: Fri Dec 14 10:27:11 1990, DSJ, Created.
*/ */
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
MFOUTLINE First, Current; MFOUTLINE First, Current;
MFEDGEPT *CurrentPoint; MFEDGEPT *CurrentPoint;
@ -397,32 +379,29 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
} /* CharNormalizeOutline */ } /* CharNormalizeOutline */
/*---------------------------------------------------------------------------*/ /**
* This routine computes the slope from Start to Finish and
* then computes the approximate direction of the line
* segment from Start to Finish. The direction is quantized
* into 8 buckets:
* N, S, E, W, NE, NW, SE, SW
* Both the slope and the direction are then stored into
* the appropriate fields of the Start edge point. The
* direction is also stored into the PreviousDirection field
* of the Finish edge point.
* @param Start starting point to compute direction from
* @param Finish finishing point to compute direction to
* @param MinSlope slope below which lines are horizontal
* @param MaxSlope slope above which lines are vertical
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: 7/25/89, DSJ, Created.
*/
void ComputeDirection(MFEDGEPT *Start, void ComputeDirection(MFEDGEPT *Start,
MFEDGEPT *Finish, MFEDGEPT *Finish,
FLOAT32 MinSlope, FLOAT32 MinSlope,
FLOAT32 MaxSlope) { FLOAT32 MaxSlope) {
/*
** Parameters:
** Start starting point to compute direction from
** Finish finishing point to compute direction to
** MinSlope slope below which lines are horizontal
** MaxSlope slope above which lines are vertical
** Globals: none
** Operation:
** This routine computes the slope from Start to Finish and
** and then computes the approximate direction of the line
** segment from Start to Finish. The direction is quantized
** into 8 buckets:
** N, S, E, W, NE, NW, SE, SW
** Both the slope and the direction are then stored into
** the appropriate fields of the Start edge point. The
** direction is also stored into the PreviousDirection field
** of the Finish edge point.
** Return: none
** Exceptions: none
** History: 7/25/89, DSJ, Created.
*/
FVECTOR Delta; FVECTOR Delta;
Delta.x = Finish->Point.x - Start->Point.x; Delta.x = Finish->Point.x - Start->Point.x;
@ -471,23 +450,20 @@ void ComputeDirection(MFEDGEPT *Start,
Start->Direction = west; Start->Direction = west;
} }
Finish->PreviousDirection = Start->Direction; Finish->PreviousDirection = Start->Direction;
} /* ComputeDirection */ }
/*---------------------------------------------------------------------------*/ /**
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { * This routine returns the next point in the micro-feature
/* * outline that has a direction different than EdgePoint. The
** Parameters: * routine assumes that the outline being searched is not a
** EdgePoint start search from this point * degenerate outline (i.e. it must have 2 or more edge points).
** Globals: none * @param EdgePoint start search from this point
** Operation: * @return Point of next direction change in micro-feature outline.
** This routine returns the next point in the micro-feature * @note Globals: none
** outline that has a direction different than EdgePoint. The * @note Exceptions: none
** routine assumes that the outline being searched is not a * @note History: 7/25/89, DSJ, Created.
** degenerate outline (i.e. it must have 2 or more edge points).
** Return: Point of next direction change in micro-feature outline.
** Exceptions: none
** History: 7/25/89, DSJ, Created.
*/ */
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
DIRECTION InitialDirection; DIRECTION InitialDirection;
InitialDirection = PointAt (EdgePoint)->Direction; InitialDirection = PointAt (EdgePoint)->Direction;
@ -501,4 +477,4 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
next_pt != NULL && !PointAt(next_pt)->Hidden); next_pt != NULL && !PointAt(next_pt)->Hidden);
return (EdgePoint); return (EdgePoint);
} /* NextDirectionChange */ }

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "mfdefs.h" #include "mfdefs.h"
#include "mfoutline.h" #include "mfoutline.h"
#include "clusttool.h" //NEEDED #include "clusttool.h" //NEEDED
@ -28,9 +28,9 @@
#include <math.h> #include <math.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Variables Variables
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ /* old numbers corresponded to 10.0 degrees and 80.0 degrees */
double_VAR(classify_min_slope, 0.414213562, double_VAR(classify_min_slope, 0.414213562,
@ -38,9 +38,9 @@ double_VAR(classify_min_slope, 0.414213562,
double_VAR(classify_max_slope, 2.414213562, double_VAR(classify_max_slope, 2.414213562,
"Slope above which lines are called vertical"); "Slope above which lines are called vertical");
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Macros Macros
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/* miscellaneous macros */ /* miscellaneous macros */
#define NormalizeAngle(A) ( (((A)<0)?((A)+2*PI):(A)) / (2*PI) ) #define NormalizeAngle(A) ( (((A)<0)?((A)+2*PI):(A)) / (2*PI) )
@ -54,25 +54,22 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) { * This routine extracts micro-features from the specified
/* * blob and returns a list of the micro-features. All
** Parameters: * micro-features are normalized according to the specified
** Blob blob to extract micro-features from * line statistics.
** denorm control parameter to feature extractor * @param Blob blob to extract micro-features from
** Operation: * @param cn_denorm control parameter to feature extractor
** This routine extracts micro-features from the specified * @return List of micro-features extracted from the blob.
** blob and returns a list of the micro-features. All * @note Exceptions: none
** micro-features are normalized according to the specified * @note History: 7/21/89, DSJ, Created.
** line statistics.
** Return: List of micro-features extracted from the blob.
** Exceptions: none
** History: 7/21/89, DSJ, Created.
*/ */
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
MICROFEATURES MicroFeatures = NIL_LIST; MICROFEATURES MicroFeatures = NIL_LIST;
LIST Outlines; LIST Outlines;
LIST RemainingOutlines; LIST RemainingOutlines;
@ -104,26 +101,23 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
Private Code Private Code
---------------------------------------------------------------------------*/ ---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { * This routine computes the orientation parameter of the
/* * specified micro-feature. The orientation is the angle of
** Parameters: * the vector from Start to End. It is normalized to a number
** Start starting edge point of micro-feature * between 0 and 1 where 0 corresponds to 0 degrees and 1
** End ending edge point of micro-feature * corresponds to 360 degrees. The actual range is [0,1), i.e.
** Globals: none * 1 is excluded from the range (since it is actually the
** Operation: * same orientation as 0). This routine assumes that Start
** This routine computes the orientation parameter of the * and End are not the same point.
** specified micro-feature. The orientation is the angle of * @param Start starting edge point of micro-feature
** the vector from Start to End. It is normalized to a number * @param End ending edge point of micro-feature
** between 0 and 1 where 0 corresponds to 0 degrees and 1 * @note Globals: none
** corresponds to 360 degrees. The actual range is [0,1), i.e. * @return Orientation parameter for the specified micro-feature.
** 1 is excluded from the range (since it is actually the * @return Orientation parameter for the specified micro-feature.
** same orientation as 0). This routine assumes that Start * @note History: 7/27/89, DSJ, Created.
** and End are not the same point.
** Return: Orientation parameter for the specified micro-feature.
** Exceptions: none
** History: 7/27/89, DSJ, Created.
*/ */
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
FLOAT32 Orientation; FLOAT32 Orientation;
Orientation = NormalizeAngle (AngleFrom (Start->Point, End->Point)); Orientation = NormalizeAngle (AngleFrom (Start->Point, End->Point));
@ -135,20 +129,17 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
} /* ComputeOrientation */ } /* ComputeOrientation */
/*---------------------------------------------------------------------------*/ /**
* Convert Outline to MicroFeatures
* @param Outline outline to extract micro-features from
* @param MicroFeatures list of micro-features to add to
* @return List of micro-features with new features added to front.
* @note Globals: none
* @note Exceptions: none
* @note History: 7/26/89, DSJ, Created.
*/
MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
MICROFEATURES MicroFeatures) { MICROFEATURES MicroFeatures) {
/*
** Parameters:
** Outline outline to extract micro-features from
** MicroFeatures list of micro-features to add to
** Globals: none
** Operation:
** This routine
** Return: List of micro-features with new features added to front.
** Exceptions: none
** History: 7/26/89, DSJ, Created.
*/
MFOUTLINE Current; MFOUTLINE Current;
MFOUTLINE Last; MFOUTLINE Last;
MFOUTLINE First; MFOUTLINE First;
@ -174,26 +165,24 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
} /* ConvertToMicroFeatures */ } /* ConvertToMicroFeatures */
/*---------------------------------------------------------------------------*/ /**
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { * This routine computes the feature parameters which describe
/* * the micro-feature that starts at Start and ends at End.
** Parameters: * A new micro-feature is allocated, filled with the feature
** Start starting point of micro-feature * parameters, and returned. The routine assumes that
** End ending point of micro-feature * Start and End are not the same point. If they are the
** Globals: none * same point, NULL is returned, a warning message is
** Operation: * printed, and the current outline is dumped to stdout.
** This routine computes the feature parameters which describe * @param Start starting point of micro-feature
** the micro-feature that starts at Start and ends at End. * @param End ending point of micro-feature
** A new micro-feature is allocated, filled with the feature * @return New micro-feature or NULL if the feature was rejected.
** parameters, and returned. The routine assumes that * @note Globals: none
** Start and End are not the same point. If they are the * @note Exceptions: none
** same point, NULL is returned, a warning message is * @note History:
** printed, and the current outline is dumped to stdout. * - 7/26/89, DSJ, Created.
** Return: New micro-feature or NULL if the feature was rejected. * - 11/17/89, DSJ, Added handling for Start and End same point.
** Exceptions: none
** History: 7/26/89, DSJ, Created.
** 11/17/89, DSJ, Added handling for Start and End same point.
*/ */
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) {
MICROFEATURE NewFeature; MICROFEATURE NewFeature;
MFEDGEPT *P1, *P2; MFEDGEPT *P1, *P2;

View File

@ -18,12 +18,12 @@
#ifndef MFX_H #ifndef MFX_H
#define MFX_H #define MFX_H
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
#include "mfdefs.h" #include "mfdefs.h"
#include "params.h" #include "params.h"
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Variables Variables
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
@ -33,7 +33,7 @@ extern double_VAR_H(classify_min_slope, 0.414213562,
extern double_VAR_H(classify_max_slope, 2.414213562, extern double_VAR_H(classify_max_slope, 2.414213562,
"Slope above which lines are called vertical"); "Slope above which lines are called vertical");
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Function Prototypes Public Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm); MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm);

View File

@ -15,50 +15,50 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "normfeat.h" #include "normfeat.h"
#include "intfx.h" #include "intfx.h"
#include "featdefs.h" #include "featdefs.h"
#include "mfoutline.h" #include "mfoutline.h"
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
// Return the length of the outline in baseline normalized form. /** Return the length of the outline in baseline normalized form. */
FLOAT32 ActualOutlineLength(FEATURE Feature) { FLOAT32 ActualOutlineLength(FEATURE Feature) {
return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
} }
/*---------------------------------------------------------------------------*/ /**
// Return the character normalization feature for a blob. * Return the character normalization feature for a blob.
// *
// The features returned are in a scale where the x-height has been * The features returned are in a scale where the x-height has been
// normalized to live in the region y = [-0.25 .. 0.25]. Example ranges * normalized to live in the region y = [-0.25 .. 0.25]. Example ranges
// for English below are based on the Linux font collection on 2009-12-04: * for English below are based on the Linux font collection on 2009-12-04:
// *
// Params[CharNormY] * - Params[CharNormY]
// The y coordinate of the grapheme's centroid. * - The y coordinate of the grapheme's centroid.
// English: [-0.27, 0.71] * - English: [-0.27, 0.71]
// *
// Params[CharNormLength] * - Params[CharNormLength]
// The length of the grapheme's outline (tiny segments discarded), * - The length of the grapheme's outline (tiny segments discarded),
// divided by 10.0=LENGTH_COMPRESSION. * divided by 10.0=LENGTH_COMPRESSION.
// English: [0.16, 0.85] * - English: [0.16, 0.85]
// *
// Params[CharNormRx] * - Params[CharNormRx]
// The radius of gyration about the x axis, as measured from CharNormY. * - The radius of gyration about the x axis, as measured from CharNormY.
// English: [0.011, 0.34] * - English: [0.011, 0.34]
// *
// Params[CharNormRy] * - Params[CharNormRy]
// The radius of gyration about the y axis, as measured from * - The radius of gyration about the y axis, as measured from
// the x center of the grapheme's bounding box. * the x center of the grapheme's bounding box.
// English: [0.011, 0.31] * - English: [0.011, 0.31]
// */
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) { FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) {
FEATURE_SET feature_set = NewFeatureSet(1); FEATURE_SET feature_set = NewFeatureSet(1);
FEATURE feature = NewFeature(&CharNormDesc); FEATURE feature = NewFeature(&CharNormDesc);

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "normmatch.h" #include "normmatch.h"
#include <stdio.h> #include <stdio.h>
@ -43,9 +43,9 @@ struct NORM_PROTOS
int NumProtos; int NumProtos;
}; };
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Function Prototypes Private Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
double NormEvidenceOf(register double NormAdj); double NormEvidenceOf(register double NormAdj);
void PrintNormMatch(FILE *File, void PrintNormMatch(FILE *File,
@ -55,38 +55,39 @@ void PrintNormMatch(FILE *File,
NORM_PROTOS *ReadNormProtos(FILE *File); NORM_PROTOS *ReadNormProtos(FILE *File);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Variables Variables
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/* control knobs used to control the normalization adjustment process */ /** control knobs used to control the normalization adjustment process */
double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ...");
double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ...");
// Weight of width variance against height and vertical position. /** Weight of width variance against height and vertical position. */
const double kWidthErrorWeighting = 0.125; const double kWidthErrorWeighting = 0.125;
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
/**
* This routine compares Features against each character
* normalization proto for ClassId and returns the match
* rating of the best match.
* @param ClassId id of class to match against
* @param feature character normalization feature
* @param DebugMatch controls dump of debug info
*
* Globals:
* #NormProtos character normalization prototypes
*
* @return Best match rating for Feature against protos of ClassId.
* @note Exceptions: none
* @note History: Wed Dec 19 16:56:12 1990, DSJ, Created.
*/
FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId,
const FEATURE_STRUCT& feature, const FEATURE_STRUCT& feature,
BOOL8 DebugMatch) { BOOL8 DebugMatch) {
/*
** Parameters:
** ClassId id of class to match against
** Feature character normalization feature
** DebugMatch controls dump of debug info
** Globals:
** NormProtos character normalization prototypes
** Operation: This routine compares Features against each character
** normalization proto for ClassId and returns the match
** rating of the best match.
** Return: Best match rating for Feature against protos of ClassId.
** Exceptions: none
** History: Wed Dec 19 16:56:12 1990, DSJ, Created.
*/
LIST Protos; LIST Protos;
FLOAT32 BestMatch; FLOAT32 BestMatch;
FLOAT32 Match; FLOAT32 Match;
@ -170,16 +171,16 @@ void Classify::FreeNormProtos() {
} }
} // namespace tesseract } // namespace tesseract
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/********************************************************************** /**
* NormEvidenceOf * @name NormEvidenceOf
* *
* Return the new type of evidence number corresponding to this * Return the new type of evidence number corresponding to this
* normalization adjustment. The equation that represents the transform is: * normalization adjustment. The equation that represents the transform is:
* 1 / (1 + (NormAdj / midpoint) ^ curl) * 1 / (1 + (NormAdj / midpoint) ^ curl)
**********************************************************************/ */
double NormEvidenceOf(register double NormAdj) { double NormEvidenceOf(register double NormAdj) {
NormAdj /= classify_norm_adj_midpoint; NormAdj /= classify_norm_adj_midpoint;
@ -194,22 +195,21 @@ double NormEvidenceOf(register double NormAdj) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine dumps out detailed normalization match info.
* @param File open text file to dump match debug info to
* @param NumParams # of parameters in proto and feature
* @param Proto array of prototype parameters
* @param Feature array of feature parameters
* Globals: none
* @return none
* @note Exceptions: none
* @note History: Wed Jan 2 09:49:35 1991, DSJ, Created.
*/
void PrintNormMatch(FILE *File, void PrintNormMatch(FILE *File,
int NumParams, int NumParams,
PROTOTYPE *Proto, PROTOTYPE *Proto,
FEATURE Feature) { FEATURE Feature) {
/*
** Parameters:
** File open text file to dump match debug info to
** NumParams # of parameters in proto and feature
** Proto[] array of prototype parameters
** Feature[] array of feature parameters
** Globals: none
** Operation: This routine dumps out detailed normalization match info.
** Return: none
** Exceptions: none
** History: Wed Jan 2 09:49:35 1991, DSJ, Created.
*/
int i; int i;
FLOAT32 ParamMatch; FLOAT32 ParamMatch;
FLOAT32 TotalMatch; FLOAT32 TotalMatch;
@ -231,18 +231,18 @@ void PrintNormMatch(FILE *File,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) { /**
/* * This routine allocates a new data structure to hold
** Parameters: * a set of character normalization protos. It then fills in
** File open text file to read normalization protos from * the data structure by reading from the specified File.
** Globals: none * @param File open text file to read normalization protos from
** Operation: This routine allocates a new data structure to hold * @param end_offset
** a set of character normalization protos. It then fills in * Globals: none
** the data structure by reading from the specified File. * @return Character normalization protos.
** Return: Character normalization protos. * @note Exceptions: none
** Exceptions: none * @note History: Wed Dec 19 16:38:49 1990, DSJ, Created.
** History: Wed Dec 19 16:38:49 1990, DSJ, Created.
*/ */
NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
NORM_PROTOS *NormProtos; NORM_PROTOS *NormProtos;
int i; int i;
char unichar[2 * UNICHAR_LEN + 1]; char unichar[2 * UNICHAR_LEN + 1];

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "ocrfeatures.h" #include "ocrfeatures.h"
#include "emalloc.h" #include "emalloc.h"
#include "callcpp.h" #include "callcpp.h"
@ -28,24 +28,20 @@
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { * Add a feature to a feature set. If the feature set is
/* * already full, FALSE is returned to indicate that the
** Parameters: * feature could not be added to the set; otherwise, TRUE is
** FeatureSet set of features to add Feature to * returned.
** Feature feature to be added to FeatureSet * @param FeatureSet set of features to add Feature to
** Globals: none * @param Feature feature to be added to FeatureSet
** Operation: Add a feature to a feature set. If the feature set is * @return TRUE if feature added to set, FALSE if set is already full.
** already full, FALSE is returned to indicate that the * @note History: Tue May 22 17:22:23 1990, DSJ, Created.
** feature could not be added to the set; otherwise, TRUE is
** returned.
** Return: TRUE if feature added to set, FALSE if set is already full.
** Exceptions: none
** History: Tue May 22 17:22:23 1990, DSJ, Created.
*/ */
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) {
FreeFeature(Feature); FreeFeature(Feature);
return FALSE; return FALSE;
@ -55,17 +51,13 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
return TRUE; return TRUE;
} /* AddFeature */ } /* AddFeature */
/*---------------------------------------------------------------------------*/ /**
void FreeFeature(FEATURE Feature) { * Release the memory consumed by the specified feature.
/* * @param Feature feature to be deallocated.
** Parameters: * @return none
** Feature feature to be deallocated. * @note History: Mon May 21 13:33:27 1990, DSJ, Created.
** Globals: none
** Operation: Release the memory consumed by the specified feature.
** Return: none
** Exceptions: none
** History: Mon May 21 13:33:27 1990, DSJ, Created.
*/ */
void FreeFeature(FEATURE Feature) {
if (Feature) { if (Feature) {
free_struct (Feature, sizeof (FEATURE_STRUCT) free_struct (Feature, sizeof (FEATURE_STRUCT)
+ sizeof (FLOAT32) * (Feature->Type->NumParams - 1), + sizeof (FLOAT32) * (Feature->Type->NumParams - 1),
@ -75,19 +67,15 @@ void FreeFeature(FEATURE Feature) {
} /* FreeFeature */ } /* FreeFeature */
/*---------------------------------------------------------------------------*/ /**
void FreeFeatureSet(FEATURE_SET FeatureSet) { * Release the memory consumed by the specified feature
/* * set. This routine also frees the memory consumed by the
** Parameters: * features contained in the set.
** FeatureSet set of features to be freed * @param FeatureSet set of features to be freed
** Globals: none * @return none
** Operation: Release the memory consumed by the specified feature * @note History: Mon May 21 13:59:46 1990, DSJ, Created.
** set. This routine also frees the memory consumed by the
** features contained in the set.
** Return: none
** Exceptions: none
** History: Mon May 21 13:59:46 1990, DSJ, Created.
*/ */
void FreeFeatureSet(FEATURE_SET FeatureSet) {
int i; int i;
if (FeatureSet) { if (FeatureSet) {
@ -98,18 +86,14 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) {
} /* FreeFeatureSet */ } /* FreeFeatureSet */
/*---------------------------------------------------------------------------*/ /**
FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { * Allocate and return a new feature of the specified
/* * type.
** Parameters: * @param FeatureDesc description of feature to be created.
** FeatureDesc description of feature to be created. * @return New #FEATURE.
** Globals: none * @note History: Mon May 21 14:06:42 1990, DSJ, Created.
** Operation: Allocate and return a new feature of the specified
** type.
** Return: New feature.
** Exceptions: none
** History: Mon May 21 14:06:42 1990, DSJ, Created.
*/ */
FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
FEATURE Feature; FEATURE Feature;
Feature = (FEATURE) alloc_struct (sizeof (FEATURE_STRUCT) + Feature = (FEATURE) alloc_struct (sizeof (FEATURE_STRUCT) +
@ -122,18 +106,14 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
} /* NewFeature */ } /* NewFeature */
/*---------------------------------------------------------------------------*/ /**
FEATURE_SET NewFeatureSet(int NumFeatures) { * Allocate and return a new feature set large enough to
/* * hold the specified number of features.
** Parameters: * @param NumFeatures maximum # of features to be put in feature set
** NumFeatures maximum # of features to be put in feature set * @return New #FEATURE_SET.
** Globals: none * @note History: Mon May 21 14:22:40 1990, DSJ, Created.
** Operation: Allocate and return a new feature set large enough to
** hold the specified number of features.
** Return: New feature set.
** Exceptions: none
** History: Mon May 21 14:22:40 1990, DSJ, Created.
*/ */
FEATURE_SET NewFeatureSet(int NumFeatures) {
FEATURE_SET FeatureSet; FEATURE_SET FeatureSet;
FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) + FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) +
@ -145,23 +125,20 @@ FEATURE_SET NewFeatureSet(int NumFeatures) {
} /* NewFeatureSet */ } /* NewFeatureSet */
/*---------------------------------------------------------------------------*/ /**
FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { * Create a new feature of the specified type and read in
/* * the value of its parameters from File. The extra penalty
** Parameters: * for the feature is also computed by calling the appropriate
** File open text file to read feature from * function for the specified feature type. The correct text
** FeatureDesc specifies type of feature to read from File * representation for a feature is a list of N floats where
** Globals: none * N is the number of parameters in the feature.
** Operation: Create a new feature of the specified type and read in * @param File open text file to read feature from
** the value of its parameters from File. The extra penalty * @param FeatureDesc specifies type of feature to read from File
** for the feature is also computed by calling the appropriate * @return New #FEATURE read from File.
** function for the specified feature type. The correct text * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
** representation for a feature is a list of N floats where * @note History: Wed May 23 08:53:16 1990, DSJ, Created.
** N is the number of parameters in the feature.
** Return: New feature read from File.
** Exceptions: ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
** History: Wed May 23 08:53:16 1990, DSJ, Created.
*/ */
FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
FEATURE Feature; FEATURE Feature;
int i; int i;
@ -177,22 +154,18 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
} /* ReadFeature */ } /* ReadFeature */
/*---------------------------------------------------------------------------*/ /**
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { * Create a new feature set of the specified type and read in
/* * the features from File. The correct text representation
** Parameters: * for a feature set is an integer which specifies the number (N)
** File open text file to read new feature set from * of features in a set followed by a list of N feature
** FeatureDesc specifies type of feature to read from File * descriptions.
** Globals: none * @param File open text file to read new feature set from
** Operation: Create a new feature set of the specified type and read in * @param FeatureDesc specifies type of feature to read from File
** the features from File. The correct text representation * @return New feature set read from File.
** for a feature set is an integer which specifies the number (N) * @note History: Wed May 23 09:17:31 1990, DSJ, Created.
** of features in a set followed by a list of N feature
** descriptions.
** Return: New feature set read from File.
** Exceptions: none
** History: Wed May 23 09:17:31 1990, DSJ, Created.
*/ */
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
FEATURE_SET FeatureSet; FEATURE_SET FeatureSet;
int NumFeatures; int NumFeatures;
int i; int i;
@ -208,20 +181,17 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
} /* ReadFeatureSet */ } /* ReadFeatureSet */
/*---------------------------------------------------------------------------*/ /**
/* * Appends a textual representation of Feature to str.
** Parameters: * This representation is simply a list of the N parameters
** Feature: feature to write out to str * of the feature, terminated with a newline. It is assumed
** str: string to write Feature to * that the ExtraPenalty field can be reconstructed from the
** Operation: Appends a textual representation of Feature to str. * parameters of the feature. It is also assumed that the
** This representation is simply a list of the N parameters * feature type information is specified or assumed elsewhere.
** of the feature, terminated with a newline. It is assumed * @param Feature feature to write out to str
** that the ExtraPenalty field can be reconstructed from the * @param str string to write Feature to
** parameters of the feature. It is also assumed that the * @return none
** feature type information is specified or assumed elsewhere. * @note History: Wed May 23 09:28:18 1990, DSJ, Created.
** Return: none
** Exceptions: none
** History: Wed May 23 09:28:18 1990, DSJ, Created.
*/ */
void WriteFeature(FEATURE Feature, STRING* str) { void WriteFeature(FEATURE Feature, STRING* str) {
for (int i = 0; i < Feature->Type->NumParams; i++) { for (int i = 0; i < Feature->Type->NumParams; i++) {
@ -234,19 +204,15 @@ void WriteFeature(FEATURE Feature, STRING* str) {
} /* WriteFeature */ } /* WriteFeature */
/*---------------------------------------------------------------------------*/ /**
/* * Write a textual representation of FeatureSet to File.
** Parameters: * This representation is an integer specifying the number of
** FeatureSet: feature set to write to File * features in the set, followed by a newline, followed by
** str: string to write Feature to * text representations for each feature in the set.
** Globals: none * @param FeatureSet feature set to write to File
** Operation: Write a textual representation of FeatureSet to File. * @param str string to write Feature to
** This representation is an integer specifying the number of * @return none
** features in the set, followed by a newline, followed by * @note History: Wed May 23 10:06:03 1990, DSJ, Created.
** text representations for each feature in the set.
** Return: none
** Exceptions: none
** History: Wed May 23 10:06:03 1990, DSJ, Created.
*/ */
void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
if (FeatureSet) { if (FeatureSet) {
@ -259,23 +225,22 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
} /* WriteFeatureSet */ } /* WriteFeatureSet */
/*---------------------------------------------------------------------------*/ /**
void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { * Write a textual representation of FeatureDesc to File
/* * in the old format (i.e. the format used by the clusterer).
** Parameters: *
** File open text file to write FeatureDesc to * This format is:
** FeatureDesc feature descriptor to write to File * @verbatim
** Globals: none * Number of Params
** Operation: Write a textual representation of FeatureDesc to File * Description of Param 1
** in the old format (i.e. the format used by the clusterer). * ...
** This format is: * @endverbatim
** Number of Params * @param File open text file to write FeatureDesc to
** Description of Param 1 * @param FeatureDesc feature descriptor to write to File
** ... * @return none
** Return: none * @note History: Fri May 25 15:27:18 1990, DSJ, Created.
** Exceptions: none
** History: Fri May 25 15:27:18 1990, DSJ, Created.
*/ */
void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
int i; int i;
fprintf (File, "%d\n", FeatureDesc->NumParams); fprintf (File, "%d\n", FeatureDesc->NumParams);

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "outfeat.h" #include "outfeat.h"
#include "classify.h" #include "classify.h"
@ -28,24 +28,23 @@
#include <stdio.h> #include <stdio.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) { /**
/* * Convert each segment in the outline to a feature
** Parameters: * and return the features.
** Blob blob to extract pico-features from * @param Blob blob to extract pico-features from
** LineStats statistics on text row blob is in * @return Outline-features for Blob.
** Globals: none * @note Globals: none
** Operation: Convert each segment in the outline to a feature * @note Exceptions: none
** and return the features. * @note History:
** Return: Outline-features for Blob. * - 11/13/90, DSJ, Created.
** Exceptions: none * - 05/24/91, DSJ, Updated for either char or baseline normalize.
** History: 11/13/90, DSJ, Created.
** 05/24/91, DSJ, Updated for either char or baseline normalize.
*/ */
FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
LIST Outlines; LIST Outlines;
LIST RemainingOutlines; LIST RemainingOutlines;
MFOUTLINE Outline; MFOUTLINE Outline;
@ -71,30 +70,29 @@ FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
} /* ExtractOutlineFeatures */ } /* ExtractOutlineFeatures */
} // namespace tesseract } // namespace tesseract
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine computes the midpoint between Start and
* End to obtain the x,y position of the outline-feature. It
* also computes the direction from Start to End as the
* direction of the outline-feature and the distance from
* Start to End as the length of the outline-feature.
* This feature is then
* inserted into the next feature slot in FeatureSet.
* @param Start starting point of outline-feature
* @param End ending point of outline-feature
* @param FeatureSet set to add outline-feature to
* @return none (results are placed in FeatureSet)
* @note Globals: none
* @note Exceptions: none
* @note History: 11/13/90, DSJ, Created.
*/
void AddOutlineFeatureToSet(FPOINT *Start, void AddOutlineFeatureToSet(FPOINT *Start,
FPOINT *End, FPOINT *End,
FEATURE_SET FeatureSet) { FEATURE_SET FeatureSet) {
/*
** Parameters:
** Start starting point of outline-feature
** End ending point of outline-feature
** FeatureSet set to add outline-feature to
** Globals: none
** Operation: This routine computes the midpoint between Start and
** End to obtain the x,y position of the outline-feature. It
** also computes the direction from Start to End as the
** direction of the outline-feature and the distance from
** Start to End as the length of the outline-feature.
** This feature is then
** inserted into the next feature slot in FeatureSet.
** Return: none (results are placed in FeatureSet)
** Exceptions: none
** History: 11/13/90, DSJ, Created.
*/
FEATURE Feature; FEATURE Feature;
Feature = NewFeature(&OutlineFeatDesc); Feature = NewFeature(&OutlineFeatDesc);
@ -108,21 +106,20 @@ void AddOutlineFeatureToSet(FPOINT *Start,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { /**
/* * This routine converts each section in the specified
** Parameters: * outline to a feature described by its x,y position, length
** Outline outline to extract outline-features from * and angle.
** FeatureSet set of features to add outline-features to * @param Outline outline to extract outline-features from
** Globals: none * @param FeatureSet set of features to add outline-features to
** Operation: * @return none (results are returned in FeatureSet)
** This routine converts each section in the specified * @note Globals: none
** outline to a feature described by its x,y position, length * @note Exceptions: none
** and angle. * @note History:
** Return: none (results are returned in FeatureSet) * - 11/13/90, DSJ, Created.
** Exceptions: none * - 5/24/91, DSJ, Added hidden edge capability.
** History: 11/13/90, DSJ, Created.
** 5/24/91, DSJ, Added hidden edge capability.
*/ */
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
MFOUTLINE Next; MFOUTLINE Next;
MFOUTLINE First; MFOUTLINE First;
FPOINT FeatureStart; FPOINT FeatureStart;
@ -152,19 +149,18 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void NormalizeOutlineX(FEATURE_SET FeatureSet) { /**
/* * This routine computes the weighted average x position
** Parameters: * over all of the outline-features in FeatureSet and then
** FeatureSet outline-features to be normalized * renormalizes the outline-features to force this average
** Globals: none * to be the x origin (i.e. x=0).
** Operation: This routine computes the weighted average x position * @param FeatureSet outline-features to be normalized
** over all of the outline-features in FeatureSet and then * @return none (FeatureSet is changed)
** renormalizes the outline-features to force this average * @note Globals: none
** to be the x origin (i.e. x=0). * @note Exceptions: none
** Return: none (FeatureSet is changed) * @note History: 11/13/90, DSJ, Created.
** Exceptions: none
** History: 11/13/90, DSJ, Created.
*/ */
void NormalizeOutlineX(FEATURE_SET FeatureSet) {
int i; int i;
FEATURE Feature; FEATURE Feature;
FLOAT32 Length; FLOAT32 Length;

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "picofeat.h" #include "picofeat.h"
#include "classify.h" #include "classify.h"
@ -49,23 +49,22 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
void NormalizePicoX(FEATURE_SET FeatureSet); void NormalizePicoX(FEATURE_SET FeatureSet);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {
FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { /**
/* * Operation: Dummy for now.
** Parameters: *
** Blob blob to extract pico-features from * Globals:
** LineStats statistics on text row blob is in * - classify_norm_method normalization method currently specified
** Globals: * @param Blob blob to extract pico-features from
** classify_norm_method normalization method currently specified * @return Pico-features for Blob.
** Operation: Dummy for now. * @note Exceptions: none
** Return: Pico-features for Blob. * @note History: 9/4/90, DSJ, Created.
** Exceptions: none
** History: 9/4/90, DSJ, Created.
*/ */
FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
LIST Outlines; LIST Outlines;
LIST RemainingOutlines; LIST RemainingOutlines;
MFOUTLINE Outline; MFOUTLINE Outline;
@ -88,29 +87,28 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
} /* ExtractPicoFeatures */ } /* ExtractPicoFeatures */
} // namespace tesseract } // namespace tesseract
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine converts an entire segment of an outline
* into a set of pico features which are added to
* FeatureSet. The length of the segment is rounded to the
* nearest whole number of pico-features. The pico-features
* are spaced evenly over the entire segment.
* Globals:
* - classify_pico_feature_length length of a single pico-feature
* @param Start starting point of pico-feature
* @param End ending point of pico-feature
* @param FeatureSet set to add pico-feature to
* @return none (results are placed in FeatureSet)
* @note Exceptions: none
* @note History: Tue Apr 30 15:44:34 1991, DSJ, Created.
*/
void ConvertSegmentToPicoFeat(FPOINT *Start, void ConvertSegmentToPicoFeat(FPOINT *Start,
FPOINT *End, FPOINT *End,
FEATURE_SET FeatureSet) { FEATURE_SET FeatureSet) {
/*
** Parameters:
** Start starting point of pico-feature
** End ending point of pico-feature
** FeatureSet set to add pico-feature to
** Globals:
** classify_pico_feature_length length of a single pico-feature
** Operation: This routine converts an entire segment of an outline
** into a set of pico features which are added to
** FeatureSet. The length of the segment is rounded to the
** nearest whole number of pico-features. The pico-features
** are spaced evenly over the entire segment.
** Return: none (results are placed in FeatureSet)
** Exceptions: none
** History: Tue Apr 30 15:44:34 1991, DSJ, Created.
*/
FEATURE Feature; FEATURE Feature;
FLOAT32 Angle; FLOAT32 Angle;
FLOAT32 Length; FLOAT32 Length;
@ -148,23 +146,21 @@ void ConvertSegmentToPicoFeat(FPOINT *Start,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { /**
/* * This routine steps thru the specified outline and cuts it
** Parameters: * up into pieces of equal length. These pieces become the
** Outline outline to extract micro-features from * desired pico-features. Each segment in the outline
** FeatureSet set of features to add pico-features to * is converted into an integral number of pico-features.
** Globals: *
** classify_pico_feature_length * Globals:
** length of features to be extracted * - classify_pico_feature_length length of features to be extracted
** Operation: * @param Outline outline to extract micro-features from
** This routine steps thru the specified outline and cuts it * @param FeatureSet set of features to add pico-features to
** up into pieces of equal length. These pieces become the * @return none (results are returned in FeatureSet)
** desired pico-features. Each segment in the outline * @note Exceptions: none
** is converted into an integral number of pico-features. * @note History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
** Return: none (results are returned in FeatureSet)
** Exceptions: none
** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
*/ */
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
MFOUTLINE Next; MFOUTLINE Next;
MFOUTLINE First; MFOUTLINE First;
MFOUTLINE Current; MFOUTLINE Current;
@ -194,19 +190,18 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void NormalizePicoX(FEATURE_SET FeatureSet) { /**
/* * This routine computes the average x position over all
** Parameters: * of the pico-features in FeatureSet and then renormalizes
** FeatureSet pico-features to be normalized * the pico-features to force this average to be the x origin
** Globals: none * (i.e. x=0).
** Operation: This routine computes the average x position over all * @param FeatureSet pico-features to be normalized
** of the pico-features in FeatureSet and then renormalizes * @return none (FeatureSet is changed)
** the pico-features to force this average to be the x origin * @note Globals: none
** (i.e. x=0). * @note Exceptions: none
** Return: none (FeatureSet is changed) * @note History: Tue Sep 4 16:50:08 1990, DSJ, Created.
** Exceptions: none
** History: Tue Sep 4 16:50:08 1990, DSJ, Created.
*/ */
void NormalizePicoX(FEATURE_SET FeatureSet) {
int i; int i;
FEATURE Feature; FEATURE Feature;
FLOAT32 Origin = 0.0; FLOAT32 Origin = 0.0;
@ -225,16 +220,15 @@ void NormalizePicoX(FEATURE_SET FeatureSet) {
namespace tesseract { namespace tesseract {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* @param blob blob to extract features from
* @param fx_info
* @return Integer character-normalized features for blob.
* @note Exceptions: none
* @note History: 8/8/2011, rays, Created.
*/
FEATURE_SET Classify::ExtractIntCNFeatures( FEATURE_SET Classify::ExtractIntCNFeatures(
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** blob blob to extract features from
** denorm normalization/denormalization parameters.
** Return: Integer character-normalized features for blob.
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
INT_FX_RESULT_STRUCT local_fx_info(fx_info); INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features; GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
@ -258,16 +252,15 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
} /* ExtractIntCNFeatures */ } /* ExtractIntCNFeatures */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* @param blob blob to extract features from
* @param fx_info
* @return Geometric (top/bottom/width) features for blob.
* @note Exceptions: none
* @note History: 8/8/2011, rays, Created.
*/
FEATURE_SET Classify::ExtractIntGeoFeatures( FEATURE_SET Classify::ExtractIntGeoFeatures(
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) { const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** blob blob to extract features from
** denorm normalization/denormalization parameters.
** Return: Geometric (top/bottom/width) features for blob.
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
INT_FX_RESULT_STRUCT local_fx_info(fx_info); INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features; GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(

View File

@ -61,22 +61,28 @@ ConvNetCharClassifier::~ConvNetCharClassifier() {
} }
} }
// The main training function. Given a sample and a class ID the classifier /**
// updates its parameters according to its learning algorithm. This function * The main training function. Given a sample and a class ID the classifier
// is currently not implemented. TODO(ahmadab): implement end-2-end training * updates its parameters according to its learning algorithm. This function
* is currently not implemented. TODO(ahmadab): implement end-2-end training
*/
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) { bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
return false; return false;
} }
// A secondary function needed for training. Allows the trainer to set the /**
// value of any train-time parameter. This function is currently not * A secondary function needed for training. Allows the trainer to set the
// implemented. TODO(ahmadab): implement end-2-end training * value of any train-time parameter. This function is currently not
* implemented. TODO(ahmadab): implement end-2-end training
*/
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) { bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
// TODO(ahmadab): implementation of parameter initializing. // TODO(ahmadab): implementation of parameter initializing.
return false; return false;
} }
// Folds the output of the NeuralNet using the loaded folding sets /**
* Folds the output of the NeuralNet using the loaded folding sets
*/
void ConvNetCharClassifier::Fold() { void ConvNetCharClassifier::Fold() {
// in case insensitive mode // in case insensitive mode
if (case_sensitive_ == false) { if (case_sensitive_ == false) {
@ -125,8 +131,10 @@ void ConvNetCharClassifier::Fold() {
} }
} }
// Compute the features of specified charsamp and feedforward the /**
// specified nets * Compute the features of specified charsamp and feedforward the
* specified nets
*/
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) { bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
if (char_net_ == NULL) { if (char_net_ == NULL) {
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
@ -173,7 +181,9 @@ bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
return true; return true;
} }
// return the cost of being a char /**
* return the cost of being a char
*/
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) { int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
if (RunNets(char_samp) == false) { if (RunNets(char_samp) == false) {
return 0; return 0;
@ -181,8 +191,10 @@ int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
return CubeUtils::Prob2Cost(1.0f - net_output_[0]); return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
} }
// classifies a charsamp and returns an alternate list /**
// of chars sorted by char costs * classifies a charsamp and returns an alternate list
* of chars sorted by char costs
*/
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) { CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
// run the needed nets // run the needed nets
if (RunNets(char_samp) == false) { if (RunNets(char_samp) == false) {
@ -207,7 +219,9 @@ CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
return alt_list; return alt_list;
} }
// Set an external net (for training purposes) /**
* Set an external net (for training purposes)
*/
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) { void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
if (char_net_ != NULL) { if (char_net_ != NULL) {
delete char_net_; delete char_net_;
@ -216,8 +230,10 @@ void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
char_net_ = char_net; char_net_ = char_net;
} }
// This function will return true if the file does not exist. /**
// But will fail if it did not pass the sanity checks * This function will return true if the file does not exist.
* But will fail if it did not pass the sanity checks
*/
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path, bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
const string &lang, const string &lang,
LangModel *lang_mod) { LangModel *lang_mod) {
@ -284,7 +300,9 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
return true; return true;
} }
// Init the classifier provided a data-path and a language string /**
* Init the classifier provided a data-path and a language string
*/
bool ConvNetCharClassifier::Init(const string &data_file_path, bool ConvNetCharClassifier::Init(const string &data_file_path,
const string &lang, const string &lang,
LangModel *lang_mod) { LangModel *lang_mod) {
@ -308,9 +326,11 @@ bool ConvNetCharClassifier::Init(const string &data_file_path,
return true; return true;
} }
// Load the classifier's Neural Nets /**
// This function will return true if the net file does not exist. * Load the classifier's Neural Nets
// But will fail if the net did not pass the sanity checks * This function will return true if the net file does not exist.
* But will fail if the net did not pass the sanity checks
*/
bool ConvNetCharClassifier::LoadNets(const string &data_file_path, bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
const string &lang) { const string &lang) {
string char_net_file; string char_net_file;

View File

@ -99,10 +99,12 @@ CubeObject::~CubeObject() {
Cleanup(); Cleanup();
} }
// Actually do the recognition using the specified language model. If none /**
// is specified, the default language model in the CubeRecoContext is used. * Actually do the recognition using the specified language model. If none
// Returns the sorted list of alternate answers * is specified, the default language model in the CubeRecoContext is used.
// The Word mode determines whether recognition is done as a word or a phrase * @return the sorted list of alternate answers
* @param word_mode determines whether recognition is done as a word or a phrase
*/
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
if (char_samp_ == NULL) { if (char_samp_ == NULL) {
return NULL; return NULL;
@ -197,18 +199,24 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
return alt_list_; return alt_list_;
} }
// Recognize the member char sample as a word /**
* Recognize the member char sample as a word
*/
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) { WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
return Recognize(lang_mod, true); return Recognize(lang_mod, true);
} }
// Recognize the member char sample as a word /**
* Recognize the member char sample as a phrase
*/
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
return Recognize(lang_mod, false); return Recognize(lang_mod, false);
} }
// Computes the cost of a specific string. This is done by performing /**
// recognition of a language model that allows only the specified word * Computes the cost of a specific string. This is done by performing
* recognition of a language model that allows only the specified word
*/
int CubeObject::WordCost(const char *str) { int CubeObject::WordCost(const char *str) {
WordListLangModel *lang_mod = new WordListLangModel(cntxt_); WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
if (lang_mod == NULL) { if (lang_mod == NULL) {

View File

@ -31,7 +31,9 @@ CubeUtils::CubeUtils() {
CubeUtils::~CubeUtils() { CubeUtils::~CubeUtils() {
} }
// convert a prob to a cost (-ve log prob) /**
* convert a prob to a cost (-ve log prob)
*/
int CubeUtils::Prob2Cost(double prob_val) { int CubeUtils::Prob2Cost(double prob_val) {
if (prob_val < MIN_PROB) { if (prob_val < MIN_PROB) {
return MIN_PROB_COST; return MIN_PROB_COST;
@ -39,12 +41,16 @@ int CubeUtils::Prob2Cost(double prob_val) {
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE); return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
} }
// converts a cost to probability /**
* converts a cost to probability
*/
double CubeUtils::Cost2Prob(int cost) { double CubeUtils::Cost2Prob(int cost) {
return exp(-cost / PROB2COST_SCALE); return exp(-cost / PROB2COST_SCALE);
} }
// computes the length of a NULL terminated char_32 string /**
* computes the length of a NULL terminated char_32 string
*/
int CubeUtils::StrLen(const char_32 *char_32_ptr) { int CubeUtils::StrLen(const char_32 *char_32_ptr) {
if (char_32_ptr == NULL) { if (char_32_ptr == NULL) {
return 0; return 0;
@ -54,7 +60,9 @@ int CubeUtils::StrLen(const char_32 *char_32_ptr) {
return len; return len;
} }
// compares two char_32 strings /**
* compares two char_32 strings
*/
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) { int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
const char_32 *pch1 = str1; const char_32 *pch1 = str1;
const char_32 *pch2 = str2; const char_32 *pch2 = str2;
@ -76,7 +84,9 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
} }
} }
// Duplicates a 32-bit char buffer /**
* Duplicates a 32-bit char buffer
*/
char_32 *CubeUtils::StrDup(const char_32 *str32) { char_32 *CubeUtils::StrDup(const char_32 *str32) {
int len = StrLen(str32); int len = StrLen(str32);
char_32 *new_str = new char_32[len + 1]; char_32 *new_str = new char_32[len + 1];
@ -88,7 +98,9 @@ char_32 *CubeUtils::StrDup(const char_32 *str32) {
return new_str; return new_str;
} }
// creates a char samp from a specified portion of the image /**
* creates a char samp from a specified portion of the image
*/
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top, CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
int wid, int hgt) { int wid, int hgt) {
// get the raw img data from the image // get the raw img data from the image
@ -105,7 +117,9 @@ CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
return char_samp; return char_samp;
} }
// create a B/W image from a char_sample /**
* create a B/W image from a char_sample
*/
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) { Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
// parameter check // parameter check
if (char_samp == NULL) { if (char_samp == NULL) {
@ -137,7 +151,9 @@ Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
return pix; return pix;
} }
// creates a raw buffer from the specified location of the pix /**
* creates a raw buffer from the specified location of the pix
*/
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top, unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
int wid, int hgt) { int wid, int hgt) {
// skip invalid dimensions // skip invalid dimensions
@ -173,7 +189,9 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
return temp_buff; return temp_buff;
} }
// read file contents to a string /**
* read file contents to a string
*/
bool CubeUtils::ReadFileToString(const string &file_name, string *str) { bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
str->clear(); str->clear();
FILE *fp = fopen(file_name.c_str(), "rb"); FILE *fp = fopen(file_name.c_str(), "rb");
@ -206,7 +224,9 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
return (read_bytes == file_size); return (read_bytes == file_size);
} }
// splits a string into vectors based on specified delimiters /**
* splits a string into vectors based on specified delimiters
*/
void CubeUtils::SplitStringUsing(const string &str, void CubeUtils::SplitStringUsing(const string &str,
const string &delims, const string &delims,
vector<string> *str_vec) { vector<string> *str_vec) {
@ -240,7 +260,9 @@ void CubeUtils::SplitStringUsing(const string &str,
} }
} }
// UTF-8 to UTF-32 convesion functions /**
* UTF-8 to UTF-32 conversion functions
*/
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) { void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
str32->clear(); str32->clear();
int len = strlen(utf8_str); int len = strlen(utf8_str);
@ -254,7 +276,9 @@ void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
} }
} }
// UTF-8 to UTF-32 convesion functions /**
* UTF-32 to UTF-8 conversion functions
*/
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) { void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
str->clear(); str->clear();
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) { for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {

View File

@ -37,7 +37,9 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
path_cost_ = Cost(); path_cost_ = Cost();
} }
// leading, trailing punc constructor and single byte UTF char /**
* leading, trailing punc constructor and single byte UTF char
*/
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
const Dawg *dawg, EDGE_REF edge_idx, int class_id) { const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
root_ = false; root_ = false;
@ -51,7 +53,9 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
path_cost_ = Cost(); path_cost_ = Cost();
} }
// dict constructor: multi byte UTF char /**
* dict constructor: multi byte UTF char
*/
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg, TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx, EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
int class_id) { int class_id) {

View File

@ -37,7 +37,9 @@ WordAltList::~WordAltList() {
} }
} }
// insert an alternate word with the specified cost and tag /**
* insert an alternate word with the specified cost and tag
*/
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
if (word_alt_ == NULL || alt_cost_ == NULL) { if (word_alt_ == NULL || alt_cost_ == NULL) {
word_alt_ = new char_32*[max_alt_]; word_alt_ = new char_32*[max_alt_];
@ -84,7 +86,9 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
return true; return true;
} }
// sort the alternate in descending order based on the cost /**
* sort the alternate in descending order based on the cost
*/
void WordAltList::Sort() { void WordAltList::Sort() {
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) { for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) { for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {

View File

@ -50,8 +50,10 @@ WordUnigrams::~WordUnigrams() {
} }
} }
// Load the word-list and unigrams from file and create an object /**
// The word list is assumed to be sorted in lexicographic order. * Load the word-list and unigrams from file and create an object
* The word list is assumed to be sorted in lexicographic order.
*/
WordUnigrams *WordUnigrams::Create(const string &data_file_path, WordUnigrams *WordUnigrams::Create(const string &data_file_path,
const string &lang) { const string &lang) {
string file_name; string file_name;
@ -143,10 +145,12 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path,
return word_unigrams_obj; return word_unigrams_obj;
} }
// Split input into space-separated tokens, strip trailing punctuation /**
// from each, determine case properties, call UTF-8 flavor of cost * Split input into space-separated tokens, strip trailing punctuation
// function on each word, and aggregate all into single mean word * from each, determine case properties, call UTF-8 flavor of cost
// cost. * function on each word, and aggregate all into single mean word
* cost.
*/
int WordUnigrams::Cost(const char_32 *key_str32, int WordUnigrams::Cost(const char_32 *key_str32,
LangModel *lang_mod, LangModel *lang_mod,
CharSet *char_set) const { CharSet *char_set) const {
@ -239,7 +243,9 @@ int WordUnigrams::Cost(const char_32 *key_str32,
return static_cast<int>(cost / static_cast<double>(words.size())); return static_cast<int>(cost / static_cast<double>(words.size()));
} }
// Search for UTF-8 string using binary search of sorted words_ array. /**
* Search for UTF-8 string using binary search of sorted words_ array.
*/
int WordUnigrams::CostInternal(const char *key_str) const { int WordUnigrams::CostInternal(const char *key_str) const {
if (strlen(key_str) == 0) if (strlen(key_str) == 0)
return not_in_list_cost_; return not_in_list_cost_;

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "host.h" #include "host.h"
#include "danerror.h" #include "danerror.h"
#include "tprintf.h" #include "tprintf.h"
@ -28,27 +28,18 @@
#include <stdio.h> #include <stdio.h>
/*---------------------------------------------------------------------------*/ /**
void DoError(int Error, const char *Message) { * This routine prints the specified error message to stderr.
/* * It then jumps to the current error trap. If the error trap
** Parameters: * stack is empty, the calling program is terminated with a
** Error error number which is to be trapped * fatal error message.
** Message pointer to a string to be printed as an error message *
** Globals: * @param Error error number which is to be trapped
** ErrorTrapStack stack of error traps * @param Message pointer to a string to be printed as an error message
** CurrentTrapDepth number of traps on the stack * @return None - this routine does not return.
** Operation: * @note History: 4/3/89, DSJ, Created.
** This routine prints the specified error message to stderr.
** It then jumps to the current error trap. If the error trap
** stack is empty, the calling program is terminated with a
** fatal error message.
** Return:
** None - this routine does not return.
** Exceptions:
** Empty error trap stack terminates the calling program.
** History:
** 4/3/89, DSJ, Created.
*/ */
void DoError(int Error, const char *Message) {
if (Message != NULL) { if (Message != NULL) {
tprintf("\nError: %s!\n", Message); tprintf("\nError: %s!\n", Message);
} }

View File

@ -15,9 +15,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "efio.h" #include "efio.h"
#include "danerror.h" #include "danerror.h"
#include <stdio.h> #include <stdio.h>
@ -25,29 +25,22 @@
#define MAXERRORMESSAGE 256 #define MAXERRORMESSAGE 256
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
FILE *Efopen(const char *Name, const char *Mode) { * This routine attempts to open the specified file in the
/* * specified mode. If the file can be opened, a pointer to
** Parameters: * the open file is returned. If the file cannot be opened,
** Name name of file to be opened * an error is trapped.
** Mode mode to be used to open file * @param Name name of file to be opened
** Globals: * @param Mode mode to be used to open file
** None * @return Pointer to open file.
** Operation: * @note Globals: None
** This routine attempts to open the specified file in the * @note Exceptions: #FOPENERROR unable to open specified file
** specified mode. If the file can be opened, a pointer to * @note History: 5/21/89, DSJ, Created.
** the open file is returned. If the file cannot be opened,
** an error is trapped.
** Return:
** Pointer to open file.
** Exceptions:
** FOPENERROR unable to open specified file
** History:
** 5/21/89, DSJ, Created.
*/ */
FILE *Efopen(const char *Name, const char *Mode) {
FILE *File; FILE *File;
char ErrorMessage[MAXERRORMESSAGE]; char ErrorMessage[MAXERRORMESSAGE];

View File

@ -1,5 +1,5 @@
/****************************************************************************** /**************************************************************************
** Filename: * Filename:
emalloc.c emalloc.c
** Purpose: ** Purpose:
Routines for trapping memory allocation errors. Routines for trapping memory allocation errors.
@ -21,36 +21,30 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "emalloc.h" #include "emalloc.h"
#include "danerror.h" #include "danerror.h"
#include <stdlib.h> #include <stdlib.h>
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /**
void *Emalloc(int Size) { * This routine attempts to allocate the specified number of
/* * bytes. If the memory can be allocated, a pointer to the
** Parameters: * memory is returned. If the memory cannot be allocated, or
** Size * if the allocation request is negative or zero,
number of bytes of memory to be allocated * an error is trapped.
** Globals: none * @param Size number of bytes of memory to be allocated
** Operation: * @return Pointer to allocated memory.
** This routine attempts to allocate the specified number of * @note Exceptions:
** bytes. If the memory can be allocated, a pointer to the * - #NOTENOUGHMEMORY unable to allocate Size bytes
** memory is returned. If the memory cannot be allocated, or * - #ILLEGALMALLOCREQUEST negative or zero request size
** if the allocation request is negative or zero, * @note History: 4/3/89, DSJ, Created.
** an error is trapped.
** Return: Pointer to allocated memory.
** Exceptions: NOTENOUGHMEMORY
unable to allocate Size bytes
** ILLEGALMALLOCREQUEST
negative or zero request size
** History: 4/3/89, DSJ, Created.
*/ */
void *Emalloc(int Size) {
void *Buffer; void *Buffer;
if (Size <= 0) if (Size <= 0)

View File

@ -42,9 +42,9 @@
#endif #endif
using tesseract::ScriptPos; using tesseract::ScriptPos;
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
namespace tesseract { namespace tesseract {

View File

@ -18,9 +18,9 @@
#ifndef STOPPER_H #ifndef STOPPER_H
#define STOPPER_H #define STOPPER_H
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "genericvector.h" #include "genericvector.h"
#include "params.h" #include "params.h"

View File

@ -58,7 +58,7 @@ PROJECT_LOGO =
# entered, it will be relative to the location where doxygen was started. If # entered, it will be relative to the location where doxygen was started. If
# left blank the current directory will be used. # left blank the current directory will be used.
OUTPUT_DIRECTORY = $(builddir)/doc/ OUTPUT_DIRECTORY = doc/
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
# directories (in 2 levels) under the output directory of each output format and # directories (in 2 levels) under the output directory of each output format and

View File

@ -21,9 +21,9 @@
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#include "oldlist.h" #include "oldlist.h"
#include "efio.h" #include "efio.h"
#include "emalloc.h" #include "emalloc.h"
@ -42,16 +42,16 @@
DECLARE_STRING_PARAM_FLAG(D); DECLARE_STRING_PARAM_FLAG(D);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Function Prototypes Public Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
int main ( int main (
int argc, int argc,
char **argv); char **argv);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Function Prototypes Private Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
void WriteNormProtos ( void WriteNormProtos (
const char *Directory, const char *Directory,
@ -71,9 +71,9 @@ void WriteProtos(
BOOL8 WriteSigProtos, BOOL8 WriteSigProtos,
BOOL8 WriteInsigProtos); BOOL8 WriteInsigProtos);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Global Data Definitions and Declarations Global Data Definitions and Declarations
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/* global variable to hold configuration parameters to control clustering */ /* global variable to hold configuration parameters to control clustering */
//-M 0.025 -B 0.05 -I 0.8 -C 1e-3 //-M 0.025 -B 0.05 -I 0.8 -C 1e-3
CLUSTERCONFIG CNConfig = CLUSTERCONFIG CNConfig =
@ -82,63 +82,59 @@ CLUSTERCONFIG CNConfig =
}; };
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
int main(int argc, char* argv[]) /**
* This program reads in a text file consisting of feature
/* * samples from a training page in the following format:
** Parameters: * @verbatim
** argc number of command line arguments FontName CharName NumberOfFeatureTypes(N)
** argv array of command line arguments FeatureTypeName1 NumberOfFeatures(M)
** Globals: none Feature1
** Operation: ...
** This program reads in a text file consisting of feature FeatureM
** samples from a training page in the following format: FeatureTypeName2 NumberOfFeatures(M)
** Feature1
** FontName CharName NumberOfFeatureTypes(N) ...
** FeatureTypeName1 NumberOfFeatures(M) FeatureM
** Feature1 ...
** ... FeatureTypeNameN NumberOfFeatures(M)
** FeatureM Feature1
** FeatureTypeName2 NumberOfFeatures(M) ...
** Feature1 FeatureM
** ... FontName CharName ...
** FeatureM @endverbatim
** ... * It then appends these samples into a separate file for each
** FeatureTypeNameN NumberOfFeatures(M) * character. The name of the file is
** Feature1 *
** ... * DirectoryName/FontName/CharName.FeatureTypeName
** FeatureM *
** FontName CharName ... * The DirectoryName can be specified via a command
** * line argument. If not specified, it defaults to the
** It then appends these samples into a separate file for each * current directory. The format of the resulting files is:
** character. The name of the file is * @verbatim
** NumberOfFeatures(M)
** DirectoryName/FontName/CharName.FeatureTypeName Feature1
** ...
** The DirectoryName can be specified via a command FeatureM
** line argument. If not specified, it defaults to the NumberOfFeatures(M)
** current directory. The format of the resulting files is: ...
** @endverbatim
** NumberOfFeatures(M) * The output files each have a header which describes the
** Feature1 * type of feature which the file contains. This header is
** ... * in the format required by the clusterer. A command line
** FeatureM * argument can also be used to specify that only the first
** NumberOfFeatures(M) * N samples of each class should be used.
** ... * @param argc number of command line arguments
** * @param argv array of command line arguments
** The output files each have a header which describes the * @return none
** type of feature which the file contains. This header is * @note Globals: none
** in the format required by the clusterer. A command line * @note Exceptions: none
** argument can also be used to specify that only the first * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
** N samples of each class should be used.
** Return: none
** Exceptions: none
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
*/ */
int main(int argc, char* argv[])
{ {
// Set the global Config parameters before parsing the command line. // Set the global Config parameters before parsing the command line.
Config = CNConfig; Config = CNConfig;
@ -207,28 +203,26 @@ int main(int argc, char* argv[])
} // main } // main
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Private Code Private Code
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/**
* This routine writes the specified samples into files which
* are organized according to the font name and character name
* of the samples.
* @param Directory directory to place sample files into
* @param LabeledProtoList List of labeled protos
* @param Clusterer The CLUSTERER to use
* @return none
* @note Exceptions: none
* @note History: Fri Aug 18 16:17:06 1989, DSJ, Created.
*/
void WriteNormProtos ( void WriteNormProtos (
const char *Directory, const char *Directory,
LIST LabeledProtoList, LIST LabeledProtoList,
CLUSTERER *Clusterer) CLUSTERER *Clusterer)
/*
** Parameters:
** Directory directory to place sample files into
** Operation:
** This routine writes the specified samples into files which
** are organized according to the font name and character name
** of the samples.
** Return: none
** Exceptions: none
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
*/
{ {
FILE *File; FILE *File;
STRING Filename; STRING Filename;

View File

@ -73,18 +73,17 @@ DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence,
DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence,
"Desired confidence in prototypes created"); "Desired confidence in prototypes created");
/* /**
** Parameters: * This routine parses the command line arguments that were
** argc number of command line arguments to parse * passed to the program and uses them to set relevant
** argv command line arguments * training-related global parameters
** Globals: *
** Config current clustering parameters * Globals:
** Operation: * - Config current clustering parameters
** This routine parses the command line arguments that were * @param argc number of command line arguments to parse
** passed to the program and ses them to set relevant * @param argv command line arguments
** training-related global parameters * @return none
** Return: none * @note Exceptions: Illegal options terminate the program.
** Exceptions: Illegal options terminate the program.
*/ */
void ParseArguments(int* argc, char ***argv) { void ParseArguments(int* argc, char ***argv) {
STRING usage; STRING usage;
@ -158,19 +157,21 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
} }
} }
// Creates a MasterTraininer and loads the training data into it: /**
// Initializes feature_defs and IntegerFX. * Creates a MasterTrainer and loads the training data into it:
// Loads the shape_table if shape_table != NULL. * Initializes feature_defs and IntegerFX.
// Loads initial unicharset from -U command-line option. * Loads the shape_table if shape_table != NULL.
// If FLAGS_T is set, loads the majority of data from there, else: * Loads initial unicharset from -U command-line option.
// Loads font info from -F option. * If FLAGS_T is set, loads the majority of data from there, else:
// Loads xheights from -X option. * - Loads font info from -F option.
// Loads samples from .tr files in remaining command-line args. * - Loads xheights from -X option.
// Deletes outliers and computes canonical samples. * - Loads samples from .tr files in remaining command-line args.
// If FLAGS_output_trainer is set, saves the trainer for future use. * - Deletes outliers and computes canonical samples.
// Computes canonical and cloud features. * - If FLAGS_output_trainer is set, saves the trainer for future use.
// If shape_table is not NULL, but failed to load, make a fake flat one, * Computes canonical and cloud features.
// as shape clustering was not run. * If shape_table is not NULL, but failed to load, make a fake flat one,
* as shape clustering was not run.
*/
MasterTrainer* LoadTrainingData(int argc, const char* const * argv, MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
bool replication, bool replication,
ShapeTable** shape_table, ShapeTable** shape_table,
@ -294,20 +295,19 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
} // namespace tesseract. } // namespace tesseract.
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
const char *GetNextFilename(int argc, const char* const * argv) { /**
/* * This routine returns the next command line argument. If
** Parameters: none * there are no remaining command line arguments, it returns
** Globals: * NULL. This routine should only be called after all option
** tessoptind defined by tessopt sys call * arguments have been parsed and removed with ParseArguments.
** Operation: *
** This routine returns the next command line argument. If * Globals:
** there are no remaining command line arguments, it returns * - tessoptind defined by tessopt sys call
** NULL. This routine should only be called after all option * @return Next command line argument or NULL.
** arguments have been parsed and removed with ParseArguments. * @note Exceptions: none
** Return: Next command line argument or NULL. * @note History: Fri Aug 18 09:34:12 1989, DSJ, Created.
** Exceptions: none
** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
*/ */
const char *GetNextFilename(int argc, const char* const * argv) {
if (tessoptind < argc) if (tessoptind < argc)
return argv[tessoptind++]; return argv[tessoptind++];
else else
@ -317,24 +317,20 @@ const char *GetNextFilename(int argc, const char* const * argv) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine searches through a list of labeled lists to find
* a list with the specified label. If a matching labeled list
* cannot be found, NULL is returned.
* @param List list to search
* @param Label label to search for
* @return Labeled list with the specified Label or NULL.
* @note Globals: none
* @note Exceptions: none
* @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
*/
LABELEDLIST FindList ( LABELEDLIST FindList (
LIST List, LIST List,
char *Label) char *Label)
/*
** Parameters:
** List list to search
** Label label to search for
** Globals: none
** Operation:
** This routine searches thru a list of labeled lists to find
** a list with the specified label. If a matching labeled list
** cannot be found, NULL is returned.
** Return: Labeled list with the specified Label or NULL.
** Exceptions: none
** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
*/
{ {
LABELEDLIST LabeledList; LABELEDLIST LabeledList;
@ -349,21 +345,17 @@ LABELEDLIST FindList (
} /* FindList */ } /* FindList */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine allocates a new, empty labeled list and gives
* it the specified label.
* @param Label label for new list
* @return New, empty labeled list.
* @note Globals: none
* @note Exceptions: none
* @note History: Fri Aug 18 16:08:46 1989, DSJ, Created.
*/
LABELEDLIST NewLabeledList ( LABELEDLIST NewLabeledList (
const char *Label) const char *Label)
/*
** Parameters:
** Label label for new list
** Globals: none
** Operation:
** This routine allocates a new, empty labeled list and gives
** it the specified label.
** Return: New, empty labeled list.
** Exceptions: none
** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
*/
{ {
LABELEDLIST LabeledList; LABELEDLIST LabeledList;
@ -380,25 +372,29 @@ LABELEDLIST NewLabeledList (
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// TODO(rays) This is now used only by cntraining. Convert cntraining to use // TODO(rays) This is now used only by cntraining. Convert cntraining to use
// the new method or get rid of it entirely. // the new method or get rid of it entirely.
/**
* This routine reads training samples from a file and
* places them into a data structure which organizes the
* samples by FontName and CharName. It then returns this
* data structure.
* @param file open text file to read samples from
* @param feature_defs
* @param feature_name
* @param max_samples
* @param unicharset
* @param training_samples
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History:
* - Fri Aug 18 13:11:39 1989, DSJ, Created.
* - Tue May 17 1998 simplifications to structure, eliminated
* font, and feature specification levels of structure.
*/
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs, void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
const char *feature_name, int max_samples, const char *feature_name, int max_samples,
UNICHARSET* unicharset, UNICHARSET* unicharset,
FILE* file, LIST* training_samples) { FILE* file, LIST* training_samples) {
/*
** Parameters:
** file open text file to read samples from
** Globals: none
** Operation:
** This routine reads training samples from a file and
** places them into a data structure which organizes the
** samples by FontName and CharName. It then returns this
** data structure.
** Return: none
** Exceptions: none
** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
** Tue May 17 1998 simplifications to structure, eliminated
** font, and feature specification levels of structure.
*/
char buffer[2048]; char buffer[2048];
char unichar[UNICHAR_LEN + 1]; char unichar[UNICHAR_LEN + 1];
LABELEDLIST char_sample; LABELEDLIST char_sample;
@ -450,18 +446,16 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeTrainingSamples(LIST CharList) { /**
/* * This routine deallocates all of the space allocated to
** Parameters: * the specified list of training samples.
** FontList list of all fonts in document * @param CharList list of all fonts in document
** Globals: none * @return none
** Operation: * @note Globals: none
** This routine deallocates all of the space allocated to * @note Exceptions: none
** the specified list of training samples. * @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
** Return: none
** Exceptions: none
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/ */
void FreeTrainingSamples(LIST CharList) {
LABELEDLIST char_sample; LABELEDLIST char_sample;
FEATURE_SET FeatureSet; FEATURE_SET FeatureSet;
LIST FeatureList; LIST FeatureList;
@ -480,45 +474,39 @@ void FreeTrainingSamples(LIST CharList) {
} /* FreeTrainingSamples */ } /* FreeTrainingSamples */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void FreeLabeledList(LABELEDLIST LabeledList) { /**
/* * This routine deallocates all of the memory consumed by
** Parameters: * a labeled list. It does not free any memory which may be
** LabeledList labeled list to be freed * consumed by the items in the list.
** Globals: none * @param LabeledList labeled list to be freed
** Operation: * @note Globals: none
** This routine deallocates all of the memory consumed by * @return none
** a labeled list. It does not free any memory which may be * @note Exceptions: none
** consumed by the items in the list. * @note History: Fri Aug 18 17:52:45 1989, DSJ, Created.
** Return: none
** Exceptions: none
** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
*/ */
void FreeLabeledList(LABELEDLIST LabeledList) {
destroy(LabeledList->List); destroy(LabeledList->List);
free(LabeledList->Label); free(LabeledList->Label);
free(LabeledList); free(LabeledList);
} /* FreeLabeledList */ } /* FreeLabeledList */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/**
* This routine reads samples from a LABELEDLIST and enters
* those samples into a clusterer data structure. This
* data structure is then returned to the caller.
* @param char_sample: LABELEDLIST that holds all the feature information for a
* given character.
* @param FeatureDefs
* @param program_feature_type
* @return Pointer to new clusterer data structure.
* @note Globals: None
* @note Exceptions: None
* @note History: 8/16/89, DSJ, Created.
*/
CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
LABELEDLIST char_sample, LABELEDLIST char_sample,
const char* program_feature_type) { const char* program_feature_type) {
/*
** Parameters:
** char_sample: LABELEDLIST that holds all the feature information for a
** given character.
** Globals:
** None
** Operation:
** This routine reads samples from a LABELEDLIST and enters
** those samples into a clusterer data structure. This
** data structure is then returned to the caller.
** Return:
** Pointer to new clusterer data structure.
** Exceptions:
** None
** History:
** 8/16/89, DSJ, Created.
*/
uinT16 N; uinT16 N;
int i, j; int i, j;
FLOAT32 *Sample = NULL; FLOAT32 *Sample = NULL;
@ -741,21 +729,17 @@ MERGE_CLASS NewLabeledClass (
} /* NewLabeledClass */ } /* NewLabeledClass */
/*-----------------------------------------------------------------------------*/ /*-----------------------------------------------------------------------------*/
/**
* This routine deallocates all of the space allocated to
* the specified list of training samples.
* @param ClassList list of all fonts in document
* @return none
* @note Globals: none
* @note Exceptions: none
* @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/
void FreeLabeledClassList ( void FreeLabeledClassList (
LIST ClassList) LIST ClassList)
/*
** Parameters:
** FontList list of all fonts in document
** Globals: none
** Operation:
** This routine deallocates all of the space allocated to
** the specified list of training samples.
** Return: none
** Exceptions: none
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
*/
{ {
MERGE_CLASS MergeClass; MERGE_CLASS MergeClass;
@ -770,7 +754,7 @@ void FreeLabeledClassList (
} /* FreeLabeledClassList */ } /* FreeLabeledClassList */
/** SetUpForFloat2Int **************************************************/ /* SetUpForFloat2Int */
CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
LIST LabeledClassList) { LIST LabeledClassList) {
MERGE_CLASS MergeClass; MERGE_CLASS MergeClass;

View File

@ -23,9 +23,9 @@
** See the License for the specific language governing permissions and ** See the License for the specific language governing permissions and
** limitations under the License. ** limitations under the License.
******************************************************************************/ ******************************************************************************/
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Include Files and Type Defines Include Files and Type Defines
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
#include "config_auto.h" #include "config_auto.h"
#endif #endif
@ -79,9 +79,9 @@ const int kMaxShapeLabelLength = 10;
DECLARE_STRING_PARAM_FLAG(test_ch); DECLARE_STRING_PARAM_FLAG(test_ch);
/**---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Function Prototypes Public Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------*/
int main ( int main (
int argc, int argc,
char **argv); char **argv);
@ -208,41 +208,37 @@ static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) {
config_map->CompleteMerges(); config_map->CompleteMerges();
} }
/*---------------------------------------------------------------------------*/ /**
* This program reads in a text file consisting of feature
* samples from a training page in the following format:
* @verbatim
FontName UTF8-char-str xmin ymin xmax ymax page-number
NumberOfFeatureTypes(N)
FeatureTypeName1 NumberOfFeatures(M)
Feature1
...
FeatureM
FeatureTypeName2 NumberOfFeatures(M)
Feature1
...
FeatureM
...
FeatureTypeNameN NumberOfFeatures(M)
Feature1
...
FeatureM
FontName CharName ...
@endverbatim
* The result of this program is a binary inttemp file used by
* the OCR engine.
* @param argc number of command line arguments
* @param argv array of command line arguments
* @return none
* @note Exceptions: none
* @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
* @note History: Mon May 18 1998, Christy Russson, Revision started.
*/
int main (int argc, char **argv) { int main (int argc, char **argv) {
/*
** Parameters:
** argc number of command line arguments
** argv array of command line arguments
** Globals: none
** Operation:
** This program reads in a text file consisting of feature
** samples from a training page in the following format:
**
** FontName UTF8-char-str xmin ymin xmax ymax page-number
** NumberOfFeatureTypes(N)
** FeatureTypeName1 NumberOfFeatures(M)
** Feature1
** ...
** FeatureM
** FeatureTypeName2 NumberOfFeatures(M)
** Feature1
** ...
** FeatureM
** ...
** FeatureTypeNameN NumberOfFeatures(M)
** Feature1
** ...
** FeatureM
** FontName CharName ...
**
** The result of this program is a binary inttemp file used by
** the OCR engine.
** Return: none
** Exceptions: none
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
** Mon May 18 1998, Christy Russson, Revision started.
*/
ParseArguments(&argc, &argv); ParseArguments(&argc, &argv);
ShapeTable* shape_table = NULL; ShapeTable* shape_table = NULL;

View File

@ -171,8 +171,10 @@ void LanguageModel::InitForWord(const WERD_CHOICE *prev_word,
} }
} }
// Helper scans the collection of predecessors for competing siblings that /**
// have the same letter with the opposite case, setting competing_vse. * Helper scans the collection of predecessors for competing siblings that
* have the same letter with the opposite case, setting competing_vse.
*/
static void ScanParentsForCaseMix(const UNICHARSET& unicharset, static void ScanParentsForCaseMix(const UNICHARSET& unicharset,
LanguageModelState* parent_node) { LanguageModelState* parent_node) {
if (parent_node == NULL) return; if (parent_node == NULL) return;
@ -200,8 +202,10 @@ static void ScanParentsForCaseMix(const UNICHARSET& unicharset,
} }
} }
// Helper returns true if the given choice has a better case variant before /**
// it in the choice_list that is not distinguishable by size. * Helper returns true if the given choice has a better case variant before
* it in the choice_list that is not distinguishable by size.
*/
static bool HasBetterCaseVariant(const UNICHARSET& unicharset, static bool HasBetterCaseVariant(const UNICHARSET& unicharset,
const BLOB_CHOICE* choice, const BLOB_CHOICE* choice,
BLOB_CHOICE_LIST* choices) { BLOB_CHOICE_LIST* choices) {
@ -222,27 +226,32 @@ static bool HasBetterCaseVariant(const UNICHARSET& unicharset,
return false; // Should never happen, but just in case. return false; // Should never happen, but just in case.
} }
// UpdateState has the job of combining the ViterbiStateEntry lists on each /**
// of the choices on parent_list with each of the blob choices in curr_list, * UpdateState has the job of combining the ViterbiStateEntry lists on each
// making a new ViterbiStateEntry for each sensible path. * of the choices on parent_list with each of the blob choices in curr_list,
// This could be a huge set of combinations, creating a lot of work only to * making a new ViterbiStateEntry for each sensible path.
// be truncated by some beam limit, but only certain kinds of paths will *
// continue at the next step: * This could be a huge set of combinations, creating a lot of work only to
// paths that are liked by the language model: either a DAWG or the n-gram * be truncated by some beam limit, but only certain kinds of paths will
// model, where active. * continue at the next step:
// paths that represent some kind of top choice. The old permuter permuted * - paths that are liked by the language model: either a DAWG or the n-gram
// the top raw classifier score, the top upper case word and the top lower- * model, where active.
// case word. UpdateState now concentrates its top-choice paths on top * - paths that represent some kind of top choice. The old permuter permuted
// lower-case, top upper-case (or caseless alpha), and top digit sequence, * the top raw classifier score, the top upper case word and the top lower-
// with allowance for continuation of these paths through blobs where such * case word. UpdateState now concentrates its top-choice paths on top
// a character does not appear in the choices list. * lower-case, top upper-case (or caseless alpha), and top digit sequence,
// GetNextParentVSE enforces some of these models to minimize the number of * with allowance for continuation of these paths through blobs where such
// calls to AddViterbiStateEntry, even prior to looking at the language model. * a character does not appear in the choices list.
// Thus an n-blob sequence of [l1I] will produce 3n calls to *
// AddViterbiStateEntry instead of 3^n. * GetNextParentVSE enforces some of these models to minimize the number of
// Of course it isn't quite that simple as Title Case is handled by allowing * calls to AddViterbiStateEntry, even prior to looking at the language model.
// lower case to continue an upper case initial, but it has to be detected * Thus an n-blob sequence of [l1I] will produce 3n calls to
// in the combiner so it knows which upper case letters are initial alphas. * AddViterbiStateEntry instead of 3^n.
*
* Of course it isn't quite that simple as Title Case is handled by allowing
* lower case to continue an upper case initial, but it has to be detected
* in the combiner so it knows which upper case letters are initial alphas.
*/
bool LanguageModel::UpdateState( bool LanguageModel::UpdateState(
bool just_classified, bool just_classified,
int curr_col, int curr_row, int curr_col, int curr_row,
@ -367,10 +376,12 @@ bool LanguageModel::UpdateState(
return new_changed; return new_changed;
} }
// Finds the first lower and upper case letter and first digit in curr_list. /**
// For non-upper/lower languages, alpha counts as upper. * Finds the first lower and upper case letter and first digit in curr_list.
// Uses the first character in the list in place of empty results. * For non-upper/lower languages, alpha counts as upper.
// Returns true if both alpha and digits are found. * Uses the first character in the list in place of empty results.
* Returns true if both alpha and digits are found.
*/
bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list, bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list,
BLOB_CHOICE **first_lower, BLOB_CHOICE **first_lower,
BLOB_CHOICE **first_upper, BLOB_CHOICE **first_upper,
@ -402,13 +413,15 @@ bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list,
return mixed; return mixed;
} }
// Forces there to be at least one entry in the overall set of the /**
// viterbi_state_entries of each element of parent_node that has the * Forces there to be at least one entry in the overall set of the
// top_choice_flag set for lower, upper and digit using the same rules as * viterbi_state_entries of each element of parent_node that has the
// GetTopLowerUpperDigit, setting the flag on the first found suitable * top_choice_flag set for lower, upper and digit using the same rules as
// candidate, whether or not the flag is set on some other parent. * GetTopLowerUpperDigit, setting the flag on the first found suitable
// Returns 1 if both alpha and digits are found among the parents, -1 if no * candidate, whether or not the flag is set on some other parent.
// parents are found at all (a legitimate case), and 0 otherwise. * Returns 1 if both alpha and digits are found among the parents, -1 if no
* parents are found at all (a legitimate case), and 0 otherwise.
*/
int LanguageModel::SetTopParentLowerUpperDigit( int LanguageModel::SetTopParentLowerUpperDigit(
LanguageModelState *parent_node) const { LanguageModelState *parent_node) const {
if (parent_node == NULL) return -1; if (parent_node == NULL) return -1;
@ -481,9 +494,11 @@ int LanguageModel::SetTopParentLowerUpperDigit(
return mixed ? 1 : 0; return mixed ? 1 : 0;
} }
// Finds the next ViterbiStateEntry with which the given unichar_id can /**
// combine sensibly, taking into account any mixed alnum/mixed case * Finds the next ViterbiStateEntry with which the given unichar_id can
// situation, and whether this combination has been inspected before. * combine sensibly, taking into account any mixed alnum/mixed case
* situation, and whether this combination has been inspected before.
*/
ViterbiStateEntry* LanguageModel::GetNextParentVSE( ViterbiStateEntry* LanguageModel::GetNextParentVSE(
bool just_classified, bool mixed_alnum, const BLOB_CHOICE* bc, bool just_classified, bool mixed_alnum, const BLOB_CHOICE* bc,
LanguageModelFlagsType blob_choice_flags, const UNICHARSET& unicharset, LanguageModelFlagsType blob_choice_flags, const UNICHARSET& unicharset,

View File

@ -202,8 +202,10 @@ bool LMPainPoints::GeneratePainPoint(
} }
} }
// Adjusts the pain point coordinates to cope with expansion of the ratings /**
// matrix due to a split of the blob with the given index. * Adjusts the pain point coordinates to cope with expansion of the ratings
* matrix due to a split of the blob with the given index.
*/
void LMPainPoints::RemapForSplit(int index) { void LMPainPoints::RemapForSplit(int index) {
for (int i = 0; i < LM_PPTYPE_NUM; ++i) { for (int i = 0; i < LM_PPTYPE_NUM; ++i) {
GenericVector<MatrixCoordPair>* heap = pain_points_heaps_[i].heap(); GenericVector<MatrixCoordPair>* heap = pain_points_heaps_[i].heap();

View File

@ -59,7 +59,7 @@ void ViterbiStateEntry::Print(const char *msg) const {
tprintf("\n"); tprintf("\n");
} }
// Clears the viterbi search state back to its initial conditions. /// Clears the viterbi search state back to its initial conditions.
void LanguageModelState::Clear() { void LanguageModelState::Clear() {
viterbi_state_entries.clear(); viterbi_state_entries.clear();
viterbi_state_entries_prunable_length = 0; viterbi_state_entries_prunable_length = 0;

View File

@ -33,28 +33,31 @@
namespace tesseract { namespace tesseract {
// Used for expressing various language model flags. /// Used for expressing various language model flags.
typedef unsigned char LanguageModelFlagsType; typedef unsigned char LanguageModelFlagsType;
// The following structs are used for storing the state of the language model /// The following structs are used for storing the state of the language model
// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs /// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
// and the links are the relationships between the underlying blobs (see /// and the links are the relationships between the underlying blobs (see
// segsearch.h for a more detailed description). /// segsearch.h for a more detailed description).
// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has ///
// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi /// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
// search leading up to and including this BLOB_CHOICE. /// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
// Each ViterbiStateEntry contains information from various components of the /// search leading up to and including this BLOB_CHOICE.
// language model: dawgs in which the path is found, character ngram model ///
// probability of the path, script/chartype/font consistency info, state for /// Each ViterbiStateEntry contains information from various components of the
// language-specific heuristics (e.g. hyphenated and compound words, lower/upper /// language model: dawgs in which the path is found, character ngram model
// case preferences, etc). /// probability of the path, script/chartype/font consistency info, state for
// Each ViterbiStateEntry also contains the parent pointer, so that the path /// language-specific heuristics (e.g. hyphenated and compound words, lower/upper
// that it represents (WERD_CHOICE) can be constructed by following these /// case preferences, etc).
// parent pointers. ///
/// Each ViterbiStateEntry also contains the parent pointer, so that the path
/// that it represents (WERD_CHOICE) can be constructed by following these
/// parent pointers.
// Struct for storing additional information used by Dawg language model /// Struct for storing additional information used by Dawg language model
// component. It stores the set of active dawgs in which the sequence of /// component. It stores the set of active dawgs in which the sequence of
// letters on a path can be found. /// letters on a path can be found.
struct LanguageModelDawgInfo { struct LanguageModelDawgInfo {
LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) { LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) {
active_dawgs = new DawgPositionVector(*a); active_dawgs = new DawgPositionVector(*a);
@ -66,29 +69,29 @@ struct LanguageModelDawgInfo {
PermuterType permuter; PermuterType permuter;
}; };
// Struct for storing additional information used by Ngram language model /// Struct for storing additional information used by Ngram language model
// component. /// component.
struct LanguageModelNgramInfo { struct LanguageModelNgramInfo {
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc) LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
: context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc), : context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc),
ngram_and_classifier_cost(ncc) {} ngram_and_classifier_cost(ncc) {}
STRING context; // context string STRING context; ///< context string
// Length of the context measured by advancing using UNICHAR::utf8_step() /// Length of the context measured by advancing using UNICHAR::utf8_step()
// (should be at most the order of the character ngram model used). /// (should be at most the order of the character ngram model used).
int context_unichar_step_len; int context_unichar_step_len;
// The paths with pruned set are pruned out from the perspective of the /// The paths with pruned set are pruned out from the perspective of the
// character ngram model. They are explored further because they represent /// character ngram model. They are explored further because they represent
// a dictionary match or a top choice. Thus ngram_info is still computed /// a dictionary match or a top choice. Thus ngram_info is still computed
// for them in order to calculate the combined cost. /// for them in order to calculate the combined cost.
bool pruned; bool pruned;
// -ln(P_ngram_model(path)) /// -ln(P_ngram_model(path))
float ngram_cost; float ngram_cost;
// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ] /// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
float ngram_and_classifier_cost; float ngram_and_classifier_cost;
}; };
// Struct for storing the information about a path in the segmentation graph /// Struct for storing the information about a path in the segmentation graph
// explored by Viterbi search. /// explored by Viterbi search.
struct ViterbiStateEntry : public ELIST_LINK { struct ViterbiStateEntry : public ELIST_LINK {
ViterbiStateEntry(ViterbiStateEntry *pe, ViterbiStateEntry(ViterbiStateEntry *pe,
BLOB_CHOICE *b, float c, float ol, BLOB_CHOICE *b, float c, float ol,
@ -122,8 +125,8 @@ struct ViterbiStateEntry : public ELIST_LINK {
delete ngram_info; delete ngram_info;
delete debug_str; delete debug_str;
} }
// Comparator function for sorting ViterbiStateEntry_LISTs in /// Comparator function for sorting ViterbiStateEntry_LISTs in
// non-increasing order of costs. /// non-increasing order of costs.
static int Compare(const void *e1, const void *e2) { static int Compare(const void *e1, const void *e2) {
const ViterbiStateEntry *ve1 = const ViterbiStateEntry *ve1 =
*reinterpret_cast<const ViterbiStateEntry * const *>(e1); *reinterpret_cast<const ViterbiStateEntry * const *>(e1);
@ -137,8 +140,8 @@ struct ViterbiStateEntry : public ELIST_LINK {
} }
return consistency_info.Consistent(); return consistency_info.Consistent();
} }
// Returns true if this VSE has an alphanumeric character as its classifier /// Returns true if this VSE has an alphanumeric character as its classifier
// result. /// result.
bool HasAlnumChoice(const UNICHARSET& unicharset) { bool HasAlnumChoice(const UNICHARSET& unicharset) {
if (curr_b == NULL) return false; if (curr_b == NULL) return false;
UNICHAR_ID unichar_id = curr_b->unichar_id(); UNICHAR_ID unichar_id = curr_b->unichar_id();
@ -149,48 +152,48 @@ struct ViterbiStateEntry : public ELIST_LINK {
} }
void Print(const char *msg) const; void Print(const char *msg) const;
// The cost is an adjusted ratings sum, that is adjusted by all the language /// The cost is an adjusted ratings sum, that is adjusted by all the language
// model components that use Viterbi search. /// model components that use Viterbi search.
float cost; float cost;
// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this). /// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
BLOB_CHOICE *curr_b; BLOB_CHOICE *curr_b;
ViterbiStateEntry *parent_vse; ViterbiStateEntry *parent_vse;
// Pointer to a case-competing ViterbiStateEntry in the same list that /// Pointer to a case-competing ViterbiStateEntry in the same list that
// represents a path ending in the same letter of the opposite case. /// represents a path ending in the same letter of the opposite case.
ViterbiStateEntry *competing_vse; ViterbiStateEntry *competing_vse;
// Various information about the characters on the path represented /// Various information about the characters on the path represented
// by this ViterbiStateEntry. /// by this ViterbiStateEntry.
float ratings_sum; // sum of ratings of character on the path float ratings_sum; ///< sum of ratings of character on the path
float min_certainty; // minimum certainty on the path float min_certainty; ///< minimum certainty on the path
int adapted; // number of BLOB_CHOICES from adapted templates int adapted; ///< number of BLOB_CHOICES from adapted templates
int length; // number of characters on the path int length; ///< number of characters on the path
float outline_length; // length of the outline so far float outline_length; ///< length of the outline so far
LMConsistencyInfo consistency_info; // path consistency info LMConsistencyInfo consistency_info; ///< path consistency info
AssociateStats associate_stats; // character widths/gaps/seams AssociateStats associate_stats; ///< character widths/gaps/seams
// Flags for marking the entry as a top choice path with /// Flags for marking the entry as a top choice path with
// the smallest rating or lower/upper case letters). /// the smallest rating or lower/upper case letters).
LanguageModelFlagsType top_choice_flags; LanguageModelFlagsType top_choice_flags;
// Extra information maintained by Dawg language model component /// Extra information maintained by Dawg language model component
// (owned by ViterbiStateEntry). /// (owned by ViterbiStateEntry).
LanguageModelDawgInfo *dawg_info; LanguageModelDawgInfo *dawg_info;
// Extra information maintained by Ngram language model component /// Extra information maintained by Ngram language model component
// (owned by ViterbiStateEntry). /// (owned by ViterbiStateEntry).
LanguageModelNgramInfo *ngram_info; LanguageModelNgramInfo *ngram_info;
bool updated; // set to true if the entry has just been created/updated bool updated; ///< set to true if the entry has just been created/updated
// UTF8 string representing the path corresponding to this vse. /// UTF8 string representing the path corresponding to this vse.
// Populated only when language_model_debug_level > 0. /// Populated only when language_model_debug_level > 0.
STRING *debug_str; STRING *debug_str;
}; };
ELISTIZEH(ViterbiStateEntry); ELISTIZEH(ViterbiStateEntry);
// Struct to store information maintained by various language model components. /// Struct to store information maintained by various language model components.
struct LanguageModelState { struct LanguageModelState {
LanguageModelState() : LanguageModelState() :
viterbi_state_entries_prunable_length(0), viterbi_state_entries_prunable_length(0),
@ -198,21 +201,21 @@ struct LanguageModelState {
viterbi_state_entries_length(0) {} viterbi_state_entries_length(0) {}
~LanguageModelState() {} ~LanguageModelState() {}
// Clears the viterbi search state back to its initial conditions. /// Clears the viterbi search state back to its initial conditions.
void Clear(); void Clear();
void Print(const char *msg); void Print(const char *msg);
// Storage for the Viterbi state. /// Storage for the Viterbi state.
ViterbiStateEntry_LIST viterbi_state_entries; ViterbiStateEntry_LIST viterbi_state_entries;
// Number and max cost of prunable paths in viterbi_state_entries. /// Number and max cost of prunable paths in viterbi_state_entries.
int viterbi_state_entries_prunable_length; int viterbi_state_entries_prunable_length;
float viterbi_state_entries_prunable_max_cost; float viterbi_state_entries_prunable_max_cost;
// Total number of entries in viterbi_state_entries. /// Total number of entries in viterbi_state_entries.
int viterbi_state_entries_length; int viterbi_state_entries_length;
}; };
// Bundle together all the things pertaining to the best choice/state. /// Bundle together all the things pertaining to the best choice/state.
struct BestChoiceBundle { struct BestChoiceBundle {
explicit BestChoiceBundle(int matrix_dimension) explicit BestChoiceBundle(int matrix_dimension)
: updated(false), best_vse(NULL) { : updated(false), best_vse(NULL) {
@ -222,15 +225,15 @@ struct BestChoiceBundle {
} }
~BestChoiceBundle() {} ~BestChoiceBundle() {}
// Flag to indicate whether anything was changed. /// Flag to indicate whether anything was changed.
bool updated; bool updated;
// Places to try to fix the word suggested by ambiguity checking. /// Places to try to fix the word suggested by ambiguity checking.
DANGERR fixpt; DANGERR fixpt;
// The beam. One LanguageModelState containing a list of ViterbiStateEntry per /// The beam. One LanguageModelState containing a list of ViterbiStateEntry
// row in the ratings matrix containing all VSEs whose BLOB_CHOICE is /// per row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
// somewhere in the corresponding row. /// somewhere in the corresponding row.
PointerVector<LanguageModelState> beam; PointerVector<LanguageModelState> beam;
// Best ViterbiStateEntry and BLOB_CHOICE. /// Best ViterbiStateEntry and BLOB_CHOICE.
ViterbiStateEntry *best_vse; ViterbiStateEntry *best_vse;
}; };