mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Doxygen
Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14
Commits:
6317305 doxygen
9f42f69 doxygen
0fc4d52 doxygen
37b4b55 fix typo
bded8f1 some more doxy
020eb00 slight tweak
524666d doxygenify
2a36a3e doxygenify
229d218 doxygenify
7fd28ae doxygenify
a8c64bc doxygenify
f5d21b6 fix
5d8ede8 doxygenify
a58a4e0 language_model.cpp
fa85709 lm_pain_points.cpp lm_state.cpp
6418da3 merge
06190ba Merge branch 'old_doxygen_merge' into more-doxygen
84acf08 Merge branch 'master' into more-doxygen
50fe1ff pagewalk.cpp cube_reco_context.cpp
2982583 change to relative
192a24a applybox.cpp, take one
8eeb053 delete docs for obsolete params
52e4c77 modernise classify/ocrfeatures.cpp
2a1cba6 modernise cutil/emalloc.cpp
773e006 silence doxygen warning
aeb1731 silence doxygen warning
f18387f silence doxygen; new params are unused?
15ad6bd doxygenify cutil/efio.cpp
c8b5dad doxygenify cutil/danerror.cpp
784450f the globals and exceptions parts are obsolete; remove
8bca324 doxygen classify/normfeat.cpp
9bcbe16 doxygen classify/normmatch.cpp
aa9a971 doxygen ccmain/cube_control.cpp
c083ff2 doxygen ccmain/cube_reco_context.cpp
f842850 params changed
5c94f12 doxygen ccmain/cubeclassifier.cpp
15ba750 case sensitive
f5c71d4 case sensitive
f85655b doxygen classify/intproto.cpp
4bbc7aa partial doxygen classify/mfx.cpp
dbb6041 partial doxygen classify/intproto.cpp
2aa72db finish doxygen classify/intproto.cpp
0b8de99 doxygen training/mftraining.cpp
0b5b35c partial doxygen ccstruct/coutln.cpp
b81c766 partial doxygen ccstruct/coutln.cpp
40fc415 finished? doxygen ccstruct/coutln.cpp
6e4165c doxygen classify/clusttool.cpp
0267dec doxygen classify/cutoffs.cpp
7f0c70c doxygen classify/fpoint.cpp
512f3bd ignore ~ files
5668a52 doxygen classify/intmatcher.cpp
84788d4 doxygen classify/kdtree.cpp
29f36ca doxygen classify/mfoutline.cpp
40b94b1 silence doxygen warnings
6c511b9 doxygen classify/mfx.cpp
f9b4080 doxygen classify/outfeat.cpp
aa1df05 doxygen classify/picofeat.cpp
cc5f466 doxygen training/cntraining.cpp
cce044f doxygen training/commontraining.cpp
167e216 missing param
9498383 renamed params
37eeac2 renamed param
d87b5dd case
c8ee174 renamed params
b858db8 typo
4c2a838 h2 context?
81a2c0c fix some param names; add some missing params, no docs
bcf8a4c add some missing params, no docs
af77f86 add some missing params, no docs; fix some param names
01df24e fix some params
6161056 fix some params
68508b6 fix some params
285aeb6 doxygen complains here no matter what
529bcfa rm some missing params, typos
cd21226 rm some missing params, add some new ones
48a4bc2 fix params
c844628 missing param
312ce37 missing param; rename one
ec2fdec missing param
05e15e0 missing params
d515858 change "<" to < to make doxygen happy
b476a28 wrong place
This commit is contained in:
parent
541408763d
commit
524a61452d
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
*~
|
||||
# Windows
|
||||
*.user
|
||||
*.log
|
||||
@ -62,4 +63,4 @@ training/wordlist2dawg
|
||||
# tessdata
|
||||
*.cube.*
|
||||
*.tesseract_cube.*
|
||||
*.traineddata
|
||||
*.traineddata
|
||||
|
@ -36,18 +36,22 @@
|
||||
#include "tesseractclass.h"
|
||||
#include "genericvector.h"
|
||||
|
||||
// Max number of blobs to classify together in FindSegmentation.
|
||||
/** Max number of blobs to classify together in FindSegmentation. */
|
||||
const int kMaxGroupSize = 4;
|
||||
// Max fraction of median allowed as deviation in xheight before switching
|
||||
// to median.
|
||||
/// Max fraction of median allowed as deviation in xheight before switching
|
||||
/// to median.
|
||||
const double kMaxXHeightDeviationFraction = 0.125;
|
||||
|
||||
/*************************************************************************
|
||||
/**
|
||||
* The box file is assumed to contain box definitions, one per line, of the
|
||||
* following format for blob-level boxes:
|
||||
* @verbatim
|
||||
* <UTF8 str> <left> <bottom> <right> <top> <page id>
|
||||
* @endverbatim
|
||||
* and for word/line-level boxes:
|
||||
* @verbatim
|
||||
* WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
|
||||
* @endverbatim
|
||||
* NOTES:
|
||||
* The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.
|
||||
*
|
||||
@ -62,13 +66,16 @@ const double kMaxXHeightDeviationFraction = 0.125;
|
||||
* units in the word/line are listed after the # at the end of the line and
|
||||
* are space delimited, ignoring any original spaces on the line.
|
||||
* Eg.
|
||||
* @verbatim
|
||||
* word -> #w o r d
|
||||
* multi word line -> #m u l t i w o r d l i n e
|
||||
* @endverbatim
|
||||
* The recognizable units must be space-delimited in order to allow multiple
|
||||
* unicodes to be used for a single recognizable unit, eg Hindi.
|
||||
*
|
||||
* In this mode, the classifier must have been pre-trained with the desired
|
||||
* character set, or it will not be able to find the character segmentations.
|
||||
*************************************************************************/
|
||||
*/
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -181,8 +188,8 @@ static double MedianXHeight(BLOCK_LIST *block_list) {
|
||||
return xheights.median();
|
||||
}
|
||||
|
||||
// Any row xheight that is significantly different from the median is set
|
||||
// to the median.
|
||||
/// Any row xheight that is significantly different from the median is set
|
||||
/// to the median.
|
||||
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
|
||||
double median_xheight = MedianXHeight(block_list);
|
||||
double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
|
||||
@ -205,8 +212,8 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
||||
// All fuzzy spaces are removed, and all the words are maximally chopped.
|
||||
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
||||
/// All fuzzy spaces are removed, and all the words are maximally chopped.
|
||||
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
|
||||
BLOCK_LIST *block_list) {
|
||||
PreenXHeights(block_list);
|
||||
@ -240,9 +247,9 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
|
||||
return page_res;
|
||||
}
|
||||
|
||||
// Tests the chopper by exhaustively running chop_one_blob.
|
||||
// The word_res will contain filled chopped_word, seam_array, denorm,
|
||||
// box_word and best_state for the maximally chopped word.
|
||||
/// Tests the chopper by exhaustively running chop_one_blob.
|
||||
/// The word_res will contain filled chopped_word, seam_array, denorm,
|
||||
/// box_word and best_state for the maximally chopped word.
|
||||
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
|
||||
BLOCK* block, ROW* row,
|
||||
WERD_RES* word_res) {
|
||||
@ -300,17 +307,17 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
|
||||
word_res->FakeClassifyWord(blob_choices.size(), &blob_choices[0]);
|
||||
}
|
||||
|
||||
// Helper to compute the dispute resolution metric.
|
||||
// Disputed blob resolution. The aim is to give the blob to the most
|
||||
// appropriate boxfile box. Most of the time it is obvious, but if
|
||||
// two boxfile boxes overlap significantly it is not. If a small boxfile
|
||||
// box takes most of the blob, and a large boxfile box does too, then
|
||||
// we want the small boxfile box to get it, but if the small box
|
||||
// is much smaller than the blob, we don't want it to get it.
|
||||
// Details of the disputed blob resolution:
|
||||
// Given a box with area A, and a blob with area B, with overlap area C,
|
||||
// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
|
||||
// miss metric gets the blob.
|
||||
/// Helper to compute the dispute resolution metric.
|
||||
/// Disputed blob resolution. The aim is to give the blob to the most
|
||||
/// appropriate boxfile box. Most of the time it is obvious, but if
|
||||
/// two boxfile boxes overlap significantly it is not. If a small boxfile
|
||||
/// box takes most of the blob, and a large boxfile box does too, then
|
||||
/// we want the small boxfile box to get it, but if the small box
|
||||
/// is much smaller than the blob, we don't want it to get it.
|
||||
/// Details of the disputed blob resolution:
|
||||
/// Given a box with area A, and a blob with area B, with overlap area C,
|
||||
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
|
||||
/// miss metric gets the blob.
|
||||
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
|
||||
int overlap_area = box1.intersection(box2).area();
|
||||
double miss_metric = box1.area()- overlap_area;
|
||||
@ -320,14 +327,16 @@ static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
|
||||
return miss_metric;
|
||||
}
|
||||
|
||||
// Gather consecutive blobs that match the given box into the best_state
|
||||
// and corresponding correct_text.
|
||||
// Fights over which box owns which blobs are settled by pre-chopping and
|
||||
// applying the blobs to box or next_box with the least non-overlap.
|
||||
// Returns false if the box was in error, which can only be caused by
|
||||
// failing to find an appropriate blob for a box.
|
||||
// This means that occasionally, blobs may be incorrectly segmented if the
|
||||
// chopper fails to find a suitable chop point.
|
||||
/// Gather consecutive blobs that match the given box into the best_state
|
||||
/// and corresponding correct_text.
|
||||
///
|
||||
/// Fights over which box owns which blobs are settled by pre-chopping and
|
||||
/// applying the blobs to box or next_box with the least non-overlap.
|
||||
/// @return false if the box was in error, which can only be caused by
|
||||
/// failing to find an appropriate blob for a box.
|
||||
///
|
||||
/// This means that occasionally, blobs may be incorrectly segmented if the
|
||||
/// chopper fails to find a suitable chop point.
|
||||
bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
|
||||
const TBOX& box, const TBOX& next_box,
|
||||
const char* correct_text) {
|
||||
@ -420,12 +429,12 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
|
||||
return false; // Failure.
|
||||
}
|
||||
|
||||
// Consume all source blobs that strongly overlap the given box,
|
||||
// putting them into a new word, with the correct_text label.
|
||||
// Fights over which box owns which blobs are settled by
|
||||
// applying the blobs to box or next_box with the least non-overlap.
|
||||
// Returns false if the box was in error, which can only be caused by
|
||||
// failing to find an overlapping blob for a box.
|
||||
/// Consume all source blobs that strongly overlap the given box,
|
||||
/// putting them into a new word, with the correct_text label.
|
||||
/// Fights over which box owns which blobs are settled by
|
||||
/// applying the blobs to box or next_box with the least non-overlap.
|
||||
/// @return false if the box was in error, which can only be caused by
|
||||
/// failing to find an overlapping blob for a box.
|
||||
bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
|
||||
const TBOX& box, const TBOX& next_box,
|
||||
const char* correct_text) {
|
||||
@ -495,8 +504,8 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
|
||||
return new_word != NULL;
|
||||
}
|
||||
|
||||
// Resegments the words by running the classifier in an attempt to find the
|
||||
// correct segmentation that produces the required string.
|
||||
/// Resegments the words by running the classifier in an attempt to find the
|
||||
/// correct segmentation that produces the required string.
|
||||
void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
WERD_RES* word_res;
|
||||
@ -521,8 +530,8 @@ void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
|
||||
}
|
||||
}
|
||||
|
||||
// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
|
||||
// Returns false if an invalid UNICHAR_ID is encountered.
|
||||
/// Converts the space-delimited string of utf8 text to a vector of UNICHAR_ID.
|
||||
/// @return false if an invalid UNICHAR_ID is encountered.
|
||||
bool Tesseract::ConvertStringToUnichars(const char* utf8,
|
||||
GenericVector<UNICHAR_ID>* class_ids) {
|
||||
for (int step = 0; *utf8 != '\0'; utf8 += step) {
|
||||
@ -541,12 +550,12 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Resegments the word to achieve the target_text from the classifier.
|
||||
// Returns false if the re-segmentation fails.
|
||||
// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
|
||||
// applies a full search on the classifier results to find the best classified
|
||||
// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
|
||||
// substitutions ARE used.
|
||||
/// Resegments the word to achieve the target_text from the classifier.
|
||||
/// Returns false if the re-segmentation fails.
|
||||
/// Uses brute-force combination of up to #kMaxGroupSize adjacent blobs, and
|
||||
/// applies a full search on the classifier results to find the best classified
|
||||
/// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
|
||||
/// substitutions ARE used.
|
||||
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
|
||||
WERD_RES* word_res) {
|
||||
// Classify all required combinations of blobs and save results in choices.
|
||||
@ -603,12 +612,20 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Recursive helper to find a match to the target_text (from text_index
|
||||
// position) in the choices (from choices_pos position).
|
||||
// Choices is an array of GenericVectors, of length choices_length, with each
|
||||
// element representing a starting position in the word, and the
|
||||
// GenericVector holding classification results for a sequence of consecutive
|
||||
// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
|
||||
/// Recursive helper to find a match to the target_text (from text_index
|
||||
/// position) in the choices (from choices_pos position).
|
||||
/// @param choices is an array of GenericVectors, of length choices_length,
|
||||
/// with each element representing a starting position in the word, and the
|
||||
/// #GenericVector holding classification results for a sequence of consecutive
|
||||
/// blobs, with index 0 being a single blob, index 1 being 2 blobs etc.
|
||||
/// @param choices_pos
|
||||
/// @param choices_length
|
||||
/// @param target_text
|
||||
/// @param text_index
|
||||
/// @param rating
|
||||
/// @param segmentation
|
||||
/// @param best_rating
|
||||
/// @param best_segmentation
|
||||
void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
|
||||
int choices_pos, int choices_length,
|
||||
const GenericVector<UNICHAR_ID>& target_text,
|
||||
@ -682,10 +699,10 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
|
||||
}
|
||||
}
|
||||
|
||||
// Counts up the labelled words and the blobs within.
|
||||
// Deletes all unused or emptied words, counting the unused ones.
|
||||
// Resets W_BOL and W_EOL flags correctly.
|
||||
// Builds the rebuild_word and rebuilds the box_word and the best_choice.
|
||||
/// - Counts up the labelled words and the blobs within.
|
||||
/// - Deletes all unused or emptied words, counting the unused ones.
|
||||
/// - Resets W_BOL and W_EOL flags correctly.
|
||||
/// - Builds the rebuild_word and rebuilds the box_word and the best_choice.
|
||||
void Tesseract::TidyUp(PAGE_RES* page_res) {
|
||||
int ok_blob_count = 0;
|
||||
int bad_blob_count = 0;
|
||||
@ -743,7 +760,7 @@ void Tesseract::TidyUp(PAGE_RES* page_res) {
|
||||
}
|
||||
}
|
||||
|
||||
// Logs a bad box by line in the box file and box coords.
|
||||
/** Logs a bad box by line in the box file and box coords.*/
|
||||
void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
|
||||
const char *box_ch, const char *err_msg) {
|
||||
tprintf("APPLY_BOXES: boxfile line %d/%s ((%d,%d),(%d,%d)): %s\n",
|
||||
@ -751,7 +768,7 @@ void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box,
|
||||
box.left(), box.bottom(), box.right(), box.top(), err_msg);
|
||||
}
|
||||
|
||||
// Creates a fake best_choice entry in each WERD_RES with the correct text.
|
||||
/** Creates a fake best_choice entry in each WERD_RES with the correct text.*/
|
||||
void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
||||
@ -774,8 +791,8 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
|
||||
}
|
||||
}
|
||||
|
||||
// Calls LearnWord to extract features for labelled blobs within each word.
|
||||
// Features are stored in an internal buffer.
|
||||
/// Calls #LearnWord to extract features for labelled blobs within each word.
|
||||
/// Features are stored in an internal buffer.
|
||||
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
int word_count = 0;
|
||||
|
@ -59,8 +59,6 @@ const double kMinRefitXHeightFraction = 0.5;
|
||||
|
||||
|
||||
/**
|
||||
* recog_pseudo_word
|
||||
*
|
||||
* Make a word from the selected blobs and run Tess on them.
|
||||
*
|
||||
* @param page_res recognise blobs
|
||||
@ -79,13 +77,9 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
|
||||
|
||||
|
||||
/**
|
||||
* recog_interactive
|
||||
*
|
||||
* Recognize a single word in interactive mode.
|
||||
*
|
||||
* @param block block
|
||||
* @param row row of word
|
||||
* @param word_res word to recognise
|
||||
* @param pr_it the page results iterator
|
||||
*/
|
||||
BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
|
||||
inT16 char_qual;
|
||||
@ -150,7 +144,7 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
|
||||
return true;
|
||||
}
|
||||
|
||||
// If tesseract is to be run, sets the words up ready for it.
|
||||
/** If tesseract is to be run, sets the words up ready for it. */
|
||||
void Tesseract::SetupAllWordsPassN(int pass_n,
|
||||
const TBOX* target_word_box,
|
||||
const char* word_config,
|
||||
|
@ -21,24 +21,24 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**********************************************************************
|
||||
* convert_prob_to_tess_certainty
|
||||
/**
|
||||
* @name convert_prob_to_tess_certainty
|
||||
*
|
||||
* Normalize a probability in the range [0.0, 1.0] to a tesseract
|
||||
* certainty in the range [-20.0, 0.0]
|
||||
**********************************************************************/
|
||||
*/
|
||||
static float convert_prob_to_tess_certainty(float prob) {
|
||||
return (prob - 1.0) * 20.0;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* char_box_to_tbox
|
||||
/**
|
||||
* @name char_box_to_tbox
|
||||
*
|
||||
* Create a TBOX from a character bounding box. If nonzero, the
|
||||
* x_offset accounts for any additional padding of the word box that
|
||||
* should be taken into account.
|
||||
*
|
||||
**********************************************************************/
|
||||
*/
|
||||
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
l_int32 left;
|
||||
l_int32 top;
|
||||
@ -55,13 +55,13 @@ TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
return TBOX(left, bottom, right, top);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* extract_cube_state
|
||||
/**
|
||||
* @name extract_cube_state
|
||||
*
|
||||
* Extract CharSamp objects and character bounding boxes from the
|
||||
* CubeObject's state. The caller should free both structures.
|
||||
*
|
||||
**********************************************************************/
|
||||
*/
|
||||
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
int* num_chars,
|
||||
Boxa** char_boxes,
|
||||
@ -104,15 +104,15 @@ bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* create_cube_box_word
|
||||
/**
|
||||
* @name create_cube_box_word
|
||||
*
|
||||
* Fill the given BoxWord with boxes from character bounding
|
||||
* boxes. The char_boxes have local coordinates w.r.t. the
|
||||
* word bounding box, i.e., the left-most character bbox of each word
|
||||
* has (0,0) left-top coord, but the BoxWord must be defined in page
|
||||
* coordinates.
|
||||
**********************************************************************/
|
||||
*/
|
||||
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
int num_chars,
|
||||
TBOX word_box,
|
||||
@ -144,13 +144,13 @@ bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* init_cube_objects
|
||||
/**
|
||||
* @name init_cube_objects
|
||||
*
|
||||
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
|
||||
* Returns false if cube context could not be created or if load_combiner is
|
||||
* true, but the combiner could not be loaded.
|
||||
**********************************************************************/
|
||||
*/
|
||||
bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager) {
|
||||
ASSERT_HOST(cube_cntxt_ == NULL);
|
||||
@ -184,12 +184,12 @@ bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* run_cube_combiner
|
||||
/**
|
||||
* @name run_cube_combiner
|
||||
*
|
||||
* Iterates through tesseract's results and calls cube on each word,
|
||||
* combining the results with the existing tesseract result.
|
||||
**********************************************************************/
|
||||
*/
|
||||
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
if (page_res == NULL || tess_cube_combiner_ == NULL)
|
||||
return;
|
||||
@ -226,23 +226,23 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* cube_word_pass1
|
||||
/**
|
||||
* @name cube_word_pass1
|
||||
*
|
||||
* Recognizes a single word using (only) cube. Compatible with
|
||||
* Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
**********************************************************************/
|
||||
*/
|
||||
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
|
||||
CubeObject *cube_obj = cube_recognize_word(block, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* cube_recognize_word
|
||||
/**
|
||||
* @name cube_recognize_word
|
||||
*
|
||||
* Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
* but also returns the cube object in case the combiner is needed.
|
||||
**********************************************************************/
|
||||
*/
|
||||
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
if (!cube_binary_ || !cube_cntxt_) {
|
||||
if (cube_debug_level > 0 && !cube_binary_)
|
||||
@ -274,12 +274,12 @@ CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
return cube_obj;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* cube_combine_word
|
||||
/**
|
||||
* @name cube_combine_word
|
||||
*
|
||||
* Combines the cube and tesseract results for a single word, leaving the
|
||||
* result in tess_word.
|
||||
**********************************************************************/
|
||||
*/
|
||||
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word) {
|
||||
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
|
||||
@ -317,12 +317,12 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
tess_word->ConsumeWordResults(cube_word);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* cube_recognize
|
||||
/**
|
||||
* @name cube_recognize
|
||||
*
|
||||
* Call cube on the current word, and write the result to word.
|
||||
* Sets up a fake result and returns false if something goes wrong.
|
||||
**********************************************************************/
|
||||
*/
|
||||
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
WERD_RES *word) {
|
||||
// Run cube
|
||||
@ -404,12 +404,12 @@ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* fill_werd_res
|
||||
/**
|
||||
* @name fill_werd_res
|
||||
*
|
||||
* Fill Tesseract's word result fields with cube's.
|
||||
*
|
||||
**********************************************************************/
|
||||
*/
|
||||
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res) {
|
||||
|
@ -32,11 +32,13 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
// CubeRecoContext will not take ownership of tess_obj, but will
|
||||
// record the pointer to it and will make use of various Tesseract
|
||||
// components (language model, flags, etc). Thus the caller should
|
||||
// keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
/**
|
||||
* Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
* CubeRecoContext will not take ownership of tess_obj, but will
|
||||
* record the pointer to it and will make use of various Tesseract
|
||||
* components (language model, flags, etc). Thus the caller should
|
||||
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
*/
|
||||
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
|
||||
tess_obj_ = tess_obj;
|
||||
lang_ = "";
|
||||
@ -89,23 +91,27 @@ CubeRecoContext::~CubeRecoContext() {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
// environment variable and appending a "tessdata" directory to it
|
||||
/**
|
||||
* Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
* environment variable and appending a "tessdata" directory to it
|
||||
*/
|
||||
bool CubeRecoContext::GetDataFilePath(string *path) const {
|
||||
*path = tess_obj_->datadir.string();
|
||||
return true;
|
||||
}
|
||||
|
||||
// The object initialization function that loads all the necessary
|
||||
// components of a RecoContext. TessdataManager is used to load the
|
||||
// data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
// component is present, Cube will be instantiated with the unicharset
|
||||
// specified in this component and the corresponding dictionary
|
||||
// (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
// Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
// be using Tesseract's unicharset and dawgs, and will load the
|
||||
// unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
// dawgs from TESSDATA_*_DAWG components.
|
||||
/**
|
||||
* The object initialization function that loads all the necessary
|
||||
* components of a RecoContext. TessdataManager is used to load the
|
||||
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
* component is present, Cube will be instantiated with the unicharset
|
||||
* specified in this component and the corresponding dictionary
|
||||
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
* be using Tesseract's unicharset and dawgs, and will load the
|
||||
* unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
* dawgs from TESSDATA_*_DAWG components.
|
||||
*/
|
||||
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
ASSERT_HOST(tess_obj_ != NULL);
|
||||
@ -178,7 +184,7 @@ bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates a CubeRecoContext object using a tesseract object
|
||||
/** Creates a CubeRecoContext object using a tesseract object */
|
||||
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
|
@ -39,8 +39,8 @@ CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
|
||||
CubeClassifier::~CubeClassifier() {
|
||||
}
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
@ -70,7 +70,7 @@ int CubeClassifier::UnicharClassifySample(
|
||||
return results->size();
|
||||
}
|
||||
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
@ -84,8 +84,8 @@ CubeTessClassifier::~CubeTessClassifier() {
|
||||
delete pruner_;
|
||||
}
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeTessClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
@ -123,7 +123,7 @@ int CubeTessClassifier::UnicharClassifySample(
|
||||
return results->size();
|
||||
}
|
||||
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
@ -20,13 +20,13 @@
|
||||
#include "pageres.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
/**
|
||||
* process_selected_words()
|
||||
* @name process_selected_words()
|
||||
*
|
||||
* Walk the current block list applying the specified word processor function
|
||||
* to each word that overlaps the selection_box.
|
||||
*/
|
||||
namespace tesseract {
|
||||
void Tesseract::process_selected_words(
|
||||
PAGE_RES* page_res, // blocks to check
|
||||
TBOX & selection_box,
|
||||
|
@ -38,18 +38,19 @@ ICOORD C_OUTLINE::step_coords[4] = {
|
||||
ICOORD (-1, 0), ICOORD (0, -1), ICOORD (1, 0), ICOORD (0, 1)
|
||||
};
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::C_OUTLINE
|
||||
/**
|
||||
* @name C_OUTLINE::C_OUTLINE
|
||||
*
|
||||
* Constructor to build a C_OUTLINE from a CRACKEDGE LOOP.
|
||||
**********************************************************************/
|
||||
* @param startpt outline to convert
|
||||
* @param bot_left bounding box
|
||||
* @param top_right bounding box
|
||||
* @param length length of loop
|
||||
*/
|
||||
|
||||
C_OUTLINE::C_OUTLINE (
|
||||
//constructor
|
||||
CRACKEDGE * startpt, //outline to convert
|
||||
ICOORD bot_left, //bounding box
|
||||
ICOORD top_right, inT16 length //length of loop
|
||||
):box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
|
||||
C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left,
|
||||
ICOORD top_right, inT16 length)
|
||||
: box (bot_left, top_right), start (startpt->pos), offsets(NULL) {
|
||||
inT16 stepindex; //index to step
|
||||
CRACKEDGE *edgept; //current point
|
||||
|
||||
@ -71,11 +72,11 @@ ICOORD top_right, inT16 length //length of loop
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::C_OUTLINE
|
||||
/**
|
||||
* @name C_OUTLINE::C_OUTLINE
|
||||
*
|
||||
* Constructor to build a C_OUTLINE from a C_OUTLINE_FRAG.
|
||||
**********************************************************************/
|
||||
*/
|
||||
C_OUTLINE::C_OUTLINE (
|
||||
//constructor
|
||||
//steps to copy
|
||||
@ -130,16 +131,15 @@ inT16 length //length of loop
|
||||
ASSERT_HOST (stepcount >= 4);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::C_OUTLINE
|
||||
/**
|
||||
* @name C_OUTLINE::C_OUTLINE
|
||||
*
|
||||
* Constructor to build a C_OUTLINE from a rotation of a C_OUTLINE.
|
||||
**********************************************************************/
|
||||
* @param srcline outline to rotate
|
||||
* @param rotation rotate to coord
|
||||
*/
|
||||
|
||||
C_OUTLINE::C_OUTLINE( //constructor
|
||||
C_OUTLINE *srcline, //outline to
|
||||
FCOORD rotation //rotate
|
||||
) : offsets(NULL) {
|
||||
C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) {
|
||||
TBOX new_box; //easy bounding
|
||||
inT16 stepindex; //index to step
|
||||
inT16 dirdiff; //direction change
|
||||
@ -247,11 +247,11 @@ void C_OUTLINE::FakeOutline(const TBOX& box, C_OUTLINE_LIST* outlines) {
|
||||
ol_it.add_to_end(outline);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::area
|
||||
/**
|
||||
* @name C_OUTLINE::area
|
||||
*
|
||||
* Compute the area of the outline.
|
||||
**********************************************************************/
|
||||
*/
|
||||
|
||||
inT32 C_OUTLINE::area() const {
|
||||
int stepindex; //current step
|
||||
@ -281,11 +281,11 @@ inT32 C_OUTLINE::area() const {
|
||||
return total;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::perimeter
|
||||
/**
|
||||
* @name C_OUTLINE::perimeter
|
||||
*
|
||||
* Compute the perimeter of the outline and its first level children.
|
||||
**********************************************************************/
|
||||
*/
|
||||
|
||||
inT32 C_OUTLINE::perimeter() const {
|
||||
inT32 total_steps; // Return value.
|
||||
@ -301,11 +301,11 @@ inT32 C_OUTLINE::perimeter() const {
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::outer_area
|
||||
/**
|
||||
* @name C_OUTLINE::outer_area
|
||||
*
|
||||
* Compute the area of the outline.
|
||||
**********************************************************************/
|
||||
*/
|
||||
|
||||
inT32 C_OUTLINE::outer_area() const {
|
||||
int stepindex; //current step
|
||||
@ -333,15 +333,14 @@ inT32 C_OUTLINE::outer_area() const {
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::count_transitions
|
||||
/**
|
||||
* @name C_OUTLINE::count_transitions
|
||||
*
|
||||
* Compute the number of x and y maxes and mins in the outline.
|
||||
**********************************************************************/
|
||||
* @param threshold winding number on size
|
||||
*/
|
||||
|
||||
inT32 C_OUTLINE::count_transitions( //winding number
|
||||
inT32 threshold //on size
|
||||
) {
|
||||
inT32 C_OUTLINE::count_transitions(inT32 threshold) {
|
||||
BOOL8 first_was_max_x; //what was first
|
||||
BOOL8 first_was_max_y;
|
||||
BOOL8 looking_for_max_x; //what is next
|
||||
@ -461,16 +460,15 @@ inT32 C_OUTLINE::count_transitions( //winding number
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::operator<
|
||||
/**
|
||||
* @name C_OUTLINE::operator<
|
||||
*
|
||||
* Return TRUE if the left operand is inside the right one.
|
||||
**********************************************************************/
|
||||
* @return TRUE if the left operand is inside the right one.
|
||||
* @param other other outline
|
||||
*/
|
||||
|
||||
BOOL8
|
||||
C_OUTLINE::operator< ( //winding number
|
||||
const C_OUTLINE & other //other outline
|
||||
) const
|
||||
C_OUTLINE::operator< (const C_OUTLINE & other) const
|
||||
{
|
||||
inT16 count = 0; //winding count
|
||||
ICOORD pos; //position of point
|
||||
@ -498,15 +496,14 @@ const C_OUTLINE & other //other outline
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::winding_number
|
||||
/**
|
||||
* @name C_OUTLINE::winding_number
|
||||
*
|
||||
* Return the winding number of the outline around the given point.
|
||||
**********************************************************************/
|
||||
* @return the winding number of the outline around the given point.
|
||||
* @param point point to wind around
|
||||
*/
|
||||
|
||||
inT16 C_OUTLINE::winding_number( //winding number
|
||||
ICOORD point //point to wind around
|
||||
) const {
|
||||
inT16 C_OUTLINE::winding_number(ICOORD point) const {
|
||||
inT16 stepindex; //index to cstep
|
||||
inT16 count; //winding count
|
||||
ICOORD vec; //to current point
|
||||
@ -538,11 +535,11 @@ inT16 C_OUTLINE::winding_number( //winding number
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
/**
|
||||
* C_OUTLINE::turn_direction
|
||||
*
|
||||
* Return the sum direction delta of the outline.
|
||||
**********************************************************************/
|
||||
* @return the sum direction delta of the outline.
|
||||
*/
|
||||
|
||||
inT16 C_OUTLINE::turn_direction() const { //winding number
|
||||
DIR128 prevdir; //previous direction
|
||||
@ -567,11 +564,11 @@ inT16 C_OUTLINE::turn_direction() const { //winding number
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::reverse
|
||||
/**
|
||||
* @name C_OUTLINE::reverse
|
||||
*
|
||||
* Reverse the direction of an outline.
|
||||
**********************************************************************/
|
||||
*/
|
||||
|
||||
void C_OUTLINE::reverse() { //reverse drection
|
||||
DIR128 halfturn = MODULUS / 2; //amount to shift
|
||||
@ -590,15 +587,14 @@ void C_OUTLINE::reverse() { //reverse drection
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::move
|
||||
/**
|
||||
* @name C_OUTLINE::move
|
||||
*
|
||||
* Move C_OUTLINE by vector
|
||||
**********************************************************************/
|
||||
* @param vec vector to reposition OUTLINE by
|
||||
*/
|
||||
|
||||
void C_OUTLINE::move( // reposition OUTLINE
|
||||
const ICOORD vec // by vector
|
||||
) {
|
||||
void C_OUTLINE::move(const ICOORD vec) {
|
||||
C_OUTLINE_IT it(&children); // iterator
|
||||
|
||||
box.move (vec);
|
||||
@ -608,10 +604,12 @@ void C_OUTLINE::move( // reposition OUTLINE
|
||||
it.data ()->move (vec); // move child outlines
|
||||
}
|
||||
|
||||
// Returns true if *this and its children are legally nested.
|
||||
// The outer area of a child should have the opposite sign to the
|
||||
// parent. If not, it means we have discarded an outline in between
|
||||
// (probably due to excessive length).
|
||||
/**
|
||||
* Returns true if *this and its children are legally nested.
|
||||
* The outer area of a child should have the opposite sign to the
|
||||
* parent. If not, it means we have discarded an outline in between
|
||||
* (probably due to excessive length).
|
||||
*/
|
||||
bool C_OUTLINE::IsLegallyNested() const {
|
||||
if (stepcount == 0) return true;
|
||||
int parent_area = outer_area();
|
||||
@ -626,11 +624,15 @@ bool C_OUTLINE::IsLegallyNested() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If this outline is smaller than the given min_size, delete this and
|
||||
// remove from its list, via *it, after checking that *it points to this.
|
||||
// Otherwise, if any children of this are too small, delete them.
|
||||
// On entry, *it must be an iterator pointing to this. If this gets deleted
|
||||
// then this is extracted from *it, so an iteration can continue.
|
||||
/**
|
||||
* If this outline is smaller than the given min_size, delete this and
|
||||
* remove from its list, via *it, after checking that *it points to this.
|
||||
* Otherwise, if any children of this are too small, delete them.
|
||||
* On entry, *it must be an iterator pointing to this. If this gets deleted
|
||||
* then this is extracted from *it, so an iteration can continue.
|
||||
* @param min_size minimum size for outline
|
||||
* @param it outline iterator
|
||||
*/
|
||||
void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
|
||||
if (box.width() < min_size || box.height() < min_size) {
|
||||
ASSERT_HOST(this == it->data());
|
||||
@ -650,9 +652,11 @@ void C_OUTLINE::RemoveSmallRecursive(int min_size, C_OUTLINE_IT* it) {
|
||||
// on data from an 8-bit Pix, and assume that any input x and/or y are already
|
||||
// constrained to be legal Pix coordinates.
|
||||
|
||||
// Helper computes the local 2-D gradient (dx, dy) from the 2x2 cell centered
|
||||
// on the given (x,y). If the cell would go outside the image, it is padded
|
||||
// with white.
|
||||
/**
|
||||
* Helper computes the local 2-D gradient (dx, dy) from the 2x2 cell centered
|
||||
* on the given (x,y). If the cell would go outside the image, it is padded
|
||||
* with white.
|
||||
*/
|
||||
static void ComputeGradient(const l_uint32* data, int wpl,
|
||||
int x, int y, int width, int height,
|
||||
ICOORD* gradient) {
|
||||
@ -669,9 +673,11 @@ static void ComputeGradient(const l_uint32* data, int wpl,
|
||||
gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y));
|
||||
}
|
||||
|
||||
// Helper evaluates a vertical difference, (x,y) - (x,y-1), returning true if
|
||||
// the difference, matches diff_sign and updating the best_diff, best_sum,
|
||||
// best_y if a new max.
|
||||
/**
|
||||
* Helper evaluates a vertical difference, (x,y) - (x,y-1), returning true if
|
||||
* the difference matches diff_sign and updating the best_diff, best_sum,
|
||||
* best_y if a new max.
|
||||
*/
|
||||
static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
|
||||
int x, int y, int height,
|
||||
int* best_diff, int* best_sum, int* best_y) {
|
||||
@ -689,9 +695,11 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign,
|
||||
return diff > 0;
|
||||
}
|
||||
|
||||
// Helper evaluates a horizontal difference, (x,y) - (x-1,y), where y is implied
|
||||
// by the input image line, returning true if the difference matches diff_sign
|
||||
// and updating the best_diff, best_sum, best_x if a new max.
|
||||
/**
|
||||
* Helper evaluates a horizontal difference, (x,y) - (x-1,y), where y is implied
|
||||
* by the input image line, returning true if the difference matches diff_sign
|
||||
* and updating the best_diff, best_sum, best_x if a new max.
|
||||
*/
|
||||
static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
|
||||
int x, int width,
|
||||
int* best_diff, int* best_sum, int* best_x) {
|
||||
@ -708,17 +716,21 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign,
|
||||
return diff > 0;
|
||||
}
|
||||
|
||||
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
|
||||
// pix is 8-bit. Does nothing otherwise.
|
||||
// Operation: Consider the following near-horizontal line:
|
||||
// _________
|
||||
// |________
|
||||
// |________
|
||||
// At *every* position along this line, the gradient direction will be close
|
||||
// to vertical. Extrapoaltion/interpolation of the position of the threshold
|
||||
// that was used to binarize the image gives a more precise vertical position
|
||||
// for each horizontal step, and the conflict in step direction and gradient
|
||||
// direction can be used to ignore the vertical steps.
|
||||
/**
|
||||
* Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
|
||||
* pix is 8-bit. Does nothing otherwise.
|
||||
* Operation: Consider the following near-horizontal line:
|
||||
* @verbatim
|
||||
* _________
|
||||
* |________
|
||||
* |________
|
||||
* @endverbatim
|
||||
* At *every* position along this line, the gradient direction will be close
|
||||
* to vertical. Extrapolation/interpolation of the position of the threshold
|
||||
* that was used to binarize the image gives a more precise vertical position
|
||||
* for each horizontal step, and the conflict in step direction and gradient
|
||||
* direction can be used to ignore the vertical steps.
|
||||
*/
|
||||
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) {
|
||||
if (pixGetDepth(pix) != 8) return;
|
||||
const l_uint32* data = pixGetData(pix);
|
||||
@ -807,30 +819,35 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix* pix) {
|
||||
}
|
||||
}
|
||||
|
||||
// Adds sub-pixel resolution EdgeOffsets for the outline using only
|
||||
// a binary image source.
|
||||
// Runs a sliding window of 5 edge steps over the outline, maintaining a count
|
||||
// of the number of steps in each of the 4 directions in the window, and a
|
||||
// sum of the x or y position of each step (as appropriate to its direction.)
|
||||
// Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the
|
||||
// perpendicular direction. Eg
|
||||
// ___ ___ Chain code from the left:
|
||||
// |___ ___ ___| 222122212223221232223000
|
||||
// |___| |_| Corresponding counts of each direction:
|
||||
// 0 00000000000000000123
|
||||
// 1 11121111001111100000
|
||||
// 2 44434443443333343321
|
||||
// 3 00000001111111112111
|
||||
// Count of direction at center 41434143413313143313
|
||||
// Step gets used? YNYYYNYYYNYYNYNYYYyY (y= U-turn exception)
|
||||
// Path redrawn showing only the used points:
|
||||
// ___ ___
|
||||
// ___ ___ ___|
|
||||
// ___ _
|
||||
// Sub-pixel edge position cannot be shown well with ASCII-art, but each
|
||||
// horizontal step's y position is the mean of the y positions of the steps
|
||||
// in the same direction in the sliding window, which makes a much smoother
|
||||
// outline, without losing important detail.
|
||||
/**
|
||||
* Adds sub-pixel resolution EdgeOffsets for the outline using only
|
||||
* a binary image source.
|
||||
*
|
||||
* Runs a sliding window of 5 edge steps over the outline, maintaining a count
|
||||
* of the number of steps in each of the 4 directions in the window, and a
|
||||
* sum of the x or y position of each step (as appropriate to its direction.)
|
||||
* Ignores single-count steps EXCEPT the sharp U-turn and smoothes out the
|
||||
* perpendicular direction. Eg
|
||||
* @verbatim
|
||||
* ___ ___ Chain code from the left:
|
||||
* |___ ___ ___| 222122212223221232223000
|
||||
* |___| |_| Corresponding counts of each direction:
|
||||
* 0 00000000000000000123
|
||||
* 1 11121111001111100000
|
||||
* 2 44434443443333343321
|
||||
* 3 00000001111111112111
|
||||
* Count of direction at center 41434143413313143313
|
||||
* Step gets used? YNYYYNYYYNYYNYNYYYyY (y= U-turn exception)
|
||||
* Path redrawn showing only the used points:
|
||||
* ___ ___
|
||||
* ___ ___ ___|
|
||||
* ___ _
|
||||
* @endverbatim
|
||||
* Sub-pixel edge position cannot be shown well with ASCII-art, but each
|
||||
* horizontal step's y position is the mean of the y positions of the steps
|
||||
* in the same direction in the sliding window, which makes a much smoother
|
||||
* outline, without losing important detail.
|
||||
*/
|
||||
void C_OUTLINE::ComputeBinaryOffsets() {
|
||||
delete [] offsets;
|
||||
offsets = new EdgeOffset[stepcount];
|
||||
@ -885,8 +902,10 @@ void C_OUTLINE::ComputeBinaryOffsets() {
|
||||
}
|
||||
}
|
||||
|
||||
// Renders the outline to the given pix, with left and top being
|
||||
// the coords of the upper-left corner of the pix.
|
||||
/**
|
||||
* Renders the outline to the given pix, with left and top being
|
||||
* the coords of the upper-left corner of the pix.
|
||||
*/
|
||||
void C_OUTLINE::render(int left, int top, Pix* pix) const {
|
||||
ICOORD pos = start;
|
||||
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
||||
@ -902,8 +921,13 @@ void C_OUTLINE::render(int left, int top, Pix* pix) const {
|
||||
}
|
||||
}
|
||||
|
||||
// Renders just the outline to the given pix (no fill), with left and top
|
||||
// being the coords of the upper-left corner of the pix.
|
||||
/**
|
||||
* Renders just the outline to the given pix (no fill), with left and top
|
||||
* being the coords of the upper-left corner of the pix.
|
||||
* @param left coord
|
||||
* @param top coord
|
||||
* @param pix the pix to outline
|
||||
*/
|
||||
void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
|
||||
ICOORD pos = start;
|
||||
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
||||
@ -921,17 +945,17 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const {
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::plot
|
||||
/**
|
||||
* @name C_OUTLINE::plot
|
||||
*
|
||||
* Draw the outline in the given colour.
|
||||
**********************************************************************/
|
||||
* @param window window to draw in
|
||||
* @param colour colour to draw in
|
||||
*/
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void C_OUTLINE::plot( //draw it
|
||||
ScrollView* window, // window to draw in
|
||||
ScrollView::Color colour // colour to draw in
|
||||
) const {
|
||||
void C_OUTLINE::plot(ScrollView* window,
|
||||
ScrollView::Color colour) const {
|
||||
inT16 stepindex; // index to cstep
|
||||
ICOORD pos; // current position
|
||||
DIR128 stepdir; // direction of step
|
||||
@ -958,8 +982,11 @@ void C_OUTLINE::plot( //draw it
|
||||
window->DrawTo(pos.x(), pos.y());
|
||||
}
|
||||
}
|
||||
// Draws the outline in the given colour, normalized using the given denorm,
|
||||
// making use of sub-pixel accurate information if available.
|
||||
|
||||
/**
|
||||
* Draws the outline in the given colour, normalized using the given denorm,
|
||||
* making use of sub-pixel accurate information if available.
|
||||
*/
|
||||
void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
|
||||
ScrollView* window) const {
|
||||
window->Pen(colour);
|
||||
@ -990,16 +1017,14 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour,
|
||||
#endif
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* C_OUTLINE::operator=
|
||||
/**
|
||||
* @name C_OUTLINE::operator=
|
||||
*
|
||||
* Assignment - deep copy data
|
||||
**********************************************************************/
|
||||
* @param source assign from this
|
||||
*/
|
||||
|
||||
//assignment
|
||||
C_OUTLINE & C_OUTLINE::operator= (
|
||||
const C_OUTLINE & source //from this
|
||||
) {
|
||||
C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) {
|
||||
box = source.box;
|
||||
start = source.start;
|
||||
if (steps != NULL)
|
||||
@ -1020,10 +1045,12 @@ const C_OUTLINE & source //from this
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
|
||||
// by the step, increment, and vertical step ? x : y position * increment
|
||||
// at step s Mod stepcount respectively. Used to add or subtract the
|
||||
// direction and position to/from accumulators of a small neighbourhood.
|
||||
/**
|
||||
* Helper for ComputeBinaryOffsets. Increments pos, dir_counts, pos_totals
|
||||
* by the step, increment, and vertical step ? x : y position * increment
|
||||
* at step s Mod stepcount respectively. Used to add or subtract the
|
||||
* direction and position to/from accumulators of a small neighbourhood.
|
||||
*/
|
||||
void C_OUTLINE::increment_step(int s, int increment, ICOORD* pos,
|
||||
int* dir_counts, int* pos_totals) const {
|
||||
int step_index = Modulo(s, stepcount);
|
||||
|
@ -144,7 +144,7 @@ class TessdataManager {
|
||||
|
||||
/**
|
||||
* Opens the given data file and reads the offset table.
|
||||
* Returns true on success.
|
||||
* @return true on success.
|
||||
*/
|
||||
bool Init(const char *data_file_name, int debug_level);
|
||||
|
||||
|
@ -24,13 +24,13 @@ namespace tesseract {
|
||||
|
||||
extern const char *kUTF8LineSeparator;
|
||||
extern const char *kUTF8ParagraphSeparator;
|
||||
extern const char *kLRM; // Left-to-Right Mark
|
||||
extern const char *kRLM; // Right-to-Left Mark
|
||||
extern const char *kRLE; // Right-to-Left Embedding
|
||||
extern const char *kPDF; // Pop Directional Formatting
|
||||
extern const char *kLRM; ///< Left-to-Right Mark
|
||||
extern const char *kRLM; ///< Right-to-Left Mark
|
||||
extern const char *kRLE; ///< Right-to-Left Embedding
|
||||
extern const char *kPDF; ///< Pop Directional Formatting
|
||||
|
||||
// The following are confusable internal word punctuation symbols
|
||||
// which we normalize to the first variant when matching in dawgs.
|
||||
/// The following are confusable internal word punctuation symbols
|
||||
/// which we normalize to the first variant when matching in dawgs.
|
||||
extern const char *kHyphenLikeUTF8[];
|
||||
extern const char *kApostropheLikeUTF8[];
|
||||
|
||||
|
@ -841,8 +841,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob,
|
||||
*
|
||||
* Globals: none
|
||||
*
|
||||
* @param Word current word
|
||||
* @param BestChoiceWord best overall choice for word with context
|
||||
* @param word current word
|
||||
*
|
||||
* @return TRUE or FALSE
|
||||
* @note Exceptions: none
|
||||
@ -1007,7 +1006,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine adds the result of a classification into
|
||||
* Results. If the new rating is much worse than the current
|
||||
@ -1022,14 +1020,8 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
|
||||
* Globals:
|
||||
* - #matcher_bad_match_pad defines limits of an acceptable match
|
||||
*
|
||||
* @param new_result new result to add
|
||||
* @param[out] results results to add new result to
|
||||
* @param class_id class of new result
|
||||
* @param shape_id shape index
|
||||
* @param rating rating of new result
|
||||
* @param adapted adapted match or not
|
||||
* @param config config id of new result
|
||||
* @param fontinfo_id font information of the new result
|
||||
* @param fontinfo_id2 font information of the 2nd choice result
|
||||
*
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Mar 12 18:19:29 1991, DSJ, Created.
|
||||
@ -1077,11 +1069,13 @@ void Classify::AddNewResult(const UnicharRating& new_result,
|
||||
* - #AllProtosOn mask that enables all protos
|
||||
* - #AllConfigsOn mask that enables all configs
|
||||
*
|
||||
* @param Blob blob to be classified
|
||||
* @param Templates built-in templates to classify against
|
||||
* @param Classes adapted class templates
|
||||
* @param Ambiguities array of class id's to match against
|
||||
* @param[out] Results place to put match results
|
||||
* @param blob blob to be classified
|
||||
* @param templates built-in templates to classify against
|
||||
* @param classes adapted class templates
|
||||
* @param ambiguities array of unichar id's to match against
|
||||
* @param[out] results place to put match results
|
||||
* @param int_features
|
||||
* @param fx_info
|
||||
*
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
|
||||
@ -1301,6 +1295,8 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
|
||||
* @param Blob blob to be classified
|
||||
* @param Templates current set of adapted templates
|
||||
* @param Results place to put match results
|
||||
* @param int_features
|
||||
* @param fx_info
|
||||
*
|
||||
* @return Array of possible ambiguous chars that should be checked.
|
||||
* @note Exceptions: none
|
||||
@ -1343,9 +1339,9 @@ UNICHAR_ID *Classify::BaselineClassifier(
|
||||
* specified set of templates. The classes which match
|
||||
* are added to Results.
|
||||
*
|
||||
* @param Blob blob to be classified
|
||||
* @param Templates templates to classify unknown against
|
||||
* @param Results place to put match results
|
||||
* @param blob blob to be classified
|
||||
* @param sample templates to classify unknown against
|
||||
* @param adapt_results place to put match results
|
||||
*
|
||||
* Globals:
|
||||
* - CharNormCutoffs expected num features for each class
|
||||
@ -1438,7 +1434,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
|
||||
* blob. NOTE: assumes that the blob length has already been
|
||||
* computed and placed into Results.
|
||||
*
|
||||
* @param Results results to add noise classification to
|
||||
* @param results results to add noise classification to
|
||||
*
|
||||
* Globals:
|
||||
* - matcher_avg_noise_size avg. length of a noise blob
|
||||
@ -1539,7 +1535,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
/**
|
||||
*
|
||||
* @param Blob blob whose classification is being debugged
|
||||
* @param blob blob whose classification is being debugged
|
||||
* @param Results results of match being debugged
|
||||
*
|
||||
* Globals: none
|
||||
@ -1716,13 +1712,11 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
|
||||
* It then copies the char norm features into the IntFeatures
|
||||
* array provided by the caller.
|
||||
*
|
||||
* @param Blob blob to extract features from
|
||||
* @param Templates used to compute char norm adjustments
|
||||
* @param IntFeatures array to fill with integer features
|
||||
* @param PrunerNormArray Array of factors from blob normalization
|
||||
* @param templates used to compute char norm adjustments
|
||||
* @param pruner_norm_array Array of factors from blob normalization
|
||||
* process
|
||||
* @param CharNormArray array to fill with dummy char norm adjustments
|
||||
* @param BlobLength length of blob in baseline-normalized units
|
||||
* @param char_norm_array array to fill with dummy char norm adjustments
|
||||
* @param fx_info
|
||||
*
|
||||
* Globals:
|
||||
*
|
||||
@ -2072,8 +2066,7 @@ namespace tesseract {
|
||||
/**
|
||||
* This routine writes the matches in Results to File.
|
||||
*
|
||||
* @param File open text file to write Results to
|
||||
* @param Results match results to write to File
|
||||
* @param results match results to write to File
|
||||
*
|
||||
* Globals: none
|
||||
*
|
||||
|
1535
classify/cluster.cpp
1535
classify/cluster.cpp
File diff suppressed because it is too large
Load Diff
@ -26,23 +26,20 @@
|
||||
#include <math.h>
|
||||
|
||||
//---------------Global Data Definitions and Declarations--------------------
|
||||
#define TOKENSIZE 80 //max size of tokens read from an input file
|
||||
#define MAXSAMPLESIZE 65535 //max num of dimensions in feature space
|
||||
//#define MAXBLOCKSIZE 65535 //max num of samples in a character (block size)
|
||||
#define TOKENSIZE 80 ///< max size of tokens read from an input file
|
||||
#define MAXSAMPLESIZE 65535 ///< max num of dimensions in feature space
|
||||
//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size)
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
Public Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/** ReadSampleSize ***********************************************************
|
||||
Parameters: File open text file to read sample size from
|
||||
Globals: None
|
||||
Operation: This routine reads a single integer from the specified
|
||||
file and checks to ensure that it is between 0 and
|
||||
MAXSAMPLESIZE.
|
||||
Return: Sample size
|
||||
Exceptions: ILLEGALSAMPLESIZE illegal format or range
|
||||
History: 6/6/89, DSJ, Created.
|
||||
******************************************************************************/
|
||||
/**
|
||||
* This routine reads a single integer from the specified
|
||||
* file and checks to ensure that it is between 0 and
|
||||
* MAXSAMPLESIZE.
|
||||
* @param File open text file to read sample size from
|
||||
* @return Sample size
|
||||
* @note Globals: None
|
||||
* @note Exceptions: ILLEGALSAMPLESIZE illegal format or range
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
uinT16 ReadSampleSize(FILE *File) {
|
||||
int SampleSize;
|
||||
|
||||
@ -50,21 +47,22 @@ uinT16 ReadSampleSize(FILE *File) {
|
||||
(SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
|
||||
DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
|
||||
return (SampleSize);
|
||||
} // ReadSampleSize
|
||||
}
|
||||
|
||||
|
||||
/** ReadParamDesc *************************************************************
|
||||
Parameters: File open text file to read N parameter descriptions from
|
||||
N number of parameter descriptions to read
|
||||
Globals: None
|
||||
Operation: This routine reads textual descriptions of sets of parameters
|
||||
which describe the characteristics of feature dimensions.
|
||||
Return: Pointer to an array of parameter descriptors.
|
||||
Exceptions: ILLEGALCIRCULARSPEC
|
||||
ILLEGALESSENTIALSPEC
|
||||
ILLEGALMINMAXSPEC
|
||||
History: 6/6/89, DSJ, Created.
|
||||
******************************************************************************/
|
||||
/**
|
||||
* This routine reads textual descriptions of sets of parameters
|
||||
* which describe the characteristics of feature dimensions.
|
||||
*
|
||||
* Exceptions:
|
||||
* - ILLEGALCIRCULARSPEC
|
||||
* - ILLEGALESSENTIALSPEC
|
||||
* - ILLEGALMINMAXSPEC
|
||||
* @param File open text file to read N parameter descriptions from
|
||||
* @param N number of parameter descriptions to read
|
||||
* @return Pointer to an array of parameter descriptors.
|
||||
* @note Globals: None
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
|
||||
int i;
|
||||
PARAM_DESC *ParamDesc;
|
||||
@ -94,23 +92,24 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
|
||||
ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
|
||||
}
|
||||
return (ParamDesc);
|
||||
} // ReadParamDesc
|
||||
}
|
||||
|
||||
|
||||
/** ReadPrototype *************************************************************
|
||||
Parameters: File open text file to read prototype from
|
||||
N number of dimensions used in prototype
|
||||
Globals: None
|
||||
Operation: This routine reads a textual description of a prototype from
|
||||
the specified file.
|
||||
Return: List of prototypes
|
||||
Exceptions: ILLEGALSIGNIFICANCESPEC
|
||||
ILLEGALSAMPLECOUNT
|
||||
ILLEGALMEANSPEC
|
||||
ILLEGALVARIANCESPEC
|
||||
ILLEGALDISTRIBUTION
|
||||
History: 6/6/89, DSJ, Created.
|
||||
******************************************************************************/
|
||||
/**
|
||||
* This routine reads a textual description of a prototype from
|
||||
* the specified file.
|
||||
*
|
||||
* Exceptions:
|
||||
* - ILLEGALSIGNIFICANCESPEC
|
||||
* - ILLEGALSAMPLECOUNT
|
||||
* - ILLEGALMEANSPEC
|
||||
* - ILLEGALVARIANCESPEC
|
||||
* - ILLEGALDISTRIBUTION
|
||||
* @param File open text file to read prototype from
|
||||
* @param N number of dimensions used in prototype
|
||||
* @return List of prototypes
|
||||
* @note Globals: None
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
|
||||
char Token[TOKENSIZE];
|
||||
int Status;
|
||||
@ -228,18 +227,17 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
|
||||
DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
|
||||
return (NULL);
|
||||
}
|
||||
} // ReadPrototype
|
||||
}
|
||||
|
||||
|
||||
/* ReadProtoStyle *************************************************************
|
||||
Parameters: File open text file to read prototype style from
|
||||
Globals: None
|
||||
Operation: This routine reads a single token from the specified
|
||||
text file and interprets it as a prototype specification.
|
||||
Return: Prototype style read from text file
|
||||
Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
|
||||
History: 6/8/89, DSJ, Created.
|
||||
*******************************************************************************/
|
||||
/**
|
||||
* This routine reads a single token from the specified
|
||||
* text file and interprets it as a prototype specification.
|
||||
* @param File open text file to read prototype style from
|
||||
* @return Prototype style read from text file
|
||||
* @note Globals: None
|
||||
* @note Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
|
||||
* @note History: 6/8/89, DSJ, Created.
|
||||
*/
|
||||
PROTOSTYLE ReadProtoStyle(FILE *File) {
|
||||
char Token[TOKENSIZE];
|
||||
PROTOSTYLE Style;
|
||||
@ -264,23 +262,22 @@ PROTOSTYLE ReadProtoStyle(FILE *File) {
|
||||
DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
|
||||
}
|
||||
return (Style);
|
||||
} // ReadProtoStyle
|
||||
}
|
||||
|
||||
|
||||
/** ReadNFloats *************************************************************
|
||||
Parameters: File open text file to read floats from
|
||||
N number of floats to read
|
||||
Buffer pointer to buffer to place floats into
|
||||
Globals: None
|
||||
Operation: This routine reads N floats from the specified text file
|
||||
and places them into Buffer. If Buffer is NULL, a buffer
|
||||
is created and passed back to the caller. If EOF is
|
||||
encountered before any floats can be read, NULL is
|
||||
returned.
|
||||
Return: Pointer to buffer holding floats or NULL if EOF
|
||||
Exceptions: ILLEGALFLOAT
|
||||
History: 6/6/89, DSJ, Created.
|
||||
******************************************************************************/
|
||||
/**
|
||||
* This routine reads N floats from the specified text file
|
||||
* and places them into Buffer. If Buffer is NULL, a buffer
|
||||
* is created and passed back to the caller. If EOF is
|
||||
* encountered before any floats can be read, NULL is
|
||||
* returned.
|
||||
* @param File open text file to read floats from
|
||||
* @param N number of floats to read
|
||||
* @param Buffer pointer to buffer to place floats into
|
||||
* @return Pointer to buffer holding floats or NULL if EOF
|
||||
* @note Globals: None
|
||||
* @note Exceptions: ILLEGALFLOAT
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
|
||||
int i;
|
||||
int NumFloatsRead;
|
||||
@ -300,20 +297,19 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
|
||||
}
|
||||
}
|
||||
return Buffer;
|
||||
} // ReadNFloats
|
||||
}
|
||||
|
||||
|
||||
/** WriteParamDesc ************************************************************
|
||||
Parameters: File open text file to write param descriptors to
|
||||
N number of param descriptors to write
|
||||
ParamDesc array of param descriptors to write
|
||||
Globals: None
|
||||
Operation: This routine writes an array of dimension descriptors to
|
||||
the specified text file.
|
||||
Return: None
|
||||
Exceptions: None
|
||||
History: 6/6/89, DSJ, Created.
|
||||
******************************************************************************/
|
||||
/**
|
||||
* This routine writes an array of dimension descriptors to
|
||||
* the specified text file.
|
||||
* @param File open text file to write param descriptors to
|
||||
* @param N number of param descriptors to write
|
||||
* @param ParamDesc array of param descriptors to write
|
||||
* @return None
|
||||
* @note Globals: None
|
||||
* @note Exceptions: None
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
void
|
||||
WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
|
||||
int i;
|
||||
@ -331,20 +327,19 @@ WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
|
||||
|
||||
fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
|
||||
}
|
||||
} // WriteParamDesc
|
||||
}
|
||||
|
||||
|
||||
/** WritePrototype ************************************************************
|
||||
Parameters: File open text file to write prototype to
|
||||
N number of dimensions in feature space
|
||||
Proto prototype to write out
|
||||
Globals: None
|
||||
Operation: This routine writes a textual description of a prototype
|
||||
to the specified text file.
|
||||
Return: None
|
||||
Exceptions: None
|
||||
History: 6/12/89, DSJ, Created.
|
||||
*******************************************************************************/
|
||||
/**
|
||||
* This routine writes a textual description of a prototype
|
||||
* to the specified text file.
|
||||
* @param File open text file to write prototype to
|
||||
* @param N number of dimensions in feature space
|
||||
* @param Proto prototype to write out
|
||||
* @return None
|
||||
* @note Globals: None
|
||||
* @note Exceptions: None
|
||||
* @note History: 6/12/89, DSJ, Created.
|
||||
*/
|
||||
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
|
||||
int i;
|
||||
|
||||
@ -382,38 +377,36 @@ void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
|
||||
fprintf (File, "\n\t");
|
||||
WriteNFloats (File, N, Proto->Variance.Elliptical);
|
||||
}
|
||||
} // WritePrototype
|
||||
}
|
||||
|
||||
|
||||
/** WriteNFloats ***********************************************************
|
||||
Parameters: File open text file to write N floats to
|
||||
N number of floats to write
|
||||
Array array of floats to write
|
||||
Globals: None
|
||||
Operation: This routine writes a text representation of N floats from
|
||||
an array to a file. All of the floats are placed on one line.
|
||||
Return: None
|
||||
Exceptions: None
|
||||
History: 6/6/89, DSJ, Created.
|
||||
****************************************************************************/
|
||||
/**
|
||||
* This routine writes a text representation of N floats from
|
||||
* an array to a file. All of the floats are placed on one line.
|
||||
* @param File open text file to write N floats to
|
||||
* @param N number of floats to write
|
||||
* @param Array array of floats to write
|
||||
* @return None
|
||||
* @note Globals: None
|
||||
* @note Exceptions: None
|
||||
* @note History: 6/6/89, DSJ, Created.
|
||||
*/
|
||||
void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
|
||||
for (int i = 0; i < N; i++)
|
||||
fprintf(File, " %9.6f", Array[i]);
|
||||
fprintf(File, "\n");
|
||||
} // WriteNFloats
|
||||
}
|
||||
|
||||
|
||||
/** WriteProtoStyle *********************************************************
|
||||
Parameters: File open text file to write prototype style to
|
||||
ProtoStyle prototype style to write
|
||||
Globals: None
|
||||
Operation: This routine writes to the specified text file a word
|
||||
which represents the ProtoStyle. It does not append
|
||||
a carriage return to the end.
|
||||
Return: None
|
||||
Exceptions: None
|
||||
History: 6/8/89, DSJ, Created.
|
||||
****************************************************************************/
|
||||
/**
|
||||
* This routine writes to the specified text file a word
|
||||
* which represents the ProtoStyle. It does not append
|
||||
* a carriage return to the end.
|
||||
* @param File open text file to write prototype style to
|
||||
* @param ProtoStyle prototype style to write
|
||||
* @return None
|
||||
* @note Globals: None
|
||||
* @note Exceptions: None
|
||||
* @note History: 6/8/89, DSJ, Created.
|
||||
*/
|
||||
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
|
||||
switch (ProtoStyle) {
|
||||
case spherical:
|
||||
@ -429,9 +422,25 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
|
||||
fprintf (File, "automatic");
|
||||
break;
|
||||
}
|
||||
} // WriteProtoStyle
|
||||
}
|
||||
|
||||
/**
|
||||
* This routine writes a textual description of each prototype
|
||||
* in the prototype list to the specified file. It also
|
||||
* writes a file header which includes the number of dimensions
|
||||
* in feature space and the descriptions for each dimension.
|
||||
* @param File open text file to write prototypes to
|
||||
* @param N number of dimensions in feature space
|
||||
* @param ParamDesc descriptions for each dimension
|
||||
* @param ProtoList list of prototypes to be written
|
||||
* @param WriteSigProtos TRUE to write out significant prototypes
|
||||
* @param WriteInsigProtos TRUE to write out insignificants
|
||||
* @note Globals: None
|
||||
* @return None
|
||||
* @note Exceptions: None
|
||||
* @note History: 6/12/89, DSJ, Created.
|
||||
*/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void WriteProtoList(
|
||||
FILE *File,
|
||||
uinT16 N,
|
||||
@ -439,30 +448,6 @@ void WriteProtoList(
|
||||
LIST ProtoList,
|
||||
BOOL8 WriteSigProtos,
|
||||
BOOL8 WriteInsigProtos)
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to write prototypes to
|
||||
** N number of dimensions in feature space
|
||||
** ParamDesc descriptions for each dimension
|
||||
** ProtoList list of prototypes to be written
|
||||
** WriteSigProtos TRUE to write out significant prototypes
|
||||
** WriteInsigProtos TRUE to write out insignificants
|
||||
** Globals:
|
||||
** None
|
||||
** Operation:
|
||||
** This routine writes a textual description of each prototype
|
||||
** in the prototype list to the specified file. It also
|
||||
** writes a file header which includes the number of dimensions
|
||||
** in feature space and the descriptions for each dimension.
|
||||
** Return:
|
||||
** None
|
||||
** Exceptions:
|
||||
** None
|
||||
** History:
|
||||
** 6/12/89, DSJ, Created.
|
||||
*/
|
||||
|
||||
{
|
||||
PROTOTYPE *Proto;
|
||||
|
||||
@ -478,5 +463,4 @@ void WriteProtoList(
|
||||
( ! Proto->Significant && WriteInsigProtos ) )
|
||||
WritePrototype( File, N, Proto );
|
||||
}
|
||||
} /* WriteProtoList */
|
||||
|
||||
}
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "cutoffs.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@ -34,26 +34,23 @@
|
||||
|
||||
#define MAX_CUTOFF 1000
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
/**
|
||||
* Open Filename, read in all of the class-id/cutoff pairs
|
||||
* and insert them into the Cutoffs array. Cutoffs are
|
||||
* indexed in the array by class id. Unused entries in the
|
||||
* array are set to an arbitrarily high cutoff value.
|
||||
* @param CutoffFile file stream to read the cutoff definitions from
|
||||
* @param Cutoffs array to put cutoffs into
|
||||
* @param swap
|
||||
* @param end_offset
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 20 09:38:26 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
|
||||
CLASS_CUTOFF_ARRAY Cutoffs) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Filename name of file containing cutoff definitions
|
||||
** Cutoffs array to put cutoffs into
|
||||
** Globals: none
|
||||
** Operation: Open Filename, read in all of the class-id/cutoff pairs
|
||||
** and insert them into the Cutoffs array. Cutoffs are
|
||||
** indexed in the array by class id. Unused entries in the
|
||||
** array are set to an arbitrarily high cutoff value.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 20 09:38:26 1991, DSJ, Created.
|
||||
*/
|
||||
char Class[UNICHAR_LEN + 1];
|
||||
CLASS_ID ClassId;
|
||||
int Cutoff;
|
||||
@ -78,6 +75,6 @@ void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
|
||||
Cutoffs[ClassId] = Cutoff;
|
||||
SkipNewline(CutoffFile);
|
||||
}
|
||||
} /* ReadNewCutoffs */
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -15,18 +15,17 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "const.h"
|
||||
#include "fpoint.h"
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
FLOAT32 DistanceBetween(FPOINT A, FPOINT B) {
|
||||
double xd = XDelta(A, B);
|
||||
@ -34,23 +33,21 @@ FLOAT32 DistanceBetween(FPOINT A, FPOINT B) {
|
||||
return sqrt(static_cast<double>(xd * xd + yd * yd));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Return the angle from Point1 to Point2 normalized to
|
||||
* lie in the range 0 to FullScale (where FullScale corresponds
|
||||
* to 2*pi or 360 degrees).
|
||||
* @param Point1 points to compute angle between
|
||||
* @param Point2 points to compute angle between
|
||||
* @param FullScale value to associate with 2*pi
|
||||
* @return Angle from Point1 to Point2, normalized to the range 0 to FullScale
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Mar 28 14:27:25 1990, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 NormalizedAngleFrom(FPOINT *Point1,
|
||||
FPOINT *Point2,
|
||||
FLOAT32 FullScale) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Point1, Point2 points to compute angle between
|
||||
** FullScale value to associate with 2*pi
|
||||
** Globals: none
|
||||
** Operation: Return the angle from Point1 to Point2 normalized to
|
||||
** lie in the range 0 to FullScale (where FullScale corresponds
|
||||
** to 2*pi or 360 degrees).
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed Mar 28 14:27:25 1990, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 Angle;
|
||||
FLOAT32 NumRadsInCircle = 2.0 * PI;
|
||||
|
||||
@ -62,4 +59,4 @@ FLOAT32 NormalizedAngleFrom(FPOINT *Point1,
|
||||
Angle = 0.0;
|
||||
return (Angle);
|
||||
|
||||
} /* NormalizedAngleFrom */
|
||||
}
|
||||
|
@ -135,8 +135,8 @@ class ClassPruner {
|
||||
delete []sort_index_;
|
||||
}
|
||||
|
||||
// Computes the scores for every class in the character set, by summing the
|
||||
// weights for each feature and stores the sums internally in class_count_.
|
||||
/// Computes the scores for every class in the character set, by summing the
|
||||
/// weights for each feature and stores the sums internally in class_count_.
|
||||
void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
int num_features, const INT_FEATURE_STRUCT* features) {
|
||||
num_features_ = num_features;
|
||||
@ -203,11 +203,11 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Adjusts the scores according to the number of expected features. Used
|
||||
// in lieu of a constant bias, this penalizes classes that expect more
|
||||
// features than there are present. Thus an actual c will score higher for c
|
||||
// than e, even though almost all the features match e as well as c, because
|
||||
// e expects more features to be present.
|
||||
/// Adjusts the scores according to the number of expected features. Used
|
||||
/// in lieu of a constant bias, this penalizes classes that expect more
|
||||
/// features than there are present. Thus an actual c will score higher for c
|
||||
/// than e, even though almost all the features match e as well as c, because
|
||||
/// e expects more features to be present.
|
||||
void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
|
||||
int cutoff_strength) {
|
||||
for (int class_id = 0; class_id < max_classes_; ++class_id) {
|
||||
@ -219,8 +219,8 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Zeros the scores for classes disabled in the unicharset.
|
||||
// Implements the black-list to recognize a subset of the character set.
|
||||
/// Zeros the scores for classes disabled in the unicharset.
|
||||
/// Implements the black-list to recognize a subset of the character set.
|
||||
void DisableDisabledClasses(const UNICHARSET& unicharset) {
|
||||
for (int class_id = 0; class_id < max_classes_; ++class_id) {
|
||||
if (!unicharset.get_enabled(class_id))
|
||||
@ -228,7 +228,7 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Zeros the scores of fragments.
|
||||
/** Zeros the scores of fragments. */
|
||||
void DisableFragments(const UNICHARSET& unicharset) {
|
||||
for (int class_id = 0; class_id < max_classes_; ++class_id) {
|
||||
// Do not include character fragments in the class pruner
|
||||
@ -239,10 +239,10 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Normalizes the counts for xheight, putting the normalized result in
|
||||
// norm_count_. Applies a simple subtractive penalty for incorrect vertical
|
||||
// position provided by the normalization_factors array, indexed by
|
||||
// character class, and scaled by the norm_multiplier.
|
||||
/// Normalizes the counts for xheight, putting the normalized result in
|
||||
/// norm_count_. Applies a simple subtractive penalty for incorrect vertical
|
||||
/// position provided by the normalization_factors array, indexed by
|
||||
/// character class, and scaled by the norm_multiplier.
|
||||
void NormalizeForXheight(int norm_multiplier,
|
||||
const uinT8* normalization_factors) {
|
||||
for (int class_id = 0; class_id < max_classes_; class_id++) {
|
||||
@ -251,16 +251,16 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// The nop normalization copies the class_count_ array to norm_count_.
|
||||
/** The nop normalization copies the class_count_ array to norm_count_. */
|
||||
void NoNormalization() {
|
||||
for (int class_id = 0; class_id < max_classes_; class_id++) {
|
||||
norm_count_[class_id] = class_count_[class_id];
|
||||
}
|
||||
}
|
||||
|
||||
// Prunes the classes using <the maximum count> * pruning_factor/256 as a
|
||||
// threshold for keeping classes. If max_of_non_fragments, then ignore
|
||||
// fragments in computing the maximum count.
|
||||
/// Prunes the classes using <the maximum count> * pruning_factor/256 as a
|
||||
/// threshold for keeping classes. If max_of_non_fragments, then ignore
|
||||
/// fragments in computing the maximum count.
|
||||
void PruneAndSort(int pruning_factor, int keep_this,
|
||||
bool max_of_non_fragments, const UNICHARSET& unicharset) {
|
||||
int max_count = 0;
|
||||
@ -295,7 +295,7 @@ class ClassPruner {
|
||||
HeapSort(num_classes_, sort_key_, sort_index_);
|
||||
}
|
||||
|
||||
// Prints debug info on the class pruner matches for the pruned classes only.
|
||||
/** Prints debug info on the class pruner matches for the pruned classes only. */
|
||||
void DebugMatch(const Classify& classify,
|
||||
const INT_TEMPLATES_STRUCT* int_templates,
|
||||
const INT_FEATURE_STRUCT* features) const {
|
||||
@ -332,7 +332,7 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Prints a summary of the pruner result.
|
||||
/** Prints a summary of the pruner result. */
|
||||
void SummarizeResult(const Classify& classify,
|
||||
const INT_TEMPLATES_STRUCT* int_templates,
|
||||
const uinT16* expected_num_features,
|
||||
@ -354,8 +354,8 @@ class ClassPruner {
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the pruned, sorted classes into the output results and returns
|
||||
// the number of classes.
|
||||
/// Copies the pruned, sorted classes into the output results and returns
|
||||
/// the number of classes.
|
||||
int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
|
||||
CP_RESULT_STRUCT empty;
|
||||
results->init_to_size(num_classes_, empty);
|
||||
@ -368,57 +368,50 @@ class ClassPruner {
|
||||
}
|
||||
|
||||
private:
|
||||
// Array[rounded_classes_] of initial counts for each class.
|
||||
/** Array[rounded_classes_] of initial counts for each class. */
|
||||
int *class_count_;
|
||||
// Array[rounded_classes_] of modified counts for each class after normalizing
|
||||
// for expected number of features, disabled classes, fragments, and xheights.
|
||||
/// Array[rounded_classes_] of modified counts for each class after normalizing
|
||||
/// for expected number of features, disabled classes, fragments, and xheights.
|
||||
int *norm_count_;
|
||||
// Array[rounded_classes_ +1] of pruned counts that gets sorted
|
||||
/** Array[rounded_classes_ +1] of pruned counts that gets sorted */
|
||||
int *sort_key_;
|
||||
// Array[rounded_classes_ +1] of classes corresponding to sort_key_.
|
||||
/** Array[rounded_classes_ +1] of classes corresponding to sort_key_. */
|
||||
int *sort_index_;
|
||||
// Number of classes in this class pruner.
|
||||
/** Number of classes in this class pruner. */
|
||||
int max_classes_;
|
||||
// Rounded up number of classes used for array sizes.
|
||||
/** Rounded up number of classes used for array sizes. */
|
||||
int rounded_classes_;
|
||||
// Threshold count applied to prune classes.
|
||||
/** Threshold count applied to prune classes. */
|
||||
int pruning_threshold_;
|
||||
// The number of features used to compute the scores.
|
||||
/** The number of features used to compute the scores. */
|
||||
int num_features_;
|
||||
// Final number of pruned classes.
|
||||
/** Final number of pruned classes. */
|
||||
int num_classes_;
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Runs the class pruner from int_templates on the given features, returning
|
||||
// the number of classes output in results.
|
||||
// int_templates Class pruner tables
|
||||
// num_features Number of features in blob
|
||||
// features Array of features
|
||||
// normalization_factors Array of fudge factors from blob
|
||||
// normalization process (by CLASS_INDEX)
|
||||
// expected_num_features Array of expected number of features
|
||||
// for each class (by CLASS_INDEX)
|
||||
// results Sorted Array of pruned classes. Must be an array
|
||||
// of size at least int_templates->NumClasses.
|
||||
/**
|
||||
* Runs the class pruner from int_templates on the given features, returning
|
||||
* the number of classes output in results.
|
||||
* @param int_templates Class pruner tables
|
||||
* @param num_features Number of features in blob
|
||||
* @param features Array of features
|
||||
* @param normalization_factors Array of fudge factors from blob
|
||||
* normalization process (by CLASS_INDEX)
|
||||
* @param expected_num_features Array of expected number of features
|
||||
* for each class (by CLASS_INDEX)
|
||||
* @param results Sorted Array of pruned classes. Must be an array
|
||||
* of size at least int_templates->NumClasses.
|
||||
* @param keep_this
|
||||
*/
|
||||
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
int num_features, int keep_this,
|
||||
const INT_FEATURE_STRUCT* features,
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
GenericVector<CP_RESULT_STRUCT>* results) {
|
||||
/*
|
||||
** Operation:
|
||||
** Prunes the classes using a modified fast match table.
|
||||
** Returns a sorted list of classes along with the number
|
||||
** of pruned classes in that list.
|
||||
** Return: Number of pruned classes.
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
|
||||
*/
|
||||
ClassPruner pruner(int_templates->NumClasses);
|
||||
// Compute initial match scores for all classes.
|
||||
pruner.ComputeScores(int_templates, num_features, features);
|
||||
@ -457,7 +450,25 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* IntegerMatcher returns the best configuration and rating
|
||||
* for a single class. The class matched against is determined
|
||||
* by the uniqueness of the ClassTemplate parameter. The
|
||||
* best rating and its associated configuration are returned.
|
||||
*
|
||||
* Globals:
|
||||
* - local_matcher_multiplier_ Normalization factor multiplier
|
||||
* param ClassTemplate Prototypes & tables for a class
|
||||
* param BlobLength Length of unnormalized blob
|
||||
* param NumFeatures Number of features in blob
|
||||
* param Features Array of features
|
||||
* param NormalizationFactor Fudge factor from blob normalization process
|
||||
* param Result Class rating & configuration: (0.0 -> 1.0), 0=bad, 1=good
|
||||
* param Debug Debugger flag: 1=debugger on
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
|
||||
*/
|
||||
void IntegerMatcher::Match(INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
BIT_VECTOR ConfigMask,
|
||||
@ -467,28 +478,6 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate,
|
||||
int AdaptFeatureThreshold,
|
||||
int Debug,
|
||||
bool SeparateDebugWindows) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ClassTemplate Prototypes & tables for a class
|
||||
** BlobLength Length of unnormalized blob
|
||||
** NumFeatures Number of features in blob
|
||||
** Features Array of features
|
||||
** NormalizationFactor Fudge factor from blob
|
||||
** normalization process
|
||||
** Result Class rating & configuration:
|
||||
** (0.0 -> 1.0), 0=bad, 1=good
|
||||
** Debug Debugger flag: 1=debugger on
|
||||
** Globals:
|
||||
** local_matcher_multiplier_ Normalization factor multiplier
|
||||
** Operation:
|
||||
** IntegerMatcher returns the best configuration and rating
|
||||
** for a single class. The class matched against is determined
|
||||
** by the uniqueness of the ClassTemplate parameter. The
|
||||
** best rating and its associated configuration are returned.
|
||||
** Return:
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
|
||||
*/
|
||||
ScratchEvidence *tables = new ScratchEvidence();
|
||||
int Feature;
|
||||
int BestMatch;
|
||||
@ -542,8 +531,26 @@ void IntegerMatcher::Match(INT_CLASS ClassTemplate,
|
||||
delete tables;
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* FindGoodProtos finds all protos whose normalized proto-evidence
|
||||
* exceeds classify_adapt_proto_thresh. The list is ordered by increasing
|
||||
* proto id number.
|
||||
*
|
||||
* Globals:
|
||||
* - local_matcher_multiplier_ Normalization factor multiplier
|
||||
* param ClassTemplate Prototypes & tables for a class
|
||||
* param ProtoMask AND Mask for proto word
|
||||
* param ConfigMask AND Mask for config word
|
||||
* param BlobLength Length of unnormalized blob
|
||||
* param NumFeatures Number of features in blob
|
||||
* param Features Array of features
|
||||
* param ProtoArray Array of good protos
|
||||
* param AdaptProtoThreshold Threshold for good protos
|
||||
* param Debug Debugger flag: 1=debugger on
|
||||
* @return Number of good protos in ProtoArray.
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
|
||||
*/
|
||||
int IntegerMatcher::FindGoodProtos(
|
||||
INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
@ -554,28 +561,6 @@ int IntegerMatcher::FindGoodProtos(
|
||||
PROTO_ID *ProtoArray,
|
||||
int AdaptProtoThreshold,
|
||||
int Debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ClassTemplate Prototypes & tables for a class
|
||||
** ProtoMask AND Mask for proto word
|
||||
** ConfigMask AND Mask for config word
|
||||
** BlobLength Length of unnormalized blob
|
||||
** NumFeatures Number of features in blob
|
||||
** Features Array of features
|
||||
** ProtoArray Array of good protos
|
||||
** AdaptProtoThreshold Threshold for good protos
|
||||
** Debug Debugger flag: 1=debugger on
|
||||
** Globals:
|
||||
** local_matcher_multiplier_ Normalization factor multiplier
|
||||
** Operation:
|
||||
** FindGoodProtos finds all protos whose normalized proto-evidence
|
||||
** exceeds classify_adapt_proto_thresh. The list is ordered by increasing
|
||||
** proto id number.
|
||||
** Return:
|
||||
** Number of good protos in ProtoArray.
|
||||
** Exceptions: none
|
||||
** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
|
||||
*/
|
||||
ScratchEvidence *tables = new ScratchEvidence();
|
||||
int NumGoodProtos = 0;
|
||||
|
||||
@ -622,7 +607,21 @@ int IntegerMatcher::FindGoodProtos(
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* FindBadFeatures finds all features with maximum feature-evidence <
|
||||
* AdaptFeatureThresh. The list is ordered by increasing feature number.
|
||||
* @param ClassTemplate Prototypes & tables for a class
|
||||
* @param ProtoMask AND Mask for proto word
|
||||
* @param ConfigMask AND Mask for config word
|
||||
* @param BlobLength Length of unnormalized blob
|
||||
* @param NumFeatures Number of features in blob
|
||||
* @param Features Array of features
|
||||
* @param FeatureArray Array of bad features
|
||||
* @param AdaptFeatureThreshold Threshold for bad features
|
||||
* @param Debug Debugger flag: 1=debugger on
|
||||
* @return Number of bad features in FeatureArray.
|
||||
* @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
|
||||
*/
|
||||
int IntegerMatcher::FindBadFeatures(
|
||||
INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
@ -633,24 +632,6 @@ int IntegerMatcher::FindBadFeatures(
|
||||
FEATURE_ID *FeatureArray,
|
||||
int AdaptFeatureThreshold,
|
||||
int Debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ClassTemplate Prototypes & tables for a class
|
||||
** ProtoMask AND Mask for proto word
|
||||
** ConfigMask AND Mask for config word
|
||||
** BlobLength Length of unnormalized blob
|
||||
** NumFeatures Number of features in blob
|
||||
** Features Array of features
|
||||
** FeatureArray Array of bad features
|
||||
** AdaptFeatureThreshold Threshold for bad features
|
||||
** Debug Debugger flag: 1=debugger on
|
||||
** Operation:
|
||||
** FindBadFeatures finds all features with maximum feature-evidence <
|
||||
** AdaptFeatureThresh. The list is ordered by increasing feature number.
|
||||
** Return:
|
||||
** Number of bad features in FeatureArray.
|
||||
** History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
|
||||
*/
|
||||
ScratchEvidence *tables = new ScratchEvidence();
|
||||
int NumBadFeatures = 0;
|
||||
|
||||
@ -693,7 +674,6 @@ int IntegerMatcher::FindBadFeatures(
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
|
||||
classify_debug_level_ = classify_debug_level;
|
||||
|
||||
@ -722,9 +702,9 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
|
||||
}
|
||||
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
void ScratchEvidence::Clear(const INT_CLASS class_template) {
|
||||
memset(sum_feature_evidence_, 0,
|
||||
class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
|
||||
@ -739,21 +719,17 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Print debugging information for Configurations
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
void IMDebugConfiguration(int FeatureNum,
|
||||
uinT16 ActualProtoNum,
|
||||
uinT8 Evidence,
|
||||
BIT_VECTOR ConfigMask,
|
||||
uinT32 ConfigWord) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Globals:
|
||||
** Operation:
|
||||
** Print debugging information for Configuations
|
||||
** Return:
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
|
||||
FeatureNum, (int) ActualProtoNum, (int) Evidence);
|
||||
while (ConfigWord) {
|
||||
@ -767,19 +743,15 @@ void IMDebugConfiguration(int FeatureNum,
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Print debugging information for Configurations
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
void IMDebugConfigurationSum(int FeatureNum,
|
||||
uinT8 *FeatureEvidence,
|
||||
inT32 ConfigCount) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Globals:
|
||||
** Operation:
|
||||
** Print debugging information for Configuations
|
||||
** Return:
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
cprintf("F=%3d, C=", FeatureNum);
|
||||
for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
|
||||
cprintf("%4d", FeatureEvidence[ConfigNum]);
|
||||
@ -787,9 +759,17 @@ void IMDebugConfigurationSum(int FeatureNum,
|
||||
cprintf("\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* For the given feature: prune protos, compute evidence,
|
||||
* update Feature Evidence, Proto Evidence, and Sum of Feature
|
||||
* Evidence tables.
|
||||
* @param ClassTemplate Prototypes & tables for a class
|
||||
* @param FeatureNum Current feature number (for DEBUG only)
|
||||
* @param Feature Pointer to a feature struct
|
||||
* @param tables Evidence tables
|
||||
* @param Debug Debugger flag: 1=debugger on
|
||||
* @return none
|
||||
*/
|
||||
int IntegerMatcher::UpdateTablesForFeature(
|
||||
INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
@ -798,19 +778,6 @@ int IntegerMatcher::UpdateTablesForFeature(
|
||||
const INT_FEATURE_STRUCT* Feature,
|
||||
ScratchEvidence *tables,
|
||||
int Debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ClassTemplate Prototypes & tables for a class
|
||||
** FeatureNum Current feature number (for DEBUG only)
|
||||
** Feature Pointer to a feature struct
|
||||
** tables Evidence tables
|
||||
** Debug Debugger flag: 1=debugger on
|
||||
** Operation:
|
||||
** For the given feature: prune protos, compute evidence,
|
||||
** update Feature Evidence, Proto Evidence, and Sum of Feature
|
||||
** Evidence tables.
|
||||
** Return:
|
||||
*/
|
||||
register uinT32 ConfigWord;
|
||||
register uinT32 ProtoWord;
|
||||
register uinT32 ProtoNum;
|
||||
@ -950,7 +917,12 @@ int IntegerMatcher::UpdateTablesForFeature(
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Print debugging information for Configurations
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void IntegerMatcher::DebugFeatureProtoError(
|
||||
INT_CLASS ClassTemplate,
|
||||
@ -959,15 +931,6 @@ void IntegerMatcher::DebugFeatureProtoError(
|
||||
const ScratchEvidence& tables,
|
||||
inT16 NumFeatures,
|
||||
int Debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Globals:
|
||||
** Operation:
|
||||
** Print debugging information for Configuations
|
||||
** Return:
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
|
||||
int ConfigNum;
|
||||
uinT32 ConfigWord;
|
||||
@ -1076,8 +1039,6 @@ void IntegerMatcher::DebugFeatureProtoError(
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void IntegerMatcher::DisplayProtoDebugInfo(
|
||||
INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
@ -1119,7 +1080,6 @@ void IntegerMatcher::DisplayProtoDebugInfo(
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void IntegerMatcher::DisplayFeatureDebugInfo(
|
||||
INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
@ -1165,8 +1125,9 @@ void IntegerMatcher::DisplayFeatureDebugInfo(
|
||||
}
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Add sum of Proto Evidences into Sum Of Feature Evidence Array
|
||||
/**
|
||||
* Add sum of Proto Evidences into Sum Of Feature Evidence Array
|
||||
*/
|
||||
void ScratchEvidence::UpdateSumOfProtoEvidences(
|
||||
INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
|
||||
|
||||
@ -1206,9 +1167,10 @@ void ScratchEvidence::UpdateSumOfProtoEvidences(
|
||||
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Normalize Sum of Proto and Feature Evidence by dividing by the sum of
|
||||
// the Feature Lengths and the Proto Lengths for each configuration.
|
||||
/**
|
||||
* Normalize Sum of Proto and Feature Evidence by dividing by the sum of
|
||||
* the Feature Lengths and the Proto Lengths for each configuration.
|
||||
*/
|
||||
void ScratchEvidence::NormalizeSums(
|
||||
INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
|
||||
|
||||
@ -1219,22 +1181,17 @@ void ScratchEvidence::NormalizeSums(
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Find the best match for the current class and update the Result
|
||||
* with the configuration and match rating.
|
||||
* @return The best normalized sum of evidences
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
int IntegerMatcher::FindBestMatch(
|
||||
INT_CLASS class_template,
|
||||
const ScratchEvidence &tables,
|
||||
UnicharRating* result) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Globals:
|
||||
** Operation:
|
||||
** Find the best match for the current class and update the Result
|
||||
** with the configuration and match rating.
|
||||
** Return:
|
||||
** The best normalized sum of evidences
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
|
||||
*/
|
||||
int best_match = 0;
|
||||
result->config = 0;
|
||||
result->fonts.truncate(0);
|
||||
@ -1258,8 +1215,10 @@ int IntegerMatcher::FindBestMatch(
|
||||
return best_match;
|
||||
}
|
||||
|
||||
// Applies the CN normalization factor to the given rating and returns
|
||||
// the modified rating.
|
||||
/**
|
||||
* Applies the CN normalization factor to the given rating and returns
|
||||
* the modified rating.
|
||||
*/
|
||||
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
|
||||
int normalization_factor,
|
||||
int matcher_multiplier) {
|
||||
@ -1268,23 +1227,19 @@ float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
|
||||
(blob_length + matcher_multiplier);
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Sort Key array in ascending order using heap sort
|
||||
* algorithm. Also sort Index array that is tied to
|
||||
* the key array.
|
||||
* @param n Number of elements to sort
|
||||
* @param ra Key array [1..n]
|
||||
* @param rb Index array [1..n]
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
|
||||
*/
|
||||
void
|
||||
HeapSort (int n, register int ra[], register int rb[]) {
|
||||
/*
|
||||
** Parameters:
|
||||
** n Number of elements to sort
|
||||
** ra Key array [1..n]
|
||||
** rb Index array [1..n]
|
||||
** Globals:
|
||||
** Operation:
|
||||
** Sort Key array in ascending order using heap sort
|
||||
** algorithm. Also sort Index array that is tied to
|
||||
** the key array.
|
||||
** Return:
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
|
||||
*/
|
||||
register int i, rra, rrb;
|
||||
int l, j, ir;
|
||||
|
||||
|
@ -207,15 +207,15 @@ double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad");
|
||||
/*-----------------------------------------------------------------------------
|
||||
Public Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
// Builds a feature from an FCOORD for position with all the necessary
|
||||
// clipping and rounding.
|
||||
/// Builds a feature from an FCOORD for position with all the necessary
|
||||
/// clipping and rounding.
|
||||
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta)
|
||||
: X(ClipToRange<inT16>(static_cast<inT16>(pos.x() + 0.5), 0, 255)),
|
||||
Y(ClipToRange<inT16>(static_cast<inT16>(pos.y() + 0.5), 0, 255)),
|
||||
Theta(theta),
|
||||
CP_misses(0) {
|
||||
}
|
||||
// Builds a feature from ints with all the necessary clipping and casting.
|
||||
/** Builds a feature from ints with all the necessary clipping and casting. */
|
||||
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
|
||||
: X(static_cast<uinT8>(ClipToRange(x, 0, MAX_UINT8))),
|
||||
Y(static_cast<uinT8>(ClipToRange(y, 0, MAX_UINT8))),
|
||||
@ -223,7 +223,6 @@ INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
|
||||
CP_misses(0) {
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine adds a new class structure to a set of
|
||||
* templates. Classes have to be added to Templates in
|
||||
@ -258,7 +257,6 @@ void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) {
|
||||
} /* AddIntClass */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine returns the index of the next free config
|
||||
* in Class.
|
||||
@ -282,7 +280,6 @@ int AddIntConfig(INT_CLASS Class) {
|
||||
} /* AddIntConfig */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine allocates the next free proto in Class and
|
||||
* returns its index.
|
||||
@ -330,25 +327,24 @@ int AddIntProto(INT_CLASS Class) {
|
||||
|
||||
return (Index);
|
||||
|
||||
} /* AddIntProto */
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine adds Proto to the class pruning tables
|
||||
* for the specified class in Templates.
|
||||
*
|
||||
* Globals:
|
||||
* - classify_num_cp_levels number of levels used in the class pruner
|
||||
* @param Proto floating-pt proto to add to class pruner
|
||||
* @param ClassId class id corresponding to Proto
|
||||
* @param Templates set of templates containing class pruner
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 13 08:49:54 1991, DSJ, Created.
|
||||
*/
|
||||
void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
|
||||
INT_TEMPLATES Templates)
|
||||
/*
|
||||
** Parameters:
|
||||
** Proto floating-pt proto to add to class pruner
|
||||
** ClassId class id corresponding to Proto
|
||||
** Templates set of templates containing class pruner
|
||||
** Globals:
|
||||
** classify_num_cp_levels number of levels used in the class pruner
|
||||
** Operation: This routine adds Proto to the class pruning tables
|
||||
** for the specified class in Templates.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 13 08:49:54 1991, DSJ, Created.
|
||||
*/
|
||||
#define MAX_LEVEL 2
|
||||
{
|
||||
CLASS_PRUNER_STRUCT* Pruner;
|
||||
@ -377,22 +373,21 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId,
|
||||
} /* AddProtoToClassPruner */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine updates the proto pruner lookup tables
|
||||
* for Class to include a new proto identified by ProtoId
|
||||
* and described by Proto.
|
||||
* @param Proto floating-pt proto to be added to proto pruner
|
||||
* @param ProtoId id of proto
|
||||
* @param Class integer class that contains desired proto pruner
|
||||
* @param debug debug flag
|
||||
* @note Globals: none
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Feb 8 13:07:19 1991, DSJ, Created.
|
||||
*/
|
||||
void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
|
||||
INT_CLASS Class, bool debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Proto floating-pt proto to be added to proto pruner
|
||||
** ProtoId id of proto
|
||||
** Class integer class that contains desired proto pruner
|
||||
** Globals: none
|
||||
** Operation: This routine updates the proto pruner lookup tables
|
||||
** for Class to include a new proto identified by ProtoId
|
||||
** and described by Proto.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Feb 8 13:07:19 1991, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 Angle, X, Y, Length;
|
||||
FLOAT32 Pad;
|
||||
int Index;
|
||||
@ -438,10 +433,11 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId,
|
||||
} /* AddProtoToProtoPruner */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Returns a quantized bucket for the given param shifted by offset,
|
||||
// notionally (param + offset) * num_buckets, but clipped and casted to the
|
||||
// appropriate type.
|
||||
/**
|
||||
* Returns a quantized bucket for the given param shifted by offset,
|
||||
* notionally (param + offset) * num_buckets, but clipped and casted to the
|
||||
* appropriate type.
|
||||
*/
|
||||
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets) {
|
||||
int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
|
||||
return static_cast<uinT8>(ClipToRange(bucket, 0, num_buckets - 1));
|
||||
@ -451,52 +447,50 @@ uinT16 Bucket16For(FLOAT32 param, FLOAT32 offset, int num_buckets) {
|
||||
return static_cast<uinT16>(ClipToRange(bucket, 0, num_buckets - 1));
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Returns a quantized bucket for the given circular param shifted by offset,
|
||||
// notionally (param + offset) * num_buckets, but modded and casted to the
|
||||
// appropriate type.
|
||||
/**
|
||||
* Returns a quantized bucket for the given circular param shifted by offset,
|
||||
* notionally (param + offset) * num_buckets, but modded and casted to the
|
||||
* appropriate type.
|
||||
*/
|
||||
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets) {
|
||||
int bucket = IntCastRounded(MapParam(param, offset, num_buckets));
|
||||
return static_cast<uinT8>(Modulo(bucket, num_buckets));
|
||||
} /* CircBucketFor */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void UpdateMatchDisplay() {
|
||||
/*
|
||||
** Parameters: none
|
||||
** Globals:
|
||||
** FeatureShapes display list for features
|
||||
** ProtoShapes display list for protos
|
||||
** Operation: This routine clears the global feature and proto
|
||||
** display lists.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 15:40:19 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine clears the global feature and proto
|
||||
* display lists.
|
||||
*
|
||||
* Globals:
|
||||
* - FeatureShapes display list for features
|
||||
* - ProtoShapes display list for protos
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 15:40:19 1991, DSJ, Created.
|
||||
*/
|
||||
void UpdateMatchDisplay() {
|
||||
if (IntMatchWindow != NULL)
|
||||
IntMatchWindow->Update();
|
||||
} /* ClearMatchDisplay */
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Config config to be added to class
|
||||
** ConfigId id to be used for new config
|
||||
** Class class to add new config to
|
||||
** Globals: none
|
||||
** Operation: This operation updates the config vectors of all protos
|
||||
** in Class to indicate that the protos with 1's in Config
|
||||
** belong to a new configuration identified by ConfigId.
|
||||
** It is assumed that the length of the Config bit vector is
|
||||
** equal to the number of protos in Class.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Mon Feb 11 14:57:31 1991, DSJ, Created.
|
||||
/**
|
||||
* This operation updates the config vectors of all protos
|
||||
* in Class to indicate that the protos with 1's in Config
|
||||
* belong to a new configuration identified by ConfigId.
|
||||
* It is assumed that the length of the Config bit vector is
|
||||
* equal to the number of protos in Class.
|
||||
* @param Config config to be added to class
|
||||
* @param ConfigId id to be used for new config
|
||||
* @param Class class to add new config to
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Mon Feb 11 14:57:31 1991, DSJ, Created.
|
||||
*/
|
||||
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
|
||||
int ProtoId;
|
||||
INT_PROTO Proto;
|
||||
int TotalLength;
|
||||
@ -514,20 +508,18 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) {
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Proto floating-pt proto to be converted to integer format
|
||||
** ProtoId id of proto
|
||||
** Class integer class to add converted proto to
|
||||
** Globals: none
|
||||
** Operation: This routine converts Proto to integer format and
|
||||
** installs it as ProtoId in Class.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Feb 8 11:22:43 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine converts Proto to integer format and
|
||||
* installs it as ProtoId in Class.
|
||||
* @param Proto floating-pt proto to be converted to integer format
|
||||
* @param ProtoId id of proto
|
||||
* @param Class integer class to add converted proto to
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Feb 8 11:22:43 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
|
||||
INT_PROTO P;
|
||||
FLOAT32 Param;
|
||||
|
||||
@ -559,20 +551,19 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) {
|
||||
} /* ConvertProto */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine converts from the old floating point format
|
||||
* to the new integer format.
|
||||
* @param FloatProtos prototypes in old floating pt format
|
||||
* @param target_unicharset the UNICHARSET to use
|
||||
* @return New set of training templates in integer format.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Feb 7 14:40:42 1991, DSJ, Created.
|
||||
*/
|
||||
INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
|
||||
const UNICHARSET&
|
||||
target_unicharset) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FloatProtos prototypes in old floating pt format
|
||||
** Globals: none
|
||||
** Operation: This routine converts from the old floating point format
|
||||
** to the new integer format.
|
||||
** Return: New set of training templates in integer format.
|
||||
** Exceptions: none
|
||||
** History: Thu Feb 7 14:40:42 1991, DSJ, Created.
|
||||
*/
|
||||
INT_TEMPLATES IntTemplates;
|
||||
CLASS_TYPE FClass;
|
||||
INT_CLASS IClass;
|
||||
@ -623,21 +614,20 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos,
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Feature pico-feature to be displayed
|
||||
** Evidence best evidence for this feature (0-1)
|
||||
** Globals:
|
||||
** FeatureShapes global display list for features
|
||||
** Operation: This routine renders the specified feature into a
|
||||
** global display list.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine renders the specified feature into a
|
||||
* global display list.
|
||||
*
|
||||
* Globals:
|
||||
* - FeatureShapes global display list for features
|
||||
* @param Feature pico-feature to be displayed
|
||||
* @param Evidence best evidence for this feature (0-1)
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
*/
|
||||
void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
|
||||
ScrollView::Color color = GetMatchColorFor(Evidence);
|
||||
RenderIntFeature(IntMatchWindow, Feature, color);
|
||||
if (FeatureDisplayWindow) {
|
||||
@ -646,21 +636,20 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) {
|
||||
} /* DisplayIntFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Class class to take proto from
|
||||
** ProtoId id of proto in Class to be displayed
|
||||
** Evidence total evidence for proto (0-1)
|
||||
** Globals:
|
||||
** ProtoShapes global display list for protos
|
||||
** Operation: This routine renders the specified proto into a
|
||||
** global display list.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine renders the specified proto into a
|
||||
* global display list.
|
||||
*
|
||||
* Globals:
|
||||
* - ProtoShapes global display list for protos
|
||||
* @param Class class to take proto from
|
||||
* @param ProtoId id of proto in Class to be displayed
|
||||
* @param Evidence total evidence for proto (0-1)
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
*/
|
||||
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
|
||||
ScrollView::Color color = GetMatchColorFor(Evidence);
|
||||
RenderIntProto(IntMatchWindow, Class, ProtoId, color);
|
||||
if (ProtoDisplayWindow) {
|
||||
@ -669,20 +658,18 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
|
||||
} /* DisplayIntProto */
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
|
||||
/*
|
||||
** Parameters:
|
||||
** MaxNumProtos number of protos to allocate space for
|
||||
** MaxNumConfigs number of configs to allocate space for
|
||||
** Globals: none
|
||||
** Operation: This routine creates a new integer class data structure
|
||||
** and returns it. Sufficient space is allocated
|
||||
** to handle the specified number of protos and configs.
|
||||
** Return: New class created.
|
||||
** Exceptions: none
|
||||
** History: Fri Feb 8 10:51:23 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine creates a new integer class data structure
|
||||
* and returns it. Sufficient space is allocated
|
||||
* to handle the specified number of protos and configs.
|
||||
* @param MaxNumProtos number of protos to allocate space for
|
||||
* @param MaxNumConfigs number of configs to allocate space for
|
||||
* @return New class created.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Feb 8 10:51:23 1991, DSJ, Created.
|
||||
*/
|
||||
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
|
||||
INT_CLASS Class;
|
||||
PROTO_SET ProtoSet;
|
||||
int i;
|
||||
@ -721,7 +708,6 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) {
|
||||
} /* NewIntClass */
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------*/
|
||||
void free_int_class(INT_CLASS int_class) {
|
||||
int i;
|
||||
|
||||
@ -735,17 +721,15 @@ void free_int_class(INT_CLASS int_class) {
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
INT_TEMPLATES NewIntTemplates() {
|
||||
/*
|
||||
** Parameters: none
|
||||
** Globals: none
|
||||
** Operation: This routine allocates a new set of integer templates
|
||||
** initialized to hold 0 classes.
|
||||
** Return: The integer templates created.
|
||||
** Exceptions: none
|
||||
** History: Fri Feb 8 08:38:51 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine allocates a new set of integer templates
|
||||
* initialized to hold 0 classes.
|
||||
* @return The integer templates created.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Feb 8 08:38:51 1991, DSJ, Created.
|
||||
*/
|
||||
INT_TEMPLATES NewIntTemplates() {
|
||||
INT_TEMPLATES T;
|
||||
int i;
|
||||
|
||||
@ -773,18 +757,17 @@ void free_int_templates(INT_TEMPLATES templates) {
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open file to read templates from
|
||||
** Globals: none
|
||||
** Operation: This routine reads a set of integer templates from
|
||||
** File. File must already be open and must be in the
|
||||
** correct binary format.
|
||||
** Return: Pointer to integer templates read from File.
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine reads a set of integer templates from
|
||||
* File. File must already be open and must be in the
|
||||
* correct binary format.
|
||||
* @param File open file to read templates from
|
||||
* @return Pointer to integer templates read from File.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
|
||||
*/
|
||||
INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
|
||||
int i, j, w, x, y, z;
|
||||
BOOL8 swap;
|
||||
int nread;
|
||||
@ -1081,20 +1064,19 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
|
||||
} /* ReadIntTemplates */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void Classify::ShowMatchDisplay() {
|
||||
/*
|
||||
** Parameters: none
|
||||
** Globals:
|
||||
** FeatureShapes display list containing feature matches
|
||||
** ProtoShapes display list containing proto matches
|
||||
** Operation: This routine sends the shapes in the global display
|
||||
** lists to the match debugger window.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 15:47:33 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine sends the shapes in the global display
|
||||
* lists to the match debugger window.
|
||||
*
|
||||
* Globals:
|
||||
* - FeatureShapes display list containing feature matches
|
||||
* - ProtoShapes display list containing proto matches
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 15:47:33 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::ShowMatchDisplay() {
|
||||
InitIntMatchWindowIfReqd();
|
||||
if (ProtoDisplayWindow) {
|
||||
ProtoDisplayWindow->Clear();
|
||||
@ -1117,8 +1099,8 @@ void Classify::ShowMatchDisplay() {
|
||||
}
|
||||
} /* ShowMatchDisplay */
|
||||
|
||||
// Clears the given window and draws the featurespace guides for the
|
||||
// appropriate normalization method.
|
||||
/// Clears the given window and draws the featurespace guides for the
|
||||
/// appropriate normalization method.
|
||||
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
|
||||
window->Clear();
|
||||
|
||||
@ -1141,21 +1123,20 @@ void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine writes Templates to File. The format
|
||||
* is an efficient binary format. File must already be open
|
||||
* for writing.
|
||||
* @param File open file to write templates to
|
||||
* @param Templates templates to save into File
|
||||
* @param target_unicharset the UNICHARSET to use
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
|
||||
const UNICHARSET& target_unicharset) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open file to write templates to
|
||||
** Templates templates to save into File
|
||||
** Globals: none
|
||||
** Operation: This routine writes Templates to File. The format
|
||||
** is an efficient binary format. File must already be open
|
||||
** for writing.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
|
||||
*/
|
||||
int i, j;
|
||||
INT_CLASS Class;
|
||||
int unicharset_size = target_unicharset.size();
|
||||
@ -1219,68 +1200,62 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
|
||||
/*-----------------------------------------------------------------------------
|
||||
Private Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Bucket bucket whose start is to be computed
|
||||
** Offset offset used to map params to buckets
|
||||
** NumBuckets total number of buckets
|
||||
** Globals: none
|
||||
** Operation: This routine returns the parameter value which
|
||||
** corresponds to the beginning of the specified bucket.
|
||||
** The bucket number should have been generated using the
|
||||
** BucketFor() function with parameters Offset and NumBuckets.
|
||||
** Return: Param value corresponding to start position of Bucket.
|
||||
** Exceptions: none
|
||||
** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine returns the parameter value which
|
||||
* corresponds to the beginning of the specified bucket.
|
||||
* The bucket number should have been generated using the
|
||||
* BucketFor() function with parameters Offset and NumBuckets.
|
||||
* @param Bucket bucket whose start is to be computed
|
||||
* @param Offset offset used to map params to buckets
|
||||
* @param NumBuckets total number of buckets
|
||||
* @return Param value corresponding to start position of Bucket.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Feb 14 13:24:33 1991, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) {
|
||||
return (((FLOAT32) Bucket / NumBuckets) - Offset);
|
||||
|
||||
} /* BucketStart */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Bucket bucket whose end is to be computed
|
||||
** Offset offset used to map params to buckets
|
||||
** NumBuckets total number of buckets
|
||||
** Globals: none
|
||||
** Operation: This routine returns the parameter value which
|
||||
** corresponds to the end of the specified bucket.
|
||||
** The bucket number should have been generated using the
|
||||
** BucketFor() function with parameters Offset and NumBuckets.
|
||||
** Return: Param value corresponding to end position of Bucket.
|
||||
** Exceptions: none
|
||||
** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine returns the parameter value which
|
||||
* corresponds to the end of the specified bucket.
|
||||
* The bucket number should have been generated using the
|
||||
* BucketFor() function with parameters Offset and NumBuckets.
|
||||
* @param Bucket bucket whose end is to be computed
|
||||
* @param Offset offset used to map params to buckets
|
||||
* @param NumBuckets total number of buckets
|
||||
* @return Param value corresponding to end position of Bucket.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Feb 14 13:24:33 1991, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) {
|
||||
return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset);
|
||||
} /* BucketEnd */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine fills in the section of a class pruner
|
||||
* corresponding to a single x value for a single proto of
|
||||
* a class.
|
||||
* @param FillSpec specifies which bits to fill in pruner
|
||||
* @param Pruner class pruner to be filled
|
||||
* @param ClassMask indicates which bits to change in each word
|
||||
* @param ClassCount indicates what to change bits to
|
||||
* @param WordIndex indicates which word to change
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Feb 19 11:11:29 1991, DSJ, Created.
|
||||
*/
|
||||
void DoFill(FILL_SPEC *FillSpec,
|
||||
CLASS_PRUNER_STRUCT* Pruner,
|
||||
register uinT32 ClassMask,
|
||||
register uinT32 ClassCount,
|
||||
register uinT32 WordIndex) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FillSpec specifies which bits to fill in pruner
|
||||
** Pruner class pruner to be filled
|
||||
** ClassMask indicates which bits to change in each word
|
||||
** ClassCount indicates what to change bits to
|
||||
** WordIndex indicates which word to change
|
||||
** Globals: none
|
||||
** Operation: This routine fills in the section of a class pruner
|
||||
** corresponding to a single x value for a single proto of
|
||||
** a class.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 11:11:29 1991, DSJ, Created.
|
||||
*/
|
||||
register int X, Y, Angle;
|
||||
register uinT32 OldWord;
|
||||
|
||||
@ -1310,18 +1285,16 @@ void DoFill(FILL_SPEC *FillSpec,
|
||||
} /* DoFill */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
BOOL8 FillerDone(TABLE_FILLER *Filler) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Filler table filler to check if done
|
||||
** Globals: none
|
||||
** Operation: Return TRUE if the specified table filler is done, i.e.
|
||||
** if it has no more lines to fill.
|
||||
** Return: TRUE if no more lines to fill, FALSE otherwise.
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 10:08:05 1991, DSJ, Created.
|
||||
/**
|
||||
* Return TRUE if the specified table filler is done, i.e.
|
||||
* if it has no more lines to fill.
|
||||
* @param Filler table filler to check if done
|
||||
* @return TRUE if no more lines to fill, FALSE otherwise.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Feb 19 10:08:05 1991, DSJ, Created.
|
||||
*/
|
||||
BOOL8 FillerDone(TABLE_FILLER *Filler) {
|
||||
FILL_SWITCH *Next;
|
||||
|
||||
Next = &(Filler->Switch[Filler->NextSwitch]);
|
||||
@ -1334,26 +1307,25 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) {
|
||||
} /* FillerDone */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine sets Bit in each bit vector whose
|
||||
* bucket lies within the range Center +- Spread. The fill
|
||||
* is done for a circular dimension, i.e. bucket 0 is adjacent
|
||||
* to the last bucket. It is assumed that Center and Spread
|
||||
* are expressed in a circular coordinate system whose range
|
||||
* is 0 to 1.
|
||||
* @param ParamTable table of bit vectors, one per param bucket
|
||||
* @param Bit bit position in vectors to be filled
|
||||
* @param Center center of filled area
|
||||
* @param Spread spread of filled area
|
||||
* @param debug debug flag
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Oct 16 09:26:54 1990, DSJ, Created.
|
||||
*/
|
||||
void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
|
||||
int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ParamTable table of bit vectors, one per param bucket
|
||||
** Bit bit position in vectors to be filled
|
||||
** Center center of filled area
|
||||
** Spread spread of filled area
|
||||
** Globals: none
|
||||
** Operation: This routine sets Bit in each bit vector whose
|
||||
** bucket lies within the range Center +- Spread. The fill
|
||||
** is done for a circular dimension, i.e. bucket 0 is adjacent
|
||||
** to the last bucket. It is assumed that Center and Spread
|
||||
** are expressed in a circular coordinate system whose range
|
||||
** is 0 to 1.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Tue Oct 16 09:26:54 1990, DSJ, Created.
|
||||
*/
|
||||
int i, FirstBucket, LastBucket;
|
||||
|
||||
if (Spread > 0.5)
|
||||
@ -1378,27 +1350,26 @@ void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
|
||||
} /* FillPPCircularBits */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine sets Bit in each bit vector whose
|
||||
* bucket lies within the range Center +- Spread. The fill
|
||||
* is done for a linear dimension, i.e. there is no wrap-around
|
||||
* for this dimension. It is assumed that Center and Spread
|
||||
* are expressed in a linear coordinate system whose range
|
||||
* is approximately 0 to 1. Values outside this range will
|
||||
* be clipped.
|
||||
* @param ParamTable table of bit vectors, one per param bucket
|
||||
* @param Bit bit number being filled
|
||||
* @param Center center of filled area
|
||||
* @param Spread spread of filled area
|
||||
* @param debug debug flag
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Oct 16 09:26:54 1990, DSJ, Created.
|
||||
*/
|
||||
void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
|
||||
int Bit, FLOAT32 Center, FLOAT32 Spread, bool debug) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ParamTable table of bit vectors, one per param bucket
|
||||
** Bit bit number being filled
|
||||
** Center center of filled area
|
||||
** Spread spread of filled area
|
||||
** Globals: none
|
||||
** Operation: This routine sets Bit in each bit vector whose
|
||||
** bucket lies within the range Center +- Spread. The fill
|
||||
** is done for a linear dimension, i.e. there is no wrap-around
|
||||
** for this dimension. It is assumed that Center and Spread
|
||||
** are expressed in a linear coordinate system whose range
|
||||
** is approximately 0 to 1. Values outside this range will
|
||||
** be clipped.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Tue Oct 16 09:26:54 1990, DSJ, Created.
|
||||
*/
|
||||
int i, FirstBucket, LastBucket;
|
||||
|
||||
FirstBucket = (int) floor ((Center - Spread) * NUM_PP_BUCKETS);
|
||||
@ -1419,18 +1390,20 @@ void FillPPLinearBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR],
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
namespace tesseract {
|
||||
/**
|
||||
* This routine prompts the user with Prompt and waits
|
||||
* for the user to enter something in the debug window.
|
||||
* @param Prompt prompt to print while waiting for input from window
|
||||
* @param adaptive_on
|
||||
* @param pretrained_on
|
||||
* @param shape_id
|
||||
* @return Character entered in the debug window.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 16:55:13 1991, DSJ, Created.
|
||||
*/
|
||||
CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
|
||||
bool* pretrained_on, int* shape_id) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Prompt prompt to print while waiting for input from window
|
||||
** Globals: none
|
||||
** Operation: This routine prompts the user with Prompt and waits
|
||||
** for the user to enter something in the debug window.
|
||||
** Return: Character entered in the debug window.
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 16:55:13 1991, DSJ, Created.
|
||||
*/
|
||||
tprintf("%s\n", Prompt);
|
||||
SVEvent* ev;
|
||||
SVEventType ev_type;
|
||||
@ -1494,27 +1467,25 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool* adaptive_on,
|
||||
} // namespace tesseract
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine copies the appropriate global pad variables
|
||||
* into EndPad, SidePad, and AnglePad. This is a kludge used
|
||||
* to get around the fact that global control variables cannot
|
||||
* be arrays. If the specified level is illegal, the tightest
|
||||
* possible pads are returned.
|
||||
* @param Level "tightness" level to return pads for
|
||||
* @param EndPad place to put end pad for Level
|
||||
* @param SidePad place to put side pad for Level
|
||||
* @param AnglePad place to put angle pad for Level
|
||||
* @return none (results are returned in EndPad, SidePad, and AnglePad).
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Feb 14 08:26:49 1991, DSJ, Created.
|
||||
*/
|
||||
void GetCPPadsForLevel(int Level,
|
||||
FLOAT32 *EndPad,
|
||||
FLOAT32 *SidePad,
|
||||
FLOAT32 *AnglePad) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Level "tightness" level to return pads for
|
||||
** EndPad place to put end pad for Level
|
||||
** SidePad place to put side pad for Level
|
||||
** AnglePad place to put angle pad for Level
|
||||
** Globals: none
|
||||
** Operation: This routine copies the appropriate global pad variables
|
||||
** into EndPad, SidePad, and AnglePad. This is a kludge used
|
||||
** to get around the fact that global control variables cannot
|
||||
** be arrays. If the specified level is illegal, the tightest
|
||||
** possible pads are returned.
|
||||
** Return: none (results are returned in EndPad, SidePad, and AnglePad.
|
||||
** Exceptions: none
|
||||
** History: Thu Feb 14 08:26:49 1991, DSJ, Created.
|
||||
*/
|
||||
switch (Level) {
|
||||
case 0:
|
||||
*EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength ();
|
||||
@ -1546,18 +1517,14 @@ void GetCPPadsForLevel(int Level,
|
||||
} /* GetCPPadsForLevel */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Evidence evidence value to return color for
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** Return: Color which corresponds to specified Evidence value.
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 15:24:52 1991, DSJ, Created.
|
||||
/**
|
||||
* @param Evidence evidence value to return color for
|
||||
* @return Color which corresponds to specified Evidence value.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 15:24:52 1991, DSJ, Created.
|
||||
*/
|
||||
|
||||
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
|
||||
assert (Evidence >= 0.0);
|
||||
assert (Evidence <= 1.0);
|
||||
|
||||
@ -1572,21 +1539,19 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
|
||||
} /* GetMatchColorFor */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Filler filler to get next fill spec from
|
||||
** Fill place to put spec for next fill
|
||||
** Globals: none
|
||||
** Operation: This routine returns (in Fill) the specification of
|
||||
** the next line to be filled from Filler. FillerDone() should
|
||||
** always be called before GetNextFill() to ensure that we
|
||||
** do not run past the end of the fill table.
|
||||
** Return: none (results are returned in Fill)
|
||||
** Exceptions: none
|
||||
** History: Tue Feb 19 10:17:42 1991, DSJ, Created.
|
||||
/**
|
||||
* This routine returns (in Fill) the specification of
|
||||
* the next line to be filled from Filler. FillerDone() should
|
||||
* always be called before GetNextFill() to ensure that we
|
||||
* do not run past the end of the fill table.
|
||||
* @param Filler filler to get next fill spec from
|
||||
* @param Fill place to put spec for next fill
|
||||
* @return none (results are returned in Fill)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Feb 19 10:17:42 1991, DSJ, Created.
|
||||
*/
|
||||
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
|
||||
FILL_SWITCH *Next;
|
||||
|
||||
/* compute the fill assuming no switches will be encountered */
|
||||
@ -1625,7 +1590,6 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
|
||||
} /* GetNextFill */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine computes a data structure (Filler)
|
||||
* which can be used to fill in a rectangle surrounding
|
||||
@ -1635,9 +1599,8 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) {
|
||||
* @param Proto proto to create a filler for
|
||||
* @param Filler place to put table filler
|
||||
*
|
||||
* Globals: none
|
||||
*
|
||||
* @return none (results are returned in Filler)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Feb 14 09:27:05 1991, DSJ, Created.
|
||||
*/
|
||||
@ -1794,14 +1757,13 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad,
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
/*
|
||||
* Parameters:
|
||||
* ShapeList shape list to add feature rendering to
|
||||
* Feature feature to be rendered
|
||||
* Color color to use for feature rendering
|
||||
* Globals: none
|
||||
* Operation: This routine renders the specified feature into ShapeList.
|
||||
* Return: New shape list with rendering of Feature added.
|
||||
/**
|
||||
* This routine renders the specified feature into window.
|
||||
* @param window to add feature rendering to
|
||||
* @param Feature feature to be rendered
|
||||
* @param color color to use for feature rendering
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 14:57:41 1991, DSJ, Created.
|
||||
*/
|
||||
@ -1826,15 +1788,15 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
|
||||
} /* RenderIntFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
|
||||
/**
|
||||
* This routine extracts the parameters of the specified
|
||||
* proto from the class description and adds a rendering of
|
||||
* the proto onto the ShapeList.
|
||||
*
|
||||
* @param Class class that proto is contained in
|
||||
* @param ProtoId id of proto to be rendered
|
||||
* @param color color to render proto in
|
||||
* @param window ScrollView instance
|
||||
* @param Class class that proto is contained in
|
||||
* @param ProtoId id of proto to be rendered
|
||||
* @param color color to render proto in
|
||||
*
|
||||
* Globals: none
|
||||
*
|
||||
@ -1894,7 +1856,6 @@ void RenderIntProto(ScrollView *window,
|
||||
} /* RenderIntProto */
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine truncates Param to lie within the range
|
||||
* of Min-Max inclusive. If a truncation is performed, and
|
||||
@ -1926,7 +1887,6 @@ int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) {
|
||||
} /* TruncateParam */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
/**
|
||||
* Initializes the int matcher window if it is not already
|
||||
@ -1971,8 +1931,8 @@ void InitFeatureDisplayWindowIfReqd() {
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a window of the appropriate size for displaying elements
|
||||
// in feature space.
|
||||
/// Creates a window of the appropriate size for displaying elements
|
||||
/// in feature space.
|
||||
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) {
|
||||
return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true);
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ static int NextLevel(KDTREE *tree, int level) {
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Store the k smallest-keyed key-value pairs.
|
||||
/** Store the k smallest-keyed key-value pairs. */
|
||||
template<typename Key, typename Value>
|
||||
class MinK {
|
||||
public:
|
||||
@ -70,11 +70,11 @@ class MinK {
|
||||
const Element* elements() { return elements_; }
|
||||
|
||||
private:
|
||||
const Key max_key_; // the maximum possible Key
|
||||
Element* elements_; // unsorted array of elements
|
||||
int elements_count_; // the number of results collected so far
|
||||
int k_; // the number of results we want from the search
|
||||
int max_index_; // the index of the result with the largest key
|
||||
const Key max_key_; //< the maximum possible Key
|
||||
Element* elements_; //< unsorted array of elements
|
||||
int elements_count_; //< the number of results collected so far
|
||||
int k_; //< the number of results we want from the search
|
||||
int max_index_; //< the index of the result with the largest key
|
||||
};
|
||||
|
||||
template<typename Key, typename Value>
|
||||
@ -117,13 +117,13 @@ bool MinK<Key, Value>::insert(Key key, Value value) {
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Helper class for searching for the k closest points to query_point in tree.
|
||||
/** Helper class for searching for the k closest points to query_point in tree. */
|
||||
class KDTreeSearch {
|
||||
public:
|
||||
KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest);
|
||||
~KDTreeSearch();
|
||||
|
||||
// Return the k nearest points' data.
|
||||
/** Return the k nearest points' data. */
|
||||
void Search(int *result_count, FLOAT32 *distances, void **results);
|
||||
|
||||
private:
|
||||
@ -133,8 +133,8 @@ class KDTreeSearch {
|
||||
KDTREE *tree_;
|
||||
FLOAT32 *query_point_;
|
||||
MinK<FLOAT32, void *>* results_;
|
||||
FLOAT32 *sb_min_; // search box minimum
|
||||
FLOAT32 *sb_max_; // search box maximum
|
||||
FLOAT32 *sb_min_; //< search box minimum
|
||||
FLOAT32 *sb_max_; //< search box maximum
|
||||
};
|
||||
|
||||
KDTreeSearch::KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest) :
|
||||
@ -151,8 +151,8 @@ KDTreeSearch::~KDTreeSearch() {
|
||||
delete[] sb_max_;
|
||||
}
|
||||
|
||||
// Locate the k_closest points to query_point_, and return their distances and
|
||||
// data into the given buffers.
|
||||
/// Locate the k_closest points to query_point_, and return their distances and
|
||||
/// data into the given buffers.
|
||||
void KDTreeSearch::Search(int *result_count,
|
||||
FLOAT32 *distances,
|
||||
void **results) {
|
||||
@ -176,11 +176,9 @@ void KDTreeSearch::Search(int *result_count,
|
||||
/*-----------------------------------------------------------------------------
|
||||
Public Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/// Return a new KDTREE based on the specified parameters.
|
||||
/// Parameters:
|
||||
/// KeySize # of dimensions in the K-D tree
|
||||
/// KeyDesc array of params to describe key dimensions
|
||||
/// @return a new KDTREE based on the specified parameters.
|
||||
/// @param KeySize # of dimensions in the K-D tree
|
||||
/// @param KeyDesc array of params to describe key dimensions
|
||||
KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) {
|
||||
KDTREE *KDTree = (KDTREE *) Emalloc(
|
||||
sizeof(KDTREE) + (KeySize - 1) * sizeof(PARAM_DESC));
|
||||
@ -205,8 +203,6 @@ KDTREE *MakeKDTree(inT16 KeySize, const PARAM_DESC KeyDesc[]) {
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
/**
|
||||
* This routine stores Data in the K-D tree specified by Tree
|
||||
* using Key as an access key.
|
||||
@ -219,6 +215,7 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
* @note History: 3/10/89, DSJ, Created.
|
||||
* 7/13/89, DSJ, Changed return to void.
|
||||
*/
|
||||
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
int Level;
|
||||
KDNODE *Node;
|
||||
KDNODE **PtrToNode;
|
||||
@ -245,7 +242,6 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
} /* KDStore */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine deletes a node from Tree. The node to be
|
||||
* deleted is specified by the Key for the node and the Data
|
||||
@ -303,39 +299,36 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
|
||||
} /* KDDelete */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine searches the K-D tree specified by Tree and
|
||||
* finds the QuerySize nearest neighbors of Query. All neighbors
|
||||
* must be within MaxDistance of Query. The data contents of
|
||||
* the nearest neighbors
|
||||
* are placed in NBuffer and their distances from Query are
|
||||
* placed in DBuffer.
|
||||
* @param Tree ptr to K-D tree to be searched
|
||||
* @param Query ptr to query key (point in D-space)
|
||||
* @param QuerySize number of nearest neighbors to be found
|
||||
* @param MaxDistance all neighbors must be within this distance
|
||||
* @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
|
||||
* @param DBuffer ptr to QuerySize buffer to hold distances
|
||||
* from nearest neighbor to query point
|
||||
* @param NumberOfResults [out] Number of nearest neighbors actually found
|
||||
* @note Exceptions: none
|
||||
* @note History:
|
||||
* - 3/10/89, DSJ, Created.
|
||||
* - 7/13/89, DSJ, Return contents of node instead of node itself.
|
||||
*/
|
||||
void KDNearestNeighborSearch(
|
||||
KDTREE *Tree, FLOAT32 Query[], int QuerySize, FLOAT32 MaxDistance,
|
||||
int *NumberOfResults, void **NBuffer, FLOAT32 DBuffer[]) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Tree ptr to K-D tree to be searched
|
||||
** Query ptr to query key (point in D-space)
|
||||
** QuerySize number of nearest neighbors to be found
|
||||
** MaxDistance all neighbors must be within this distance
|
||||
** NBuffer ptr to QuerySize buffer to hold nearest neighbors
|
||||
** DBuffer ptr to QuerySize buffer to hold distances
|
||||
** from nearest neighbor to query point
|
||||
** Operation:
|
||||
** This routine searches the K-D tree specified by Tree and
|
||||
** finds the QuerySize nearest neighbors of Query. All neighbors
|
||||
** must be within MaxDistance of Query. The data contents of
|
||||
** the nearest neighbors
|
||||
** are placed in NBuffer and their distances from Query are
|
||||
** placed in DBuffer.
|
||||
** Return: Number of nearest neighbors actually found
|
||||
** Exceptions: none
|
||||
** History:
|
||||
** 3/10/89, DSJ, Created.
|
||||
** 7/13/89, DSJ, Return contents of node instead of node itself.
|
||||
*/
|
||||
KDTreeSearch search(Tree, Query, QuerySize);
|
||||
search.Search(NumberOfResults, DBuffer, NBuffer);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Walk a given Tree with action.
|
||||
/** Walk a given Tree with action. */
|
||||
void KDWalk(KDTREE *Tree, void_proc action, void *context) {
|
||||
if (Tree->Root.Left != NULL)
|
||||
Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
|
||||
@ -343,22 +336,19 @@ void KDWalk(KDTREE *Tree, void_proc action, void *context) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeKDTree(KDTREE *Tree) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Tree tree data structure to be released
|
||||
** Operation:
|
||||
** This routine frees all memory which is allocated to the
|
||||
** specified KD-tree. This includes the data structure for
|
||||
** the kd-tree itself plus the data structures for each node
|
||||
** in the tree. It does not include the Key and Data items
|
||||
** which are pointed to by the nodes. This memory is left
|
||||
** untouched.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History:
|
||||
** 5/26/89, DSJ, Created.
|
||||
/**
|
||||
* This routine frees all memory which is allocated to the
|
||||
* specified KD-tree. This includes the data structure for
|
||||
* the kd-tree itself plus the data structures for each node
|
||||
* in the tree. It does not include the Key and Data items
|
||||
* which are pointed to by the nodes. This memory is left
|
||||
* untouched.
|
||||
* @param Tree tree data structure to be released
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: 5/26/89, DSJ, Created.
|
||||
*/
|
||||
void FreeKDTree(KDTREE *Tree) {
|
||||
FreeSubTree(Tree->Root.Left);
|
||||
memfree(Tree);
|
||||
} /* FreeKDTree */
|
||||
@ -368,25 +358,20 @@ void FreeKDTree(KDTREE *Tree) {
|
||||
Private Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) {
|
||||
/*
|
||||
** Parameters:
|
||||
** tree The tree to create the node for
|
||||
** Key Access key for new node in KD tree
|
||||
** Data ptr to data to be stored in new node
|
||||
** Index index of Key to branch on
|
||||
** Operation:
|
||||
** This routine allocates memory for a new K-D tree node
|
||||
** and places the specified Key and Data into it. The
|
||||
** left and right subtree pointers for the node are
|
||||
** initialized to empty subtrees.
|
||||
** Return:
|
||||
** pointer to new K-D tree node
|
||||
** Exceptions:
|
||||
** None
|
||||
** History:
|
||||
** 3/11/89, DSJ, Created.
|
||||
/**
|
||||
* This routine allocates memory for a new K-D tree node
|
||||
* and places the specified Key and Data into it. The
|
||||
* left and right subtree pointers for the node are
|
||||
* initialized to empty subtrees.
|
||||
* @param tree The tree to create the node for
|
||||
* @param Key Access key for new node in KD tree
|
||||
* @param Data ptr to data to be stored in new node
|
||||
* @param Index index of Key to branch on
|
||||
* @return pointer to new K-D tree node
|
||||
* @note Exceptions: None
|
||||
* @note History: 3/11/89, DSJ, Created.
|
||||
*/
|
||||
KDNODE *MakeKDNode(KDTREE *tree, FLOAT32 Key[], void *Data, int Index) {
|
||||
KDNODE *NewNode;
|
||||
|
||||
NewNode = (KDNODE *) Emalloc (sizeof (KDNODE));
|
||||
@ -410,10 +395,11 @@ void FreeKDNode(KDNODE *Node) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Recursively accumulate the k_closest points to query_point_ into results_.
|
||||
// Parameters:
|
||||
// Level level in tree of sub-tree to be searched
|
||||
// SubTree sub-tree to be searched
|
||||
/**
|
||||
* Recursively accumulate the k_closest points to query_point_ into results_.
|
||||
* @param level level in tree of sub-tree to be searched
|
||||
* @param sub_tree sub-tree to be searched
|
||||
*/
|
||||
void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
|
||||
if (level >= tree_->KeySize)
|
||||
level = 0;
|
||||
@ -456,12 +442,13 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Returns the Euclidean distance squared between p1 and p2 for all essential
|
||||
// dimensions.
|
||||
// Parameters:
|
||||
// k keys are in k-space
|
||||
// dim dimension descriptions (essential, circular, etc)
|
||||
// p1,p2 two different points in K-D space
|
||||
/**
|
||||
* Returns the Euclidean distance squared between p1 and p2 for all essential
|
||||
* dimensions.
|
||||
* @param k keys are in k-space
|
||||
* @param dim dimension descriptions (essential, circular, etc)
|
||||
* @param p1,p2 two different points in K-D space
|
||||
*/
|
||||
FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
|
||||
FLOAT32 total_distance = 0;
|
||||
|
||||
@ -488,10 +475,10 @@ FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Return whether the query region (the smallest known circle about
|
||||
// query_point_ containing results->k_ points) intersects the box specified
|
||||
// between lower and upper. For circular dimensions, we also check the point
|
||||
// one wrap distance away from the query.
|
||||
/// Return whether the query region (the smallest known circle about
|
||||
/// query_point_ containing results->k_ points) intersects the box specified
|
||||
/// between lower and upper. For circular dimensions, we also check the point
|
||||
/// one wrap distance away from the query.
|
||||
bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
|
||||
FLOAT32 *query = query_point_;
|
||||
FLOAT64 total_distance = 0.0;
|
||||
@ -530,20 +517,21 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Walk a tree, calling action once on each node.
|
||||
//
|
||||
// Parameters:
|
||||
// tree root of the tree being walked.
|
||||
// action action to be performed at every node
|
||||
// context action's context
|
||||
// sub_tree ptr to root of subtree to be walked
|
||||
// level current level in the tree for this node
|
||||
// Operation:
|
||||
// This routine walks thru the specified sub_tree and invokes action
|
||||
// action at each node as follows:
|
||||
// action(context, data, level)
|
||||
// data the data contents of the node being visited,
|
||||
// level is the level of the node in the tree with the root being level 0.
|
||||
/**
|
||||
* Walk a tree, calling action once on each node.
|
||||
*
|
||||
* Operation:
|
||||
* This routine walks through the specified sub_tree and invokes
|
||||
* action at each node as follows:
|
||||
* action(context, data, level)
|
||||
* data the data contents of the node being visited,
|
||||
* level is the level of the node in the tree with the root being level 0.
|
||||
* @param tree root of the tree being walked.
|
||||
* @param action action to be performed at every node
|
||||
* @param context action's context
|
||||
* @param sub_tree ptr to root of subtree to be walked
|
||||
* @param level current level in the tree for this node
|
||||
*/
|
||||
void Walk(KDTREE *tree, void_proc action, void *context,
|
||||
KDNODE *sub_tree, inT32 level) {
|
||||
(*action)(context, sub_tree->Data, level);
|
||||
@ -554,7 +542,7 @@ void Walk(KDTREE *tree, void_proc action, void *context,
|
||||
}
|
||||
|
||||
|
||||
// Given a subtree nodes, insert all of its elements into tree.
|
||||
/** Given a subtree nodes, insert all of its elements into tree. */
|
||||
void InsertNodes(KDTREE *tree, KDNODE *nodes) {
|
||||
if (nodes == NULL)
|
||||
return;
|
||||
@ -564,11 +552,11 @@ void InsertNodes(KDTREE *tree, KDNODE *nodes) {
|
||||
InsertNodes(tree, nodes->Right);
|
||||
}
|
||||
|
||||
// Free all of the nodes of a sub tree.
|
||||
/** Free all of the nodes of a sub tree. */
|
||||
void FreeSubTree(KDNODE *sub_tree) {
|
||||
if (sub_tree != NULL) {
|
||||
FreeSubTree(sub_tree->Left);
|
||||
FreeSubTree(sub_tree->Right);
|
||||
memfree(sub_tree);
|
||||
}
|
||||
} /* FreeSubTree */
|
||||
}
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "mf.h"
|
||||
|
||||
#include "featdefs.h"
|
||||
@ -28,24 +28,25 @@
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
----------------------------------------------------------------------------**/
|
||||
/**----------------------------------------------------------------------------
|
||||
----------------------------------------------------------------------------*/
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract micro-features from
|
||||
** denorm control parameter to feature extractor.
|
||||
** Globals: none
|
||||
** Operation: Call the old micro-feature extractor and then copy
|
||||
** the features into the new format. Then deallocate the
|
||||
** old micro-features.
|
||||
** Return: Micro-features for Blob.
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 18:06:38 1990, DSJ, Created.
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Call the old micro-feature extractor and then copy
|
||||
* the features into the new format. Then deallocate the
|
||||
* old micro-features.
|
||||
* @param Blob blob to extract micro-features from
|
||||
* @param bl_denorm currently unused
|
||||
* @param cn_denorm control parameter to feature extractor.
|
||||
* @param fx_info currently unused
|
||||
* @return Micro-features for Blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed May 23 18:06:38 1990, DSJ, Created.
|
||||
*/
|
||||
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
int NumFeatures;
|
||||
MICROFEATURES Features, OldFeatures;
|
||||
FEATURE_SET FeatureSet;
|
||||
|
@ -15,44 +15,36 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "mfdefs.h"
|
||||
#include "emalloc.h"
|
||||
#include <math.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MICROFEATURE NewMicroFeature() {
|
||||
/*
|
||||
** Parameters: none
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine allocates and returns a new micro-feature
|
||||
** data structure.
|
||||
** Return: New micro-feature.
|
||||
** Exceptions: none
|
||||
** History: 7/27/89, DSJ, Created.
|
||||
/**
|
||||
* This routine allocates and returns a new micro-feature
|
||||
* data structure.
|
||||
* @return New MICROFEATURE
|
||||
* @note History: 7/27/89, DSJ, Created.
|
||||
*/
|
||||
MICROFEATURE NewMicroFeature() {
|
||||
return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK)));
|
||||
} /* NewMicroFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
|
||||
/*
|
||||
** Parameters:
|
||||
** MicroFeatures list of micro-features to be freed
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine deallocates all of the memory consumed by
|
||||
** a list of micro-features.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 7/27/89, DSJ, Created.
|
||||
/**
|
||||
* This routine deallocates all of the memory consumed by
|
||||
* a list of micro-features.
|
||||
* @param MicroFeatures list of micro-features to be freed
|
||||
* @return none
|
||||
* @note History: 7/27/89, DSJ, Created.
|
||||
*/
|
||||
void FreeMicroFeatures(MICROFEATURES MicroFeatures) {
|
||||
destroy_nodes(MicroFeatures, Efree);
|
||||
} /* FreeMicroFeatures */
|
||||
|
@ -35,7 +35,7 @@
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Convert a blob into a list of MFOUTLINEs (float-based microfeature format).
|
||||
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */
|
||||
LIST ConvertBlob(TBLOB *blob) {
|
||||
LIST outlines = NIL_LIST;
|
||||
return (blob == NULL)
|
||||
@ -45,7 +45,7 @@ LIST ConvertBlob(TBLOB *blob) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Convert a TESSLINE into the float-based MFOUTLINE micro-feature format.
|
||||
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
|
||||
MFOUTLINE ConvertOutline(TESSLINE *outline) {
|
||||
MFEDGEPT *NewPoint;
|
||||
MFOUTLINE MFOutline = NIL_LIST;
|
||||
@ -81,12 +81,13 @@ MFOUTLINE ConvertOutline(TESSLINE *outline) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
|
||||
//
|
||||
// Parameters:
|
||||
// outline first outline to be converted
|
||||
// mf_outlines list to add converted outlines to
|
||||
// outline_type are the outlines outer or holes?
|
||||
/**
|
||||
* Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
|
||||
*
|
||||
* @param outline first outline to be converted
|
||||
* @param mf_outlines list to add converted outlines to
|
||||
* @param outline_type are the outlines outer or holes?
|
||||
*/
|
||||
LIST ConvertOutlines(TESSLINE *outline,
|
||||
LIST mf_outlines,
|
||||
OUTLINETYPE outline_type) {
|
||||
@ -102,26 +103,23 @@ LIST ConvertOutlines(TESSLINE *outline,
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine searches thru the specified outline, computes
|
||||
* a slope for each vector in the outline, and marks each
|
||||
* vector as having one of the following directions:
|
||||
* N, S, E, W, NE, NW, SE, SW
|
||||
* This information is then stored in the outline and the
|
||||
* outline is returned.
|
||||
* @param Outline micro-feature outline to analyze
|
||||
* @param MinSlope controls "snapping" of segments to horizontal
|
||||
* @param MaxSlope controls "snapping" of segments to vertical
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/21/89, DSJ, Created.
|
||||
*/
|
||||
void FindDirectionChanges(MFOUTLINE Outline,
|
||||
FLOAT32 MinSlope,
|
||||
FLOAT32 MaxSlope) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline micro-feature outline to analyze
|
||||
** MinSlope controls "snapping" of segments to horizontal
|
||||
** MaxSlope controls "snapping" of segments to vertical
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine searches thru the specified outline, computes
|
||||
** a slope for each vector in the outline, and marks each
|
||||
** vector as having one of the following directions:
|
||||
** N, S, E, W, NE, NW, SE, SW
|
||||
** This information is then stored in the outline and the
|
||||
** outline is returned.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 7/21/89, DSJ, Created.
|
||||
*/
|
||||
MFEDGEPT *Current;
|
||||
MFEDGEPT *Last;
|
||||
MFOUTLINE EdgePoint;
|
||||
@ -145,18 +143,15 @@ void FindDirectionChanges(MFOUTLINE Outline,
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeMFOutline(void *arg) { //MFOUTLINE Outline)
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline micro-feature outline to be freed
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine deallocates all of the memory consumed by
|
||||
** a micro-feature outline.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 7/27/89, DSJ, Created.
|
||||
/**
|
||||
* This routine deallocates all of the memory consumed by
|
||||
* a micro-feature outline.
|
||||
* @param arg micro-feature outline to be freed
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/27/89, DSJ, Created.
|
||||
*/
|
||||
void FreeMFOutline(void *arg) { //MFOUTLINE Outline)
|
||||
MFOUTLINE Start;
|
||||
MFOUTLINE Outline = (MFOUTLINE) arg;
|
||||
|
||||
@ -172,39 +167,35 @@ void FreeMFOutline(void *arg) { //MFOUTLINE Outline
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeOutlines(LIST Outlines) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outlines list of mf-outlines to be freed
|
||||
** Globals: none
|
||||
** Operation: Release all memory consumed by the specified list
|
||||
** of outlines.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Thu Dec 13 16:14:50 1990, DSJ, Created.
|
||||
/**
|
||||
* Release all memory consumed by the specified list
|
||||
* of outlines.
|
||||
* @param Outlines list of mf-outlines to be freed
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Dec 13 16:14:50 1990, DSJ, Created.
|
||||
*/
|
||||
void FreeOutlines(LIST Outlines) {
|
||||
destroy_nodes(Outlines, FreeMFOutline);
|
||||
} /* FreeOutlines */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void MarkDirectionChanges(MFOUTLINE Outline) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline micro-feature outline to analyze
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine searches thru the specified outline and finds
|
||||
** the points at which the outline changes direction. These
|
||||
** points are then marked as "extremities". This routine is
|
||||
** used as an alternative to FindExtremities(). It forces the
|
||||
** endpoints of the microfeatures to be at the direction
|
||||
** changes rather than at the midpoint between direction
|
||||
** changes.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 6/29/90, DSJ, Created.
|
||||
/**
|
||||
* This routine searches thru the specified outline and finds
|
||||
* the points at which the outline changes direction. These
|
||||
* points are then marked as "extremities". This routine is
|
||||
* used as an alternative to FindExtremities(). It forces the
|
||||
* endpoints of the microfeatures to be at the direction
|
||||
* changes rather than at the midpoint between direction
|
||||
* changes.
|
||||
* @param Outline micro-feature outline to analyze
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 6/29/90, DSJ, Created.
|
||||
*/
|
||||
void MarkDirectionChanges(MFOUTLINE Outline) {
|
||||
MFOUTLINE Current;
|
||||
MFOUTLINE Last;
|
||||
MFOUTLINE First;
|
||||
@ -225,28 +216,26 @@ void MarkDirectionChanges(MFOUTLINE Outline) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Return a new edge point for a micro-feature outline.
|
||||
/** Return a new edge point for a micro-feature outline. */
|
||||
MFEDGEPT *NewEdgePoint() {
|
||||
return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT"));
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
|
||||
/*
|
||||
** Parameters:
|
||||
** EdgePoint start search from this point
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine returns the next point in the micro-feature
|
||||
** outline that is an extremity. The search starts after
|
||||
** EdgePoint. The routine assumes that the outline being
|
||||
** searched is not a degenerate outline (i.e. it must have
|
||||
** 2 or more edge points).
|
||||
** Return: Next extremity in the outline after EdgePoint.
|
||||
** Exceptions: none
|
||||
** History: 7/26/89, DSJ, Created.
|
||||
/**
|
||||
* This routine returns the next point in the micro-feature
|
||||
* outline that is an extremity. The search starts after
|
||||
* EdgePoint. The routine assumes that the outline being
|
||||
* searched is not a degenerate outline (i.e. it must have
|
||||
* 2 or more edge points).
|
||||
* @param EdgePoint start search from this point
|
||||
* @return Next extremity in the outline after EdgePoint.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/26/89, DSJ, Created.
|
||||
*/
|
||||
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
|
||||
EdgePoint = NextPointAfter(EdgePoint);
|
||||
while (!PointAt(EdgePoint)->ExtremityMark)
|
||||
EdgePoint = NextPointAfter(EdgePoint);
|
||||
@ -257,25 +246,23 @@ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine normalizes the coordinates of the specified
|
||||
* outline so that the outline is deskewed down to the
|
||||
* baseline, translated so that x=0 is at XOrigin, and scaled
|
||||
* so that the height of a character cell from descender to
|
||||
* ascender is 1. Of this height, 0.25 is for the descender,
|
||||
* 0.25 for the ascender, and 0.5 for the x-height. The
|
||||
* y coordinate of the baseline is 0.
|
||||
* @param Outline outline to be normalized
|
||||
* @param XOrigin x-origin of text
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 8/2/89, DSJ, Created.
|
||||
*/
|
||||
void NormalizeOutline(MFOUTLINE Outline,
|
||||
FLOAT32 XOrigin) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline outline to be normalized
|
||||
** XOrigin x-origin of text
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine normalizes the coordinates of the specified
|
||||
** outline so that the outline is deskewed down to the
|
||||
** baseline, translated so that x=0 is at XOrigin, and scaled
|
||||
** so that the height of a character cell from descender to
|
||||
** ascender is 1. Of this height, 0.25 is for the descender,
|
||||
** 0.25 for the ascender, and 0.5 for the x-height. The
|
||||
** y coordinate of the baseline is 0.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 8/2/89, DSJ, Created.
|
||||
*/
|
||||
if (Outline == NIL_LIST)
|
||||
return;
|
||||
|
||||
@ -292,27 +279,27 @@ void NormalizeOutline(MFOUTLINE Outline,
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
/**
|
||||
* This routine normalizes every outline in Outlines
|
||||
* according to the currently selected normalization method.
|
||||
* It also returns the scale factors that it used to do this
|
||||
* scaling. The scale factors returned represent the x and
|
||||
* y sizes in the normalized coordinate system that correspond
|
||||
* to 1 pixel in the original coordinate system.
|
||||
*
|
||||
* Globals:
|
||||
* - classify_norm_method method being used for normalization
|
||||
* - classify_char_norm_range map radius of gyration to this value
|
||||
* @param Outlines list of outlines to be normalized
|
||||
* @param XScale x-direction scale factor used by routine
|
||||
* @param YScale y-direction scale factor used by routine
|
||||
* @return none (Outlines are changed and XScale and YScale are updated)
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Dec 14 08:14:55 1990, DSJ, Created.
|
||||
*/
|
||||
void Classify::NormalizeOutlines(LIST Outlines,
|
||||
FLOAT32 *XScale,
|
||||
FLOAT32 *YScale) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outlines list of outlines to be normalized
|
||||
** XScale x-direction scale factor used by routine
|
||||
** YScale y-direction scale factor used by routine
|
||||
** Globals:
|
||||
** classify_norm_method method being used for normalization
|
||||
** classify_char_norm_range map radius of gyration to this value
|
||||
** Operation: This routine normalizes every outline in Outlines
|
||||
** according to the currently selected normalization method.
|
||||
** It also returns the scale factors that it used to do this
|
||||
** scaling. The scale factors returned represent the x and
|
||||
** y sizes in the normalized coordinate system that correspond
|
||||
** to 1 pixel in the original coordinate system.
|
||||
** Return: none (Outlines are changed and XScale and YScale are updated)
|
||||
** Exceptions: none
|
||||
** History: Fri Dec 14 08:14:55 1990, DSJ, Created.
|
||||
*/
|
||||
MFOUTLINE Outline;
|
||||
|
||||
switch (classify_norm_method) {
|
||||
@ -331,25 +318,23 @@ void Classify::NormalizeOutlines(LIST Outlines,
|
||||
} /* NormalizeOutlines */
|
||||
} // namespace tesseract
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start, End defines segment of outline to be modified
|
||||
** Direction new direction to assign to segment
|
||||
** Globals: none
|
||||
** Operation: Change the direction of every vector in the specified
|
||||
** outline segment to Direction. The segment to be changed
|
||||
** starts at Start and ends at End. Note that the previous
|
||||
** direction of End must also be changed to reflect the
|
||||
** change in direction of the point before it.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri May 4 10:42:04 1990, DSJ, Created.
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Change the direction of every vector in the specified
|
||||
* outline segment to Direction. The segment to be changed
|
||||
* starts at Start and ends at End. Note that the previous
|
||||
* direction of End must also be changed to reflect the
|
||||
* change in direction of the point before it.
|
||||
* @param Start,End defines segment of outline to be modified
|
||||
* @param Direction new direction to assign to segment
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri May 4 10:42:04 1990, DSJ, Created.
|
||||
*/
|
||||
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
|
||||
MFOUTLINE Current;
|
||||
|
||||
for (Current = Start; Current != End; Current = NextPointAfter (Current))
|
||||
@ -360,21 +345,18 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
|
||||
} /* ChangeDirection */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline outline to be character normalized
|
||||
** XCenter, YCenter center point for normalization
|
||||
** XScale, YScale scale factors for normalization
|
||||
** Globals: none
|
||||
** Operation: This routine normalizes each point in Outline by
|
||||
** translating it to the specified center and scaling it
|
||||
** anisotropically according to the given scale factors.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Dec 14 10:27:11 1990, DSJ, Created.
|
||||
/**
|
||||
* This routine normalizes each point in Outline by
|
||||
* translating it to the specified center and scaling it
|
||||
* anisotropically according to the given scale factors.
|
||||
* @param Outline outline to be character normalized
|
||||
* @param cn_denorm
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Dec 14 10:27:11 1990, DSJ, Created.
|
||||
*/
|
||||
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
|
||||
MFOUTLINE First, Current;
|
||||
MFEDGEPT *CurrentPoint;
|
||||
|
||||
@ -397,32 +379,29 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
|
||||
} /* CharNormalizeOutline */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine computes the slope from Start to Finish and
|
||||
* then computes the approximate direction of the line
|
||||
* segment from Start to Finish. The direction is quantized
|
||||
* into 8 buckets:
|
||||
* N, S, E, W, NE, NW, SE, SW
|
||||
* Both the slope and the direction are then stored into
|
||||
* the appropriate fields of the Start edge point. The
|
||||
* direction is also stored into the PreviousDirection field
|
||||
* of the Finish edge point.
|
||||
* @param Start starting point to compute direction from
|
||||
* @param Finish finishing point to compute direction to
|
||||
* @param MinSlope slope below which lines are horizontal
|
||||
* @param MaxSlope slope above which lines are vertical
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/25/89, DSJ, Created.
|
||||
*/
|
||||
void ComputeDirection(MFEDGEPT *Start,
|
||||
MFEDGEPT *Finish,
|
||||
FLOAT32 MinSlope,
|
||||
FLOAT32 MaxSlope) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start starting point to compute direction from
|
||||
** Finish finishing point to compute direction to
|
||||
** MinSlope slope below which lines are horizontal
|
||||
** MaxSlope slope above which lines are vertical
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine computes the slope from Start to Finish and
|
||||
** and then computes the approximate direction of the line
|
||||
** segment from Start to Finish. The direction is quantized
|
||||
** into 8 buckets:
|
||||
** N, S, E, W, NE, NW, SE, SW
|
||||
** Both the slope and the direction are then stored into
|
||||
** the appropriate fields of the Start edge point. The
|
||||
** direction is also stored into the PreviousDirection field
|
||||
** of the Finish edge point.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 7/25/89, DSJ, Created.
|
||||
*/
|
||||
FVECTOR Delta;
|
||||
|
||||
Delta.x = Finish->Point.x - Start->Point.x;
|
||||
@ -471,23 +450,20 @@ void ComputeDirection(MFEDGEPT *Start,
|
||||
Start->Direction = west;
|
||||
}
|
||||
Finish->PreviousDirection = Start->Direction;
|
||||
} /* ComputeDirection */
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||
/*
|
||||
** Parameters:
|
||||
** EdgePoint start search from this point
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine returns the next point in the micro-feature
|
||||
** outline that has a direction different than EdgePoint. The
|
||||
** routine assumes that the outline being searched is not a
|
||||
** degenerate outline (i.e. it must have 2 or more edge points).
|
||||
** Return: Point of next direction change in micro-feature outline.
|
||||
** Exceptions: none
|
||||
** History: 7/25/89, DSJ, Created.
|
||||
/**
|
||||
* This routine returns the next point in the micro-feature
|
||||
* outline that has a direction different than EdgePoint. The
|
||||
* routine assumes that the outline being searched is not a
|
||||
* degenerate outline (i.e. it must have 2 or more edge points).
|
||||
* @param EdgePoint start search from this point
|
||||
* @return Point of next direction change in micro-feature outline.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/25/89, DSJ, Created.
|
||||
*/
|
||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||
DIRECTION InitialDirection;
|
||||
|
||||
InitialDirection = PointAt (EdgePoint)->Direction;
|
||||
@ -501,4 +477,4 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||
next_pt != NULL && !PointAt(next_pt)->Hidden);
|
||||
|
||||
return (EdgePoint);
|
||||
} /* NextDirectionChange */
|
||||
}
|
||||
|
133
classify/mfx.cpp
133
classify/mfx.cpp
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "mfdefs.h"
|
||||
#include "mfoutline.h"
|
||||
#include "clusttool.h" //NEEDED
|
||||
@ -28,9 +28,9 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Variables
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
/* old numbers corresponded to 10.0 degrees and 80.0 degrees */
|
||||
double_VAR(classify_min_slope, 0.414213562,
|
||||
@ -38,9 +38,9 @@ double_VAR(classify_min_slope, 0.414213562,
|
||||
double_VAR(classify_max_slope, 2.414213562,
|
||||
"Slope above which lines are called vertical");
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Macros
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/* miscellaneous macros */
|
||||
#define NormalizeAngle(A) ( (((A)<0)?((A)+2*PI):(A)) / (2*PI) )
|
||||
|
||||
@ -54,25 +54,22 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
|
||||
|
||||
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract micro-features from
|
||||
** denorm control parameter to feature extractor
|
||||
** Operation:
|
||||
** This routine extracts micro-features from the specified
|
||||
** blob and returns a list of the micro-features. All
|
||||
** micro-features are normalized according to the specified
|
||||
** line statistics.
|
||||
** Return: List of micro-features extracted from the blob.
|
||||
** Exceptions: none
|
||||
** History: 7/21/89, DSJ, Created.
|
||||
/**
|
||||
* This routine extracts micro-features from the specified
|
||||
* blob and returns a list of the micro-features. All
|
||||
* micro-features are normalized according to the specified
|
||||
* line statistics.
|
||||
* @param Blob blob to extract micro-features from
|
||||
* @param cn_denorm control parameter to feature extractor
|
||||
* @return List of micro-features extracted from the blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/21/89, DSJ, Created.
|
||||
*/
|
||||
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
MICROFEATURES MicroFeatures = NIL_LIST;
|
||||
LIST Outlines;
|
||||
LIST RemainingOutlines;
|
||||
@ -104,26 +101,23 @@ MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
Private Code
|
||||
---------------------------------------------------------------------------*/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start starting edge point of micro-feature
|
||||
** End ending edge point of micro-feature
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine computes the orientation parameter of the
|
||||
** specified micro-feature. The orientation is the angle of
|
||||
** the vector from Start to End. It is normalized to a number
|
||||
** between 0 and 1 where 0 corresponds to 0 degrees and 1
|
||||
** corresponds to 360 degrees. The actual range is [0,1), i.e.
|
||||
** 1 is excluded from the range (since it is actually the
|
||||
** same orientation as 0). This routine assumes that Start
|
||||
** and End are not the same point.
|
||||
** Return: Orientation parameter for the specified micro-feature.
|
||||
** Exceptions: none
|
||||
** History: 7/27/89, DSJ, Created.
|
||||
/**
|
||||
* This routine computes the orientation parameter of the
|
||||
* specified micro-feature. The orientation is the angle of
|
||||
* the vector from Start to End. It is normalized to a number
|
||||
* between 0 and 1 where 0 corresponds to 0 degrees and 1
|
||||
* corresponds to 360 degrees. The actual range is [0,1), i.e.
|
||||
* 1 is excluded from the range (since it is actually the
|
||||
* same orientation as 0). This routine assumes that Start
|
||||
* and End are not the same point.
|
||||
* @param Start starting edge point of micro-feature
|
||||
* @param End ending edge point of micro-feature
|
||||
* @note Globals: none
|
||||
* @return Orientation parameter for the specified micro-feature.
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/27/89, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
|
||||
FLOAT32 Orientation;
|
||||
|
||||
Orientation = NormalizeAngle (AngleFrom (Start->Point, End->Point));
|
||||
@ -135,20 +129,17 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) {
|
||||
} /* ComputeOrientation */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Convert Outline to MicroFeatures
|
||||
* @param Outline outline to extract micro-features from
|
||||
* @param MicroFeatures list of micro-features to add to
|
||||
* @return List of micro-features with new features added to front.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 7/26/89, DSJ, Created.
|
||||
*/
|
||||
MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
|
||||
MICROFEATURES MicroFeatures) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline outline to extract micro-features from
|
||||
** MicroFeatures list of micro-features to add to
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine
|
||||
** Return: List of micro-features with new features added to front.
|
||||
** Exceptions: none
|
||||
** History: 7/26/89, DSJ, Created.
|
||||
*/
|
||||
MFOUTLINE Current;
|
||||
MFOUTLINE Last;
|
||||
MFOUTLINE First;
|
||||
@ -174,26 +165,24 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline,
|
||||
} /* ConvertToMicroFeatures */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start starting point of micro-feature
|
||||
** End ending point of micro-feature
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine computes the feature parameters which describe
|
||||
** the micro-feature that starts at Start and ends at End.
|
||||
** A new micro-feature is allocated, filled with the feature
|
||||
** parameters, and returned. The routine assumes that
|
||||
** Start and End are not the same point. If they are the
|
||||
** same point, NULL is returned, a warning message is
|
||||
** printed, and the current outline is dumped to stdout.
|
||||
** Return: New micro-feature or NULL if the feature was rejected.
|
||||
** Exceptions: none
|
||||
** History: 7/26/89, DSJ, Created.
|
||||
** 11/17/89, DSJ, Added handling for Start and End same point.
|
||||
/**
|
||||
* This routine computes the feature parameters which describe
|
||||
* the micro-feature that starts at Start and ends at End.
|
||||
* A new micro-feature is allocated, filled with the feature
|
||||
* parameters, and returned. The routine assumes that
|
||||
* Start and End are not the same point. If they are the
|
||||
* same point, NULL is returned, a warning message is
|
||||
* printed, and the current outline is dumped to stdout.
|
||||
* @param Start starting point of micro-feature
|
||||
* @param End ending point of micro-feature
|
||||
* @return New micro-feature or NULL if the feature was rejected.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History:
|
||||
* - 7/26/89, DSJ, Created.
|
||||
* - 11/17/89, DSJ, Added handling for Start and End same point.
|
||||
*/
|
||||
MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) {
|
||||
MICROFEATURE NewFeature;
|
||||
MFEDGEPT *P1, *P2;
|
||||
|
||||
|
@ -18,12 +18,12 @@
|
||||
#ifndef MFX_H
|
||||
#define MFX_H
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "mfdefs.h"
|
||||
#include "params.h"
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Variables
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
@ -33,7 +33,7 @@ extern double_VAR_H(classify_min_slope, 0.414213562,
|
||||
extern double_VAR_H(classify_max_slope, 2.414213562,
|
||||
"Slope above which lines are called vertical");
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm);
|
||||
|
@ -15,50 +15,50 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "normfeat.h"
|
||||
|
||||
#include "intfx.h"
|
||||
#include "featdefs.h"
|
||||
#include "mfoutline.h"
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
// Return the length of the outline in baseline normalized form.
|
||||
/** Return the length of the outline in baseline normalized form. */
|
||||
FLOAT32 ActualOutlineLength(FEATURE Feature) {
|
||||
return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Return the character normalization feature for a blob.
|
||||
//
|
||||
// The features returned are in a scale where the x-height has been
|
||||
// normalized to live in the region y = [-0.25 .. 0.25]. Example ranges
|
||||
// for English below are based on the Linux font collection on 2009-12-04:
|
||||
//
|
||||
// Params[CharNormY]
|
||||
// The y coordinate of the grapheme's centroid.
|
||||
// English: [-0.27, 0.71]
|
||||
//
|
||||
// Params[CharNormLength]
|
||||
// The length of the grapheme's outline (tiny segments discarded),
|
||||
// divided by 10.0=LENGTH_COMPRESSION.
|
||||
// English: [0.16, 0.85]
|
||||
//
|
||||
// Params[CharNormRx]
|
||||
// The radius of gyration about the x axis, as measured from CharNormY.
|
||||
// English: [0.011, 0.34]
|
||||
//
|
||||
// Params[CharNormRy]
|
||||
// The radius of gyration about the y axis, as measured from
|
||||
// the x center of the grapheme's bounding box.
|
||||
// English: [0.011, 0.31]
|
||||
//
|
||||
/**
|
||||
* Return the character normalization feature for a blob.
|
||||
*
|
||||
* The features returned are in a scale where the x-height has been
|
||||
* normalized to live in the region y = [-0.25 .. 0.25]. Example ranges
|
||||
* for English below are based on the Linux font collection on 2009-12-04:
|
||||
*
|
||||
* - Params[CharNormY]
|
||||
* - The y coordinate of the grapheme's centroid.
|
||||
* - English: [-0.27, 0.71]
|
||||
*
|
||||
* - Params[CharNormLength]
|
||||
* - The length of the grapheme's outline (tiny segments discarded),
|
||||
* divided by 10.0=LENGTH_COMPRESSION.
|
||||
* - English: [0.16, 0.85]
|
||||
*
|
||||
* - Params[CharNormRx]
|
||||
* - The radius of gyration about the x axis, as measured from CharNormY.
|
||||
* - English: [0.011, 0.34]
|
||||
*
|
||||
* - Params[CharNormRy]
|
||||
* - The radius of gyration about the y axis, as measured from
|
||||
* the x center of the grapheme's bounding box.
|
||||
* - English: [0.011, 0.31]
|
||||
*/
|
||||
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET feature_set = NewFeatureSet(1);
|
||||
FEATURE feature = NewFeature(&CharNormDesc);
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "normmatch.h"
|
||||
|
||||
#include <stdio.h>
|
||||
@ -43,9 +43,9 @@ struct NORM_PROTOS
|
||||
int NumProtos;
|
||||
};
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
double NormEvidenceOf(register double NormAdj);
|
||||
|
||||
void PrintNormMatch(FILE *File,
|
||||
@ -55,38 +55,39 @@ void PrintNormMatch(FILE *File,
|
||||
|
||||
NORM_PROTOS *ReadNormProtos(FILE *File);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Variables
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
/* control knobs used to control the normalization adjustment process */
|
||||
/** control knobs used to control the normalization adjustment process */
|
||||
double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ...");
|
||||
double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ...");
|
||||
// Weight of width variance against height and vertical position.
|
||||
/** Weight of width variance against height and vertical position. */
|
||||
const double kWidthErrorWeighting = 0.125;
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
/**
|
||||
* This routine compares Features against each character
|
||||
* normalization proto for ClassId and returns the match
|
||||
* rating of the best match.
|
||||
* @param ClassId id of class to match against
|
||||
* @param feature character normalization feature
|
||||
* @param DebugMatch controls dump of debug info
|
||||
*
|
||||
* Globals:
|
||||
* #NormProtos character normalization prototypes
|
||||
*
|
||||
* @return Best match rating for Feature against protos of ClassId.
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Dec 19 16:56:12 1990, DSJ, Created.
|
||||
*/
|
||||
FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId,
|
||||
const FEATURE_STRUCT& feature,
|
||||
BOOL8 DebugMatch) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ClassId id of class to match against
|
||||
** Feature character normalization feature
|
||||
** DebugMatch controls dump of debug info
|
||||
** Globals:
|
||||
** NormProtos character normalization prototypes
|
||||
** Operation: This routine compares Features against each character
|
||||
** normalization proto for ClassId and returns the match
|
||||
** rating of the best match.
|
||||
** Return: Best match rating for Feature against protos of ClassId.
|
||||
** Exceptions: none
|
||||
** History: Wed Dec 19 16:56:12 1990, DSJ, Created.
|
||||
*/
|
||||
LIST Protos;
|
||||
FLOAT32 BestMatch;
|
||||
FLOAT32 Match;
|
||||
@ -170,16 +171,16 @@ void Classify::FreeNormProtos() {
|
||||
}
|
||||
} // namespace tesseract
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/**********************************************************************
|
||||
* NormEvidenceOf
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @name NormEvidenceOf
|
||||
*
|
||||
* Return the new type of evidence number corresponding to this
|
||||
* normalization adjustment. The equation that represents the transform is:
|
||||
* 1 / (1 + (NormAdj / midpoint) ^ curl)
|
||||
**********************************************************************/
|
||||
*/
|
||||
double NormEvidenceOf(register double NormAdj) {
|
||||
NormAdj /= classify_norm_adj_midpoint;
|
||||
|
||||
@ -194,22 +195,21 @@ double NormEvidenceOf(register double NormAdj) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine dumps out detailed normalization match info.
|
||||
* @param File open text file to dump match debug info to
|
||||
* @param NumParams # of parameters in proto and feature
|
||||
* @param Proto array of prototype parameters
|
||||
* @param Feature array of feature parameters
|
||||
* Globals: none
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Jan 2 09:49:35 1991, DSJ, Created.
|
||||
*/
|
||||
void PrintNormMatch(FILE *File,
|
||||
int NumParams,
|
||||
PROTOTYPE *Proto,
|
||||
FEATURE Feature) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to dump match debug info to
|
||||
** NumParams # of parameters in proto and feature
|
||||
** Proto[] array of prototype parameters
|
||||
** Feature[] array of feature parameters
|
||||
** Globals: none
|
||||
** Operation: This routine dumps out detailed normalization match info.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed Jan 2 09:49:35 1991, DSJ, Created.
|
||||
*/
|
||||
int i;
|
||||
FLOAT32 ParamMatch;
|
||||
FLOAT32 TotalMatch;
|
||||
@ -231,18 +231,18 @@ void PrintNormMatch(FILE *File,
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to read normalization protos from
|
||||
** Globals: none
|
||||
** Operation: This routine allocates a new data structure to hold
|
||||
** a set of character normalization protos. It then fills in
|
||||
** the data structure by reading from the specified File.
|
||||
** Return: Character normalization protos.
|
||||
** Exceptions: none
|
||||
** History: Wed Dec 19 16:38:49 1990, DSJ, Created.
|
||||
/**
|
||||
* This routine allocates a new data structure to hold
|
||||
* a set of character normalization protos. It then fills in
|
||||
* the data structure by reading from the specified File.
|
||||
* @param File open text file to read normalization protos from
|
||||
* @param end_offset
|
||||
* Globals: none
|
||||
* @return Character normalization protos.
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Dec 19 16:38:49 1990, DSJ, Created.
|
||||
*/
|
||||
NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
|
||||
NORM_PROTOS *NormProtos;
|
||||
int i;
|
||||
char unichar[2 * UNICHAR_LEN + 1];
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "ocrfeatures.h"
|
||||
#include "emalloc.h"
|
||||
#include "callcpp.h"
|
||||
@ -28,24 +28,20 @@
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureSet set of features to add Feature to
|
||||
** Feature feature to be added to FeatureSet
|
||||
** Globals: none
|
||||
** Operation: Add a feature to a feature set. If the feature set is
|
||||
** already full, FALSE is returned to indicate that the
|
||||
** feature could not be added to the set; otherwise, TRUE is
|
||||
** returned.
|
||||
** Return: TRUE if feature added to set, FALSE if set is already full.
|
||||
** Exceptions: none
|
||||
** History: Tue May 22 17:22:23 1990, DSJ, Created.
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Add a feature to a feature set. If the feature set is
|
||||
* already full, FALSE is returned to indicate that the
|
||||
* feature could not be added to the set; otherwise, TRUE is
|
||||
* returned.
|
||||
* @param FeatureSet set of features to add Feature to
|
||||
* @param Feature feature to be added to FeatureSet
|
||||
* @return TRUE if feature added to set, FALSE if set is already full.
|
||||
* @note History: Tue May 22 17:22:23 1990, DSJ, Created.
|
||||
*/
|
||||
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
|
||||
if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) {
|
||||
FreeFeature(Feature);
|
||||
return FALSE;
|
||||
@ -55,17 +51,13 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) {
|
||||
return TRUE;
|
||||
} /* AddFeature */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeFeature(FEATURE Feature) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Feature feature to be deallocated.
|
||||
** Globals: none
|
||||
** Operation: Release the memory consumed by the specified feature.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Mon May 21 13:33:27 1990, DSJ, Created.
|
||||
/**
|
||||
* Release the memory consumed by the specified feature.
|
||||
* @param Feature feature to be deallocated.
|
||||
* @return none
|
||||
* @note History: Mon May 21 13:33:27 1990, DSJ, Created.
|
||||
*/
|
||||
void FreeFeature(FEATURE Feature) {
|
||||
if (Feature) {
|
||||
free_struct (Feature, sizeof (FEATURE_STRUCT)
|
||||
+ sizeof (FLOAT32) * (Feature->Type->NumParams - 1),
|
||||
@ -75,19 +67,15 @@ void FreeFeature(FEATURE Feature) {
|
||||
} /* FreeFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeFeatureSet(FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureSet set of features to be freed
|
||||
** Globals: none
|
||||
** Operation: Release the memory consumed by the specified feature
|
||||
** set. This routine also frees the memory consumed by the
|
||||
** features contained in the set.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Mon May 21 13:59:46 1990, DSJ, Created.
|
||||
/**
|
||||
* Release the memory consumed by the specified feature
|
||||
* set. This routine also frees the memory consumed by the
|
||||
* features contained in the set.
|
||||
* @param FeatureSet set of features to be freed
|
||||
* @return none
|
||||
* @note History: Mon May 21 13:59:46 1990, DSJ, Created.
|
||||
*/
|
||||
void FreeFeatureSet(FEATURE_SET FeatureSet) {
|
||||
int i;
|
||||
|
||||
if (FeatureSet) {
|
||||
@ -98,18 +86,14 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) {
|
||||
} /* FreeFeatureSet */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureDesc description of feature to be created.
|
||||
** Globals: none
|
||||
** Operation: Allocate and return a new feature of the specified
|
||||
** type.
|
||||
** Return: New feature.
|
||||
** Exceptions: none
|
||||
** History: Mon May 21 14:06:42 1990, DSJ, Created.
|
||||
/**
|
||||
* Allocate and return a new feature of the specified
|
||||
* type.
|
||||
* @param FeatureDesc description of feature to be created.
|
||||
* @return New #FEATURE.
|
||||
* @note History: Mon May 21 14:06:42 1990, DSJ, Created.
|
||||
*/
|
||||
FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
FEATURE Feature;
|
||||
|
||||
Feature = (FEATURE) alloc_struct (sizeof (FEATURE_STRUCT) +
|
||||
@ -122,18 +106,14 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
} /* NewFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET NewFeatureSet(int NumFeatures) {
|
||||
/*
|
||||
** Parameters:
|
||||
** NumFeatures maximum # of features to be put in feature set
|
||||
** Globals: none
|
||||
** Operation: Allocate and return a new feature set large enough to
|
||||
** hold the specified number of features.
|
||||
** Return: New feature set.
|
||||
** Exceptions: none
|
||||
** History: Mon May 21 14:22:40 1990, DSJ, Created.
|
||||
/**
|
||||
* Allocate and return a new feature set large enough to
|
||||
* hold the specified number of features.
|
||||
* @param NumFeatures maximum # of features to be put in feature set
|
||||
* @return New #FEATURE_SET.
|
||||
* @note History: Mon May 21 14:22:40 1990, DSJ, Created.
|
||||
*/
|
||||
FEATURE_SET NewFeatureSet(int NumFeatures) {
|
||||
FEATURE_SET FeatureSet;
|
||||
|
||||
FeatureSet = (FEATURE_SET) Emalloc (sizeof (FEATURE_SET_STRUCT) +
|
||||
@ -145,23 +125,20 @@ FEATURE_SET NewFeatureSet(int NumFeatures) {
|
||||
} /* NewFeatureSet */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to read feature from
|
||||
** FeatureDesc specifies type of feature to read from File
|
||||
** Globals: none
|
||||
** Operation: Create a new feature of the specified type and read in
|
||||
** the value of its parameters from File. The extra penalty
|
||||
** for the feature is also computed by calling the appropriate
|
||||
** function for the specified feature type. The correct text
|
||||
** representation for a feature is a list of N floats where
|
||||
** N is the number of parameters in the feature.
|
||||
** Return: New feature read from File.
|
||||
** Exceptions: ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
|
||||
** History: Wed May 23 08:53:16 1990, DSJ, Created.
|
||||
/**
|
||||
* Create a new feature of the specified type and read in
|
||||
* the value of its parameters from File. The extra penalty
|
||||
* for the feature is also computed by calling the appropriate
|
||||
* function for the specified feature type. The correct text
|
||||
* representation for a feature is a list of N floats where
|
||||
* N is the number of parameters in the feature.
|
||||
* @param File open text file to read feature from
|
||||
* @param FeatureDesc specifies type of feature to read from File
|
||||
* @return New #FEATURE read from File.
|
||||
* @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected format
|
||||
* @note History: Wed May 23 08:53:16 1990, DSJ, Created.
|
||||
*/
|
||||
FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
FEATURE Feature;
|
||||
int i;
|
||||
|
||||
@ -177,22 +154,18 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
} /* ReadFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to read new feature set from
|
||||
** FeatureDesc specifies type of feature to read from File
|
||||
** Globals: none
|
||||
** Operation: Create a new feature set of the specified type and read in
|
||||
** the features from File. The correct text representation
|
||||
** for a feature set is an integer which specifies the number (N)
|
||||
** of features in a set followed by a list of N feature
|
||||
** descriptions.
|
||||
** Return: New feature set read from File.
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 09:17:31 1990, DSJ, Created.
|
||||
/**
|
||||
* Create a new feature set of the specified type and read in
|
||||
* the features from File. The correct text representation
|
||||
* for a feature set is an integer which specifies the number (N)
|
||||
* of features in a set followed by a list of N feature
|
||||
* descriptions.
|
||||
* @param File open text file to read new feature set from
|
||||
* @param FeatureDesc specifies type of feature to read from File
|
||||
* @return New feature set read from File.
|
||||
* @note History: Wed May 23 09:17:31 1990, DSJ, Created.
|
||||
*/
|
||||
FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
FEATURE_SET FeatureSet;
|
||||
int NumFeatures;
|
||||
int i;
|
||||
@ -208,20 +181,17 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
} /* ReadFeatureSet */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
|
||||
** Parameters:
|
||||
** Feature: feature to write out to str
|
||||
** str: string to write Feature to
|
||||
** Operation: Appends a textual representation of Feature to str.
|
||||
** This representation is simply a list of the N parameters
|
||||
** of the feature, terminated with a newline. It is assumed
|
||||
** that the ExtraPenalty field can be reconstructed from the
|
||||
** parameters of the feature. It is also assumed that the
|
||||
** feature type information is specified or assumed elsewhere.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 09:28:18 1990, DSJ, Created.
|
||||
/**
|
||||
* Appends a textual representation of Feature to str.
|
||||
* This representation is simply a list of the N parameters
|
||||
* of the feature, terminated with a newline. It is assumed
|
||||
* that the ExtraPenalty field can be reconstructed from the
|
||||
* parameters of the feature. It is also assumed that the
|
||||
* feature type information is specified or assumed elsewhere.
|
||||
* @param Feature feature to write out to str
|
||||
* @param str string to write Feature to
|
||||
* @return none
|
||||
* @note History: Wed May 23 09:28:18 1990, DSJ, Created.
|
||||
*/
|
||||
void WriteFeature(FEATURE Feature, STRING* str) {
|
||||
for (int i = 0; i < Feature->Type->NumParams; i++) {
|
||||
@ -234,19 +204,15 @@ void WriteFeature(FEATURE Feature, STRING* str) {
|
||||
} /* WriteFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureSet: feature set to write to File
|
||||
** str: string to write Feature to
|
||||
** Globals: none
|
||||
** Operation: Write a textual representation of FeatureSet to File.
|
||||
** This representation is an integer specifying the number of
|
||||
** features in the set, followed by a newline, followed by
|
||||
** text representations for each feature in the set.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 10:06:03 1990, DSJ, Created.
|
||||
/**
|
||||
* Write a textual representation of FeatureSet to File.
|
||||
* This representation is an integer specifying the number of
|
||||
* features in the set, followed by a newline, followed by
|
||||
* text representations for each feature in the set.
|
||||
* @param FeatureSet feature set to write to File
|
||||
* @param str string to write Feature to
|
||||
* @return none
|
||||
* @note History: Wed May 23 10:06:03 1990, DSJ, Created.
|
||||
*/
|
||||
void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
|
||||
if (FeatureSet) {
|
||||
@ -259,23 +225,22 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
|
||||
} /* WriteFeatureSet */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to write FeatureDesc to
|
||||
** FeatureDesc feature descriptor to write to File
|
||||
** Globals: none
|
||||
** Operation: Write a textual representation of FeatureDesc to File
|
||||
** in the old format (i.e. the format used by the clusterer).
|
||||
** This format is:
|
||||
** Number of Params
|
||||
** Description of Param 1
|
||||
** ...
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri May 25 15:27:18 1990, DSJ, Created.
|
||||
/**
|
||||
* Write a textual representation of FeatureDesc to File
|
||||
* in the old format (i.e. the format used by the clusterer).
|
||||
*
|
||||
* This format is:
|
||||
* @verbatim
|
||||
* Number of Params
|
||||
* Description of Param 1
|
||||
* ...
|
||||
* @endverbatim
|
||||
* @param File open text file to write FeatureDesc to
|
||||
* @param FeatureDesc feature descriptor to write to File
|
||||
* @return none
|
||||
* @note History: Fri May 25 15:27:18 1990, DSJ, Created.
|
||||
*/
|
||||
void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
int i;
|
||||
|
||||
fprintf (File, "%d\n", FeatureDesc->NumParams);
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "outfeat.h"
|
||||
|
||||
#include "classify.h"
|
||||
@ -28,24 +28,23 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract pico-features from
|
||||
** LineStats statistics on text row blob is in
|
||||
** Globals: none
|
||||
** Operation: Convert each segment in the outline to a feature
|
||||
** and return the features.
|
||||
** Return: Outline-features for Blob.
|
||||
** Exceptions: none
|
||||
** History: 11/13/90, DSJ, Created.
|
||||
** 05/24/91, DSJ, Updated for either char or baseline normalize.
|
||||
/**
|
||||
* Convert each segment in the outline to a feature
|
||||
* and return the features.
|
||||
* @param Blob blob to extract pico-features from
|
||||
* @return Outline-features for Blob.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History:
|
||||
* - 11/13/90, DSJ, Created.
|
||||
* - 05/24/91, DSJ, Updated for either char or baseline normalize.
|
||||
*/
|
||||
FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
|
||||
LIST Outlines;
|
||||
LIST RemainingOutlines;
|
||||
MFOUTLINE Outline;
|
||||
@ -71,30 +70,29 @@ FEATURE_SET Classify::ExtractOutlineFeatures(TBLOB *Blob) {
|
||||
} /* ExtractOutlineFeatures */
|
||||
} // namespace tesseract
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine computes the midpoint between Start and
|
||||
* End to obtain the x,y position of the outline-feature. It
|
||||
* also computes the direction from Start to End as the
|
||||
* direction of the outline-feature and the distance from
|
||||
* Start to End as the length of the outline-feature.
|
||||
* This feature is then
|
||||
* inserted into the next feature slot in FeatureSet.
|
||||
* @param Start starting point of outline-feature
|
||||
* @param End ending point of outline-feature
|
||||
* @param FeatureSet set to add outline-feature to
|
||||
* @return none (results are placed in FeatureSet)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 11/13/90, DSJ, Created.
|
||||
*/
|
||||
void AddOutlineFeatureToSet(FPOINT *Start,
|
||||
FPOINT *End,
|
||||
FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start starting point of outline-feature
|
||||
** End ending point of outline-feature
|
||||
** FeatureSet set to add outline-feature to
|
||||
** Globals: none
|
||||
** Operation: This routine computes the midpoint between Start and
|
||||
** End to obtain the x,y position of the outline-feature. It
|
||||
** also computes the direction from Start to End as the
|
||||
** direction of the outline-feature and the distance from
|
||||
** Start to End as the length of the outline-feature.
|
||||
** This feature is then
|
||||
** inserted into the next feature slot in FeatureSet.
|
||||
** Return: none (results are placed in FeatureSet)
|
||||
** Exceptions: none
|
||||
** History: 11/13/90, DSJ, Created.
|
||||
*/
|
||||
FEATURE Feature;
|
||||
|
||||
Feature = NewFeature(&OutlineFeatDesc);
|
||||
@ -108,21 +106,20 @@ void AddOutlineFeatureToSet(FPOINT *Start,
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline outline to extract outline-features from
|
||||
** FeatureSet set of features to add outline-features to
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine steps converts each section in the specified
|
||||
** outline to a feature described by its x,y position, length
|
||||
** and angle.
|
||||
** Return: none (results are returned in FeatureSet)
|
||||
** Exceptions: none
|
||||
** History: 11/13/90, DSJ, Created.
|
||||
** 5/24/91, DSJ, Added hidden edge capability.
|
||||
/**
|
||||
* This routine converts each section in the specified
|
||||
* outline to a feature described by its x,y position, length
|
||||
* and angle.
|
||||
* @param Outline outline to extract outline-features from
|
||||
* @param FeatureSet set of features to add outline-features to
|
||||
* @return none (results are returned in FeatureSet)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History:
|
||||
* - 11/13/90, DSJ, Created.
|
||||
* - 5/24/91, DSJ, Added hidden edge capability.
|
||||
*/
|
||||
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
MFOUTLINE Next;
|
||||
MFOUTLINE First;
|
||||
FPOINT FeatureStart;
|
||||
@ -152,19 +149,18 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void NormalizeOutlineX(FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureSet outline-features to be normalized
|
||||
** Globals: none
|
||||
** Operation: This routine computes the weighted average x position
|
||||
** over all of the outline-features in FeatureSet and then
|
||||
** renormalizes the outline-features to force this average
|
||||
** to be the x origin (i.e. x=0).
|
||||
** Return: none (FeatureSet is changed)
|
||||
** Exceptions: none
|
||||
** History: 11/13/90, DSJ, Created.
|
||||
/**
|
||||
* This routine computes the weighted average x position
|
||||
* over all of the outline-features in FeatureSet and then
|
||||
* renormalizes the outline-features to force this average
|
||||
* to be the x origin (i.e. x=0).
|
||||
* @param FeatureSet outline-features to be normalized
|
||||
* @return none (FeatureSet is changed)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: 11/13/90, DSJ, Created.
|
||||
*/
|
||||
void NormalizeOutlineX(FEATURE_SET FeatureSet) {
|
||||
int i;
|
||||
FEATURE Feature;
|
||||
FLOAT32 Length;
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "picofeat.h"
|
||||
|
||||
#include "classify.h"
|
||||
@ -49,23 +49,22 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet);
|
||||
|
||||
void NormalizePicoX(FEATURE_SET FeatureSet);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
namespace tesseract {
|
||||
FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract pico-features from
|
||||
** LineStats statistics on text row blob is in
|
||||
** Globals:
|
||||
** classify_norm_method normalization method currently specified
|
||||
** Operation: Dummy for now.
|
||||
** Return: Pico-features for Blob.
|
||||
** Exceptions: none
|
||||
** History: 9/4/90, DSJ, Created.
|
||||
/**
|
||||
* Operation: Dummy for now.
|
||||
*
|
||||
* Globals:
|
||||
* - classify_norm_method normalization method currently specified
|
||||
* @param Blob blob to extract pico-features from
|
||||
* @return Pico-features for Blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: 9/4/90, DSJ, Created.
|
||||
*/
|
||||
FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
|
||||
LIST Outlines;
|
||||
LIST RemainingOutlines;
|
||||
MFOUTLINE Outline;
|
||||
@ -88,29 +87,28 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) {
|
||||
} /* ExtractPicoFeatures */
|
||||
} // namespace tesseract
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine converts an entire segment of an outline
|
||||
* into a set of pico features which are added to
|
||||
* FeatureSet. The length of the segment is rounded to the
|
||||
* nearest whole number of pico-features. The pico-features
|
||||
* are spaced evenly over the entire segment.
|
||||
* Globals:
|
||||
* - classify_pico_feature_length length of a single pico-feature
|
||||
* @param Start starting point of pico-feature
|
||||
* @param End ending point of pico-feature
|
||||
* @param FeatureSet set to add pico-feature to
|
||||
* @return none (results are placed in FeatureSet)
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Apr 30 15:44:34 1991, DSJ, Created.
|
||||
*/
|
||||
void ConvertSegmentToPicoFeat(FPOINT *Start,
|
||||
FPOINT *End,
|
||||
FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Start starting point of pico-feature
|
||||
** End ending point of pico-feature
|
||||
** FeatureSet set to add pico-feature to
|
||||
** Globals:
|
||||
** classify_pico_feature_length length of a single pico-feature
|
||||
** Operation: This routine converts an entire segment of an outline
|
||||
** into a set of pico features which are added to
|
||||
** FeatureSet. The length of the segment is rounded to the
|
||||
** nearest whole number of pico-features. The pico-features
|
||||
** are spaced evenly over the entire segment.
|
||||
** Return: none (results are placed in FeatureSet)
|
||||
** Exceptions: none
|
||||
** History: Tue Apr 30 15:44:34 1991, DSJ, Created.
|
||||
*/
|
||||
FEATURE Feature;
|
||||
FLOAT32 Angle;
|
||||
FLOAT32 Length;
|
||||
@ -148,23 +146,21 @@ void ConvertSegmentToPicoFeat(FPOINT *Start,
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Outline outline to extract micro-features from
|
||||
** FeatureSet set of features to add pico-features to
|
||||
** Globals:
|
||||
** classify_pico_feature_length
|
||||
** length of features to be extracted
|
||||
** Operation:
|
||||
** This routine steps thru the specified outline and cuts it
|
||||
** up into pieces of equal length. These pieces become the
|
||||
** desired pico-features. Each segment in the outline
|
||||
** is converted into an integral number of pico-features.
|
||||
** Return: none (results are returned in FeatureSet)
|
||||
** Exceptions: none
|
||||
** History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
|
||||
/**
|
||||
* This routine steps thru the specified outline and cuts it
|
||||
* up into pieces of equal length. These pieces become the
|
||||
* desired pico-features. Each segment in the outline
|
||||
* is converted into an integral number of pico-features.
|
||||
*
|
||||
* Globals:
|
||||
* - classify_pico_feature_length length of features to be extracted
|
||||
* @param Outline outline to extract micro-features from
|
||||
* @param FeatureSet set of features to add pico-features to
|
||||
* @return none (results are returned in FeatureSet)
|
||||
* @note Exceptions: none
|
||||
* @note History: 4/30/91, DSJ, Adapted from ConvertToPicoFeatures().
|
||||
*/
|
||||
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
MFOUTLINE Next;
|
||||
MFOUTLINE First;
|
||||
MFOUTLINE Current;
|
||||
@ -194,19 +190,18 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void NormalizePicoX(FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FeatureSet pico-features to be normalized
|
||||
** Globals: none
|
||||
** Operation: This routine computes the average x position over all
|
||||
** of the pico-features in FeatureSet and then renormalizes
|
||||
** the pico-features to force this average to be the x origin
|
||||
** (i.e. x=0).
|
||||
** Return: none (FeatureSet is changed)
|
||||
** Exceptions: none
|
||||
** History: Tue Sep 4 16:50:08 1990, DSJ, Created.
|
||||
/**
|
||||
* This routine computes the average x position over all
|
||||
* of the pico-features in FeatureSet and then renormalizes
|
||||
* the pico-features to force this average to be the x origin
|
||||
* (i.e. x=0).
|
||||
* @param FeatureSet pico-features to be normalized
|
||||
* @return none (FeatureSet is changed)
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Sep 4 16:50:08 1990, DSJ, Created.
|
||||
*/
|
||||
void NormalizePicoX(FEATURE_SET FeatureSet) {
|
||||
int i;
|
||||
FEATURE Feature;
|
||||
FLOAT32 Origin = 0.0;
|
||||
@ -225,16 +220,15 @@ void NormalizePicoX(FEATURE_SET FeatureSet) {
|
||||
|
||||
namespace tesseract {
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @param blob blob to extract features from
|
||||
* @param fx_info
|
||||
* @return Integer character-normalized features for blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: 8/8/2011, rays, Created.
|
||||
*/
|
||||
FEATURE_SET Classify::ExtractIntCNFeatures(
|
||||
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
/*
|
||||
** Parameters:
|
||||
** blob blob to extract features from
|
||||
** denorm normalization/denormalization parameters.
|
||||
** Return: Integer character-normalized features for blob.
|
||||
** Exceptions: none
|
||||
** History: 8/8/2011, rays, Created.
|
||||
*/
|
||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||
@ -258,16 +252,15 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
|
||||
} /* ExtractIntCNFeatures */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* @param blob blob to extract features from
|
||||
* @param fx_info
|
||||
* @return Geometric (top/bottom/width) features for blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: 8/8/2011, rays, Created.
|
||||
*/
|
||||
FEATURE_SET Classify::ExtractIntGeoFeatures(
|
||||
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
/*
|
||||
** Parameters:
|
||||
** blob blob to extract features from
|
||||
** denorm normalization/denormalization parameters.
|
||||
** Return: Geometric (top/bottom/width) features for blob.
|
||||
** Exceptions: none
|
||||
** History: 8/8/2011, rays, Created.
|
||||
*/
|
||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||
|
@ -61,22 +61,28 @@ ConvNetCharClassifier::~ConvNetCharClassifier() {
|
||||
}
|
||||
}
|
||||
|
||||
// The main training function. Given a sample and a class ID the classifier
|
||||
// updates its parameters according to its learning algorithm. This function
|
||||
// is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
/**
|
||||
* The main training function. Given a sample and a class ID the classifier
|
||||
* updates its parameters according to its learning algorithm. This function
|
||||
* is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A secondary function needed for training. Allows the trainer to set the
|
||||
// value of any train-time paramter. This function is currently not
|
||||
// implemented. TODO(ahmadab): implement end-2-end training
|
||||
/**
|
||||
* A secondary function needed for training. Allows the trainer to set the
|
||||
* value of any train-time parameter. This function is currently not
|
||||
* implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
|
||||
// TODO(ahmadab): implementation of parameter initializing.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Folds the output of the NeuralNet using the loaded folding sets
|
||||
/**
|
||||
* Folds the output of the NeuralNet using the loaded folding sets
|
||||
*/
|
||||
void ConvNetCharClassifier::Fold() {
|
||||
// in case insensitive mode
|
||||
if (case_sensitive_ == false) {
|
||||
@ -125,8 +131,10 @@ void ConvNetCharClassifier::Fold() {
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the features of specified charsamp and feedforward the
|
||||
// specified nets
|
||||
/**
|
||||
* Compute the features of specified charsamp and feedforward the
|
||||
* specified nets
|
||||
*/
|
||||
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
|
||||
if (char_net_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
@ -173,7 +181,9 @@ bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// return the cost of being a char
|
||||
/**
|
||||
* return the cost of being a char
|
||||
*/
|
||||
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
if (RunNets(char_samp) == false) {
|
||||
return 0;
|
||||
@ -181,8 +191,10 @@ int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
|
||||
}
|
||||
|
||||
// classifies a charsamp and returns an alternate list
|
||||
// of chars sorted by char costs
|
||||
/**
|
||||
* classifies a charsamp and returns an alternate list
|
||||
* of chars sorted by char costs
|
||||
*/
|
||||
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
// run the needed nets
|
||||
if (RunNets(char_samp) == false) {
|
||||
@ -207,7 +219,9 @@ CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Set an external net (for training purposes)
|
||||
/**
|
||||
* Set an external net (for training purposes)
|
||||
*/
|
||||
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
|
||||
if (char_net_ != NULL) {
|
||||
delete char_net_;
|
||||
@ -216,8 +230,10 @@ void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
|
||||
char_net_ = char_net;
|
||||
}
|
||||
|
||||
// This function will return true if the file does not exist.
|
||||
// But will fail if the it did not pass the sanity checks
|
||||
/**
|
||||
* This function will return true if the file does not exist.
|
||||
* But will fail if it did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
@ -284,7 +300,9 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Init the classifier provided a data-path and a language string
|
||||
/**
|
||||
* Init the classifier provided a data-path and a language string
|
||||
*/
|
||||
bool ConvNetCharClassifier::Init(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
@ -308,9 +326,11 @@ bool ConvNetCharClassifier::Init(const string &data_file_path,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Load the classifier's Neural Nets
|
||||
// This function will return true if the net file does not exist.
|
||||
// But will fail if the net did not pass the sanity checks
|
||||
/**
|
||||
* Load the classifier's Neural Nets
|
||||
* This function will return true if the net file does not exist.
|
||||
* But will fail if the net did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string char_net_file;
|
||||
|
@ -99,10 +99,12 @@ CubeObject::~CubeObject() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Actually do the recognition using the specified language mode. If none
|
||||
// is specified, the default language model in the CubeRecoContext is used.
|
||||
// Returns the sorted list of alternate answers
|
||||
// The Word mode determines whether recognition is done as a word or a phrase
|
||||
/**
|
||||
* Actually do the recognition using the specified language model. If none
|
||||
* is specified, the default language model in the CubeRecoContext is used.
|
||||
* @return the sorted list of alternate answers
|
||||
* @param word_mode determines whether recognition is done as a word or a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
|
||||
if (char_samp_ == NULL) {
|
||||
return NULL;
|
||||
@ -197,18 +199,24 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
|
||||
return alt_list_;
|
||||
}
|
||||
|
||||
// Recognize the member char sample as a word
|
||||
/**
|
||||
* Recognize the member char sample as a word
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, true);
|
||||
}
|
||||
|
||||
// Recognize the member char sample as a word
|
||||
/**
|
||||
* Recognize the member char sample as a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, false);
|
||||
}
|
||||
|
||||
// Computes the cost of a specific string. This is done by performing
|
||||
// recognition of a language model that allows only the specified word
|
||||
/**
|
||||
* Computes the cost of a specific string. This is done by performing
|
||||
* recognition of a language model that allows only the specified word
|
||||
*/
|
||||
int CubeObject::WordCost(const char *str) {
|
||||
WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
|
||||
if (lang_mod == NULL) {
|
||||
|
@ -31,7 +31,9 @@ CubeUtils::CubeUtils() {
|
||||
CubeUtils::~CubeUtils() {
|
||||
}
|
||||
|
||||
// convert a prob to a cost (-ve log prob)
|
||||
/**
|
||||
* convert a prob to a cost (-ve log prob)
|
||||
*/
|
||||
int CubeUtils::Prob2Cost(double prob_val) {
|
||||
if (prob_val < MIN_PROB) {
|
||||
return MIN_PROB_COST;
|
||||
@ -39,12 +41,16 @@ int CubeUtils::Prob2Cost(double prob_val) {
|
||||
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
// converts a cost to probability
|
||||
/**
|
||||
* converts a cost to probability
|
||||
*/
|
||||
double CubeUtils::Cost2Prob(int cost) {
|
||||
return exp(-cost / PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
// computes the length of a NULL terminated char_32 string
|
||||
/**
|
||||
* computes the length of a NULL terminated char_32 string
|
||||
*/
|
||||
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
||||
if (char_32_ptr == NULL) {
|
||||
return 0;
|
||||
@ -54,7 +60,9 @@ int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
||||
return len;
|
||||
}
|
||||
|
||||
// compares two char_32 strings
|
||||
/**
|
||||
* compares two char_32 strings
|
||||
*/
|
||||
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
||||
const char_32 *pch1 = str1;
|
||||
const char_32 *pch2 = str2;
|
||||
@ -76,7 +84,9 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
||||
}
|
||||
}
|
||||
|
||||
// Duplicates a 32-bit char buffer
|
||||
/**
|
||||
* Duplicates a 32-bit char buffer
|
||||
*/
|
||||
char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
||||
int len = StrLen(str32);
|
||||
char_32 *new_str = new char_32[len + 1];
|
||||
@ -88,7 +98,9 @@ char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
||||
return new_str;
|
||||
}
|
||||
|
||||
// creates a char samp from a specified portion of the image
|
||||
/**
|
||||
* creates a char samp from a specified portion of the image
|
||||
*/
|
||||
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// get the raw img data from the image
|
||||
@ -105,7 +117,9 @@ CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// create a B/W image from a char_sample
|
||||
/**
|
||||
* create a B/W image from a char_sample
|
||||
*/
|
||||
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
||||
// parameter check
|
||||
if (char_samp == NULL) {
|
||||
@ -137,7 +151,9 @@ Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
||||
return pix;
|
||||
}
|
||||
|
||||
// creates a raw buffer from the specified location of the pix
|
||||
/**
|
||||
* creates a raw buffer from the specified location of the pix
|
||||
*/
|
||||
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// skip invalid dimensions
|
||||
@ -173,7 +189,9 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
||||
return temp_buff;
|
||||
}
|
||||
|
||||
// read file contents to a string
|
||||
/**
|
||||
* read file contents to a string
|
||||
*/
|
||||
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
||||
str->clear();
|
||||
FILE *fp = fopen(file_name.c_str(), "rb");
|
||||
@ -206,7 +224,9 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
||||
return (read_bytes == file_size);
|
||||
}
|
||||
|
||||
// splits a string into vectors based on specified delimiters
|
||||
/**
|
||||
* splits a string into vectors based on specified delimiters
|
||||
*/
|
||||
void CubeUtils::SplitStringUsing(const string &str,
|
||||
const string &delims,
|
||||
vector<string> *str_vec) {
|
||||
@ -240,7 +260,9 @@ void CubeUtils::SplitStringUsing(const string &str,
|
||||
}
|
||||
}
|
||||
|
||||
// UTF-8 to UTF-32 convesion functions
|
||||
/**
|
||||
* UTF-8 to UTF-32 conversion functions
|
||||
*/
|
||||
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
||||
str32->clear();
|
||||
int len = strlen(utf8_str);
|
||||
@ -254,7 +276,9 @@ void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
||||
}
|
||||
}
|
||||
|
||||
// UTF-8 to UTF-32 convesion functions
|
||||
/**
|
||||
* UTF-32 to UTF-8 conversion function
|
||||
*/
|
||||
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
|
||||
str->clear();
|
||||
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
|
||||
|
@ -37,7 +37,9 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
// leading, trailing punc constructor and single byte UTF char
|
||||
/**
|
||||
* leading, trailing punc constructor and single byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
||||
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
|
||||
root_ = false;
|
||||
@ -51,7 +53,9 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
// dict constructor: multi byte UTF char
|
||||
/**
|
||||
* dict constructor: multi byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
|
||||
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||||
int class_id) {
|
||||
|
@ -37,7 +37,9 @@ WordAltList::~WordAltList() {
|
||||
}
|
||||
}
|
||||
|
||||
// insert an alternate word with the specified cost and tag
|
||||
/**
|
||||
* insert an alternate word with the specified cost and tag
|
||||
*/
|
||||
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
||||
if (word_alt_ == NULL || alt_cost_ == NULL) {
|
||||
word_alt_ = new char_32*[max_alt_];
|
||||
@ -84,7 +86,9 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// sort the alternate in descending order based on the cost
|
||||
/**
|
||||
* sort the alternate in descending order based on the cost
|
||||
*/
|
||||
void WordAltList::Sort() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||
|
@ -50,8 +50,10 @@ WordUnigrams::~WordUnigrams() {
|
||||
}
|
||||
}
|
||||
|
||||
// Load the word-list and unigrams from file and create an object
|
||||
// The word list is assumed to be sorted in lexicographic order.
|
||||
/**
|
||||
* Load the word-list and unigrams from file and create an object
|
||||
* The word list is assumed to be sorted in lexicographic order.
|
||||
*/
|
||||
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string file_name;
|
||||
@ -143,10 +145,12 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
||||
return word_unigrams_obj;
|
||||
}
|
||||
|
||||
// Split input into space-separated tokens, strip trailing punctuation
|
||||
// from each, determine case properties, call UTF-8 flavor of cost
|
||||
// function on each word, and aggregate all into single mean word
|
||||
// cost.
|
||||
/**
|
||||
* Split input into space-separated tokens, strip trailing punctuation
|
||||
* from each, determine case properties, call UTF-8 flavor of cost
|
||||
* function on each word, and aggregate all into single mean word
|
||||
* cost.
|
||||
*/
|
||||
int WordUnigrams::Cost(const char_32 *key_str32,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set) const {
|
||||
@ -239,7 +243,9 @@ int WordUnigrams::Cost(const char_32 *key_str32,
|
||||
return static_cast<int>(cost / static_cast<double>(words.size()));
|
||||
}
|
||||
|
||||
// Search for UTF-8 string using binary search of sorted words_ array.
|
||||
/**
|
||||
* Search for UTF-8 string using binary search of sorted words_ array.
|
||||
*/
|
||||
int WordUnigrams::CostInternal(const char *key_str) const {
|
||||
if (strlen(key_str) == 0)
|
||||
return not_in_list_cost_;
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "host.h"
|
||||
#include "danerror.h"
|
||||
#include "tprintf.h"
|
||||
@ -28,27 +28,18 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void DoError(int Error, const char *Message) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Error error number which is to be trapped
|
||||
** Message pointer to a string to be printed as an error message
|
||||
** Globals:
|
||||
** ErrorTrapStack stack of error traps
|
||||
** CurrentTrapDepth number of traps on the stack
|
||||
** Operation:
|
||||
** This routine prints the specified error message to stderr.
|
||||
** It then jumps to the current error trap. If the error trap
|
||||
** stack is empty, the calling program is terminated with a
|
||||
** fatal error message.
|
||||
** Return:
|
||||
** None - this routine does not return.
|
||||
** Exceptions:
|
||||
** Empty error trap stack terminates the calling program.
|
||||
** History:
|
||||
** 4/3/89, DSJ, Created.
|
||||
/**
|
||||
* This routine prints the specified error message to stderr.
|
||||
* It then jumps to the current error trap. If the error trap
|
||||
* stack is empty, the calling program is terminated with a
|
||||
* fatal error message.
|
||||
*
|
||||
* @param Error error number which is to be trapped
|
||||
* @param Message pointer to a string to be printed as an error message
|
||||
* @return None - this routine does not return.
|
||||
* @note History: 4/3/89, DSJ, Created.
|
||||
*/
|
||||
void DoError(int Error, const char *Message) {
|
||||
if (Message != NULL) {
|
||||
tprintf("\nError: %s!\n", Message);
|
||||
}
|
||||
|
@ -15,9 +15,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "efio.h"
|
||||
#include "danerror.h"
|
||||
#include <stdio.h>
|
||||
@ -25,29 +25,22 @@
|
||||
|
||||
#define MAXERRORMESSAGE 256
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FILE *Efopen(const char *Name, const char *Mode) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Name name of file to be opened
|
||||
** Mode mode to be used to open file
|
||||
** Globals:
|
||||
** None
|
||||
** Operation:
|
||||
** This routine attempts to open the specified file in the
|
||||
** specified mode. If the file can be opened, a pointer to
|
||||
** the open file is returned. If the file cannot be opened,
|
||||
** an error is trapped.
|
||||
** Return:
|
||||
** Pointer to open file.
|
||||
** Exceptions:
|
||||
** FOPENERROR unable to open specified file
|
||||
** History:
|
||||
** 5/21/89, DSJ, Created.
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine attempts to open the specified file in the
|
||||
* specified mode. If the file can be opened, a pointer to
|
||||
* the open file is returned. If the file cannot be opened,
|
||||
* an error is trapped.
|
||||
* @param Name name of file to be opened
|
||||
* @param Mode mode to be used to open file
|
||||
* @return Pointer to open file.
|
||||
* @note Globals: None
|
||||
* @note Exceptions: #FOPENERROR unable to open specified file
|
||||
* @note History: 5/21/89, DSJ, Created.
|
||||
*/
|
||||
FILE *Efopen(const char *Name, const char *Mode) {
|
||||
FILE *File;
|
||||
char ErrorMessage[MAXERRORMESSAGE];
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
/******************************************************************************
|
||||
** Filename:
|
||||
/**************************************************************************
|
||||
* Filename:
|
||||
emalloc.c
|
||||
** Purpose:
|
||||
** Purpose:
|
||||
Routines for trapping memory allocation errors.
|
||||
** Author:
|
||||
** Author:
|
||||
Dan Johnson
|
||||
HP-UX 6.2
|
||||
HP-UX 6.2
|
||||
** History:
|
||||
** History:
|
||||
4/3/89, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
@ -21,36 +21,30 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "emalloc.h"
|
||||
#include "danerror.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void *Emalloc(int Size) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Size
|
||||
number of bytes of memory to be allocated
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine attempts to allocate the specified number of
|
||||
** bytes. If the memory can be allocated, a pointer to the
|
||||
** memory is returned. If the memory cannot be allocated, or
|
||||
** if the allocation request is negative or zero,
|
||||
** an error is trapped.
|
||||
** Return: Pointer to allocated memory.
|
||||
** Exceptions: NOTENOUGHMEMORY
|
||||
unable to allocate Size bytes
|
||||
** ILLEGALMALLOCREQUEST
|
||||
negative or zero request size
|
||||
** History: 4/3/89, DSJ, Created.
|
||||
----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine attempts to allocate the specified number of
|
||||
* bytes. If the memory can be allocated, a pointer to the
|
||||
* memory is returned. If the memory cannot be allocated, or
|
||||
* if the allocation request is negative or zero,
|
||||
* an error is trapped.
|
||||
* @param Size number of bytes of memory to be allocated
|
||||
* @return Pointer to allocated memory.
|
||||
* @note Exceptions:
|
||||
* - #NOTENOUGHMEMORY unable to allocate Size bytes
|
||||
* - #ILLEGALMALLOCREQUEST negative or zero request size
|
||||
* @note History: 4/3/89, DSJ, Created.
|
||||
*/
|
||||
void *Emalloc(int Size) {
|
||||
void *Buffer;
|
||||
|
||||
if (Size <= 0)
|
||||
|
@ -42,9 +42,9 @@
|
||||
#endif
|
||||
|
||||
using tesseract::ScriptPos;
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
|
@ -18,9 +18,9 @@
|
||||
#ifndef STOPPER_H
|
||||
#define STOPPER_H
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "params.h"
|
||||
|
@ -58,7 +58,7 @@ PROJECT_LOGO =
|
||||
# entered, it will be relative to the location where doxygen was started. If
|
||||
# left blank the current directory will be used.
|
||||
|
||||
OUTPUT_DIRECTORY = $(builddir)/doc/
|
||||
OUTPUT_DIRECTORY = doc/
|
||||
|
||||
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
|
||||
# directories (in 2 levels) under the output directory of each output format and
|
||||
|
@ -21,9 +21,9 @@
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#include "oldlist.h"
|
||||
#include "efio.h"
|
||||
#include "emalloc.h"
|
||||
@ -42,16 +42,16 @@
|
||||
|
||||
DECLARE_STRING_PARAM_FLAG(D);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
int main (
|
||||
int argc,
|
||||
char **argv);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
void WriteNormProtos (
|
||||
const char *Directory,
|
||||
@ -71,9 +71,9 @@ void WriteProtos(
|
||||
BOOL8 WriteSigProtos,
|
||||
BOOL8 WriteInsigProtos);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/* global variable to hold configuration parameters to control clustering */
|
||||
//-M 0.025 -B 0.05 -I 0.8 -C 1e-3
|
||||
CLUSTERCONFIG CNConfig =
|
||||
@ -82,63 +82,59 @@ CLUSTERCONFIG CNConfig =
|
||||
};
|
||||
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int main(int argc, char* argv[])
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** argc number of command line arguments
|
||||
** argv array of command line arguments
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This program reads in a text file consisting of feature
|
||||
** samples from a training page in the following format:
|
||||
**
|
||||
** FontName CharName NumberOfFeatureTypes(N)
|
||||
** FeatureTypeName1 NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** FeatureTypeName2 NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** ...
|
||||
** FeatureTypeNameN NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** FontName CharName ...
|
||||
**
|
||||
** It then appends these samples into a separate file for each
|
||||
** character. The name of the file is
|
||||
**
|
||||
** DirectoryName/FontName/CharName.FeatureTypeName
|
||||
**
|
||||
** The DirectoryName can be specified via a command
|
||||
** line argument. If not specified, it defaults to the
|
||||
** current directory. The format of the resulting files is:
|
||||
**
|
||||
** NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** NumberOfFeatures(M)
|
||||
** ...
|
||||
**
|
||||
** The output files each have a header which describes the
|
||||
** type of feature which the file contains. This header is
|
||||
** in the format required by the clusterer. A command line
|
||||
** argument can also be used to specify that only the first
|
||||
** N samples of each class should be used.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
||||
/**
|
||||
* This program reads in a text file consisting of feature
|
||||
* samples from a training page in the following format:
|
||||
* @verbatim
|
||||
FontName CharName NumberOfFeatureTypes(N)
|
||||
FeatureTypeName1 NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
FeatureTypeName2 NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
...
|
||||
FeatureTypeNameN NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
FontName CharName ...
|
||||
@endverbatim
|
||||
* It then appends these samples into a separate file for each
|
||||
* character. The name of the file is
|
||||
*
|
||||
* DirectoryName/FontName/CharName.FeatureTypeName
|
||||
*
|
||||
* The DirectoryName can be specified via a command
|
||||
* line argument. If not specified, it defaults to the
|
||||
* current directory. The format of the resulting files is:
|
||||
* @verbatim
|
||||
NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
NumberOfFeatures(M)
|
||||
...
|
||||
@endverbatim
|
||||
* The output files each have a header which describes the
|
||||
* type of feature which the file contains. This header is
|
||||
* in the format required by the clusterer. A command line
|
||||
* argument can also be used to specify that only the first
|
||||
* N samples of each class should be used.
|
||||
* @param argc number of command line arguments
|
||||
* @param argv array of command line arguments
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
||||
*/
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// Set the global Config parameters before parsing the command line.
|
||||
Config = CNConfig;
|
||||
@ -207,28 +203,26 @@ int main(int argc, char* argv[])
|
||||
} // main
|
||||
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine writes the specified samples into files which
|
||||
* are organized according to the font name and character name
|
||||
* of the samples.
|
||||
* @param Directory directory to place sample files into
|
||||
* @param LabeledProtoList List of labeled protos
|
||||
* @param Clusterer The CLUSTERER to use
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
||||
*/
|
||||
void WriteNormProtos (
|
||||
const char *Directory,
|
||||
LIST LabeledProtoList,
|
||||
CLUSTERER *Clusterer)
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** Directory directory to place sample files into
|
||||
** Operation:
|
||||
** This routine writes the specified samples into files which
|
||||
** are organized according to the font name and character name
|
||||
** of the samples.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 16:17:06 1989, DSJ, Created.
|
||||
*/
|
||||
|
||||
CLUSTERER *Clusterer)
|
||||
{
|
||||
FILE *File;
|
||||
STRING Filename;
|
||||
|
@ -73,18 +73,17 @@ DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence,
|
||||
DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence,
|
||||
"Desired confidence in prototypes created");
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** argc number of command line arguments to parse
|
||||
** argv command line arguments
|
||||
** Globals:
|
||||
** Config current clustering parameters
|
||||
** Operation:
|
||||
** This routine parses the command line arguments that were
|
||||
** passed to the program and ses them to set relevant
|
||||
** training-related global parameters
|
||||
** Return: none
|
||||
** Exceptions: Illegal options terminate the program.
|
||||
/**
|
||||
* This routine parses the command line arguments that were
|
||||
* passed to the program and uses them to set relevant
|
||||
* training-related global parameters
|
||||
*
|
||||
* Globals:
|
||||
* - Config current clustering parameters
|
||||
* @param argc number of command line arguments to parse
|
||||
* @param argv command line arguments
|
||||
* @return none
|
||||
* @note Exceptions: Illegal options terminate the program.
|
||||
*/
|
||||
void ParseArguments(int* argc, char ***argv) {
|
||||
STRING usage;
|
||||
@ -158,19 +157,21 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a MasterTraininer and loads the training data into it:
|
||||
// Initializes feature_defs and IntegerFX.
|
||||
// Loads the shape_table if shape_table != NULL.
|
||||
// Loads initial unicharset from -U command-line option.
|
||||
// If FLAGS_T is set, loads the majority of data from there, else:
|
||||
// Loads font info from -F option.
|
||||
// Loads xheights from -X option.
|
||||
// Loads samples from .tr files in remaining command-line args.
|
||||
// Deletes outliers and computes canonical samples.
|
||||
// If FLAGS_output_trainer is set, saves the trainer for future use.
|
||||
// Computes canonical and cloud features.
|
||||
// If shape_table is not NULL, but failed to load, make a fake flat one,
|
||||
// as shape clustering was not run.
|
||||
/**
|
||||
* Creates a MasterTrainer and loads the training data into it:
|
||||
* Initializes feature_defs and IntegerFX.
|
||||
* Loads the shape_table if shape_table != NULL.
|
||||
* Loads initial unicharset from -U command-line option.
|
||||
* If FLAGS_T is set, loads the majority of data from there, else:
|
||||
* - Loads font info from -F option.
|
||||
* - Loads xheights from -X option.
|
||||
* - Loads samples from .tr files in remaining command-line args.
|
||||
* - Deletes outliers and computes canonical samples.
|
||||
* - If FLAGS_output_trainer is set, saves the trainer for future use.
|
||||
* Computes canonical and cloud features.
|
||||
* If shape_table is not NULL, but failed to load, make a fake flat one,
|
||||
* as shape clustering was not run.
|
||||
*/
|
||||
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
||||
bool replication,
|
||||
ShapeTable** shape_table,
|
||||
@ -294,20 +295,19 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
||||
} // namespace tesseract.
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine returns the next command line argument. If
|
||||
* there are no remaining command line arguments, it returns
|
||||
* NULL. This routine should only be called after all option
|
||||
* arguments have been parsed and removed with ParseArguments.
|
||||
*
|
||||
* Globals:
|
||||
* - tessoptind defined by tessopt sys call
|
||||
* @return Next command line argument or NULL.
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
||||
*/
|
||||
const char *GetNextFilename(int argc, const char* const * argv) {
|
||||
/*
|
||||
** Parameters: none
|
||||
** Globals:
|
||||
** tessoptind defined by tessopt sys call
|
||||
** Operation:
|
||||
** This routine returns the next command line argument. If
|
||||
** there are no remaining command line arguments, it returns
|
||||
** NULL. This routine should only be called after all option
|
||||
** arguments have been parsed and removed with ParseArguments.
|
||||
** Return: Next command line argument or NULL.
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
||||
*/
|
||||
if (tessoptind < argc)
|
||||
return argv[tessoptind++];
|
||||
else
|
||||
@ -317,24 +317,20 @@ const char *GetNextFilename(int argc, const char* const * argv) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine searches thru a list of labeled lists to find
|
||||
* a list with the specified label. If a matching labeled list
|
||||
* cannot be found, NULL is returned.
|
||||
* @param List list to search
|
||||
* @param Label label to search for
|
||||
* @return Labeled list with the specified Label or NULL.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
||||
*/
|
||||
LABELEDLIST FindList (
|
||||
LIST List,
|
||||
char *Label)
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** List list to search
|
||||
** Label label to search for
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine searches thru a list of labeled lists to find
|
||||
** a list with the specified label. If a matching labeled list
|
||||
** cannot be found, NULL is returned.
|
||||
** Return: Labeled list with the specified Label or NULL.
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
||||
*/
|
||||
|
||||
{
|
||||
LABELEDLIST LabeledList;
|
||||
|
||||
@ -349,21 +345,17 @@ LABELEDLIST FindList (
|
||||
} /* FindList */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine allocates a new, empty labeled list and gives
|
||||
* it the specified label.
|
||||
* @param Label label for new list
|
||||
* @return New, empty labeled list.
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 16:08:46 1989, DSJ, Created.
|
||||
*/
|
||||
LABELEDLIST NewLabeledList (
|
||||
const char *Label)
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** Label label for new list
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine allocates a new, empty labeled list and gives
|
||||
** it the specified label.
|
||||
** Return: New, empty labeled list.
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
|
||||
*/
|
||||
|
||||
{
|
||||
LABELEDLIST LabeledList;
|
||||
|
||||
@ -380,25 +372,29 @@ LABELEDLIST NewLabeledList (
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// TODO(rays) This is now used only by cntraining. Convert cntraining to use
|
||||
// the new method or get rid of it entirely.
|
||||
/**
|
||||
* This routine reads training samples from a file and
|
||||
* places them into a data structure which organizes the
|
||||
* samples by FontName and CharName. It then returns this
|
||||
* data structure.
|
||||
* @param file open text file to read samples from
|
||||
* @param feature_defs
|
||||
* @param feature_name
|
||||
* @param max_samples
|
||||
* @param unicharset
|
||||
* @param training_samples
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History:
|
||||
* - Fri Aug 18 13:11:39 1989, DSJ, Created.
|
||||
* - Tue May 17 1998 simplifications to structure, eliminated
|
||||
* font, and feature specification levels of structure.
|
||||
*/
|
||||
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
||||
const char *feature_name, int max_samples,
|
||||
UNICHARSET* unicharset,
|
||||
FILE* file, LIST* training_samples) {
|
||||
/*
|
||||
** Parameters:
|
||||
** file open text file to read samples from
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine reads training samples from a file and
|
||||
** places them into a data structure which organizes the
|
||||
** samples by FontName and CharName. It then returns this
|
||||
** data structure.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
|
||||
** Tue May 17 1998 simplifications to structure, illiminated
|
||||
** font, and feature specification levels of structure.
|
||||
*/
|
||||
char buffer[2048];
|
||||
char unichar[UNICHAR_LEN + 1];
|
||||
LABELEDLIST char_sample;
|
||||
@ -450,18 +446,16 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeTrainingSamples(LIST CharList) {
|
||||
/*
|
||||
** Parameters:
|
||||
** FontList list of all fonts in document
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine deallocates all of the space allocated to
|
||||
** the specified list of training samples.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||
/**
|
||||
* This routine deallocates all of the space allocated to
|
||||
* the specified list of training samples.
|
||||
* @param CharList list of all fonts in document
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||
*/
|
||||
void FreeTrainingSamples(LIST CharList) {
|
||||
LABELEDLIST char_sample;
|
||||
FEATURE_SET FeatureSet;
|
||||
LIST FeatureList;
|
||||
@ -480,45 +474,39 @@ void FreeTrainingSamples(LIST CharList) {
|
||||
} /* FreeTrainingSamples */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeLabeledList(LABELEDLIST LabeledList) {
|
||||
/*
|
||||
** Parameters:
|
||||
** LabeledList labeled list to be freed
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine deallocates all of the memory consumed by
|
||||
** a labeled list. It does not free any memory which may be
|
||||
** consumed by the items in the list.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
|
||||
/**
|
||||
* This routine deallocates all of the memory consumed by
|
||||
* a labeled list. It does not free any memory which may be
|
||||
* consumed by the items in the list.
|
||||
* @param LabeledList labeled list to be freed
|
||||
* @note Globals: none
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 17:52:45 1989, DSJ, Created.
|
||||
*/
|
||||
void FreeLabeledList(LABELEDLIST LabeledList) {
|
||||
destroy(LabeledList->List);
|
||||
free(LabeledList->Label);
|
||||
free(LabeledList);
|
||||
} /* FreeLabeledList */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine reads samples from a LABELEDLIST and enters
|
||||
* those samples into a clusterer data structure. This
|
||||
* data structure is then returned to the caller.
|
||||
* @param char_sample LABELEDLIST that holds all the feature information for a
|
||||
* given character.
|
||||
* @param FeatureDefs
|
||||
* @param program_feature_type
|
||||
* @return Pointer to new clusterer data structure.
|
||||
* @note Globals: None
|
||||
* @note Exceptions: None
|
||||
* @note History: 8/16/89, DSJ, Created.
|
||||
*/
|
||||
CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
LABELEDLIST char_sample,
|
||||
const char* program_feature_type) {
|
||||
/*
|
||||
** Parameters:
|
||||
** char_sample: LABELEDLIST that holds all the feature information for a
|
||||
** given character.
|
||||
** Globals:
|
||||
** None
|
||||
** Operation:
|
||||
** This routine reads samples from a LABELEDLIST and enters
|
||||
** those samples into a clusterer data structure. This
|
||||
** data structure is then returned to the caller.
|
||||
** Return:
|
||||
** Pointer to new clusterer data structure.
|
||||
** Exceptions:
|
||||
** None
|
||||
** History:
|
||||
** 8/16/89, DSJ, Created.
|
||||
*/
|
||||
uinT16 N;
|
||||
int i, j;
|
||||
FLOAT32 *Sample = NULL;
|
||||
@ -741,21 +729,17 @@ MERGE_CLASS NewLabeledClass (
|
||||
} /* NewLabeledClass */
|
||||
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This routine deallocates all of the space allocated to
|
||||
* the specified list of training samples.
|
||||
* @param ClassList list of all fonts in document
|
||||
* @return none
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||
*/
|
||||
void FreeLabeledClassList (
|
||||
LIST ClassList)
|
||||
|
||||
/*
|
||||
** Parameters:
|
||||
** FontList list of all fonts in document
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This routine deallocates all of the space allocated to
|
||||
** the specified list of training samples.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||
*/
|
||||
|
||||
{
|
||||
MERGE_CLASS MergeClass;
|
||||
|
||||
@ -770,7 +754,7 @@ void FreeLabeledClassList (
|
||||
|
||||
} /* FreeLabeledClassList */
|
||||
|
||||
/** SetUpForFloat2Int **************************************************/
|
||||
/* SetUpForFloat2Int */
|
||||
CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
|
||||
LIST LabeledClassList) {
|
||||
MERGE_CLASS MergeClass;
|
||||
|
@ -23,9 +23,9 @@
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
@ -79,9 +79,9 @@ const int kMaxShapeLabelLength = 10;
|
||||
|
||||
DECLARE_STRING_PARAM_FLAG(test_ch);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
----------------------------------------------------------------------------*/
|
||||
int main (
|
||||
int argc,
|
||||
char **argv);
|
||||
@ -208,41 +208,37 @@ static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) {
|
||||
config_map->CompleteMerges();
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* This program reads in a text file consisting of feature
|
||||
* samples from a training page in the following format:
|
||||
* @verbatim
|
||||
FontName UTF8-char-str xmin ymin xmax ymax page-number
|
||||
NumberOfFeatureTypes(N)
|
||||
FeatureTypeName1 NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
FeatureTypeName2 NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
...
|
||||
FeatureTypeNameN NumberOfFeatures(M)
|
||||
Feature1
|
||||
...
|
||||
FeatureM
|
||||
FontName CharName ...
|
||||
@endverbatim
|
||||
* The result of this program is a binary inttemp file used by
|
||||
* the OCR engine.
|
||||
* @param argc number of command line arguments
|
||||
* @param argv array of command line arguments
|
||||
* @return none
|
||||
* @note Exceptions: none
|
||||
* @note History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
||||
* @note History: Mon May 18 1998, Christy Russson, Revision started.
|
||||
*/
|
||||
int main (int argc, char **argv) {
|
||||
/*
|
||||
** Parameters:
|
||||
** argc number of command line arguments
|
||||
** argv array of command line arguments
|
||||
** Globals: none
|
||||
** Operation:
|
||||
** This program reads in a text file consisting of feature
|
||||
** samples from a training page in the following format:
|
||||
**
|
||||
** FontName UTF8-char-str xmin ymin xmax ymax page-number
|
||||
** NumberOfFeatureTypes(N)
|
||||
** FeatureTypeName1 NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** FeatureTypeName2 NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** ...
|
||||
** FeatureTypeNameN NumberOfFeatures(M)
|
||||
** Feature1
|
||||
** ...
|
||||
** FeatureM
|
||||
** FontName CharName ...
|
||||
**
|
||||
** The result of this program is a binary inttemp file used by
|
||||
** the OCR engine.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
||||
** Mon May 18 1998, Christy Russson, Revision started.
|
||||
*/
|
||||
ParseArguments(&argc, &argv);
|
||||
|
||||
ShapeTable* shape_table = NULL;
|
||||
|
@ -171,8 +171,10 @@ void LanguageModel::InitForWord(const WERD_CHOICE *prev_word,
|
||||
}
|
||||
}
|
||||
|
||||
// Helper scans the collection of predecessors for competing siblings that
|
||||
// have the same letter with the opposite case, setting competing_vse.
|
||||
/**
|
||||
* Helper scans the collection of predecessors for competing siblings that
|
||||
* have the same letter with the opposite case, setting competing_vse.
|
||||
*/
|
||||
static void ScanParentsForCaseMix(const UNICHARSET& unicharset,
|
||||
LanguageModelState* parent_node) {
|
||||
if (parent_node == NULL) return;
|
||||
@ -200,8 +202,10 @@ static void ScanParentsForCaseMix(const UNICHARSET& unicharset,
|
||||
}
|
||||
}
|
||||
|
||||
// Helper returns true if the given choice has a better case variant before
|
||||
// it in the choice_list that is not distinguishable by size.
|
||||
/**
|
||||
* Helper returns true if the given choice has a better case variant before
|
||||
* it in the choice_list that is not distinguishable by size.
|
||||
*/
|
||||
static bool HasBetterCaseVariant(const UNICHARSET& unicharset,
|
||||
const BLOB_CHOICE* choice,
|
||||
BLOB_CHOICE_LIST* choices) {
|
||||
@ -222,27 +226,32 @@ static bool HasBetterCaseVariant(const UNICHARSET& unicharset,
|
||||
return false; // Should never happen, but just in case.
|
||||
}
|
||||
|
||||
// UpdateState has the job of combining the ViterbiStateEntry lists on each
|
||||
// of the choices on parent_list with each of the blob choices in curr_list,
|
||||
// making a new ViterbiStateEntry for each sensible path.
|
||||
// This could be a huge set of combinations, creating a lot of work only to
|
||||
// be truncated by some beam limit, but only certain kinds of paths will
|
||||
// continue at the next step:
|
||||
// paths that are liked by the language model: either a DAWG or the n-gram
|
||||
// model, where active.
|
||||
// paths that represent some kind of top choice. The old permuter permuted
|
||||
// the top raw classifier score, the top upper case word and the top lower-
|
||||
// case word. UpdateState now concentrates its top-choice paths on top
|
||||
// lower-case, top upper-case (or caseless alpha), and top digit sequence,
|
||||
// with allowance for continuation of these paths through blobs where such
|
||||
// a character does not appear in the choices list.
|
||||
// GetNextParentVSE enforces some of these models to minimize the number of
|
||||
// calls to AddViterbiStateEntry, even prior to looking at the language model.
|
||||
// Thus an n-blob sequence of [l1I] will produce 3n calls to
|
||||
// AddViterbiStateEntry instead of 3^n.
|
||||
// Of course it isn't quite that simple as Title Case is handled by allowing
|
||||
// lower case to continue an upper case initial, but it has to be detected
|
||||
// in the combiner so it knows which upper case letters are initial alphas.
|
||||
/**
|
||||
* UpdateState has the job of combining the ViterbiStateEntry lists on each
|
||||
* of the choices on parent_list with each of the blob choices in curr_list,
|
||||
* making a new ViterbiStateEntry for each sensible path.
|
||||
*
|
||||
* This could be a huge set of combinations, creating a lot of work only to
|
||||
* be truncated by some beam limit, but only certain kinds of paths will
|
||||
* continue at the next step:
|
||||
* - paths that are liked by the language model: either a DAWG or the n-gram
|
||||
* model, where active.
|
||||
* - paths that represent some kind of top choice. The old permuter permuted
|
||||
* the top raw classifier score, the top upper case word and the top lower-
|
||||
* case word. UpdateState now concentrates its top-choice paths on top
|
||||
* lower-case, top upper-case (or caseless alpha), and top digit sequence,
|
||||
* with allowance for continuation of these paths through blobs where such
|
||||
* a character does not appear in the choices list.
|
||||
*
|
||||
* GetNextParentVSE enforces some of these models to minimize the number of
|
||||
* calls to AddViterbiStateEntry, even prior to looking at the language model.
|
||||
* Thus an n-blob sequence of [l1I] will produce 3n calls to
|
||||
* AddViterbiStateEntry instead of 3^n.
|
||||
*
|
||||
* Of course it isn't quite that simple as Title Case is handled by allowing
|
||||
* lower case to continue an upper case initial, but it has to be detected
|
||||
* in the combiner so it knows which upper case letters are initial alphas.
|
||||
*/
|
||||
bool LanguageModel::UpdateState(
|
||||
bool just_classified,
|
||||
int curr_col, int curr_row,
|
||||
@ -367,10 +376,12 @@ bool LanguageModel::UpdateState(
|
||||
return new_changed;
|
||||
}
|
||||
|
||||
// Finds the first lower and upper case letter and first digit in curr_list.
|
||||
// For non-upper/lower languages, alpha counts as upper.
|
||||
// Uses the first character in the list in place of empty results.
|
||||
// Returns true if both alpha and digits are found.
|
||||
/**
|
||||
* Finds the first lower and upper case letter and first digit in curr_list.
|
||||
* For non-upper/lower languages, alpha counts as upper.
|
||||
* Uses the first character in the list in place of empty results.
|
||||
* Returns true if both alpha and digits are found.
|
||||
*/
|
||||
bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list,
|
||||
BLOB_CHOICE **first_lower,
|
||||
BLOB_CHOICE **first_upper,
|
||||
@ -402,13 +413,15 @@ bool LanguageModel::GetTopLowerUpperDigit(BLOB_CHOICE_LIST *curr_list,
|
||||
return mixed;
|
||||
}
|
||||
|
||||
// Forces there to be at least one entry in the overall set of the
|
||||
// viterbi_state_entries of each element of parent_node that has the
|
||||
// top_choice_flag set for lower, upper and digit using the same rules as
|
||||
// GetTopLowerUpperDigit, setting the flag on the first found suitable
|
||||
// candidate, whether or not the flag is set on some other parent.
|
||||
// Returns 1 if both alpha and digits are found among the parents, -1 if no
|
||||
// parents are found at all (a legitimate case), and 0 otherwise.
|
||||
/**
|
||||
* Forces there to be at least one entry in the overall set of the
|
||||
* viterbi_state_entries of each element of parent_node that has the
|
||||
* top_choice_flag set for lower, upper and digit using the same rules as
|
||||
* GetTopLowerUpperDigit, setting the flag on the first found suitable
|
||||
* candidate, whether or not the flag is set on some other parent.
|
||||
* Returns 1 if both alpha and digits are found among the parents, -1 if no
|
||||
* parents are found at all (a legitimate case), and 0 otherwise.
|
||||
*/
|
||||
int LanguageModel::SetTopParentLowerUpperDigit(
|
||||
LanguageModelState *parent_node) const {
|
||||
if (parent_node == NULL) return -1;
|
||||
@ -481,9 +494,11 @@ int LanguageModel::SetTopParentLowerUpperDigit(
|
||||
return mixed ? 1 : 0;
|
||||
}
|
||||
|
||||
// Finds the next ViterbiStateEntry with which the given unichar_id can
|
||||
// combine sensibly, taking into account any mixed alnum/mixed case
|
||||
// situation, and whether this combination has been inspected before.
|
||||
/**
|
||||
* Finds the next ViterbiStateEntry with which the given unichar_id can
|
||||
* combine sensibly, taking into account any mixed alnum/mixed case
|
||||
* situation, and whether this combination has been inspected before.
|
||||
*/
|
||||
ViterbiStateEntry* LanguageModel::GetNextParentVSE(
|
||||
bool just_classified, bool mixed_alnum, const BLOB_CHOICE* bc,
|
||||
LanguageModelFlagsType blob_choice_flags, const UNICHARSET& unicharset,
|
||||
|
@ -202,8 +202,10 @@ bool LMPainPoints::GeneratePainPoint(
|
||||
}
|
||||
}
|
||||
|
||||
// Adjusts the pain point coordinates to cope with expansion of the ratings
|
||||
// matrix due to a split of the blob with the given index.
|
||||
/**
|
||||
* Adjusts the pain point coordinates to cope with expansion of the ratings
|
||||
* matrix due to a split of the blob with the given index.
|
||||
*/
|
||||
void LMPainPoints::RemapForSplit(int index) {
|
||||
for (int i = 0; i < LM_PPTYPE_NUM; ++i) {
|
||||
GenericVector<MatrixCoordPair>* heap = pain_points_heaps_[i].heap();
|
||||
|
@ -59,7 +59,7 @@ void ViterbiStateEntry::Print(const char *msg) const {
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
// Clears the viterbi search state back to its initial conditions.
|
||||
/// Clears the viterbi search state back to its initial conditions.
|
||||
void LanguageModelState::Clear() {
|
||||
viterbi_state_entries.clear();
|
||||
viterbi_state_entries_prunable_length = 0;
|
||||
|
@ -33,28 +33,31 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Used for expressing various language model flags.
|
||||
/// Used for expressing various language model flags.
|
||||
typedef unsigned char LanguageModelFlagsType;
|
||||
|
||||
// The following structs are used for storing the state of the language model
|
||||
// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
|
||||
// and the links are the relationships between the underlying blobs (see
|
||||
// segsearch.h for a more detailed description).
|
||||
// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
|
||||
// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
|
||||
// search leading up to and including this BLOB_CHOICE.
|
||||
// Each ViterbiStateEntry contains information from various components of the
|
||||
// language model: dawgs in which the path is found, character ngram model
|
||||
// probability of the path, script/chartype/font consistency info, state for
|
||||
// language-specific heuristics (e.g. hyphenated and compound words, lower/upper
|
||||
// case preferences, etc).
|
||||
// Each ViterbiStateEntry also contains the parent pointer, so that the path
|
||||
// that it represents (WERD_CHOICE) can be constructed by following these
|
||||
// parent pointers.
|
||||
/// The following structs are used for storing the state of the language model
|
||||
/// in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
|
||||
/// and the links are the relationships between the underlying blobs (see
|
||||
/// segsearch.h for a more detailed description).
|
||||
///
|
||||
/// Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
|
||||
/// a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
|
||||
/// search leading up to and including this BLOB_CHOICE.
|
||||
///
|
||||
/// Each ViterbiStateEntry contains information from various components of the
|
||||
/// language model: dawgs in which the path is found, character ngram model
|
||||
/// probability of the path, script/chartype/font consistency info, state for
|
||||
/// language-specific heuristics (e.g. hyphenated and compound words, lower/upper
|
||||
/// case preferences, etc).
|
||||
///
|
||||
/// Each ViterbiStateEntry also contains the parent pointer, so that the path
|
||||
/// that it represents (WERD_CHOICE) can be constructed by following these
|
||||
/// parent pointers.
|
||||
|
||||
// Struct for storing additional information used by Dawg language model
|
||||
// component. It stores the set of active dawgs in which the sequence of
|
||||
// letters on a path can be found.
|
||||
/// Struct for storing additional information used by Dawg language model
|
||||
/// component. It stores the set of active dawgs in which the sequence of
|
||||
/// letters on a path can be found.
|
||||
struct LanguageModelDawgInfo {
|
||||
LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt) : permuter(pt) {
|
||||
active_dawgs = new DawgPositionVector(*a);
|
||||
@ -66,29 +69,29 @@ struct LanguageModelDawgInfo {
|
||||
PermuterType permuter;
|
||||
};
|
||||
|
||||
// Struct for storing additional information used by Ngram language model
|
||||
// component.
|
||||
/// Struct for storing additional information used by Ngram language model
|
||||
/// component.
|
||||
struct LanguageModelNgramInfo {
|
||||
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
|
||||
: context(c), context_unichar_step_len(l), pruned(p), ngram_cost(nc),
|
||||
ngram_and_classifier_cost(ncc) {}
|
||||
STRING context; // context string
|
||||
// Length of the context measured by advancing using UNICHAR::utf8_step()
|
||||
// (should be at most the order of the character ngram model used).
|
||||
STRING context; ///< context string
|
||||
/// Length of the context measured by advancing using UNICHAR::utf8_step()
|
||||
/// (should be at most the order of the character ngram model used).
|
||||
int context_unichar_step_len;
|
||||
// The paths with pruned set are pruned out from the perspective of the
|
||||
// character ngram model. They are explored further because they represent
|
||||
// a dictionary match or a top choice. Thus ngram_info is still computed
|
||||
// for them in order to calculate the combined cost.
|
||||
/// The paths with pruned set are pruned out from the perspective of the
|
||||
/// character ngram model. They are explored further because they represent
|
||||
/// a dictionary match or a top choice. Thus ngram_info is still computed
|
||||
/// for them in order to calculate the combined cost.
|
||||
bool pruned;
|
||||
// -ln(P_ngram_model(path))
|
||||
/// -ln(P_ngram_model(path))
|
||||
float ngram_cost;
|
||||
// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
|
||||
/// -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
|
||||
float ngram_and_classifier_cost;
|
||||
};
|
||||
|
||||
// Struct for storing the information about a path in the segmentation graph
|
||||
// explored by Viterbi search.
|
||||
/// Struct for storing the information about a path in the segmentation graph
|
||||
/// explored by Viterbi search.
|
||||
struct ViterbiStateEntry : public ELIST_LINK {
|
||||
ViterbiStateEntry(ViterbiStateEntry *pe,
|
||||
BLOB_CHOICE *b, float c, float ol,
|
||||
@ -122,8 +125,8 @@ struct ViterbiStateEntry : public ELIST_LINK {
|
||||
delete ngram_info;
|
||||
delete debug_str;
|
||||
}
|
||||
// Comparator function for sorting ViterbiStateEntry_LISTs in
|
||||
// non-increasing order of costs.
|
||||
/// Comparator function for sorting ViterbiStateEntry_LISTs in
|
||||
/// non-increasing order of costs.
|
||||
static int Compare(const void *e1, const void *e2) {
|
||||
const ViterbiStateEntry *ve1 =
|
||||
*reinterpret_cast<const ViterbiStateEntry * const *>(e1);
|
||||
@ -137,8 +140,8 @@ struct ViterbiStateEntry : public ELIST_LINK {
|
||||
}
|
||||
return consistency_info.Consistent();
|
||||
}
|
||||
// Returns true if this VSE has an alphanumeric character as its classifier
|
||||
// result.
|
||||
/// Returns true if this VSE has an alphanumeric character as its classifier
|
||||
/// result.
|
||||
bool HasAlnumChoice(const UNICHARSET& unicharset) {
|
||||
if (curr_b == NULL) return false;
|
||||
UNICHAR_ID unichar_id = curr_b->unichar_id();
|
||||
@ -149,48 +152,48 @@ struct ViterbiStateEntry : public ELIST_LINK {
|
||||
}
|
||||
void Print(const char *msg) const;
|
||||
|
||||
// The cost is an adjusted ratings sum, that is adjusted by all the language
|
||||
// model components that use Viterbi search.
|
||||
/// The cost is an adjusted ratings sum, that is adjusted by all the language
|
||||
/// model components that use Viterbi search.
|
||||
float cost;
|
||||
|
||||
// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
|
||||
/// Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
|
||||
BLOB_CHOICE *curr_b;
|
||||
ViterbiStateEntry *parent_vse;
|
||||
// Pointer to a case-competing ViterbiStateEntry in the same list that
|
||||
// represents a path ending in the same letter of the opposite case.
|
||||
/// Pointer to a case-competing ViterbiStateEntry in the same list that
|
||||
/// represents a path ending in the same letter of the opposite case.
|
||||
ViterbiStateEntry *competing_vse;
|
||||
|
||||
// Various information about the characters on the path represented
|
||||
// by this ViterbiStateEntry.
|
||||
float ratings_sum; // sum of ratings of character on the path
|
||||
float min_certainty; // minimum certainty on the path
|
||||
int adapted; // number of BLOB_CHOICES from adapted templates
|
||||
int length; // number of characters on the path
|
||||
float outline_length; // length of the outline so far
|
||||
LMConsistencyInfo consistency_info; // path consistency info
|
||||
AssociateStats associate_stats; // character widths/gaps/seams
|
||||
/// Various information about the characters on the path represented
|
||||
/// by this ViterbiStateEntry.
|
||||
float ratings_sum; ///< sum of ratings of character on the path
|
||||
float min_certainty; ///< minimum certainty on the path
|
||||
int adapted; ///< number of BLOB_CHOICES from adapted templates
|
||||
int length; ///< number of characters on the path
|
||||
float outline_length; ///< length of the outline so far
|
||||
LMConsistencyInfo consistency_info; ///< path consistency info
|
||||
AssociateStats associate_stats; ///< character widths/gaps/seams
|
||||
|
||||
// Flags for marking the entry as a top choice path with
|
||||
// the smallest rating or lower/upper case letters).
|
||||
/// Flags for marking the entry as a top choice path with
|
||||
/// the smallest rating or lower/upper case letters.
|
||||
LanguageModelFlagsType top_choice_flags;
|
||||
|
||||
// Extra information maintained by Dawg language model component
|
||||
// (owned by ViterbiStateEntry).
|
||||
/// Extra information maintained by Dawg language model component
|
||||
/// (owned by ViterbiStateEntry).
|
||||
LanguageModelDawgInfo *dawg_info;
|
||||
|
||||
// Extra information maintained by Ngram language model component
|
||||
// (owned by ViterbiStateEntry).
|
||||
/// Extra information maintained by Ngram language model component
|
||||
/// (owned by ViterbiStateEntry).
|
||||
LanguageModelNgramInfo *ngram_info;
|
||||
|
||||
bool updated; // set to true if the entry has just been created/updated
|
||||
// UTF8 string representing the path corresponding to this vse.
|
||||
// Populated only when language_model_debug_level > 0.
|
||||
bool updated; ///< set to true if the entry has just been created/updated
|
||||
/// UTF8 string representing the path corresponding to this vse.
|
||||
/// Populated only when language_model_debug_level > 0.
|
||||
STRING *debug_str;
|
||||
};
|
||||
|
||||
ELISTIZEH(ViterbiStateEntry);
|
||||
|
||||
// Struct to store information maintained by various language model components.
|
||||
/// Struct to store information maintained by various language model components.
|
||||
struct LanguageModelState {
|
||||
LanguageModelState() :
|
||||
viterbi_state_entries_prunable_length(0),
|
||||
@ -198,21 +201,21 @@ struct LanguageModelState {
|
||||
viterbi_state_entries_length(0) {}
|
||||
~LanguageModelState() {}
|
||||
|
||||
// Clears the viterbi search state back to its initial conditions.
|
||||
/// Clears the viterbi search state back to its initial conditions.
|
||||
void Clear();
|
||||
|
||||
void Print(const char *msg);
|
||||
|
||||
// Storage for the Viterbi state.
|
||||
/// Storage for the Viterbi state.
|
||||
ViterbiStateEntry_LIST viterbi_state_entries;
|
||||
// Number and max cost of prunable paths in viterbi_state_entries.
|
||||
/// Number and max cost of prunable paths in viterbi_state_entries.
|
||||
int viterbi_state_entries_prunable_length;
|
||||
float viterbi_state_entries_prunable_max_cost;
|
||||
// Total number of entries in viterbi_state_entries.
|
||||
/// Total number of entries in viterbi_state_entries.
|
||||
int viterbi_state_entries_length;
|
||||
};
|
||||
|
||||
// Bundle together all the things pertaining to the best choice/state.
|
||||
/// Bundle together all the things pertaining to the best choice/state.
|
||||
struct BestChoiceBundle {
|
||||
explicit BestChoiceBundle(int matrix_dimension)
|
||||
: updated(false), best_vse(NULL) {
|
||||
@ -222,15 +225,15 @@ struct BestChoiceBundle {
|
||||
}
|
||||
~BestChoiceBundle() {}
|
||||
|
||||
// Flag to indicate whether anything was changed.
|
||||
/// Flag to indicate whether anything was changed.
|
||||
bool updated;
|
||||
// Places to try to fix the word suggested by ambiguity checking.
|
||||
/// Places to try to fix the word suggested by ambiguity checking.
|
||||
DANGERR fixpt;
|
||||
// The beam. One LanguageModelState containing a list of ViterbiStateEntry per
|
||||
// row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
|
||||
// somewhere in the corresponding row.
|
||||
/// The beam. One LanguageModelState containing a list of ViterbiStateEntry
|
||||
/// per row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
|
||||
/// somewhere in the corresponding row.
|
||||
PointerVector<LanguageModelState> beam;
|
||||
// Best ViterbiStateEntry and BLOB_CHOICE.
|
||||
/// Best ViterbiStateEntry and BLOB_CHOICE.
|
||||
ViterbiStateEntry *best_vse;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user