mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 01:42:41 +08:00
Fixed issue 1252: Refactored LearnBlob and its call hierarchy to make it a member of Classify.
Eliminated the flexfx scheme for calling global feature extractor functions through an array of function pointers. Deleted dead code I found as a by-product. This CL does not change BlobToTrainingSample or ExtractFeatures to be full members of Classify (the eventual goal), as that would make the CL even bigger, since there are a lot of callers to these functions. When ExtractFeatures and BlobToTrainingSample become members of Classify, they will be able to access control parameters in Classify, which will greatly simplify developing variations of the feature extraction process.
This commit is contained in:
parent
e735a9017b
commit
53fc4456cc
@ -51,6 +51,7 @@
|
|||||||
#include "allheaders.h"
|
#include "allheaders.h"
|
||||||
|
|
||||||
#include "baseapi.h"
|
#include "baseapi.h"
|
||||||
|
#include "blobclass.h"
|
||||||
#include "resultiterator.h"
|
#include "resultiterator.h"
|
||||||
#include "mutableiterator.h"
|
#include "mutableiterator.h"
|
||||||
#include "thresholder.h"
|
#include "thresholder.h"
|
||||||
@ -870,7 +871,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
|||||||
page_res_ = NULL;
|
page_res_ = NULL;
|
||||||
return -1;
|
return -1;
|
||||||
} else if (tesseract_->tessedit_train_from_boxes) {
|
} else if (tesseract_->tessedit_train_from_boxes) {
|
||||||
tesseract_->ApplyBoxTraining(*output_file_, page_res_);
|
STRING fontname;
|
||||||
|
ExtractFontName(*output_file_, &fontname);
|
||||||
|
tesseract_->ApplyBoxTraining(fontname, page_res_);
|
||||||
} else if (tesseract_->tessedit_ambigs_training) {
|
} else if (tesseract_->tessedit_ambigs_training) {
|
||||||
FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
|
FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
|
||||||
// OCR the page segmented into words by tesseract.
|
// OCR the page segmented into words by tesseract.
|
||||||
@ -1051,6 +1054,23 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Master ProcessPages calls ProcessPagesInternal and then does any post-
|
||||||
|
// processing required due to being in a training mode.
|
||||||
|
bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
|
||||||
|
int timeout_millisec,
|
||||||
|
TessResultRenderer* renderer) {
|
||||||
|
bool result =
|
||||||
|
ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
|
||||||
|
if (result) {
|
||||||
|
if (tesseract_->tessedit_train_from_boxes &&
|
||||||
|
!tesseract_->WriteTRFile(*output_file_)) {
|
||||||
|
tprintf("Write of TR file failed: %s\n", output_file_->string());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// In the ideal scenario, Tesseract will start working on data as soon
|
// In the ideal scenario, Tesseract will start working on data as soon
|
||||||
// as it can. For example, if you stream a filelist through stdin, we
|
// as it can. For example, if you stream a filelist through stdin, we
|
||||||
// should start the OCR process as soon as the first filename is
|
// should start the OCR process as soon as the first filename is
|
||||||
@ -1063,9 +1083,10 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
|||||||
// identify the scenario that really matters: filelists on
|
// identify the scenario that really matters: filelists on
|
||||||
// stdin. We'll still do our best if the user likes pipes. That means
|
// stdin. We'll still do our best if the user likes pipes. That means
|
||||||
// piling up any data coming into stdin into a memory buffer.
|
// piling up any data coming into stdin into a memory buffer.
|
||||||
bool TessBaseAPI::ProcessPages(const char* filename,
|
bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||||
const char* retry_config, int timeout_millisec,
|
const char* retry_config,
|
||||||
TessResultRenderer* renderer) {
|
int timeout_millisec,
|
||||||
|
TessResultRenderer* renderer) {
|
||||||
PERF_COUNT_START("ProcessPages")
|
PERF_COUNT_START("ProcessPages")
|
||||||
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
|
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
|
||||||
if (stdInput) {
|
if (stdInput) {
|
||||||
|
@ -538,9 +538,11 @@ class TESS_API TessBaseAPI {
|
|||||||
*
|
*
|
||||||
* Returns true if successful, false on error.
|
* Returns true if successful, false on error.
|
||||||
*/
|
*/
|
||||||
bool ProcessPages(const char* filename,
|
bool ProcessPages(const char* filename, const char* retry_config,
|
||||||
const char* retry_config, int timeout_millisec,
|
int timeout_millisec, TessResultRenderer* renderer);
|
||||||
TessResultRenderer* renderer);
|
// Does the real work of ProcessPages.
|
||||||
|
bool ProcessPagesInternal(const char* filename, const char* retry_config,
|
||||||
|
int timeout_millisec, TessResultRenderer* renderer);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Turn a single image into symbolic text.
|
* Turn a single image into symbolic text.
|
||||||
|
@ -775,13 +775,13 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calls LearnWord to extract features for labelled blobs within each word.
|
// Calls LearnWord to extract features for labelled blobs within each word.
|
||||||
// Features are written to the given filename.
|
// Features are stored in an internal buffer.
|
||||||
void Tesseract::ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res) {
|
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
|
||||||
PAGE_RES_IT pr_it(page_res);
|
PAGE_RES_IT pr_it(page_res);
|
||||||
int word_count = 0;
|
int word_count = 0;
|
||||||
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
||||||
word_res = pr_it.forward()) {
|
word_res = pr_it.forward()) {
|
||||||
LearnWord(filename.string(), word_res);
|
LearnWord(fontname.string(), word_res);
|
||||||
++word_count;
|
++word_count;
|
||||||
}
|
}
|
||||||
tprintf("Generated training data for %d words\n", word_count);
|
tprintf("Generated training data for %d words\n", word_count);
|
||||||
|
@ -220,17 +220,15 @@ void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
|
|||||||
|
|
||||||
// Learns the given word using its chopped_word, seam_array, denorm,
|
// Learns the given word using its chopped_word, seam_array, denorm,
|
||||||
// box_word, best_state, and correct_text to learn both correctly and
|
// box_word, best_state, and correct_text to learn both correctly and
|
||||||
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
|
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
|
||||||
// is called and the data will be written to a file for static training.
|
// is called and the data will be saved in an internal buffer.
|
||||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||||
// If rejmap is not NULL, then only chars with a rejmap entry of '1' will
|
void Classify::LearnWord(const char* fontname, WERD_RES* word) {
|
||||||
// be learned, otherwise all chars with good correct_text are learned.
|
|
||||||
void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|
||||||
int word_len = word->correct_text.size();
|
int word_len = word->correct_text.size();
|
||||||
if (word_len == 0) return;
|
if (word_len == 0) return;
|
||||||
|
|
||||||
float* thresholds = NULL;
|
float* thresholds = NULL;
|
||||||
if (filename == NULL) {
|
if (fontname == NULL) {
|
||||||
// Adaption mode.
|
// Adaption mode.
|
||||||
if (!EnableLearning || word->best_choice == NULL)
|
if (!EnableLearning || word->best_choice == NULL)
|
||||||
return; // Can't or won't adapt.
|
return; // Can't or won't adapt.
|
||||||
@ -267,8 +265,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|||||||
if (word->correct_text[ch].length() > 0) {
|
if (word->correct_text[ch].length() > 0) {
|
||||||
float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
|
float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
|
||||||
|
|
||||||
LearnPieces(filename, start_blob, word->best_state[ch],
|
LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
|
||||||
threshold, CST_WHOLE, word->correct_text[ch].string(), word);
|
CST_WHOLE, word->correct_text[ch].string(), word);
|
||||||
|
|
||||||
if (word->best_state[ch] > 1 && !disable_character_fragments) {
|
if (word->best_state[ch] > 1 && !disable_character_fragments) {
|
||||||
// Check that the character breaks into meaningful fragments
|
// Check that the character breaks into meaningful fragments
|
||||||
@ -301,8 +299,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|||||||
if (i != tokens.size() - 1)
|
if (i != tokens.size() - 1)
|
||||||
full_string += ' ';
|
full_string += ' ';
|
||||||
}
|
}
|
||||||
LearnPieces(filename, start_blob + frag, 1,
|
LearnPieces(fontname, start_blob + frag, 1, threshold,
|
||||||
threshold, CST_FRAGMENT, full_string.string(), word);
|
CST_FRAGMENT, full_string.string(), word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -314,13 +312,13 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|||||||
if (word->best_state[ch] > 1) {
|
if (word->best_state[ch] > 1) {
|
||||||
// If the next blob is good, make junk with the rightmost fragment.
|
// If the next blob is good, make junk with the rightmost fragment.
|
||||||
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
||||||
LearnPieces(filename, start_blob + word->best_state[ch] - 1,
|
LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
|
||||||
word->best_state[ch + 1] + 1,
|
word->best_state[ch + 1] + 1,
|
||||||
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
||||||
}
|
}
|
||||||
// If the previous blob is good, make junk with the leftmost fragment.
|
// If the previous blob is good, make junk with the leftmost fragment.
|
||||||
if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
|
if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
|
||||||
LearnPieces(filename, start_blob - word->best_state[ch - 1],
|
LearnPieces(fontname, start_blob - word->best_state[ch - 1],
|
||||||
word->best_state[ch - 1] + 1,
|
word->best_state[ch - 1] + 1,
|
||||||
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
||||||
}
|
}
|
||||||
@ -329,7 +327,7 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|||||||
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
||||||
STRING joined_text = word->correct_text[ch];
|
STRING joined_text = word->correct_text[ch];
|
||||||
joined_text += word->correct_text[ch + 1];
|
joined_text += word->correct_text[ch + 1];
|
||||||
LearnPieces(filename, start_blob,
|
LearnPieces(fontname, start_blob,
|
||||||
word->best_state[ch] + word->best_state[ch + 1],
|
word->best_state[ch] + word->best_state[ch + 1],
|
||||||
threshold, CST_NGRAM, joined_text.string(), word);
|
threshold, CST_NGRAM, joined_text.string(), word);
|
||||||
}
|
}
|
||||||
@ -342,16 +340,16 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
|||||||
|
|
||||||
// Builds a blob of length fragments, from the word, starting at start,
|
// Builds a blob of length fragments, from the word, starting at start,
|
||||||
// and then learns it, as having the given correct_text.
|
// and then learns it, as having the given correct_text.
|
||||||
// If filename is not NULL, then LearnBlob
|
// If fontname is not NULL, then LearnBlob is called and the data will be
|
||||||
// is called and the data will be written to a file for static training.
|
// saved in an internal buffer for static training.
|
||||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||||
// threshold is a magic number required by AdaptToChar and generated by
|
// threshold is a magic number required by AdaptToChar and generated by
|
||||||
// ComputeAdaptionThresholds.
|
// ComputeAdaptionThresholds.
|
||||||
// Although it can be partly inferred from the string, segmentation is
|
// Although it can be partly inferred from the string, segmentation is
|
||||||
// provided to explicitly clarify the character segmentation.
|
// provided to explicitly clarify the character segmentation.
|
||||||
void Classify::LearnPieces(const char* filename, int start, int length,
|
void Classify::LearnPieces(const char* fontname, int start, int length,
|
||||||
float threshold, CharSegmentationType segmentation,
|
float threshold, CharSegmentationType segmentation,
|
||||||
const char* correct_text, WERD_RES *word) {
|
const char* correct_text, WERD_RES* word) {
|
||||||
// TODO(daria) Remove/modify this if/when we want
|
// TODO(daria) Remove/modify this if/when we want
|
||||||
// to train and/or adapt to n-grams.
|
// to train and/or adapt to n-grams.
|
||||||
if (segmentation != CST_WHOLE &&
|
if (segmentation != CST_WHOLE &&
|
||||||
@ -385,7 +383,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
|||||||
}
|
}
|
||||||
#endif // GRAPHICS_DISABLED
|
#endif // GRAPHICS_DISABLED
|
||||||
|
|
||||||
if (filename != NULL) {
|
if (fontname != NULL) {
|
||||||
classify_norm_method.set_value(character); // force char norm spc 30/11/93
|
classify_norm_method.set_value(character); // force char norm spc 30/11/93
|
||||||
tess_bn_matching.set_value(false); // turn it off
|
tess_bn_matching.set_value(false); // turn it off
|
||||||
tess_cn_matching.set_value(false);
|
tess_cn_matching.set_value(false);
|
||||||
@ -393,8 +391,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
|||||||
INT_FX_RESULT_STRUCT fx_info;
|
INT_FX_RESULT_STRUCT fx_info;
|
||||||
SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
|
SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
|
||||||
&bl_denorm, &cn_denorm, &fx_info);
|
&bl_denorm, &cn_denorm, &fx_info);
|
||||||
LearnBlob(feature_defs_, filename, rotated_blob, bl_denorm, cn_denorm,
|
LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
|
||||||
fx_info, correct_text);
|
|
||||||
} else if (unicharset.contains_unichar(correct_text)) {
|
} else if (unicharset.contains_unichar(correct_text)) {
|
||||||
UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
|
UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
|
||||||
int font_id = word->fontinfo != NULL
|
int font_id = word->fontinfo != NULL
|
||||||
|
@ -20,63 +20,32 @@
|
|||||||
Include Files and Type Defines
|
Include Files and Type Defines
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
#include "blobclass.h"
|
#include "blobclass.h"
|
||||||
#include "extract.h"
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "classify.h"
|
||||||
#include "efio.h"
|
#include "efio.h"
|
||||||
#include "featdefs.h"
|
#include "featdefs.h"
|
||||||
#include "callcpp.h"
|
#include "mf.h"
|
||||||
|
#include "normfeat.h"
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <signal.h>
|
|
||||||
|
|
||||||
#define MAXFILENAME 80
|
|
||||||
#define MAXMATCHES 10
|
|
||||||
|
|
||||||
static const char kUnknownFontName[] = "UnknownFont";
|
static const char kUnknownFontName[] = "UnknownFont";
|
||||||
|
|
||||||
STRING_VAR(classify_font_name, kUnknownFontName,
|
STRING_VAR(classify_font_name, kUnknownFontName,
|
||||||
"Default font name to be used in training");
|
"Default font name to be used in training");
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
namespace tesseract {
|
||||||
Global Data Definitions and Declarations
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
/* name of current image file being processed */
|
|
||||||
extern char imagefile[];
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Public Code
|
Public Code
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
|
// Finds the name of the training font and returns it in fontname, by cutting
|
||||||
/*---------------------------------------------------------------------------*/
|
// it out based on the expectation that the filename is of the form:
|
||||||
// As all TBLOBs, Blob is in baseline normalized coords.
|
// /path/to/dir/[lang].[fontname].exp[num]
|
||||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
// If the global parameter classify_font_name is set, its value is used instead.
|
||||||
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
void ExtractFontName(const STRING& filename, STRING* fontname) {
|
||||||
const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText) {
|
*fontname = classify_font_name;
|
||||||
/*
|
if (*fontname == kUnknownFontName) {
|
||||||
** Parameters:
|
|
||||||
** Blob blob whose micro-features are to be learned
|
|
||||||
** Row row of text that blob came from
|
|
||||||
** BlobText text that corresponds to blob
|
|
||||||
** TextLength number of characters in blob
|
|
||||||
** Globals:
|
|
||||||
** imagefile base filename of the page being learned
|
|
||||||
** classify_font_name
|
|
||||||
** name of font currently being trained on
|
|
||||||
** Operation:
|
|
||||||
** Extract micro-features from the specified blob and append
|
|
||||||
** them to the appropriate file.
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: 7/28/89, DSJ, Created.
|
|
||||||
*/
|
|
||||||
#define TRAIN_SUFFIX ".tr"
|
|
||||||
static FILE *FeatureFile = NULL;
|
|
||||||
STRING Filename(filename);
|
|
||||||
|
|
||||||
// If no fontname was set, try to extract it from the filename
|
|
||||||
STRING CurrFontName = classify_font_name;
|
|
||||||
if (CurrFontName == kUnknownFontName) {
|
|
||||||
// filename is expected to be of the form [lang].[fontname].exp[num]
|
// filename is expected to be of the form [lang].[fontname].exp[num]
|
||||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||||
const char *basename = strrchr(filename.string(), '/');
|
const char *basename = strrchr(filename.string(), '/');
|
||||||
@ -84,47 +53,56 @@ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
|||||||
const char *lastdot = strrchr(filename.string(), '.');
|
const char *lastdot = strrchr(filename.string(), '.');
|
||||||
if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) {
|
if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) {
|
||||||
++firstdot;
|
++firstdot;
|
||||||
CurrFontName = firstdot;
|
*fontname = firstdot;
|
||||||
CurrFontName[lastdot - firstdot] = '\0';
|
fontname->truncate_at(lastdot - firstdot);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// if a feature file is not yet open, open it
|
/*---------------------------------------------------------------------------*/
|
||||||
// the name of the file is the name of the image plus TRAIN_SUFFIX
|
// Extracts features from the given blob and saves them in the tr_file_data_
|
||||||
if (FeatureFile == NULL) {
|
// member variable.
|
||||||
Filename += TRAIN_SUFFIX;
|
// fontname: Name of font that this blob was printed in.
|
||||||
FeatureFile = Efopen(Filename.string(), "wb");
|
// cn_denorm: Character normalization transformation to apply to the blob.
|
||||||
cprintf("TRAINING ... Font name = %s\n", CurrFontName.string());
|
// fx_info: Character normalization parameters computed with cn_denorm.
|
||||||
}
|
// blob_text: Ground truth text for the blob.
|
||||||
|
void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
|
||||||
|
const DENORM& cn_denorm,
|
||||||
|
const INT_FX_RESULT_STRUCT& fx_info,
|
||||||
|
const char* blob_text) {
|
||||||
|
CHAR_DESC CharDesc = NewCharDescription(feature_defs_);
|
||||||
|
CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
|
||||||
|
CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
|
||||||
|
CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
|
||||||
|
CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
|
||||||
|
|
||||||
LearnBlob(FeatureDefs, FeatureFile, Blob, bl_denorm, cn_denorm, fx_info,
|
if (ValidCharDescription(feature_defs_, CharDesc)) {
|
||||||
BlobText, CurrFontName.string());
|
// Label the features with a class name and font name.
|
||||||
} // LearnBlob
|
tr_file_data_ += "\n";
|
||||||
|
tr_file_data_ += fontname;
|
||||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile,
|
tr_file_data_ += " ";
|
||||||
TBLOB* Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
tr_file_data_ += blob_text;
|
||||||
const INT_FX_RESULT_STRUCT& fx_info,
|
tr_file_data_ += "\n";
|
||||||
const char* BlobText, const char* FontName) {
|
|
||||||
CHAR_DESC CharDesc;
|
|
||||||
|
|
||||||
ASSERT_HOST(FeatureFile != NULL);
|
|
||||||
|
|
||||||
CharDesc = ExtractBlobFeatures(FeatureDefs, bl_denorm, cn_denorm, fx_info,
|
|
||||||
Blob);
|
|
||||||
if (CharDesc == NULL) {
|
|
||||||
cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ValidCharDescription(FeatureDefs, CharDesc)) {
|
|
||||||
// label the features with a class name and font name
|
|
||||||
fprintf(FeatureFile, "\n%s %s\n", FontName, BlobText);
|
|
||||||
|
|
||||||
// write micro-features to file and clean up
|
// write micro-features to file and clean up
|
||||||
WriteCharDescription(FeatureDefs, FeatureFile, CharDesc);
|
WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
|
||||||
} else {
|
} else {
|
||||||
tprintf("Blob learned was invalid!\n");
|
tprintf("Blob learned was invalid!\n");
|
||||||
}
|
}
|
||||||
FreeCharDescription(CharDesc);
|
FreeCharDescription(CharDesc);
|
||||||
|
|
||||||
} // LearnBlob
|
} // LearnBlob
|
||||||
|
|
||||||
|
// Writes stored training data to a .tr file based on the given filename.
|
||||||
|
// Returns false on error.
|
||||||
|
bool Classify::WriteTRFile(const STRING& filename) {
|
||||||
|
STRING tr_filename = filename + ".tr";
|
||||||
|
FILE* fp = Efopen(tr_filename.string(), "wb");
|
||||||
|
int len = tr_file_data_.length();
|
||||||
|
bool result =
|
||||||
|
fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
|
||||||
|
fclose(fp);
|
||||||
|
tr_file_data_.truncate_at(0);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace tesseract.
|
||||||
|
@ -21,9 +21,7 @@
|
|||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Include Files and Type Defines
|
Include Files and Type Defines
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
#include "featdefs.h"
|
#include "strngs.h"
|
||||||
#include "oldlist.h"
|
|
||||||
#include "blobs.h"
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------
|
/*---------------------------------------------------------------------------
|
||||||
Macros
|
Macros
|
||||||
@ -39,18 +37,14 @@
|
|||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Public Function Prototypes
|
Public Function Prototypes
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
namespace tesseract {
|
||||||
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
// Finds the name of the training font and returns it in fontname, by cutting
|
||||||
const INT_FX_RESULT_STRUCT& fx_info,
|
// it out based on the expectation that the filename is of the form:
|
||||||
const char* BlobText);
|
// /path/to/dir/[lang].[fontname].exp[num]
|
||||||
|
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||||
|
// If the global parameter classify_font_name is set, its value is used instead.
|
||||||
|
void ExtractFontName(const STRING& filename, STRING* fontname);
|
||||||
|
|
||||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob,
|
} // namespace tesseract.
|
||||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info,
|
|
||||||
const char* BlobText, const char* FontName);
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Global Data Definitions and Declarations
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
/*parameter used to turn on/off output of recognized chars to the screen */
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include "dict.h"
|
#include "dict.h"
|
||||||
#include "featdefs.h"
|
#include "featdefs.h"
|
||||||
#include "fontinfo.h"
|
#include "fontinfo.h"
|
||||||
|
#include "imagedata.h"
|
||||||
#include "intfx.h"
|
#include "intfx.h"
|
||||||
#include "intmatcher.h"
|
#include "intmatcher.h"
|
||||||
#include "normalis.h"
|
#include "normalis.h"
|
||||||
@ -119,25 +120,25 @@ class Classify : public CCStruct {
|
|||||||
const UNICHARSET& target_unicharset);
|
const UNICHARSET& target_unicharset);
|
||||||
/* adaptmatch.cpp ***********************************************************/
|
/* adaptmatch.cpp ***********************************************************/
|
||||||
|
|
||||||
// Learn the given word using its chopped_word, seam_array, denorm,
|
// Learns the given word using its chopped_word, seam_array, denorm,
|
||||||
// box_word, best_state, and correct_text to learn both correctly and
|
// box_word, best_state, and correct_text to learn both correctly and
|
||||||
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
|
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
|
||||||
// is called and the data will be written to a file for static training.
|
// is called and the data will be saved in an internal buffer.
|
||||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||||
void LearnWord(const char* filename, WERD_RES *word);
|
void LearnWord(const char* fontname, WERD_RES* word);
|
||||||
|
|
||||||
// Builds a blob of length fragments, from the word, starting at start,
|
// Builds a blob of length fragments, from the word, starting at start,
|
||||||
// and then learn it, as having the given correct_text.
|
// and then learns it, as having the given correct_text.
|
||||||
// If filename is not NULL, then LearnBlob
|
// If fontname is not NULL, then LearnBlob is called and the data will be
|
||||||
// is called and the data will be written to a file for static training.
|
// saved in an internal buffer for static training.
|
||||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||||
// threshold is a magic number required by AdaptToChar and generated by
|
// threshold is a magic number required by AdaptToChar and generated by
|
||||||
// GetAdaptThresholds.
|
// ComputeAdaptionThresholds.
|
||||||
// Although it can be partly inferred from the string, segmentation is
|
// Although it can be partly inferred from the string, segmentation is
|
||||||
// provided to explicitly clarify the character segmentation.
|
// provided to explicitly clarify the character segmentation.
|
||||||
void LearnPieces(const char* filename, int start, int length,
|
void LearnPieces(const char* fontname, int start, int length, float threshold,
|
||||||
float threshold, CharSegmentationType segmentation,
|
CharSegmentationType segmentation, const char* correct_text,
|
||||||
const char* correct_text, WERD_RES *word);
|
WERD_RES* word);
|
||||||
void InitAdaptiveClassifier(bool load_pre_trained_templates);
|
void InitAdaptiveClassifier(bool load_pre_trained_templates);
|
||||||
void InitAdaptedClass(TBLOB *Blob,
|
void InitAdaptedClass(TBLOB *Blob,
|
||||||
CLASS_ID ClassId,
|
CLASS_ID ClassId,
|
||||||
@ -361,7 +362,22 @@ class Classify : public CCStruct {
|
|||||||
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
|
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
|
||||||
/* picofeat.cpp ***********************************************************/
|
/* picofeat.cpp ***********************************************************/
|
||||||
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);
|
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);
|
||||||
|
FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob,
|
||||||
|
const INT_FX_RESULT_STRUCT& fx_info);
|
||||||
|
FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob,
|
||||||
|
const INT_FX_RESULT_STRUCT& fx_info);
|
||||||
|
/* blobclass.cpp ***********************************************************/
|
||||||
|
// Extracts features from the given blob and saves them in the tr_file_data_
|
||||||
|
// member variable.
|
||||||
|
// fontname: Name of font that this blob was printed in.
|
||||||
|
// cn_denorm: Character normalization transformation to apply to the blob.
|
||||||
|
// fx_info: Character normalization parameters computed with cn_denorm.
|
||||||
|
// blob_text: Ground truth text for the blob.
|
||||||
|
void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm,
|
||||||
|
const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
|
||||||
|
// Writes stored training data to a .tr file based on the given filename.
|
||||||
|
// Returns false on error.
|
||||||
|
bool WriteTRFile(const STRING& filename);
|
||||||
|
|
||||||
// Member variables.
|
// Member variables.
|
||||||
|
|
||||||
@ -498,6 +514,9 @@ class Classify : public CCStruct {
|
|||||||
/* variables used to hold performance statistics */
|
/* variables used to hold performance statistics */
|
||||||
int NumAdaptationsFailed;
|
int NumAdaptationsFailed;
|
||||||
|
|
||||||
|
// Training data gathered here for all the images in a document.
|
||||||
|
STRING tr_file_data_;
|
||||||
|
|
||||||
// Expected number of features in the class pruner, used to penalize
|
// Expected number of features in the class pruner, used to penalize
|
||||||
// unknowns that have too few features (like a c being classified as e) so
|
// unknowns that have too few features (like a c being classified as e) so
|
||||||
// it doesn't recognize everything as '@' or '#'.
|
// it doesn't recognize everything as '@' or '#'.
|
||||||
|
@ -1,32 +0,0 @@
|
|||||||
#ifndef EXTERN_H
|
|
||||||
#define EXTERN_H
|
|
||||||
|
|
||||||
/* -*-C-*-
|
|
||||||
********************************************************************************
|
|
||||||
*
|
|
||||||
* File: extern.h (Formerly extern.h)
|
|
||||||
* Description: External definitions for C or C++
|
|
||||||
* Author: Mark Seaman, OCR Technology
|
|
||||||
* Created: Tue Mar 20 14:01:22 1990
|
|
||||||
* Modified: Tue Mar 20 14:02:09 1990 (Mark Seaman) marks@hpgrlt
|
|
||||||
* Language: C
|
|
||||||
* Package: N/A
|
|
||||||
* Status: Experimental (Do Not Distribute)
|
|
||||||
*
|
|
||||||
* (c) Copyright 1990, Hewlett-Packard Company.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
*
|
|
||||||
********************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define EXTERN extern
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,74 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: extract.c
|
|
||||||
** Purpose: Generic high level feature extractor routines.
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Sun Jan 21 09:44:08 1990, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Include Files and Type Defines
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
#include "extract.h"
|
|
||||||
#include "flexfx.h"
|
|
||||||
#include "danerror.h"
|
|
||||||
|
|
||||||
typedef CHAR_FEATURES (*CF_FUNC) ();
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Private Function Prototypes
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
void ExtractorStub();
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Public Code
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
/**
|
|
||||||
* Extract features from Blob by calling the feature
|
|
||||||
* extractor which is currently being used. This routine
|
|
||||||
* simply provides a high level interface to feature
|
|
||||||
* extraction. The caller can extract any type of features
|
|
||||||
* from a blob without understanding any lower level details.
|
|
||||||
*
|
|
||||||
* @param FeatureDefs definitions of feature types/extractors
|
|
||||||
* @param denorm Normalize/denormalize to access original image
|
|
||||||
* @param Blob blob to extract features from
|
|
||||||
*
|
|
||||||
* @return The character features extracted from Blob.
|
|
||||||
* @note Exceptions: none
|
|
||||||
* @note History: Sun Jan 21 10:07:28 1990, DSJ, Created.
|
|
||||||
*/
|
|
||||||
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|
||||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info,
|
|
||||||
TBLOB *Blob) {
|
|
||||||
return ExtractFlexFeatures(FeatureDefs, Blob, bl_denorm, cn_denorm, fx_info);
|
|
||||||
} /* ExtractBlobFeatures */
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Private Code
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
void
|
|
||||||
ExtractorStub ()
|
|
||||||
/**
|
|
||||||
* This routine is used to stub out feature extractors
|
|
||||||
* that are no longer used. It simply calls DoError.
|
|
||||||
*
|
|
||||||
* @note Exceptions: none
|
|
||||||
* @note History: Wed Jan 2 14:16:49 1991, DSJ, Created.
|
|
||||||
*/
|
|
||||||
#define DUMMY_ERROR 1
|
|
||||||
{
|
|
||||||
DoError (DUMMY_ERROR, "Selected feature extractor has been stubbed out!");
|
|
||||||
} /* ExtractorStub */
|
|
@ -1,40 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: extract.h
|
|
||||||
** Purpose: Interface to high level generic feature extraction.
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: 1/21/90, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
#ifndef EXTRACT_H
|
|
||||||
#define EXTRACT_H
|
|
||||||
|
|
||||||
#include "featdefs.h"
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
class DENORM;
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Public Function Prototypes
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
// Deprecated! Will be deleted soon!
|
|
||||||
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
|
|
||||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
|
||||||
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|
||||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info, TBLOB *Blob);
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------
|
|
||||||
Private Function Prototypes
|
|
||||||
----------------------------------------------------------------------------*/
|
|
||||||
void ExtractorStub();
|
|
||||||
#endif
|
|
@ -178,7 +178,7 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
|
|||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
/**
|
/**
|
||||||
* Write a textual representation of CharDesc to File.
|
* Appends a textual representation of CharDesc to str.
|
||||||
* The format used is to write out the number of feature
|
* The format used is to write out the number of feature
|
||||||
* sets which will be written followed by a representation of
|
* sets which will be written followed by a representation of
|
||||||
* each feature set.
|
* each feature set.
|
||||||
@ -187,18 +187,15 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
|
|||||||
* by a description of the feature set. Feature sets which are
|
* by a description of the feature set. Feature sets which are
|
||||||
* not present are not written.
|
* not present are not written.
|
||||||
*
|
*
|
||||||
* Globals:
|
|
||||||
* - none
|
|
||||||
*
|
|
||||||
* @param FeatureDefs definitions of feature types/extractors
|
* @param FeatureDefs definitions of feature types/extractors
|
||||||
* @param File open text file to write CharDesc to
|
* @param str string to append CharDesc to
|
||||||
* @param CharDesc character description to write to File
|
* @param CharDesc character description to append to str
|
||||||
*
|
*
|
||||||
* @note Exceptions: none
|
* @note Exceptions: none
|
||||||
* @note History: Wed May 23 17:21:18 1990, DSJ, Created.
|
* @note History: Wed May 23 17:21:18 1990, DSJ, Created.
|
||||||
*/
|
*/
|
||||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
|
||||||
FILE *File, CHAR_DESC CharDesc) {
|
CHAR_DESC CharDesc, STRING* str) {
|
||||||
int Type;
|
int Type;
|
||||||
int NumSetsToWrite = 0;
|
int NumSetsToWrite = 0;
|
||||||
|
|
||||||
@ -206,11 +203,14 @@ void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|||||||
if (CharDesc->FeatureSets[Type])
|
if (CharDesc->FeatureSets[Type])
|
||||||
NumSetsToWrite++;
|
NumSetsToWrite++;
|
||||||
|
|
||||||
fprintf (File, " %d\n", NumSetsToWrite);
|
str->add_str_int(" ", NumSetsToWrite);
|
||||||
for (Type = 0; Type < CharDesc->NumFeatureSets; Type++)
|
*str += "\n";
|
||||||
if (CharDesc->FeatureSets[Type]) {
|
for (Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
|
||||||
fprintf (File, "%s ", (FeatureDefs.FeatureDesc[Type])->ShortName);
|
if (CharDesc->FeatureSets[Type]) {
|
||||||
WriteFeatureSet (File, CharDesc->FeatureSets[Type]);
|
*str += FeatureDefs.FeatureDesc[Type]->ShortName;
|
||||||
|
*str += " ";
|
||||||
|
WriteFeatureSet(CharDesc->FeatureSets[Type], str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} /* WriteCharDescription */
|
} /* WriteCharDescription */
|
||||||
|
|
||||||
@ -231,6 +231,8 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|||||||
anything_written = true;
|
anything_written = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return anything_written && well_formed;
|
return anything_written && well_formed;
|
||||||
|
@ -48,7 +48,6 @@ typedef CHAR_DESC_STRUCT *CHAR_DESC;
|
|||||||
struct FEATURE_DEFS_STRUCT {
|
struct FEATURE_DEFS_STRUCT {
|
||||||
inT32 NumFeatureTypes;
|
inT32 NumFeatureTypes;
|
||||||
const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES];
|
const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES];
|
||||||
const FEATURE_EXT_STRUCT* FeatureExtractors[NUM_FEATURE_TYPES];
|
|
||||||
int FeatureEnabled[NUM_FEATURE_TYPES];
|
int FeatureEnabled[NUM_FEATURE_TYPES];
|
||||||
};
|
};
|
||||||
typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS;
|
typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS;
|
||||||
@ -65,8 +64,8 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs);
|
|||||||
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||||
CHAR_DESC CharDesc);
|
CHAR_DESC CharDesc);
|
||||||
|
|
||||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
|
||||||
FILE *File, CHAR_DESC CharDesc);
|
CHAR_DESC CharDesc, STRING* str);
|
||||||
|
|
||||||
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||||
FILE *File);
|
FILE *File);
|
||||||
|
@ -1,72 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: flexfx.c
|
|
||||||
** Purpose: Interface to flexible feature extractor.
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Wed May 23 13:45:10 1990, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Include Files and Type Defines
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
#include "flexfx.h"
|
|
||||||
#include "featdefs.h"
|
|
||||||
#include "emalloc.h"
|
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Public Code
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
// Deprecated! Will be deleted soon!
|
|
||||||
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
|
|
||||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
|
||||||
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|
||||||
TBLOB *Blob, const DENORM& bl_denorm,
|
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** Blob blob to extract features from
|
|
||||||
** denorm control parameter for feature extractor
|
|
||||||
** Globals: none
|
|
||||||
** Operation: Allocate a new character descriptor and fill it in by
|
|
||||||
** calling all feature extractors which are enabled.
|
|
||||||
** Return: Structure containing features extracted from Blob.
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Wed May 23 13:46:22 1990, DSJ, Created.
|
|
||||||
*/
|
|
||||||
int Type;
|
|
||||||
CHAR_DESC CharDesc;
|
|
||||||
|
|
||||||
CharDesc = NewCharDescription(FeatureDefs);
|
|
||||||
|
|
||||||
for (Type = 0; Type < CharDesc->NumFeatureSets; Type++)
|
|
||||||
if (FeatureDefs.FeatureExtractors[Type] != NULL &&
|
|
||||||
FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) {
|
|
||||||
CharDesc->FeatureSets[Type] =
|
|
||||||
(FeatureDefs.FeatureExtractors[Type])->Extractor(Blob,
|
|
||||||
bl_denorm,
|
|
||||||
cn_denorm,
|
|
||||||
fx_info);
|
|
||||||
if (CharDesc->FeatureSets[Type] == NULL) {
|
|
||||||
tprintf("Feature extractor for type %d = %s returned NULL!\n",
|
|
||||||
Type, FeatureDefs.FeatureDesc[Type]->ShortName);
|
|
||||||
FreeCharDescription(CharDesc);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (CharDesc);
|
|
||||||
|
|
||||||
} /* ExtractFlexFeatures */
|
|
@ -1,36 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: flexfx.h
|
|
||||||
** Purpose: Interface to flexible feature extractor.
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Wed May 23 13:36:58 1990, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
#ifndef FLEXFX_H
|
|
||||||
#define FLEXFX_H
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Include Files and Type Defines
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
#include "featdefs.h"
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Public Function Prototypes
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
// As with all TBLOBs this one is also baseline normalized.
|
|
||||||
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|
||||||
TBLOB *Blob, const DENORM& bl_denorm,
|
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,45 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: fxdefs.c
|
|
||||||
** Purpose: Utility functions to be used by feature extractors.
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Sun Jan 21 15:29:02 1990, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
#include "fxdefs.h"
|
|
||||||
#include "featdefs.h"
|
|
||||||
#include "mf.h"
|
|
||||||
#include "outfeat.h"
|
|
||||||
#include "picofeat.h"
|
|
||||||
#include "normfeat.h"
|
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------
|
|
||||||
Global Data Definitions and Declarations
|
|
||||||
-----------------------------------------------------------------------------*/
|
|
||||||
// Definitions of extractors separated from feature definitions.
|
|
||||||
const FEATURE_EXT_STRUCT MicroFeatureExt = { ExtractMicros };
|
|
||||||
const FEATURE_EXT_STRUCT CharNormExt = { ExtractCharNormFeatures };
|
|
||||||
const FEATURE_EXT_STRUCT IntFeatExt = { ExtractIntCNFeatures };
|
|
||||||
const FEATURE_EXT_STRUCT GeoFeatExt = { ExtractIntGeoFeatures };
|
|
||||||
|
|
||||||
// MUST be kept in-sync with DescDefs in featdefs.cpp.
|
|
||||||
const FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = {
|
|
||||||
&MicroFeatureExt,
|
|
||||||
&CharNormExt,
|
|
||||||
&IntFeatExt,
|
|
||||||
&GeoFeatExt
|
|
||||||
};
|
|
||||||
|
|
||||||
void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs) {
|
|
||||||
for (int i = 0; i < NUM_FEATURE_TYPES; ++i)
|
|
||||||
FeatureDefs->FeatureExtractors[i] = ExtractorDefs[i];
|
|
||||||
}
|
|
@ -1,25 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: fxdefs.h
|
|
||||||
** Purpose: Generic interface definitions for feature extractors
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Fri Jan 19 09:04:14 1990, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
#ifndef FXDEFS_H
|
|
||||||
#define FXDEFS_H
|
|
||||||
|
|
||||||
#include "featdefs.h"
|
|
||||||
|
|
||||||
void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs);
|
|
||||||
|
|
||||||
#endif
|
|
@ -75,9 +75,9 @@ namespace tesseract {
|
|||||||
|
|
||||||
// Generates a TrainingSample from a TBLOB. Extracts features and sets
|
// Generates a TrainingSample from a TBLOB. Extracts features and sets
|
||||||
// the bounding box, so classifiers that operate on the image can work.
|
// the bounding box, so classifiers that operate on the image can work.
|
||||||
// TODO(rays) BlobToTrainingSample must remain a global function until
|
// TODO(rays) Make BlobToTrainingSample a member of Classify now that
|
||||||
// the FlexFx and FeatureDescription code can be removed and LearnBlob
|
// the FlexFx and FeatureDescription code have been removed and LearnBlob
|
||||||
// made a member of Classify.
|
// is now a member of Classify.
|
||||||
TrainingSample* BlobToTrainingSample(
|
TrainingSample* BlobToTrainingSample(
|
||||||
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
|
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
|
||||||
GenericVector<INT_FEATURE_STRUCT>* bl_features) {
|
GenericVector<INT_FEATURE_STRUCT>* bl_features) {
|
||||||
|
@ -33,9 +33,7 @@
|
|||||||
Private Code
|
Private Code
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** Blob blob to extract micro-features from
|
** Blob blob to extract micro-features from
|
||||||
@ -54,8 +52,7 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
|||||||
FEATURE Feature;
|
FEATURE Feature;
|
||||||
MICROFEATURE OldFeature;
|
MICROFEATURE OldFeature;
|
||||||
|
|
||||||
OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, bl_denorm, cn_denorm,
|
OldFeatures = BlobMicroFeatures(Blob, cn_denorm);
|
||||||
fx_info);
|
|
||||||
if (OldFeatures == NULL)
|
if (OldFeatures == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
NumFeatures = count (OldFeatures);
|
NumFeatures = count (OldFeatures);
|
||||||
|
@ -34,8 +34,6 @@ typedef float MicroFeature[MFCount];
|
|||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
Private Function Prototypes
|
Private Function Prototypes
|
||||||
-----------------------------------------------------------------------------*/
|
-----------------------------------------------------------------------------*/
|
||||||
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm);
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,7 +23,6 @@
|
|||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
#include "oldlist.h"
|
#include "oldlist.h"
|
||||||
#include "matchdefs.h"
|
#include "matchdefs.h"
|
||||||
#include "xform2d.h"
|
|
||||||
|
|
||||||
/* definition of a list of micro-features */
|
/* definition of a list of micro-features */
|
||||||
typedef LIST MICROFEATURES;
|
typedef LIST MICROFEATURES;
|
||||||
|
@ -59,9 +59,7 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
|
|||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** Blob blob to extract micro-features from
|
** Blob blob to extract micro-features from
|
||||||
@ -98,7 +96,7 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
|||||||
}
|
}
|
||||||
FreeOutlines(Outlines);
|
FreeOutlines(Outlines);
|
||||||
}
|
}
|
||||||
return ((CHAR_FEATURES) MicroFeatures);
|
return MicroFeatures;
|
||||||
} /* BlobMicroFeatures */
|
} /* BlobMicroFeatures */
|
||||||
|
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Include Files and Type Defines
|
Include Files and Type Defines
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
|
#include "mfdefs.h"
|
||||||
#include "params.h"
|
#include "params.h"
|
||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Variables
|
Variables
|
||||||
@ -35,8 +36,6 @@ extern double_VAR_H(classify_max_slope, 2.414213562,
|
|||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Public Function Prototypes
|
Public Function Prototypes
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm);
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -59,9 +59,7 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) {
|
|||||||
// the x center of the grapheme's bounding box.
|
// the x center of the grapheme's bounding box.
|
||||||
// English: [0.011, 0.31]
|
// English: [0.011, 0.31]
|
||||||
//
|
//
|
||||||
FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) {
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
FEATURE_SET feature_set = NewFeatureSet(1);
|
FEATURE_SET feature_set = NewFeatureSet(1);
|
||||||
FEATURE feature = NewFeature(&CharNormDesc);
|
FEATURE feature = NewFeature(&CharNormDesc);
|
||||||
|
|
||||||
|
@ -34,8 +34,6 @@ typedef enum {
|
|||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
FLOAT32 ActualOutlineLength(FEATURE Feature);
|
FLOAT32 ActualOutlineLength(FEATURE Feature);
|
||||||
|
|
||||||
FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info);
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -209,55 +209,52 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
|||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
void WriteFeature(FILE *File, FEATURE Feature) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** File open text file to write Feature to
|
** Feature: feature to write out to str
|
||||||
** Feature feature to write out to File
|
** str: string to write Feature to
|
||||||
** Globals: none
|
** Operation: Appends a textual representation of Feature to str.
|
||||||
** Operation: Write a textual representation of Feature to File.
|
** This representation is simply a list of the N parameters
|
||||||
** This representation is simply a list of the N parameters
|
** of the feature, terminated with a newline. It is assumed
|
||||||
** of the feature, terminated with a newline. It is assumed
|
** that the ExtraPenalty field can be reconstructed from the
|
||||||
** that the ExtraPenalty field can be reconstructed from the
|
** parameters of the feature. It is also assumed that the
|
||||||
** parameters of the feature. It is also assumed that the
|
** feature type information is specified or assumed elsewhere.
|
||||||
** feature type information is specified or assumed elsewhere.
|
** Return: none
|
||||||
** Return: none
|
** Exceptions: none
|
||||||
** Exceptions: none
|
** History: Wed May 23 09:28:18 1990, DSJ, Created.
|
||||||
** History: Wed May 23 09:28:18 1990, DSJ, Created.
|
|
||||||
*/
|
*/
|
||||||
int i;
|
void WriteFeature(FEATURE Feature, STRING* str) {
|
||||||
|
for (int i = 0; i < Feature->Type->NumParams; i++) {
|
||||||
for (i = 0; i < Feature->Type->NumParams; i++) {
|
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
assert(!isnan(Feature->Params[i]));
|
assert(!isnan(Feature->Params[i]));
|
||||||
#endif
|
#endif
|
||||||
fprintf(File, " %g", Feature->Params[i]);
|
str->add_str_double(" ", Feature->Params[i]);
|
||||||
}
|
}
|
||||||
fprintf(File, "\n");
|
*str += "\n";
|
||||||
} /* WriteFeature */
|
} /* WriteFeature */
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** File open text file to write FeatureSet to
|
** FeatureSet: feature set to write to str
|
||||||
** FeatureSet feature set to write to File
|
** str: string to write FeatureSet to
|
||||||
** Globals: none
|
** Globals: none
|
||||||
** Operation: Write a textual representation of FeatureSet to File.
|
** Operation: Appends a textual representation of FeatureSet to str.
|
||||||
** This representation is an integer specifying the number of
|
** This representation is an integer specifying the number of
|
||||||
** features in the set, followed by a newline, followed by
|
** features in the set, followed by a newline, followed by
|
||||||
** text representations for each feature in the set.
|
** text representations for each feature in the set.
|
||||||
** Return: none
|
** Return: none
|
||||||
** Exceptions: none
|
** Exceptions: none
|
||||||
** History: Wed May 23 10:06:03 1990, DSJ, Created.
|
** History: Wed May 23 10:06:03 1990, DSJ, Created.
|
||||||
*/
|
*/
|
||||||
int i;
|
void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
|
||||||
|
|
||||||
if (FeatureSet) {
|
if (FeatureSet) {
|
||||||
fprintf (File, "%d\n", FeatureSet->NumFeatures);
|
str->add_str_int("", FeatureSet->NumFeatures);
|
||||||
for (i = 0; i < FeatureSet->NumFeatures; i++)
|
*str += "\n";
|
||||||
WriteFeature (File, FeatureSet->Features[i]);
|
for (int i = 0; i < FeatureSet->NumFeatures; i++) {
|
||||||
|
WriteFeature(FeatureSet->Features[i], str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} /* WriteFeatureSet */
|
} /* WriteFeatureSet */
|
||||||
|
|
||||||
|
@ -79,13 +79,6 @@ typedef FEATURE_SET_STRUCT *FEATURE_SET;
|
|||||||
// classifier does not need to know the details of this data structure.
|
// classifier does not need to know the details of this data structure.
|
||||||
typedef char *CHAR_FEATURES;
|
typedef char *CHAR_FEATURES;
|
||||||
|
|
||||||
typedef FEATURE_SET (*FX_FUNC)(TBLOB *, const DENORM&, const DENORM&,
|
|
||||||
const INT_FX_RESULT_STRUCT&);
|
|
||||||
|
|
||||||
struct FEATURE_EXT_STRUCT {
|
|
||||||
FX_FUNC Extractor; // func to extract features
|
|
||||||
};
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------
|
/*----------------------------------------------------------------------
|
||||||
Macros for defining the parameters of a new features
|
Macros for defining the parameters of a new features
|
||||||
----------------------------------------------------------------------*/
|
----------------------------------------------------------------------*/
|
||||||
|
@ -223,10 +223,10 @@ void NormalizePicoX(FEATURE_SET FeatureSet) {
|
|||||||
}
|
}
|
||||||
} /* NormalizePicoX */
|
} /* NormalizePicoX */
|
||||||
|
|
||||||
|
namespace tesseract {
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
FEATURE_SET Classify::ExtractIntCNFeatures(
|
||||||
const DENORM& cn_denorm,
|
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** blob blob to extract features from
|
** blob blob to extract features from
|
||||||
@ -237,9 +237,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
|||||||
*/
|
*/
|
||||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||||
tesseract::TrainingSample* sample =
|
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||||
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
|
blob, false, &local_fx_info, &bl_features);
|
||||||
&bl_features);
|
|
||||||
if (sample == NULL) return NULL;
|
if (sample == NULL) return NULL;
|
||||||
|
|
||||||
int num_features = sample->num_features();
|
int num_features = sample->num_features();
|
||||||
@ -259,9 +258,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
|||||||
} /* ExtractIntCNFeatures */
|
} /* ExtractIntCNFeatures */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
FEATURE_SET Classify::ExtractIntGeoFeatures(
|
||||||
const DENORM& cn_denorm,
|
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
|
||||||
/*
|
/*
|
||||||
** Parameters:
|
** Parameters:
|
||||||
** blob blob to extract features from
|
** blob blob to extract features from
|
||||||
@ -272,9 +270,8 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
|||||||
*/
|
*/
|
||||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||||
tesseract::TrainingSample* sample =
|
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||||
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
|
blob, false, &local_fx_info, &bl_features);
|
||||||
&bl_features);
|
|
||||||
if (sample == NULL) return NULL;
|
if (sample == NULL) return NULL;
|
||||||
|
|
||||||
FEATURE_SET feature_set = NewFeatureSet(1);
|
FEATURE_SET feature_set = NewFeatureSet(1);
|
||||||
@ -288,3 +285,5 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
|||||||
|
|
||||||
return feature_set;
|
return feature_set;
|
||||||
} /* ExtractIntGeoFeatures */
|
} /* ExtractIntGeoFeatures */
|
||||||
|
|
||||||
|
} // namespace tesseract.
|
||||||
|
@ -58,13 +58,6 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length");
|
|||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
#define GetPicoFeatureLength() (PicoFeatureLength)
|
#define GetPicoFeatureLength() (PicoFeatureLength)
|
||||||
|
|
||||||
FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
|
||||||
const DENORM& cn_denorm,
|
|
||||||
const INT_FX_RESULT_STRUCT& fx_info);
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
/**----------------------------------------------------------------------------
|
||||||
Global Data Definitions and Declarations
|
Global Data Definitions and Declarations
|
||||||
----------------------------------------------------------------------------**/
|
----------------------------------------------------------------------------**/
|
||||||
|
@ -1,120 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: xform2d.c
|
|
||||||
** Purpose: Library routines for performing 2D point transformations
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Fri Sep 22 09:54:17 1989, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Include Files and Type Defines
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
#include "xform2d.h"
|
|
||||||
#include <math.h>
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Public Code
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
|
|
||||||
/* Resets *M to the identity transform: a = d = 1 and every other
   coefficient (b, c, tx, ty) is 0. */
void InitMatrix(MATRIX_2D *M) {
  M->a = M->d = 1;
  M->b = M->c = 0;
  M->tx = M->ty = 0;
}
|
|
||||||
|
|
||||||
/* Copies all six coefficients of *A (source) into *B (destination). */
void CopyMatrix(MATRIX_2D *A, MATRIX_2D *B) {
  // Plain struct assignment copies a, b, c, d, tx and ty in one statement.
  *B = *A;
}
|
|
||||||
|
|
||||||
/* Adds the offset (X, Y), mapped through the a/b/c/d part of *M, to the
   translation terms tx and ty. The a, b, c and d coefficients are unchanged. */
void TranslateMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  M->tx = M->tx + (M->a * X + M->c * Y);
  M->ty = M->ty + (M->b * X + M->d * Y);
}
|
|
||||||
|
|
||||||
/* Scales *M in place: the coefficients applied to the x input (a, b) are
   multiplied by X and those applied to the y input (c, d) by Y.
   tx and ty are left untouched. */
void ScaleMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  // x-input coefficients
  M->a = M->a * X;
  M->b = M->b * X;
  // y-input coefficients
  M->c = M->c * Y;
  M->d = M->d * Y;
}
|
|
||||||
|
|
||||||
/* Mirrors in x: implemented as a scale by (-1, 1). */
void MirrorMatrixInX(MATRIX_2D *M) {
  ScaleMatrix(M, -1, 1);
}
|
|
||||||
/* Mirrors in y: implemented as a scale by (1, -1). */
void MirrorMatrixInY(MATRIX_2D *M) {
  ScaleMatrix(M, 1, -1);
}
|
|
||||||
/* Mirrors in both x and y: implemented as a scale by (-1, -1). */
void MirrorMatrixInXY(MATRIX_2D *M) {
  ScaleMatrix(M, -1, -1);
}
|
|
||||||
|
|
||||||
/* Returns the transformed x coordinate of the point (X, Y):
   a * X + c * Y + tx. */
FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  return M->a * X + M->c * Y + M->tx;
}
|
|
||||||
|
|
||||||
/* Returns the transformed y coordinate of the point (X, Y):
   b * X + d * Y + ty. */
FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  return (M->b * X + M->d * Y) + M->ty;
}
|
|
||||||
|
|
||||||
/* Transforms point A through *M and stores the result in *B, using MapX
   for the x coordinate and MapY for the y coordinate. */
void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B) {
  FPOINT *const mapped = B;
  // x is written before y is computed; keep this order.
  mapped->x = MapX(M, A.x, A.y);
  mapped->y = MapY(M, A.x, A.y);
}
|
|
||||||
|
|
||||||
/* Returns the x component of the displacement (DX, DY) after mapping:
   a * DX + c * DY. The translation term tx is not applied. */
FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) {
  return (M->a * DX) + (M->c * DY);
}
|
|
||||||
|
|
||||||
/* Returns the y component of the displacement (DX, DY) after mapping:
   b * DX + d * DY. The translation term ty is not applied. */
FLOAT32 MapDy(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) {
  return (M->b * DX) + (M->d * DY);
}
|
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle) {
/*
 **	Parameters:
 **		Matrix	transformation matrix to rotate (updated in place)
 **		Angle	rotation angle, in radians
 **	Globals: none
 **	Operation:
 **		Rotates the coordinate system described by Matrix about its
 **		origin by Angle radians, i.e.
 **
 **			Matrix = R X Matrix
 **
 **		with R being
 **
 **			 cos Angle	sin Angle	0
 **			-sin Angle	cos Angle	0
 **			 0		0		1
 **
 **		Only a, b, c and d are rewritten; tx and ty are not touched.
 **	Return: none
 **	Exceptions: none
 **	History: 7/27/89, DSJ, Create.
 */
  const FLOAT32 cos_angle = cos(static_cast<double>(Angle));
  const FLOAT32 sin_angle = sin(static_cast<double>(Angle));

  // The new a and b are computed up front because the c and d updates
  // below still need the old a and b.
  const FLOAT32 rotated_a = Matrix->a * cos_angle + Matrix->c * sin_angle;
  const FLOAT32 rotated_b = Matrix->b * cos_angle + Matrix->d * sin_angle;

  Matrix->c = Matrix->a * -sin_angle + Matrix->c * cos_angle;
  Matrix->d = Matrix->b * -sin_angle + Matrix->d * cos_angle;
  Matrix->a = rotated_a;
  Matrix->b = rotated_b;
}                                /* RotateMatrix */
|
|
@ -1,60 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
** Filename: xform2d.h
|
|
||||||
** Purpose: Definitions for using 2D point transformation library
|
|
||||||
** Author: Dan Johnson
|
|
||||||
** History: Fri Sep 22 09:57:08 1989, DSJ, Created.
|
|
||||||
**
|
|
||||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
** you may not use this file except in compliance with the License.
|
|
||||||
** You may obtain a copy of the License at
|
|
||||||
** http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
** Unless required by applicable law or agreed to in writing, software
|
|
||||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
** See the License for the specific language governing permissions and
|
|
||||||
** limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
#ifndef XFORM2D_H
|
|
||||||
#define XFORM2D_H
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Include Files and Type Defines
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
#include "fpoint.h"
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
FLOAT32 a, b, c, d, tx, ty;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
MATRIX_2D, *MATRIX_2D_PTR;
|
|
||||||
|
|
||||||
/**----------------------------------------------------------------------------
|
|
||||||
Public Function Prototypes
|
|
||||||
----------------------------------------------------------------------------**/
|
|
||||||
|
|
||||||
void InitMatrix(MATRIX_2D *M);
|
|
||||||
void CopyMatrix(MATRIX_2D *A, MATRIX_2D *B);
|
|
||||||
|
|
||||||
/* matrix scaling, translation, rotation, mirroring, etc.*/
|
|
||||||
void TranslateMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
|
||||||
void ScaleMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
|
||||||
|
|
||||||
void MirrorMatrixInX(MATRIX_2D *M);
|
|
||||||
void MirrorMatrixInY(MATRIX_2D *M);
|
|
||||||
void MirrorMatrixInXY(MATRIX_2D *M);
|
|
||||||
|
|
||||||
/* using a matrix to map points*/
|
|
||||||
FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
|
||||||
|
|
||||||
FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
|
||||||
|
|
||||||
void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B);
|
|
||||||
|
|
||||||
FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY);
|
|
||||||
FLOAT32 MapDy(MATRIX_2D M, FLOAT32 DX, FLOAT32 DY);
|
|
||||||
|
|
||||||
void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle);
|
|
||||||
#endif
|
|
@ -21,7 +21,6 @@
|
|||||||
#include "chop.h"
|
#include "chop.h"
|
||||||
#include "chopper.h"
|
#include "chopper.h"
|
||||||
#include "danerror.h"
|
#include "danerror.h"
|
||||||
#include "fxdefs.h"
|
|
||||||
#include "globals.h"
|
#include "globals.h"
|
||||||
#include "gradechop.h"
|
#include "gradechop.h"
|
||||||
#include "pageres.h"
|
#include "pageres.h"
|
||||||
@ -49,7 +48,6 @@ void Wordrec::program_editup(const char *textbase,
|
|||||||
bool init_dict) {
|
bool init_dict) {
|
||||||
if (textbase != NULL) imagefile = textbase;
|
if (textbase != NULL) imagefile = textbase;
|
||||||
InitFeatureDefs(&feature_defs_);
|
InitFeatureDefs(&feature_defs_);
|
||||||
SetupExtractors(&feature_defs_);
|
|
||||||
InitAdaptiveClassifier(init_classifier);
|
InitAdaptiveClassifier(init_classifier);
|
||||||
if (init_dict) getDict().Load(Dict::GlobalDawgCache());
|
if (init_dict) getDict().Load(Dict::GlobalDawgCache());
|
||||||
pass2_ok_split = chop_ok_split;
|
pass2_ok_split = chop_ok_split;
|
||||||
|
Loading…
Reference in New Issue
Block a user