mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Fixed issue 1252: Refactored LearnBlob and its call hierarchy to make it a member of Classify.
Eliminated the flexfx scheme for calling global feature extractor functions through an array of function pointers. Deleted dead code I found as a by-product. This CL does not change BlobToTrainingSample or ExtractFeatures to be full members of Classify (the eventual goal) as that would make it even bigger, since there are a lot of callers to these functions. When ExtractFeatures and BlobToTrainingSample are members of Classify they will be able to access control parameters in Classify, which will greatly simplify developing variations to the feature extraction process.
This commit is contained in:
parent
e735a9017b
commit
53fc4456cc
@ -51,6 +51,7 @@
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "baseapi.h"
|
||||
#include "blobclass.h"
|
||||
#include "resultiterator.h"
|
||||
#include "mutableiterator.h"
|
||||
#include "thresholder.h"
|
||||
@ -870,7 +871,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
page_res_ = NULL;
|
||||
return -1;
|
||||
} else if (tesseract_->tessedit_train_from_boxes) {
|
||||
tesseract_->ApplyBoxTraining(*output_file_, page_res_);
|
||||
STRING fontname;
|
||||
ExtractFontName(*output_file_, &fontname);
|
||||
tesseract_->ApplyBoxTraining(fontname, page_res_);
|
||||
} else if (tesseract_->tessedit_ambigs_training) {
|
||||
FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
|
||||
// OCR the page segmented into words by tesseract.
|
||||
@ -1051,6 +1054,23 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Master ProcessPages calls ProcessPagesInternal and then does any post-
|
||||
// processing required due to being in a training mode.
|
||||
bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer) {
|
||||
bool result =
|
||||
ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
|
||||
if (result) {
|
||||
if (tesseract_->tessedit_train_from_boxes &&
|
||||
!tesseract_->WriteTRFile(*output_file_)) {
|
||||
tprintf("Write of TR file failed: %s\n", output_file_->string());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// In the ideal scenario, Tesseract will start working on data as soon
|
||||
// as it can. For example, if you stream a filelist through stdin, we
|
||||
// should start the OCR process as soon as the first filename is
|
||||
@ -1063,9 +1083,10 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
|
||||
// identify the scenario that really matters: filelists on
|
||||
// stdin. We'll still do our best if the user likes pipes. That means
|
||||
// piling up any data coming into stdin into a memory buffer.
|
||||
bool TessBaseAPI::ProcessPages(const char* filename,
|
||||
const char* retry_config, int timeout_millisec,
|
||||
TessResultRenderer* renderer) {
|
||||
bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
const char* retry_config,
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer) {
|
||||
PERF_COUNT_START("ProcessPages")
|
||||
bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
|
||||
if (stdInput) {
|
||||
|
@ -538,9 +538,11 @@ class TESS_API TessBaseAPI {
|
||||
*
|
||||
* Returns true if successful, false on error.
|
||||
*/
|
||||
bool ProcessPages(const char* filename,
|
||||
const char* retry_config, int timeout_millisec,
|
||||
TessResultRenderer* renderer);
|
||||
bool ProcessPages(const char* filename, const char* retry_config,
|
||||
int timeout_millisec, TessResultRenderer* renderer);
|
||||
// Does the real work of ProcessPages.
|
||||
bool ProcessPagesInternal(const char* filename, const char* retry_config,
|
||||
int timeout_millisec, TessResultRenderer* renderer);
|
||||
|
||||
/**
|
||||
* Turn a single image into symbolic text.
|
||||
|
@ -775,13 +775,13 @@ void Tesseract::CorrectClassifyWords(PAGE_RES* page_res) {
|
||||
}
|
||||
|
||||
// Calls LearnWord to extract features for labelled blobs within each word.
|
||||
// Features are written to the given filename.
|
||||
void Tesseract::ApplyBoxTraining(const STRING& filename, PAGE_RES* page_res) {
|
||||
// Features are stored in an internal buffer.
|
||||
void Tesseract::ApplyBoxTraining(const STRING& fontname, PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
int word_count = 0;
|
||||
for (WERD_RES *word_res = pr_it.word(); word_res != NULL;
|
||||
word_res = pr_it.forward()) {
|
||||
LearnWord(filename.string(), word_res);
|
||||
LearnWord(fontname.string(), word_res);
|
||||
++word_count;
|
||||
}
|
||||
tprintf("Generated training data for %d words\n", word_count);
|
||||
|
@ -220,17 +220,15 @@ void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
|
||||
|
||||
// Learns the given word using its chopped_word, seam_array, denorm,
|
||||
// box_word, best_state, and correct_text to learn both correctly and
|
||||
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
|
||||
// is called and the data will be written to a file for static training.
|
||||
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
|
||||
// is called and the data will be saved in an internal buffer.
|
||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||
// If rejmap is not NULL, then only chars with a rejmap entry of '1' will
|
||||
// be learned, otherwise all chars with good correct_text are learned.
|
||||
void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
void Classify::LearnWord(const char* fontname, WERD_RES* word) {
|
||||
int word_len = word->correct_text.size();
|
||||
if (word_len == 0) return;
|
||||
|
||||
float* thresholds = NULL;
|
||||
if (filename == NULL) {
|
||||
if (fontname == NULL) {
|
||||
// Adaption mode.
|
||||
if (!EnableLearning || word->best_choice == NULL)
|
||||
return; // Can't or won't adapt.
|
||||
@ -267,8 +265,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
if (word->correct_text[ch].length() > 0) {
|
||||
float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
|
||||
|
||||
LearnPieces(filename, start_blob, word->best_state[ch],
|
||||
threshold, CST_WHOLE, word->correct_text[ch].string(), word);
|
||||
LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
|
||||
CST_WHOLE, word->correct_text[ch].string(), word);
|
||||
|
||||
if (word->best_state[ch] > 1 && !disable_character_fragments) {
|
||||
// Check that the character breaks into meaningful fragments
|
||||
@ -301,8 +299,8 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
if (i != tokens.size() - 1)
|
||||
full_string += ' ';
|
||||
}
|
||||
LearnPieces(filename, start_blob + frag, 1,
|
||||
threshold, CST_FRAGMENT, full_string.string(), word);
|
||||
LearnPieces(fontname, start_blob + frag, 1, threshold,
|
||||
CST_FRAGMENT, full_string.string(), word);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -314,13 +312,13 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
if (word->best_state[ch] > 1) {
|
||||
// If the next blob is good, make junk with the rightmost fragment.
|
||||
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
||||
LearnPieces(filename, start_blob + word->best_state[ch] - 1,
|
||||
LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
|
||||
word->best_state[ch + 1] + 1,
|
||||
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
||||
}
|
||||
// If the previous blob is good, make junk with the leftmost fragment.
|
||||
if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
|
||||
LearnPieces(filename, start_blob - word->best_state[ch - 1],
|
||||
LearnPieces(fontname, start_blob - word->best_state[ch - 1],
|
||||
word->best_state[ch - 1] + 1,
|
||||
threshold, CST_IMPROPER, INVALID_UNICHAR, word);
|
||||
}
|
||||
@ -329,7 +327,7 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
|
||||
STRING joined_text = word->correct_text[ch];
|
||||
joined_text += word->correct_text[ch + 1];
|
||||
LearnPieces(filename, start_blob,
|
||||
LearnPieces(fontname, start_blob,
|
||||
word->best_state[ch] + word->best_state[ch + 1],
|
||||
threshold, CST_NGRAM, joined_text.string(), word);
|
||||
}
|
||||
@ -342,16 +340,16 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
|
||||
|
||||
// Builds a blob of length fragments, from the word, starting at start,
|
||||
// and then learns it, as having the given correct_text.
|
||||
// If filename is not NULL, then LearnBlob
|
||||
// is called and the data will be written to a file for static training.
|
||||
// If fontname is not NULL, then LearnBlob is called and the data will be
|
||||
// saved in an internal buffer for static training.
|
||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||
// threshold is a magic number required by AdaptToChar and generated by
|
||||
// ComputeAdaptionThresholds.
|
||||
// Although it can be partly inferred from the string, segmentation is
|
||||
// provided to explicitly clarify the character segmentation.
|
||||
void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
void Classify::LearnPieces(const char* fontname, int start, int length,
|
||||
float threshold, CharSegmentationType segmentation,
|
||||
const char* correct_text, WERD_RES *word) {
|
||||
const char* correct_text, WERD_RES* word) {
|
||||
// TODO(daria) Remove/modify this if/when we want
|
||||
// to train and/or adapt to n-grams.
|
||||
if (segmentation != CST_WHOLE &&
|
||||
@ -385,7 +383,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
}
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
||||
if (filename != NULL) {
|
||||
if (fontname != NULL) {
|
||||
classify_norm_method.set_value(character); // force char norm spc 30/11/93
|
||||
tess_bn_matching.set_value(false); // turn it off
|
||||
tess_cn_matching.set_value(false);
|
||||
@ -393,8 +391,7 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
INT_FX_RESULT_STRUCT fx_info;
|
||||
SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm,
|
||||
&bl_denorm, &cn_denorm, &fx_info);
|
||||
LearnBlob(feature_defs_, filename, rotated_blob, bl_denorm, cn_denorm,
|
||||
fx_info, correct_text);
|
||||
LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
|
||||
} else if (unicharset.contains_unichar(correct_text)) {
|
||||
UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
|
||||
int font_id = word->fontinfo != NULL
|
||||
|
@ -20,63 +20,32 @@
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "blobclass.h"
|
||||
#include "extract.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "classify.h"
|
||||
#include "efio.h"
|
||||
#include "featdefs.h"
|
||||
#include "callcpp.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
|
||||
#define MAXFILENAME 80
|
||||
#define MAXMATCHES 10
|
||||
#include "mf.h"
|
||||
#include "normfeat.h"
|
||||
|
||||
static const char kUnknownFontName[] = "UnknownFont";
|
||||
|
||||
STRING_VAR(classify_font_name, kUnknownFontName,
|
||||
"Default font name to be used in training");
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
----------------------------------------------------------------------------**/
|
||||
/* name of current image file being processed */
|
||||
extern char imagefile[];
|
||||
|
||||
namespace tesseract {
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// As all TBLOBs, Blob is in baseline normalized coords.
|
||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
||||
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob whose micro-features are to be learned
|
||||
** Row row of text that blob came from
|
||||
** BlobText text that corresponds to blob
|
||||
** TextLength number of characters in blob
|
||||
** Globals:
|
||||
** imagefile base filename of the page being learned
|
||||
** classify_font_name
|
||||
** name of font currently being trained on
|
||||
** Operation:
|
||||
** Extract micro-features from the specified blob and append
|
||||
** them to the appropriate file.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: 7/28/89, DSJ, Created.
|
||||
*/
|
||||
#define TRAIN_SUFFIX ".tr"
|
||||
static FILE *FeatureFile = NULL;
|
||||
STRING Filename(filename);
|
||||
|
||||
// If no fontname was set, try to extract it from the filename
|
||||
STRING CurrFontName = classify_font_name;
|
||||
if (CurrFontName == kUnknownFontName) {
|
||||
// Finds the name of the training font and returns it in fontname, by cutting
|
||||
// it out based on the expectation that the filename is of the form:
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const STRING& filename, STRING* fontname) {
|
||||
*fontname = classify_font_name;
|
||||
if (*fontname == kUnknownFontName) {
|
||||
// filename is expected to be of the form [lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
const char *basename = strrchr(filename.string(), '/');
|
||||
@ -84,47 +53,56 @@ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
||||
const char *lastdot = strrchr(filename.string(), '.');
|
||||
if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) {
|
||||
++firstdot;
|
||||
CurrFontName = firstdot;
|
||||
CurrFontName[lastdot - firstdot] = '\0';
|
||||
*fontname = firstdot;
|
||||
fontname->truncate_at(lastdot - firstdot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if a feature file is not yet open, open it
|
||||
// the name of the file is the name of the image plus TRAIN_SUFFIX
|
||||
if (FeatureFile == NULL) {
|
||||
Filename += TRAIN_SUFFIX;
|
||||
FeatureFile = Efopen(Filename.string(), "wb");
|
||||
cprintf("TRAINING ... Font name = %s\n", CurrFontName.string());
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Extracts features from the given blob and saves them in the tr_file_data_
|
||||
// member variable.
|
||||
// fontname: Name of font that this blob was printed in.
|
||||
// cn_denorm: Character normalization transformation to apply to the blob.
|
||||
// fx_info: Character normalization parameters computed with cn_denorm.
|
||||
// blob_text: Ground truth text for the blob.
|
||||
void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
const char* blob_text) {
|
||||
CHAR_DESC CharDesc = NewCharDescription(feature_defs_);
|
||||
CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
|
||||
CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
|
||||
CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
|
||||
CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
|
||||
|
||||
LearnBlob(FeatureDefs, FeatureFile, Blob, bl_denorm, cn_denorm, fx_info,
|
||||
BlobText, CurrFontName.string());
|
||||
} // LearnBlob
|
||||
|
||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile,
|
||||
TBLOB* Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
const char* BlobText, const char* FontName) {
|
||||
CHAR_DESC CharDesc;
|
||||
|
||||
ASSERT_HOST(FeatureFile != NULL);
|
||||
|
||||
CharDesc = ExtractBlobFeatures(FeatureDefs, bl_denorm, cn_denorm, fx_info,
|
||||
Blob);
|
||||
if (CharDesc == NULL) {
|
||||
cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (ValidCharDescription(FeatureDefs, CharDesc)) {
|
||||
// label the features with a class name and font name
|
||||
fprintf(FeatureFile, "\n%s %s\n", FontName, BlobText);
|
||||
if (ValidCharDescription(feature_defs_, CharDesc)) {
|
||||
// Label the features with a class name and font name.
|
||||
tr_file_data_ += "\n";
|
||||
tr_file_data_ += fontname;
|
||||
tr_file_data_ += " ";
|
||||
tr_file_data_ += blob_text;
|
||||
tr_file_data_ += "\n";
|
||||
|
||||
// write micro-features to file and clean up
|
||||
WriteCharDescription(FeatureDefs, FeatureFile, CharDesc);
|
||||
WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
|
||||
} else {
|
||||
tprintf("Blob learned was invalid!\n");
|
||||
}
|
||||
FreeCharDescription(CharDesc);
|
||||
|
||||
} // LearnBlob
|
||||
|
||||
// Writes stored training data to a .tr file based on the given filename.
|
||||
// Returns false on error.
|
||||
bool Classify::WriteTRFile(const STRING& filename) {
|
||||
STRING tr_filename = filename + ".tr";
|
||||
FILE* fp = Efopen(tr_filename.string(), "wb");
|
||||
int len = tr_file_data_.length();
|
||||
bool result =
|
||||
fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
|
||||
fclose(fp);
|
||||
tr_file_data_.truncate_at(0);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -21,9 +21,7 @@
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "featdefs.h"
|
||||
#include "oldlist.h"
|
||||
#include "blobs.h"
|
||||
#include "strngs.h"
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
Macros
|
||||
@ -39,18 +37,14 @@
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
|
||||
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
const char* BlobText);
|
||||
namespace tesseract {
|
||||
// Finds the name of the training font and returns it in fontname, by cutting
|
||||
// it out based on the expectation that the filename is of the form:
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const STRING& filename, STRING* fontname);
|
||||
|
||||
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob,
|
||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
const char* BlobText, const char* FontName);
|
||||
} // namespace tesseract.
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
----------------------------------------------------------------------------**/
|
||||
/*parameter used to turn on/off output of recognized chars to the screen */
|
||||
#endif
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "dict.h"
|
||||
#include "featdefs.h"
|
||||
#include "fontinfo.h"
|
||||
#include "imagedata.h"
|
||||
#include "intfx.h"
|
||||
#include "intmatcher.h"
|
||||
#include "normalis.h"
|
||||
@ -119,25 +120,25 @@ class Classify : public CCStruct {
|
||||
const UNICHARSET& target_unicharset);
|
||||
/* adaptmatch.cpp ***********************************************************/
|
||||
|
||||
// Learn the given word using its chopped_word, seam_array, denorm,
|
||||
// Learns the given word using its chopped_word, seam_array, denorm,
|
||||
// box_word, best_state, and correct_text to learn both correctly and
|
||||
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
|
||||
// is called and the data will be written to a file for static training.
|
||||
// incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
|
||||
// is called and the data will be saved in an internal buffer.
|
||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||
void LearnWord(const char* filename, WERD_RES *word);
|
||||
void LearnWord(const char* fontname, WERD_RES* word);
|
||||
|
||||
// Builds a blob of length fragments, from the word, starting at start,
|
||||
// and then learn it, as having the given correct_text.
|
||||
// If filename is not NULL, then LearnBlob
|
||||
// is called and the data will be written to a file for static training.
|
||||
// and then learns it, as having the given correct_text.
|
||||
// If fontname is not NULL, then LearnBlob is called and the data will be
|
||||
// saved in an internal buffer for static training.
|
||||
// Otherwise AdaptToBlob is called for adaption within a document.
|
||||
// threshold is a magic number required by AdaptToChar and generated by
|
||||
// GetAdaptThresholds.
|
||||
// ComputeAdaptionThresholds.
|
||||
// Although it can be partly inferred from the string, segmentation is
|
||||
// provided to explicitly clarify the character segmentation.
|
||||
void LearnPieces(const char* filename, int start, int length,
|
||||
float threshold, CharSegmentationType segmentation,
|
||||
const char* correct_text, WERD_RES *word);
|
||||
void LearnPieces(const char* fontname, int start, int length, float threshold,
|
||||
CharSegmentationType segmentation, const char* correct_text,
|
||||
WERD_RES* word);
|
||||
void InitAdaptiveClassifier(bool load_pre_trained_templates);
|
||||
void InitAdaptedClass(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
@ -361,7 +362,22 @@ class Classify : public CCStruct {
|
||||
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob);
|
||||
/* picofeat.cpp ***********************************************************/
|
||||
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob);
|
||||
|
||||
FEATURE_SET ExtractIntCNFeatures(const TBLOB& blob,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
FEATURE_SET ExtractIntGeoFeatures(const TBLOB& blob,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
/* blobclass.cpp ***********************************************************/
|
||||
// Extracts features from the given blob and saves them in the tr_file_data_
|
||||
// member variable.
|
||||
// fontname: Name of font that this blob was printed in.
|
||||
// cn_denorm: Character normalization transformation to apply to the blob.
|
||||
// fx_info: Character normalization parameters computed with cn_denorm.
|
||||
// blob_text: Ground truth text for the blob.
|
||||
void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
|
||||
// Writes stored training data to a .tr file based on the given filename.
|
||||
// Returns false on error.
|
||||
bool WriteTRFile(const STRING& filename);
|
||||
|
||||
// Member variables.
|
||||
|
||||
@ -498,6 +514,9 @@ class Classify : public CCStruct {
|
||||
/* variables used to hold performance statistics */
|
||||
int NumAdaptationsFailed;
|
||||
|
||||
// Training data gathered here for all the images in a document.
|
||||
STRING tr_file_data_;
|
||||
|
||||
// Expected number of features in the class pruner, used to penalize
|
||||
// unknowns that have too few features (like a c being classified as e) so
|
||||
// it doesn't recognize everything as '@' or '#'.
|
||||
|
@ -1,32 +0,0 @@
|
||||
#ifndef EXTERN_H
|
||||
#define EXTERN_H
|
||||
|
||||
/* -*-C-*-
|
||||
********************************************************************************
|
||||
*
|
||||
* File: extern.h (Formerly extern.h)
|
||||
* Description: External definitions for C or C++
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Tue Mar 20 14:01:22 1990
|
||||
* Modified: Tue Mar 20 14:02:09 1990 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Experimental (Do Not Distribute)
|
||||
*
|
||||
* (c) Copyright 1990, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#define EXTERN extern
|
||||
|
||||
#endif
|
@ -1,74 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: extract.c
|
||||
** Purpose: Generic high level feature extractor routines.
|
||||
** Author: Dan Johnson
|
||||
** History: Sun Jan 21 09:44:08 1990, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/*-----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
-----------------------------------------------------------------------------*/
|
||||
#include "extract.h"
|
||||
#include "flexfx.h"
|
||||
#include "danerror.h"
|
||||
|
||||
typedef CHAR_FEATURES (*CF_FUNC) ();
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
-----------------------------------------------------------------------------*/
|
||||
void ExtractorStub();
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
Public Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Extract features from Blob by calling the feature
|
||||
* extractor which is currently being used. This routine
|
||||
* simply provides a high level interface to feature
|
||||
* extraction. The caller can extract any type of features
|
||||
* from a blob without understanding any lower level details.
|
||||
*
|
||||
* @param FeatureDefs definitions of feature types/extractors
|
||||
* @param denorm Normalize/denormalize to access original image
|
||||
* @param Blob blob to extract features from
|
||||
*
|
||||
* @return The character features extracted from Blob.
|
||||
* @note Exceptions: none
|
||||
* @note History: Sun Jan 21 10:07:28 1990, DSJ, Created.
|
||||
*/
|
||||
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
TBLOB *Blob) {
|
||||
return ExtractFlexFeatures(FeatureDefs, Blob, bl_denorm, cn_denorm, fx_info);
|
||||
} /* ExtractBlobFeatures */
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
Private Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
ExtractorStub ()
|
||||
/**
|
||||
* This routine is used to stub out feature extractors
|
||||
* that are no longer used. It simply calls DoError.
|
||||
*
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Jan 2 14:16:49 1991, DSJ, Created.
|
||||
*/
|
||||
#define DUMMY_ERROR 1
|
||||
{
|
||||
DoError (DUMMY_ERROR, "Selected feature extractor has been stubbed out!");
|
||||
} /* ExtractorStub */
|
@ -1,40 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: extract.h
|
||||
** Purpose: Interface to high level generic feature extraction.
|
||||
** Author: Dan Johnson
|
||||
** History: 1/21/90, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
#ifndef EXTRACT_H
|
||||
#define EXTRACT_H
|
||||
|
||||
#include "featdefs.h"
|
||||
#include <stdio.h>
|
||||
|
||||
class DENORM;
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
-----------------------------------------------------------------------------*/
|
||||
// Deprecated! Will be deleted soon!
|
||||
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
|
||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
||||
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
const DENORM& bl_denorm, const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info, TBLOB *Blob);
|
||||
|
||||
/*---------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
----------------------------------------------------------------------------*/
|
||||
void ExtractorStub();
|
||||
#endif
|
@ -178,7 +178,7 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
* Write a textual representation of CharDesc to File.
|
||||
* Appends a textual representation of CharDesc to str.
|
||||
* The format used is to write out the number of feature
|
||||
* sets which will be written followed by a representation of
|
||||
* each feature set.
|
||||
@ -187,18 +187,15 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
|
||||
* by a description of the feature set. Feature sets which are
|
||||
* not present are not written.
|
||||
*
|
||||
* Globals:
|
||||
* - none
|
||||
*
|
||||
* @param FeatureDefs definitions of feature types/extractors
|
||||
* @param File open text file to write CharDesc to
|
||||
* @param CharDesc character description to write to File
|
||||
* @param str string to append CharDesc to
|
||||
* @param CharDesc character description to append to str
|
||||
*
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed May 23 17:21:18 1990, DSJ, Created.
|
||||
*/
|
||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
FILE *File, CHAR_DESC CharDesc) {
|
||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
|
||||
CHAR_DESC CharDesc, STRING* str) {
|
||||
int Type;
|
||||
int NumSetsToWrite = 0;
|
||||
|
||||
@ -206,11 +203,14 @@ void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
if (CharDesc->FeatureSets[Type])
|
||||
NumSetsToWrite++;
|
||||
|
||||
fprintf (File, " %d\n", NumSetsToWrite);
|
||||
for (Type = 0; Type < CharDesc->NumFeatureSets; Type++)
|
||||
if (CharDesc->FeatureSets[Type]) {
|
||||
fprintf (File, "%s ", (FeatureDefs.FeatureDesc[Type])->ShortName);
|
||||
WriteFeatureSet (File, CharDesc->FeatureSets[Type]);
|
||||
str->add_str_int(" ", NumSetsToWrite);
|
||||
*str += "\n";
|
||||
for (Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
|
||||
if (CharDesc->FeatureSets[Type]) {
|
||||
*str += FeatureDefs.FeatureDesc[Type]->ShortName;
|
||||
*str += " ";
|
||||
WriteFeatureSet(CharDesc->FeatureSets[Type], str);
|
||||
}
|
||||
}
|
||||
} /* WriteCharDescription */
|
||||
|
||||
@ -231,6 +231,8 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
anything_written = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return anything_written && well_formed;
|
||||
|
@ -48,7 +48,6 @@ typedef CHAR_DESC_STRUCT *CHAR_DESC;
|
||||
struct FEATURE_DEFS_STRUCT {
|
||||
inT32 NumFeatureTypes;
|
||||
const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES];
|
||||
const FEATURE_EXT_STRUCT* FeatureExtractors[NUM_FEATURE_TYPES];
|
||||
int FeatureEnabled[NUM_FEATURE_TYPES];
|
||||
};
|
||||
typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS;
|
||||
@ -65,8 +64,8 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs);
|
||||
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
CHAR_DESC CharDesc);
|
||||
|
||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
FILE *File, CHAR_DESC CharDesc);
|
||||
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
|
||||
CHAR_DESC CharDesc, STRING* str);
|
||||
|
||||
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
FILE *File);
|
||||
|
@ -1,72 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: flexfx.c
|
||||
** Purpose: Interface to flexible feature extractor.
|
||||
** Author: Dan Johnson
|
||||
** History: Wed May 23 13:45:10 1990, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "flexfx.h"
|
||||
#include "featdefs.h"
|
||||
#include "emalloc.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Deprecated! Will be deleted soon!
|
||||
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
|
||||
// See SetupBLCNDenorms in intfx.cpp for other args.
|
||||
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
                              TBLOB *Blob, const DENORM& bl_denorm,
                              const DENORM& cn_denorm,
                              const INT_FX_RESULT_STRUCT& fx_info) {
  // Allocates a new character description and fills in one feature set for
  // every feature type that has an extractor function installed in
  // FeatureDefs. Blob, bl_denorm, cn_denorm and fx_info are passed through
  // unchanged to each extractor.
  //
  // Returns the filled-in description, or NULL if any installed extractor
  // returns NULL (the partially built description is freed first).
  CHAR_DESC description = NewCharDescription(FeatureDefs);

  for (int type_index = 0; type_index < description->NumFeatureSets;
       ++type_index) {
    // Feature types without an extractor keep whatever NewCharDescription
    // put in their slot.
    if (FeatureDefs.FeatureExtractors[type_index] == NULL ||
        FeatureDefs.FeatureExtractors[type_index]->Extractor == NULL) {
      continue;
    }

    description->FeatureSets[type_index] =
        FeatureDefs.FeatureExtractors[type_index]->Extractor(
            Blob, bl_denorm, cn_denorm, fx_info);
    if (description->FeatureSets[type_index] != NULL) {
      continue;
    }

    // A failed extractor invalidates the whole description.
    tprintf("Feature extractor for type %d = %s returned NULL!\n",
            type_index, FeatureDefs.FeatureDesc[type_index]->ShortName);
    FreeCharDescription(description);
    return NULL;
  }

  return description;
} /* ExtractFlexFeatures */
|
@ -1,36 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: flexfx.h
|
||||
** Purpose: Interface to flexible feature extractor.
|
||||
** Author: Dan Johnson
|
||||
** History: Wed May 23 13:36:58 1990, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
#ifndef FLEXFX_H
|
||||
#define FLEXFX_H
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "featdefs.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
// As with all TBLOBs this one is also baseline normalized.
|
||||
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||
TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
|
||||
#endif
|
@ -1,45 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: fxdefs.c
|
||||
** Purpose: Utility functions to be used by feature extractors.
|
||||
** Author: Dan Johnson
|
||||
** History: Sun Jan 21 15:29:02 1990, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
#include "fxdefs.h"
|
||||
#include "featdefs.h"
|
||||
#include "mf.h"
|
||||
#include "outfeat.h"
|
||||
#include "picofeat.h"
|
||||
#include "normfeat.h"
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
-----------------------------------------------------------------------------*/
|
||||
// Definitions of extractors separated from feature definitions.
|
||||
// Each wrapper below holds the extractor function for one feature type.
const FEATURE_EXT_STRUCT MicroFeatureExt = { ExtractMicros };
|
||||
const FEATURE_EXT_STRUCT CharNormExt = { ExtractCharNormFeatures };
|
||||
const FEATURE_EXT_STRUCT IntFeatExt = { ExtractIntCNFeatures };
|
||||
const FEATURE_EXT_STRUCT GeoFeatExt = { ExtractIntGeoFeatures };
|
||||
|
||||
// MUST be kept in-sync with DescDefs in featdefs.cpp.
|
||||
// Table of extractor wrappers, one entry per feature type; SetupExtractors
// copies entry i into FeatureDefs->FeatureExtractors[i].
const FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = {
|
||||
&MicroFeatureExt,
|
||||
&CharNormExt,
|
||||
&IntFeatExt,
|
||||
&GeoFeatExt
|
||||
};
|
||||
|
||||
// Installs the default extractor for every feature type by copying the
// ExtractorDefs table, entry by entry, into FeatureDefs->FeatureExtractors.
void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs) {
  int type_index = 0;
  while (type_index < NUM_FEATURE_TYPES) {
    FeatureDefs->FeatureExtractors[type_index] = ExtractorDefs[type_index];
    ++type_index;
  }
}
|
@ -1,25 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: fxdefs.h
|
||||
** Purpose: Generic interface definitions for feature extractors
|
||||
** Author: Dan Johnson
|
||||
** History: Fri Jan 19 09:04:14 1990, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
#ifndef FXDEFS_H
|
||||
#define FXDEFS_H
|
||||
|
||||
#include "featdefs.h"
|
||||
|
||||
void SetupExtractors(FEATURE_DEFS_STRUCT *FeatureDefs);
|
||||
|
||||
#endif
|
@ -75,9 +75,9 @@ namespace tesseract {
|
||||
|
||||
// Generates a TrainingSample from a TBLOB. Extracts features and sets
|
||||
// the bounding box, so classifiers that operate on the image can work.
|
||||
// TODO(rays) BlobToTrainingSample must remain a global function until
|
||||
// the FlexFx and FeatureDescription code can be removed and LearnBlob
|
||||
// made a member of Classify.
|
||||
// TODO(rays) Make BlobToTrainingSample a member of Classify now that
|
||||
// the FlexFx and FeatureDescription code have been removed and LearnBlob
|
||||
// is now a member of Classify.
|
||||
TrainingSample* BlobToTrainingSample(
|
||||
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
|
||||
GenericVector<INT_FEATURE_STRUCT>* bl_features) {
|
||||
|
@ -33,9 +33,7 @@
|
||||
Private Code
|
||||
----------------------------------------------------------------------------**/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract micro-features from
|
||||
@ -54,8 +52,7 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
FEATURE Feature;
|
||||
MICROFEATURE OldFeature;
|
||||
|
||||
OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, bl_denorm, cn_denorm,
|
||||
fx_info);
|
||||
OldFeatures = BlobMicroFeatures(Blob, cn_denorm);
|
||||
if (OldFeatures == NULL)
|
||||
return NULL;
|
||||
NumFeatures = count (OldFeatures);
|
||||
|
@ -34,8 +34,6 @@ typedef float MicroFeature[MFCount];
|
||||
/*----------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
-----------------------------------------------------------------------------*/
|
||||
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
FEATURE_SET ExtractMicros(TBLOB* Blob, const DENORM& cn_denorm);
|
||||
|
||||
#endif
|
||||
|
@ -23,7 +23,6 @@
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "oldlist.h"
|
||||
#include "matchdefs.h"
|
||||
#include "xform2d.h"
|
||||
|
||||
/* definition of a list of micro-features */
|
||||
typedef LIST MICROFEATURES;
|
||||
|
@ -59,9 +59,7 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Blob blob to extract micro-features from
|
||||
@ -98,7 +96,7 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
}
|
||||
FreeOutlines(Outlines);
|
||||
}
|
||||
return ((CHAR_FEATURES) MicroFeatures);
|
||||
return MicroFeatures;
|
||||
} /* BlobMicroFeatures */
|
||||
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "mfdefs.h"
|
||||
#include "params.h"
|
||||
/**----------------------------------------------------------------------------
|
||||
Variables
|
||||
@ -35,8 +36,6 @@ extern double_VAR_H(classify_max_slope, 2.414213562,
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
MICROFEATURES BlobMicroFeatures(TBLOB* Blob, const DENORM& cn_denorm);
|
||||
|
||||
#endif
|
||||
|
@ -59,9 +59,7 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) {
|
||||
// the x center of the grapheme's bounding box.
|
||||
// English: [0.011, 0.31]
|
||||
//
|
||||
FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET feature_set = NewFeatureSet(1);
|
||||
FEATURE feature = NewFeature(&CharNormDesc);
|
||||
|
||||
|
@ -34,8 +34,6 @@ typedef enum {
|
||||
----------------------------------------------------------------------------**/
|
||||
FLOAT32 ActualOutlineLength(FEATURE Feature);
|
||||
|
||||
FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT& fx_info);
|
||||
|
||||
#endif
|
||||
|
@ -209,55 +209,52 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void WriteFeature(FILE *File, FEATURE Feature) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to write Feature to
|
||||
** Feature feature to write out to File
|
||||
** Globals: none
|
||||
** Operation: Write a textual representation of Feature to File.
|
||||
** This representation is simply a list of the N parameters
|
||||
** of the feature, terminated with a newline. It is assumed
|
||||
** that the ExtraPenalty field can be reconstructed from the
|
||||
** parameters of the feature. It is also assumed that the
|
||||
** feature type information is specified or assumed elsewhere.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 09:28:18 1990, DSJ, Created.
|
||||
** Parameters:
|
||||
** Feature: feature to write out to str
|
||||
** str: string to write Feature to
|
||||
** Operation: Appends a textual representation of Feature to str.
|
||||
** This representation is simply a list of the N parameters
|
||||
** of the feature, terminated with a newline. It is assumed
|
||||
** that the ExtraPenalty field can be reconstructed from the
|
||||
** parameters of the feature. It is also assumed that the
|
||||
** feature type information is specified or assumed elsewhere.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 09:28:18 1990, DSJ, Created.
|
||||
*/
|
||||
int i;
|
||||
|
||||
for (i = 0; i < Feature->Type->NumParams; i++) {
|
||||
void WriteFeature(FEATURE Feature, STRING* str) {
|
||||
for (int i = 0; i < Feature->Type->NumParams; i++) {
|
||||
#ifndef WIN32
|
||||
assert(!isnan(Feature->Params[i]));
|
||||
#endif
|
||||
fprintf(File, " %g", Feature->Params[i]);
|
||||
str->add_str_double(" ", Feature->Params[i]);
|
||||
}
|
||||
fprintf(File, "\n");
|
||||
*str += "\n";
|
||||
} /* WriteFeature */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void WriteFeatureSet(FILE *File, FEATURE_SET FeatureSet) {
|
||||
/*
|
||||
** Parameters:
|
||||
** File open text file to write FeatureSet to
|
||||
** FeatureSet feature set to write to File
|
||||
** Globals: none
|
||||
** Operation: Write a textual representation of FeatureSet to File.
|
||||
** This representation is an integer specifying the number of
|
||||
** features in the set, followed by a newline, followed by
|
||||
** text representations for each feature in the set.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 10:06:03 1990, DSJ, Created.
|
||||
** Parameters:
|
||||
** FeatureSet: feature set to append to str
|
||||
** str: string to append FeatureSet to
|
||||
** Globals: none
|
||||
** Operation: Appends a textual representation of FeatureSet to str.
|
||||
** This representation is an integer specifying the number of
|
||||
** features in the set, followed by a newline, followed by
|
||||
** text representations for each feature in the set.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Wed May 23 10:06:03 1990, DSJ, Created.
|
||||
*/
|
||||
int i;
|
||||
|
||||
void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) {
|
||||
if (FeatureSet) {
|
||||
fprintf (File, "%d\n", FeatureSet->NumFeatures);
|
||||
for (i = 0; i < FeatureSet->NumFeatures; i++)
|
||||
WriteFeature (File, FeatureSet->Features[i]);
|
||||
str->add_str_int("", FeatureSet->NumFeatures);
|
||||
*str += "\n";
|
||||
for (int i = 0; i < FeatureSet->NumFeatures; i++) {
|
||||
WriteFeature(FeatureSet->Features[i], str);
|
||||
}
|
||||
}
|
||||
} /* WriteFeatureSet */
|
||||
|
||||
|
@ -79,13 +79,6 @@ typedef FEATURE_SET_STRUCT *FEATURE_SET;
|
||||
// classifier does not need to know the details of this data structure.
|
||||
typedef char *CHAR_FEATURES;
|
||||
|
||||
typedef FEATURE_SET (*FX_FUNC)(TBLOB *, const DENORM&, const DENORM&,
|
||||
const INT_FX_RESULT_STRUCT&);
|
||||
|
||||
struct FEATURE_EXT_STRUCT {
|
||||
FX_FUNC Extractor; // func to extract features
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
Macros for defining the parameters of a new features
|
||||
----------------------------------------------------------------------*/
|
||||
|
@ -223,10 +223,10 @@ void NormalizePicoX(FEATURE_SET FeatureSet) {
|
||||
}
|
||||
} /* NormalizePicoX */
|
||||
|
||||
namespace tesseract {
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET Classify::ExtractIntCNFeatures(
|
||||
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
/*
|
||||
** Parameters:
|
||||
** blob blob to extract features from
|
||||
@ -237,9 +237,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
*/
|
||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||
tesseract::TrainingSample* sample =
|
||||
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
|
||||
&bl_features);
|
||||
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||
blob, false, &local_fx_info, &bl_features);
|
||||
if (sample == NULL) return NULL;
|
||||
|
||||
int num_features = sample->num_features();
|
||||
@ -259,9 +258,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
} /* ExtractIntCNFeatures */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
FEATURE_SET Classify::ExtractIntGeoFeatures(
|
||||
const TBLOB& blob, const INT_FX_RESULT_STRUCT& fx_info) {
|
||||
/*
|
||||
** Parameters:
|
||||
** blob blob to extract features from
|
||||
@ -272,9 +270,8 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
*/
|
||||
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
|
||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||
tesseract::TrainingSample* sample =
|
||||
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
|
||||
&bl_features);
|
||||
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
|
||||
blob, false, &local_fx_info, &bl_features);
|
||||
if (sample == NULL) return NULL;
|
||||
|
||||
FEATURE_SET feature_set = NewFeatureSet(1);
|
||||
@ -288,3 +285,5 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
|
||||
|
||||
return feature_set;
|
||||
} /* ExtractIntGeoFeatures */
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -58,13 +58,6 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length");
|
||||
----------------------------------------------------------------------------**/
|
||||
#define GetPicoFeatureLength() (PicoFeatureLength)
|
||||
|
||||
FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& bl_denorm,
|
||||
const DENORM& cn_denorm,
|
||||
const INT_FX_RESULT_STRUCT& fx_info);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
----------------------------------------------------------------------------**/
|
||||
|
@ -1,120 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: xform2d.c
|
||||
** Purpose: Library routines for performing 2D point transformations
|
||||
** Author: Dan Johnson
|
||||
** History: Fri Sep 22 09:54:17 1989, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "xform2d.h"
|
||||
#include <math.h>
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
// Resets M to the identity transform: a = d = 1 and b, c, tx, ty all zero.
void InitMatrix(MATRIX_2D *M) {
  M->a = M->d = 1;
  M->b = M->c = 0;
  M->tx = M->ty = 0;
}
|
||||
|
||||
// Copies all six coefficients (a, b, c, d, tx, ty) of A into B.
// Note the argument order: the source comes first, the destination second.
void CopyMatrix(MATRIX_2D *A, MATRIX_2D *B) {
  *B = *A;
}
|
||||
|
||||
// Adds to M's translation terms the offset (X, Y) mapped through M's linear
// part: tx grows by a*X + c*Y and ty by b*X + d*Y. The two updates are
// independent of each other; a, b, c and d are only read.
void TranslateMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  M->ty = M->ty + (M->b * X + M->d * Y);
  M->tx = M->tx + (M->a * X + M->c * Y);
}
|
||||
|
||||
// Scales M's linear part: a and b are multiplied by X, c and d by Y.
// The translation terms tx and ty are left untouched.
void ScaleMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  // Coefficients applied to the x input.
  M->a = M->a * X;
  M->b = M->b * X;
  // Coefficients applied to the y input.
  M->c = M->c * Y;
  M->d = M->d * Y;
}
|
||||
|
||||
// Negates the x-input coefficients of M (scale by -1 in X, 1 in Y).
void MirrorMatrixInX(MATRIX_2D *M) {
  ScaleMatrix(M, -1, 1);
}
|
||||
// Negates the y-input coefficients of M (scale by 1 in X, -1 in Y).
void MirrorMatrixInY(MATRIX_2D *M) {
  ScaleMatrix(M, 1, -1);
}
|
||||
// Negates all four linear coefficients of M (scale by -1 in both X and Y).
void MirrorMatrixInXY(MATRIX_2D *M) {
  ScaleMatrix(M, -1, -1);
}
|
||||
|
||||
// Returns the x coordinate of point (X, Y) after applying M:
// a*X + c*Y + tx.
FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  return X * M->a + Y * M->c + M->tx;
}
|
||||
|
||||
// Returns the y coordinate of point (X, Y) after applying M:
// b*X + d*Y + ty.
FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) {
  return X * M->b + Y * M->d + M->ty;
}
|
||||
|
||||
// Maps point A through M and stores the result in *B.
//
// The source coordinates are snapshotted before anything is written, so the
// call is also correct when B points at A (in-place mapping). Without the
// snapshot, B->x would be overwritten first and the y result would then be
// computed from the already-mapped x.
void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B) {
  const FLOAT32 src_x = A.x;
  const FLOAT32 src_y = A.y;
  B->x = MapX(M, src_x, src_y);
  B->y = MapY(M, src_x, src_y);
}
|
||||
|
||||
// Returns the x component of displacement (DX, DY) after applying only the
// linear part of M (no translation): a*DX + c*DY.
FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) {
  return DX * M->a + DY * M->c;
}
|
||||
|
||||
// Returns the y component of displacement (DX, DY) after applying only the
// linear part of M (no translation): b*DX + d*DY.
FLOAT32 MapDy(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) {
  return DX * M->b + DY * M->d;
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle) {
  // Rotates the coordinate system described by Matrix about its origin by
  // Angle radians. In matrix notation: Matrix = R x Matrix, where R is
  //
  //      cos Angle   sin Angle   0
  //     -sin Angle   cos Angle   0
  //          0           0       1
  //
  // Only a, b, c and d change; tx and ty are not touched.
  const FLOAT32 cos_angle = cos((double) Angle);
  const FLOAT32 sin_angle = sin((double) Angle);

  // Snapshot the coefficients so every result is computed from the
  // pre-rotation values regardless of assignment order.
  const FLOAT32 old_a = Matrix->a;
  const FLOAT32 old_b = Matrix->b;
  const FLOAT32 old_c = Matrix->c;
  const FLOAT32 old_d = Matrix->d;

  Matrix->a = old_a * cos_angle + old_c * sin_angle;
  Matrix->b = old_b * cos_angle + old_d * sin_angle;
  Matrix->c = old_a * -sin_angle + old_c * cos_angle;
  Matrix->d = old_b * -sin_angle + old_d * cos_angle;
} /* RotateMatrix */
|
@ -1,60 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: xform2d.h
|
||||
** Purpose: Definitions for using 2D point transformation library
|
||||
** Author: Dan Johnson
|
||||
** History: Fri Sep 22 09:57:08 1989, DSJ, Created.
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
#ifndef XFORM2D_H
|
||||
#define XFORM2D_H
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "fpoint.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
FLOAT32 a, b, c, d, tx, ty;
|
||||
}
|
||||
|
||||
|
||||
MATRIX_2D, *MATRIX_2D_PTR;
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Function Prototypes
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
void InitMatrix(MATRIX_2D *M);
|
||||
void CopyMatrix(MATRIX_2D *A, MATRIX_2D *B);
|
||||
|
||||
/* matrix scaling, translation, rotation, mirroring, etc.*/
|
||||
void TranslateMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
||||
void ScaleMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
||||
|
||||
void MirrorMatrixInX(MATRIX_2D *M);
|
||||
void MirrorMatrixInY(MATRIX_2D *M);
|
||||
void MirrorMatrixInXY(MATRIX_2D *M);
|
||||
|
||||
/* using a matrix to map points*/
|
||||
FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
||||
|
||||
FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y);
|
||||
|
||||
void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B);
|
||||
|
||||
FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY);
|
||||
/* y component of displacement (DX, DY) mapped through M without translation.
   M is taken by pointer, matching the definition and the other Map*
   prototypes; a by-value parameter would declare a separate overload that
   has no definition. */
FLOAT32 MapDy(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY);
|
||||
|
||||
void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle);
|
||||
#endif
|
@ -21,7 +21,6 @@
|
||||
#include "chop.h"
|
||||
#include "chopper.h"
|
||||
#include "danerror.h"
|
||||
#include "fxdefs.h"
|
||||
#include "globals.h"
|
||||
#include "gradechop.h"
|
||||
#include "pageres.h"
|
||||
@ -49,7 +48,6 @@ void Wordrec::program_editup(const char *textbase,
|
||||
bool init_dict) {
|
||||
if (textbase != NULL) imagefile = textbase;
|
||||
InitFeatureDefs(&feature_defs_);
|
||||
SetupExtractors(&feature_defs_);
|
||||
InitAdaptiveClassifier(init_classifier);
|
||||
if (init_dict) getDict().Load(Dict::GlobalDawgCache());
|
||||
pass2_ok_split = chop_ok_split;
|
||||
|
Loading…
Reference in New Issue
Block a user