Delete cube code

This commit is contained in:
Ray Smith 2016-12-14 11:00:43 -08:00
parent 432684dd6e
commit 5c3839bdb4
84 changed files with 0 additions and 14952 deletions

View File

@ -1,440 +0,0 @@
/******************************************************************
* File: cube_control.cpp
* Description: Tesseract class methods for invoking cube convolutional
* neural network word recognizer.
* Author: Raquel Romano
* Created: September 2009
*
* (C) Copyright 2009, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
**********************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "allheaders.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tesseractclass.h"
#include "tesseract_cube_combiner.h"
namespace tesseract {
/**
 * @name convert_prob_to_tess_certainty
 *
 * Normalize a probability in the range [0.0, 1.0] to a tesseract
 * certainty in the range [-20.0, 0.0]: 1.0 maps to 0.0 and 0.0 maps
 * to -20.0.
 */
static float convert_prob_to_tess_certainty(float prob) {
  // Use float literals: the original double literals forced a
  // float->double->float round trip for no benefit.
  return (prob - 1.0f) * 20.0f;
}
/**
 * @name char_box_to_tbox
 *
 * Convert a leptonica character bounding box into a TBOX in page
 * coordinates. A nonzero x_offset compensates for any extra padding
 * that was added to the word image before recognition.
 */
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
  l_int32 box_x, box_y, box_w, box_h;
  boxGetGeometry(char_box, &box_x, &box_y, &box_w, &box_h);
  // Shift from word-local to page coordinates, undoing the padding.
  const l_int32 page_left = box_x + word_box.left() - x_offset;
  const l_int32 page_right = page_left + box_w;
  // Leptonica's y axis grows downwards; TBOX's grows upwards.
  const l_int32 page_top = word_box.bottom() + word_box.height() - box_y;
  const l_int32 page_bottom = page_top - box_h;
  return TBOX(page_left, page_bottom, page_right, page_top);
}
/**
 * @name extract_cube_state
 *
 * Extract CharSamp objects and character bounding boxes from the
 * CubeObject's state. The caller should free both structures.
 */
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
                                   int* num_chars,
                                   Boxa** char_boxes,
                                   CharSamp*** char_samples) {
  // Guard against a missing cube object.
  if (cube_obj == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
              "passed to extract_cube_state\n");
    }
    return false;
  }
  // The CubeObject accessors return either the deslanted or regular
  // search/beam objects, whichever was used in the last call to Recognize().
  CubeSearchObject* search = cube_obj->SrchObj();
  if (search == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
              "cube's search object in extract_cube_state.\n");
    }
    return false;
  }
  BeamSearch* beam = cube_obj->BeamObj();
  if (beam == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
              "cube's beam search object in extract_cube_state.\n");
    }
    return false;
  }
  // Backtrack through the best beam search path to recover the
  // per-character samples and bounding boxes.
  const int best_node = beam->BestPresortedNodeIndex();
  *char_samples = beam->BackTrack(search, best_node, num_chars, NULL,
                                  char_boxes);
  return *char_samples != NULL;
}
/**
 * @name create_cube_box_word
 *
 * Fill the given BoxWord with boxes derived from the character bounding
 * boxes. The char_boxes are in word-local coordinates (the left-most
 * character box of each word starts at (0,0)), while the BoxWord must be
 * expressed in page coordinates.
 */
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
                                     int num_chars,
                                     TBOX word_box,
                                     BoxWord* box_word) {
  if (box_word == NULL) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
    }
    return false;
  }
  // First pass: the x of the left-most char box gives the padding offset,
  // which can be nonzero if the word image was padded before recognition.
  int x_offset = -1;
  for (int i = 0; i < num_chars; ++i) {
    Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
    if (x_offset < 0 || char_box->x < x_offset)
      x_offset = char_box->x;
    boxDestroy(&char_box);
  }
  // Second pass: translate every character box into page coordinates and
  // append it to the BoxWord.
  for (int i = 0; i < num_chars; ++i) {
    Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
    TBOX page_box = char_box_to_tbox(char_box, word_box, x_offset);
    boxDestroy(&char_box);
    box_word->InsertBox(i, page_box);
  }
  return true;
}
/**
* @name init_cube_objects
*
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
* Returns false if cube context could not be created or if load_combiner is
* true, but the combiner could not be loaded.
*/
bool Tesseract::init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager) {
ASSERT_HOST(cube_cntxt_ == NULL);
ASSERT_HOST(tess_cube_combiner_ == NULL);
// Create the cube context object
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
if (cube_cntxt_ == NULL) {
if (cube_debug_level > 0) {
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
"instantiate CubeRecoContext\n");
}
return false;
}
// Create the combiner object and load the combiner net for target languages.
if (load_combiner) {
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
if (!tess_cube_combiner_->LoadCombinerNet()) {
delete cube_cntxt_;
cube_cntxt_ = NULL;
delete tess_cube_combiner_;
tess_cube_combiner_ = NULL;
if (cube_debug_level > 0)
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
return false;
}
}
return true;
}
/**
 * @name run_cube_combiner
 *
 * Iterates through tesseract's results and calls cube on each word,
 * combining the results with the existing tesseract result.
 */
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
  // Nothing to do without results or without a loaded combiner net.
  if (page_res == NULL || tess_cube_combiner_ == NULL)
    return;
  PAGE_RES_IT page_res_it(page_res);
  // Iterate through the word results and call cube on each word.
  for (page_res_it.restart_page(); page_res_it.word() != NULL;
       page_res_it.forward()) {
    BLOCK* block = page_res_it.block()->block;
    if (block->poly_block() != NULL && !block->poly_block()->IsText())
      continue;  // Don't deal with non-text blocks.
    WERD_RES* word = page_res_it.word();
    // Skip cube entirely if tesseract's certainty is greater than threshold,
    // where the threshold is the combiner run probability mapped into
    // tesseract's certainty range.
    int combiner_run_thresh = convert_prob_to_tess_certainty(
        cube_cntxt_->Params()->CombinerRunThresh());
    if (word->best_choice->certainty() >= combiner_run_thresh) {
      continue;
    }
    // Use the same language as Tesseract used for the word.
    Tesseract* lang_tess = word->tesseract;
    // Setup a trial WERD_RES in which to classify with cube; the original
    // word result is left untouched unless the combiner prefers cube.
    WERD_RES cube_word;
    cube_word.InitForRetryRecognition(*word);
    cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
                                  OEM_CUBE_ONLY,
                                  NULL, false, false, false,
                                  page_res_it.row()->row,
                                  page_res_it.block()->block);
    CubeObject *cube_obj = lang_tess->cube_recognize_word(
        page_res_it.block()->block, &cube_word);
    // cube_combine_word leaves the combined result in |word|; cube_obj is
    // only needed so the combiner can inspect cube's internal state.
    if (cube_obj != NULL)
      lang_tess->cube_combine_word(cube_obj, &cube_word, word);
    delete cube_obj;
  }
}
/**
 * @name cube_word_pass1
 *
 * Recognizes a single word using (only) cube. Compatible with
 * Tesseract's classify_word_pass1/classify_word_pass2.
 */
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
  // The combiner is not involved in a pure cube pass, so the returned
  // CubeObject (possibly NULL) is not needed once recognition completes.
  delete cube_recognize_word(block, word);
}
/**
 * @name cube_recognize_word
 *
 * Cube recognizer to recognize a single word as with classify_word_pass1
 * but also returns the cube object in case the combiner is needed.
 */
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
  // Without a binary image or a cube context, cube cannot run at all.
  if (!cube_binary_ || !cube_cntxt_) {
    if (cube_debug_level > 0 && !cube_binary_)
      tprintf("Tesseract::run_cube(): NULL binary image.\n");
    word->SetupFake(unicharset);
    return NULL;
  }
  TBOX word_box = word->word->bounding_box();
  const bool rotated = block != NULL &&
      (block->re_rotation().x() != 1.0f || block->re_rotation().y() != 0.0f);
  if (rotated) {
    // TODO(rays) We have to rotate the bounding box to get the true coords.
    // This will be achieved in the future via DENORM.
    // In the mean time, cube can't process this word.
    if (cube_debug_level > 0) {
      tprintf("Cube can't process rotated word at:");
      word_box.print();
    }
    word->SetupFake(unicharset);
    return NULL;
  }
  // Note the y-flip: cube indexes the image from the top edge.
  CubeObject* cube_obj = new tesseract::CubeObject(
      cube_cntxt_, cube_binary_, word_box.left(),
      pixGetHeight(cube_binary_) - word_box.top(),
      word_box.width(), word_box.height());
  if (cube_recognize(cube_obj, block, word))
    return cube_obj;
  delete cube_obj;
  return NULL;
}
/**
 * @name cube_combine_word
 *
 * Combines the cube and tesseract results for a single word, leaving the
 * result in tess_word.
 */
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
                                  WERD_RES* tess_word) {
  // Ask the combiner net how strongly it favors the tesseract result.
  float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
                                                            cube_obj);
  // If combiner probability is greater than tess/cube combiner
  // classifier threshold, i.e. tesseract wins, then just return the
  // tesseract result unchanged, as the combiner knows nothing about how
  // correct the answer is. If cube and tesseract agree, then improve the
  // scores before returning.
  WERD_CHOICE* tess_best = tess_word->best_choice;
  WERD_CHOICE* cube_best = cube_word->best_choice;
  if (cube_debug_level || classify_debug_level) {
    tprintf("Combiner prob = %g vs threshold %g\n",
            combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
  }
  if (combiner_prob >=
      cube_cntxt_->Params()->CombinerClassifierThresh()) {
    if (tess_best->unichar_string() == cube_best->unichar_string()) {
      // Cube and tess agree, so improve the scores.
      tess_best->set_rating(tess_best->rating() / 2);
      tess_best->set_certainty(tess_best->certainty() / 2);
    }
    return;
  }
  // Cube wins.
  // It is better for the language combiner to have all tesseract scores,
  // so put them in the cube result.
  cube_best->set_rating(tess_best->rating());
  cube_best->set_certainty(tess_best->certainty());
  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
            tess_best->unichar_string().string(),
            cube_best->unichar_string().string());
  }
  // Move cube's recognition results into tess_word, consuming cube_word.
  tess_word->ConsumeWordResults(cube_word);
}
/**
 * @name cube_recognize
 *
 * Call cube on the current word, and write the result to word.
 * Sets up a fake result and returns false if something goes wrong.
 */
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                               WERD_RES *word) {
  // Run cube on the word image.
  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
    if (cube_debug_level > 0) {
      tprintf("Cube returned nothing for word at:");
      word->word->bounding_box().print();
    }
    word->SetupFake(unicharset);
    return false;
  }
  // Get cube's best result and its probability, mapped to tesseract's
  // certainty range.
  char_32 *cube_best_32 = cube_alt_list->Alt(0);
  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
  string cube_best_str;
  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
  // Retrieve Cube's character bounding boxes and CharSamples,
  // corresponding to the most recent call to RecognizeWord().
  Boxa *char_boxes = NULL;
  CharSamp **char_samples = NULL;
  int num_chars;
  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)) {
    // Bug fix: the failure handling used to be gated on cube_debug_level > 0
    // ("if (!extract_cube_state(...) && cube_debug_level > 0)"), so with
    // debugging off a failed extraction fell through and used the
    // uninitialized num_chars/char_samples below.
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
              "cube state.\n");
    }
    word->SetupFake(unicharset);
    return false;
  }
  // Convert cube's character bounding boxes to a BoxWord.
  BoxWord cube_box_word;
  TBOX tess_word_box = word->word->bounding_box();
  if (word->denorm.block() != NULL)
    tess_word_box.rotate(word->denorm.block()->re_rotation());
  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
                                               tess_word_box,
                                               &cube_box_word);
  boxaDestroy(&char_boxes);
  if (!box_word_success) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
              "create cube BoxWord\n");
    }
    word->SetupFake(unicharset);
    delete [] char_samples;  // Bug fix: this array leaked on this error path.
    return false;
  }
  // Fill tesseract result's fields with cube results.
  fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
  // Create cube's best choice: one BLOB_CHOICE per recognized character,
  // all carrying cube's word-level certainty.
  BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
  for (int i = 0; i < num_chars; ++i) {
    UNICHAR_ID uch_id =
        cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
    choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
                                 -1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
  }
  word->FakeClassifyWord(num_chars, choices);
  // Within a word, cube recognizes the word in reading order.
  word->best_choice->set_unichars_in_script_order(true);
  delete [] choices;
  delete [] char_samples;
  // Some sanity checks.
  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
  if (cube_debug_level || classify_debug_level) {
    tprintf("Cube result: %s r=%g, c=%g\n",
            word->best_choice->unichar_string().string(),
            word->best_choice->rating(),
            word->best_choice->certainty());
  }
  return true;
}
/**
 * @name fill_werd_res
 *
 * Fill Tesseract's word result fields with cube's.
 */
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
                              const char* cube_best_str,
                              WERD_RES* tess_werd_res) {
  // Replace the existing box_word with a copy of cube's, clipped back to
  // the original word outline.
  delete tess_werd_res->box_word;
  tess_werd_res->box_word = new BoxWord(cube_box_word);
  tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
                                              tess_werd_res->word);
  // Fill text and remaining fields.
  tess_werd_res->word->set_text(cube_best_str);
  tess_werd_res->tess_failed = FALSE;
  tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
  // There is no output word, so we can't call AdaptableWord, but then we
  // probably don't need to. Fudge the result with accepted.
  tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
  // Set word to done, i.e., ignore all of tesseract's tests for rejection.
  tess_werd_res->done = tess_werd_res->tess_accepted;
}
} // namespace tesseract

View File

@ -1,184 +0,0 @@
/**********************************************************************
* File: cube_reco_context.cpp
* Description: Implementation of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <limits.h>
#include "cube_reco_context.h"
#include "classifier_factory.h"
#include "cube_tuning_params.h"
#include "dict.h"
#include "feature_bmp.h"
#include "tessdatamanager.h"
#include "tesseractclass.h"
#include "tess_lang_model.h"
namespace tesseract {
/**
 * Instantiate a CubeRecoContext object using a Tesseract object.
 * CubeRecoContext will not take ownership of tess_obj, but will
 * record the pointer to it and will make use of various Tesseract
 * components (language model, flags, etc). Thus the caller should
 * keep tess_obj alive so long as the instantiated CubeRecoContext is used.
 */
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj)
    : loaded_(false),
      lang_(""),
      char_set_(NULL),
      // Bug fix: tess_unicharset_ was previously left uninitialized until
      // Load(); initialize it so the object never holds an indeterminate
      // pointer if Load() fails or is never called.
      tess_unicharset_(NULL),
      word_size_model_(NULL),
      char_classifier_(NULL),
      char_bigrams_(NULL),
      word_unigrams_(NULL),
      params_(NULL),
      lang_mod_(NULL),
      tess_obj_(tess_obj),
      size_normalization_(false),
      noisy_input_(false) {
}
// Releases every owned component. tess_obj_ and tess_unicharset_ are
// borrowed (recorded in the constructor / Load) and are left alone.
CubeRecoContext::~CubeRecoContext() {
  delete params_;
  params_ = NULL;
  delete lang_mod_;
  lang_mod_ = NULL;
  delete word_unigrams_;
  word_unigrams_ = NULL;
  delete char_bigrams_;
  char_bigrams_ = NULL;
  delete word_size_model_;
  word_size_model_ = NULL;
  delete char_classifier_;
  char_classifier_ = NULL;
  delete char_set_;
  char_set_ = NULL;
}
/**
 * Returns the path of the data files as recorded in the owning Tesseract
 * object's datadir member.
 */
bool CubeRecoContext::GetDataFilePath(string *path) const {
  path->assign(tess_obj_->datadir.string());
  return true;
}
/**
 * The object initialization function that loads all the necessary
 * components of a RecoContext. TessdataManager is used to load the
 * data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
 * component is present, Cube will be instantiated with the unicharset
 * specified in this component and the corresponding dictionary
 * (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
 * Tesseract's. Otherwise, TessdataManager will assume that Cube will
 * be using Tesseract's unicharset and dawgs, and will load the
 * unicharset from the TESSDATA_UNICHARSET component and will load the
 * dawgs from TESSDATA_*_DAWG components.
 */
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
                           UNICHARSET *tess_unicharset) {
  ASSERT_HOST(tess_obj_ != NULL);
  // Keep a non-owning pointer to tesseract's unicharset.
  tess_unicharset_ = tess_unicharset;
  string data_file_path;
  // Get the data file path.
  if (GetDataFilePath(&data_file_path) == false) {
    fprintf(stderr, "Unable to get data file path\n");
    return false;
  }
  // Get the language from the Tesseract object.
  lang_ = tess_obj_->lang.string();
  // Create the char set (required).
  if ((char_set_ =
       CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
            "CharSet\n");
    return false;
  }
  // Create the language model from the [lang].cube.lm params file (required).
  string lm_file_name = data_file_path + lang_ + ".cube.lm";
  string lm_params;
  if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
            "language model params from %s\n", lm_file_name.c_str());
    return false;
  }
  lang_mod_ = new TessLangModel(lm_params, data_file_path,
                                tess_obj_->getDict().load_system_dawg,
                                tessdata_manager, this);
  // Create the optional char bigrams object. NOTE(review): the three
  // Create calls below may return NULL; their results are not checked here,
  // so consumers presumably treat these components as optional — confirm.
  char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
  // Create the optional word unigrams object.
  word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
  // Create the optional size model.
  word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
                                           char_set_, Contextual());
  // Load tuning params (required).
  params_ = CubeTuningParams::Create(data_file_path, lang_);
  if (params_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
            "CubeTuningParams from %s\n", data_file_path.c_str());
    return false;
  }
  // Create the char classifier (required).
  char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
                                                   lang_mod_, char_set_,
                                                   params_);
  if (char_classifier_ == NULL) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
            "CharClassifierFactory object from %s\n", data_file_path.c_str());
    return false;
  }
  loaded_ = true;
  return true;
}
/**
 * Creates a CubeRecoContext object using a tesseract object: allocates the
 * context, loads all its components, and returns NULL (after cleanup) if
 * loading fails.
 */
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
                                          TessdataManager *tessdata_manager,
                                          UNICHARSET *tess_unicharset) {
  CubeRecoContext *context = new CubeRecoContext(tess_obj);
  if (!context->Load(tessdata_manager, tess_unicharset)) {
    fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
            "CubeRecoContext object\n");
    delete context;
    return NULL;
  }
  return context;
}
}  // namespace tesseract

View File

@ -1,157 +0,0 @@
/**********************************************************************
* File: cube_reco_context.h
* Description: Declaration of the Cube Recognition Context Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
// (or a thread) would create one CubeRecoContext object per language.
// The CubeRecoContext object also provides methods to get and set the
// different attribues of the Cube OCR Engine.
#ifndef CUBE_RECO_CONTEXT_H
#define CUBE_RECO_CONTEXT_H
#include <string>
#include "neural_net.h"
#include "lang_model.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "char_set.h"
#include "word_size_model.h"
#include "char_bigrams.h"
#include "word_unigrams.h"
namespace tesseract {
class Tesseract;
class TessdataManager;
// Bundles everything the cube engine needs for one language: character set,
// classifier, language model, bigram/unigram statistics, word size model and
// tuning parameters.
class CubeRecoContext {
 public:
  // Reading order enum type.
  enum ReadOrder {
    L2R,
    R2L
  };

  // Instantiate using a Tesseract object; the pointer is recorded but not
  // owned (see Create() for the fully-loaded factory path).
  CubeRecoContext(Tesseract *tess_obj);
  ~CubeRecoContext();

  // Accessors for the loaded components. Pointers returned here remain
  // owned by the CubeRecoContext (except TessUnicharset, which is borrowed).
  inline const string & Lang() const { return lang_; }
  inline CharSet *CharacterSet() const { return char_set_; }
  const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
  inline CharClassifier *Classifier() const { return char_classifier_; }
  inline WordSizeModel *SizeModel() const { return word_size_model_; }
  inline CharBigrams *Bigrams() const { return char_bigrams_; }
  inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
  inline TuningParams *Params() const { return params_; }
  inline LangModel *LangMod() const { return lang_mod_; }

  // The reading order of the language (only Arabic is right-to-left here).
  inline ReadOrder ReadingOrder() const {
    return ((lang_ == "ara") ? R2L : L2R);
  }
  // Does the language support case?
  inline bool HasCase() const {
    return (lang_ != "ara" && lang_ != "hin");
  }
  // Is the script cursive (currently only Arabic)?
  inline bool Cursive() const {
    return (lang_ == "ara");
  }
  // Does the language have italic fonts?
  inline bool HasItalics() const {
    return (lang_ != "ara" && lang_ != "hin");
  }
  // Do glyph shapes depend on their position in the word (contextual forms)?
  inline bool Contextual() const {
    return (lang_ == "ara");
  }

  // RecoContext runtime flags accessor functions. The OOD/Numeric/WordList/
  // Punc flags are forwarded to the language model; case sensitivity is
  // forwarded to the character classifier.
  inline bool SizeNormalization() const { return size_normalization_; }
  inline bool NoisyInput() const { return noisy_input_; }
  inline bool OOD() const { return lang_mod_->OOD(); }
  inline bool Numeric() const { return lang_mod_->Numeric(); }
  inline bool WordList() const { return lang_mod_->WordList(); }
  inline bool Punc() const { return lang_mod_->Punc(); }
  inline bool CaseSensitive() const {
    return char_classifier_->CaseSensitive();
  }
  inline void SetSizeNormalization(bool size_normalization) {
    size_normalization_ = size_normalization;
  }
  inline void SetNoisyInput(bool noisy_input) {
    noisy_input_ = noisy_input;
  }
  inline void SetOOD(bool ood_enabled) {
    lang_mod_->SetOOD(ood_enabled);
  }
  inline void SetNumeric(bool numeric_enabled) {
    lang_mod_->SetNumeric(numeric_enabled);
  }
  inline void SetWordList(bool word_list_enabled) {
    lang_mod_->SetWordList(word_list_enabled);
  }
  inline void SetPunc(bool punc_enabled) {
    lang_mod_->SetPunc(punc_enabled);
  }
  inline void SetCaseSensitive(bool case_sensitive) {
    char_classifier_->SetCaseSensitive(case_sensitive);
  }
  inline tesseract::Tesseract *TesseractObject() const {
    return tess_obj_;
  }

  // Returns the path of the data files.
  bool GetDataFilePath(string *path) const;
  // Creates a CubeRecoContext object using a tesseract object. Data
  // files are loaded via the tessdata_manager, and the tesseract
  // unicharset is provided in order to map Cube's unicharset to
  // Tesseract's in the case where the two unicharsets differ.
  static CubeRecoContext *Create(Tesseract *tess_obj,
                                 TessdataManager *tessdata_manager,
                                 UNICHARSET *tess_unicharset);

 private:
  // True once Load() has completed successfully.
  bool loaded_;
  string lang_;
  // Owned components, created in Load() and freed in the destructor.
  CharSet *char_set_;
  UNICHARSET *tess_unicharset_;  // borrowed from the caller of Load()
  WordSizeModel *word_size_model_;
  CharClassifier *char_classifier_;
  CharBigrams *char_bigrams_;
  WordUnigrams *word_unigrams_;
  TuningParams *params_;
  LangModel *lang_mod_;
  Tesseract *tess_obj_;  // CubeRecoContext does not own this pointer
  bool size_normalization_;
  bool noisy_input_;
  // Loads and initialized all the necessary components of a
  // CubeRecoContext. See .cpp for more details.
  bool Load(TessdataManager *tessdata_manager,
            UNICHARSET *tess_unicharset);
};
}
#endif // CUBE_RECO_CONTEXT_H

View File

@ -1,134 +0,0 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.cpp
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:39:45 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "cubeclassifier.h"
#include "char_altlist.h"
#include "char_set.h"
#include "cube_object.h"
#include "cube_reco_context.h"
#include "tessclassifier.h"
#include "tesseractclass.h"
#include "trainingsample.h"
#include "unicharset.h"
namespace tesseract {
// Borrows the cube context from the given Tesseract instance and keeps a
// reference to its shape table; neither is owned by this classifier.
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
    : cube_cntxt_(tesseract->GetCubeRecoContext()),
      shape_table_(*tesseract->shape_table()) {
}
// Empty: cube_cntxt_ and shape_table_ are non-owning references.
CubeClassifier::~CubeClassifier() {
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
results->clear();
if (page_pix == NULL) return 0;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
if (alt_list != NULL) {
alt_list->Sort();
CharSet* char_set = cube_cntxt_->CharacterSet();
for (int i = 0; i < alt_list->AltCount(); ++i) {
// Convert cube representation to a shape_id.
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id >= 0)
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
}
delete alt_list;
}
delete cube_obj;
return results->size();
}
/** Provides access to the ShapeTable that this classifier works with.
 *  The table is the one borrowed from tesseract in the constructor. */
const ShapeTable* CubeClassifier::GetShapeTable() const {
  return &shape_table_;
}
// Borrows the cube context and shape table from tesseract (not owned) and
// creates an owned tesseract class pruner used as the candidate generator.
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
    : cube_cntxt_(tesseract->GetCubeRecoContext()),
      shape_table_(*tesseract->shape_table()),
      pruner_(new TessClassifier(true, tesseract)) {
}
// Only the pruner is owned; cube_cntxt_ and shape_table_ are borrowed.
CubeTessClassifier::~CubeTessClassifier() {
  delete pruner_;
}
/// Classifies the given [training] sample, writing to results.
/// See ShapeClassifier for a full description.
int CubeTessClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
keep_this, results);
if (page_pix == NULL) return num_results;
ASSERT_HOST(cube_cntxt_ != NULL);
const TBOX& char_box = sample.bounding_box();
CubeObject* cube_obj = new tesseract::CubeObject(
cube_cntxt_, page_pix, char_box.left(),
pixGetHeight(page_pix) - char_box.top(),
char_box.width(), char_box.height());
CharAltList* alt_list = cube_obj->RecognizeChar();
CharSet* char_set = cube_cntxt_->CharacterSet();
if (alt_list != NULL) {
for (int r = 0; r < num_results; ++r) {
// Get the best cube probability of the unichar in the result.
double best_prob = 0.0;
for (int i = 0; i < alt_list->AltCount(); ++i) {
int alt_id = alt_list->Alt(i);
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
if (unichar_id == (*results)[r].unichar_id &&
alt_list->AltProb(i) > best_prob) {
best_prob = alt_list->AltProb(i);
}
}
(*results)[r].rating = best_prob;
}
delete alt_list;
// Re-sort by rating.
results->sort(&UnicharRating::SortDescendingRating);
}
delete cube_obj;
return results->size();
}
/** Provides access to the ShapeTable that this classifier works with.
 *  The table is the one borrowed from tesseract in the constructor. */
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
  return &shape_table_;
}
} // namespace tesseract

View File

@ -1,80 +0,0 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: cubeclassifier.h
// Description: Cube implementation of a ShapeClassifier.
// Author: Ray Smith
// Created: Wed Nov 23 10:36:32 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
#include "shapeclassifier.h"
namespace tesseract {
class Classify;
class CubeRecoContext;
class ShapeTable;
class TessClassifier;
class Tesseract;
class TrainingSample;
struct UnicharRating;
// Cube implementation of a ShapeClassifier.
// Cube implementation of a ShapeClassifier: classifies single characters by
// running the cube convolutional recognizer over the sample's page area.
class CubeClassifier : public ShapeClassifier {
 public:
  explicit CubeClassifier(Tesseract* tesseract);
  virtual ~CubeClassifier();

  // Classifies the given [training] sample, writing to results.
  // See ShapeClassifier for a full description.
  virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
                                    int debug, UNICHAR_ID keep_this,
                                    GenericVector<UnicharRating>* results);

  // Provides access to the ShapeTable that this classifier works with.
  virtual const ShapeTable* GetShapeTable() const;

 private:
  // Cube objects. Both are obtained from the Tesseract instance passed to
  // the constructor and are not freed by this class (the destructor is
  // empty in the implementation).
  CubeRecoContext* cube_cntxt_;
  const ShapeTable& shape_table_;
};
// Combination of Tesseract class pruner with scoring by cube.
// Combination of Tesseract class pruner with scoring by cube: the pruner
// proposes candidates and cube's probabilities replace their ratings.
class CubeTessClassifier : public ShapeClassifier {
 public:
  explicit CubeTessClassifier(Tesseract* tesseract);
  virtual ~CubeTessClassifier();

  // Classifies the given [training] sample, writing to results.
  // See ShapeClassifier for a full description.
  virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
                                    int debug, UNICHAR_ID keep_this,
                                    GenericVector<UnicharRating>* results);

  // Provides access to the ShapeTable that this classifier works with.
  virtual const ShapeTable* GetShapeTable() const;

 private:
  // Cube objects. cube_cntxt_ and shape_table_ are borrowed from the
  // Tesseract instance; pruner_ is owned and freed in the destructor.
  CubeRecoContext* cube_cntxt_;
  const ShapeTable& shape_table_;
  TessClassifier* pruner_;
};
} // namespace tesseract
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */

View File

@ -1,55 +0,0 @@
# Preprocessor flags for all targets in this directory: pull in the headers
# of the other Tesseract modules that the cube library depends on.
AM_CPPFLAGS += \
    -DUSE_STD_NAMESPACE \
    -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
    -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
    -I$(top_srcdir)/ccmain -I$(top_srcdir)/classify \
    -I$(top_srcdir)/textord -I$(top_srcdir)/wordrec \
    -I$(top_srcdir)/neural_networks/runtime \
    -I$(top_srcdir)/viewer

# When symbol-visibility support is enabled, hide all symbols by default and
# only export those explicitly marked via TESS_EXPORTS.
if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS \
    -fvisibility=hidden -fvisibility-inlines-hidden
endif

# Headers are build-internal only; they are never installed.
noinst_HEADERS = \
    altlist.h beam_search.h bmp_8.h cached_file.h \
    char_altlist.h char_bigrams.h char_samp.h char_samp_enum.h \
    char_samp_set.h char_set.h classifier_base.h classifier_factory.h \
    con_comp.h cube_const.h conv_net_classifier.h cube_line_object.h \
    cube_line_segmenter.h cube_object.h cube_search_object.h \
    cube_tuning_params.h cube_utils.h feature_base.h feature_bmp.h \
    feature_chebyshev.h feature_hybrid.h hybrid_neural_net_classifier.h \
    lang_mod_edge.h lang_model.h search_column.h search_node.h \
    search_object.h string_32.h tess_lang_mod_edge.h tess_lang_model.h \
    tuning_params.h word_altlist.h word_list_lang_model.h word_size_model.h \
    word_unigrams.h

# Build either as a convenience library (single-library build) or as an
# installed shared library with explicit inter-module dependencies.
if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_cube.la
else
lib_LTLIBRARIES = libtesseract_cube.la
libtesseract_cube_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
libtesseract_cube_la_LIBADD = \
    ../ccstruct/libtesseract_ccstruct.la \
    ../ccutil/libtesseract_ccutil.la \
    ../neural_networks/runtime/libtesseract_neural.la \
    ../viewer/libtesseract_viewer.la \
    ../wordrec/libtesseract_wordrec.la \
    ../cutil/libtesseract_cutil.la \
    ../classify/libtesseract_classify.la \
    ../dict/libtesseract_dict.la
endif

libtesseract_cube_la_SOURCES = \
    altlist.cpp beam_search.cpp bmp_8.cpp cached_file.cpp \
    char_altlist.cpp char_bigrams.cpp char_samp.cpp char_samp_enum.cpp \
    char_samp_set.cpp char_set.cpp classifier_factory.cpp \
    con_comp.cpp conv_net_classifier.cpp cube_line_object.cpp \
    cube_line_segmenter.cpp cube_object.cpp cube_search_object.cpp \
    cube_tuning_params.cpp cube_utils.cpp feature_bmp.cpp \
    feature_chebyshev.cpp feature_hybrid.cpp hybrid_neural_net_classifier.cpp \
    search_column.cpp search_node.cpp \
    tess_lang_mod_edge.cpp tess_lang_model.cpp \
    word_altlist.cpp word_list_lang_model.cpp word_size_model.cpp \
    word_unigrams.cpp

View File

@ -1,60 +0,0 @@
/**********************************************************************
* File: alt_list.cpp
 * Description: Class to abstract a list of alternate results
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "altlist.h"
#include <stdlib.h>
namespace tesseract {
// Constructs an empty alternate list able to hold up to max_alt entries.
// The cost/tag arrays are allocated lazily by the derived classes.
AltList::AltList(int max_alt)
    : max_alt_(max_alt), alt_cnt_(0), alt_cost_(NULL), alt_tag_(NULL) {
}
AltList::~AltList() {
  // delete[] on a NULL pointer is a no-op, so no guards are required.
  delete []alt_cost_;
  alt_cost_ = NULL;
  delete []alt_tag_;
  alt_tag_ = NULL;
}
// Returns the lowest cost in the list and stores the index of the
// corresponding alternate in *best_alt. Returns -1 (and sets *best_alt to
// -1) when the list is empty.
int AltList::BestCost(int *best_alt) const {
  if (alt_cnt_ <= 0) {
    *best_alt = -1;
    return -1;
  }
  int best_idx = 0;
  for (int idx = 1; idx < alt_cnt_; idx++) {
    if (alt_cost_[idx] < alt_cost_[best_idx])
      best_idx = idx;
  }
  *best_alt = best_idx;
  return alt_cost_[best_idx];
}
}

View File

@ -1,61 +0,0 @@
/**********************************************************************
* File: alt_list.h
 * Description: Class to abstract a list of alternate results
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The AltList class is the base class for the list of alternate recognition
// results. Each alternate has a cost and an optional tag associated with it
#ifndef ALT_LIST_H
#define ALT_LIST_H
#include <math.h>
#include "cube_utils.h"
namespace tesseract {
// Abstract base class for a list of alternate recognition results.
// Each alternate carries an integer cost (-ve log prob) and an optional
// opaque tag. Storage for the cost/tag arrays is owned by this class and
// released in the destructor.
class AltList {
 public:
  explicit AltList(int max_alt);
  virtual ~AltList();
  // sort the list of alternates based (on cost; implemented by subclasses)
  virtual void Sort() = 0;
  // return the best possible cost and index of corresponding alternate
  int BestCost (int *best_alt) const;
  // return the count of alternates
  inline int AltCount() const { return alt_cnt_; }
  // returns the cost (-ve log prob) of an alternate
  inline int AltCost(int alt_idx) const { return alt_cost_[alt_idx]; }
  // returns the prob of an alternate
  inline double AltProb(int alt_idx) const {
    return CubeUtils::Cost2Prob(AltCost(alt_idx));
  }
  // returns the alternate tag (NOT owned by the caller)
  inline void *AltTag(int alt_idx) const { return alt_tag_[alt_idx]; }

 protected:
  // max number of alternates the list can hold
  int max_alt_;
  // actual alternate count
  int alt_cnt_;
  // array of alternate costs
  int *alt_cost_;
  // array of alternate tags
  void **alt_tag_;
};
}
#endif // ALT_LIST_H

View File

@ -1,470 +0,0 @@
/**********************************************************************
* File: beam_search.cpp
* Description: Class to implement Beam Word Search Algorithm
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <algorithm>
#include "beam_search.h"
#include "tesseractclass.h"
namespace tesseract {
// Constructs a beam search tied to the given recognition context.
// word_mode selects single-word search (true) vs phrase search (false).
BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode)
    : cntxt_(cntxt),
      seg_pt_cnt_(0),
      col_cnt_(1),
      col_(NULL),
      word_mode_(word_mode) {
}
// Cleanup the lattice corresponding to the last search
void BeamSearch::Cleanup() {
if (col_ != NULL) {
for (int col = 0; col < col_cnt_; col++) {
delete col_[col];
}
delete []col_;
}
col_ = NULL;
}
// Destructor: release any lattice state from the last search.
BeamSearch::~BeamSearch() { Cleanup(); }
// Creates a set of children nodes emerging from a parent node based on
// the character alternate list and the language model.
// Ownership note: every LangModEdge returned by GetEdges() must end up
// either owned by the column (via AddNode) or deleted here — each branch
// below does exactly one of the two.
void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
                                SearchNode *parent_node,
                                LangModEdge *lm_parent_edge,
                                CharAltList *char_alt_list, int extra_cost) {
  // get all the edges from this parent
  int edge_cnt;
  LangModEdge **lm_edges = lang_mod->GetEdges(char_alt_list,
                                              lm_parent_edge, &edge_cnt);
  if (lm_edges) {
    // add them to the ending column with the appropriate parent
    for (int edge = 0; edge < edge_cnt; edge++) {
      // add a node to the column if the current column is not the
      // last one, or if the lang model edge indicates it is valid EOW
      if (!cntxt_->NoisyInput() && out_col->ColIdx() >= seg_pt_cnt_ &&
          !lm_edges[edge]->IsEOW()) {
        // free edge since no object is going to own it
        delete lm_edges[edge];
        continue;
      }
      // compute the recognition cost of this node
      int recognition_cost = MIN_PROB_COST;
      if (char_alt_list && char_alt_list->AltCount() > 0) {
        recognition_cost = MAX(0, char_alt_list->ClassCost(
            lm_edges[edge]->ClassID()));
        // Add the no space cost. This should zero in word mode
        recognition_cost += extra_cost;
      }
      // Note that the edge will be freed inside the column if
      // AddNode is called
      if (recognition_cost >= 0) {
        out_col->AddNode(lm_edges[edge], recognition_cost, parent_node,
                         cntxt_);
      } else {
        delete lm_edges[edge];
      }
    }  // edge
    // free edge array (the edges themselves were consumed or deleted above)
    delete []lm_edges;
  }  // lm_edges
}
// Performs a beam search in the specified search object using the specified
// language model; returns an alternate list of possible words as a result.
// Returns NULL (with an error on stderr where applicable) if no language
// model is available, the segment-point count is invalid, or it exceeds
// kMaxSegPointCnt. The returned list is owned by the caller; the lattice
// itself stays owned by this object until the next Search()/destruction.
WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
  // verifications
  if (!lang_mod)
    lang_mod = cntxt_->LangMod();
  if (!lang_mod) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
            "LangModel\n");
    return NULL;
  }
  // free existing state
  Cleanup();
  // get seg pt count
  seg_pt_cnt_ = srch_obj->SegPtCnt();
  if (seg_pt_cnt_ < 0) {
    return NULL;
  }
  col_cnt_ = seg_pt_cnt_ + 1;
  // Disregard suspicious cases. Use the named class constant instead of a
  // duplicated magic number so the limit has a single point of truth.
  if (seg_pt_cnt_ > kMaxSegPointCnt) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
            "suspiciously high; bailing out\n");
    return NULL;
  }
  // alloc memory for columns
  col_ = new SearchColumn *[col_cnt_];
  memset(col_, 0, col_cnt_ * sizeof(*col_));
  // for all possible segments
  for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
    // create a search column
    col_[end_seg - 1] = new SearchColumn(end_seg - 1,
                                         cntxt_->Params()->BeamWidth());
    // for all possible start segments
    int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
    for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
      int parent_nodes_cnt;
      SearchNode **parent_nodes;
      // for the root segment, we do not have a parent
      if (strt_seg == 0) {
        parent_nodes_cnt = 1;
        parent_nodes = NULL;
      } else {
        // for all the existing nodes in the starting column
        parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
        parent_nodes = col_[strt_seg - 1]->Nodes();
      }
      // run the shape recognizer
      CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
                                                              end_seg - 1);
      // for all the possible parents
      for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
        // point to the parent node
        SearchNode *parent_node = !parent_nodes ? NULL
            : parent_nodes[parent_idx];
        LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
            : parent_node->LangModelEdge();
        // compute the cost of not having spaces within the segment range
        int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
        // In phrase mode, compute the cost of not having a space before
        // this character
        int no_space_cost = 0;
        if (!word_mode_ && strt_seg > 0) {
          no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
        }
        // if the no space cost is low enough
        if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
          // Add the children nodes
          CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
                         lm_parent_edge, char_alt_list,
                         contig_cost + no_space_cost);
        }
        // In phrase mode and if not starting at the root
        if (!word_mode_ && strt_seg > 0) {  // parent_node must be non-NULL
          // consider starting a new word for nodes that are valid EOW
          if (parent_node->LangModelEdge()->IsEOW()) {
            // get the space cost
            int space_cost = srch_obj->SpaceCost(strt_seg - 1);
            // if the space cost is low enough
            if ((contig_cost + space_cost) < MIN_PROB_COST) {
              // Restart the language model and add nodes as children to the
              // space node.
              CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
                             char_alt_list, contig_cost + space_cost);
            }
          }
        }
      }  // parent
    }  // strt_seg
    // prune the column nodes
    col_[end_seg - 1]->Prune();
    // Free the column hash table. No longer needed
    col_[end_seg - 1]->FreeHashTable();
  }  // end_seg
  WordAltList *alt_list = CreateWordAltList(srch_obj);
  return alt_list;
}
// Creates a Word alternate list from the results in the lattice.
// Each node in the final column becomes one alternate whose cost is a
// weighted sum of size, char-bigram, word-unigram and recognition costs,
// weighted by the tuning parameters in cntxt_->Params().
// Returns a heap-allocated list owned by the caller, or NULL if the final
// column is empty.
WordAltList *BeamSearch::CreateWordAltList(SearchObject *srch_obj) {
  // create an alternate list of all the nodes in the last column
  int node_cnt = col_[col_cnt_ - 1]->NodeCount();
  SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
  CharBigrams *bigrams = cntxt_->Bigrams();
  WordUnigrams *word_unigrams = cntxt_->WordUnigramsObj();
  // Save the index of the best-cost node before the alt list is
  // sorted, so that we can retrieve it from the node list when backtracking.
  best_presorted_node_idx_ = 0;
  int best_cost = -1;
  if (node_cnt <= 0)
    return NULL;
  // start creating the word alternate list
  WordAltList *alt_list = new WordAltList(node_cnt + 1);
  for (int node_idx = 0; node_idx < node_cnt; node_idx++) {
    // recognition cost
    int recognition_cost = srch_nodes[node_idx]->BestCost();
    // compute the size cost of the alternate (also yields its string)
    char_32 *ch_buff = NULL;
    int size_cost = SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff);
    // accumulate other costs
    if (ch_buff) {
      int cost = 0;
      // char bigram cost
      int bigram_cost = !bigrams ? 0 :
          bigrams->Cost(ch_buff, cntxt_->CharacterSet());
      // word unigram cost
      int unigram_cost = !word_unigrams ? 0 :
          word_unigrams->Cost(ch_buff, cntxt_->LangMod(),
                              cntxt_->CharacterSet());
      // overall cost: weighted combination using the tuning parameters
      cost = static_cast<int>(
          (size_cost * cntxt_->Params()->SizeWgt()) +
          (bigram_cost * cntxt_->Params()->CharBigramWgt()) +
          (unigram_cost * cntxt_->Params()->WordUnigramWgt()) +
          (recognition_cost * cntxt_->Params()->RecoWgt()));
      // insert into word alt list. ch_buff is freed below, so Insert()
      // presumably takes its own copy — confirm against WordAltList::Insert.
      alt_list->Insert(ch_buff, cost,
                       static_cast<void *>(srch_nodes[node_idx]));
      // Note that strict < is necessary because WordAltList::Sort()
      // uses it in a bubble sort to swap entries.
      if (best_cost < 0 || cost < best_cost) {
        best_presorted_node_idx_ = node_idx;
        best_cost = cost;
      }
      delete []ch_buff;
    }
  }
  // sort the alternates based on cost
  alt_list->Sort();
  return alt_list;
}
// Returns the lattice column at the given index, or NULL if the index is
// out of range or no search has been run yet.
SearchColumn *BeamSearch::Column(int col) const {
  if (col_ == NULL || col < 0 || col >= col_cnt_)
    return NULL;
  return col_[col];
}
// Returns the best node in the last column of the last performed search,
// or NULL if there is no usable lattice.
SearchNode *BeamSearch::BestNode() const {
  if (col_cnt_ < 1 || col_ == NULL)
    return NULL;
  SearchColumn *last_col = col_[col_cnt_ - 1];
  if (last_col == NULL || last_col->NodeCount() < 1)
    return NULL;
  SearchNode **nodes = last_col->Nodes();
  if (nodes == NULL)
    return NULL;
  // Columns are pruned/sorted, so the first node is the best one.
  return nodes[0];
}
// Returns the path string of the specified alternate in the last lattice
// column, or NULL if the alternate does not exist.
char_32 *BeamSearch::Alt(int alt) const {
  // need at least one lattice column from a prior search
  if (col_cnt_ <= 0)
    return NULL;
  SearchColumn *last_col = col_[col_cnt_ - 1];
  if (last_col == NULL)
    return NULL;
  // validate the alternate index against the column contents
  if (last_col->Nodes() == NULL || alt >= last_col->NodeCount())
    return NULL;
  SearchNode *node = last_col->Nodes()[alt];
  return node == NULL ? NULL : node->PathString();
}
// Backtracks from the specified node index and returns the corresponding
// character mapped segments and character count. Optional return
// arguments are the char_32 result string and character bounding
// boxes, if non-NULL values are passed in.
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, int node_index,
                                 int *char_cnt, char_32 **str32,
                                 Boxa **char_boxes) const {
  // locate the requested node in the last lattice column, then delegate
  // to the node-based overload
  if (col_cnt_ <= 0)
    return NULL;
  SearchColumn *last_col = col_[col_cnt_ - 1];
  if (last_col == NULL || last_col->Nodes() == NULL ||
      node_index >= last_col->NodeCount()) {
    return NULL;
  }
  return BackTrack(srch_obj, last_col->Nodes()[node_index], char_cnt, str32,
                   char_boxes);
}
// Backtracks from the specified node and returns the corresponding
// character mapped segments and character count. Optional return
// arguments are the char_32 result string and character bounding
// boxes, if non-NULL values are passed in.
// On failure, returns NULL and (per the header contract) all result
// arguments are NULL.
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node,
                                 int *char_cnt, char_32 **str32,
                                 Boxa **char_boxes) const {
  if (!srch_node)
    return NULL;
  if (str32) {
    delete [](*str32);  // clear existing value
    *str32 = srch_node->PathString();
    if (!*str32)
      return NULL;
  }
  if (char_boxes && *char_boxes) {
    boxaDestroy(char_boxes);  // clear existing value
  }
  CharSamp **chars;
  chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
  if (!chars && str32) {
    delete []*str32;
    // Reset the freed pointer so the caller never sees a dangling value;
    // the documented failure contract is that all result args are NULL.
    *str32 = NULL;
  }
  return chars;
}
// Backtracks from the given lattice node and return the corresponding
// char mapped segments and character count. The character bounding
// boxes are optional return arguments, if non-NULL values are passed in.
// Ownership: the returned array is owned by the caller, but the CharSamp
// elements appear to be owned by srch_obj's cache — callers (e.g. SizeCost)
// free only the array. Confirm against SearchObject::CharSample before
// deleting elements.
CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj,
                                   SearchNode *srch_node,
                                   int *char_cnt,
                                   Boxa **char_boxes) const {
  // Count the characters (could be less than the path length when in
  // phrase mode)
  *char_cnt = 0;
  SearchNode *node = srch_node;
  while (node) {
    node = node->ParentNode();
    (*char_cnt)++;
  }
  if (*char_cnt == 0)
    return NULL;
  // Allocate box array
  if (char_boxes) {
    if (*char_boxes)
      boxaDestroy(char_boxes);  // clear existing value
    *char_boxes = boxaCreate(*char_cnt);
    if (*char_boxes == NULL)
      return NULL;
  }
  // Allocate memory for CharSamp array.
  CharSamp **chars = new CharSamp *[*char_cnt];
  // Walk from the last node back to the root, filling chars[] from the end.
  int ch_idx = *char_cnt - 1;
  int seg_pt_cnt = srch_obj->SegPtCnt();
  bool success = true;
  while (srch_node && ch_idx >= 0) {
    // Parent node (could be null)
    SearchNode *parent_node = srch_node->ParentNode();
    // Get the seg pts corresponding to the search node
    int st_col = !parent_node ? 0 : parent_node->ColIdx() + 1;
    int st_seg_pt = st_col <= 0 ? -1 : st_col - 1;
    int end_col = srch_node->ColIdx();
    int end_seg_pt = end_col >= seg_pt_cnt ? seg_pt_cnt : end_col;
    // Get a char sample corresponding to the segmentation points
    CharSamp *samp = srch_obj->CharSample(st_seg_pt, end_seg_pt);
    if (!samp) {
      success = false;
      break;
    }
    samp->SetLabel(srch_node->NodeString());
    chars[ch_idx] = samp;
    if (char_boxes) {
      // Create the corresponding character bounding box
      Box *char_box = boxCreate(samp->Left(), samp->Top(),
                                samp->Width(), samp->Height());
      if (!char_box) {
        success = false;
        break;
      }
      boxaAddBox(*char_boxes, char_box, L_INSERT);
    }
    srch_node = parent_node;
    ch_idx--;
  }
  if (!success) {
    // Free only the array; the samples are not owned here (see note above).
    delete []chars;
    if (char_boxes)
      boxaDestroy(char_boxes);
    return NULL;
  }
  // Reverse the order of boxes (they were appended root-last above).
  if (char_boxes) {
    int char_boxa_size = boxaGetCount(*char_boxes);
    int limit = char_boxa_size / 2;
    for (int i = 0; i < limit; ++i) {
      int box1_idx = i;
      int box2_idx = char_boxa_size - 1 - i;
      Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE);
      Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE);
      boxaReplaceBox(*char_boxes, box2_idx, box1);
      boxaReplaceBox(*char_boxes, box1_idx, box2);
    }
  }
  return chars;
}
// Returns the size cost of a string for a lattice path that ends at the
// specified lattice node. Optionally returns the path string via str32.
int BeamSearch::SizeCost(SearchObject *srch_obj, SearchNode *node,
                         char_32 **str32) const {
  if (!node)
    return 0;
  // Backtrack to get the string and the character segmentation.
  int char_cnt = 0;
  CharSamp **chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
  if (chars == NULL)
    return WORST_COST;
  int size_cost = (cntxt_->SizeModel() == NULL)
      ? 0
      : cntxt_->SizeModel()->Cost(chars, char_cnt);
  // Free only the array; the samples belong to the search object.
  delete []chars;
  return size_cost;
}
}  // namespace tesseract

View File

@ -1,126 +0,0 @@
/**********************************************************************
* File: beam_search.h
* Description: Declaration of Beam Word Search Algorithm Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The Beam Search class implements a Beam Search algorithm for the
// N-best paths through the lattice of a search object using a language model
// The search object is a segmented bitmap of a word image. The language model
// is a state machine that defines valid sequences of characters
// The cost of each path is the combined (product) probabilities of the
// characters along the path. The character probabilities are computed using
// the character classifier member of the RecoContext
// The BeamSearch class itself holds the state of the last search it performed
// using its "Search" method. Subsequent calls to the Search method erase the
// states of previously done searches
#ifndef BEAM_SEARCH_H
#define BEAM_SEARCH_H
#include "search_column.h"
#include "word_altlist.h"
#include "search_object.h"
#include "lang_model.h"
#include "cube_utils.h"
#include "cube_reco_context.h"
#include "allheaders.h"
namespace tesseract {
// Beam search over the segmentation lattice of a word image, guided by a
// language model. Holds the lattice state of the last Search() call; each
// new Search() erases the previous state.
class BeamSearch {
 public:
  explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
  ~BeamSearch();
  // Performs a beam search in the specified search using the specified
  // language model; returns an alternate list of possible words as a result.
  // The returned list is owned by the caller.
  WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
  // Returns the best node in the last column of last performed search.
  SearchNode *BestNode() const;
  // Returns the string corresponding to the specified alt.
  char_32 *Alt(int alt) const;
  // Backtracks from the specified lattice node and returns the corresponding
  // character-mapped segments, character count, char_32 result string, and
  // character bounding boxes (if char_boxes is not NULL). If the segments
  // cannot be constructed, returns NULL, and all result arguments
  // will be NULL.
  CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
                       int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
  // Same as above, except it takes a pointer to a search node object
  // instead of node index.
  CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
                       int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
  // Returns the size cost of a specified string of a lattice
  // path that ends at the specified lattice node.
  int SizeCost(SearchObject *srch_obj, SearchNode *node,
               char_32 **str32 = NULL) const;
  // Returns the word unigram cost of the given string, possibly
  // stripping out a single trailing punctuation character.
  int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
  // Supplementary functions needed for visualization
  // Return column count of the lattice.
  inline int ColCnt() const { return col_cnt_; }
  // Returns the lattice column corresponding to the specified column index.
  SearchColumn *Column(int col_idx) const;
  // Return the index of the best node in the last column of the
  // best-cost path before the alternates list is sorted.
  inline int BestPresortedNodeIndex() const {
    return best_presorted_node_idx_;
  }

 private:
  // Maximum reasonable segmentation point count
  static const int kMaxSegPointCnt = 128;
  // Recognition context object; the context holds the character classifier
  // and the tuning parameters object
  CubeRecoContext *cntxt_;
  // Count of segmentation pts
  int seg_pt_cnt_;
  // Lattice column count; currently redundant with respect to seg_pt_cnt_
  // but that might change in the future
  int col_cnt_;
  // Array of lattice columns (owned; freed by Cleanup())
  SearchColumn **col_;
  // Run in word or phrase mode
  bool word_mode_;
  // Node index of best-cost node, before alternates are merged and sorted
  int best_presorted_node_idx_;
  // Cleans up beam search state
  void Cleanup();
  // Creates a Word alternate list from the results in the lattice.
  // This function computes a cost for each node in the final column
  // of the lattice, which is a weighted average of several costs:
  // size cost, character bigram cost, word unigram cost, and
  // recognition cost from the beam search. The weights are the
  // CubeTuningParams, which are learned together with the character
  // classifiers.
  WordAltList *CreateWordAltList(SearchObject *srch_obj);
  // Creates a set of children nodes emerging from a parent node based on
  // the character alternate list and the language model.
  void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
                      SearchNode *parent_node, LangModEdge *lm_parent_edge,
                      CharAltList *char_alt_list, int extra_cost);
  // Backtracks from the given lattice node and returns the corresponding
  // char mapped segments, character count, and character bounding boxes (if
  // char_boxes is not NULL). If the segments cannot be constructed,
  // returns NULL, and all result arguments will be NULL.
  CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
                         int* char_cnt, Boxa **char_boxes) const;
};
}
#endif // BEAM_SEARCH_H

File diff suppressed because it is too large Load Diff

View File

@ -1,122 +0,0 @@
/**********************************************************************
* File: bmp_8.h
* Description: Declaration of an 8-bit Bitmap class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef BMP8_H
#define BMP8_H
// The Bmp8 class is an 8-bit bitmap that represents images of
// words, characters and segments throughout Cube
// It is meant to provide fast access to the bitmap bits and provide
// fast scaling, cropping, deslanting, connected components detection,
// loading and saving functionality
#include <stdlib.h>
#include <stdio.h>
#include "con_comp.h"
#include "cached_file.h"
namespace tesseract {
// Non-integral deslanting parameters.
static const float kMinDeslantAngle = -30.0f;
static const float kMaxDeslantAngle = 30.0f;
static const float kDeslantAngleDelta = 0.5f;
// 8-bit bitmap used throughout cube for word/character/segment images.
// Provides scaling, cropping, deslanting, connected-component detection,
// and load/save support.
class Bmp8 {
 public:
  Bmp8(unsigned short wid, unsigned short hgt);
  ~Bmp8();
  // Clears the bitmap
  bool Clear();
  // accessors to bitmap dimensions
  inline unsigned short Width() const { return wid_; }
  inline unsigned short Stride() const { return stride_; }
  inline unsigned short Height() const { return hgt_; }
  // Returns a pointer to the first scanline, or NULL when no pixel buffer
  // has been allocated.
  inline unsigned char *RawData() const {
    return (line_buff_ == NULL ? NULL : line_buff_[0]);
  }
  // creates a scaled version of the specified bitmap
  // Optionally, scaling can be isotropic (preserving aspect ratio) or not
  bool ScaleFrom(Bmp8 *bmp, bool isotropic = true);
  // Deslant the bitmap vertically
  bool Deslant();
  // Deslant the bitmap horizontally
  bool HorizontalDeslant(double *deslant_angle);
  // Create a bitmap object from a file (caller owns the returned object)
  static Bmp8 *FromCharDumpFile(CachedFile *fp);
  static Bmp8 *FromCharDumpFile(FILE *fp);
  // are two bitmaps identical
  bool IsIdentical(Bmp8 *pBmp) const;
  // Detect connected components of at least min_size pixels; the returned
  // array and its elements are owned by the caller.
  ConComp ** FindConComps(int *concomp_cnt, int min_size) const;
  // compute the foreground ratio
  float ForegroundRatio() const;
  // returns the mean horizontal histogram entropy of the bitmap
  float MeanHorizontalHistogramEntropy() const;
  // returns the horizontal histogram of the bitmap (caller owns the array)
  int *HorizontalHistogram() const;

 private:
  // Compute a look up tan table that will be used for fast slant computation
  static bool ComputeTanTable();
  // create a bitmap buffer (two flavors char & int) and init contents
  unsigned char ** CreateBmpBuffer(unsigned char init_val = 0xff);
  static unsigned int ** CreateBmpBuffer(int wid, int hgt,
                                         unsigned char init_val = 0xff);
  // Free a bitmap buffer
  static void FreeBmpBuffer(unsigned char **buff);
  static void FreeBmpBuffer(unsigned int **buff);
  // a static array that holds the tan lookup table
  static float *tan_table_;
  // bitmap 32-bit-aligned stride
  unsigned short stride_;
  // Bmp8 magic number used to validate saved bitmaps
  static const unsigned int kMagicNumber = 0xdeadbeef;

 protected:
  // bitmap dimensions
  unsigned short wid_;
  unsigned short hgt_;
  // bitmap contents: array of scanline pointers into one buffer
  unsigned char **line_buff_;
  // deslanting parameters
  static const int kConCompAllocChunk = 16;
  static const int kDeslantAngleCount;
  // Load dimensions & contents of bitmap from file
  bool LoadFromCharDumpFile(CachedFile *fp);
  bool LoadFromCharDumpFile(FILE *fp);
  // Load dimensions & contents of bitmap from raw data
  bool LoadFromCharDumpFile(unsigned char **raw_data);
  // Load contents of bitmap from raw data
  bool LoadFromRawData(unsigned char *data);
  // save bitmap to a file
  bool SaveBmp2CharDumpFile(FILE *fp) const;
  // checks if a row or a column are entirely blank
  bool IsBlankColumn(int x) const;
  bool IsBlankRow(int y) const;
  // crop the bitmap returning new dimensions
  void Crop(int *xst_src, int *yst_src, int *wid, int *hgt);
  // copy part of the specified bitmap
  void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const;
};
}
#endif // BMP8_H

View File

@ -1,147 +0,0 @@
/**********************************************************************
 * File: cached_file.cpp
* Description: Implementation of an Cached File Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <stdlib.h>
#include <cstring>
#include "cached_file.h"
namespace tesseract {
// Binds the cache to a file path. The file itself is opened lazily by
// Open() on first access.
CachedFile::CachedFile(string file_name) {
  file_name_ = file_name;
  // no handle or cache buffer until Open() succeeds
  fp_ = NULL;
  buff_ = NULL;
  // reset all cache/file counters
  buff_pos_ = 0;
  buff_size_ = 0;
  file_pos_ = 0;
  file_size_ = 0;
}
CachedFile::~CachedFile() {
  if (fp_ != NULL) {
    fclose(fp_);
    fp_ = NULL;
  }
  // delete[] on NULL is a no-op, so no guard is needed for the buffer.
  delete []buff_;
  buff_ = NULL;
}
// Opens the underlying file, measures its size and allocates the read
// cache. Returns true on success. Safe to call repeatedly: an already-open
// file takes the early exit.
bool CachedFile::Open() {
  if (fp_ != NULL) {
    return true;
  }
  fp_ = fopen(file_name_.c_str(), "rb");
  if (fp_ == NULL) {
    return false;
  }
  // seek to the end
  fseek(fp_, 0, SEEK_END);
  // get file size
  file_size_ = ftell(fp_);
  if (file_size_ < 1) {
    // Close the handle again before failing: otherwise a later call would
    // take the "already open" exit above and report success even though
    // the cache buffer was never allocated (Read would then crash).
    fclose(fp_);
    fp_ = NULL;
    return false;
  }
  // rewind again
  rewind(fp_);
  // alloc memory for buffer
  buff_ = new unsigned char[kCacheSize];
  // init counters
  buff_size_ = 0;
  buff_pos_ = 0;
  file_pos_ = 0;
  return true;
}
// Reads up to "bytes" bytes into read_buff, serving from the in-memory
// cache and refilling it from the file when needed. Returns the number of
// bytes actually copied (may be less than requested near EOF or when the
// request exceeds the cache refill size).
int CachedFile::Read(void *read_buff, int bytes) {
  int read_bytes = 0;
  unsigned char *buff = (unsigned char *)read_buff;
  // do we need to read beyond the buffer
  if ((buff_pos_ + bytes) > buff_size_) {
    // copy as much bytes from the current buffer if any
    int copy_bytes = buff_size_ - buff_pos_;
    if (copy_bytes > 0) {
      memcpy(buff, buff_ + buff_pos_, copy_bytes);
      buff += copy_bytes;
      bytes -= copy_bytes;
      read_bytes += copy_bytes;
    }
    // determine how much to read (one full cache page, clipped at EOF)
    buff_size_ = kCacheSize;
    if ((file_pos_ + buff_size_) > file_size_) {
      buff_size_ = static_cast<int>(file_size_ - file_pos_);
    }
    // EOF ? (or the remaining request is larger than one cache refill —
    // in that case a partial count is returned)
    if (buff_size_ <= 0 || bytes > buff_size_) {
      return read_bytes;
    }
    // read the first chunk
    if (fread(buff_, 1, buff_size_, fp_) != buff_size_) {
      return read_bytes;
    }
    buff_pos_ = 0;
    file_pos_ += buff_size_;
  }
  // remainder of the request is now guaranteed to be inside the cache
  memcpy(buff, buff_ + buff_pos_, bytes);
  read_bytes += bytes;
  buff_pos_ += bytes;
  return read_bytes;
}
// Returns the total size of the file in bytes, or 0 when the file
// cannot be opened.
long CachedFile::Size() {
  if (fp_ == NULL && !Open()) {
    return 0;
  }
  return file_size_;
}
// Returns the current logical read position: the on-disk position
// pulled back by the part of the cache that has not been consumed yet.
long CachedFile::Tell() {
  if (fp_ == NULL && !Open()) {
    return 0;
  }
  long unread = buff_size_ - buff_pos_;
  return file_pos_ - unread;
}
// Returns true when the logical read position has reached the end of
// the file, or when the file cannot be opened at all.
bool CachedFile::eof() {
  if (fp_ == NULL && !Open()) {
    return true;
  }
  long logical_pos = file_pos_ - buff_size_ + buff_pos_;
  return logical_pos >= file_size_;
}
} // namespace tesseract

View File

@ -1,69 +0,0 @@
/**********************************************************************
* File: cached_file.h
* Description: Declaration of a Cached File class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CACHED_FILE_H
#define CACHED_FILE_H
// The CachedFile class provides a large-cache read access to a file
// It is mainly designed for loading large word dump files
#include <stdio.h>
#include <string>
#ifdef USE_STD_NAMESPACE
using std::string;
#endif
namespace tesseract {
// Provides buffered, read-only access to a file through one large
// in-memory cache of kCacheSize bytes. Designed for sequentially
// loading large word dump files; the file is opened lazily on the
// first Read()/Size()/Tell()/eof() call.
class CachedFile {
 public:
  explicit CachedFile(string file_name);
  ~CachedFile();

  // reads a specified number of bytes to the specified buffer and
  // returns the actual number of bytes read
  int Read(void *read_buff, int bytes);
  // Returns the file size (0 if the file cannot be opened)
  long Size();
  // returns the current logical position in the file
  long Tell();
  // End of file flag
  bool eof();

 private:
  // cache size: 0x8000000 bytes = 128 MB
  static const unsigned int kCacheSize = 0x8000000;
  // file name, as passed to the constructor
  string file_name_;
  // internal cache buffer (allocated by Open)
  unsigned char *buff_;
  // on-disk position: offset of the byte following the cached region
  long file_pos_;
  // total file size in bytes
  long file_size_;
  // read position within the cache buffer
  int buff_pos_;
  // number of valid bytes currently in the cache buffer
  int buff_size_;
  // file handle (NULL until Open succeeds)
  FILE *fp_;
  // Opens the file, computes its size and allocates the cache
  bool Open();
};
}
#endif // CACHED_FILE_H

View File

@ -1,108 +0,0 @@
/**********************************************************************
* File: char_altlist.cpp
* Description: Implementation of a Character Alternate List Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "char_altlist.h"
namespace tesseract {
// Builds an alternate list over the given character set. The CharSet
// pointer is borrowed, not owned, and must outlive this object.
CharAltList::CharAltList(const CharSet *char_set, int max_alt)
    : AltList(max_alt) {
  class_id_alt_ = NULL;
  class_id_cost_ = NULL;
  char_set_ = char_set;
  max_alt_ = max_alt;
}
// Frees the alternate-id and per-class cost arrays; arrays inherited
// from AltList are released by the base-class destructor.
CharAltList::~CharAltList() {
  delete []class_id_alt_;   // delete[] of NULL is a no-op
  class_id_alt_ = NULL;
  delete []class_id_cost_;
  class_id_cost_ = NULL;
}
// Inserts a new character alternate with the given class-id, cost and
// optional caller-owned tag. Lazily allocates the alternate/cost
// arrays on first use. Returns false for an invalid class-id or when
// the list is already at capacity.
bool CharAltList::Insert(int class_id, int cost, void *tag) {
  // validate the class ID
  if (class_id < 0 || class_id >= char_set_->ClassCount()) {
    return false;
  }
  // reject inserts beyond the list capacity: the previous code wrote
  // past the end of the arrays once alt_cnt_ reached max_alt_
  if (alt_cnt_ >= max_alt_) {
    return false;
  }
  // allocate buffers if needed
  if (class_id_alt_ == NULL || alt_cost_ == NULL) {
    class_id_alt_ = new int[max_alt_];
    alt_cost_ = new int[max_alt_];
    alt_tag_ = new void *[max_alt_];
    memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
  }
  if (class_id_cost_ == NULL) {
    int class_cnt = char_set_->ClassCount();
    class_id_cost_ = new int[class_cnt];
    // initialize every per-class cost to the worst possible cost
    for (int ich = 0; ich < class_cnt; ich++) {
      class_id_cost_[ich] = WORST_COST;
    }
  }
  // insert the alternate and record its cost for ClassCost() lookups
  class_id_alt_[alt_cnt_] = class_id;
  alt_cost_[alt_cnt_] = cost;
  alt_tag_[alt_cnt_] = tag;
  alt_cnt_++;
  class_id_cost_[class_id] = cost;
  return true;
}
// Sorts the alternates in ascending order of cost (i.e. descending
// probability, best alternate first) with an in-place exchange sort,
// keeping the three parallel arrays (id, cost, tag) in step.
void CharAltList::Sort() {
  for (int i = 0; i < alt_cnt_; i++) {
    for (int j = i + 1; j < alt_cnt_; j++) {
      if (alt_cost_[i] > alt_cost_[j]) {
        // swap the parallel entries of the three arrays
        int swap_id = class_id_alt_[i];
        class_id_alt_[i] = class_id_alt_[j];
        class_id_alt_[j] = swap_id;

        int swap_cost = alt_cost_[i];
        alt_cost_[i] = alt_cost_[j];
        alt_cost_[j] = swap_cost;

        void *swap_tag = alt_tag_[i];
        alt_tag_[i] = alt_tag_[j];
        alt_tag_[j] = swap_tag;
      }
    }
  }
}
}

View File

@ -1,70 +0,0 @@
/**********************************************************************
* File: char_altlist.h
* Description: Declaration of a Character Alternate List Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CHAR_ALT_LIST_H
#define CHAR_ALT_LIST_H
// The CharAltList class holds the list of class alternates returned from
// a character classifier. Each alternate represents a class ID.
// It inherits from the AltList class.
// The CharAltList owns a CharSet object that maps a class-id to a string.
#include "altlist.h"
#include "char_set.h"
namespace tesseract {
// Holds the list of class alternates returned from a character
// classifier; each alternate is a class-id with a cost. Extends
// AltList with a per-class cost lookup table. The CharSet is borrowed
// (passed at construction), never owned.
class CharAltList : public AltList {
 public:
  CharAltList(const CharSet *char_set, int max_alt = kMaxCharAlt);
  ~CharAltList();
  // Sort the alternate list in ascending order of cost
  void Sort();
  // insert a new alternate with the specified class-id, cost and tag
  bool Insert(int class_id, int cost, void *tag = NULL);
  // returns the cost of a specific class ID; WORST_COST when the id is
  // out of range or nothing has been inserted yet
  inline int ClassCost(int class_id) const {
    if (class_id_cost_ == NULL ||
        class_id < 0 ||
        class_id >= char_set_->ClassCount()) {
      return WORST_COST;
    }
    return class_id_cost_[class_id];
  }
  // returns the alternate class-id corresponding to an alternate index
  inline int Alt(int alt_idx) const { return class_id_alt_[alt_idx]; }
  // set the cost of a certain alternate (updates both the per-alternate
  // and the per-class tables)
  void SetAltCost(int alt_idx, int cost) {
    alt_cost_[alt_idx] = cost;
    class_id_cost_[class_id_alt_[alt_idx]] = cost;
  }

 private:
  // character set object. Passed at construction time; not owned
  const CharSet *char_set_;
  // array of alternate class-ids, parallel to AltList's cost array
  int *class_id_alt_;
  // per-class cost table indexed by class-id (for ClassCost)
  int *class_id_cost_;
  // default max count of alternates
  static const int kMaxCharAlt = 256;
};
}
#endif // CHAR_ALT_LIST_H

View File

@ -1,191 +0,0 @@
/**********************************************************************
* File: char_bigrams.cpp
* Description: Implementation of a Character Bigrams Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <algorithm>
#include <math.h>
#include <string>
#include <vector>
#include "char_bigrams.h"
#include "cube_utils.h"
#include "ndminx.h"
#include "cube_const.h"
namespace tesseract {
// Constructs an empty bigram object: all table counts zeroed and no
// rows allocated (equivalent to zero-filling the whole struct).
CharBigrams::CharBigrams() {
  bigram_table_.total_cnt = 0;
  bigram_table_.worst_cost = 0;
  bigram_table_.max_char = 0;
  bigram_table_.char_bigram = NULL;
}
// Releases the two-level bigram table: each per-character row first,
// then the top-level row array itself.
CharBigrams::~CharBigrams() {
  CharBigram *rows = bigram_table_.char_bigram;
  if (rows == NULL) {
    return;
  }
  for (int ch1 = 0; ch1 <= bigram_table_.max_char; ch1++) {
    delete []rows[ch1].bigram;  // delete[] of NULL is a no-op
  }
  delete []rows;
}
// Constructs a CharBigrams object from the text data file
// "<data_file_path><lang>.cube.bigrams", where each line has the form
// "<count> <hex char1> <hex char2>". Builds a two-level ragged table
// (first char -> second char -> count), then converts counts to costs
// (-log prob scaled by PROB2COST_SCALE). Returns NULL on a missing
// file or a malformed line (logging to stderr in the latter case).
CharBigrams *CharBigrams::Create(const string &data_file_path,
                                 const string &lang) {
  string file_name;
  string str;
  file_name = data_file_path + lang;
  file_name += ".cube.bigrams";
  // load the string into memory
  if (!CubeUtils::ReadFileToString(file_name, &str)) {
    return NULL;
  }
  // construct a new object
  CharBigrams *char_bigrams_obj = new CharBigrams();
  CharBigramTable *table = &char_bigrams_obj->bigram_table_;
  table->total_cnt = 0;
  table->max_char = -1;
  table->char_bigram = NULL;
  // split into lines
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
  for (int big = 0; big < str_vec.size(); big++) {
    char_32 ch1;
    char_32 ch2;
    int cnt;
    // NOTE(review): %x into a char_32 assumes char_32 is a 32-bit
    // unsigned-int-compatible type -- confirm against cube's typedef
    if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
      fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
              "reading line: %s\n", str_vec[big].c_str());
      delete char_bigrams_obj;
      return NULL;
    }
    // expand the first-character dimension of the table so that ch1
    // becomes a valid index, preserving existing rows
    if (ch1 > table->max_char) {
      CharBigram *char_bigram = new CharBigram[ch1 + 1];
      if (table->char_bigram != NULL && table->max_char >= 0) {
        memcpy(char_bigram, table->char_bigram,
               (table->max_char + 1) * sizeof(*char_bigram));
        delete []table->char_bigram;
      }
      table->char_bigram = char_bigram;
      // init the newly added rows
      for (int new_big = table->max_char + 1; new_big <= ch1; new_big++) {
        table->char_bigram[new_big].total_cnt = 0;
        table->char_bigram[new_big].max_char = -1;
        table->char_bigram[new_big].bigram = NULL;
      }
      table->max_char = ch1;
    }
    // expand the second-character dimension of row ch1 so that ch2
    // becomes a valid index, preserving existing cells
    if (ch2 > table->char_bigram[ch1].max_char) {
      Bigram *bigram = new Bigram[ch2 + 1];
      if (table->char_bigram[ch1].bigram != NULL &&
          table->char_bigram[ch1].max_char >= 0) {
        memcpy(bigram, table->char_bigram[ch1].bigram,
               (table->char_bigram[ch1].max_char + 1) * sizeof(*bigram));
        delete []table->char_bigram[ch1].bigram;
      }
      table->char_bigram[ch1].bigram = bigram;
      // init the newly added cells
      for (int new_big = table->char_bigram[ch1].max_char + 1;
           new_big <= ch2; new_big++) {
        table->char_bigram[ch1].bigram[new_big].cnt = 0;
      }
      table->char_bigram[ch1].max_char = ch2;
    }
    // record the pair count and update row/table totals
    table->char_bigram[ch1].bigram[ch2].cnt = cnt;
    table->char_bigram[ch1].total_cnt += cnt;
    table->total_cnt += cnt;
  }
  // compute costs (-log probs); worst_cost uses a pseudo-count of 0.5,
  // and each cell's count is floored at 0.5 to avoid log(0)
  table->worst_cost = static_cast<int>(
      -PROB2COST_SCALE * log(0.5 / table->total_cnt));
  for (char_32 ch1 = 0; ch1 <= table->max_char; ch1++) {
    for (char_32 ch2 = 0; ch2 <= table->char_bigram[ch1].max_char; ch2++) {
      int cnt = table->char_bigram[ch1].bigram[ch2].cnt;
      table->char_bigram[ch1].bigram[ch2].cost =
          static_cast<int>(-PROB2COST_SCALE *
                           log(MAX(0.5, static_cast<double>(cnt)) /
                               table->total_cnt));
    }
  }
  return char_bigrams_obj;
}
// Returns the bigram cost of character ch2 following ch1, or the
// worst (highest) cost when the pair lies outside the table. The
// short-circuit keeps the row lookup safe when ch1 itself is out of
// range.
int CharBigrams::PairCost(char_32 ch1, char_32 ch2) const {
  if (ch1 > bigram_table_.max_char ||
      ch2 > bigram_table_.char_bigram[ch1].max_char) {
    return bigram_table_.worst_cost;
  }
  return bigram_table_.char_bigram[ch1].bigram[ch2].cost;
}
// Returns the mean character-bigram cost of the string. For long
// enough case-invariant words the all-lower and all-upper variants are
// also scored and the cheapest of the three is returned, so all-caps
// or capitalized words are not unfairly penalized.
int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set) const {
  if (!char_32_ptr || char_32_ptr[0] == 0) {
    return bigram_table_.worst_cost;
  }
  int best_cost = MeanCostWithSpaces(char_32_ptr);
  bool case_invariant =
      CubeUtils::StrLen(char_32_ptr) >= kMinLengthCaseInvariant &&
      CubeUtils::IsCaseInvariant(char_32_ptr, char_set);
  if (case_invariant) {
    char_32 *lower_32 = CubeUtils::ToLower(char_32_ptr, char_set);
    if (lower_32 && lower_32[0] != 0) {
      best_cost = MIN(best_cost, MeanCostWithSpaces(lower_32));
    }
    delete [] lower_32;
    char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set);
    if (upper_32 && upper_32[0] != 0) {
      best_cost = MIN(best_cost, MeanCostWithSpaces(upper_32));
    }
    delete [] upper_32;
  }
  return best_cost;
}
// Returns the mean bigram cost of a string, with an implicit space
// prepended and appended to account for word-boundary transitions
// (len + 1 transitions in total). Returns the worst cost for a NULL
// or empty string.
int CharBigrams::MeanCostWithSpaces(const char_32 *char_32_ptr) const {
  if (!char_32_ptr)
    return bigram_table_.worst_cost;
  int len = CubeUtils::StrLen(char_32_ptr);
  // an empty string would otherwise index char_32_ptr[len - 1] == [-1]
  if (len < 1)
    return bigram_table_.worst_cost;
  int cost = PairCost(' ', char_32_ptr[0]);
  for (int c = 1; c < len; c++) {
    cost += PairCost(char_32_ptr[c - 1], char_32_ptr[c]);
  }
  cost += PairCost(char_32_ptr[len - 1], ' ');
  // average over the len + 1 transitions
  return static_cast<int>(cost / static_cast<double>(len + 1));
}
} // namespace tesseract

View File

@ -1,89 +0,0 @@
/**********************************************************************
* File: char_bigrams.h
* Description: Declaration of a Character Bigrams Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharBigram class represents the interface to the character bigram
// table used by Cube
// A CharBigram object can be constructed from the Char Bigrams file
// Given a sequence of characters, the "Cost" method returns the Char Bigram
// cost of the string according to the table
#ifndef CHAR_BIGRAMS_H
#define CHAR_BIGRAMS_H
#include <string>
#include "char_set.h"
namespace tesseract {
// structure representing a single bigram value: the raw occurrence
// count and its precomputed cost (-log prob, scaled)
struct Bigram {
  int cnt;
  int cost;
};
// structure representing the char bigram array of characters
// following a specific character: max_char is the highest
// second-character index present; bigram holds (max_char + 1) cells
struct CharBigram {
  int total_cnt;
  char_32 max_char;
  Bigram *bigram;
};
// structure representing the whole bigram table: a ragged 2-D array
// indexed by first then second character; worst_cost is the cost
// assigned to pairs absent from the table
struct CharBigramTable {
  int total_cnt;
  int worst_cost;
  char_32 max_char;
  CharBigram *char_bigram;
};
// Interface to the character bigram table used by Cube. An object is
// built from a ".cube.bigrams" data file via Create(); Cost() then
// scores character sequences against the table.
class CharBigrams {
 public:
  CharBigrams();
  ~CharBigrams();
  // Construct the CharBigrams class from the file
  // "<data_file_path><lang>.cube.bigrams"; returns NULL on failure
  static CharBigrams *Create(const string &data_file_path,
                             const string &lang);
  // Top-level function to return the mean character bigram cost of a
  // sequence of characters. If char_set is not NULL, use
  // tesseract functions to return a case-invariant cost.
  // This avoids unnecessarily penalizing all-one-case words or
  // capitalized words (first-letter upper-case and remaining letters
  // lower-case).
  int Cost(const char_32 *str, CharSet *char_set) const;

 protected:
  // Returns the character bigram cost of two characters; worst cost
  // when the pair is not in the table.
  int PairCost(char_32 ch1, char_32 ch2) const;
  // Returns the mean character bigram cost of a sequence of
  // characters. Adds a space at the beginning and end to account for
  // cost of starting and ending characters.
  int MeanCostWithSpaces(const char_32 *char_32_ptr) const;

 private:
  // Only words this length or greater qualify for case-invariant character
  // bigram cost.
  static const int kMinLengthCaseInvariant = 4;
  // the underlying two-level count/cost table
  CharBigramTable bigram_table_;
};
}
#endif // CHAR_BIGRAMS_H

View File

@ -1,640 +0,0 @@
/**********************************************************************
* File: char_samp.cpp
* Description: Implementation of a Character Bitmap Sample Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string.h>
#include <string>
#include "char_samp.h"
#include "cube_utils.h"
namespace tesseract {
#define MAX_LINE_LEN 1024
// Default constructor: an empty (0 x 0) sample at the origin with no
// label and no page assigned.
CharSamp::CharSamp()
    : Bmp8(0, 0) {
  label32_ = NULL;
  page_ = -1;
  left_ = 0;
  top_ = 0;
}
// Creates an unlabeled wid x hgt sample positioned at the origin.
CharSamp::CharSamp(int wid, int hgt)
    : Bmp8(wid, hgt) {
  label32_ = NULL;
  page_ = -1;
  left_ = 0;
  top_ = 0;
}
// Creates an unlabeled wid x hgt sample positioned at (left, top).
CharSamp::CharSamp(int left, int top, int wid, int hgt)
    : Bmp8(wid, hgt), left_(left), top_(top) {
  label32_ = NULL;
  page_ = -1;
}
// Frees the owned UTF-32 label, if any.
CharSamp::~CharSamp() {
  delete []label32_;  // delete[] of NULL is a no-op
  label32_ = NULL;
}
// Returns the sample's label converted from UTF-32 to UTF-8, or the
// empty string when the sample has no label.
string CharSamp::stringLabel() const {
  if (label32_ == NULL) {
    return "";
  }
  string str = "";
  string_32 str32(label32_);
  CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
  return str;
}
// Replaces the sample's label with the UTF-32 conversion of the given
// UTF-8 encoded string.
void CharSamp::SetLabel(string str) {
  // drop any previous label; delete[] of NULL is a no-op
  delete []label32_;
  label32_ = NULL;
  string_32 str32;
  CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
  SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
}
// Deserializes a CharSamp from a character dump file read through a
// CachedFile. Record layout: 32-bit magic marker (0xabd0fefe), label
// length, the (not null-terminated) UTF-32 label, then page, left,
// top, first/last char flags, the normalization fields, and finally
// the Bmp8 bitmap. Returns NULL on any read failure, freeing any
// partially allocated label (delete[] of NULL is a no-op).
CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
  unsigned short left;
  unsigned short top;
  unsigned short page;
  unsigned short first_char;
  unsigned short last_char;
  unsigned short norm_top;
  unsigned short norm_bottom;
  unsigned short norm_aspect_ratio;
  unsigned int val32;
  char_32 *label32;
  // read and check 32 bit marker
  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
    return NULL;
  }
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // read label length,
  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
    return NULL;
  }
  // the label is not null terminated in the file
  if (val32 > 0 && val32 < MAX_UINT32) {
    label32 = new char_32[val32 + 1];
    // read label
    if (fp->Read(label32, val32 * sizeof(*label32)) !=
        (val32 * sizeof(*label32))) {
      delete [] label32;
      return NULL;
    }
    // null terminate
    label32[val32] = 0;
  } else {
    label32 = NULL;
  }
  // read coordinates and context fields, bailing out (and freeing the
  // label) on the first short read
  if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
    delete [] label32;
    return NULL;
  }
  if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
      sizeof(norm_aspect_ratio)) {
    delete [] label32;
    return NULL;
  }
  // create the object
  CharSamp *char_samp = new CharSamp();
  // init; char_samp now owns label32
  char_samp->label32_ = label32;
  char_samp->page_ = page;
  char_samp->left_ = left;
  char_samp->top_ = top;
  char_samp->first_char_ = first_char;
  char_samp->last_char_ = last_char;
  char_samp->norm_top_ = norm_top;
  char_samp->norm_bottom_ = norm_bottom;
  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
  // load the Bmp8 part
  if (char_samp->LoadFromCharDumpFile(fp) == false) {
    delete char_samp;  // also frees label32, which it owns
    return NULL;
  }
  return char_samp;
}
// Deserializes a CharSamp from a character dump file via stdio. Same
// record layout as the CachedFile overload: 32-bit magic marker
// (0xabd0fefe), label length, (not null-terminated) UTF-32 label,
// page/left/top, first/last char flags, normalization fields, then the
// Bmp8 bitmap. Returns NULL on any read failure, freeing any partial
// allocations.
CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
  unsigned short left;
  unsigned short top;
  unsigned short page;
  unsigned short first_char;
  unsigned short last_char;
  unsigned short norm_top;
  unsigned short norm_bottom;
  unsigned short norm_aspect_ratio;
  unsigned int val32;
  char_32 *label32;
  // read and check 32 bit marker
  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return NULL;
  }
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // read label length,
  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return NULL;
  }
  // the label is not null terminated in the file
  if (val32 > 0 && val32 < MAX_UINT32) {
    label32 = new char_32[val32 + 1];
    // read label
    if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
        (val32 * sizeof(*label32))) {
      delete [] label32;
      return NULL;
    }
    // null terminate
    label32[val32] = 0;
  } else {
    label32 = NULL;
  }
  // read coordinates and context fields; short-circuit stops at the
  // first short read, and delete[] of a NULL label is a no-op
  if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
      fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
      fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
      fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
      fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
      fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
      fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
      fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
      sizeof(norm_aspect_ratio)) {
    delete [] label32;
    return NULL;
  }
  // create the object
  CharSamp *char_samp = new CharSamp();
  // init; char_samp now owns label32
  char_samp->label32_ = label32;
  char_samp->page_ = page;
  char_samp->left_ = left;
  char_samp->top_ = top;
  char_samp->first_char_ = first_char;
  char_samp->last_char_ = last_char;
  char_samp->norm_top_ = norm_top;
  char_samp->norm_bottom_ = norm_bottom;
  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
  // load the Bmp8 part
  if (char_samp->LoadFromCharDumpFile(fp) == false) {
    delete char_samp;  // It owns label32.
    return NULL;
  }
  return char_samp;
}
// Returns a newly allocated copy of this sample scaled to wid x hgt
// (isotropically when requested), or NULL when scaling fails. All
// positional, label and normalization metadata is carried over.
CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
  CharSamp *result = new CharSamp(wid, hgt);
  if (!result->ScaleFrom(this, isotropic)) {
    delete result;
    return NULL;
  }
  // carry over this sample's metadata
  result->left_ = left_;
  result->top_ = top_;
  result->page_ = page_;
  result->SetLabel(label32_);
  result->first_char_ = first_char_;
  result->last_char_ = last_char_;
  result->norm_top_ = norm_top_;
  result->norm_bottom_ = norm_bottom_;
  result->norm_aspect_ratio_ = norm_aspect_ratio_;
  return result;
}
// Builds a CharSamp at (left, top) with dimensions wid x hgt from a
// raw bitmap buffer; returns NULL when the data cannot be loaded.
CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
                                unsigned char *data) {
  CharSamp *samp = new CharSamp(left, top, wid, hgt);
  if (!samp->LoadFromRawData(data)) {
    delete samp;
    samp = NULL;
  }
  return samp;
}
// Serializes the sample to an open dump file: 32-bit magic marker,
// label length, the (not null-terminated) label, positional and
// normalization metadata, and finally the Bmp8 bitmap. Returns false
// on any write failure.
bool CharSamp::Save2CharDumpFile(FILE *fp) const {
  // write and check the 32 bit marker
  unsigned int val32 = 0xabd0fefe;
  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return false;
  }
  // write the label length (0 when there is no label)
  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    return false;
  }
  // write the label itself
  if (label32_ != NULL &&
      fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
      (val32 * sizeof(*label32_))) {
    return false;
  }
  // write the coordinates and normalization metadata; short-circuit
  // stops at the first failed write
  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_) ||
      fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_) ||
      fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_) ||
      fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
      sizeof(first_char_) ||
      fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_) ||
      fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_) ||
      fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
      sizeof(norm_bottom_) ||
      fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
      sizeof(norm_aspect_ratio_)) {
    return false;
  }
  // write the bitmap portion
  return SaveBmp2CharDumpFile(fp);
}
// Returns a copy of the sample cropped to its foreground bounding box
// (no surrounding white space), or NULL when the image is empty.
// norm_top_ / norm_bottom_ describe the character's position within
// whatever context it is recognized in (e.g. the word bounding box),
// normalized to 0..255; here they are reset to the defaults 0 and 255
// and, being context dependent, may need to be overridden by the
// caller.
CharSamp *CharSamp::Crop() {
  // compute the tight bounding box of the foreground
  int crop_left = 0;
  int crop_top = 0;
  int crop_wid = wid_;
  int crop_hgt = hgt_;
  Bmp8::Crop(&crop_left, &crop_top, &crop_wid, &crop_hgt);
  if (crop_wid == 0 || crop_hgt == 0) {
    return NULL;
  }
  // build the cropped sample, repositioned relative to this sample
  CharSamp *cropped_samp = new CharSamp(left_ + crop_left,
                                        top_ + crop_top,
                                        crop_wid, crop_hgt);
  cropped_samp->SetLabel(label32_);
  cropped_samp->SetFirstChar(first_char_);
  cropped_samp->SetLastChar(last_char_);
  // context-dependent fields: defaults the caller may reset using the
  // character box location w.r.t. the word bounding box
  cropped_samp->SetNormAspectRatio(255 * crop_wid / (crop_wid + crop_hgt));
  cropped_samp->SetNormTop(0);
  cropped_samp->SetNormBottom(255);
  // blit the cropped region into the new sample
  Copy(crop_left, crop_top, crop_wid, crop_hgt, cropped_samp);
  return cropped_samp;
}
// Segments the sample into connected components, further splitting
// each component using a vertical pixel-density histogram (window
// max_hist_wnd). Segments smaller than 2x2 are discarded. The result
// array is sorted in reading order (right-to-left when requested) and
// the caller owns both the array and its ConComp elements. Returns
// NULL (with *segment_cnt == 0) when no components are found.
ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
                            int max_hist_wnd, int min_con_comp_size) const {
  // init
  (*segment_cnt) = 0;
  int concomp_cnt = 0;
  int seg_cnt = 0;
  // find the concomps of the image
  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
  if (concomp_cnt <= 0 || !concomp_array) {
    if (concomp_array)
      delete []concomp_array;
    return NULL;
  }
  ConComp **seg_array = NULL;
  // segment each concomp further using vertical histogram
  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
    int concomp_seg_cnt = 0;
    // segment the concomp
    ConComp **concomp_seg_array = NULL;
    ConComp **concomp_alloc_seg =
        concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
    // no segments: keep the whole concomp (ownership moves to seg_array
    // below via the aliased pointer)
    if (concomp_alloc_seg == NULL) {
      concomp_seg_cnt = 1;
      concomp_seg_array = concomp_array + concomp;
    } else {
      // delete the original concomp, we no longer need it
      concomp_seg_array = concomp_alloc_seg;
      delete concomp_array[concomp];
    }
    // add the resulting segments
    for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
      // too small of a segment: ignore
      if (concomp_seg_array[seg_idx]->Width() < 2 &&
          concomp_seg_array[seg_idx]->Height() < 2) {
        delete concomp_seg_array[seg_idx];
      } else {
        // add the new segment
        // extend the segment array in kConCompAllocChunk increments
        if ((seg_cnt % kConCompAllocChunk) == 0) {
          ConComp **temp_segm_array =
              new ConComp *[seg_cnt + kConCompAllocChunk];
          if (seg_cnt > 0) {
            memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
            delete []seg_array;
          }
          seg_array = temp_segm_array;
        }
        seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
      }
    }  // segment
    if (concomp_alloc_seg != NULL) {
      delete []concomp_alloc_seg;
    }
  }  // concomp
  delete []concomp_array;
  // sort the concomps from Left2Right or Right2Left, based on the reading order
  if (seg_cnt > 0 && seg_array != NULL) {
    qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
        ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
  }
  (*segment_cnt) = seg_cnt;
  return seg_array;
}
// Builds a CharSamp from a subset of connected components:
// seg_flags_size components starting at strt_concomp, restricted to
// those whose seg_flags entry is non-zero (all of them when seg_flags
// is NULL). On success, *left_most / *right_most report whether every
// selected original component ID contributes a left-most /right-most
// segment. word_hgt is currently unused here. Returns NULL when no
// component is selected or no valid bounding box can be formed.
CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
                                 int seg_flags_size, int *seg_flags,
                                 bool *left_most, bool *right_most,
                                 int word_hgt) {
  int concomp;
  int end_concomp;
  int concomp_cnt = 0;
  end_concomp = strt_concomp + seg_flags_size;
  // determine the range of original-component IDs among the selection
  bool once = false;
  int min_id = -1;
  int max_id = -1;
  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
    if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
      if (!once) {
        min_id = concomp_array[concomp]->ID();
        max_id = concomp_array[concomp]->ID();
        once = true;
      } else {
        UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
      }
      concomp_cnt++;
    }
  }
  if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
    return NULL;
  }
  // alloc scratch arrays for computing the leftmost/rightmost attributes
  int id_cnt = max_id - min_id + 1;
  bool *id_exist = new bool[id_cnt];
  bool *left_most_exist = new bool[id_cnt];
  bool *right_most_exist = new bool[id_cnt];
  memset(id_exist, 0, id_cnt * sizeof(*id_exist));
  memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
  memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
  // find the bounding box of the selection and count unique IDs
  once = false;
  int left = -1;
  int right = -1;
  int top = -1;
  int bottom = -1;
  int unq_ids = 0;
  int unq_left_most = 0;
  int unq_right_most = 0;
  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
    if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
      if (!once) {
        left = concomp_array[concomp]->Left();
        right = concomp_array[concomp]->Right();
        top = concomp_array[concomp]->Top();
        bottom = concomp_array[concomp]->Bottom();
        once = true;
      } else {
        UpdateRange(concomp_array[concomp]->Left(),
                    concomp_array[concomp]->Right(), &left, &right);
        UpdateRange(concomp_array[concomp]->Top(),
                    concomp_array[concomp]->Bottom(), &top, &bottom);
      }
      // count unique ids, and unique left-most and right-most ids
      int concomp_id = concomp_array[concomp]->ID() - min_id;
      if (!id_exist[concomp_id]) {
        id_exist[concomp_id] = true;
        unq_ids++;
      }
      if (concomp_array[concomp]->LeftMost()) {
        if (left_most_exist[concomp_id] == false) {
          left_most_exist[concomp_id] = true;
          unq_left_most++;
        }
      }
      if (concomp_array[concomp]->RightMost()) {
        if (right_most_exist[concomp_id] == false) {
          right_most_exist[concomp_id] = true;
          unq_right_most++;
        }
      }
    }
  }
  delete []id_exist;
  delete []left_most_exist;
  delete []right_most_exist;
  if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
    return NULL;
  }
  // every selected ID must contribute a left-most (right-most) segment
  (*left_most) = (unq_left_most >= unq_ids);
  (*right_most) = (unq_right_most >= unq_ids);
  // create the char sample object sized to the selection's bounding box
  CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
  // set the foreground pixels (0 = foreground in the Bmp8 line buffer)
  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
    if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
      ConCompPt *pt_ptr = concomp_array[concomp]->Head();
      while (pt_ptr) {
        samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
        pt_ptr = pt_ptr->Next();
      }
    }
  }
  return samp;
}
// Returns a deep copy of this sample: the bitmap plus all label,
// context and normalization metadata.
CharSamp *CharSamp::Clone() const {
  CharSamp *copy = new CharSamp(left_, top_, wid_, hgt_);
  copy->SetLabel(label32_);
  copy->SetFirstChar(first_char_);
  copy->SetLastChar(last_char_);
  copy->SetNormTop(norm_top_);
  copy->SetNormBottom(norm_bottom_);
  copy->SetNormAspectRatio(norm_aspect_ratio_);
  // blit the whole bitmap into the copy
  Copy(0, 0, wid_, hgt_, copy);
  return copy;
}
// Deserializes a CharSamp from an in-memory dump buffer, advancing
// *raw_data_ptr past the consumed record on success. Same record
// layout as the file-based overloads: 32-bit magic marker
// (0xabd0fefe), label length, UTF-32 label, metadata fields, then the
// Bmp8 bitmap. Returns NULL on a bad marker or bitmap-load failure.
// NOTE(review): unlike the file overloads there is no bounds checking
// against the buffer end -- the caller must guarantee a complete
// record.
CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
  unsigned int val32;
  char_32 *label32;
  unsigned char *raw_data = *raw_data_ptr;
  // read and check 32 bit marker
  memcpy(&val32, raw_data, sizeof(val32));
  raw_data += sizeof(val32);
  if (val32 != 0xabd0fefe) {
    return NULL;
  }
  // read label length,
  memcpy(&val32, raw_data, sizeof(val32));
  raw_data += sizeof(val32);
  // the label is not null terminated in the file
  if (val32 > 0 && val32 < MAX_UINT32) {
    label32 = new char_32[val32 + 1];
    // read label
    memcpy(label32, raw_data, val32 * sizeof(*label32));
    raw_data += (val32 * sizeof(*label32));
    // null terminate
    label32[val32] = 0;
  } else {
    label32 = NULL;
  }
  // create the object; it takes ownership of label32
  CharSamp *char_samp = new CharSamp();
  // read coordinates and context fields straight into the members
  char_samp->label32_ = label32;
  memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
  raw_data += sizeof(char_samp->page_);
  memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
  raw_data += sizeof(char_samp->left_);
  memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
  raw_data += sizeof(char_samp->top_);
  memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
  raw_data += sizeof(char_samp->first_char_);
  memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
  raw_data += sizeof(char_samp->last_char_);
  memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
  raw_data += sizeof(char_samp->norm_top_);
  memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
  raw_data += sizeof(char_samp->norm_bottom_);
  memcpy(&char_samp->norm_aspect_ratio_, raw_data,
         sizeof(char_samp->norm_aspect_ratio_));
  raw_data += sizeof(char_samp->norm_aspect_ratio_);
  // load the Bmp8 part
  if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
    delete char_samp;
    return NULL;
  }
  // advance the caller's cursor only on success
  (*raw_data_ptr) = raw_data;
  return char_samp;
}
// computes the features corresponding to the char sample
bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
// Create a scaled BMP
CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
if (!scaled_bmp) {
return false;
}
// prepare input
unsigned char *buff = scaled_bmp->RawData();
// bitmap features
int input;
int bmp_size = conv_grid_size * conv_grid_size;
for (input = 0; input < bmp_size; input++) {
features[input] = 255.0f - (1.0f * buff[input]);
}
// word context features
features[input++] = FirstChar();
features[input++] = LastChar();
features[input++] = NormTop();
features[input++] = NormBottom();
features[input++] = NormAspectRatio();
delete scaled_bmp;
return true;
}
} // namespace tesseract

View File

@ -1,158 +0,0 @@
/**********************************************************************
* File: char_samp.h
* Description: Declaration of a Character Bitmap Sample Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharSamp inherits the Bmp8 class that represents images of
// words, characters and segments throughout Cube
// CharSamp adds more data members to hold the physical location of the image
// in a page, page number in a book if available.
// It also holds the label (GT) of the image that might correspond to a single
// character or a word
// It also provides methods for segmenting, scaling and cropping of the sample
#ifndef CHAR_SAMP_H
#define CHAR_SAMP_H
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include "bmp_8.h"
#include "string_32.h"
namespace tesseract {
// A character/word image sample: a Bmp8 bitmap plus its physical page
// location, an optional UTF-32 label (ground truth), and word-context
// attributes that are used as classifier features.
class CharSamp : public Bmp8 {
 public:
  CharSamp();
  CharSamp(int wid, int hgt);
  CharSamp(int left, int top, int wid, int hgt);
  ~CharSamp();
  // accessor methods
  unsigned short Left() const { return left_; }
  unsigned short Right() const { return left_ + wid_; }
  unsigned short Top() const { return top_; }
  unsigned short Bottom() const { return top_ + hgt_; }
  unsigned short Page() const { return page_; }
  // Top/bottom of the sample normalized to a word height of 255.
  unsigned short NormTop() const { return norm_top_; }
  unsigned short NormBottom() const { return norm_bottom_; }
  // 255 * width / (width + height).
  unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
  // Nonzero when this sample is the first/last character of its word.
  unsigned short FirstChar() const { return first_char_; }
  unsigned short LastChar() const { return last_char_; }
  // Returns the single-character label, or 0 if there is no label or the
  // label is longer than one character.
  char_32 Label() const {
    if (label32_ == NULL || LabelLen() != 1) {
      return 0;
    }
    return label32_[0];
  }
  // Returns the raw (possibly multi-character) label string; may be NULL.
  char_32 * StrLabel() const { return label32_; }
  string stringLabel() const;
  // mutator methods
  void SetLeft(unsigned short left) { left_ = left; }
  void SetTop(unsigned short top) { top_ = top; }
  void SetPage(unsigned short page) { page_ = page; }
  // Replaces the label with a single character.
  void SetLabel(char_32 label) {
    delete []label32_;
    label32_ = new char_32[2];
    label32_[0] = label;
    label32_[1] = 0;
  }
  // Replaces the label with a copy of label32 (NULL clears the label).
  void SetLabel(const char_32 *label32) {
    delete []label32_;
    label32_ = NULL;
    if (label32 != NULL) {
      // remove any byte order marks if any
      if (label32[0] == 0xfeff) {
        label32++;
      }
      int len = LabelLen(label32);
      label32_ = new char_32[len + 1];
      memcpy(label32_, label32, len * sizeof(*label32));
      label32_[len] = 0;
    }
  }
  void SetLabel(string str);
  void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
  void SetNormBottom(unsigned short norm_bottom) {
    norm_bottom_ = norm_bottom;
  }
  void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
    norm_aspect_ratio_ = norm_aspect_ratio;
  }
  void SetFirstChar(unsigned short first_char) {
    first_char_ = first_char;
  }
  void SetLastChar(unsigned short last_char) {
    last_char_ = last_char;
  }
  // Saves the charsamp to a dump file
  bool Save2CharDumpFile(FILE *fp) const;
  // Crops the underlying image and returns a new CharSamp with the
  // same character information but new dimensions. Warning: does not
  // necessarily set the normalized top and bottom correctly since
  // those depend on its location within the word (or CubeSearchObject).
  CharSamp *Crop();
  // Computes the connected components of the char sample
  ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
                    int min_con_comp_size) const;
  // returns a copy of the charsamp that is scaled to the
  // specified width and height
  CharSamp *Scale(int wid, int hgt, bool isotropic = true);
  // returns a Clone of the charsample
  CharSamp *Clone() const;
  // computes the features corresponding to the char sample
  bool ComputeFeatures(int conv_grid_size, float *features);
  // Load a Char Samp from a dump file
  static CharSamp *FromCharDumpFile(CachedFile *fp);
  static CharSamp *FromCharDumpFile(FILE *fp);
  static CharSamp *FromCharDumpFile(unsigned char **raw_data);
  static CharSamp *FromRawData(int left, int top, int wid, int hgt,
                               unsigned char *data);
  static CharSamp *FromConComps(ConComp **concomp_array,
                                int strt_concomp, int seg_flags_size,
                                int *seg_flags, bool *left_most,
                                bool *right_most, int word_hgt);
  // Number of auxiliary (non-bitmap) features fed to the classifier.
  static int AuxFeatureCnt() { return (5); }
  // Return the length of the label string
  int LabelLen() const { return LabelLen(label32_); }
  // NOTE(review): the loop pre-increments, so index 0 is never examined
  // and an empty (but non-NULL) label would read past the terminator;
  // callers appear to pass NULL or non-empty labels only -- confirm.
  static int LabelLen(const char_32 *label32) {
    if (label32 == NULL) {
      return 0;
    }
    int len = 0;
    while (label32[++len] != 0);
    return len;
  }

 private:
  // Owned UTF-32 label; NULL when the sample is unlabeled.
  char_32 * label32_;
  unsigned short page_;
  unsigned short left_;
  unsigned short top_;
  // top of sample normalized to a word height of 255
  unsigned short norm_top_;
  // bottom of sample normalized to a word height of 255
  unsigned short norm_bottom_;
  // 255 * ratio of character width to (width + height)
  unsigned short norm_aspect_ratio_;
  unsigned short first_char_;
  unsigned short last_char_;
};
}
#endif // CHAR_SAMP_H

View File

@ -1,30 +0,0 @@
/**********************************************************************
* File: char_samp_enum.cpp
* Description: Implementation of a Character Sample Enumerator Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "char_samp_enum.h"
namespace tesseract {

// Trivial default constructor: the base enumerator carries no state.
CharSampEnum::CharSampEnum() {
}

// Virtual destructor so subclasses destruct correctly via base pointers.
CharSampEnum::~CharSampEnum() {
}

}  // namespace tesseract

View File

@ -1,38 +0,0 @@
/**********************************************************************
* File: char_samp_enum.h
* Description: Declaration of a Character Sample Enumerator Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharSampEnum class provides the base class for CharSamp class
// Enumerators. This is typically used to implement dump file readers
#ifndef CHARSAMP_ENUM_H
#define CHARSAMP_ENUM_H
#include "char_samp.h"
namespace tesseract {
// Abstract callback interface used to visit CharSamp objects one at a
// time (e.g., while streaming samples out of a dump file).
class CharSampEnum {
 public:
  CharSampEnum();
  virtual ~CharSampEnum();
  // Invoked once per sample; "progress" is a percentage in [0, 100].
  // Return false to stop the enumeration early.
  virtual bool EnumCharSamp(CharSamp *char_samp, float progress) = 0;
};
}
#endif // CHARSAMP_ENUM_H

View File

@ -1,170 +0,0 @@
/**********************************************************************
* File: char_samp_enum.cpp
* Description: Implementation of a Character Sample Set Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdlib.h>
#include <string>
#include "char_samp_set.h"
#include "cached_file.h"
namespace tesseract {
// Constructs an empty sample set that owns no storage yet.
CharSampSet::CharSampSet()
    : cnt_(0), samp_buff_(NULL), own_samples_(false) {
}
// Releases the pointer buffer, and the samples too when they are owned
// by this set (see own_samples_).
CharSampSet::~CharSampSet() {
  Cleanup();
}
// Frees the sample pointer array -- deleting the individual samples only
// when this set owns them -- then resets the set to the empty state.
void CharSampSet::Cleanup() {
  if (samp_buff_ != NULL) {
    if (own_samples_) {
      // Samples were created by this set (see LoadCharSamples), so free
      // them along with the buffer.
      for (int idx = 0; idx < cnt_; idx++) {
        delete samp_buff_[idx];
      }
    }
    delete []samp_buff_;
  }
  samp_buff_ = NULL;
  cnt_ = 0;
}
// Appends char_samp to the set, growing the pointer buffer in
// SAMP_ALLOC_BLOCK-sized chunks as needed. Ownership of the sample is
// determined by own_samples_, not by this call. Always returns true.
bool CharSampSet::Add(CharSamp *char_samp) {
  if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
    // Buffer is full (or not yet allocated): grow by one block.
    // (The original reinterpret_cast here was redundant: new CharSamp*[]
    // already yields a CharSamp**.)
    CharSamp **new_samp_buff = new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK];
    // copy old contents
    if (cnt_ > 0) {
      memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
      delete []samp_buff_;
    }
    samp_buff_ = new_samp_buff;
  }
  samp_buff_[cnt_++] = char_samp;
  return true;
}
// load char samples from file
// Reads CharSamp dump records from fp until EOF, adding each successfully
// parsed sample to the set. The set takes ownership of the loaded samples.
bool CharSampSet::LoadCharSamples(FILE *fp) {
  // free existing
  Cleanup();
  // samples are created here and owned by the class
  own_samples_ = true;
  // start loading char samples
  while (feof(fp) == 0) {
    // FromCharDumpFile returns NULL at end-of-file or for a malformed
    // record; such records are simply skipped.
    CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
    if (new_samp != NULL) {
      if (Add(new_samp) == false) {
        return false;
      }
    }
  }
  return true;
}
// Builds a CharSampSet from the dump file "file_name": verifies the
// 0xfefeabd0 header marker and then loads samples until end-of-file.
// Returns NULL on any open/format error; the caller owns the result.
CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return NULL;
  }
  // The file must begin with the char-dump marker.
  unsigned int marker;
  if (fread(&marker, 1, sizeof(marker), fp) != sizeof(marker) ||
      marker != 0xfefeabd0) {
    fclose(fp);
    return NULL;
  }
  // Load the samples; on failure hand back NULL instead of a partial set.
  CharSampSet *samp_set = new CharSampSet();
  if (samp_set->LoadCharSamples(fp) == false) {
    delete samp_set;
    samp_set = NULL;
  }
  fclose(fp);
  return samp_set;
}
// Create a new Char Dump file
// Opens file_name for binary writing and writes the 0xfefeabd0 format
// marker. Returns the open FILE* (the caller closes it), or NULL on error.
FILE *CharSampSet::CreateCharDumpFile(string file_name) {
  FILE *fp;
  unsigned int val32;
  // create the file
  fp = fopen(file_name.c_str(), "wb");
  if (!fp) {
    return NULL;
  }
  // write the marker (mirrors the check in FromCharDumpFile/EnumSamples)
  val32 = 0xfefeabd0;
  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
    fclose(fp);
    return NULL;
  }
  return fp;
}
// Streams the samples in the dump file "file_name" one by one, calling
// enum_obj->EnumCharSamp() for each with a percentage-progress value.
// Stops early if the callback returns false. Returns false on open or
// format errors, true otherwise.
bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
  // open the file
  CachedFile *fp_in = new CachedFile(file_name);
  long i64_size = fp_in->Size();
  if (i64_size < 1) {
    delete fp_in;  // was leaked on this error path
    return false;
  }
  // read and verify the char-dump marker
  unsigned int val32;
  if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32) ||
      val32 != 0xfefeabd0) {
    delete fp_in;  // was leaked on this error path
    return false;
  }
  // load char samples one at a time and hand them to the enumerator
  while (fp_in->eof() == false) {
    CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
    long i64_pos = fp_in->Tell();
    if (new_samp != NULL) {
      bool ret_flag = enum_obj->EnumCharSamp(new_samp,
                                             (100.0f * i64_pos / i64_size));
      delete new_samp;
      if (ret_flag == false) {
        break;
      }
    }
  }
  delete fp_in;
  return true;
}
} // namespace ocrlib

View File

@ -1,73 +0,0 @@
/**********************************************************************
* File: char_samp_set.h
* Description: Declaration of a Character Sample Set Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharSampSet set encapsulates a set of CharSet objects typically
// but not necessarily loaded from a file
// It provides methods to load samples from File, Create a new file and
// Add new char samples to the set
#ifndef CHAR_SAMP_SET_H
#define CHAR_SAMP_SET_H
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include "char_samp.h"
#include "char_samp_enum.h"
#include "char_set.h"
namespace tesseract {
// chunks of samp pointers to allocate
#define SAMP_ALLOC_BLOCK 10000
// A collection of CharSamp pointers, typically loaded from a dump file.
class CharSampSet {
 public:
  CharSampSet();
  ~CharSampSet();
  // return sample count
  int SampleCount() const { return cnt_; }
  // returns samples buffer
  CharSamp ** Samples() const { return samp_buff_; }
  // Create a CharSampSet set object from a file
  static CharSampSet *FromCharDumpFile(string file_name);
  // Enumerate the Samples in the set one-by-one calling the enumerator's
  // EnumCharSamp method for each sample
  static bool EnumSamples(string file_name, CharSampEnum *enumerator);
  // Create a new Char Dump file
  static FILE *CreateCharDumpFile(string file_name);
  // Add a new sample to the set. The pointer is stored as-is; whether
  // the set later deletes it is governed by own_samples_.
  bool Add(CharSamp *char_samp);

 private:
  // sample count
  int cnt_;
  // the char samp array
  CharSamp **samp_buff_;
  // Are the samples owned by the set or not.
  // Determines whether we should cleanup in the end
  bool own_samples_;
  // Cleanup
  void Cleanup();
  // Load character samples from a file
  bool LoadCharSamples(FILE *fp);
};
}
#endif // CHAR_SAMP_SET_H

View File

@ -1,168 +0,0 @@
/**********************************************************************
* File: char_samp_enum.cpp
* Description: Implementation of a Character Set Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include "char_set.h"
#include "cube_utils.h"
#include "tessdatamanager.h"
namespace tesseract {
// Constructs an empty, uninitialized char set with empty hash bins.
CharSet::CharSet()
    : class_cnt_(0), class_strings_(NULL), unicharset_map_(NULL),
      init_(false) {
  // Mark every hash bin as empty.
  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
}
// Frees the class-string table and the Cube-to-Tesseract id map.
CharSet::~CharSet() {
  if (class_strings_ != NULL) {
    for (int cls = 0; cls < class_cnt_; cls++) {
      // delete of a NULL entry is a harmless no-op.
      delete class_strings_[cls];
    }
    delete []class_strings_;
    class_strings_ = NULL;
  }
  delete []unicharset_map_;
}
// Creates CharSet object by reading the unicharset from the
// TessDatamanager, and mapping Cube's unicharset to Tesseract's if
// they differ. Returns NULL (after printing an error) on failure;
// the caller owns the returned object.
CharSet *CharSet::Create(TessdataManager *tessdata_manager,
                         UNICHARSET *tess_unicharset) {
  CharSet *char_set = new CharSet();
  // First look for Cube's unicharset; if not there, use tesseract's
  bool cube_unicharset_exists;
  if (!(cube_unicharset_exists =
        tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) &&
      !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
            "either cube or tesseract unicharset\n");
    delete char_set;  // was leaked on this error path
    return NULL;
  }
  FILE *charset_fp = tessdata_manager->GetDataFilePtr();
  if (!charset_fp) {
    fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
            "a unicharset\n");
    delete char_set;  // was leaked on this error path
    return NULL;
  }
  // If we found a cube unicharset separate from tesseract's, load it and
  // map its unichars to tesseract's; if only one unicharset exists,
  // just load it.
  bool loaded;
  if (cube_unicharset_exists) {
    char_set->cube_unicharset_.load_from_file(charset_fp);
    loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
    loaded = loaded && char_set->LoadSupportedCharList(
        tessdata_manager->GetDataFilePtr(), tess_unicharset);
    char_set->unicharset_ = &char_set->cube_unicharset_;
  } else {
    loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
    char_set->unicharset_ = tess_unicharset;
  }
  if (!loaded) {
    delete char_set;
    return NULL;
  }
  char_set->init_ = true;
  return char_set;
}
// Load the list of supported chars from the given data file pointer.
// The file format is: a line with the class count, then one class string
// per line (truncated at the first space; the literal "NULL" denotes a
// space character). If tess_unicharset is non-NULL, each Cube class id
// is also mapped to a Tesseract unichar id, inserting new unichars into
// tess_unicharset as needed. Returns false on any read/format error.
bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) {
  if (init_)
    return true;
  char str_line[256];
  // init hash table
  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
  // read the char count
  if (fgets(str_line, sizeof(str_line), fp) == NULL) {
    fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not "
            "read char count.\n");
    return false;
  }
  class_cnt_ = atoi(str_line);
  if (class_cnt_ < 2) {
    fprintf(stderr, "Cube ERROR (CharSet::InitMemory): invalid "
            "class count: %d\n", class_cnt_);
    return false;
  }
  // memory for class strings. NULL-initialize every slot so that the
  // destructor is safe if we bail out before all entries are read
  // (previously the unread slots held indeterminate pointers).
  class_strings_ = new string_32*[class_cnt_];
  for (int class_id = 0; class_id < class_cnt_; class_id++) {
    class_strings_[class_id] = NULL;
  }
  // memory for unicharset map
  if (tess_unicharset) {
    unicharset_map_ = new int[class_cnt_];
  }
  // Read in character strings and add to hash table
  for (int class_id = 0; class_id < class_cnt_; class_id++) {
    // Read the class string
    if (fgets(str_line, sizeof(str_line), fp) == NULL) {
      fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): "
              "could not read class string with class_id=%d.\n", class_id);
      return false;
    }
    // Terminate at space if any
    // NOTE(review): only ' ' is trimmed here, not the trailing '\n' from
    // fgets -- presumably every line carries a space-separated suffix;
    // confirm against the unicharset file format.
    char *p = strchr(str_line, ' ');
    if (p != NULL)
      *p = '\0';
    // Convert to UTF32 and store
    string_32 str32;
    // Convert NULL to a space
    if (strcmp(str_line, "NULL") == 0) {
      strcpy(str_line, " ");
    }
    CubeUtils::UTF8ToUTF32(str_line, &str32);
    class_strings_[class_id] = new string_32(str32);
    // Add to hash-table
    int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
    if (hash_bin_size_[hash_val] >= kMaxHashSize) {
      fprintf(stderr, "Cube ERROR (CharSet::LoadSupportedCharList): hash "
              "table is full.\n");
      return false;
    }
    hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id;
    if (tess_unicharset != NULL) {
      // Add class id to unicharset map, inserting the unichar into
      // tesseract's unicharset first if it is not already present.
      UNICHAR_ID tess_id = tess_unicharset->unichar_to_id(str_line);
      if (tess_id == INVALID_UNICHAR_ID) {
        tess_unicharset->unichar_insert(str_line);
        tess_id = tess_unicharset->unichar_to_id(str_line);
      }
      ASSERT_HOST(tess_id != INVALID_UNICHAR_ID);
      unicharset_map_[class_id] = tess_id;
    }
  }
  return true;
}
} // tesseract

View File

@ -1,174 +0,0 @@
/**********************************************************************
* File: char_samp_enum.h
* Description: Declaration of a Character Set Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharSet class encapsulates the list of 32-bit strings/characters that
// Cube supports for a specific language. The char set is loaded from the
// .unicharset file corresponding to a specific language
// Each string has a corresponding int class-id that gets used throughout Cube
// The class provides pass back and forth conversion between the class-id
// and its corresponding 32-bit string. This is done using a hash table that
// maps the string to the class id.
#ifndef CHAR_SET_H
#define CHAR_SET_H
#include <string.h>
#include <string>
#include <algorithm>
#include "string_32.h"
#include "tessdatamanager.h"
#include "unicharset.h"
#include "cube_const.h"
namespace tesseract {
// Maps Cube's supported 32-bit class strings to integer class ids (and,
// when Cube and Tesseract use different unicharsets, on to Tesseract
// unichar ids). Lookup goes through a fixed-size chained hash table.
class CharSet {
 public:
  CharSet();
  ~CharSet();
  // Returns true if Cube is sharing Tesseract's unicharset.
  inline bool SharedUnicharset() { return (unicharset_map_ == NULL); }
  // Returns the class id corresponding to a 32-bit string. Returns -1
  // if the string is not supported. This is done by hashing the
  // string and then looking up the string in the hash-bin if there
  // are collisions.
  inline int ClassID(const char_32 *str) const {
    int hash_val = Hash(str);
    if (hash_bin_size_[hash_val] == 0)
      return -1;
    for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
      if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0)
        return hash_bins_[hash_val][bin];
    }
    return -1;
  }
  // Same as above but using a 32-bit char instead of a string
  inline int ClassID(char_32 ch) const {
    int hash_val = Hash(ch);
    if (hash_bin_size_[hash_val] == 0)
      return -1;
    for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
      // Only single-character class strings can match a lone char.
      if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch &&
          class_strings_[hash_bins_[hash_val][bin]]->length() == 1) {
        return hash_bins_[hash_val][bin];
      }
    }
    return -1;
  }
  // Retrieve the unicharid in Tesseract's unicharset corresponding
  // to a 32-bit string. When Tesseract and Cube share the same
  // unicharset, this will just be the class id.
  // NOTE(review): ClassID() reports failure as -1, but the check below
  // compares against INVALID_UNICHAR_ID -- this assumes
  // INVALID_UNICHAR_ID == -1; confirm in unicharset.h.
  inline int UnicharID(const char_32 *str) const {
    int class_id = ClassID(str);
    if (class_id == INVALID_UNICHAR_ID)
      return INVALID_UNICHAR_ID;
    int unichar_id;
    if (unicharset_map_)
      unichar_id = unicharset_map_[class_id];
    else
      unichar_id = class_id;
    return unichar_id;
  }
  // Same as above but using a 32-bit char instead of a string
  inline int UnicharID(char_32 ch) const {
    int class_id = ClassID(ch);
    if (class_id == INVALID_UNICHAR_ID)
      return INVALID_UNICHAR_ID;
    int unichar_id;
    if (unicharset_map_)
      unichar_id = unicharset_map_[class_id];
    else
      unichar_id = class_id;
    return unichar_id;
  }
  // Returns the 32-bit string corresponding to a class id
  inline const char_32 * ClassString(int class_id) const {
    if (class_id < 0 || class_id >= class_cnt_) {
      return NULL;
    }
    return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
  }
  // Returns the count of supported strings
  inline int ClassCount() const { return class_cnt_; }
  // Creates CharSet object by reading the unicharset from the
  // TessDatamanager, and mapping Cube's unicharset to Tesseract's if
  // they differ.
  static CharSet *Create(TessdataManager *tessdata_manager,
                         UNICHARSET *tess_unicharset);
  // Return the UNICHARSET cube is using for recognition internally --
  // ClassId() returns unichar_id's in this unicharset.
  UNICHARSET *InternalUnicharset() { return unicharset_; }

 private:
  // Hash table configuration params. Determined emperically on
  // the supported languages so far (Eng, Ara, Hin). Might need to be
  // tuned for speed when more languages are supported
  static const int kHashBins = 3001;
  static const int kMaxHashSize = 16;
  // Using djb2 hashing function to hash a 32-bit string
  // introduced in http://www.cse.yorku.ca/~oz/hash.html
  static inline int Hash(const char_32 *str) {
    unsigned long hash = 5381;
    int c;
    while ((c = *str++))
      hash = ((hash << 5) + hash) + c;
    return (hash%kHashBins);
  }
  // Same as above but for a single char
  static inline int Hash(char_32 ch) {
    char_32 b[2];
    b[0] = ch;
    b[1] = 0;
    return Hash(b);
  }
  // Load the list of supported chars from the given data file
  // pointer. If tess_unicharset is non-NULL, mapping each Cube class
  // id to a tesseract unicharid.
  bool LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset);
  // class count
  int class_cnt_;
  // hash-bin sizes array
  int hash_bin_size_[kHashBins];
  // hash bins
  int hash_bins_[kHashBins][kMaxHashSize];
  // supported strings array
  string_32 **class_strings_;
  // map from class id to secondary (tesseract's) unicharset's ids
  int *unicharset_map_;
  // A unicharset which is filled in with a Tesseract-style UNICHARSET for
  // cube's data if our unicharset is different from tesseract's.
  UNICHARSET cube_unicharset_;
  // This points to either the tess_unicharset we're passed or cube_unicharset_,
  // depending upon whether we just have one unicharset or one for each
  // tesseract and cube, respectively.
  UNICHARSET *unicharset_;
  // has the char set been initialized flag
  bool init_;
};
}
#endif // CHAR_SET_H

View File

@ -1,94 +0,0 @@
/**********************************************************************
* File: classifier_base.h
* Description: Declaration of the Base Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharClassifier class is the abstract class for any character/grapheme
// classifier.
#ifndef CHAR_CLASSIFIER_BASE_H
#define CHAR_CLASSIFIER_BASE_H
#include <string>
#include "char_samp.h"
#include "char_altlist.h"
#include "char_set.h"
#include "feature_base.h"
#include "lang_model.h"
#include "tuning_params.h"
namespace tesseract {
// Abstract base class for character/grapheme classifiers.
class CharClassifier {
 public:
  // Does not take ownership of char_set or params; takes ownership of
  // feat_extract (deleted in the destructor below).
  CharClassifier(CharSet *char_set, TuningParams *params,
                 FeatureBase *feat_extract) {
    char_set_ = char_set;
    params_ = params;
    feat_extract_ = feat_extract;
    fold_sets_ = NULL;
    fold_set_cnt_ = 0;
    fold_set_len_ = NULL;
    init_ = false;
    case_sensitive_ = true;
  }
  // Frees the folding sets and the owned feature extractor.
  virtual ~CharClassifier() {
    if (fold_sets_ != NULL) {
      for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
        delete []fold_sets_[fold_set];
      }
      delete []fold_sets_;
      fold_sets_ = NULL;
    }
    delete []fold_set_len_;
    fold_set_len_ = NULL;
    delete feat_extract_;
    feat_extract_ = NULL;
  }
  // pure virtual functions that need to be implemented by any inheriting class
  virtual CharAltList * Classify(CharSamp *char_samp) = 0;
  virtual int CharCost(CharSamp *char_samp) = 0;
  virtual bool Train(CharSamp *char_samp, int ClassID) = 0;
  virtual bool SetLearnParam(char *var_name, float val) = 0;
  virtual bool Init(const string &data_file_path, const string &lang,
                    LangModel *lang_mod) = 0;
  // accessors
  FeatureBase *FeatureExtractor() {return feat_extract_;}
  inline bool CaseSensitive() const { return case_sensitive_; }
  inline void SetCaseSensitive(bool case_sensitive) {
    case_sensitive_ = case_sensitive;
  }

 protected:
  virtual void Fold() = 0;
  virtual bool LoadFoldingSets(const string &data_file_path,
                               const string &lang,
                               LangModel *lang_mod) = 0;
  FeatureBase *feat_extract_;  // owned (deleted in the destructor)
  CharSet *char_set_;          // not owned
  TuningParams *params_;       // not owned
  int **fold_sets_;            // owned; fold_set_cnt_ entries
  int *fold_set_len_;          // owned
  int fold_set_cnt_;
  bool init_;
  bool case_sensitive_;
};
} // tesseract
#endif // CHAR_CLASSIFIER_BASE_H

View File

@ -1,85 +0,0 @@
/**********************************************************************
* File: classifier_factory.cpp
* Description: Implementation of the Base Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "classifier_factory.h"
#include "conv_net_classifier.h"
#include "feature_base.h"
#include "feature_bmp.h"
#include "feature_chebyshev.h"
#include "feature_hybrid.h"
#include "hybrid_neural_net_classifier.h"
namespace tesseract {
// Creates a CharClassifier object of the appropriate type depending on the
// classifier type in the settings file. The classifier takes ownership of
// the feature extractor it is constructed with. Returns NULL (after
// printing an error) on failure; the caller owns the result.
CharClassifier *CharClassifierFactory::Create(const string &data_file_path,
                                              const string &lang,
                                              LangModel *lang_mod,
                                              CharSet *char_set,
                                              TuningParams *params) {
  // create the feature extraction object
  FeatureBase *feat_extract;
  switch (params->TypeFeature()) {
    case TuningParams::BMP:
      feat_extract = new FeatureBmp(params);
      break;
    case TuningParams::CHEBYSHEV:
      feat_extract = new FeatureChebyshev(params);
      break;
    case TuningParams::HYBRID:
      feat_extract = new FeatureHybrid(params);
      break;
    default:
      fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
              "feature type.\n");
      return NULL;
  }
  // create the classifier object; it takes ownership of feat_extract
  CharClassifier *classifier_obj;
  switch (params->TypeClassifier()) {
    case TuningParams::NN:
      classifier_obj = new ConvNetCharClassifier(char_set, params,
                                                 feat_extract);
      break;
    case TuningParams::HYBRID_NN:
      classifier_obj = new HybridNeuralNetCharClassifier(char_set, params,
                                                         feat_extract);
      break;
    default:
      fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
              "classifier type.\n");
      delete feat_extract;  // was leaked on this error path
      return NULL;
  }
  // Init the classifier
  if (!classifier_obj->Init(data_file_path, lang, lang_mod)) {
    // Deleting the classifier also frees feat_extract (owned by it).
    delete classifier_obj;
    fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable "
            "to Init() character classifier object.\n");
    return NULL;
  }
  return classifier_obj;
}
}

View File

@ -1,43 +0,0 @@
/**********************************************************************
* File: classifier_factory.h
* Description: Declaration of the Base Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CharClassifierFactory provides a single static method to create an
// instance of the desired classifier
#ifndef CHAR_CLASSIFIER_FACTORY_H
#define CHAR_CLASSIFIER_FACTORY_H
#include <string>
#include "classifier_base.h"
#include "lang_model.h"
namespace tesseract {
// Factory that creates the concrete CharClassifier implementation selected
// by the tuning parameters. Exposes a single static Create() method.
class CharClassifierFactory {
 public:
  // Creates a CharClassifier object of the appropriate type depending on the
  // classifier type in the settings file.
  // data_file_path and lang locate the language data files; lang_mod,
  // char_set and params configure the classifier. Returns NULL on failure.
  static CharClassifier *Create(const string &data_file_path,
                                const string &lang,
                                LangModel *lang_mod,
                                CharSet *char_set,
                                TuningParams *params);
};
} // tesseract
#endif // CHAR_CLASSIFIER_FACTORY_H

View File

@ -1,268 +0,0 @@
/**********************************************************************
* File: con_comp.cpp
* Description: Implementation of a Connected Component class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdlib.h>
#include <string.h>
#include "con_comp.h"
#include "cube_const.h"
namespace tesseract {
// Constructs an empty connected component: no points, a zero bounding box
// at the origin, an unassigned ID, and neither left-most nor right-most.
ConComp::ConComp()
    : id_(-1),
      left_most_(false),
      right_most_(false),
      left_(0),
      top_(0),
      right_(0),
      bottom_(0),
      head_(NULL),
      tail_(NULL),
      pt_cnt_(0) {
}
// Releases the linked list of points owned by this component.
ConComp::~ConComp() {
  ConCompPt *pt = head_;
  while (pt != NULL) {
    ConCompPt *next = pt->Next();
    delete pt;
    pt = next;
  }
  head_ = NULL;
}
// adds a pt to the conn comp and updates its boundaries
bool ConComp::Add(int x, int y) {
ConCompPt *pt_ptr = new ConCompPt(x, y);
if (head_ == NULL) {
left_ = x;
right_ = x;
top_ = y;
bottom_ = y;
head_ = pt_ptr;
} else {
left_ = left_ <= x ? left_ : x;
top_ = top_ <= y ? top_ : y;
right_ = right_ >= x ? right_ : x;
bottom_ = bottom_ >= y ? bottom_ : y;
}
if (tail_ != NULL) {
tail_->SetNext(pt_ptr);
}
tail_ = pt_ptr;
pt_cnt_++;
return true;
}
// Splices the points of |concomp| onto the end of this component's list,
// takes the union of the bounding boxes, sums the point counts, and
// empties |concomp|. Returns false if either component has no points.
bool ConComp::Merge(ConComp *concomp) {
  if (head_ == NULL || tail_ == NULL ||
      concomp->head_ == NULL || concomp->tail_ == NULL) {
    return false;
  }
  // Append the donor's list to ours.
  tail_->SetNext(concomp->head_);
  tail_ = concomp->tail_;
  // Union of the two bounding boxes.
  if (concomp->left_ < left_) left_ = concomp->left_;
  if (concomp->top_ < top_) top_ = concomp->top_;
  if (concomp->right_ > right_) right_ = concomp->right_;
  if (concomp->bottom_ > bottom_) bottom_ = concomp->bottom_;
  pt_cnt_ += concomp->pt_cnt_;
  // The donor no longer owns any points.
  concomp->head_ = NULL;
  concomp->tail_ = NULL;
  return true;
}
// Creates the x-coord density histogram after spreading
// each x-coord position by the HIST_WND_RATIO fraction of the
// height of the ConComp, but limited to max_hist_wnd
int *ConComp::CreateHistogram(int max_hist_wnd) {
int wid = right_ - left_ + 1,
hgt = bottom_ - top_ + 1,
hist_wnd = static_cast<int>(hgt * HIST_WND_RATIO);
if (hist_wnd > max_hist_wnd) {
hist_wnd = max_hist_wnd;
}
// alloc memo for histogram
int *hist_array = new int[wid];
memset(hist_array, 0, wid * sizeof(*hist_array));
// compute windowed histogram
ConCompPt *pt_ptr = head_;
while (pt_ptr != NULL) {
int x = pt_ptr->x() - left_,
xw = x - hist_wnd;
for (int xdel = -hist_wnd; xdel <= hist_wnd; xdel++, xw++) {
if (xw >= 0 && xw < wid) {
hist_array[xw]++;
}
}
pt_ptr = pt_ptr->Next();
}
return hist_array;
}
// find out the seg pts by looking for local minima in the histogram
int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) {
// init
(*seg_pt_cnt) = 0;
int wid = right_ - left_ + 1,
hgt = bottom_ - top_ + 1;
int *x_seg_pt = new int[wid];
int seg_pt_wnd = static_cast<int>(hgt * SEG_PT_WND_RATIO);
if (seg_pt_wnd > 1) {
seg_pt_wnd = 1;
}
for (int x = 2; x < (wid - 2); x++) {
if (hist_array[x] < hist_array[x - 1] &&
hist_array[x] < hist_array[x - 2] &&
hist_array[x] <= hist_array[x + 1] &&
hist_array[x] <= hist_array[x + 2]) {
x_seg_pt[(*seg_pt_cnt)++] = x;
x += seg_pt_wnd;
} else if (hist_array[x] <= hist_array[x - 1] &&
hist_array[x] <= hist_array[x - 2] &&
hist_array[x] < hist_array[x + 1] &&
hist_array[x] < hist_array[x + 2]) {
x_seg_pt[(*seg_pt_cnt)++] = x;
x += seg_pt_wnd;
}
}
// no segments, nothing to do
if ((*seg_pt_cnt) == 0) {
delete []x_seg_pt;
return NULL;
}
return x_seg_pt;
}
// Splits this component into sub-components at the local minima of its
// windowed pixel-density histogram. Returns a caller-owned array of
// (*concomp_cnt) newly allocated ConComps, or NULL if there are no
// points, no split points were found, or a point could not be assigned.
ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) {
  // init
  (*concomp_cnt) = 0;
  // No pts
  if (head_ == NULL) {
    return NULL;
  }
  int seg_pt_cnt = 0;
  // create the histogram
  int *hist_array = CreateHistogram(max_hist_wnd);
  if (hist_array == NULL) {
    return NULL;
  }
  int *x_seg_pt = SegmentHistogram(hist_array, &seg_pt_cnt);
  // free histogram
  delete []hist_array;
  // no segments, nothing to do
  if (seg_pt_cnt == 0) {
    delete []x_seg_pt;
    return NULL;
  }
  // create one sub-component per segment; splits inherit this ID
  ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1];
  for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) {
    concomp_array[concomp] = new ConComp();
    concomp_array[concomp]->SetID(id_);
  }
  // set the left and right most attributes of the
  // appropriate concomps
  concomp_array[0]->left_most_ = true;
  concomp_array[seg_pt_cnt]->right_most_ = true;
  // assign pts to concomps
  ConCompPt *pt_ptr = head_;
  while (pt_ptr != NULL) {
    int seg_pt;
    // find the first seg-pt that exceeds the x value
    // of the pt
    for (seg_pt = 0; seg_pt < seg_pt_cnt; seg_pt++) {
      if ((x_seg_pt[seg_pt] + left_) > pt_ptr->x()) {
        break;
      }
    }
    // add the pt to the proper concomp
    if (concomp_array[seg_pt]->Add(pt_ptr->x(), pt_ptr->y()) == false) {
      // BUGFIX: also free the sub-components themselves, not just the
      // array holding them (they used to leak on this failure path).
      for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) {
        delete concomp_array[concomp];
      }
      delete []x_seg_pt;
      delete []concomp_array;
      return NULL;
    }
    pt_ptr = pt_ptr->Next();
  }
  delete []x_seg_pt;
  (*concomp_cnt) = (seg_pt_cnt + 1);
  return concomp_array;
}
// Shifts the co-ordinates of all points by the specified x & y deltas
void ConComp::Shift(int dx, int dy) {
ConCompPt *pt_ptr = head_;
while (pt_ptr != NULL) {
pt_ptr->Shift(dx, dy);
pt_ptr = pt_ptr->Next();
}
left_ += dx;
right_ += dx;
top_ += dy;
bottom_ += dy;
}
} // namespace tesseract

View File

@ -1,124 +0,0 @@
/**********************************************************************
* File: con_comp.h
* Description: Declaration of a Connected Component class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CONCOMP_H
#define CONCOMP_H
// The ConComp class implements the functionality needed for a
// Connected Component object and Connected Component (ConComp) points.
// The points consituting a connected component are kept in a linked-list
// The Concomp class provided methods to:
// 1- Compare components in L2R and R2L reading orders.
// 2- Merge ConComps
// 3- Compute the windowed vertical pixel density histogram for a specific
// windows size
// 4- Segment a ConComp based on the local windowed vertical pixel
// density histogram local minima
namespace tesseract {
// Implements a single pixel position of a connected component, stored as
// a node in a singly-linked list of points.
class ConCompPt {
 public:
  // Creates a point at (x, y) with no successor.
  ConCompPt(int x, int y) {
    x_ = x;
    y_ = y;
    next_pt_ = NULL;
  }
  // Coordinate accessors.
  inline int x() { return x_; }
  inline int y() { return y_; }
  // Translates the point by (dx, dy).
  inline void Shift(int dx, int dy) {
    x_ += dx;
    y_ += dy;
  }
  // Linked-list accessors.
  inline ConCompPt * Next() { return next_pt_; }
  inline void SetNext(ConCompPt *pt) { next_pt_ = pt; }

 private:
  int x_;
  int y_;
  // Next point in the component's list, or NULL at the tail.
  ConCompPt *next_pt_;
};
// A connected component: a linked list of pixel positions together with
// its bounding box, reading-order extremity flags, and a numeric ID.
class ConComp {
 public:
  ConComp();
  virtual ~ConComp();
  // accessors
  inline ConCompPt *Head() { return head_; }
  inline int Left() const { return left_; }
  inline int Top() const { return top_; }
  inline int Right() const { return right_; }
  inline int Bottom() const { return bottom_; }
  inline int Width() const { return right_ - left_ + 1; }
  inline int Height() const { return bottom_ - top_ + 1; }
  // Comparer used for sorting L2R reading order
  // (compares by horizontal centre, i.e. by left + right).
  inline static int Left2RightComparer(const void *comp1,
                                       const void *comp2) {
    return (*(reinterpret_cast<ConComp * const *>(comp1)))->left_ +
        (*(reinterpret_cast<ConComp * const *>(comp1)))->right_ -
        (*(reinterpret_cast<ConComp * const *>(comp2)))->left_ -
        (*(reinterpret_cast<ConComp * const *>(comp2)))->right_;
  }
  // Comparer used for sorting R2L reading order
  // (compares by right edge, descending).
  inline static int Right2LeftComparer(const void *comp1,
                                       const void *comp2) {
    return (*(reinterpret_cast<ConComp * const *>(comp2)))->right_ -
        (*(reinterpret_cast<ConComp * const *>(comp1)))->right_;
  }
  // accessors for attribues of a ConComp
  inline bool LeftMost() const { return left_most_; }
  inline bool RightMost() const { return right_most_; }
  inline void SetLeftMost(bool left_most) { left_most_ = left_most; }
  inline void SetRightMost(bool right_most) { right_most_ = right_most;
  }
  inline int ID () const { return id_; }
  inline void SetID(int id) { id_ = id; }
  inline int PtCnt () const { return pt_cnt_; }
  // Add a new pt
  bool Add(int x, int y);
  // Merge two connected components in-place
  bool Merge(ConComp *con_comp);
  // Shifts the co-ordinates of all points by the specified x & y deltas
  void Shift(int dx, int dy);
  // segments a concomp based on pixel density histogram local minima
  ConComp **Segment(int max_hist_wnd, int *concomp_cnt);
  // creates the vertical pixel density histogram of the concomp
  int *CreateHistogram(int max_hist_wnd);
  // find out the seg pts by looking for local minima in the histogram
  int *SegmentHistogram(int *hist_array, int *seg_pt_cnt);

 private:
  int id_;
  // Reading-order extremity flags (set on splits produced by Segment()).
  bool left_most_;
  bool right_most_;
  // Bounding box of the component's points.
  int left_;
  int top_;
  int right_;
  int bottom_;
  // Linked list of points and its length.
  ConCompPt *head_;
  ConCompPt *tail_;
  int pt_cnt_;
};
}
#endif // CONCOMP_H

View File

@ -1,354 +0,0 @@
/**********************************************************************
* File: charclassifier.cpp
* Description: Implementation of Convolutional-NeuralNet Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <wctype.h>
#include "char_set.h"
#include "classifier_base.h"
#include "const.h"
#include "conv_net_classifier.h"
#include "cube_utils.h"
#include "feature_base.h"
#include "feature_bmp.h"
#include "tess_lang_model.h"
namespace tesseract {
// Constructs the classifier with no Neural Net loaded and no I/O buffers
// allocated yet; both are created later by LoadNets()/RunNets().
ConvNetCharClassifier::ConvNetCharClassifier(CharSet *char_set,
                                             TuningParams *params,
                                             FeatureBase *feat_extract)
    : CharClassifier(char_set, params, feat_extract),
      char_net_(NULL),
      net_input_(NULL),
      net_output_(NULL) {
}
// Frees the Neural Net and the input/output buffers, if any.
// (delete / delete[] on NULL is a no-op, so no guards are needed.)
ConvNetCharClassifier::~ConvNetCharClassifier() {
  delete char_net_;
  char_net_ = NULL;
  delete []net_input_;
  net_input_ = NULL;
  delete []net_output_;
  net_output_ = NULL;
}
/**
 * The main training function. Given a sample and a class ID the classifier
 * updates its parameters according to its learning algorithm. This function
 * is currently not implemented. TODO(ahmadab): implement end-2-end training
 */
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
  // Not implemented: always reports failure without touching the net.
  return false;
}
/**
 * A secondary function needed for training. Allows the trainer to set the
 * value of any train-time parameter. This function is currently not
 * implemented. TODO(ahmadab): implement end-2-end training
 */
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implementation of parameter initializing.
  // Not implemented: var_name and val are ignored; always reports failure.
  return false;
}
/**
 * Folds the output of the NeuralNet using the loaded folding sets.
 * Folding equalizes the activations of classes that should be treated
 * alike: in case-insensitive mode upper/lower case variants both get the
 * max of their two activations, and every member of a folding set is
 * raised to at least kFoldingRatio of the set's max activation.
 * Operates in place on net_output_, which must have been filled by a
 * prior FeedForward (see RunNets).
 */
void ConvNetCharClassifier::Fold() {
  // in case insensitive mode
  if (case_sensitive_ == false) {
    int class_cnt = char_set_->ClassCount();
    // fold case
    for (int class_id = 0; class_id < class_cnt; class_id++) {
      // get class string
      const char_32 *str32 = char_set_->ClassString(class_id);
      // get the upper case form of the string
      string_32 upper_form32 = str32;
      for (int ch = 0; ch < upper_form32.length(); ch++) {
        if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
          upper_form32[ch] = towupper(upper_form32[ch]);
        }
      }
      // find out the upperform class-id if any
      int upper_class_id =
          char_set_->ClassID(reinterpret_cast<const char_32 *>(
              upper_form32.c_str()));
      // give both case variants the max of their two activations
      if (upper_class_id != -1 && class_id != upper_class_id) {
        float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
        net_output_[class_id] = max_out;
        net_output_[upper_class_id] = max_out;
      }
    }
  }
  // The folding sets specify how groups of classes should be folded
  // Folding involved assigning a min-activation to all the members
  // of the folding set. The min-activation is a fraction of the max-activation
  // of the members of the folding set
  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    // Sets with length 0 were invalidated at load time; skip them.
    if (fold_set_len_[fold_set] == 0)
      continue;
    // Find the max activation over the members of the set.
    float max_prob = net_output_[fold_sets_[fold_set][0]];
    for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
      if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
        max_prob = net_output_[fold_sets_[fold_set][ch]];
      }
    }
    // Raise every member to at least kFoldingRatio * max.
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
          net_output_[fold_sets_[fold_set][ch]]);
    }
  }
}
/**
 * Computes the features of the specified charsamp, feeds them forward
 * through the Neural Net into net_output_, and applies output folding.
 * Returns false if no net is loaded, feature computation fails, or the
 * feed-forward fails.
 */
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
  if (char_net_ == NULL) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
            "NeuralNet is NULL\n");
    return false;
  }
  int feat_cnt = char_net_->in_cnt();
  int class_cnt = char_set_->ClassCount();
  // allocate i/p and o/p buffers if needed (lazily, on first use)
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    net_output_ = new float[class_cnt];
  }
  // compute input features
  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
            "unable to compute features\n");
    return false;
  }
  // Feed-forward through the net. char_net_ was already verified non-NULL
  // at function entry, so the old duplicate NULL check (with an
  // unreachable else branch) has been removed.
  if (char_net_->FeedForward(net_input_, net_output_) == false) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
            "unable to run feed-forward\n");
    return false;
  }
  // Apply case/folding-set folding to the outputs.
  Fold();
  return true;
}
/**
 * Returns the cost of the sample being a character, derived from net
 * output 0 (returns 0 if the nets could not be run).
 */
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
  if (!RunNets(char_samp)) {
    return 0;
  }
  return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
}
/**
 * Classifies a charsamp and returns an alternate list of candidate
 * classes with their costs, or NULL if the nets could not be run.
 * Output 0 is skipped: it is the output CharCost() uses for the
 * character/non-character decision.
 */
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
  // run the needed nets
  if (!RunNets(char_samp)) {
    return NULL;
  }
  int class_cnt = char_set_->ClassCount();
  // build an altlist from the net activations
  CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
  for (int class_id = 1; class_id < class_cnt; class_id++) {
    alt_list->Insert(class_id, CubeUtils::Prob2Cost(net_output_[class_id]));
  }
  return alt_list;
}
/**
 * Replaces the classifier's Neural Net (for training purposes), freeing
 * any previously owned net. Takes ownership of char_net (it is deleted
 * by the destructor).
 */
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
  delete char_net_;  // no-op when no net was loaded
  char_net_ = char_net;
}
/**
 * Loads the optional folding sets for the language from
 * <data_file_path><lang>.cube.fold. Each line of the file is one folding
 * set: a group of characters whose net activations are folded together
 * (see Fold()). Returns true if the file does not exist (folding sets
 * are optional), and fails only if an existing file cannot be read.
 * Populates fold_set_cnt_, fold_sets_ and fold_set_len_.
 */
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
                                            const string &lang,
                                            LangModel *lang_mod) {
  fold_set_cnt_ = 0;
  string fold_file_name;
  fold_file_name = data_file_path + lang;
  fold_file_name += ".cube.fold";
  // folding sets are optional: probe for existence before reading
  FILE *fp = fopen(fold_file_name.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);
  string fold_sets_str;
  if (!CubeUtils::ReadFileToString(fold_file_name,
                                   &fold_sets_str)) {
    return false;
  }
  // split into lines, one folding set per line
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
  fold_set_cnt_ = str_vec.size();
  fold_sets_ = new int *[fold_set_cnt_];
  fold_set_len_ = new int[fold_set_cnt_];
  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    // drop characters the language model considers invalid
    reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
        &str_vec[fold_set]);
    // if all or all but one character are invalid, invalidate this set
    // (a length-0 entry is skipped by Fold())
    if (str_vec[fold_set].length() <= 1) {
      fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
              "invalidating folding set %d\n", fold_set);
      fold_set_len_[fold_set] = 0;
      fold_sets_[fold_set] = NULL;
      continue;
    }
    // map the set's characters to their class IDs
    string_32 str32;
    CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
    fold_set_len_[fold_set] = str32.length();
    fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
    }
  }
  return true;
}
/**
 * Init the classifier provided a data-path and a language string.
 * Idempotent: returns true immediately when already initialized.
 * Both loaders below treat a missing data file as success but fail on a
 * file that does not pass their sanity checks.
 */
bool ConvNetCharClassifier::Init(const string &data_file_path,
                                 const string &lang,
                                 LangModel *lang_mod) {
  if (init_) {
    return true;
  }
  // Short-circuit: folding sets are only loaded if the nets loaded.
  init_ = LoadNets(data_file_path, lang) &&
          LoadFoldingSets(data_file_path, lang, lang_mod);
  return init_;
}
/**
 * Load the classifier's Neural Net from <data_file_path><lang>.cube.nn.
 * This function will return true if the net file does not exist (the net
 * is optional). But will fail if the net did not pass the sanity checks:
 * the net's input count must match the feature extractor's feature count
 * and its output count must match the charset's class count. On success
 * the net I/O buffers are allocated (if not already present).
 */
bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
                                     const string &lang) {
  string char_net_file;
  // add the lang identifier
  char_net_file = data_file_path + lang;
  char_net_file += ".cube.nn";
  // neural network is optional: probe for existence before loading
  FILE *fp = fopen(char_net_file.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);
  // load main net
  char_net_ = tesseract::NeuralNet::FromFile(char_net_file);
  if (char_net_ == NULL) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "could not load %s\n", char_net_file.c_str());
    return false;
  }
  // validate net: its input size must match the feature extractor
  // (char_net_ stays set on failure; the destructor frees it)
  if (char_net_->in_cnt()!= feat_extract_->FeatureCnt()) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "could not validate net %s\n", char_net_file.c_str());
    return false;
  }
  // alloc net i/o buffers
  int feat_cnt = char_net_->in_cnt();
  int class_cnt = char_set_->ClassCount();
  // the net's output size must match the number of classes
  if (char_net_->out_cnt() != class_cnt) {
    fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
            "output count (%d) and class count (%d) are not equal\n",
            char_net_->out_cnt(), class_cnt);
    return false;
  }
  // allocate i/p and o/p buffers if needed
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    net_output_ = new float[class_cnt];
  }
  return true;
}
} // tesseract

View File

@ -1,94 +0,0 @@
/**********************************************************************
* File: conv_net_classifier.h
* Description: Declaration of Convolutional-NeuralNet Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The ConvNetCharClassifier inherits from the base classifier class:
// "CharClassifierBase". It implements a Convolutional Neural Net classifier
// instance of the base classifier. It uses the Tesseract Neural Net library
// The Neural Net takes a scaled version of a bitmap and feeds it to a
// Convolutional Neural Net as input and performs a FeedForward. Each output
// of the net corresponds to class_id in the CharSet passed at construction
// time.
// Afterwards, the outputs of the Net are "folded" using the folding set
// (if any)
#ifndef CONV_NET_CLASSIFIER_H
#define CONV_NET_CLASSIFIER_H
#include <string>
#include "char_samp.h"
#include "char_altlist.h"
#include "char_set.h"
#include "feature_base.h"
#include "classifier_base.h"
#include "neural_net.h"
#include "lang_model.h"
#include "tuning_params.h"
namespace tesseract {
// Folding Ratio is the ratio of the max-activation of members of a folding
// set that is used to compute the min-activation of the rest of the set
static const float kFoldingRatio = 0.75;
// Convolutional Neural Net implementation of CharClassifier: scales a
// bitmap sample, feeds its features through a Tesseract NeuralNet, and
// folds the outputs using the language's folding sets.
class ConvNetCharClassifier : public CharClassifier {
 public:
  ConvNetCharClassifier(CharSet *char_set, TuningParams *params,
                        FeatureBase *feat_extract);
  virtual ~ConvNetCharClassifier();
  // The main training function. Given a sample and a class ID the classifier
  // updates its parameters according to its learning algorithm. This function
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training.
  // Takes ownership of the net; any previously owned net is freed.
  void SetNet(tesseract::NeuralNet *net);
  // Classifies an input charsamp and return a CharAltList object containing
  // the possible candidates and corresponding scores
  virtual CharAltList * Classify(CharSamp *char_samp);
  // Computes the cost of a specific charsamp being a character (versus a
  // non-character: part-of-a-character OR more-than-one-character)
  virtual int CharCost(CharSamp *char_samp);

 private:
  // Neural Net object used for classification; owned by this object and
  // freed in the destructor.
  tesseract::NeuralNet *char_net_;
  // data buffers used to hold Neural Net inputs and outputs; lazily
  // allocated with the net's input count / the charset's class count.
  float *net_input_;
  float *net_output_;
  // Init the classifier provided a data-path and a language string
  virtual bool Init(const string &data_file_path, const string &lang,
                    LangModel *lang_mod);
  // Loads the NeuralNets needed for the classifier
  bool LoadNets(const string &data_file_path, const string &lang);
  // Loads the folding sets provided a data-path and a language string
  virtual bool LoadFoldingSets(const string &data_file_path,
                               const string &lang,
                               LangModel *lang_mod);
  // Folds the output of the NeuralNet using the loaded folding sets
  virtual void Fold();
  // Scales the input char_samp and feeds it to the NeuralNet as input
  bool RunNets(CharSamp *char_samp);
};
}
#endif // CONV_NET_CLASSIFIER_H

View File

@ -1,41 +0,0 @@
/**********************************************************************
* File: const.h
* Description: Defintions of constants used by Cube
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CUBE_CONST_H
#define CUBE_CONST_H
// Scale used to normalize a log-prob to a cost
#define PROB2COST_SCALE 4096.0
// Maximum possible cost (-log prob of MIN_PROB)
#define MIN_PROB_COST 65536
// Probability corresponding to the max cost MIN_PROB_COST
#define MIN_PROB 0.000000113
// Worst possible cost (returned on failure)
#define WORST_COST 0x40000
// Oversegmentation hysteresis thresholds:
// HIST_WND_RATIO is the fraction of a ConComp's height used as the
// histogram smearing window; SEG_PT_WND_RATIO is the fraction of the
// height used as the skip window after accepting a segmentation point.
#define HIST_WND_RATIO 0.1f
#define SEG_PT_WND_RATIO 0.1f
// NOTE(review): presumably pulled in for INT_MAX and friends when
// building with GCC/MinGW on Windows -- confirm before removing.
#ifdef _WIN32
#ifdef __GNUC__
#include <climits>
#endif
#endif
#endif  // CUBE_CONST_H

View File

@ -1,249 +0,0 @@
/**********************************************************************
* File: cube_line_object.cpp
* Description: Implementation of the Cube Line Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <algorithm>
#include "cube_line_object.h"
namespace tesseract {
// Wraps a line image for recognition. The pix is borrowed (own_pix_ is
// false, so it is not destroyed by this object); phrase extraction is
// deferred until Process() is called.
CubeLineObject::CubeLineObject(CubeRecoContext *cntxt, Pix *pix)
    : line_pix_(pix),
      own_pix_(false),
      processed_(false),
      cntxt_(cntxt),
      phrase_cnt_(0),
      phrases_(NULL) {
}
// Destroys the pix only when this object owns it, and frees all phrase
// objects created by Process().
CubeLineObject::~CubeLineObject() {
  if (own_pix_ && line_pix_ != NULL) {
    pixDestroy(&line_pix_);
    line_pix_ = NULL;
  }
  if (phrases_ != NULL) {
    for (int idx = 0; idx < phrase_cnt_; idx++) {
      delete phrases_[idx];  // delete on NULL entries is a no-op
    }
    delete []phrases_;
    phrases_ = NULL;
  }
}
// Processes the line pix: finds connected components, groups them into
// phrases (words) using a word-breaking distance threshold, and creates
// one CubeObject per phrase in phrases_/phrase_cnt_. Returns true on
// success. Idempotent: a second call returns true immediately.
bool CubeLineObject::Process() {
  // do nothing if pix had already been processed
  if (processed_) {
    return true;
  }
  // validate data
  if (line_pix_ == NULL || cntxt_ == NULL) {
    return false;
  }
  // create a CharSamp covering the whole line image
  CharSamp *char_samp = CubeUtils::CharSampleFromPix(line_pix_, 0, 0,
                                                     line_pix_->w,
                                                     line_pix_->h);
  if (char_samp == NULL) {
    return false;
  }
  // compute connected components.
  int con_comp_cnt = 0;
  ConComp **con_comps = char_samp->FindConComps(&con_comp_cnt,
      cntxt_->Params()->MinConCompSize());
  // no longer need char_samp, delete it
  delete char_samp;
  // no connected components, bail out
  if (con_comp_cnt <= 0 || con_comps == NULL) {
    return false;
  }
  // sort connected components based on reading order
  bool rtl = (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L);
  qsort(con_comps, con_comp_cnt, sizeof(*con_comps), rtl ?
        ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
  // compute work breaking threshold as a ratio of line height
  bool ret_val = false;
  int word_break_threshold = ComputeWordBreakThreshold(con_comp_cnt, con_comps,
                                                       rtl);
  if (word_break_threshold > 0) {
    // over-allocate phrases object buffer (at most one phrase per concomp)
    phrases_ = new CubeObject *[con_comp_cnt];
    // create a phrase if the horizontal distance between two consecutive
    // concomps is higher than threshold
    int start_con_idx = 0;
    // current_phrase_limit is the advancing edge of the current phrase in
    // the reading direction (left edge for R2L, right edge for L2R).
    int current_phrase_limit = rtl ? con_comps[0]->Left() :
        con_comps[0]->Right();
    // con_idx runs to con_comp_cnt inclusive so the final phrase is
    // flushed on the last iteration.
    for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
      bool create_new_phrase = true;
      // if not at the end, compute the distance between two consecutive
      // concomps
      if (con_idx < con_comp_cnt) {
        int dist = 0;
        if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
          dist = current_phrase_limit - con_comps[con_idx]->Right();
        } else {
          dist = con_comps[con_idx]->Left() - current_phrase_limit;
        }
        create_new_phrase = (dist > word_break_threshold);
      }
      // create a new phrase
      if (create_new_phrase) {
        // create a phrase corresponding to a range on components
        bool left_most;
        bool right_most;
        CharSamp *phrase_char_samp =
            CharSamp::FromConComps(con_comps, start_con_idx,
                                   con_idx - start_con_idx, NULL,
                                   &left_most, &right_most,
                                   line_pix_->h);
        if (phrase_char_samp == NULL) {
          // NOTE(review): this break still falls through to ret_val = true
          // below, so a mid-line failure keeps the phrases built so far
          // and reports success -- confirm this is intended.
          break;
        }
        phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp);
        // set the ownership of the charsamp to the cube object
        phrases_[phrase_cnt_]->SetCharSampOwnership(true);
        phrase_cnt_++;
        // advance the starting index to the current index
        start_con_idx = con_idx;
        // set the limit of the newly starting phrase (if any)
        if (con_idx < con_comp_cnt) {
          current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
              con_comps[con_idx]->Right();
        }
      } else {
        // update the limit of the current phrase
        if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
          current_phrase_limit = MIN(current_phrase_limit,
                                     con_comps[con_idx]->Left());
        } else {
          current_phrase_limit = MAX(current_phrase_limit,
                                     con_comps[con_idx]->Right());
        }
      }
    }
    ret_val = true;
  }
  // clean-up connected comps
  for (int con_idx = 0; con_idx < con_comp_cnt; con_idx++) {
    delete con_comps[con_idx];
  }
  delete []con_comps;
  // success
  processed_ = true;
  return ret_val;
}
// Compute the least word breaking threshold that is required to produce a
// valid set of phrases. Phrases are validated using the aspect ratio
// constraints specified in the language specific Params object.
// Starts from the largest plausible threshold (a fraction of the line
// height) and decreases it until every resulting word satisfies the
// maximum word aspect ratio, or the threshold reaches zero.
// (A dead local, start_con_idx, that was written but never read has been
// removed.)
int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt,
                                              ConComp **con_comps, bool rtl) {
  // initial estimate of word breaking threshold
  int word_break_threshold =
      static_cast<int>(line_pix_->h * cntxt_->Params()->MaxSpaceHeightRatio());
  bool valid = false;
  // compute the resulting words and validate each's aspect ratio
  do {
    // current_phrase_limit tracks the leading edge of the word being
    // accumulated: its left edge for right-to-left text, else its right edge
    int current_phrase_limit = (rtl ? con_comps[0]->Left() :
                                      con_comps[0]->Right());
    // bounding box of the word being accumulated
    int min_x = con_comps[0]->Left();
    int max_x = con_comps[0]->Right();
    int min_y = con_comps[0]->Top();
    int max_y = con_comps[0]->Bottom();
    valid = true;
    // the loop deliberately runs one step past the last concomp so that
    // the final word is flushed and validated
    for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
      bool create_new_phrase = true;
      // if not at the end, compute the distance between two consecutive
      // concomps
      if (con_idx < con_comp_cnt) {
        int dist = 0;
        if (rtl) {
          dist = current_phrase_limit - con_comps[con_idx]->Right();
        } else {
          dist = con_comps[con_idx]->Left() - current_phrase_limit;
        }
        create_new_phrase = (dist > word_break_threshold);
      }
      // create a new phrase
      if (create_new_phrase) {
        // check aspect ratio. Break if invalid
        if ((max_x - min_x + 1) >
            (cntxt_->Params()->MaxWordAspectRatio() * (max_y - min_y + 1))) {
          valid = false;
          break;
        }
        // set the limit of the newly starting phrase (if any)
        if (con_idx < con_comp_cnt) {
          current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
                                       con_comps[con_idx]->Right();
          // re-init bounding box
          min_x = con_comps[con_idx]->Left();
          max_x = con_comps[con_idx]->Right();
          min_y = con_comps[con_idx]->Top();
          max_y = con_comps[con_idx]->Bottom();
        }
      } else {
        // update the limit of the current phrase
        if (rtl) {
          current_phrase_limit = MIN(current_phrase_limit,
                                     con_comps[con_idx]->Left());
        } else {
          current_phrase_limit = MAX(current_phrase_limit,
                                     con_comps[con_idx]->Right());
        }
        // update bounding box
        UpdateRange(con_comps[con_idx]->Left(),
                    con_comps[con_idx]->Right(), &min_x, &max_x);
        UpdateRange(con_comps[con_idx]->Top(),
                    con_comps[con_idx]->Bottom(), &min_y, &max_y);
      }
    }
    // return the breaking threshold if all broken word dimensions are valid
    if (valid) {
      return word_break_threshold;
    }
    // decrease the threshold and try again
    word_break_threshold--;
  } while (!valid && word_break_threshold > 0);
  // failed to find a threshold that achieves the target aspect ratio.
  // Just use the default threshold
  return static_cast<int>(line_pix_->h *
                          cntxt_->Params()->MaxSpaceHeightRatio());
}
}

View File

@ -1,67 +0,0 @@
/**********************************************************************
* File: cube_line_object.h
* Description: Declaration of the Cube Line Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeLineObject implements an objects that holds a line of text
// Each line is broken into phrases. Phrases are blocks within the line that
// are unambiguously separate collections of words
#ifndef CUBE_LINE_OBJECT_H
#define CUBE_LINE_OBJECT_H
#include "cube_reco_context.h"
#include "cube_object.h"
#include "allheaders.h"
namespace tesseract {
// Holds a line of text and lazily breaks it into phrases -- blocks within
// the line that are unambiguously separate collections of words.
class CubeLineObject {
 public:
  CubeLineObject(CubeRecoContext *cntxt, Pix *pix);
  ~CubeLineObject();
  // accessors
  // Number of phrases on the line; runs Process() on first use and
  // returns 0 if processing fails.
  inline int PhraseCount() {
    if (!processed_ && !Process()) {
      return 0;
    }
    return phrase_cnt_;
  }
  // Array of phrase objects built by Process(); NULL on failure.
  inline CubeObject **Phrases() {
    if (!processed_ && !Process()) {
      return NULL;
    }
    return phrases_;
  }

 private:
  CubeRecoContext *cntxt_;   // recognition context (not owned)
  bool own_pix_;             // presumably whether line_pix_ is owned -- TODO confirm
  bool processed_;           // true once Process() has been attempted
  Pix *line_pix_;            // image of the line
  CubeObject **phrases_;     // phrase objects created by Process()
  int phrase_cnt_;           // number of entries in phrases_
  // Breaks the line into phrases; returns true on success.
  bool Process();
  // Compute the least word breaking threshold that is required to produce a
  // valid set of phrases. Phrases are validated using the Aspect ratio
  // constraints specified in the language specific Params object
  int ComputeWordBreakThreshold(int con_comp_cnt, ConComp **con_comps,
                                bool rtl);
};
}
#endif // CUBE_LINE_OBJECT_H

View File

@ -1,949 +0,0 @@
/**********************************************************************
* File: cube_page_segmenter.cpp
* Description: Implementation of the Cube Page Segmenter Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "cube_line_segmenter.h"

#include <cstdio>
#include <cstring>

#include "ndminx.h"
namespace tesseract {
// constants that worked for Arabic page segmenter
// starting height (pixels) for the vertical-closing morphology threshold
const int CubeLineSegmenter::kLineSepMorphMinHgt = 20;
// number of bins in the conn-comp height histogram (EstimateFontParams)
const int CubeLineSegmenter::kHgtBins = 20;
// a line is valid while height / est_alef_hgt_ stays below this ratio
const double CubeLineSegmenter::kMaxValidLineRatio = 3.2;
// conn comps taller/wider than these are excluded from font estimation
const int CubeLineSegmenter::kMaxConnCompHgt = 150;
const int CubeLineSegmenter::kMaxConnCompWid = 500;
// aspect-ratio and minimum-size limits used by CleanUp to drop
// suspicious components (rules, specks)
const int CubeLineSegmenter::kMaxHorzAspectRatio = 50;
const int CubeLineSegmenter::kMaxVertAspectRatio = 20;
const int CubeLineSegmenter::kMinWid = 2;
const int CubeLineSegmenter::kMinHgt = 2;
// a line is "small" if its height <= this ratio * est_dot_hgt_
const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5;
// Constructs a segmenter over the caller-owned page image; all derived
// state is created lazily by Init()/FindLines().
CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img)
    : cntxt_(cntxt),
      orig_img_(img),
      img_(NULL),
      init_(false),
      line_cnt_(0),
      columns_(NULL),
      con_comps_(NULL),
      lines_pixa_(NULL),
      est_alef_hgt_(0.0),
      est_dot_hgt_(0.0) {
}
// Releases every leptonica structure this segmenter owns. The original
// page image (orig_img_) was supplied by the caller and is not destroyed.
CubeLineSegmenter::~CubeLineSegmenter() {
  if (img_ != NULL)        { pixDestroy(&img_);         img_ = NULL; }
  if (lines_pixa_ != NULL) { pixaDestroy(&lines_pixa_); lines_pixa_ = NULL; }
  if (con_comps_ != NULL)  { pixaDestroy(&con_comps_);  con_comps_ = NULL; }
  if (columns_ != NULL)    { pixaaDestroy(&columns_);   columns_ = NULL; }
}
// compute validity ratio for a line: the line-box height expressed as a
// multiple of the estimated alef height (the mask pix is not used by the
// computation itself).
double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) {
  const double line_hgt = line_box->h;
  return line_hgt / est_alef_hgt_;
}
// validate line: a line passes when its validity ratio (height relative to
// the estimated alef height) stays below kMaxValidLineRatio.
bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) {
  return ValidityRatio(line_mask_pix, line_box) < kMaxValidLineRatio;
}
// perform a vertical Closing with the specified threshold
// returning the resulting conn comps as a pixa.
// Returns NULL if the morphology or the connected-component extraction
// fails; on success *boxa receives the component bounding boxes.
Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix,
                                         int threshold, Boxa **boxa) {
  char sequence_str[16];
  // build the morph sequence string, e.g. "c100.20".
  // snprintf (rather than sprintf) guarantees the fixed buffer cannot be
  // overflowed by an unexpectedly large threshold value.
  snprintf(sequence_str, sizeof(sequence_str), "c100.%d", threshold);
  Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0);
  if (morphed_pix == NULL) {
    return NULL;
  }
  // get the resulting lines by computing concomps
  Pixa *pixac;
  (*boxa) = pixConnComp(morphed_pix, &pixac, 8);
  pixDestroy(&morphed_pix);
  if ((*boxa) == NULL) {
    return NULL;
  }
  return pixac;
}
// Helper cleans up after CrackLine.
static void CleanupCrackLine(int line_cnt, Pixa **lines_pixa,
Boxa **line_con_comps,
Pixa **line_con_comps_pix) {
for (int line = 0; line < line_cnt; line++) {
if (lines_pixa[line] != NULL) {
pixaDestroy(&lines_pixa[line]);
}
}
delete []lines_pixa;
boxaDestroy(line_con_comps);
pixaDestroy(line_con_comps_pix);
}
// do a desperate attempt at cracking lines
// Evenly slices the line box into line_cnt horizontal strips, assigns each
// connected component to a strip by its vertical centroid, and returns the
// merged strips as a Pixa only if every produced line passes ValidLine.
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
                                   Box *cracked_line_box, int line_cnt) {
  // create lines pixa array: one lazily created Pixa per candidate line
  Pixa **lines_pixa = new Pixa*[line_cnt];
  memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa));
  // compute line conn comps
  Pixa *line_con_comps_pix;
  Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix,
                                             cracked_line_box,
                                             &line_con_comps_pix);
  if (line_con_comps == NULL) {
    delete []lines_pixa;
    return NULL;
  }
  // assign each conn comp to a line based on its centroid
  for (int con = 0; con < line_con_comps->n; con++) {
    Box *con_box = line_con_comps->box[con];
    Pix *con_pix = line_con_comps_pix->pix[con];
    // vertical centroid relative to the line box, mapped to a strip index
    // (clamped to the last strip)
    int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2),
        line_idx = MIN(line_cnt - 1,
                       (mid_y * line_cnt / cracked_line_box->h));
    // create the line's pixa if it has not been created yet
    if (lines_pixa[line_idx] == NULL) {
      lines_pixa[line_idx] = pixaCreate(line_con_comps->n);
      if (lines_pixa[line_idx] == NULL) {
        CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
                         &line_con_comps_pix);
        return NULL;
      }
    }
    // add the concomp to the line (L_CLONE adds a reference per leptonica
    // convention; the components stay owned by line_con_comps_pix)
    if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 ||
        pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) {
      CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
                       &line_con_comps_pix);
      return NULL;
    }
  }
  // create the lines pixa
  Pixa *lines = pixaCreate(line_cnt);
  bool success = true;
  // create and check the validity of the lines
  for (int line = 0; line < line_cnt; line++) {
    Pixa *line_pixa = lines_pixa[line];
    // skip strips that received no components
    if (line_pixa == NULL) {
      continue;
    }
    // merge the pix, check the validity of the line
    // and add it to the lines pixa (L_INSERT transfers ownership to lines)
    Box *line_box;
    Pix *line_pix = Pixa2Pix(line_pixa, &line_box);
    if (line_pix == NULL ||
        line_box == NULL ||
        ValidLine(line_pix, line_box) == false ||
        pixaAddPix(lines, line_pix, L_INSERT) != 0 ||
        pixaAddBox(lines, line_box, L_INSERT) != 0) {
      if (line_pix != NULL) {
        pixDestroy(&line_pix);
      }
      if (line_box != NULL) {
        boxDestroy(&line_box);
      }
      success = false;
      break;
    }
  }
  // cleanup the per-strip scaffolding in all cases
  CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
                   &line_con_comps_pix);
  if (success == false) {
    pixaDestroy(&lines);
    lines = NULL;
  }
  return lines;
}
// do a desperate attempt at cracking lines
// Tries every candidate line count from 2 up to an estimate based on the
// alef height; returns the first successful split, or NULL.
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
                                   Box *cracked_line_box) {
  // upper bound on how many text lines could fit in this box, rounded to
  // the nearest integer
  const int max_line_cnt =
      static_cast<int>((cracked_line_box->h / est_alef_hgt_) + 0.5);
  if (max_line_cnt < 2) {
    return NULL;
  }
  for (int candidate_cnt = 2; candidate_cnt < max_line_cnt; candidate_cnt++) {
    Pixa *lines = CrackLine(cracked_line_pix, cracked_line_box,
                            candidate_cnt);
    if (lines != NULL) {
      return lines;
    }
  }
  return NULL;
}
// split a line continuously until valid or fail
// Repeatedly applies vertical closing with a shrinking threshold; if no
// threshold yields all-valid lines, falls back to CrackLine, and finally
// to the best partial threshold seen. Returns NULL when nothing works.
Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) {
  // clone the line mask
  Pix *line_pix = pixClone(line_mask_pix);
  if (line_pix == NULL) {
    return NULL;
  }
  // AND with the image to get the actual line
  pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
              PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
  // continue to do rasterop morphology on the line until
  // it splits to valid lines or we fail
  int morph_hgt = kLineSepMorphMinHgt - 1,
      best_threshold = kLineSepMorphMinHgt - 1,
      max_valid_portion = 0;
  Boxa *boxa;
  Pixa *pixac;
  do {
    pixac = VerticalClosing(line_pix, morph_hgt, &boxa);
    // NOTE(review): pixac is dereferenced below without a NULL check,
    // although VerticalClosing can return NULL -- confirm and harden.
    // add the box offset to all the lines
    // and check for the validity of each
    int line,
        valid_line_cnt = 0,
        valid_portion = 0;
    for (line = 0; line < pixac->n; line++) {
      boxa->box[line]->x += line_box->x;
      boxa->box[line]->y += line_box->y;
      if (ValidLine(pixac->pix[line], boxa->box[line]) == true) {
        // count valid lines
        valid_line_cnt++;
        // and the valid portions
        valid_portion += boxa->box[line]->h;
      }
    }
    // all the lines are valid: done
    if (valid_line_cnt == pixac->n) {
      boxaDestroy(&boxa);
      pixDestroy(&line_pix);
      return pixac;
    }
    // remember the threshold that validated the largest total height
    if (valid_portion > max_valid_portion) {
      max_valid_portion = valid_portion;
      best_threshold = morph_hgt;
    }
    boxaDestroy(&boxa);
    pixaDestroy(&pixac);
    morph_hgt--;
  }
  while (morph_hgt > 0);
  // failed to break into valid lines
  // attempt to crack the line
  pixac = CrackLine(line_pix, line_box);
  if (pixac != NULL) {
    pixDestroy(&line_pix);
    return pixac;
  }
  // try to leverage any of the lines:
  // did the best threshold yield a non zero valid portion?
  if (max_valid_portion > 0) {
    // use this threshold to break lines
    pixac = VerticalClosing(line_pix, best_threshold, &boxa);
    // add the box offset to all the lines
    // and check for the validity of each
    for (int line = 0; line < pixac->n; line++) {
      boxa->box[line]->x += line_box->x;
      boxa->box[line]->y += line_box->y;
      // remove invalid lines from the pixa (compensating the index)
      if (ValidLine(pixac->pix[line], boxa->box[line]) == false) {
        pixaRemovePix(pixac, line);
        line--;
      }
    }
    boxaDestroy(&boxa);
    pixDestroy(&line_pix);
    return pixac;
  }
  // give up: no valid split or crack was found
  pixDestroy(&line_pix);
  return NULL;
}
// Checks if a line is too small: a line is considered small when its
// height does not exceed a multiple of the estimated dot height.
bool CubeLineSegmenter::SmallLine(Box *line_box) {
  const double max_small_hgt = kMinValidLineHgtRatio * est_dot_hgt_;
  return line_box->h <= max_small_hgt;
}
// Compute the connected components in a line
// ANDs the line mask with the page image, extracts 8-connected components
// and offsets their boxes by the line's origin. Optionally returns the
// component pix in *con_comps_pixa. Returns NULL on failure.
Boxa *CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix,
                                             Box *line_box,
                                             Pixa **con_comps_pixa) {
  // clone the line mask
  Pix *line_pix = pixClone(line_mask_pix);
  if (line_pix == NULL) {
    return NULL;
  }
  // AND with the image to get the actual line
  pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
              PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
  // compute the connected components of the line to be merged
  Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8);
  pixDestroy(&line_pix);
  // bail out instead of dereferencing a NULL result: pixConnComp can fail
  // (the NULL case was previously dereferenced below)
  if (line_con_comps == NULL) {
    return NULL;
  }
  // offset boxes by the bbox of the line
  for (int con = 0; con < line_con_comps->n; con++) {
    line_con_comps->box[con]->x += line_box->x;
    line_con_comps->box[con]->y += line_box->y;
  }
  return line_con_comps;
}
// create a union of two arbitrary pix
// Creates a new pix covering the bounding region of both boxes, ORs the
// two inputs into it, and updates *dest_box to the union box.
// Returns NULL on failure.
Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box,
                                 Pix *src_pix, Box *src_box) {
  // compute dimensions of union rect
  BOX *union_box = boxBoundingRegion(src_box, dest_box);
  if (union_box == NULL) {
    return NULL;
  }
  // create the union pix
  Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d);
  if (union_pix == NULL) {
    // previously union_box was leaked on this failure path
    boxDestroy(&union_box);
    return NULL;
  }
  // blt the src and dest pix, OR-ing overlapping pixels
  pixRasterop(union_pix,
              src_box->x - union_box->x, src_box->y - union_box->y,
              src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0);
  pixRasterop(union_pix,
              dest_box->x - union_box->x, dest_box->y - union_box->y,
              dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0);
  // replace the dest_box contents with the union box
  *dest_box = *union_box;
  boxDestroy(&union_box);
  return union_pix;
}
// create a union of a number of arbitrary pix
// Merges pix [start_pix, start_pix + pix_cnt) of the pixa into one newly
// created pix, returning it and its bounding box in *dest_box.
// Returns NULL (with no box) on allocation failure.
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box,
                                 int start_pix, int pix_cnt) {
  // compute union_box over the selected range
  int min_x = INT_MAX,
      max_x = INT_MIN,
      min_y = INT_MAX,
      max_y = INT_MIN;
  for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
    Box *pix_box = pixa->boxa->box[pix_idx];
    // track extremes of all boxes (x + w / y + h are exclusive ends)
    UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x);
    UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y);
  }
  (*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y);
  if ((*dest_box) == NULL) {
    return NULL;
  }
  // create the union pix with the same depth as the page image
  Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d);
  if (union_pix == NULL) {
    boxDestroy(dest_box);
    return NULL;
  }
  // blt each source pix into place, OR-ing overlapping pixels
  for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
    Box *pix_box = pixa->boxa->box[pix_idx];
    Pix *con_pix = pixa->pix[pix_idx];
    pixRasterop(union_pix,
                pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y,
                pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0);
  }
  return union_pix;
}
// create a union of a number of arbitrary pix
// Convenience overload: merges the entire pixa by delegating to the
// ranged variant.
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) {
  const int total_cnt = pixa->n;
  return Pixa2Pix(pixa, dest_box, 0, total_cnt);
}
// merges a number of lines into one line given a bounding box and a mask
// For every connected component of the small line, finds the vertically
// closest x-overlapping component among the valid lines and, when it is
// close enough (under the estimated alef height), ORs the component into
// that line's pix. Returns false on failure.
bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box,
                                  Pixa *lines, Boxaa *lines_con_comps) {
  // compute the connected components of the lines to be merged
  Pixa *small_con_comps_pix;
  Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix,
      line_box, &small_con_comps_pix);
  if (small_line_con_comps == NULL) {
    return false;
  }
  // for each connected component
  for (int con = 0; con < small_line_con_comps->n; con++) {
    Box *small_con_comp_box = small_line_con_comps->box[con];
    int best_line = -1,
        best_dist = INT_MAX,
        small_box_right = small_con_comp_box->x + small_con_comp_box->w,
        small_box_bottom = small_con_comp_box->y + small_con_comp_box->h;
    // for each valid (non-small) line
    for (int line = 0; line < lines->n; line++) {
      if (SmallLine(lines->boxa->box[line]) == true) {
        continue;
      }
      // for all the connected components in the line
      Boxa *line_con_comps = lines_con_comps->boxa[line];
      for (int lcon = 0; lcon < line_con_comps->n; lcon++) {
        Box *con_comp_box = line_con_comps->box[lcon];
        int xdist,
            ydist,
            box_right = con_comp_box->x + con_comp_box->w,
            box_bottom = con_comp_box->y + con_comp_box->h;
        // gap (negative means overlap) between the two boxes on each axis
        xdist = MAX(small_con_comp_box->x, con_comp_box->x) -
            MIN(small_box_right, box_right);
        ydist = MAX(small_con_comp_box->y, con_comp_box->y) -
            MIN(small_box_bottom, box_bottom);
        // if there is an overlap in x-direction, track the vertically
        // closest line
        if (xdist <= 0) {
          if (best_line == -1 || ydist < best_dist) {
            best_dist = ydist;
            best_line = line;
          }
        }
      }
    }
    // merge only if a candidate was found and it is close enough
    if (best_line != -1 && best_dist < est_alef_hgt_) {
      // add the pix to the best line
      Pix *new_line = PixUnion(lines->pix[best_line],
                               lines->boxa->box[best_line],
                               small_con_comps_pix->pix[con],
                               small_con_comp_box);
      if (new_line == NULL) {
        // clean up before bailing out (these were previously leaked on
        // this failure path)
        pixaDestroy(&small_con_comps_pix);
        boxaDestroy(&small_line_con_comps);
        return false;
      }
      pixDestroy(&lines->pix[best_line]);
      lines->pix[best_line] = new_line;
    }
  }
  pixaDestroy(&small_con_comps_pix);
  boxaDestroy(&small_line_con_comps);
  return true;
}
// Creates new set of lines from the computed columns
// Splits invalid lines, records each line's connected components, merges
// "small" lines (e.g. diacritics) into their nearest valid line, and
// finally transfers the resulting pixa into columns_.
bool CubeLineSegmenter::AddLines(Pixa *lines) {
  // create an array that will hold the bounding boxes
  // of the concomps belonging to each line
  Boxaa *lines_con_comps = boxaaCreate(lines->n);
  if (lines_con_comps == NULL) {
    return false;
  }
  // NOTE(review): the early 'return false' paths below leak
  // lines_con_comps (and sometimes split_lines) -- confirm and add
  // cleanup if this code is ever revived.
  for (int line = 0; line < lines->n; line++) {
    // if the line is not valid
    if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) {
      // split it
      Pixa *split_lines = SplitLine(lines->pix[line],
                                    lines->boxa->box[line]);
      // remove the old line (and compensate the loop index)
      if (pixaRemovePix(lines, line) != 0) {
        return false;
      }
      line--;
      if (split_lines == NULL) {
        continue;
      }
      // add the split lines instead and move the pointer
      for (int s_line = 0; s_line < split_lines->n; s_line++) {
        Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE);
        Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE);
        if (sp_line == NULL || sp_box == NULL) {
          return false;
        }
        // insert the new line right after the current position
        if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) {
          return false;
        }
      }
      // remove the split lines
      pixaDestroy(&split_lines);
    }
  }
  // compute the concomps bboxes of each line
  for (int line = 0; line < lines->n; line++) {
    Boxa *line_con_comps = ComputeLineConComps(lines->pix[line],
                                               lines->boxa->box[line], NULL);
    if (line_con_comps == NULL) {
      return false;
    }
    // insert it into the boxaa array (ownership transferred by L_INSERT)
    if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) {
      return false;
    }
  }
  // post process the lines:
  // merge the contents of "small" lines into legitimate lines
  for (int line = 0; line < lines->n; line++) {
    // a small line detected
    if (SmallLine(lines->boxa->box[line]) == true) {
      // merge its components to one of the valid lines
      if (MergeLine(lines->pix[line], lines->boxa->box[line],
                    lines, lines_con_comps) == true) {
        // remove the small line and its conn-comp record
        if (pixaRemovePix(lines, line) != 0) {
          return false;
        }
        if (boxaaRemoveBoxa(lines_con_comps, line) != 0) {
          return false;
        }
        line--;
      }
    }
  }
  boxaaDestroy(&lines_con_comps);
  // add the pix masks; L_INSERT hands ownership of lines to columns_
  if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) {
    return false;
  }
  return true;
}
// Index the specific pixa using RTL reading order
// Returns a newly allocated index array ordering the pix by decreasing
// right edge, so index 0 refers to the right-most pix. Caller owns the
// returned array (delete []).
int *CubeLineSegmenter::IndexRTL(Pixa *pixa) {
  const int cnt = pixa->n;
  int *pix_index = new int[cnt];
  for (int idx = 0; idx < cnt; idx++) {
    pix_index[idx] = idx;
  }
  // selection-style sort of the indices by right edge, descending
  for (int ipix = 0; ipix < cnt; ipix++) {
    for (int jpix = ipix + 1; jpix < cnt; jpix++) {
      Box *ibox = pixa->boxa->box[pix_index[ipix]];
      Box *jbox = pixa->boxa->box[pix_index[jpix]];
      // keep the pix with the larger right edge first
      if ((ibox->x + ibox->w) < (jbox->x + jbox->w)) {
        const int tmp = pix_index[ipix];
        pix_index[ipix] = pix_index[jpix];
        pix_index[jpix] = tmp;
      }
    }
  }
  return pix_index;
}
// Performs line segmentation
// Finds text columns with coarse full-image morphology, extracts the line
// masks of each column (columns processed in RTL reading order), and
// accumulates them into columns_. Returns true on success.
bool CubeLineSegmenter::LineSegment() {
  // Use full image morphology to find columns
  // This only works for simple layouts where each column
  // of text extends the full height of the input image.
  Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0);
  if (pix_temp1 == NULL) {
    return false;
  }
  // Mask with a single component over each column
  Pixa *pixam;
  Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8);
  if (boxa == NULL) {
    return false;
  }
  // NOTE(review): the early 'return false' paths in this function leak
  // pix_temp1, pixam, boxa, pixad and/or col_order -- confirm & harden.
  int init_morph_min_hgt = kLineSepMorphMinHgt;
  char sequence_str[16];
  sprintf(sequence_str, "c100.%d", init_morph_min_hgt);
  // Use selective region-based morphology to get the textline mask.
  Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0);
  if (pixad == NULL) {
    return false;
  }
  // for all columns
  int col_cnt = boxaGetCount(boxa);
  // create columns
  columns_ = pixaaCreate(col_cnt);
  if (columns_ == NULL) {
    return false;
  }
  // index columns based on reading order (RTL)
  int *col_order = IndexRTL(pixad);
  if (col_order == NULL) {
    return false;
  }
  line_cnt_ = 0;
  for (int col_idx = 0; col_idx < col_cnt; col_idx++) {
    int col = col_order[col_idx];
    // get the pix and box corresponding to the column
    Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE);
    if (pixt3 == NULL) {
      delete []col_order;
      return false;
    }
    Box *col_box = pixad->boxa->box[col];
    Pixa *pixac;
    Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8);
    if (boxa2 == NULL) {
      delete []col_order;
      return false;
    }
    // offset the boxes by the column box
    for (int line = 0; line < pixac->n; line++) {
      pixac->boxa->box[line]->x += col_box->x;
      pixac->boxa->box[line]->y += col_box->y;
    }
    // add the lines
    if (AddLines(pixac) == true) {
      if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) {
        delete []col_order;
        return false;
      }
    }
    pixDestroy(&pixt3);
    boxaDestroy(&boxa2);
    // NOTE(review): if AddLines failed above, columns_->pixa[col_idx] may
    // not exist yet -- this indexing assumes every column was added.
    line_cnt_ += columns_->pixa[col_idx]->n;
  }
  pixaDestroy(&pixam);
  pixaDestroy(&pixad);
  boxaDestroy(&boxa);
  delete []col_order;
  pixDestroy(&pix_temp1);
  return true;
}
// Estimate the parameters of the font(s) used in the page
// Builds a histogram of connected-component heights; empirically the two
// most populated bins correspond to the dot and the alef, which give the
// height estimates (est_dot_hgt_, est_alef_hgt_) used by line validation.
// Returns false if no usable components exist.
// (A redundant duplicate memset of the histogram has been removed.)
bool CubeLineSegmenter::EstimateFontParams() {
  int hgt_hist[kHgtBins];
  int max_hgt;
  double mean_hgt;
  // compute max hgt of the usable conn comps
  max_hgt = 0;
  for (int con = 0; con < con_comps_->n; con++) {
    // skip conn comps that are too long or too wide
    if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
        con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
      continue;
    }
    max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h);
  }
  if (max_hgt <= 0) {
    return false;
  }
  // init hgt histogram of concomps
  memset(hgt_hist, 0, sizeof(hgt_hist));
  // compute histogram
  mean_hgt = 0.0;
  for (int con = 0; con < con_comps_->n; con++) {
    // skip conn comps that are too long or too wide
    if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
        con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
      continue;
    }
    int bin = static_cast<int>(kHgtBins * con_comps_->boxa->box[con]->h /
                               max_hgt);
    bin = MIN(bin, kHgtBins - 1);
    hgt_hist[bin]++;
    mean_hgt += con_comps_->boxa->box[con]->h;
  }
  mean_hgt /= con_comps_->n;
  // find the top 2 bins with a partial selection sort (only the two most
  // frequent bins are needed)
  int idx[kHgtBins];
  for (int bin = 0; bin < kHgtBins; bin++) {
    idx[bin] = bin;
  }
  for (int ibin = 0; ibin < 2; ibin++) {
    for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) {
      if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) {
        int swap = idx[ibin];
        idx[ibin] = idx[jbin];
        idx[jbin] = swap;
      }
    }
  }
  // empirically, we found out that the 2 highest freq bins correspond
  // respectively to the dot and alef
  est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins);
  est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins);
  // as a sanity check the dot hgt must be significantly lower than alef
  if (est_alef_hgt_ < (est_dot_hgt_ * 2)) {
    // estimates look implausible: fall back to the mean height instead
    est_alef_hgt_ = mean_hgt * 1.5;
    est_dot_hgt_ = est_alef_hgt_ / 5.0;
  }
  est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0);
  return true;
}
// clean up the image
// Removes long horizontal and vertical rules via morphology, then erases
// connected components with extreme aspect ratios or tiny dimensions.
// Returns a newly allocated cleaned pix, or NULL on failure.
Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) {
  // get rid of long horizontal lines
  Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0);
  // NOTE(review): pix_temp0 / pix_temp1 are used without NULL checks even
  // though pixMorphCompSequence can fail -- confirm and harden.
  pixXor(pix_temp0, pix_temp0, orig_img);
  // get rid of long vertical lines
  Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0);
  pixXor(pix_temp1, pix_temp1, pix_temp0);
  pixDestroy(&pix_temp0);
  // detect connected components
  Pixa *con_comps;
  Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8);
  if (boxa == NULL) {
    return NULL;
  }
  // detect and remove suspicious conn comps
  for (int con = 0; con < con_comps->n; con++) {
    Box *box = boxa->box[con];
    // remove if suspicious: extremely wide, extremely tall, or tiny
    if ((box->w > (box->h * kMaxHorzAspectRatio)) ||
        (box->h > (box->w * kMaxVertAspectRatio)) ||
        (box->w < kMinWid && box->h < kMinHgt)) {
      // XOR the component with itself in place, i.e. erase it
      pixRasterop(pix_temp1, box->x, box->y, box->w, box->h,
                  PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0);
    }
  }
  pixaDestroy(&con_comps);
  boxaDestroy(&boxa);
  return pix_temp1;
}
// Init the page segmenter
// Idempotent: returns immediately when already initialized; otherwise runs
// the internal line segmentation, which requires an input image.
bool CubeLineSegmenter::Init() {
  if (init_) {
    return true;
  }
  return (orig_img_ != NULL) ? FindLines() : false;
}
// return the pix mask and box of a specific line
// Lazily initializes the segmenter; returns NULL for failed init or an
// out-of-range line index.
Pix *CubeLineSegmenter::Line(int line, Box **line_box) {
  const bool ready = init_ || Init();
  if (!ready || line < 0 || line >= line_cnt_) {
    return NULL;
  }
  *line_box = lines_pixa_->boxa->box[line];
  return lines_pixa_->pix[line];
}
// Implements a basic rudimentary layout analysis based on Leptonica
// works OK for Arabic. For other languages, the function
// TesseractPageAnalysis should be called instead.
// Pipeline: grayscale -> binarize -> deskew -> cleanup -> conn comps ->
// font-size estimation -> line segmentation. Sets init_ and returns true
// on success.
bool CubeLineSegmenter::FindLines() {
  // convert the image to gray scale if necessary
  Pix *gray_scale_img = NULL;
  if (orig_img_->d != 2 && orig_img_->d != 8) {
    gray_scale_img = pixConvertTo8(orig_img_, false);
    if (gray_scale_img == NULL) {
      return false;
    }
  } else {
    gray_scale_img = orig_img_;
  }
  // threshold image
  Pix *thresholded_img;
  thresholded_img = pixThresholdToBinary(gray_scale_img, 128);
  // free the gray scale image if necessary
  if (gray_scale_img != orig_img_) {
    pixDestroy(&gray_scale_img);
  }
  // bail-out if thresholding failed
  if (thresholded_img == NULL) {
    return false;
  }
  // deskew
  Pix *deskew_img = pixDeskew(thresholded_img, 2);
  if (deskew_img == NULL) {
    // the thresholded image was previously leaked on this failure path
    pixDestroy(&thresholded_img);
    return false;
  }
  pixDestroy(&thresholded_img);
  img_ = CleanUp(deskew_img);
  // (a second, redundant pixDestroy of deskew_img after the NULL check
  // below was removed; pixDestroy NULLs the pointer so it was a no-op)
  pixDestroy(&deskew_img);
  if (img_ == NULL) {
    return false;
  }
  // compute connected components
  Boxa *boxa = pixConnComp(img_, &con_comps_, 8);
  if (boxa == NULL) {
    return false;
  }
  boxaDestroy(&boxa);
  // estimate dot and alef hgts
  if (EstimateFontParams() == false) {
    return false;
  }
  // perform line segmentation
  if (LineSegment() == false) {
    return false;
  }
  // success
  init_ = true;
  return true;
}
}

View File

@ -1,156 +0,0 @@
/**********************************************************************
* File: cube_page_segmenter.h
* Description: Declaration of the Cube Page Segmenter Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// TODO(ahmadab)
// This is really a makeshift line segmenter that works well for Arabic
// This should eventually be replaced by Ray Smith's Page segmenter
// There are lots of magic numbers below that were determined empirically
// but not thoroughly tested
#ifndef CUBE_LINE_SEGMENTER_H
#define CUBE_LINE_SEGMENTER_H
#include "cube_reco_context.h"
#include "allheaders.h"
namespace tesseract {
// Rudimentary page/line segmenter based on leptonica morphology. Splits a
// page image into columns and text lines; tuned for Arabic (see the TODO
// above about replacing it with the generic page segmenter).
class CubeLineSegmenter {
 public:
  CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
  ~CubeLineSegmenter();
  // Accessor functions
  // Each accessor lazily runs the segmentation (Init) on first use and
  // returns NULL/0 when it fails.
  Pix *PostProcessedImage() {
    if (init_ == false && Init() == false) {
      return NULL;
    }
    return img_;
  }
  int ColumnCnt() {
    if (init_ == false && Init() == false) {
      return 0;
    }
    return columns_->n;
  }
  Box *Column(int col) {
    if (init_ == false && Init() == false) {
      return NULL;
    }
    return columns_->boxa->box[col];
  }
  int LineCnt() {
    if (init_ == false && Init() == false) {
      return 0;
    }
    return line_cnt_;
  }
  Pixa *ConComps() {
    if (init_ == false && Init() == false) {
      return NULL;
    }
    return con_comps_;
  }
  Pixaa *Columns() {
    if (init_ == false && Init() == false) {
      return NULL;
    }
    return columns_;
  }
  // Estimated alef and dot heights (pixels), computed by
  // EstimateFontParams and used to validate candidate lines.
  inline double AlefHgtEst() { return est_alef_hgt_; }
  inline double DotHgtEst() { return est_dot_hgt_; }
  Pix *Line(int line, Box **line_box);

 private:
  static const float kMinValidLineHgtRatio;
  static const int kLineSepMorphMinHgt;
  static const int kHgtBins;
  static const int kMaxConnCompHgt;
  static const int kMaxConnCompWid;
  static const int kMaxHorzAspectRatio;
  static const int kMaxVertAspectRatio;
  static const int kMinWid;
  static const int kMinHgt;
  static const double kMaxValidLineRatio;
  // Cube Reco context
  CubeRecoContext *cntxt_;
  // Original image
  Pix *orig_img_;
  // Post processed image
  Pix *img_;
  // Init flag
  bool init_;
  // Output Line and column info
  int line_cnt_;
  Pixaa *columns_;
  Pixa *con_comps_;
  Pixa *lines_pixa_;
  // Estimates for sizes of ALEF and DOT needed for Arabic analysis
  double est_alef_hgt_;
  double est_dot_hgt_;
  // Init the page analysis
  bool Init();
  // Performs line segmentation
  bool LineSegment();
  // Cleanup function
  Pix *CleanUp(Pix *pix);
  // compute validity ratio for a line
  double ValidityRatio(Pix *line_mask_pix, Box *line_box);
  // validate line
  bool ValidLine(Pix *line_mask_pix, Box *line_box);
  // split a line continuously until valid or fail
  Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
  // do a desperate attempt at cracking lines
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
  Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
  // Checks if a line is too small
  bool SmallLine(Box *line_box);
  // Compute the connected components in a line
  Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
                             Pixa **con_comps_pixa);
  // create a union of two arbitrary pix
  Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
  // create a union of a pixa subset
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
  // create a union of a pixa
  Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
  // merges a number of lines into one line given a bounding box and a mask
  bool MergeLine(Pix *line_mask_pix, Box *line_box,
                 Pixa *lines, Boxaa *lines_con_comps);
  // Creates new set of lines from the computed columns
  bool AddLines(Pixa *lines);
  // Estimate the parameters of the font(s) used in the page
  bool EstimateFontParams();
  // perform a vertical Closing with the specified threshold
  // returning the resulting conn comps as a pixa
  // (note: "thresold" typo is in the original declaration)
  Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
  // Index the specific pixa using RTL reading order
  int *IndexRTL(Pixa *pixa);
  // Implements a rudimentary page & line segmenter
  bool FindLines();
};
}
#endif // CUBE_LINE_SEGMENTER_H

View File

@ -1,257 +0,0 @@
/**********************************************************************
* File: cube_object.cpp
* Description: Implementation of the Cube Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <math.h>
#include "cube_object.h"
#include "cube_utils.h"
#include "word_list_lang_model.h"
namespace tesseract {
// Constructs a CubeObject around an existing (caller-owned) CharSamp.
CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) {
  Init();
  cntxt_ = cntxt;
  char_samp_ = char_samp;
}
// Constructs a CubeObject from a sub-rectangle of a Pix. The CharSamp
// created here is owned by this object and freed in the destructor.
CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix,
                       int left, int top, int wid, int hgt) {
  Init();
  cntxt_ = cntxt;
  char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt);
  own_char_samp_ = true;
}
// Resets every data member to its inert default state (no heap objects,
// no ownership, no de-slanting).
void CubeObject::Init() {
  cntxt_ = NULL;
  char_samp_ = NULL;
  own_char_samp_ = false;
  deslanted_ = false;
  deslanted_char_samp_ = NULL;
  beam_obj_ = NULL;
  deslanted_beam_obj_ = NULL;
  srch_obj_ = NULL;
  deslanted_srch_obj_ = NULL;
  alt_list_ = NULL;
  deslanted_alt_list_ = NULL;
}
// Frees the alternate lists produced by the most recent recognition call,
// leaving the object ready for another Recognize().
void CubeObject::Cleanup() {
  delete alt_list_;
  delete deslanted_alt_list_;
  alt_list_ = NULL;
  deslanted_alt_list_ = NULL;
}
// Destructor: releases every heap object this instance owns, including the
// char sample when ownership was taken at construction / via
// SetCharSampOwnership().
CubeObject::~CubeObject() {
  if (own_char_samp_) {
    delete char_samp_;
    char_samp_ = NULL;
  }
  // search and beam objects (plain and de-slanted variants)
  delete srch_obj_;
  delete deslanted_srch_obj_;
  delete beam_obj_;
  delete deslanted_beam_obj_;
  delete deslanted_char_samp_;
  srch_obj_ = NULL;
  deslanted_srch_obj_ = NULL;
  beam_obj_ = NULL;
  deslanted_beam_obj_ = NULL;
  deslanted_char_samp_ = NULL;
  // free the alternate lists as well
  Cleanup();
}
/**
 * Actually do the recognition using the specified language model. If none
 * is specified, the default language model in the CubeRecoContext is used.
 * Any previously computed alternate lists are freed first.
 * @param lang_mod language model to search against (NULL => context default)
 * @param word_mode determines whether recognition is done as a word or a phrase
 * @return the sorted list of alternate answers (owned by this object), or
 *         NULL on failure or when no char sample is attached
 */
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
  if (char_samp_ == NULL) {
    return NULL;
  }
  // clear alt lists from any previous call
  Cleanup();
  // no specified language model, use the one in the reco context
  if (lang_mod == NULL) {
    lang_mod = cntxt_->LangMod();
  }
  // normalize if necessary
  if (cntxt_->SizeNormalization()) {
    Normalize();
  }
  // assume not de-slanted by default
  deslanted_ = false;
  // create a beam search object (created lazily, reused on later calls)
  if (beam_obj_ == NULL) {
    beam_obj_ = new BeamSearch(cntxt_, word_mode);
  }
  // create a cube search object
  if (srch_obj_ == NULL) {
    srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
  }
  // run a beam search against the tesslang model
  alt_list_ = beam_obj_->Search(srch_obj_, lang_mod);
  // deslant (if supported by language) and re-reco if probability is low enough
  if (cntxt_->HasItalics() == true &&
      (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
       alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) {
    if (deslanted_beam_obj_ == NULL) {
      // NOTE(review): constructed without the word_mode argument, unlike
      // beam_obj_ above — presumably the parameter defaults appropriately;
      // confirm against BeamSearch's declaration.
      deslanted_beam_obj_ = new BeamSearch(cntxt_);
    }
    if (deslanted_srch_obj_ == NULL) {
      // clone and de-slant the sample once; reused on later calls
      deslanted_char_samp_ = char_samp_->Clone();
      if (deslanted_char_samp_ == NULL) {
        fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
                "construct deslanted CharSamp\n");
        return NULL;
      }
      if (deslanted_char_samp_->Deslant() == false) {
        return NULL;
      }
      deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
    }
    // run a beam search against the tesslang model
    deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_,
                                                      lang_mod);
    // should we use the de-slanted altlist? Only when it beats the plain one.
    if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) {
      if (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
          deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) {
        deslanted_ = true;
        return deslanted_alt_list_;
      }
    }
  }
  return alt_list_;
}
/**
 * Recognize the member char sample as a single word.
 * Delegates to Recognize() with word_mode == true.
 */
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
  return Recognize(lang_mod, true);
}
/**
 * Recognize the member char sample as a phrase (multiple space-separated
 * words). Delegates to Recognize() with word_mode == false.
 */
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
  return Recognize(lang_mod, false);
}
/**
* Computes the cost of a specific string. This is done by performing
* recognition of a language model that allows only the specified word
*/
int CubeObject::WordCost(const char *str) {
WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
if (lang_mod->AddString(str) == false) {
delete lang_mod;
return WORST_COST;
}
// run a beam search against the single string wordlist model
WordAltList *alt_list = RecognizeWord(lang_mod);
delete lang_mod;
int cost = WORST_COST;
if (alt_list != NULL) {
if (alt_list->AltCount() > 0) {
cost = alt_list->AltCost(0);
}
}
return cost;
}
// Classifies the member char sample as a single character and returns the
// resulting alternate list (NULL when no sample is attached).
CharAltList *CubeObject::RecognizeChar() {
  if (char_samp_ == NULL) {
    return NULL;
  }
  CharClassifier *classifier = cntxt_->Classifier();
  ASSERT_HOST(classifier != NULL);
  return classifier->Classify(char_samp_);
}
// Normalize the input word bitmap to have a minimum aspect ratio.
// Over-segments the sample, averages the per-segment height/width ratios,
// and vertically scales the sample down when the mean ratio is too high.
// Always returns true (scaling failure silently keeps the original sample).
bool CubeObject::Normalize() {
  // create a cube search object to obtain an over-segmentation of the sample
  CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
  // Perform over-segmentation
  int seg_cnt = srch_obj->SegPtCnt();
  // Only perform normalization if segment count is large enough
  if (seg_cnt < kMinNormalizationSegmentCnt) {
    delete srch_obj;
    return true;
  }
  // compute the mean aspect ratio (height/width) of the segments.
  // NOTE(review): the loop runs seg_cnt + 1 times because SegPtCnt() is one
  // less than the segment count; CharSample(-1, 0) denotes the first segment.
  double ar_mean = 0.0;
  for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) {
    CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx);
    if (seg_samp != NULL && seg_samp->Width() > 0) {
      ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width());
    }
  }
  ar_mean /= (seg_cnt + 1);
  // perform normalization if segment AR is too high
  if (ar_mean > kMinNormalizationAspectRatio) {
    // scale down the image in the y-direction to attain AR
    CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(),
                                           2.0 * char_samp_->Height() / ar_mean,
                                           false);
    if (new_samp != NULL) {
      // free existing char samp if owned
      if (own_char_samp_) {
        delete char_samp_;
      }
      // update with new scaled charsamp and set ownership flag
      char_samp_ = new_samp;
      own_char_samp_ = true;
    }
  }
  delete srch_obj;
  return true;
}
}

View File

@ -1,171 +0,0 @@
/**********************************************************************
* File: cube_object.h
* Description: Declaration of the Cube Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeObject class is the main class used to perform recognition of
// a specific char_samp as a single word.
// To recognize a word, a CubeObject is constructed for this word.
// A Call to RecognizeWord is then issued specifying the language model that
// will be used during recognition. If none is specified, the default language
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
// construction time
//
// The typical usage pattern for Cube is shown below:
//
// // Create and initialize Tesseract object and get its
// // CubeRecoContext object (note that Tesseract object owns it,
// // so it will be freed when the Tesseract object is freed).
// tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
// tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
// CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
// CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
// .
// .
// .
// // Do this to recognize a word in pix whose co-ordinates are
// // (left,top,width,height)
// tesseract::CubeObject *cube_obj;
// cube_obj = new tesseract::CubeObject(cntxt, pix,
// left, top, width, height);
//
// // Get back Cube's list of answers
// tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
// CHECK(alt_list != NULL && alt_list->AltCount() > 0);
//
// // Get the string and cost of every alternate
// for (int alt = 0; alt < alt_list->AltCount(); alt++) {
// // Return the result as a UTF-32 string
// string_32 res_str32 = alt_list->Alt(alt);
// // Convert to UTF8 if need-be
// string res_str;
// CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
// // Get the string cost. This should get bigger as you go deeper
// // in the list
// int cost = alt_list->AltCost(alt);
// }
//
// // Call this once you are done recognizing this word
// delete cube_obj;
//
// // Call this once you are done recognizing all words with
// // for the current language
// delete tess_obj;
//
// Note that if the language supports "Italics" (see the CubeRecoContext), the
// RecognizeWord function attempts to de-slant the word.
#ifndef CUBE_OBJECT_H
#define CUBE_OBJECT_H
#include "char_samp.h"
#include "word_altlist.h"
#include "beam_search.h"
#include "cube_search_object.h"
#include "tess_lang_model.h"
#include "cube_reco_context.h"
namespace tesseract {
// minimum aspect ratio needed to normalize a char_samp before recognition
static const float kMinNormalizationAspectRatio = 3.5;
// minimum probability a top alt choice must meet before having
// deslanted processing applied to it
static const float kMinProbSkipDeslanted = 0.25;

// Recognizes a single word bitmap (CharSamp) and produces a ranked list of
// alternate answers. See the usage example in the file header above.
class CubeObject {
 public:
  // Different flavors of constructor. They just differ in the way the
  // word image is specified
  CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
  CubeObject(CubeRecoContext *cntxt, Pix *pix,
             int left, int top, int wid, int hgt);
  ~CubeObject();
  // Perform the word recognition using the specified language model. If none
  // is specified, the default language model in the CubeRecoContext is used.
  // Returns the sorted list of alternate word answers
  WordAltList *RecognizeWord(LangModel *lang_mod = NULL);
  // Same as RecognizeWord but recognizes as a phrase
  WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);
  // Computes the cost of a specific string. This is done by performing
  // recognition of a language model that allows only the specified word.
  // The alternate list(s) will be permanently modified.
  int WordCost(const char *str);
  // Recognizes a single character and returns the list of results.
  CharAltList *RecognizeChar();
  // Returns the BeamSearch object that resulted from the last call to
  // RecognizeWord
  inline BeamSearch *BeamObj() const {
    return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_);
  }
  // Returns the WordAltList object that resulted from the last call to
  // RecognizeWord
  inline WordAltList *AlternateList() const {
    return (deslanted_ == true ? deslanted_alt_list_ : alt_list_);
  }
  // Returns the CubeSearchObject object that resulted from the last call to
  // RecognizeWord
  inline CubeSearchObject *SrchObj() const {
    return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_);
  }
  // Returns the CharSamp object that resulted from the last call to
  // RecognizeWord. Note that this object is not necessarily identical to the
  // one passed at construction time as normalization might have occurred
  inline CharSamp *CharSample() const {
    return (deslanted_ == true ? deslanted_char_samp_ : char_samp_);
  }
  // Set the ownership of the CharSamp
  inline void SetCharSampOwnership(bool own_char_samp) {
    own_char_samp_ = own_char_samp;
  }

 protected:
  // Normalize the CharSamp if its aspect ratio exceeds the below constant.
  bool Normalize();

 private:
  // minimum segment count needed to normalize a char_samp before recognition
  static const int kMinNormalizationSegmentCnt = 4;
  // Data member initialization function
  void Init();
  // Free alternate lists.
  void Cleanup();
  // Perform the actual recognition using the specified language model. If none
  // is specified, the default language model in the CubeRecoContext is used.
  // Returns the sorted list of alternate answers. Called by both
  // RecognizeWord (word_mode is true) and RecognizePhrase (word_mode is false)
  WordAltList *Recognize(LangModel *lang_mod, bool word_mode);

  // recognition context (not freed by this class)
  CubeRecoContext *cntxt_;
  // beam search objects for the plain and de-slanted samples (lazily created)
  BeamSearch *beam_obj_;
  BeamSearch *deslanted_beam_obj_;
  // whether this object owns (and must free) char_samp_
  bool own_char_samp_;
  // whether the last recognition used the de-slanted sample
  bool deslanted_;
  CharSamp *char_samp_;
  CharSamp *deslanted_char_samp_;
  CubeSearchObject *srch_obj_;
  CubeSearchObject *deslanted_srch_obj_;
  // alternate lists produced by the last recognition (owned)
  WordAltList *alt_list_;
  WordAltList *deslanted_alt_list_;
};
}
#endif // CUBE_OBJECT_H

View File

@ -1,421 +0,0 @@
/**********************************************************************
* File: cube_search_object.cpp
* Description: Implementation of the Cube Search Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "cube_search_object.h"
#include "cube_utils.h"
#include "ndminx.h"
namespace tesseract {
// Crop char samples tightly before feature extraction / classification
// (see usage in CharSample() and CharBox()).
const bool CubeSearchObject::kUseCroppedChars = true;

// Constructs a search object for the given word bitmap (samp is not freed by
// this class). Segmentation and cache allocation are deferred to Init().
CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp)
    : SearchObject(cntxt) {
  init_ = false;
  reco_cache_ = NULL;
  samp_cache_ = NULL;
  segments_ = NULL;
  segment_cnt_ = 0;
  samp_ = samp;
  left_ = 0;
  itop_ = 0;
  space_cost_ = NULL;
  no_space_cost_ = NULL;
  wid_ = samp_->Width();
  hgt_ = samp_->Height();
  max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar();
  rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L);
  // inter-segment gap thresholds for spaces scale with the word height
  min_spc_gap_ =
      static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio());
  max_spc_gap_ =
      static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio());
}
CubeSearchObject::~CubeSearchObject() {
  // all owned caches, segments and cost arrays are released by Cleanup()
  Cleanup();
}
// Frees a (segment_cnt x segment_cnt) cache of heap-allocated objects and
// resets the owning pointer to NULL. Factors out the previously duplicated
// teardown code for the recognition and char-sample caches.
template <class T>
static void FreeSegmentCache(T ***&cache, int segment_cnt) {
  if (cache == NULL)
    return;
  for (int strt_seg = 0; strt_seg < segment_cnt; strt_seg++) {
    if (cache[strt_seg] != NULL) {
      for (int end_seg = 0; end_seg < segment_cnt; end_seg++) {
        // delete on NULL entries is a no-op
        delete cache[strt_seg][end_seg];
      }
      delete []cache[strt_seg];
    }
  }
  delete []cache;
  cache = NULL;
}

// Cleanup: releases the recognition cache, the CharSamp cache, the segment
// list and the space-cost arrays, returning the object to its uninitialized
// state. The word sample itself (samp_) is not owned and is not freed.
void CubeSearchObject::Cleanup() {
  // delete the recognition and CharSamp caches
  FreeSegmentCache(reco_cache_, segment_cnt_);
  FreeSegmentCache(samp_cache_, segment_cnt_);
  // delete segment list
  if (segments_) {
    for (int seg = 0; seg < segment_cnt_; seg++) {
      delete segments_[seg];
    }
    delete []segments_;
    segments_ = NULL;
  }
  // delete space/no-space cost arrays (computed only in phrase mode)
  delete []space_cost_;
  space_cost_ = NULL;
  delete []no_space_cost_;
  no_space_cost_ = NULL;
  segment_cnt_ = 0;
  init_ = false;
}
// Returns the number of segmentation points, which is one less than the
// number of segments, or -1 when initialization/segmentation fails.
int CubeSearchObject::SegPtCnt() {
  if (!init_) {
    if (!Init()) {
      return -1;
    }
  }
  return segment_cnt_ - 1;
}
// init and allocate variables, perform segmentation
bool CubeSearchObject::Init() {
if (init_)
return true;
if (!Segment()) {
return false;
}
// init cache
reco_cache_ = new CharAltList **[segment_cnt_];
samp_cache_ = new CharSamp **[segment_cnt_];
for (int seg = 0; seg < segment_cnt_; seg++) {
reco_cache_[seg] = new CharAltList *[segment_cnt_];
memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
samp_cache_[seg] = new CharSamp *[segment_cnt_];
memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
}
init_ = true;
return true;
}
// returns a char sample corresponding to the bitmap between 2 seg pts:
// the sample covers segments start_pt+1 .. end_pt (start_pt == -1 denotes
// the beginning of the word). The sample is cached and owned by this object.
CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
  // init if necessary
  if (!init_ && !Init())
    return NULL;
  // validate segment range
  if (!IsValidSegmentRange(start_pt, end_pt))
    return NULL;
  // look for the samp in the cache
  if (samp_cache_ && samp_cache_[start_pt + 1] &&
      samp_cache_[start_pt + 1][end_pt]) {
    return samp_cache_[start_pt + 1][end_pt];
  }
  // create a char samp object from the specified range of segments
  bool left_most;
  bool right_most;
  CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
                                          end_pt - start_pt, NULL,
                                          &left_most, &right_most, hgt_);
  if (!samp)
    return NULL;
  if (kUseCroppedChars) {
    CharSamp *cropped_samp = samp->Crop();
    // we no longer need the orig sample
    delete samp;
    if (!cropped_samp)
      return NULL;
    samp = cropped_samp;
  }
  // get the dimensions of the new cropped sample
  int char_top = samp->Top();
  int char_wid = samp->Width();
  int char_hgt = samp->Height();
  // for cursive languages, these features correspond to whether
  // the charsamp is at the beginning or end of conncomp
  if (cntxt_->Cursive() == true) {
    // first and last char flags depend on reading order
    bool first_char = rtl_ ? right_most : left_most;
    bool last_char = rtl_ ? left_most : right_most;
    samp->SetFirstChar(first_char ? 255 : 0);
    samp->SetLastChar(last_char ? 255 : 0);
  } else {
    // for non cursive languages, these features correspond
    // to whether the charsamp is at the beginning or end of the word
    samp->SetFirstChar((start_pt == -1) ? 255 : 0);
    samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
  }
  // position/shape features normalized to the byte range [0, 255]
  samp->SetNormTop(255 * char_top / hgt_);
  samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_);
  samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt));
  // add to cache & return
  samp_cache_[start_pt + 1][end_pt] = samp;
  return samp;
}
// Returns a leptonica box with the dimensions of the (optionally cropped)
// char sample covering segments start_pt+1 .. end_pt. Unlike CharSample(),
// the result is not cached: the caller owns the returned box.
Box *CubeSearchObject::CharBox(int start_pt, int end_pt) {
  if (!init_ && !Init())
    return NULL;
  if (!IsValidSegmentRange(start_pt, end_pt)) {
    fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid "
            "segment range (%d, %d)\n", start_pt, end_pt);
    return NULL;
  }
  // create a char samp object from the specified range of segments,
  // extract its dimensions into a leptonica box, and delete it
  bool left_most;
  bool right_most;
  CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
                                          end_pt - start_pt, NULL,
                                          &left_most, &right_most, hgt_);
  if (!samp)
    return NULL;
  if (kUseCroppedChars) {
    CharSamp *cropped_samp = samp->Crop();
    delete samp;
    if (!cropped_samp) {
      return NULL;
    }
    samp = cropped_samp;
  }
  Box *box = boxCreate(samp->Left(), samp->Top(),
                       samp->Width(), samp->Height());
  delete samp;
  return box;
}
// Called from the Beam Search to return the alt list corresponding to
// recognizing the bitmap between two segmentation pts. Results are cached;
// the cache retains ownership of the returned list.
CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
  // init if necessary
  if (!init_ && !Init()) {
    fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
            "not initialize CubeSearchObject\n");
    return NULL;
  }
  // validate segment range
  if (!IsValidSegmentRange(start_pt, end_pt)) {
    fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid "
            "segment range (%d, %d)\n", start_pt, end_pt);
    return NULL;
  }
  // look for the recognition results in the cache
  if (reco_cache_ && reco_cache_[start_pt + 1] &&
      reco_cache_[start_pt + 1][end_pt]) {
    return reco_cache_[start_pt + 1][end_pt];
  }
  // create the char sample corresponding to the blob
  CharSamp *samp = CharSample(start_pt, end_pt);
  if (!samp) {
    fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
            "not construct CharSamp\n");
    return NULL;
  }
  // recognize the char sample
  CharClassifier *char_classifier = cntxt_->Classifier();
  if (char_classifier) {
    reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp);
  } else {
    // no classifier: all characters are equally probable; add a penalty
    // that favors 2-segment characters and aspect ratios (w/h) > 1
    fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube "
            "context has no character classifier!! Inventing a probability "
            "distribution.\n");
    int class_cnt = cntxt_->CharacterSet()->ClassCount();
    CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
    int seg_cnt = end_pt - start_pt;
    // uniform probability over classes, decayed exponentially with
    // |seg_cnt - 2| and with the sample's width/height ratio
    double prob_val = (1.0 / class_cnt) *
        exp(-fabs(seg_cnt - 2.0)) *
        exp(-samp->Width() / static_cast<double>(samp->Height()));
    for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
      alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
    }
    reco_cache_[start_pt + 1][end_pt] = alt_list;
  }
  return reco_cache_[start_pt + 1][end_pt];
}
// Perform segmentation of the bitmap by detecting connected components,
// segmenting each connected component using windowed vertical pixel density
// histogram and sorting the resulting segments in reading order
bool CubeSearchObject::Segment() {
if (!samp_)
return false;
segment_cnt_ = 0;
segments_ = samp_->Segment(&segment_cnt_, rtl_,
cntxt_->Params()->HistWindWid(),
cntxt_->Params()->MinConCompSize());
if (!segments_ || segment_cnt_ <= 0) {
return false;
}
if (segment_cnt_ >= kMaxSegmentCnt) {
return false;
}
return true;
}
// computes the space and no space costs at gaps between segments
// return true on success
bool CubeSearchObject::ComputeSpaceCosts() {
  // init if necessary
  if (!init_ && !Init())
    return false;
  // Already computed
  if (space_cost_)
    return true;
  // No segmentation points
  if (segment_cnt_ < 2)
    return false;
  // Compute the maximum x to the left of and minimum x to the right of each
  // segmentation point via running prefix/suffix scans over the segments.
  // Scan direction depends on the reading order.
  int *max_left_x = new int[segment_cnt_ - 1];
  int *min_right_x = new int[segment_cnt_ - 1];
  if (rtl_) {
    // RTL: segment 0 is the right-most; running min of Left() going right
    // to left, running max of Right() going left to right
    min_right_x[0] = segments_[0]->Left();
    max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
    for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
      min_right_x[pt_idx] =
          MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left());
      max_left_x[segment_cnt_ - pt_idx - 2] =
          MAX(max_left_x[segment_cnt_ - pt_idx - 1],
              segments_[segment_cnt_ - pt_idx - 1]->Right());
    }
  } else {
    // LTR: mirror image of the scans above
    min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left();
    max_left_x[0] = segments_[0]->Right();
    for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
      min_right_x[segment_cnt_ - pt_idx - 2] =
          MIN(min_right_x[segment_cnt_ - pt_idx - 1],
              segments_[segment_cnt_ - pt_idx - 1]->Left());
      max_left_x[pt_idx] =
          MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right());
    }
  }
  // Allocate memory for space and no space costs
  // trivial cases
  space_cost_ = new int[segment_cnt_ - 1];
  no_space_cost_ = new int[segment_cnt_ - 1];
  // go through all segmentation points determining the horizontal gap between
  // the images on both sides of each break point. Use the gap to estimate
  // the probability of a space. The probability is modeled as a linear
  // function of the gap width, clamped at the min/max gap thresholds.
  for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) {
    // determine the gap at the segmentation point
    int gap = min_right_x[pt_idx] - max_left_x[pt_idx];
    float prob = 0.0;
    // gap is too small => no space
    if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) {
      prob = 0.0;
    } else if (gap > max_spc_gap_) {
      // gap is too big => definite space
      prob = 1.0;
    } else {
      // gap is somewhere in between, compute probability
      prob = (gap - min_spc_gap_) /
          static_cast<double>(max_spc_gap_ - min_spc_gap_);
    }
    // compute cost of space and non-space; the space hypothesis carries an
    // extra constant penalty (a prob-0.1 prior)
    space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) +
        CubeUtils::Prob2Cost(0.1);
    no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob);
  }
  delete []min_right_x;
  delete []max_left_x;
  return true;
}
// Returns the cost of having a space before the specified segmentation point
int CubeSearchObject::SpaceCost(int pt_idx) {
if (!space_cost_ && !ComputeSpaceCosts()) {
// Failed to compute costs return a zero prob
return CubeUtils::Prob2Cost(0.0);
}
return space_cost_[pt_idx];
}
// Returns the cost of not having a space before the specified
// segmentation point.
// If the costs cannot be computed, treat "no space" as certain (probability
// 1.0, i.e. zero cost), matching the stated intent of the comment and the
// range overload below. The previous code returned Prob2Cost(0.0) — an
// effectively infinite cost — which contradicted both.
int CubeSearchObject::NoSpaceCost(int pt_idx) {
  if (!space_cost_ && !ComputeSpaceCosts())
    return CubeUtils::Prob2Cost(1.0);
  return no_space_cost_[pt_idx];
}
// Returns the cost of not having any spaces within the specified range
// of segmentation points
int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) {
// If fail to compute costs, return a 1.0 prob
if (!space_cost_ && !ComputeSpaceCosts())
return CubeUtils::Prob2Cost(1.0);
int no_spc_cost = 0;
for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++)
no_spc_cost += NoSpaceCost(pt_idx);
return no_spc_cost;
}
}

View File

@ -1,122 +0,0 @@
/**********************************************************************
* File: cube_search_object.h
* Description: Declaration of the Cube Search Object Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeSearchObject class represents a char_samp (a word bitmap) that is
// being searched for characters (or recognizeable entities).
// The Class detects the connected components and peforms an oversegmentation
// on each ConComp. The result of which is a list of segments that are ordered
// in reading order.
// The class provided methods that inquire about the number of segments, the
// CharSamp corresponding to any segment range and the recognition results
// of any segment range
// An object of Class CubeSearchObject is used by the BeamSearch algorithm
// to recognize a CharSamp into a list of word alternates
#ifndef CUBE_SEARCH_OBJECT_H
#define CUBE_SEARCH_OBJECT_H
#include "search_object.h"
#include "char_samp.h"
#include "conv_net_classifier.h"
#include "cube_reco_context.h"
#include "allheaders.h"
namespace tesseract {
// Over-segments a word bitmap and serves CharSamp / recognition queries over
// ranges of the resulting segments; used by the BeamSearch algorithm.
class CubeSearchObject : public SearchObject {
 public:
  CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp);
  ~CubeSearchObject();
  // returns the Segmentation Point count of the CharSamp owned by the class
  int SegPtCnt();
  // Recognize the set of segments given by the specified range and return
  // a list of possible alternate answers
  CharAltList * RecognizeSegment(int start_pt, int end_pt);
  // Returns the CharSamp corresponding to the specified segment range
  CharSamp *CharSample(int start_pt, int end_pt);
  // Returns a leptonica box corresponding to the specified segment range
  Box *CharBox(int start_pt, int end_pt);
  // Returns the cost of having a space before the specified segmentation pt
  int SpaceCost(int seg_pt);
  // Returns the cost of not having a space before the specified
  // segmentation pt
  int NoSpaceCost(int seg_pt);
  // Returns the cost of not having any spaces within the specified range
  // of segmentation points
  int NoSpaceCost(int seg_pt, int end_pt);

 private:
  // Maximum reasonable segment count
  static const int kMaxSegmentCnt = 128;
  // Use cropped samples
  static const bool kUseCroppedChars;
  // reading order flag (true when the context reads right-to-left)
  bool rtl_;
  // cached dimensions of char samp
  int left_;
  int itop_;
  int wid_;
  int hgt_;
  // minimum and maximum possible inter-segment gaps for spaces
  int min_spc_gap_;
  int max_spc_gap_;
  // initialization flag (set by Init())
  bool init_;
  // maximum segments per character: Cached from tuning parameters object
  int max_seg_per_char_;
  // char sample to be processed (not freed by this class)
  CharSamp *samp_;
  // segment count
  int segment_cnt_;
  // segments of the processed char samp
  ConComp **segments_;
  // Cache data members:
  // There are two caches kept; a CharSamp cache and a CharAltList cache
  // Each is a 2-D array of CharSamp and CharAltList pointers respectively
  // hence the triple pointer.
  CharAltList ***reco_cache_;
  CharSamp ***samp_cache_;
  // Cached costs of space and no-space after every segment. Computed only
  // in phrase mode
  int *space_cost_;
  int *no_space_cost_;
  // init and allocate variables, perform segmentation
  bool Init();
  // Cleanup
  void Cleanup();
  // Perform segmentation of the bitmap by detecting connected components,
  // segmenting each connected component using windowed vertical pixel density
  // histogram and sorting the resulting segments in reading order
  // Returns true on success
  bool Segment();
  // validate the segment range: [start_pt + 1, end_pt] must denote a
  // non-empty run of at most max_seg_per_char_ segments (start_pt == -1
  // denotes the position before the first segment).
  // NOTE(review): end_pt == segment_cnt_ passes this check yet would index
  // one past the segment_cnt_-sized caches in CharSample/RecognizeSegment;
  // presumably callers never pass it — confirm before reuse.
  inline bool IsValidSegmentRange(int start_pt, int end_pt) {
    return (end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ &&
            end_pt >= 0 && end_pt <= segment_cnt_ &&
            end_pt <= (start_pt + max_seg_per_char_));
  }
  // computes the space and no space costs at gaps between segments
  // return true on success
  bool ComputeSpaceCosts();
};
}
#endif // CUBE_SEARCH_OBJECT_H

View File

@ -1,213 +0,0 @@
/**********************************************************************
* File: cube_tuning_params.cpp
* Description: Implementation of the CubeTuningParameters Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <vector>
#include "cube_tuning_params.h"
#include "tuning_params.h"
#include "cube_utils.h"
namespace tesseract {
// Initializes every tuning parameter to its built-in default; the defaults
// are overridden by Load() when a .cube.params file is present.
CubeTuningParams::CubeTuningParams() {
  // cost-combination weights
  reco_wgt_ = 1.0;
  size_wgt_ = 1.0;
  char_bigrams_wgt_ = 1.0;
  word_unigrams_wgt_ = 0.0;
  ood_wgt_ = 1.0;
  num_wgt_ = 1.0;
  // search parameters
  max_seg_per_char_ = 8;
  beam_width_ = 32;
  // classifier / feature configuration
  tp_classifier_ = NN;
  tp_feat_ = BMP;
  conv_grid_size_ = 32;
  hist_wind_wid_ = 0;
  // layout thresholds
  max_word_aspect_ratio_ = 10.0;
  min_space_height_ratio_ = 0.2;
  max_space_height_ratio_ = 0.3;
  min_con_comp_size_ = 0;
  // combiner thresholds
  combiner_run_thresh_ = 1.0;
  combiner_classifier_thresh_ = 0.5;
}
// Empty destructor: this class holds no heap-allocated state of its own.
CubeTuningParams::~CubeTuningParams() {
}
// Factory: builds a CubeTuningParams object by loading
// "<data_file_path><lang>.cube.params". Returns NULL (after logging) when
// the file cannot be loaded; the caller owns the returned object.
CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
                                           const string &lang) {
  string params_path = data_file_path + lang + ".cube.params";
  CubeTuningParams *params = new CubeTuningParams();
  if (params->Load(params_path)) {
    return params;
  }
  fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
          "load tuning parameters from %s\n", params_path.c_str());
  delete params;
  return NULL;
}
// Loads the params file.
// Reads the whole file into memory, splits it into "Name=Value" lines and
// assigns each recognized name to the corresponding member. Returns false
// on I/O failure, malformed lines, unknown parameter names, or invalid
// enum values for Classifier/FeatureType.
bool CubeTuningParams::Load(string tuning_params_file) {
  // load the string into memory
  string param_str;
  if (CubeUtils::ReadFileToString(tuning_params_file, &param_str) == false) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
            "file %s\n", tuning_params_file.c_str());
    return false;
  }
  // split into lines (either CR or LF acts as a separator)
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
  if (str_vec.size() < 8) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
            "in parameter file is too low\n");
    return false;
  }
  // for all entries
  // (cast avoids a signed/unsigned comparison against vector::size())
  for (int entry = 0; entry < static_cast<int>(str_vec.size()); entry++) {
    // tokenize: each line must be exactly "Name=Value"
    vector<string> str_tok;
    CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
    if (str_tok.size() != 2) {
      fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
              "line: %s.\n", str_vec[entry].c_str());
      return false;
    }
    // Parse a numeric value only when the token looks numeric; the
    // enum-valued entries (Classifier, FeatureType) keep val at 0 and are
    // matched by token text below.
    double val = 0;
    char peekchar = (str_tok[1].c_str())[0];
    if ((peekchar >= '0' && peekchar <= '9') ||
        peekchar == '-' || peekchar == '+' ||
        peekchar == '.') {
      // read the value
      if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
                "in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    }
    // token type: route the value to the matching member
    if (str_tok[0] == "RecoWgt") {
      reco_wgt_ = val;
    } else if (str_tok[0] == "SizeWgt") {
      size_wgt_ = val;
    } else if (str_tok[0] == "CharBigramsWgt") {
      char_bigrams_wgt_ = val;
    } else if (str_tok[0] == "WordUnigramsWgt") {
      word_unigrams_wgt_ = val;
    } else if (str_tok[0] == "MaxSegPerChar") {
      max_seg_per_char_ = static_cast<int>(val);
    } else if (str_tok[0] == "BeamWidth") {
      beam_width_ = static_cast<int>(val);
    } else if (str_tok[0] == "Classifier") {
      if (str_tok[1] == "NN") {
        tp_classifier_ = TuningParams::NN;
      } else if (str_tok[1] == "HYBRID_NN") {
        tp_classifier_ = TuningParams::HYBRID_NN;
      } else {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
                "classifier type in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    } else if (str_tok[0] == "FeatureType") {
      if (str_tok[1] == "BMP") {
        tp_feat_ = TuningParams::BMP;
      } else if (str_tok[1] == "CHEBYSHEV") {
        tp_feat_ = TuningParams::CHEBYSHEV;
      } else if (str_tok[1] == "HYBRID") {
        tp_feat_ = TuningParams::HYBRID;
      } else {
        fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
                "type in line: %s.\n", str_vec[entry].c_str());
        return false;
      }
    } else if (str_tok[0] == "ConvGridSize") {
      conv_grid_size_ = static_cast<int>(val);
    } else if (str_tok[0] == "HistWindWid") {
      // Explicit narrowing casts for the integer-valued members, consistent
      // with MaxSegPerChar/BeamWidth/ConvGridSize above (the original
      // relied on implicit double->int conversion here).
      hist_wind_wid_ = static_cast<int>(val);
    } else if (str_tok[0] == "MinConCompSize") {
      min_con_comp_size_ = static_cast<int>(val);
    } else if (str_tok[0] == "MaxWordAspectRatio") {
      max_word_aspect_ratio_ = val;
    } else if (str_tok[0] == "MinSpaceHeightRatio") {
      min_space_height_ratio_ = val;
    } else if (str_tok[0] == "MaxSpaceHeightRatio") {
      max_space_height_ratio_ = val;
    } else if (str_tok[0] == "CombinerRunThresh") {
      combiner_run_thresh_ = val;
    } else if (str_tok[0] == "CombinerClassifierThresh") {
      combiner_classifier_thresh_ = val;
    } else if (str_tok[0] == "OODWgt") {
      ood_wgt_ = val;
    } else if (str_tok[0] == "NumWgt") {
      num_wgt_ = val;
    } else {
      fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
              "in line: %s.\n", str_vec[entry].c_str());
      return false;
    }
  }
  return true;
}
// Save the parameters to a file
// Writes every numeric tuning parameter as a "Name=Value" line in the
// format parsed by Load(). Note: the enum-valued Classifier/FeatureType
// entries are not written. Returns false if the file cannot be opened.
bool CubeTuningParams::Save(string file_name) {
  FILE *params_file = fopen(file_name.c_str(), "wb");
  if (params_file == NULL) {
    fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
            "%s for write.\n", file_name.c_str());
    return false;
  }
  // One "Name=Value" line per parameter; doubles use 4 decimal places.
  fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
  fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
  fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
  fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
  fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
  fprintf(params_file, "BeamWidth=%d\n", beam_width_);
  fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
  fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
  fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
  fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
  fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
  fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
  fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
  fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
          combiner_classifier_thresh_);
  fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
  fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
  fclose(params_file);
  return true;
}
}

View File

@ -1,57 +0,0 @@
/**********************************************************************
* File: cube_tuning_params.h
* Description: Declaration of the CubeTuningParameters Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeTuningParams class abstracts all the parameters that are used
// in Cube and are tuned/learned during the training process. Inherits
// from the TuningParams class.
#ifndef CUBE_TUNING_PARAMS_H
#define CUBE_TUNING_PARAMS_H
#include <string>
#include "tuning_params.h"
namespace tesseract {
// Tuning parameters learned during training, loaded from a per-language
// "<lang>.cube.params" file. Adds the out-of-dictionary (OOD) and numeric
// word weights on top of the parameters inherited from TuningParams.
class CubeTuningParams : public TuningParams {
 public:
  CubeTuningParams();
  ~CubeTuningParams();

  // Accessor functions for the two weights owned by this subclass.
  inline double OODWgt() { return ood_wgt_; }
  inline double NumWgt() { return num_wgt_; }

  inline void SetOODWgt(double wgt) { ood_wgt_ = wgt; }
  inline void SetNumWgt(double wgt) { num_wgt_ = wgt; }

  // Create an object given the data file path and the language by loading
  // the appropriate file. Returns NULL on failure; caller owns the result.
  static CubeTuningParams * Create(const string &data_file,
                                   const string &lang);
  // Save and load the tuning parameters to a specified file
  bool Save(string file_name);
  bool Load(string file_name);

 private:
  // Weight applied to out-of-dictionary word scores.
  double ood_wgt_;
  // Weight applied to numeric word scores.
  double num_wgt_;
};
}
#endif // CUBE_TUNING_PARAMS_H

View File

@ -1,399 +0,0 @@
/**********************************************************************
* File: cube_utils.cpp
* Description: Implementation of the Cube Utilities Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <math.h>
#include <string>
#include <vector>
#include "cube_utils.h"
#include "char_set.h"
#include "unichar.h"
namespace tesseract {
// CubeUtils is a collection of static helpers; instances carry no state,
// so construction and destruction are no-ops.
CubeUtils::CubeUtils() {
}
CubeUtils::~CubeUtils() {
}
/**
 * Converts a probability to a cost (-ve log prob scaled by
 * PROB2COST_SCALE). Probabilities below MIN_PROB are clamped to the
 * fixed ceiling cost MIN_PROB_COST.
 */
int CubeUtils::Prob2Cost(double prob_val) {
  return (prob_val < MIN_PROB)
      ? MIN_PROB_COST
      : static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
}
/**
 * converts a cost to probability
 * Inverse of Prob2Cost: prob = exp(-cost / PROB2COST_SCALE).
 */
double CubeUtils::Cost2Prob(int cost) {
  return exp(-cost / PROB2COST_SCALE);
}
/**
 * Computes the length of a NULL-terminated char_32 string.
 * A NULL pointer is treated as an empty string.
 */
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
  if (char_32_ptr == NULL) {
    return 0;
  }
  int len = 0;
  while (char_32_ptr[len] != 0) {
    ++len;
  }
  return len;
}
/**
 * Compares two char_32 strings in the manner of strcmp: returns a
 * negative value, zero, or a positive value. When one string is a prefix
 * of the other, the shorter string compares less.
 */
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
  // Walk both strings while they match and neither has ended.
  while ((*str1) != 0 && (*str2) != 0) {
    if ((*str1) != (*str2)) {
      return (*str1) - (*str2);
    }
    ++str1;
    ++str2;
  }
  // At least one string is exhausted here.
  if ((*str1) == (*str2)) {
    return 0;  // both ended together: equal
  }
  return ((*str1) == 0) ? -1 : 1;
}
/**
 * Duplicates a 32-bit char buffer into a freshly allocated,
 * NULL-terminated copy. The caller owns (and must delete[]) the result.
 */
char_32 *CubeUtils::StrDup(const char_32 *str32) {
  int char_cnt = StrLen(str32);
  char_32 *duplicate = new char_32[char_cnt + 1];
  memcpy(duplicate, str32, char_cnt * sizeof(*str32));
  // explicit terminator
  duplicate[char_cnt] = 0;
  return duplicate;
}
/**
 * creates a char samp from a specified portion of the image
 *
 * Copies the (left, top, wid, hgt) rectangle of the 1-bpp Pix into a raw
 * one-byte-per-pixel buffer and wraps it in a CharSamp. Returns NULL if
 * the rectangle is invalid or the Pix is not 1-bpp (see GetImageData).
 */
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
                                       int wid, int hgt) {
  // get the raw img data from the image
  unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
  if (temp_buff == NULL) {
    return NULL;
  }
  // create a char samp from temp buffer (FromRawData copies it)
  CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
  // clean up temp buffer
  delete []temp_buff;
  return char_samp;
}
/**
 * Creates a 1-bpp B/W Pix from a CharSamp. Non-zero bytes in the sample
 * (foreground) map to pixel value 0; zero bytes map to 255.
 * Returns NULL on a NULL sample or a failed pixCreate.
 */
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
  // parameter check
  if (char_samp == NULL) {
    return NULL;
  }
  // bitmap geometry of the sample
  int stride = char_samp->Stride();
  int wid = char_samp->Width();
  int hgt = char_samp->Height();
  Pix *pix = pixCreate(wid, hgt, 1);
  if (pix == NULL) {
    return NULL;
  }
  // copy the contents row by row
  unsigned char *line = char_samp->RawData();
  for (int y = 0; y < hgt; y++, line += stride) {
    for (int x = 0; x < wid; x++) {
      pixSetPixel(pix, x, y, (line[x] != 0) ? 0 : 255);
    }
  }
  return pix;
}
/**
 * Creates a raw buffer (one byte per pixel: 0 for foreground, 255 for
 * background) from the specified rectangle of a 1-bpp pix.
 * Returns NULL if the rectangle is out of bounds or the pix is not 1-bpp.
 * The caller owns (and must delete[]) the returned buffer.
 */
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
                                       int wid, int hgt) {
  // Fetch dimensions through the accessor. The original read the
  // pix->w/pix->h/pix->d struct fields directly and then called
  // pixGetDimensions into locals it never used; use the accessor values
  // for the validity check instead.
  l_int32 w;
  l_int32 h;
  l_int32 d;
  pixGetDimensions(pix, &w, &h, &d);
  // skip invalid dimensions
  if (left < 0 || top < 0 || wid < 0 || hgt < 0 ||
      (left + wid) > w || (top + hgt) > h ||
      d != 1) {
    return NULL;
  }
  // copy the char img to a temp buffer
  unsigned char *temp_buff = new unsigned char[wid * hgt];
  l_int32 wpl = pixGetWpl(pix);
  l_uint32 *line = pixGetData(pix) + (top * wpl);
  for (int y = 0, off = 0; y < hgt ; y++, line += wpl) {
    for (int x = 0; x < wid; x++, off++) {
      // a set bit is foreground -> 0; a clear bit is background -> 255
      temp_buff[off] = GET_DATA_BIT(line, x + left) ? 0 : 255;
    }
  }
  return temp_buff;
}
/**
 * read file contents to a string
 * Reads the whole binary contents of file_name into *str. Returns false
 * on open failure, an empty/unsizable file, or a short read; *str is
 * cleared first in all cases.
 */
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
  str->clear();
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return false;
  }
  // determine the file size by seeking to the end
  // (long, not int: ftell returns long, and int truncated sizes >2GB)
  fseek(fp, 0, SEEK_END);
  long file_size = ftell(fp);
  if (file_size < 1) {
    fclose(fp);
    return false;
  }
  // adjust string capacity up front to avoid reallocation on append
  str->reserve(file_size);
  // read the contents
  rewind(fp);
  char *buff = new char[file_size];
  // size_t matches fread's return type (the original narrowed to int)
  size_t read_bytes = fread(buff, 1, static_cast<size_t>(file_size), fp);
  bool success = (read_bytes == static_cast<size_t>(file_size));
  if (success) {
    str->append(buff, file_size);
  }
  delete []buff;
  fclose(fp);
  return success;
}
/**
 * splits a string into vectors based on specified delimiters
 *
 * Any character in delims acts as a separator; runs of consecutive
 * delimiters produce no empty tokens. Tokens are appended to *str_vec
 * (it is not cleared first).
 */
void CubeUtils::SplitStringUsing(const string &str,
                                 const string &delims,
                                 vector<string> *str_vec) {
  // Optimize the common case where delims is a single character.
  // (delims[1] on a one-char std::string reads the terminating '\0',
  // which operator[] defines for index == size().)
  if (delims[0] != '\0' && delims[1] == '\0') {
    char c = delims[0];
    const char* p = str.data();
    const char* end = p + str.size();
    while (p != end) {
      if (*p == c) {
        // skip the delimiter itself
        ++p;
      } else {
        // scan to the end of this token and emit it
        const char* start = p;
        while (++p != end && *p != c);
        str_vec->push_back(string(start, p - start));
      }
    }
    return;
  }
  // General case: alternate find_first_not_of / find_first_of to walk
  // token and delimiter runs.
  string::size_type begin_index, end_index;
  begin_index = str.find_first_not_of(delims);
  while (begin_index != string::npos) {
    end_index = str.find_first_of(delims, begin_index);
    if (end_index == string::npos) {
      // last token runs to the end of the string
      str_vec->push_back(str.substr(begin_index));
      return;
    }
    str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
    begin_index = str.find_first_not_of(delims, end_index);
  }
}
/**
 * UTF-8 to UTF-32 conversion functions
 *
 * Decodes utf8_str one code point at a time with UNICHAR::utf8_step and
 * appends each decoded code point to *str32 (cleared first). Decoding
 * stops at the first invalid UTF-8 sequence.
 */
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
  str32->clear();
  int len = strlen(utf8_str);
  int step = 0;
  for (int ch = 0; ch < len; ch += step) {
    step = UNICHAR::utf8_step(utf8_str + ch);
    if (step > 0) {
      UNICHAR uni_ch(utf8_str + ch, step);
      (*str32) += uni_ch.first_uni();
    } else {
      // utf8_step returns 0 on an invalid sequence; the original then
      // executed ch += 0 and looped forever. Stop decoding instead.
      break;
    }
  }
}
/**
 * UTF-32 to UTF-8 conversion functions
 *
 * Converts each code point of the NULL-terminated utf32_str to its UTF-8
 * byte sequence and appends it to *str (cleared first). Code points for
 * which UNICHAR yields no UTF-8 string are skipped silently.
 */
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
  str->clear();
  for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
    UNICHAR uni_ch((*ch_32));
    char *utf8 = uni_ch.utf8_str();
    if (utf8 != NULL) {
      (*str) += utf8;
      // utf8_str() allocates; we own the buffer
      delete []utf8;
    }
  }
}
// Returns true if the input word is either 1) all-one-case (no
// lower-to-upper or upper-to-lower transition between adjacent cased
// characters), or 2) capitalized: first character upper-case and no other
// upper-case character. If char_set is NULL, C-locale ctype functions are
// used; otherwise Tesseract's UNICHARSET case predicates are used.
bool CubeUtils::IsCaseInvariant(const char_32 *str32, CharSet *char_set) {
  bool all_one_case = true;
  // Fix: the original left 'capitalized' uninitialized and only assigned
  // it when the first character was upper-case, so the final
  // "all_one_case || capitalized" read an indeterminate value for words
  // starting with a non-upper-case character. Initialize to false so such
  // words are judged by all_one_case alone. (Also removed an unused local
  // 'string str8'.)
  bool capitalized = false;
  bool prev_upper;
  bool prev_lower;
  bool first_upper;
  bool first_lower;
  bool cur_upper;
  bool cur_lower;

  if (!char_set) {
    // If cube char_set is missing, use C-locale-dependent functions
    // on UTF8 characters to determine case properties.
    // NOTE(review): passing a char_32 value outside unsigned-char range to
    // isupper/islower is undefined behavior; this path presumably only
    // sees ASCII-ish input -- TODO confirm.
    first_upper = isupper(str32[0]);
    first_lower = islower(str32[0]);
    if (first_upper)
      capitalized = true;
    prev_upper = first_upper;
    prev_lower = first_lower;
    for (int c = 1; str32[c] != 0; ++c) {
      cur_upper = isupper(str32[c]);
      cur_lower = islower(str32[c]);
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  } else {
    UNICHARSET *unicharset = char_set->InternalUnicharset();
    // Use UNICHARSET functions to determine case properties
    first_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
    first_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
    if (first_upper)
      capitalized = true;
    prev_upper = first_upper;
    prev_lower = first_lower;
    // Hoist StrLen out of the loop condition (the original recomputed the
    // string length on every iteration, making the loop quadratic).
    int len = StrLen(str32);
    for (int c = 1; c < len; ++c) {
      cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
      cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
      if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
        all_one_case = false;
      if (cur_upper)
        capitalized = false;
      prev_upper = cur_upper;
      prev_lower = cur_lower;
    }
  }
  return all_one_case || capitalized;
}
// Returns a newly allocated, NULL-terminated lower-case version of str32,
// using the char_set's UNICHARSET for case mapping, or NULL on any error
// (NULL char_set, unmappable character, or a lower-case form that is not
// a single character). Caller must delete[] the result.
char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *lower = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    char_32 ch = str32[i];
    // NOTE(review): this compares a char_32 code point against
    // INVALID_UNICHAR_ID (a unichar id); presumably intended as a sanity
    // check on sentinel values -- verify against callers.
    if (ch == INVALID_UNICHAR_ID) {
      delete [] lower;
      return NULL;
    }
    // convert upper-case characters to lower-case
    if (unicharset->get_isupper(char_set->ClassID(ch))) {
      UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
      const char_32 *str32_lower = char_set->ClassString(uid_lower);
      // expect lower-case version of character to be a single character
      if (!str32_lower || StrLen(str32_lower) != 1) {
        delete [] lower;
        return NULL;
      }
      lower[i] = str32_lower[0];
    } else {
      // non-upper-case characters are copied unchanged
      lower[i] = ch;
    }
  }
  lower[len] = 0;
  return lower;
}
// Returns a newly allocated, NULL-terminated upper-case version of str32,
// using the char_set's UNICHARSET for case mapping, or NULL on any error
// (NULL char_set, unmappable character, or an upper-case form that is not
// a single character). Caller must delete[] the result.
// Mirror image of ToLower() above.
char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  int len = StrLen(str32);
  char_32 *upper = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    char_32 ch = str32[i];
    // NOTE(review): compares a char_32 code point against
    // INVALID_UNICHAR_ID (a unichar id) -- same caveat as in ToLower.
    if (ch == INVALID_UNICHAR_ID) {
      delete [] upper;
      return NULL;
    }
    // convert lower-case characters to upper-case
    if (unicharset->get_islower(char_set->ClassID(ch))) {
      UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
      const char_32 *str32_upper = char_set->ClassString(uid_upper);
      // expect upper-case version of character to be a single character
      if (!str32_upper || StrLen(str32_upper) != 1) {
        delete [] upper;
        return NULL;
      }
      upper[i] = str32_upper[0];
    } else {
      // non-lower-case characters are copied unchanged
      upper[i] = ch;
    }
  }
  upper[len] = 0;
  return upper;
}
} // namespace tesseract

View File

@ -1,83 +0,0 @@
/**********************************************************************
* File: cube_utils.h
* Description: Declaration of the Cube Utilities Class
* Author: Ahmad Abdulkader
* Created: 2008
*
*(C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0(the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The CubeUtils class provides miscellaneous utility and helper functions
// to the rest of the Cube Engine
#ifndef CUBE_UTILS_H
#define CUBE_UTILS_H
#include <vector>
#include <string>
#include "allheaders.h"
#include "const.h"
#include "char_set.h"
#include "char_samp.h"
namespace tesseract {
// Static utility/helper functions for the Cube engine: cost/probability
// conversion, char_32 string handling, Pix<->CharSamp conversion, file
// reading, string splitting, UTF conversion, and case transformations.
class CubeUtils {
 public:
  CubeUtils();
  ~CubeUtils();

  // Converts a probability value to a cost by getting the -log() of the
  // probability value to a known base
  static int Prob2Cost(double prob_val);
  // Converts a cost to probability by getting the exp(-normalized cost)
  static double Cost2Prob(int cost);
  // Computes the length of a 32-bit char buffer
  static int StrLen(const char_32 *str);
  // Compares two 32-bit char buffers
  static int StrCmp(const char_32 *str1, const char_32 *str2);
  // Duplicates a 32-bit char buffer
  static char_32 *StrDup(const char_32 *str);
  // Creates a CharSamp from an Pix and a bounding box
  static CharSamp *CharSampleFromPix(Pix *pix,
                                     int left, int top, int wid, int hgt);
  // Creates a Pix from a CharSamp
  static Pix *PixFromCharSample(CharSamp *char_samp);
  // read the contents of a file to a string
  static bool ReadFileToString(const string &file_name, string *str);
  // split a string into vectors using any of the specified delimiters
  static void SplitStringUsing(const string &str, const string &delims,
                               vector<string> *str_vec);
  // UTF-8 to UTF-32 conversion functions
  static void UTF8ToUTF32(const char *utf8_str, string_32 *str32);
  static void UTF32ToUTF8(const char_32 *utf32_str, string *str);
  // Returns true if input word has either 1) all-one-case, or 2)
  // first character upper-case, and remaining characters lower-case.
  // If char_set is not NULL, uses tesseract's unicharset functions
  // to determine case properties. Otherwise, uses C-locale-dependent
  // functions, which may be unreliable on non-ASCII characters.
  static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the lower-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToLower(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the upper-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToUpper(const char_32 *str32, CharSet *char_set);

 private:
  // Copies the (left, top, wid, hgt) rectangle of a 1-bpp Pix into a
  // newly allocated one-byte-per-pixel buffer owned by the caller.
  static unsigned char *GetImageData(Pix *pix,
                                     int left, int top, int wid, int hgt);
};
} // namespace tesseract
#endif // CUBE_UTILS_H

View File

@ -1,55 +0,0 @@
/**********************************************************************
* File: feature_base.h
* Description: Declaration of the Feature Base Class
* Author: Ping Ping (xiupingping), Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The FeatureBase class is the base class for any Feature Extraction class
// It provides 3 pure virtual functions (to inherit):
// 1- FeatureCnt: A method that returns the count of features
// 2- ComputeFeatures: A method to compute the features for a given CharSamp
// 3- ComputeFeatureBitmap: A method to render a visualization of the features
// to a CharSamp. This is mainly used by visual-debuggers
#ifndef FEATURE_BASE_H
#define FEATURE_BASE_H
#include "char_samp.h"
#include "tuning_params.h"
namespace tesseract {
// Abstract base class for Cube feature extractors. Holds the shared
// TuningParams pointer (not owned) and defines the extraction interface.
class FeatureBase {
 public:
  explicit FeatureBase(TuningParams *params)
      : params_(params) {
  }
  virtual ~FeatureBase() {}

  // Compute the features for a given CharSamp into the caller-provided
  // array of FeatureCnt() floats; returns false on failure.
  virtual bool ComputeFeatures(CharSamp *char_samp, float *features) = 0;
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *char_samp) = 0;
  // Returns the count of features
  virtual int FeatureCnt() = 0;

 protected:
  // Tuning parameters shared with the rest of the engine; not owned.
  TuningParams *params_;
};
}
#endif // FEATURE_BASE_H

View File

@ -1,50 +0,0 @@
/**********************************************************************
* File: feature_bmp.cpp
* Description: Implementation of the Bitmap Feature Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "feature_base.h"
#include "feature_bmp.h"
#include "cube_utils.h"
#include "const.h"
#include "char_samp.h"
namespace tesseract {
// Constructor: caches the convolution grid size from the tuning
// parameters; the bitmap is scaled to this grid for feature extraction.
FeatureBmp::FeatureBmp(TuningParams *params)
    :FeatureBase(params) {
  conv_grid_size_ = params->ConvGridSize();
}
// Destructor: nothing owned beyond the cached int.
FeatureBmp::~FeatureBmp() {
}
// Render a visualization of the features to a CharSamp.
// This is mainly used by visual-debuggers: the visualization of a bitmap
// feature is simply the sample scaled to the convolution grid.
CharSamp *FeatureBmp::ComputeFeatureBitmap(CharSamp *char_samp) {
  return char_samp->Scale(conv_grid_size_, conv_grid_size_);
}
// Compute the features for a given CharSamp: delegates to the sample's
// own ComputeFeatures using the cached grid size. The caller provides an
// array of FeatureCnt() floats.
bool FeatureBmp::ComputeFeatures(CharSamp *char_samp, float *features) {
  return char_samp->ComputeFeatures(conv_grid_size_, features);
}
}

View File

@ -1,53 +0,0 @@
/**********************************************************************
* File: feature_bmp.h
* Description: Declaration of the Bitmap Feature Class
* Author: PingPing xiu (xiupingping) & Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The FeatureBmp class implements a Bitmap feature extractor class. It
// inherits from the FeatureBase class
// The Bitmap feature vector is the bitmap of the specified CharSamp
// scaled to a fixed grid size and then augmented by 5 aux features that
// describe the size, aspect ratio and placement within a word
#ifndef FEATURE_BMP_H
#define FEATURE_BMP_H
#include "char_samp.h"
#include "feature_base.h"
namespace tesseract {
// Bitmap feature extractor: the feature vector is the sample's bitmap
// scaled to a conv_grid_size_ x conv_grid_size_ grid plus 5 auxiliary
// features (see FeatureCnt()).
class FeatureBmp : public FeatureBase {
 public:
  explicit FeatureBmp(TuningParams *params);
  virtual ~FeatureBmp();

  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the scaled grid plus 5 aux features.
  virtual int FeatureCnt() {
    return 5 + (conv_grid_size_ * conv_grid_size_);
  }

 protected:
  // grid size, cached from the TuningParams object
  int conv_grid_size_;
};
}
#endif // FEATURE_BMP_H

View File

@ -1,138 +0,0 @@
/**********************************************************************
* File: feature_chebyshev.cpp
* Description: Implementation of the Chebyshev coefficients Feature Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string>
#include <vector>
#include <algorithm>
#include "feature_base.h"
#include "feature_chebyshev.h"
#include "cube_utils.h"
#include "const.h"
#include "char_samp.h"
namespace tesseract {
// Constructor: no state beyond the TuningParams held by FeatureBase.
FeatureChebyshev::FeatureChebyshev(TuningParams *params)
    : FeatureBase(params) {
}
// Destructor: nothing to release.
FeatureChebyshev::~FeatureChebyshev() {
}
// Render a visualization of the features to a CharSamp.
// This is mainly used by visual-debuggers. Chebyshev features have no
// separate rendering; the input sample is returned unchanged.
CharSamp *FeatureChebyshev::ComputeFeatureBitmap(CharSamp *char_samp) {
  return char_samp;
}
// Compute Chebyshev coefficients for the specified vector.
// input:     uniformly sampled function values
// coeff_cnt: number of coefficients to produce (also the number of
//            Chebyshev nodes used for resampling)
// coeff:     output array of at least coeff_cnt floats
void FeatureChebyshev::ChebyshevCoefficients(const vector<float> &input,
                                             int coeff_cnt, float *coeff) {
  // re-sample function at the Chebyshev nodes
  // x_k = input_range * (1 + cos(pi * (k + 0.5) / N)) / 2
  int input_range = (input.size() - 1);
  vector<float> resamp(coeff_cnt);
  for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
    // compute sampling position
    float samp_pos = input_range *
        (1 + cos(M_PI * (samp_idx + 0.5) / coeff_cnt)) / 2;
    // interpolate between the floor sample and the rounded sample
    // (samp_end = round(samp_pos), so both indices stay within input)
    int samp_start = static_cast<int>(samp_pos);
    int samp_end = static_cast<int>(samp_pos + 0.5);
    float func_delta = input[samp_end] - input[samp_start];
    resamp[samp_idx] = input[samp_start] +
        ((samp_pos - samp_start) * func_delta);
  }
  // compute the coefficients:
  // c_j = (2/N) * sum_k resamp[k] * cos(pi * j * (k + 0.5) / N)
  float normalizer = 2.0 / coeff_cnt;
  for (int coeff_idx = 0; coeff_idx < coeff_cnt; coeff_idx++, coeff++) {
    double sum = 0.0;
    for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
      sum += resamp[samp_idx] * cos(M_PI * coeff_idx * (samp_idx + 0.5) /
                                    coeff_cnt);
    }
    (*coeff) = (normalizer * sum);
  }
}
// Compute the features of a given CharSamp: the Chebyshev coefficients
// of the sample's four bitmap profiles (see below).
bool FeatureChebyshev::ComputeFeatures(CharSamp *char_samp, float *features) {
  return ComputeChebyshevCoefficients(char_samp, features);
}
// Compute the Chebyshev coefficients of a given CharSamp.
// Builds four profile "time sequences" of the sample's bitmap (left,
// top, right, bottom edge-distance profiles, each normalized to [0, 1])
// and writes kChebychevCoefficientCnt Chebyshev coefficients per profile
// into the features array (4 * kChebychevCoefficientCnt floats total).
// Returns false if the sample has no valid normalized baseline.
bool FeatureChebyshev::ComputeChebyshevCoefficients(CharSamp *char_samp,
                                                    float *features) {
  if (char_samp->NormBottom() <= 0) {
    return false;
  }
  unsigned char *raw_data = char_samp->RawData();
  int stride = char_samp->Stride();
  // compute the height of the word
  int word_hgt = (255 * (char_samp->Top() + char_samp->Height()) /
                  char_samp->NormBottom());
  // compute left & right profiles: per row, the normalized distance from
  // each side to the first foreground (zero) pixel; 0 for empty rows
  vector<float> left_profile(word_hgt, 0.0);
  vector<float> right_profile(word_hgt, 0.0);
  unsigned char *line_data = raw_data;
  for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
    int min_x = char_samp->Width();
    int max_x = -1;
    for (int x = 0; x < char_samp->Width(); x++) {
      if (line_data[x] == 0) {
        UpdateRange(x, &min_x, &max_x);
      }
    }
    left_profile[char_samp->Top() + y] =
        1.0 * (min_x == char_samp->Width() ? 0 : (min_x + 1)) /
        char_samp->Width();
    right_profile[char_samp->Top() + y] =
        1.0 * (max_x == -1 ? 0 : char_samp->Width() - max_x) /
        char_samp->Width();
  }
  // compute top and bottom profiles: per column, the normalized distance
  // from the word's top/bottom to the first foreground pixel
  vector<float> top_profile(char_samp->Width(), 0);
  vector<float> bottom_profile(char_samp->Width(), 0);
  for (int x = 0; x < char_samp->Width(); x++) {
    int min_y = word_hgt;
    int max_y = -1;
    line_data = raw_data;
    for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
      if (line_data[x] == 0) {
        UpdateRange(y + char_samp->Top(), &min_y, &max_y);
      }
    }
    top_profile[x] = 1.0 * (min_y == word_hgt ? 0 : (min_y + 1)) / word_hgt;
    bottom_profile[x] = 1.0 * (max_y == -1 ? 0 : (word_hgt - max_y)) / word_hgt;
  }
  // compute the chebyshev coefficients of each profile, packed
  // consecutively into the output array
  ChebyshevCoefficients(left_profile, kChebychevCoefficientCnt, features);
  ChebyshevCoefficients(top_profile, kChebychevCoefficientCnt,
                        features + kChebychevCoefficientCnt);
  ChebyshevCoefficients(right_profile, kChebychevCoefficientCnt,
                        features + (2 * kChebychevCoefficientCnt));
  ChebyshevCoefficients(bottom_profile, kChebychevCoefficientCnt,
                        features + (3 * kChebychevCoefficientCnt));
  return true;
}
} // namespace tesseract

View File

@ -1,57 +0,0 @@
/**********************************************************************
* File: feature_chebyshev.h
* Description: Declaration of the Chebyshev coefficients Feature Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The FeatureChebyshev class implements a Bitmap feature extractor class. It
// inherits from the FeatureBase class
// The feature vector is composed of the Chebyshev coefficients of 4 time
// sequences. The time sequences are the left, top, right & bottom
// bitmap profiles of the input samples
#ifndef FEATURE_CHEBYSHEV_H
#define FEATURE_CHEBYSHEV_H
#include "char_samp.h"
#include "feature_base.h"
namespace tesseract {
// Chebyshev feature extractor: the feature vector is the Chebyshev
// coefficients of the four bitmap profiles (left, top, right, bottom)
// of the input sample.
class FeatureChebyshev : public FeatureBase {
 public:
  explicit FeatureChebyshev(TuningParams *params);
  virtual ~FeatureChebyshev();

  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: one coefficient block per profile.
  virtual int FeatureCnt() {
    return (4 * kChebychevCoefficientCnt);
  }

 protected:
  // Number of Chebyshev coefficients computed per profile.
  static const int kChebychevCoefficientCnt = 40;
  // Compute Chebychev coefficients for the specified vector
  void ChebyshevCoefficients(const vector<float> &input,
                             int coeff_cnt, float *coeff);
  // Compute the features for a given CharSamp
  bool ComputeChebyshevCoefficients(CharSamp *samp, float *features);
};
}
#endif // FEATURE_CHEBYSHEV_H

View File

@ -1,64 +0,0 @@
/**********************************************************************
* File: feature_hybrid.cpp
* Description: Implementation of the Hybrid (Bitmap + Chebyshev) Feature Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string>
#include <vector>
#include <algorithm>
#include "feature_base.h"
#include "feature_hybrid.h"
#include "cube_utils.h"
#include "const.h"
#include "char_samp.h"
namespace tesseract {
// Construct a hybrid extractor that concatenates bitmap features with
// Chebyshev-coefficient features; both sub-extractors share the same params.
FeatureHybrid::FeatureHybrid(TuningParams *params)
    : FeatureBase(params) {
  feature_chebyshev_ = new FeatureChebyshev(params);
  feature_bmp_ = new FeatureBmp(params);
}
// Release both owned sub-extractors.
FeatureHybrid::~FeatureHybrid() {
  delete feature_chebyshev_;
  delete feature_bmp_;
}
// Render a visualization of the features to a CharSamp.
// This is mainly used by visual-debuggers. The hybrid extractor has no
// dedicated rendering, so the sample is returned unchanged.
CharSamp *FeatureHybrid::ComputeFeatureBitmap(CharSamp *char_samp) {
  CharSamp *rendered = char_samp;
  return rendered;
}
// Compute the features of a given CharSamp.
// The bitmap features are written first, followed by the Chebyshev features
// starting at offset feature_bmp_->FeatureCnt(). Returns false if either
// sub-extractor is missing or fails.
bool FeatureHybrid::ComputeFeatures(CharSamp *char_samp, float *features) {
  if (feature_bmp_ != NULL && feature_chebyshev_ != NULL &&
      feature_bmp_->ComputeFeatures(char_samp, features)) {
    // Append the Chebyshev features right after the bitmap features.
    return feature_chebyshev_->ComputeFeatures(
        char_samp, features + feature_bmp_->FeatureCnt());
  }
  return false;
}
} // namespace tesseract

View File

@ -1,56 +0,0 @@
/**********************************************************************
 * File:        feature_hybrid.h
 * Description: Declaration of the Hybrid (Bitmap + Chebyshev) Feature Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The FeatureHybrid class implements a Bitmap feature extractor class. It
// inherits from the FeatureBase class
// This class describes the a hybrid feature vector composed by combining
// the bitmap and the chebyshev feature vectors
#ifndef FEATURE_HYBRID_H
#define FEATURE_HYBRID_H
#include "char_samp.h"
#include "feature_bmp.h"
#include "feature_chebyshev.h"
namespace tesseract {
// Hybrid feature extractor: concatenates the feature vectors produced by a
// FeatureBmp extractor and a FeatureChebyshev extractor. Owns both
// sub-extractors (created in the ctor, deleted in the dtor).
class FeatureHybrid : public FeatureBase {
 public:
  explicit FeatureHybrid(TuningParams *params);
  virtual ~FeatureHybrid();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp.
  // "features" must have room for FeatureCnt() floats.
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the sum of both sub-extractors'
  // counts, or 0 if either sub-extractor is missing.
  virtual int FeatureCnt() {
    if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) {
      return 0;
    }
    return feature_bmp_->FeatureCnt() + feature_chebyshev_->FeatureCnt();
  }
 protected:
  // Owned sub-extractors; allocated in the constructor.
  FeatureBmp *feature_bmp_;
  FeatureChebyshev *feature_chebyshev_;
};
}
#endif // FEATURE_HYBRID_H

View File

@ -1,346 +0,0 @@
/**********************************************************************
 * File:        hybrid_neural_net_classifier.cpp
 * Description: Implementation of the Hybrid Neural-Net Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <wctype.h>
#include "classifier_base.h"
#include "char_set.h"
#include "const.h"
#include "conv_net_classifier.h"
#include "cube_utils.h"
#include "feature_base.h"
#include "feature_bmp.h"
#include "hybrid_neural_net_classifier.h"
#include "tess_lang_model.h"
namespace tesseract {
// Construct the hybrid classifier. The net input/output buffers are
// allocated lazily on the first call to RunNets(), so they start out NULL.
HybridNeuralNetCharClassifier::HybridNeuralNetCharClassifier(
    CharSet *char_set,
    TuningParams *params,
    FeatureBase *feat_extract)
    : CharClassifier(char_set, params, feat_extract) {
  net_output_ = NULL;
  net_input_ = NULL;
}
// Free the owned nets and the lazily allocated I/O buffers.
// (delete / delete[] on NULL is a no-op, so no guards are needed.)
HybridNeuralNetCharClassifier::~HybridNeuralNetCharClassifier() {
  for (int i = 0; i < static_cast<int>(nets_.size()); ++i) {
    delete nets_[i];
  }
  nets_.clear();
  delete []net_input_;
  net_input_ = NULL;
  delete []net_output_;
  net_output_ = NULL;
}
// The main training function. Given a sample and a class ID the classifier
// updates its parameters according to its learning algorithm. This function
// is currently not implemented. TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
  // Deliberate stub: run-time training is unsupported, so always report
  // failure.
  return false;
}
// A secondary function needed for training. Allows the trainer to set the
// value of any train-time parameter. This function is currently not
// implemented. TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implementation of parameter initializing.
  // Deliberate stub: no train-time parameters exist yet.
  return false;
}
// Folds the output of the NeuralNet using the loaded folding sets.
// Two folding passes are applied to net_output_:
//  1. Case folding (only when case_sensitive_ is false): each class and its
//     upper-case counterpart both receive the max of their two activations.
//  2. Folding sets: each member of a set receives at least
//     kFoldingRatio * (max activation within the set).
void HybridNeuralNetCharClassifier::Fold() {
  // in case insensitive mode
  if (case_sensitive_ == false) {
    int class_cnt = char_set_->ClassCount();
    // fold case
    for (int class_id = 0; class_id < class_cnt; class_id++) {
      // get class string
      const char_32 *str32 = char_set_->ClassString(class_id);
      // get the upper case form of the string (only alphabetic code points
      // are upper-cased; others are left untouched)
      string_32 upper_form32 = str32;
      for (int ch = 0; ch < upper_form32.length(); ch++) {
        if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
          upper_form32[ch] = towupper(upper_form32[ch]);
        }
      }
      // find out the upperform class-id if any (-1 means the upper-case
      // form is not a class in the char set)
      int upper_class_id =
          char_set_->ClassID(reinterpret_cast<const char_32 *>(
              upper_form32.c_str()));
      if (upper_class_id != -1 && class_id != upper_class_id) {
        // both casings get the stronger of the two activations
        float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
        net_output_[class_id] = max_out;
        net_output_[upper_class_id] = max_out;
      }
    }
  }
  // The folding sets specify how groups of classes should be folded
  // Folding involved assigning a min-activation to all the members
  // of the folding set. The min-activation is a fraction of the max-activation
  // of the members of the folding set
  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    // find the max activation within the set
    float max_prob = net_output_[fold_sets_[fold_set][0]];
    for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
      if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
        max_prob = net_output_[fold_sets_[fold_set][ch]];
      }
    }
    // raise every member to at least kFoldingRatio * max_prob
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
          net_output_[fold_sets_[fold_set][ch]]);
    }
  }
}
// compute the features of specified charsamp and
// feedforward the specified nets.
// net_output_ ends up holding the weighted sum of all nets' outputs
// (weights from net_wgts_), after folding. Returns false on any failure.
bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) {
  int feat_cnt = feat_extract_->FeatureCnt();
  int class_cnt = char_set_->ClassCount();
  // allocate i/p and o/p buffers if needed.
  // NOTE: both buffers are allocated together and only the input pointer is
  // checked, so they are assumed to be NULL (or valid) in lock-step.
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    net_output_ = new float[class_cnt];
  }
  // compute input features
  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
    return false;
  }
  // go through all the nets, accumulating their weighted outputs
  memset(net_output_, 0, class_cnt * sizeof(*net_output_));
  float *inputs = net_input_;
  for (int net_idx = 0; net_idx < nets_.size(); net_idx++) {
    // run each net
    vector<float> net_out(class_cnt, 0.0);
    if (!nets_[net_idx]->FeedForward(inputs, &net_out[0])) {
      return false;
    }
    // add the output values, scaled by this net's weight
    for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
      net_output_[class_idx] += (net_out[class_idx] * net_wgts_[net_idx]);
    }
    // increment inputs pointer: each net consumes its own contiguous slice
    // of the feature vector (LoadNets verified the slices sum to feat_cnt)
    inputs += nets_[net_idx]->in_cnt();
  }
  Fold();
  return true;
}
// Return the cost of the sample being a character, derived from the
// net activation of class 0 (prob 1 - net_output_[0]).
// It is by design that the cost is zero when the nets cannot be run:
// no nets are present during training.
int HybridNeuralNetCharClassifier::CharCost(CharSamp *char_samp) {
  if (!RunNets(char_samp)) {
    return 0;
  }
  return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
}
// Classifies a charsamp and returns a newly allocated alternate list of
// chars sorted by char costs (caller takes ownership), or NULL if the
// nets could not be run. Class 0 is skipped when building the list.
CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) {
  if (!RunNets(char_samp)) {
    return NULL;
  }
  const int class_cnt = char_set_->ClassCount();
  CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
  // convert each activation (except class 0) to a cost and insert it
  for (int class_id = 1; class_id < class_cnt; ++class_id) {
    alt_list->Insert(class_id, CubeUtils::Prob2Cost(net_output_[class_id]));
  }
  return alt_list;
}
// set an external net (for training purposes)
void HybridNeuralNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
  // Intentionally a no-op in this implementation; the supplied net is
  // ignored.
}
// Load folding sets from <data_file_path><lang>.cube.fold.
// Each line of the file is one folding set: a string of characters whose
// activations are folded together (see Fold()). Invalid characters are
// stripped via the language model; sets left with <= 1 character are
// invalidated (length 0, NULL array) with a warning.
// This function returns true on success or if the file can't be read,
// returns false if an error is encountered.
bool HybridNeuralNetCharClassifier::LoadFoldingSets(
    const string &data_file_path, const string &lang, LangModel *lang_mod) {
  fold_set_cnt_ = 0;
  string fold_file_name;
  fold_file_name = data_file_path + lang;
  fold_file_name += ".cube.fold";
  // folding sets are optional: absence of the file is not an error
  FILE *fp = fopen(fold_file_name.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);
  string fold_sets_str;
  if (!CubeUtils::ReadFileToString(fold_file_name,
                                   &fold_sets_str)) {
    return false;
  }
  // split into lines, one folding set per line
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
  fold_set_cnt_ = str_vec.size();
  fold_sets_ = new int *[fold_set_cnt_];
  fold_set_len_ = new int[fold_set_cnt_];
  for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
    reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
        &str_vec[fold_set]);
    // if all or all but one character are invalid, invalidate this set
    if (str_vec[fold_set].length() <= 1) {
      // Fixed: the warning previously named ConvNetCharClassifier, which is
      // a different class; report the actual one.
      fprintf(stderr, "Cube WARNING "
              "(HybridNeuralNetCharClassifier::LoadFoldingSets): "
              "invalidating folding set %d\n", fold_set);
      fold_set_len_[fold_set] = 0;
      fold_sets_[fold_set] = NULL;
      continue;
    }
    // convert the set to UTF-32 and map each character to its class ID
    string_32 str32;
    CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
    fold_set_len_[fold_set] = str32.length();
    fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
    for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
      fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
    }
  }
  return true;
}
// Init the classifier provided a data-path and a language string.
// Idempotent: a second call returns true immediately. Loads the nets and
// then the folding sets; either loader returns true when its (optional)
// file is absent but false when the file fails its sanity checks.
bool HybridNeuralNetCharClassifier::Init(const string &data_file_path,
                                         const string &lang,
                                         LangModel *lang_mod) {
  if (init_) {
    return true;
  }
  // nets first, then folding sets; short-circuit on the first failure
  init_ = LoadNets(data_file_path, lang) &&
          LoadFoldingSets(data_file_path, lang, lang_mod);
  return init_;
}
// Load the classifier's Neural Nets from <data_file_path><lang>.cube.hybrid.
// Each line of that file is "<net_file_name> <weight>"; the named net is
// loaded and its output is later combined using the given weight (see
// RunNets). This function will return true if the net file does not exist.
// But will fail if the net did not pass the sanity checks.
bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path,
                                             const string &lang) {
  string hybrid_net_file;
  string junk_net_file;  // unused
  // add the lang identifier
  hybrid_net_file = data_file_path + lang;
  hybrid_net_file += ".cube.hybrid";
  // neural network is optional: probe for existence only
  FILE *fp = fopen(hybrid_net_file.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);
  string str;
  if (!CubeUtils::ReadFileToString(hybrid_net_file, &str)) {
    return false;
  }
  // split into lines, one net spec per line
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
  if (str_vec.empty()) {
    return false;
  }
  // create and add the nets
  nets_.resize(str_vec.size(), NULL);
  net_wgts_.resize(str_vec.size(), 0);
  int total_input_size = 0;
  for (int net_idx = 0; net_idx < str_vec.size(); net_idx++) {
    // parse the string
    vector<string> tokens_vec;
    CubeUtils::SplitStringUsing(str_vec[net_idx], " \t", &tokens_vec);
    // has to be 2 tokens: net name and combination weight
    if (tokens_vec.size() != 2) {
      return false;
    }
    // load the net
    string net_file_name = data_file_path + tokens_vec[0];
    nets_[net_idx] = tesseract::NeuralNet::FromFile(net_file_name);
    if (nets_[net_idx] == NULL) {
      return false;
    }
    // parse the net's combination weight and validate it is non-negative
    net_wgts_[net_idx] = atof(tokens_vec[1].c_str());
    if (net_wgts_[net_idx] < 0.0) {
      return false;
    }
    total_input_size += nets_[net_idx]->in_cnt();
  }
  // validate total input count: the nets' input slices must exactly
  // consume the feature vector (RunNets relies on this)
  if (total_input_size != feat_extract_->FeatureCnt()) {
    return false;
  }
  // success
  return true;
}
} // tesseract

View File

@ -1,90 +0,0 @@
/**********************************************************************
 * File:        hybrid_neural_net_classifier.h
 * Description: Declaration of the Hybrid Neural-Net Character Classifier
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef HYBRID_NEURAL_NET_CLASSIFIER_H
#define HYBRID_NEURAL_NET_CLASSIFIER_H
#include <string>
#include <vector>
#include "char_samp.h"
#include "char_altlist.h"
#include "char_set.h"
#include "classifier_base.h"
#include "feature_base.h"
#include "lang_model.h"
#include "neural_net.h"
#include "tuning_params.h"
namespace tesseract {
// Folding Ratio is the ratio of the max-activation of members of a folding
// set that is used to compute the min-activation of the rest of the set
// static const float kFoldingRatio = 0.75; // see conv_net_classifier.h
// Character classifier that combines the weighted outputs of an ensemble of
// NeuralNets (nets_ scaled by net_wgts_) run over disjoint slices of the
// feature vector, then folds the activations (case folding + folding sets).
class HybridNeuralNetCharClassifier : public CharClassifier {
 public:
  HybridNeuralNetCharClassifier(CharSet *char_set, TuningParams *params,
                                FeatureBase *feat_extract);
  virtual ~HybridNeuralNetCharClassifier();
  // The main training function. Given a sample and a class ID the classifier
  // updates its parameters according to its learning algorithm. This function
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training
  void SetNet(tesseract::NeuralNet *net);
  // Classifies an input charsamp and return a CharAltList object containing
  // the possible candidates and corresponding scores
  virtual CharAltList *Classify(CharSamp *char_samp);
  // Computes the cost of a specific charsamp being a character (versus a
  // non-character: part-of-a-character OR more-than-one-character)
  virtual int CharCost(CharSamp *char_samp);
 private:
  // Neural Net ensemble used for classification, with per-net combination
  // weights (parallel vectors).
  vector<tesseract::NeuralNet *> nets_;
  vector<float> net_wgts_;
  // data buffers used to hold Neural Net inputs and outputs;
  // lazily allocated on first use.
  float *net_input_;
  float *net_output_;
  // Init the classifier provided a data-path and a language string
  virtual bool Init(const string &data_file_path, const string &lang,
                    LangModel *lang_mod);
  // Loads the NeuralNets needed for the classifier
  bool LoadNets(const string &data_file_path, const string &lang);
  // Load folding sets
  // This function returns true on success or if the file can't be read,
  // returns false if an error is encountered.
  virtual bool LoadFoldingSets(const string &data_file_path,
                               const string &lang,
                               LangModel *lang_mod);
  // Folds the output of the NeuralNet using the loaded folding sets
  virtual void Fold();
  // Scales the input char_samp and feeds it to the NeuralNet as input
  bool RunNets(CharSamp *char_samp);
};
}
#endif // HYBRID_NEURAL_NET_CLASSIFIER_H

View File

@ -1,73 +0,0 @@
/**********************************************************************
* File: lang_mod_edge.h
* Description: Declaration of the Language Model Edge Base Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The LangModEdge abstracts an Edge in the language model trie
// This is an abstract class that any Language Model Edge should inherit from
// It provides methods for:
// 1- Returns the class ID corresponding to the edge
// 2- If the edge is a valid EndOfWord (EOW)
// 3- If the edge is coming from a OutOfDictionary (OOF) state machine
// 4- If the edge is a Terminal (has no children)
// 5- A Hash of the edge that will be used to retrieve the edge
// quickly from the BeamSearch lattice
// 6- If two edges are identical
// 7- Returns a verbal description of the edge (use by debuggers)
// 8- the language model cost of the edge (if any)
// 9- The string corresponding to this edge
// 10- Getting and setting the "Root" status of the edge
#ifndef LANG_MOD_EDGE_H
#define LANG_MOD_EDGE_H
#include "cube_tuning_params.h"
#include "char_set.h"
namespace tesseract {
// Abstract interface for an edge in the language model trie.
// Concrete language models implement these accessors so the beam search can
// traverse, hash, and compare edges without knowing the model's internals.
class LangModEdge {
 public:
  LangModEdge() {}
  virtual ~LangModEdge() {}
  // The string corresponding to this edge
  virtual const char_32 * EdgeString() const = 0;
  // Returns the class ID corresponding to the edge
  virtual int ClassID() const = 0;
  // If the edge is the root edge
  virtual bool IsRoot() const = 0;
  // Set the Root flag
  virtual void SetRoot(bool flag) = 0;
  // If the edge is a valid EndOfWord (EOW)
  virtual bool IsEOW() const = 0;
  // is the edge is coming from a OutOfDictionary (OOD) state machine
  virtual bool IsOOD() const = 0;
  // Is the edge is a Terminal (has no children)
  virtual bool IsTerminal() const = 0;
  // Returns A hash of the edge that will be used to retrieve the edge
  // quickly from the BeamSearch lattice
  virtual unsigned int Hash() const = 0;
  // Are the two edges identical?
  virtual bool IsIdentical(LangModEdge *edge) const = 0;
  // a verbal description of the edge (used by debuggers)
  virtual char *Description() const = 0;
  // the language model cost of the edge (if any)
  virtual int PathCost() const = 0;
};
}
#endif // LANG_MOD_EDGE_H

View File

@ -1,78 +0,0 @@
/**********************************************************************
* File: lang_model.h
* Description: Declaration of the Language Model Edge Base Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The LanguageModel class abstracts a State machine that is modeled as a Trie
// structure. The state machine models the language being recognized by the OCR
// Engine
// This is an abstract class that is to be inherited by any language model
#ifndef LANG_MODEL_H
#define LANG_MODEL_H
#include "lang_mod_edge.h"
#include "char_altlist.h"
#include "char_set.h"
#include "tuning_params.h"
namespace tesseract {
// Abstract base class for a language model represented as a trie-shaped
// state machine. Provides traversal (Root/GetEdges), sequence validation,
// and four feature toggles (OOD, numeric, word list, punctuation), all of
// which default to enabled.
class LangModel {
 public:
  LangModel() {
    // all language model features start out enabled
    ood_enabled_ = true;
    numeric_enabled_ = true;
    word_list_enabled_ = true;
    punc_enabled_ = true;
  }
  virtual ~LangModel() {}
  // Returns an edge pointer to the Root
  virtual LangModEdge *Root() = 0;
  // Returns the edges that fan-out of the specified edge and their count
  virtual LangModEdge **GetEdges(CharAltList *alt_list,
                                 LangModEdge *parent_edge,
                                 int *edge_cnt) = 0;
  // Returns if a sequence of 32-bit characters is valid within this language
  // model or not. An EndOfWord flag is specified. If true, the sequence has
  // to end on a valid word. The function also optionally returns the list
  // of language model edges traversed to parse the string
  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
                               LangModEdge **edge_array = NULL) = 0;
  virtual bool IsLeadingPunc(char_32 ch) = 0;
  virtual bool IsTrailingPunc(char_32 ch) = 0;
  virtual bool IsDigit(char_32 ch) = 0;
  // accessor functions for the feature toggles
  inline bool OOD() { return ood_enabled_; }
  inline bool Numeric() { return numeric_enabled_; }
  inline bool WordList() { return word_list_enabled_; }
  inline bool Punc() { return punc_enabled_; }
  inline void SetOOD(bool ood) { ood_enabled_ = ood; }
  inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
  inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
  inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }
 protected:
  bool ood_enabled_;
  bool numeric_enabled_;
  bool word_list_enabled_;
  bool punc_enabled_;
};
}
#endif // LANG_MODEL_H

View File

@ -1,217 +0,0 @@
/**********************************************************************
* File: search_column.cpp
* Description: Implementation of the Beam Search Column Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "search_column.h"
#include <stdlib.h>
namespace tesseract {
// Construct an empty, uninitialized column. The node array and hash table
// are created lazily (Init/AddNode); cost bounds start at their extremes so
// the first node added will tighten both.
SearchColumn::SearchColumn(int col_idx, int max_node) {
  init_ = false;
  col_idx_ = col_idx;
  max_node_cnt_ = max_node;
  node_cnt_ = 0;
  node_array_ = NULL;
  node_hash_table_ = NULL;
  max_cost_ = 0;
  min_cost_ = INT_MAX;
}
// Release the node array (and every node in it), free the hash table, and
// mark the column uninitialized. delete on a NULL node is a no-op, so no
// per-element guard is needed.
void SearchColumn::Cleanup() {
  if (node_array_ != NULL) {
    for (int i = 0; i < node_cnt_; ++i) {
      delete node_array_[i];
    }
    delete []node_array_;
    node_array_ = NULL;
  }
  FreeHashTable();
  init_ = false;
}
// Destructor: delegates all teardown to Cleanup().
SearchColumn::~SearchColumn() {
  Cleanup();
}
// Lazily initialize the column: create the node hash table on first call.
// Idempotent; always returns true.
bool SearchColumn::Init() {
  if (!init_) {
    // create hash table if it does not exist yet
    if (node_hash_table_ == NULL) {
      node_hash_table_ = new SearchNodeHashTable();
    }
    init_ = true;
  }
  return true;
}
// Prune the nodes if necessary. Pruning is done such that a max
// number of nodes is kept, i.e., the beam width.
// Strategy: build a histogram of node costs over kScoreBins bins spanning
// [min_cost_, max_cost_], find the cost threshold at which the cumulative
// node count would exceed max_node_cnt_, then compact node_array_ in place,
// deleting nodes above the threshold (or beyond the count cap).
void SearchColumn::Prune() {
  // no need to prune
  if (node_cnt_ <= max_node_cnt_) {
    return;
  }
  // compute the cost histogram
  memset(score_bins_, 0, sizeof(score_bins_));
  int cost_range = max_cost_ - min_cost_ + 1;
  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
    int cost_bin = static_cast<int>(
        ((node_array_[node_idx]->BestCost() - min_cost_) *
         kScoreBins) / static_cast<double>(cost_range));
    // clamp into the last bin
    if (cost_bin >= kScoreBins) {
      cost_bin = kScoreBins - 1;
    }
    score_bins_[cost_bin]++;
  }
  // determine the pruning cost by scanning the cost histogram from
  // least to greatest cost bins and finding the cost at which the
  // max number of nodes is exceeded
  int pruning_cost = 0;
  int new_node_cnt = 0;
  for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) {
    if (new_node_cnt > 0 &&
        (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) {
      // map the bin index back to a cost value
      pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins);
      break;
    }
    new_node_cnt += score_bins_[cost_bin];
  }
  // prune out all the nodes above this cost, compacting survivors to the
  // front of node_array_ (new_node_cnt is reused as the write index)
  for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) {
    // prune this node out
    if (node_array_[node_idx]->BestCost() > pruning_cost ||
        new_node_cnt > max_node_cnt_) {
      delete node_array_[node_idx];
    } else {
      // keep it
      node_array_[new_node_cnt++] = node_array_[node_idx];
    }
  }
  node_cnt_ = new_node_cnt;
}
// Sort all nodes in the column using SearchNode::SearchNodeComparer.
// A missing or empty node array is a no-op.
void SearchColumn::Sort() {
  if (node_cnt_ <= 0 || node_array_ == NULL) {
    return;
  }
  qsort(node_array_, node_cnt_, sizeof(*node_array_),
        SearchNode::SearchNodeComparer);
}
// add a new node for the given language model edge (or merge into an
// existing node with the same edge/path). Returns the (new or updated)
// node, or NULL when the node was rejected (would be pruned, hash table
// full, or an existing node was not improved). On the merge path the edge
// is deleted here; otherwise the node takes ownership of it.
SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
                                  SearchNode *parent_node,
                                  CubeRecoContext *cntxt) {
  // init if necessary
  if (init_ == false && Init() == false) {
    return NULL;
  }
  // find out if we have an node with the same edge
  // look in the hash table
  SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node);
  // node does not exist
  if (new_node == NULL) {
    new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);
    // if the max node count has already been reached, check if the cost of
    // the new node exceeds the max cost. This indicates that it will be pruned
    // and so there is no point adding it
    if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) {
      delete new_node;
      return NULL;
    }
    // expand the node buffer if necc (grown in kNodeAllocChunk steps)
    if ((node_cnt_ % kNodeAllocChunk) == 0) {
      // alloc a new buff
      SearchNode **new_node_buff =
          new SearchNode *[node_cnt_ + kNodeAllocChunk];
      // free existing after copying contents
      if (node_array_ != NULL) {
        memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff));
        delete []node_array_;
      }
      node_array_ = new_node_buff;
    }
    // add the node to the hash table only if it is non-OOD edge
    // because the langmod state is not unique
    if (edge->IsOOD() == false) {
      if (!node_hash_table_->Insert(edge, new_node)) {
        tprintf("Hash table full!!!");
        delete new_node;
        return NULL;
      }
    }
    node_array_[node_cnt_++] = new_node;
  } else {
    // node exists before
    // if no update occurred, return NULL
    if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) {
      new_node = NULL;
    }
    // free the edge: the existing node keeps its own copy
    delete edge;
  }
  // update Min and Max Costs so Prune()'s histogram range stays valid
  if (new_node != NULL) {
    if (min_cost_ > new_node->BestCost()) {
      min_cost_ = new_node->BestCost();
    }
    if (max_cost_ < new_node->BestCost()) {
      max_cost_ = new_node->BestCost();
    }
  }
  return new_node;
}
// Return the node with the strictly lowest BestCost() in the column,
// or NULL if the column is empty. Ties keep the earlier node.
SearchNode *SearchColumn::BestNode() {
  SearchNode *best = NULL;
  for (int i = 0; i < node_cnt_; ++i) {
    SearchNode *cand = node_array_[i];
    if (best == NULL || cand->BestCost() < best->BestCost()) {
      best = cand;
    }
  }
  return best;
}
} // namespace tesseract

View File

@ -1,84 +0,0 @@
/**********************************************************************
* File: search_column.h
* Description: Declaration of the Beam Search Column Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The SearchColumn class abstracts a column in the lattice that is created
// by the BeamSearch during the recognition process
// The class holds the lattice nodes. New nodes are added by calls to AddNode
// made from the BeamSearch
// The class maintains a hash table of the nodes to be able to lookup nodes
// quickly using their lang_mod_edge. This is needed to merge similar paths
// in the lattice
#ifndef SEARCH_COLUMN_H
#define SEARCH_COLUMN_H
#include "search_node.h"
#include "lang_mod_edge.h"
#include "cube_reco_context.h"
namespace tesseract {
// A column of the beam-search lattice. Holds the lattice nodes for one
// position, supports capped growth (pruning to max_node_cnt_), and keeps a
// hash table from lang_mod_edge to node so equivalent paths can be merged.
class SearchColumn {
 public:
  SearchColumn(int col_idx, int max_node_cnt);
  ~SearchColumn();
  // Accessor functions
  inline int ColIdx() const { return col_idx_; }
  inline int NodeCount() const { return node_cnt_; }
  inline SearchNode **Nodes() const { return node_array_; }
  // Prune the nodes if necessary. Pruning is done such that a max
  // number of nodes is kept, i.e., the beam width
  void Prune();
  // Add (or merge) a node for the given edge; returns NULL on rejection.
  SearchNode *AddNode(LangModEdge *edge, int score,
                      SearchNode *parent, CubeRecoContext *cntxt);
  // Returns the node with the least cost
  SearchNode *BestNode();
  // Sort the lattice nodes. Needed for visualization
  void Sort();
  // Free up the Hash Table. Added to be called by the Beam Search after
  // a column is pruned to reduce memory foot print
  void FreeHashTable() {
    if (node_hash_table_ != NULL) {
      delete node_hash_table_;
      node_hash_table_ = NULL;
    }
  }
 private:
  // Node buffer growth step and cost-histogram resolution (see Prune()).
  static const int kNodeAllocChunk = 1024;
  static const int kScoreBins = 1024;
  bool init_;
  // Running min/max of node BestCost(); used as the histogram range in
  // Prune().
  int min_cost_;
  int max_cost_;
  int max_node_cnt_;
  int node_cnt_;
  int col_idx_;
  int score_bins_[kScoreBins];
  // Owned array of owned nodes (grown in kNodeAllocChunk steps).
  SearchNode **node_array_;
  SearchNodeHashTable *node_hash_table_;
  // Free node array and hash table
  void Cleanup();
  // Create hash table
  bool Init();
};
}
#endif // SEARCH_COLUMN_H

View File

@ -1,229 +0,0 @@
/**********************************************************************
* File: search_node.cpp
* Description: Implementation of the Beam Search Node Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "search_node.h"
namespace tesseract {
// The constructor updates the best paths and costs:
// mean_char_reco_cost_ (returned by BestRecoCost()) is the mean
// char_reco cost of the best_path, including this node.
// best_path_reco_cost is the total char_reco_cost of the best_path,
// but excludes the char_reco_cost of this node.
// best_cost is the mean mixed cost, i.e., mean_char_reco_cost_ +
// current language model cost, all weighted by the cube context's
// RecoWgt parameter
SearchNode::SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
                       int char_reco_cost, LangModEdge *edge, int col_idx) {
  // copy data members; the node takes ownership of "edge" (deleted in dtor)
  cntxt_ = cntxt;
  lang_mod_edge_ = edge;
  col_idx_ = col_idx;
  parent_node_ = parent_node;
  char_reco_cost_ = char_reco_cost;
  // the string of this node is the same as that of the language model edge
  str_ = (edge == NULL ? NULL : edge->EdgeString());
  // compute best path total reco cost (excludes this node's own cost)
  best_path_reco_cost_ = (parent_node_ == NULL) ? 0 :
      parent_node_->CharRecoCost() + parent_node_->BestPathRecoCost();
  // update best path length; a root edge with a parent counts one extra
  best_path_len_ = (parent_node_ == NULL) ?
      1 : parent_node_->BestPathLength() + 1;
  if (edge != NULL && edge->IsRoot() && parent_node_ != NULL) {
    best_path_len_++;
  }
  // compute best reco cost mean cost (path total / path length)
  mean_char_reco_cost_ = static_cast<int>(
      (best_path_reco_cost_ + char_reco_cost_) /
      static_cast<double>(best_path_len_));
  // get language model cost
  int lm_cost = LangModCost(lang_mod_edge_, parent_node_);
  // compute aggregate best cost: RecoWgt-weighted mean reco cost plus the
  // language model cost
  best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
                                (best_path_reco_cost_ + char_reco_cost_) /
                                static_cast<double>(best_path_len_)
                                ) + lm_cost;
}
// Destructor: the node owns its language model edge and releases it here.
SearchNode::~SearchNode() {
  delete lang_mod_edge_;  // deleting a NULL pointer is a no-op
}
// Re-parents this node onto new_parent if the alternative path (new_parent +
// new_edge + new_reco_cost) yields a lower (better) combined cost.
// Returns true only when the update actually happened.
bool SearchNode::UpdateParent(SearchNode *new_parent, int new_reco_cost,
                              LangModEdge *new_edge) {
  if (lang_mod_edge_ == NULL) {
    // a NULL-edge node can only be matched by another NULL edge
    if (new_edge != NULL) {
      return false;
    }
  } else {
    // to update the parent_node, we have to have the same target
    // state and char
    if (new_edge == NULL || !lang_mod_edge_->IsIdentical(new_edge) ||
        !SearchNode::IdenticalPath(parent_node_, new_parent)) {
      return false;
    }
  }
  // compute the path cost and combined cost of the new path
  int new_best_path_reco_cost;
  int new_cost;
  int new_best_path_len;
  // cumulative reco cost up to (but excluding) this node, along the new path
  new_best_path_reco_cost = (new_parent == NULL) ?
      0 : new_parent->BestPathRecoCost() + new_parent->CharRecoCost();
  new_best_path_len =
      (new_parent == NULL) ? 1 : new_parent->BestPathLength() + 1;
  // compute the new language model cost
  int new_lm_cost = LangModCost(new_edge, new_parent);
  // combined cost: RecoWgt-weighted mean reco cost plus language model cost
  // (mirrors the formula used in the constructor)
  new_cost = static_cast<int>(cntxt_->Params()->RecoWgt() *
      (new_best_path_reco_cost + new_reco_cost) /
      static_cast<double>(new_best_path_len)
      ) + new_lm_cost;
  // update if it is better (less) than the current one
  if (best_cost_ > new_cost) {
    parent_node_ = new_parent;
    char_reco_cost_ = new_reco_cost;
    best_path_reco_cost_ = new_best_path_reco_cost;
    best_path_len_ = new_best_path_len;
    mean_char_reco_cost_ = static_cast<int>(
        (best_path_reco_cost_ + char_reco_cost_) /
        static_cast<double>(best_path_len_));
    best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
        (best_path_reco_cost_ + char_reco_cost_) /
        static_cast<double>(best_path_len_)
        ) + new_lm_cost;
    return true;
  }
  return false;
}
// Returns the 32-bit string spelled by the path from the lattice root to this
// node, built in two passes (length, then backward fill). The caller owns the
// returned buffer and must delete[] it.
char_32 *SearchNode::PathString() {
  SearchNode *node = this;
  // pass 1: compute string length by walking back to the root
  int len = 0;
  while (node != NULL) {
    if (node->str_ != NULL) {
      len += CubeUtils::StrLen(node->str_);
    }
    // if the edge is a root and does not have a NULL parent, account for space
    LangModEdge *lm_edge = node->LangModelEdge();
    if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
      len++;
    }
    node = node->parent_node_;
  }
  char_32 *char_ptr = new char_32[len + 1];
  // pass 2: fill the buffer from the end while backtracking to the root
  int ch_idx = len;
  node = this;
  char_ptr[ch_idx--] = 0;  // NUL terminator first
  while (node != NULL) {
    int str_len = ((node->str_ == NULL) ? 0 : CubeUtils::StrLen(node->str_));
    // copy this node's characters right-to-left
    while (str_len > 0) {
      char_ptr[ch_idx--] = node->str_[--str_len];
    }
    // if the edge is a root and does not have a NULL parent, insert a space
    LangModEdge *lm_edge = node->LangModelEdge();
    if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
      char_ptr[ch_idx--] = (char_32)' ';
    }
    node = node->parent_node_;
  }
  return char_ptr;
}
// Compares the paths of two nodes and returns true if they are identical.
// Two NULL nodes compare equal; otherwise both paths must end at root edges
// with the same characters at each backtracked step.
bool SearchNode::IdenticalPath(SearchNode *node1, SearchNode *node2) {
  // paths of different lengths can never be identical
  if (node1 != NULL && node2 != NULL &&
      node1->best_path_len_ != node2->best_path_len_) {
    return false;
  }
  // backtrack until either a root or a NULL edge is reached
  while (node1 != NULL && node2 != NULL) {
    // NOTE(review): pointer comparison, not string comparison — presumably
    // edge strings are shared/interned per character class; confirm against
    // the char-set implementation
    if (node1->str_ != node2->str_) {
      return false;
    }
    // stop if either nodes is a root
    if (node1->LangModelEdge()->IsRoot() || node2->LangModelEdge()->IsRoot()) {
      break;
    }
    node1 = node1->parent_node_;
    node2 = node2->parent_node_;
  }
  // identical iff both ran off the path together, or both stopped at roots
  return ((node1 == NULL && node2 == NULL) ||
          (node1 != NULL && node1->LangModelEdge()->IsRoot() &&
           node2 != NULL && node2->LangModelEdge()->IsRoot()));
}
// Computes the language model cost of a path: the mean of the PathCost() of
// the root edges encountered while backtracking from current_lm_edge through
// parent_node to the start of the path.
int SearchNode::LangModCost(LangModEdge *current_lm_edge,
                            SearchNode *parent_node) {
  int lm_cost = 0;
  int node_cnt = 0;
  do {
    // check if root (the final iteration, where parent_node == NULL, always
    // counts, so node_cnt is at least 1 and the division below is safe)
    bool is_root = ((current_lm_edge != NULL && current_lm_edge->IsRoot()) ||
                    parent_node == NULL);
    if (is_root) {
      node_cnt++;
      lm_cost += (current_lm_edge == NULL ? 0 : current_lm_edge->PathCost());
    }
    // continue until we hit a null parent
    if (parent_node == NULL) {
      break;
    }
    // get the previous language model edge
    current_lm_edge = parent_node->LangModelEdge();
    // back track
    parent_node = parent_node->ParentNode();
  } while (true);
  // mean root-edge cost over the words on the path
  return static_cast<int>(lm_cost / static_cast<double>(node_cnt));
}
} // namespace tesseract

View File

@ -1,168 +0,0 @@
/**********************************************************************
* File: search_node.h
* Description: Declaration of the Beam Search Node Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The SearchNode class abstracts the search lattice node in the lattice
// generated by the BeamSearch class
// The SearchNode class holds the lang_mod_edge associated with the lattice
// node. It also holds a pointer to the parent SearchNode in the search path
// In addition it holds the recognition and the language model costs of the
// node and the path leading to this node
#ifndef SEARCH_NODE_H
#define SEARCH_NODE_H
#include "lang_mod_edge.h"
#include "cube_reco_context.h"
namespace tesseract {
// A node in the beam-search lattice. Holds the language model edge for this
// position, a pointer to the best parent found so far, and the cached
// recognition / language-model costs of the best path ending here.
class SearchNode {
 public:
  SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
             int char_reco_cost, LangModEdge *edge, int col_idx);
  ~SearchNode();
  // Updates the parent of the current node if the specified path yields
  // a better path cost
  bool UpdateParent(SearchNode *new_parent, int new_reco_cost,
                    LangModEdge *new_edge);
  // returns the 32-bit string corresponding to the path leading to this node
  char_32 *PathString();
  // True if the two input nodes correspond to the same path
  static bool IdenticalPath(SearchNode *node1, SearchNode *node2);
  // this node's character string (aliases the edge's string; may be NULL)
  inline const char_32 *NodeString() { return str_; }
  inline void SetString(char_32 *str) { str_ = str; }
  // This node's character recognition cost.
  inline int CharRecoCost() { return char_reco_cost_; }
  // Total character recognition cost of the nodes in the best path,
  // excluding this node.
  inline int BestPathRecoCost() { return best_path_reco_cost_; }
  // Number of nodes in best path.
  inline int BestPathLength() { return best_path_len_; }
  // Mean mixed cost, i.e., mean character recognition cost +
  // current language model cost, all weighted by the RecoWgt parameter
  inline int BestCost() { return best_cost_; }
  // Mean character recognition cost of the nodes on the best path,
  // including this node.
  inline int BestRecoCost() { return mean_char_reco_cost_ ; }
  inline int ColIdx() { return col_idx_; }
  inline SearchNode *ParentNode() { return parent_node_; }
  inline LangModEdge *LangModelEdge() { return lang_mod_edge_;}
  // language model cost of the path ending at this node
  inline int LangModCost() { return LangModCost(lang_mod_edge_, parent_node_); }
  // A comparer function that allows the SearchColumn class to sort the
  // nodes based on the path cost (qsort-style: args are SearchNode**)
  inline static int SearchNodeComparer(const void *node1, const void *node2) {
    return (*(reinterpret_cast<SearchNode * const *>(node1)))->best_cost_ -
        (*(reinterpret_cast<SearchNode * const *>(node2)))->best_cost_;
  }

 private:
  CubeRecoContext *cntxt_;
  // Character code
  const char_32 *str_;
  // Recognition cost of most recent character
  int char_reco_cost_;
  // Mean mixed cost, i.e., mean character recognition cost +
  // current language model cost, all weighted by the RecoWgt parameter
  int best_cost_;
  // Mean character recognition cost of the nodes on the best path,
  // including this node.
  int mean_char_reco_cost_ ;
  // Total character recognition cost of the nodes in the best path,
  // excluding this node.
  int best_path_reco_cost_;
  // Number of nodes in best path.
  int best_path_len_;
  // Column index
  int col_idx_;
  // Parent Node (not owned; owned by the search column that created it)
  SearchNode *parent_node_;
  // Language model edge (owned; deleted in the destructor)
  LangModEdge *lang_mod_edge_;
  static int LangModCost(LangModEdge *lang_mod_edge, SearchNode *parent_node);
};
// Implements a SearchNode hash table used to detect if a Search Node exists
// or not. This is needed to make sure that identical paths in the BeamSearch
// converge. Nodes are bucketed by the hash of their edge plus their parent's
// edge hash; buckets are fixed-size arrays (no chaining beyond the cap).
class SearchNodeHashTable {
 public:
  SearchNodeHashTable() {
    memset(bin_size_array_, 0, sizeof(bin_size_array_));
  }
  ~SearchNodeHashTable() {
  }

  // inserts an entry in the hash table; fails (returns false) when the
  // target bin is already full
  inline bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node) {
    // compute hash based on the edge and its parent node edge
    unsigned int edge_hash = lang_mod_edge->Hash();
    unsigned int parent_hash = (srch_node->ParentNode() == NULL ?
        0 : srch_node->ParentNode()->LangModelEdge()->Hash());
    unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
    // already maxed out, just fail
    if (bin_size_array_[hash_bin] >= kMaxSearchNodePerBin) {
      return false;
    }
    bin_array_[hash_bin][bin_size_array_[hash_bin]++] = srch_node;
    return true;
  }

  // Looks up an entry in the hash table; returns NULL when no node with an
  // identical edge and an identical path exists
  inline SearchNode *Lookup(LangModEdge *lang_mod_edge,
                            SearchNode *parent_node) {
    // compute hash based on the edge and its parent node edge
    unsigned int edge_hash = lang_mod_edge->Hash();
    unsigned int parent_hash = (parent_node == NULL ?
        0 : parent_node->LangModelEdge()->Hash());
    unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
    // lookup the entries in the hash bin
    for (int node_idx = 0; node_idx < bin_size_array_[hash_bin]; node_idx++) {
      if (lang_mod_edge->IsIdentical(
          bin_array_[hash_bin][node_idx]->LangModelEdge()) == true &&
          SearchNode::IdenticalPath(
          bin_array_[hash_bin][node_idx]->ParentNode(), parent_node) == true) {
        return bin_array_[hash_bin][node_idx];
      }
    }
    return NULL;
  }

 private:
  // Hash bin size parameters. These were determined empirically. These affect
  // the speed of the beam search but have no impact on accuracy
  // NOTE(review): bin_array_ is kSearchNodeHashBins * kMaxSearchNodePerBin
  // pointers (~16 MB on 64-bit) per instance — sized statically by design
  static const int kSearchNodeHashBins = 4096;
  static const int kMaxSearchNodePerBin = 512;
  int bin_size_array_[kSearchNodeHashBins];
  SearchNode *bin_array_[kSearchNodeHashBins][kMaxSearchNodePerBin];
};
}
#endif // SEARCH_NODE_H

View File

@ -1,55 +0,0 @@
/**********************************************************************
* File: search_object.h
* Description: Declaration of the Beam Search Object Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The SearchObject class represents a char_samp (a word bitmap) that is
// being searched for characters (or recognizeable entities).
// This is an abstract class that all SearchObjects should inherit from
// A SearchObject class provides methods to:
// 1- Returns the count of segments
// 2- Recognize a segment range
// 3- Creates a CharSamp for a segment range
#ifndef SEARCH_OBJECT_H
#define SEARCH_OBJECT_H
#include "char_altlist.h"
#include "char_samp.h"
#include "cube_reco_context.h"
namespace tesseract {
// Abstract interface for an object being searched (a word bitmap split into
// segments). Concrete implementations provide segment counts, per-range
// recognition, per-range samples/boxes, and spacing costs.
class SearchObject {
 public:
  explicit SearchObject(CubeRecoContext *cntxt) { cntxt_ = cntxt; }
  virtual ~SearchObject() {}

  // number of segmentation points
  virtual int SegPtCnt() = 0;
  // recognizes the specified segment range and returns the alternatives
  virtual CharAltList *RecognizeSegment(int start_pt, int end_pt) = 0;
  // creates a character sample for the specified segment range
  virtual CharSamp *CharSample(int start_pt, int end_pt) = 0;
  // bounding box of the specified segment range
  virtual Box* CharBox(int start_pt, int end_pt) = 0;
  // cost of placing a space at / spanning the given segmentation point(s)
  virtual int SpaceCost(int seg_pt) = 0;
  virtual int NoSpaceCost(int seg_pt) = 0;
  virtual int NoSpaceCost(int start_pt, int end_pt) = 0;

 protected:
  // recognition context (not owned)
  CubeRecoContext *cntxt_;
};
}
#endif // SEARCH_OBJECT_H

View File

@ -1,44 +0,0 @@
/**********************************************************************
* File: string_32.h
* Description: Declaration of a 32 Bit string class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// the string_32 class provides the functionality needed
// for a 32-bit string class
#ifndef STRING_32_H
#define STRING_32_H
#include <string.h>
#include <string>
#include <algorithm>
#include <vector>
#ifdef USE_STD_NAMESPACE
using std::basic_string;
using std::string;
using std::vector;
#endif
namespace tesseract {
// basic definitions
// char_32 holds one 32-bit character code (assumes int is 32 bits on all
// supported platforms); string_32 is the corresponding string type
typedef signed int char_32;
typedef basic_string<char_32> string_32;
}
#endif // STRING_32_H

View File

@ -1,120 +0,0 @@
/**********************************************************************
* File: tess_lang_mod_edge.cpp
* Description: Implementation of the Tesseract Language Model Edge Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "tess_lang_mod_edge.h"
#include "const.h"
#include "unichar.h"
namespace tesseract {
// OOD constructor: builds an out-of-dictionary edge for the given character
// class. dawg_ is set to NULL, which equals the DAWG_OOD sentinel (0).
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
  root_ = false;
  cntxt_ = cntxt;
  dawg_ = NULL;
  start_edge_ = 0;
  end_edge_ = 0;
  edge_mask_ = 0;
  class_id_ = class_id;
  str_ = cntxt_->CharacterSet()->ClassString(class_id);
  // Cost() reads cntxt_ and dawg_, so it must run after they are assigned
  path_cost_ = Cost();
}
/**
 * leading, trailing punc constructor and single byte UTF char:
 * a single dawg edge (start == end) for the given character class
 */
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
    const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
  root_ = false;
  cntxt_ = cntxt;
  dawg_ = dawg;
  start_edge_ = edge_idx;
  end_edge_ = edge_idx;
  edge_mask_ = 0;
  class_id_ = class_id;
  str_ = cntxt_->CharacterSet()->ClassString(class_id);
  // Cost() reads cntxt_ and dawg_, so it must run after they are assigned
  path_cost_ = Cost();
}
/**
 * dict constructor: multi byte UTF char, spanning a range of dawg edges
 * from start_edge_idx to end_edge_idx
 */
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
    EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
    int class_id) {
  root_ = false;
  cntxt_ = cntxt;
  dawg_ = dawg;
  start_edge_ = start_edge_idx;
  end_edge_ = end_edge_idx;
  edge_mask_ = 0;
  class_id_ = class_id;
  str_ = cntxt_->CharacterSet()->ClassString(class_id);
  // Cost() reads cntxt_ and dawg_, so it must run after they are assigned
  path_cost_ = Cost();
}
// Returns a human-readable description of this edge for visualizers, e.g.
// "Main(42-LP)-EOW-, Wtd Dawg Cost=-123". The caller owns the returned
// buffer and must delete[] it.
// Fix: replace unbounded strcpy/strcat/sprintf with size-bounded snprintf so
// the fixed-size buffers can never overflow; output is unchanged.
char *TessLangModEdge::Description() const {
  char *char_ptr = new char[256];
  char dawg_str[256];
  char edge_str[32];
  // name the dawg this edge belongs to (NULL dawg_ == DAWG_OOD == 0)
  if (dawg_ == (Dawg *)DAWG_OOD) {
    snprintf(dawg_str, sizeof(dawg_str), "OOD");
  } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
    snprintf(dawg_str, sizeof(dawg_str), "NUM");
  } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
    snprintf(dawg_str, sizeof(dawg_str), "Main");
  } else if (dawg_->permuter() == USER_DAWG_PERM) {
    snprintf(dawg_str, sizeof(dawg_str), "User");
  } else if (dawg_->permuter() == DOC_DAWG_PERM) {
    snprintf(dawg_str, sizeof(dawg_str), "Doc");
  } else {
    snprintf(dawg_str, sizeof(dawg_str), "N/A");
  }
  // edge index plus optional leading/trailing punctuation markers
  snprintf(edge_str, sizeof(edge_str), "%d%s%s",
           static_cast<int>(start_edge_),
           IsLeadingPuncEdge(edge_mask_) ? "-LP" : "",
           IsTrailingPuncEdge(edge_mask_) ? "-TP" : "");
  snprintf(char_ptr, 256, "%s(%s)%s, Wtd Dawg Cost=%d",
           dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
  return char_ptr;
}
// Fans out the dawg node parent_node into one freshly allocated edge per
// valid child unichar. Fills edge_array (caller-provided, assumed large
// enough) and returns the number of edges created.
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
                                    const Dawg *dawg,
                                    NODE_REF parent_node,
                                    LangModEdge **edge_array) {
  NodeChildVector children;
  dawg->unichar_ids_of(parent_node, &children, false);  // all children
  int count = 0;
  for (int idx = 0; idx < children.size(); ++idx) {
    const NodeChild &child = children[idx];
    if (child.unichar_id != INVALID_UNICHAR_ID) {
      edge_array[count] =
          new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
      ++count;
    }
  }
  return count;
}
}

View File

@ -1,233 +0,0 @@
/**********************************************************************
* File: tess_lang_mod_edge.h
* Description: Declaration of the Tesseract Language Model Edge Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TessLangModEdge models an edge in the Tesseract language models
// It inherits from the LangModEdge class
#ifndef TESS_LANG_MOD_EDGE_H
#define TESS_LANG_MOD_EDGE_H
#include "dawg.h"
#include "char_set.h"
#include "lang_mod_edge.h"
#include "cube_reco_context.h"
#include "cube_utils.h"
// Macros needed to identify punctuation in the langmodel state.
// Punctuation flags and the trailing-punc repeat count are packed into the
// high bits of a 64-bit EDGE_REF; the literal suffix differs per toolchain
// (i64 on MSVC-style compilers, ll elsewhere).
#ifdef _HMSW32_H
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000i64
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000i64
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000i64
#else
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000ll
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000ll
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000ll
#endif

// Number state machine macros: bit fields of the number-dawg edge reference
#define NUMBER_STATE_SHIFT 0
#define NUMBER_STATE_MASK 0x0000000fl
#define NUMBER_LITERAL_SHIFT 4
#define NUMBER_LITERAL_MASK 0x000000f0l
#define NUMBER_REPEAT_SHIFT 8
#define NUMBER_REPEAT_MASK 0x00000f00l
// sentinel state value meaning the number state machine terminated
#define NUM_TRM -99
#define TRAIL_PUNC_REPEAT_SHIFT 48

// Helpers over the packed edge mask
#define IsLeadingPuncEdge(edge_mask) \
    ((edge_mask & LEAD_PUNC_EDGE_REF_MASK) != 0)
#define IsTrailingPuncEdge(edge_mask) \
    ((edge_mask & TRAIL_PUNC_EDGE_REF_MASK) != 0)
#define TrailingPuncCount(edge_mask) \
    ((edge_mask & TRAIL_PUNC_REPEAT_MASK) >> TRAIL_PUNC_REPEAT_SHIFT)
#define TrailingPuncEdgeMask(Cnt) \
    (TRAIL_PUNC_EDGE_REF_MASK | ((Cnt) << TRAIL_PUNC_REPEAT_SHIFT))

// State machine IDs: sentinel Dawg* values for the OOD and number
// pseudo-dawgs (compared against dawg_ via casts, never dereferenced)
#define DAWG_OOD 0
#define DAWG_NUMBER 1
namespace tesseract {
// An edge in the Tesseract language model: a (dawg, edge range, class-id)
// triple plus a packed punctuation/number state mask. The OOD and number
// pseudo-dawgs are represented by the DAWG_OOD / DAWG_NUMBER sentinel values.
class TessLangModEdge : public LangModEdge {
 public:
  // Different ways of constructing a TessLangModEdge
  TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
                  EDGE_REF edge, int class_id);
  TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
                  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
                  int class_id);
  TessLangModEdge(CubeRecoContext *cntxt, int class_id);
  ~TessLangModEdge() {}

  // Accessors
  inline bool IsRoot() const {
    return root_;
  }
  inline void SetRoot(bool flag) { root_ = flag; }

  inline bool IsOOD() const {
    return (dawg_ == (Dawg *)DAWG_OOD);
  }

  inline bool IsNumber() const {
    return (dawg_ == (Dawg *)DAWG_NUMBER);
  }

  // end-of-word: terminal edges, or edges whose dawg marks end_edge_ as EOW
  inline bool IsEOW() const {
    return (IsTerminal() || (dawg_->end_of_word(end_edge_) != 0));
  }

  inline const Dawg *GetDawg() const { return dawg_; }
  inline EDGE_REF StartEdge() const { return start_edge_; }
  inline EDGE_REF EndEdge() const { return end_edge_; }
  inline EDGE_REF EdgeMask() const { return edge_mask_; }
  inline const char_32 * EdgeString() const { return str_; }
  inline int ClassID () const { return class_id_; }
  inline int PathCost() const { return path_cost_; }
  inline void SetEdgeMask(EDGE_REF edge_mask) { edge_mask_ = edge_mask; }
  inline void SetDawg(Dawg *dawg) { dawg_ = dawg; }
  inline void SetStartEdge(EDGE_REF edge_idx) { start_edge_ = edge_idx; }
  inline void SetEndEdge(EDGE_REF edge_idx) { end_edge_ = edge_idx; }

  // is this a terminal node:
  // we can terminate at any OOD char, trailing punc or
  // when the dawg terminates
  inline bool IsTerminal() const {
    return (IsOOD() || IsNumber() || IsTrailingPuncEdge(start_edge_) ||
            dawg_->next_node(end_edge_) == 0);
  }

  // How many signals does the LM provide for tuning. These are flags like:
  // OOD or not, Number of not that are used by the training to compute
  // extra costs for each word.
  inline int SignalCnt() const {
    return 2;
  }

  // returns the weight assigned to a specified signal
  // (signal 0 = OOD weight, signal 1 = number weight)
  inline double SignalWgt(int signal) const {
    CubeTuningParams *params =
        reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
    if (params != NULL) {
      switch (signal) {
        case 0:
          return params->OODWgt();
          break;

        case 1:
          return params->NumWgt();
          break;
      }
    }
    return 0.0;
  }

  // sets the weight assigned to a specified signal: Used in training
  void SetSignalWgt(int signal, double wgt) {
    CubeTuningParams *params =
        reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
    if (params != NULL) {
      switch (signal) {
        case 0:
          params->SetOODWgt(wgt);
          break;

        case 1:
          params->SetNumWgt(wgt);
          break;
      }
    }
  }

  // returns the actual value of a specified signal
  int Signal(int signal) {
    switch (signal) {
      case 0:
        return IsOOD() ? MIN_PROB_COST : 0;
        break;

      case 1:
        return IsNumber() ? MIN_PROB_COST : 0;
        break;

      default:
        return 0;
    }
  }

  // returns the Hash value of the edge. Used by the SearchNode hash table
  // to quickly lookup existing edges to converge during search
  inline unsigned int Hash() const {
    return static_cast<unsigned int>(
        ((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
        ((unsigned int)edge_mask_) ^ class_id_);
  }

  // A verbal description of the edge: Used by visualizers
  char *Description() const;

  // Is this edge identical to the specified edge (field-by-field compare)
  inline bool IsIdentical(LangModEdge *lang_mod_edge) const {
    return (class_id_ ==
        reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->class_id_ &&
        str_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->str_ &&
        dawg_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->dawg_ &&
        start_edge_ ==
        reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->start_edge_ &&
        end_edge_ ==
        reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->end_edge_ &&
        edge_mask_ ==
        reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->edge_mask_);
  }

  // Creates a set of fan-out edges for the specified edge
  static int CreateChildren(CubeRecoContext *cntxt,
                            const Dawg *edges,
                            NODE_REF edge_reg,
                            LangModEdge **lm_edges);

 private:
  bool root_;
  CubeRecoContext *cntxt_;
  const Dawg *dawg_;
  EDGE_REF start_edge_;
  EDGE_REF end_edge_;
  EDGE_REF edge_mask_;
  int path_cost_;
  int class_id_;
  const char_32 * str_;

  // returns the cost of the lang_mod_edge: OOD and number edges get a
  // weighted MIN_PROB_COST penalty, everything else costs 0
  inline int Cost() const {
    if (cntxt_ != NULL) {
      CubeTuningParams *params =
          reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
      if (dawg_ == (Dawg *)DAWG_OOD) {
        return static_cast<int>(params->OODWgt() * MIN_PROB_COST);
      } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
        return static_cast<int>(params->NumWgt() * MIN_PROB_COST);
      }
    }
    return 0;
  }
};
} // namespace tesseract
#endif // TESS_LANG_MOD_EDGE_H

View File

@ -1,506 +0,0 @@
/**********************************************************************
* File: tess_lang_model.cpp
* Description: Implementation of the Tesseract Language Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TessLangModel class abstracts the Tesseract language model. It inherits
// from the LangModel class. The Tesseract language model encompasses several
// Dawgs (words from training data, punctuation, numbers, document words).
// On top of this Cube adds an OOD state machine
// The class provides methods to traverse the language model in a generative
// fashion. Given any node in the DAWG, the language model can generate a list
// of children (or fan-out) edges
#include <string>
#include <vector>
#include "char_samp.h"
#include "cube_utils.h"
#include "dict.h"
#include "tesseractclass.h"
#include "tess_lang_model.h"
#include "tessdatamanager.h"
#include "unicharset.h"
namespace tesseract {
// max fan-out (used for preallocation). Initialized here, but modified by
// constructor
int TessLangModel::max_edge_ = 4096;

// Language model extra State machines: sentinel Dawg* values, never
// dereferenced — only compared against to dispatch OOD/number handling
const Dawg *TessLangModel::ood_dawg_ = reinterpret_cast<Dawg *>(DAWG_OOD);
const Dawg *TessLangModel::number_dawg_ = reinterpret_cast<Dawg *>(DAWG_NUMBER);

// number state machine: transition table indexed by [state][literal class];
// NUM_TRM marks termination
const int TessLangModel::num_state_machine_[kStateCnt][kNumLiteralCnt] = {
    {0, 1, 1, NUM_TRM, NUM_TRM},
    {NUM_TRM, 1, 1, 3, 2},
    {NUM_TRM, NUM_TRM, 1, NUM_TRM, 2},
    {NUM_TRM, NUM_TRM, 3, NUM_TRM, 2},
};
// per-state cap on repeated literals
const int TessLangModel::num_max_repeat_[kStateCnt] = {3, 32, 8, 3};

// thresholds and penalties: shape-cost ceiling for emitting OOD edges
int TessLangModel::max_ood_shape_cost_ = CubeUtils::Prob2Cost(1e-4);
// Constructor: loads the language model elements described by lm_params and,
// when the tessdata has a cube unicharset, optionally loads the system word
// dawg. data_file_path is unused here (kept for interface compatibility —
// presumably consumed by LoadLangModelElements callers elsewhere; confirm).
TessLangModel::TessLangModel(const string &lm_params,
                             const string &data_file_path,
                             bool load_system_dawg,
                             TessdataManager *tessdata_manager,
                             CubeRecoContext *cntxt) {
  cntxt_ = cntxt;
  has_case_ = cntxt_->HasCase();
  // Load the rest of the language model elements from file
  LoadLangModelElements(lm_params);
  // Load word_dawgs_ if needed: only when a cube unicharset is present;
  // otherwise word_dawgs_ stays NULL and Tesseract's own dawgs are used
  if (tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
    word_dawgs_ = new DawgVector();
    if (load_system_dawg &&
        tessdata_manager->SeekToStart(TESSDATA_CUBE_SYSTEM_DAWG)) {
      // The last parameter to the Dawg constructor (the debug level) is set to
      // false, until Cube has a way to express its preferred debug level.
      *word_dawgs_ += new SquishedDawg(tessdata_manager->GetDataFilePtr(),
                                      DAWG_TYPE_WORD,
                                      cntxt_->Lang().c_str(),
                                      SYSTEM_DAWG_PERM, false);
    }
  } else {
    word_dawgs_ = NULL;
  }
}
// Releases an edge array produced by GetEdges(): deletes the first edge_cnt
// elements, then the array itself. Safe to call with a NULL array.
void TessLangModel::FreeEdges(int edge_cnt, LangModEdge **edge_array) {
  if (edge_array == NULL) {
    return;
  }
  for (int idx = 0; idx < edge_cnt; idx++) {
    delete edge_array[idx];  // deleting NULL slots is a no-op
  }
  delete []edge_array;
}
// Determines if a sequence of 32-bit chars is valid in this language model
// starting from the specified edge. If the eow_flag is ON, also checks for
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
// edge (ownership of that one edge transfers to the caller; all other
// generated edges are freed before returning).
bool TessLangModel::IsValidSequence(LangModEdge *edge,
                                    const char_32 *sequence,
                                    bool eow_flag,
                                    LangModEdge **final_edge) {
  // get the edges emerging from this edge
  int edge_cnt = 0;
  LangModEdge **edge_array = GetEdges(NULL, edge, &edge_cnt);
  // find the 1st char in the sequence in the children
  for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) {
    // found a match (NOTE(review): only the first character of the edge
    // string is compared here — presumably edges are single-char at this
    // level; confirm for multi-char dict edges)
    if (sequence[0] == edge_array[edge_idx]->EdgeString()[0]) {
      // if this is the last char
      if (sequence[1] == 0) {
        // succeed if we are in prefix mode or this is a terminal edge
        if (eow_flag == false || edge_array[edge_idx]->IsEOW()) {
          if (final_edge != NULL) {
            // hand the matching edge to the caller and NULL the slot so
            // FreeEdges below does not delete it
            (*final_edge) = edge_array[edge_idx];
            edge_array[edge_idx] = NULL;
          }
          FreeEdges(edge_cnt, edge_array);
          return true;
        }
      } else {
        // not the last char continue checking recursively
        if (IsValidSequence(edge_array[edge_idx], sequence + 1, eow_flag,
                            final_edge) == true) {
          FreeEdges(edge_cnt, edge_array);
          return true;
        }
      }
    }
  }
  // no child matched
  FreeEdges(edge_cnt, edge_array);
  return false;
}
// Root-anchored validity check: clears the caller's result slot, then
// delegates to the edge-anchored overload starting from the dawg roots.
// If eow_flag is ON, also requires a valid EndOfWord.
bool TessLangModel::IsValidSequence(const char_32 *sequence, bool eow_flag,
                                    LangModEdge **final_edge) {
  if (final_edge != NULL)
    *final_edge = NULL;
  // NULL start edge means "begin at the root of every dawg"
  return IsValidSequence(NULL, sequence, eow_flag, final_edge);
}
// True if ch is one of the configured leading punctuation characters.
bool TessLangModel::IsLeadingPunc(const char_32 ch) {
  return string::npos != lead_punc_.find(ch);
}

// True if ch is one of the configured trailing punctuation characters.
bool TessLangModel::IsTrailingPunc(const char_32 ch) {
  return string::npos != trail_punc_.find(ch);
}

// True if ch is one of the configured digit characters.
bool TessLangModel::IsDigit(const char_32 ch) {
  return string::npos != digits_.find(ch);
}
// The general fan-out generation function. Returns the list of edges
// fanning-out of the specified edge and their count. If an AltList is
// specified, only the class-ids with a minimum cost are considered.
// The returned array is heap-allocated; callers release it via FreeEdges().
LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list,
                                       LangModEdge *lang_mod_edge,
                                       int *edge_cnt) {
  TessLangModEdge *tess_lm_edge =
      reinterpret_cast<TessLangModEdge *>(lang_mod_edge);
  LangModEdge **edge_array = NULL;
  (*edge_cnt) = 0;

  // if we are starting from the root, we'll instantiate every DAWG
  // and get the all the edges that emerge from the root
  if (tess_lm_edge == NULL) {
    // get DAWG count from Tesseract
    int dawg_cnt = NumDawgs();
    // preallocate the edge buffer
    // NOTE(review): buffer is sized dawg_cnt * max_edge_, yet the number and
    // OOD fan-outs below are appended on top of the word-dawg fan-outs —
    // presumably max_edge_ is a safe per-dawg over-estimate; confirm
    (*edge_cnt) = dawg_cnt * max_edge_;
    edge_array = new LangModEdge *[(*edge_cnt)];

    // the loop init also resets *edge_cnt to 0 so it can serve as the
    // running fill index into edge_array
    for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) {
      const Dawg *curr_dawg = GetDawg(dawg_idx);
      // Only look through word Dawgs (since there is a special way of
      // handling numbers and punctuation).
      if (curr_dawg->type() == DAWG_TYPE_WORD) {
        (*edge_cnt) += FanOut(alt_list, curr_dawg, 0, 0, NULL, true,
                              edge_array + (*edge_cnt));
      }
    }  // dawg

    (*edge_cnt) += FanOut(alt_list, number_dawg_, 0, 0, NULL, true,
                          edge_array + (*edge_cnt));

    // OOD: it is intentionally not added to the list to make sure it comes
    // at the end
    (*edge_cnt) += FanOut(alt_list, ood_dawg_, 0, 0, NULL, true,
                          edge_array + (*edge_cnt));

    // set the root flag for all root edges
    for (int edge_idx = 0; edge_idx < (*edge_cnt); edge_idx++) {
      edge_array[edge_idx]->SetRoot(true);
    }
  } else {  // not starting at the root
    // preallocate the edge buffer
    (*edge_cnt) = max_edge_;
    // allocate memory for edges
    edge_array = new LangModEdge *[(*edge_cnt)];
    // get the FanOut edges from the root of each dawg
    (*edge_cnt) = FanOut(alt_list,
                         tess_lm_edge->GetDawg(),
                         tess_lm_edge->EndEdge(), tess_lm_edge->EdgeMask(),
                         tess_lm_edge->EdgeString(), false, edge_array);
  }
  return edge_array;
}
// generate edges from an NULL terminated string
// (used for punctuation, operators and digits)
int TessLangModel::Edges(const char *strng, const Dawg *dawg,
EDGE_REF edge_ref, EDGE_REF edge_mask,
LangModEdge **edge_array) {
int edge_idx,
edge_cnt = 0;
for (edge_idx = 0; strng[edge_idx] != 0; edge_idx++) {
int class_id = cntxt_->CharacterSet()->ClassID((char_32)strng[edge_idx]);
if (class_id != INVALID_UNICHAR_ID) {
// create an edge object
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref,
class_id);
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
SetEdgeMask(edge_mask);
edge_cnt++;
}
}
return edge_cnt;
}
// generate OOD edges
int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
                            EDGE_REF edge_ref_mask, LangModEdge **edge_array) {
  // Emits one out-of-dictionary edge per character class whose shape cost is
  // low enough; with no alternate list, every class gets an edge.
  const int class_cnt = cntxt_->CharacterSet()->ClassCount();
  int edge_cnt = 0;
  for (int class_id = 0; class_id < class_cnt; class_id++) {
    const bool cheap_enough =
        (alt_list == NULL ||
         alt_list->ClassCost(class_id) <= max_ood_shape_cost_);
    if (cheap_enough) {
      edge_array[edge_cnt++] = new TessLangModEdge(cntxt_, class_id);
    }
  }
  return edge_cnt;
}
// computes and returns the edges that fan out of an edge ref
int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg,
                          EDGE_REF edge_ref, EDGE_REF edge_mask,
                          const char_32 *str, bool root_flag,
                          LangModEdge **edge_array) {
  // Appends the edges fanning out of edge_ref to edge_array and returns how
  // many were added. "dawg" may be a sentinel value (DAWG_OOD / DAWG_NUMBER)
  // rather than a real Dawg pointer -- hence the reinterpret_cast
  // comparisons below -- in which case the corresponding sub-model handles
  // the fan-out entirely.
  int edge_cnt = 0;
  NODE_REF next_node = NO_EDGE;
  // OOD
  if (dawg == reinterpret_cast<Dawg *>(DAWG_OOD)) {
    if (ood_enabled_ == true) {
      return OODEdges(alt_list, edge_ref, edge_mask, edge_array);
    } else {
      return 0;
    }
  } else if (dawg == reinterpret_cast<Dawg *>(DAWG_NUMBER)) {
    // Number
    if (numeric_enabled_ == true) {
      return NumberEdges(edge_ref, edge_array);
    } else {
      return 0;
    }
  } else if (IsTrailingPuncEdge(edge_mask)) {
    // a TRAILING PUNC MASK, generate more trailing punctuation and return
    if (punc_enabled_ == true) {
      EDGE_REF trail_cnt = TrailingPuncCount(edge_mask);
      return Edges(trail_punc_.c_str(), dawg, edge_ref,
                   TrailingPuncEdgeMask(trail_cnt + 1), edge_array);
    } else {
      return 0;
    }
  } else if (root_flag == true || edge_ref == 0) {
    // Root, generate leading punctuation and continue
    if (root_flag) {
      if (punc_enabled_ == true) {
        edge_cnt += Edges(lead_punc_.c_str(), dawg, 0, LEAD_PUNC_EDGE_REF_MASK,
                          edge_array);
      }
    }
    next_node = 0;
  } else {
    // a node in the main trie
    bool eow_flag = (dawg->end_of_word(edge_ref) != 0);
    // for EOW
    if (eow_flag == true) {
      // generate trailing punctuation
      if (punc_enabled_ == true) {
        edge_cnt += Edges(trail_punc_.c_str(), dawg, edge_ref,
                          TrailingPuncEdgeMask((EDGE_REF)1), edge_array);
        // generate a hyphen and go back to the root
        edge_cnt += Edges("-/", dawg, 0, 0, edge_array + edge_cnt);
      }
    }
    // advance node
    next_node = dawg->next_node(edge_ref);
    if (next_node == 0 || next_node == NO_EDGE) {
      return edge_cnt;
    }
  }
  // now get all the emerging edges if word list is enabled
  if (word_list_enabled_ == true && next_node != NO_EDGE) {
    // create child edges
    int child_edge_cnt =
        TessLangModEdge::CreateChildren(cntxt_, dawg, next_node,
                                        edge_array + edge_cnt);
    int strt_cnt = edge_cnt;
    // set the edge mask
    for (int child = 0; child < child_edge_cnt; child++) {
      reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt++])->
          SetEdgeMask(edge_mask);
    }
    // if we are at the root, create upper case forms of these edges if possible
    if (root_flag == true) {
      for (int child = 0; child < child_edge_cnt; child++) {
        TessLangModEdge *child_edge =
            reinterpret_cast<TessLangModEdge *>(edge_array[strt_cnt + child]);
        if (has_case_ == true) {
          const char_32 *edge_str = child_edge->EdgeString();
          // Only single lowercase characters get an uppercase twin.
          // NOTE(review): islower()/toupper() are only defined for values
          // representable as unsigned char (or EOF); char_32 values outside
          // that range may be out of domain here -- confirm for non-ASCII
          // scripts.
          if (edge_str != NULL && islower(edge_str[0]) != 0 &&
              edge_str[1] == 0) {
            int class_id =
                cntxt_->CharacterSet()->ClassID(toupper(edge_str[0]));
            if (class_id != INVALID_UNICHAR_ID) {
              // generate an upper case edge for lower case chars
              edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg,
                  child_edge->StartEdge(), child_edge->EndEdge(), class_id);
              reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
                  SetEdgeMask(edge_mask);
              edge_cnt++;
            }
          }
        }
      }
    }
  }
  return edge_cnt;
}
// Generate the edges fanning-out from an edge in the number state machine
int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) {
  // Decode the current state and the repeat count packed into the edge ref.
  const EDGE_REF state =
      ((edge_ref & NUMBER_STATE_MASK) >> NUMBER_STATE_SHIFT);
  const inT64 repeat_cnt =
      ((edge_ref & NUMBER_REPEAT_MASK) >> NUMBER_REPEAT_SHIFT);
  if (state < 0 || state >= kStateCnt) {
    return 0;
  }
  // Try every literal class as a transition out of the current state.
  int edge_cnt = 0;
  for (int lit = 0; lit < kNumLiteralCnt; lit++) {
    const EDGE_REF new_state = num_state_machine_[state][lit];
    if (new_state == NUM_TRM) {
      continue;  // terminal: no outgoing edge for this literal class
    }
    // Staying in the same state bumps the repeat count; otherwise reset it.
    const inT64 new_repeat_cnt = (new_state == state) ? repeat_cnt + 1 : 1;
    // not allowed to repeat beyond this
    if (new_repeat_cnt > num_max_repeat_[state]) {
      continue;
    }
    // Pack the new state, literal class and repeat count into the edge ref.
    const EDGE_REF new_edge_ref = (new_state << NUMBER_STATE_SHIFT) |
        (lit << NUMBER_LITERAL_SHIFT) |
        (new_repeat_cnt << NUMBER_REPEAT_SHIFT);
    edge_cnt += Edges(literal_str_[lit]->c_str(), number_dawg_,
                      new_edge_ref, 0, edge_array + edge_cnt);
  }
  return edge_cnt;
}
// Loads the language model elements from the contents of the <lang>.cube.lm
// file. Each line has the form "<TokenType>=<value>". Returns false if any
// line is malformed or names an unknown token type (well-formed lines are
// still applied).
bool TessLangModel::LoadLangModelElements(const string &lm_params) {
  bool success = true;
  // split into lines, each corresponding to a token type below
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(lm_params, "\r\n", &str_vec);
  for (int entry = 0; entry < str_vec.size(); entry++) {
    vector<string> tokens;
    // should be only two tokens: type and value
    CubeUtils::SplitStringUsing(str_vec[entry], "=", &tokens);
    if (tokens.size() != 2) {
      // Malformed line: flag the failure and skip it. Previously the code
      // fell through and read tokens[0]/tokens[1], which is out-of-bounds
      // (undefined behavior) when the line split into fewer than two tokens.
      success = false;
      continue;
    }
    if (tokens[0] == "LeadPunc") {
      lead_punc_ = tokens[1];
    } else if (tokens[0] == "TrailPunc") {
      trail_punc_ = tokens[1];
    } else if (tokens[0] == "NumLeadPunc") {
      num_lead_punc_ = tokens[1];
    } else if (tokens[0] == "NumTrailPunc") {
      num_trail_punc_ = tokens[1];
    } else if (tokens[0] == "Operators") {
      operators_ = tokens[1];
    } else if (tokens[0] == "Digits") {
      digits_ = tokens[1];
    } else if (tokens[0] == "Alphas") {
      alphas_ = tokens[1];
    } else {
      // Unknown token type.
      success = false;
    }
  }
  // Strip characters that are not in the recognizer's character set.
  RemoveInvalidCharacters(&num_lead_punc_);
  RemoveInvalidCharacters(&num_trail_punc_);
  RemoveInvalidCharacters(&digits_);
  RemoveInvalidCharacters(&operators_);
  RemoveInvalidCharacters(&alphas_);
  // form the array of literal strings needed for number state machine
  // It is essential that the literal strings go in the order below
  literal_str_[0] = &num_lead_punc_;
  literal_str_[1] = &num_trail_punc_;
  literal_str_[2] = &digits_;
  literal_str_[3] = &operators_;
  literal_str_[4] = &alphas_;
  return success;
}
// Rewrites *lm_str so it contains only characters that are present in the
// CubeRecoContext's character set; leaves the string untouched when nothing
// needs to be removed.
void TessLangModel::RemoveInvalidCharacters(string *lm_str) {
  CharSet *char_set = cntxt_->CharacterSet();
  tesseract::string_32 lm_str32;
  CubeUtils::UTF8ToUTF32(lm_str->c_str(), &lm_str32);
  int len = CubeUtils::StrLen(lm_str32.c_str());
  // RAII buffer (replaces manual new[]/delete[]); len + 1 leaves room for
  // the NULL terminator and the extra slot is zero-initialized.
  vector<char_32> clean_str32(len + 1, 0);
  int clean_len = 0;
  // Keep only characters with a valid class-id in the character set.
  for (int i = 0; i < len; ++i) {
    int class_id = char_set->ClassID((char_32)lm_str32[i]);
    if (class_id != INVALID_UNICHAR_ID) {
      clean_str32[clean_len] = lm_str32[i];
      ++clean_len;
    }
  }
  clean_str32[clean_len] = 0;
  // Re-encode to UTF-8 only if something was actually removed.
  if (clean_len < len) {
    lm_str->clear();
    CubeUtils::UTF32ToUTF8(&clean_str32[0], lm_str);
  }
}
int TessLangModel::NumDawgs() const {
  // Prefer Cube's own word dawgs when they were loaded; otherwise count the
  // dawgs owned by the Tesseract dictionary.
  if (word_dawgs_ != NULL) {
    return word_dawgs_->size();
  }
  return cntxt_->TesseractObject()->getDict().NumDawgs();
}
// Returns the dawgs with the given index from either the dawgs
// stored by the Tesseract object, or the word_dawgs_.
const Dawg *TessLangModel::GetDawg(int index) const {
  // Cube-specific word dawgs take precedence when they were loaded;
  // otherwise the dawg comes from the Tesseract dictionary.
  if (word_dawgs_ != NULL) {
    ASSERT_HOST(index < word_dawgs_->size());
    return (*word_dawgs_)[index];
  }
  ASSERT_HOST(index < cntxt_->TesseractObject()->getDict().NumDawgs());
  return cntxt_->TesseractObject()->getDict().GetDawg(index);
}
}

View File

@ -1,142 +0,0 @@
/**********************************************************************
* File: tess_lang_model.h
* Description: Declaration of the Tesseract Language Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef TESS_LANG_MODEL_H
#define TESS_LANG_MODEL_H
#include <string>
#include "char_altlist.h"
#include "cube_reco_context.h"
#include "cube_tuning_params.h"
#include "dict.h"
#include "lang_model.h"
#include "tessdatamanager.h"
#include "tess_lang_mod_edge.h"
namespace tesseract {
// Number of states in the numeral-recognition state machine
// (first dimension of TessLangModel::num_state_machine_).
const int kStateCnt = 4;
// Number of literal character classes the number state machine transitions
// on; matches the literal_str_ slots filled by LoadLangModelElements
// (NumLeadPunc, NumTrailPunc, Digits, Operators, Alphas).
const int kNumLiteralCnt = 5;

class TessLangModel : public LangModel {
 public:
  TessLangModel(const string &lm_params,
                const string &data_file_path,
                bool load_system_dawg,
                TessdataManager *tessdata_manager,
                CubeRecoContext *cntxt);
  // Frees the privately loaded word dawgs, if any.
  ~TessLangModel() {
    if (word_dawgs_ != NULL) {
      word_dawgs_->delete_data_pointers();
      delete word_dawgs_;
    }
  }

  // returns a pointer to the root of the language model
  inline TessLangModEdge *Root() {
    return NULL;
  }

  // The general fan-out generation function. Returns the list of edges
  // fanning-out of the specified edge and their count. If an AltList is
  // specified, only the class-ids with a minimum cost are considered
  LangModEdge **GetEdges(CharAltList *alt_list,
                         LangModEdge *edge,
                         int *edge_cnt);
  // Determines if a sequence of 32-bit chars is valid in this language model
  // starting from the root. If the eow_flag is ON, also checks for
  // a valid EndOfWord. If final_edge is not NULL, returns a pointer to the
  // last edge
  bool IsValidSequence(const char_32 *sequence, bool eow_flag,
                       LangModEdge **final_edge = NULL);

  bool IsLeadingPunc(char_32 ch);
  bool IsTrailingPunc(char_32 ch);
  bool IsDigit(char_32 ch);

  // Removes from the string every character that is not present in the
  // CubeRecoContext's character set.
  void RemoveInvalidCharacters(string *lm_str);

 private:
  // static LM state machines
  static const Dawg *ood_dawg_;
  static const Dawg *number_dawg_;
  static const int num_state_machine_[kStateCnt][kNumLiteralCnt];
  static const int num_max_repeat_[kStateCnt];
  // word_dawgs_ should only be loaded if cube has its own version of the
  // unicharset (different from the one used by tesseract) and therefore
  // can not use the dawgs loaded for tesseract (since the unichar ids
  // encoded in the dawgs differ).
  DawgVector *word_dawgs_;

  // Per-dawg upper bound used to size the preallocated edge buffers in
  // GetEdges().
  static int max_edge_;
  // Maximum shape cost for a class to receive an OOD edge (see OODEdges).
  static int max_ood_shape_cost_;

  // remaining language model elements needed by cube. These get loaded from
  // the .lm file
  string lead_punc_;
  string trail_punc_;
  string num_lead_punc_;
  string num_trail_punc_;
  string operators_;
  string digits_;
  string alphas_;
  // String of characters in RHS of each line of <lang>.cube.lm
  // Each element is hard-coded to correspond to a specific token type
  // (see LoadLangModelElements)
  string *literal_str_[kNumLiteralCnt];
  // Recognition context needed to access language properties
  // (case, cursive,..)
  CubeRecoContext *cntxt_;
  // Whether the language has case; used by FanOut to synthesize upper-case
  // twins of lower-case root edges.
  bool has_case_;

  // computes and returns the edges that fan out of an edge ref
  int FanOut(CharAltList *alt_list,
             const Dawg *dawg, EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
             const char_32 *str, bool root_flag, LangModEdge **edge_array);
  // generate edges from an NULL terminated string
  // (used for punctuation, operators and digits)
  int Edges(const char *strng, const Dawg *dawg,
            EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
            LangModEdge **edge_array);
  // Generate the edges fanning-out from an edge in the number state machine
  int NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array);
  // Generate OOD edges
  int OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
               EDGE_REF edge_ref_mask, LangModEdge **edge_array);
  // Cleanup an edge array
  void FreeEdges(int edge_cnt, LangModEdge **edge_array);
  // Determines if a sequence of 32-bit chars is valid in this language model
  // starting from the specified edge. If the eow_flag is ON, also checks for
  // a valid EndOfWord. If final_edge is not NULL, returns a pointer to the
  // last edge
  bool IsValidSequence(LangModEdge *edge, const char_32 *sequence,
                       bool eow_flag, LangModEdge **final_edge);
  // Parse language model elements from the given string, which should
  // have been loaded from <lang>.cube.lm file, e.g. in CubeRecoContext
  bool LoadLangModelElements(const string &lm_params);

  // Returns the number of word Dawgs in the language model.
  int NumDawgs() const;

  // Returns the dawgs with the given index from either the dawgs
  // stored by the Tesseract object, or the word_dawgs_.
  const Dawg *GetDawg(int index) const;
};
} // tesseract
#endif // TESS_LANG_MODEL_H

View File

@ -1,129 +0,0 @@
/**********************************************************************
* File: tuning_params.h
* Description: Declaration of the Tuning Parameters Base Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The TuningParams class abstracts all the parameters that can be learned or
// tuned during the training process. It is a base class that all TuningParams
// classes should inherit from.
#ifndef TUNING_PARAMS_H
#define TUNING_PARAMS_H
#include <string>
#ifdef USE_STD_NAMESPACE
using std::string;
#endif
namespace tesseract {
class TuningParams {
 public:
  // Classifier types (stored in tp_classifier_).
  enum type_classifer {
    NN,
    HYBRID_NN
  };
  // Feature types (stored in tp_feat_).
  enum type_feature {
    BMP,
    CHEBYSHEV,
    HYBRID
  };

  TuningParams() {}
  virtual ~TuningParams() {}

  // Accessor functions
  inline double RecoWgt() const { return reco_wgt_; }
  inline double SizeWgt() const { return size_wgt_; }
  inline double CharBigramWgt() const { return char_bigrams_wgt_; }
  inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
  inline int MaxSegPerChar() const { return max_seg_per_char_; }
  inline int BeamWidth() const { return beam_width_; }
  inline int TypeClassifier() const { return tp_classifier_; }
  inline int TypeFeature() const { return tp_feat_; }
  inline int ConvGridSize() const { return conv_grid_size_; }
  inline int HistWindWid() const { return hist_wind_wid_; }
  inline int MinConCompSize() const { return min_con_comp_size_; }
  inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
  inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
  inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
  inline double CombinerRunThresh() const { return combiner_run_thresh_; }
  inline double CombinerClassifierThresh() const {
    return combiner_classifier_thresh_; }

  // Mutator functions
  inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
  inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
  inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
  inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
  inline void SetMaxSegPerChar(int max_seg_per_char) {
    max_seg_per_char_ = max_seg_per_char;
  }
  inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
  inline void SetTypeClassifier(type_classifer tp_classifier) {
    tp_classifier_ = tp_classifier;
  }
  inline void SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;}
  inline void SetHistWindWid(int hist_wind_wid) {
    hist_wind_wid_ = hist_wind_wid;
  }

  // Persistence hooks implemented by concrete subclasses.
  virtual bool Save(string file_name) = 0;
  virtual bool Load(string file_name) = 0;

 protected:
  // weight of recognition cost. This includes the language model cost
  double reco_wgt_;
  // weight of size cost
  double size_wgt_;
  // weight of character bigrams cost
  double char_bigrams_wgt_;
  // weight of word unigrams cost
  double word_unigrams_wgt_;
  // Maximum number of segments per character
  int max_seg_per_char_;
  // Beam width equal to the maximum number of nodes kept in the beam search
  // trellis column after pruning
  int beam_width_;
  // Classifier type: See enum type_classifer for classifier types
  type_classifer tp_classifier_;
  // Feature types: See enum type_feature for feature types
  type_feature tp_feat_;
  // Grid size to scale a grapheme bitmap used by the BMP feature type
  int conv_grid_size_;
  // Histogram window size as a ratio of the word height used in computing
  // the vertical pixel density histogram in the segmentation algorithm
  int hist_wind_wid_;
  // Minimum possible size of a connected component
  int min_con_comp_size_;
  // Maximum aspect ratio of a word (width / height)
  double max_word_aspect_ratio_;
  // Minimum ratio relative to the line height of a gap to be considered as
  // a word break
  double min_space_height_ratio_;
  // Maximum ratio relative to the line height of a gap to be considered as
  // a definite word break
  double max_space_height_ratio_;
  // When Cube and Tesseract are run in combined mode, only run
  // combiner classifier when tesseract confidence is below this
  // threshold. When Cube is run without Tesseract, this is ignored.
  double combiner_run_thresh_;
  // When Cube and tesseract are run in combined mode, threshold on
  // output of combiner binary classifier (chosen from ROC during
  // combiner training). When Cube is run without Tesseract, this is ignored.
  double combiner_classifier_thresh_;
};
}
#endif // TUNING_PARAMS_H

View File

@ -1,117 +0,0 @@
/**********************************************************************
* File: word_altlist.cpp
* Description: Implementation of the Word Alternate List Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "word_altlist.h"
namespace tesseract {
// Constructs an empty alternate list; the word storage array is allocated
// lazily on the first Insert() call.
WordAltList::WordAltList(int max_alt)
    : AltList(max_alt),
      word_alt_(NULL) {
}
WordAltList::~WordAltList() {
  // Release each owned alternate string, then the array of pointers itself.
  if (word_alt_ == NULL)
    return;
  for (int idx = 0; idx < alt_cnt_; idx++) {
    // delete[] on a NULL pointer is a no-op, so no per-entry check is needed.
    delete []word_alt_[idx];
  }
  delete []word_alt_;
  word_alt_ = NULL;
}
/**
* insert an alternate word with the specified cost and tag
*/
// Inserts an alternate word with the specified cost and tag. If the word is
// already present, keeps the lower cost (and its tag). Returns false only
// when the list is full and the word is new.
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
  if (word_alt_ == NULL || alt_cost_ == NULL) {
    // Lazily allocate the parallel alternate arrays on first use.
    word_alt_ = new char_32*[max_alt_];
    alt_cost_ = new int[max_alt_];
    alt_tag_ = new void *[max_alt_];
    memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
  } else {
    // check if alt already exists
    for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
      if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
        // update the cost if we have a lower one
        if (cost < alt_cost_[alt_idx]) {
          alt_cost_[alt_idx] = cost;
          alt_tag_[alt_idx] = tag;
        }
        return true;
      }
    }
  }
  // Reject new alternates once the list is full. Previously there was no
  // bounds check and the code wrote past the end of the preallocated arrays.
  if (alt_cnt_ >= max_alt_) {
    return false;
  }
  // determine length of alternate and store a NULL-terminated copy
  int len = CubeUtils::StrLen(word_str);
  word_alt_[alt_cnt_] = new char_32[len + 1];
  if (len > 0) {
    memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
  }
  word_alt_[alt_cnt_][len] = 0;
  alt_cost_[alt_cnt_] = cost;
  alt_tag_[alt_cnt_] = tag;
  alt_cnt_++;
  return true;
}
/**
* sort the alternate in descending order based on the cost
*/
void WordAltList::Sort() {
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
if (alt_cost_[alt_idx] > alt_cost_[alt]) {
char_32 *pchTemp = word_alt_[alt_idx];
word_alt_[alt_idx] = word_alt_[alt];
word_alt_[alt] = pchTemp;
int temp = alt_cost_[alt_idx];
alt_cost_[alt_idx] = alt_cost_[alt];
alt_cost_[alt] = temp;
void *tag = alt_tag_[alt_idx];
alt_tag_[alt_idx] = alt_tag_[alt];
alt_tag_[alt] = tag;
}
}
}
}
void WordAltList::PrintDebug() {
  // Dump every alternate with its cost and its unichar-ids to stderr.
  for (int idx = 0; idx < alt_cnt_; idx++) {
    char_32 *word_32 = word_alt_[idx];
    string word_str;
    CubeUtils::UTF32ToUTF8(word_32, &word_str);
    const int num_unichars = CubeUtils::StrLen(word_32);
    fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", idx,
            word_str.c_str(), alt_cost_[idx], num_unichars);
    for (int i = 0; i < num_unichars; ++i) {
      fprintf(stderr, "%d ", word_32[i]);
    }
    fprintf(stderr, "\n");
  }
}
} // namespace tesseract

View File

@ -1,50 +0,0 @@
/**********************************************************************
* File: word_altlist.h
* Description: Declaration of the Word Alternate List Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The WordAltList abstracts a alternate list of words and their corresponding
// costs that result from the word recognition process. The class inherits
// from the AltList class
// It provides methods to add a new word alternate, its corresponding score and
// a tag.
#ifndef WORD_ALT_LIST_H
#define WORD_ALT_LIST_H
#include "altlist.h"
namespace tesseract {
class WordAltList : public AltList {
 public:
  explicit WordAltList(int max_alt);
  ~WordAltList();
  // Sort the list of alternates in ascending order of cost (best first)
  void Sort();
  // insert an alternate word with the specified cost and tag
  bool Insert(char_32 *char_ptr, int cost, void *tag = NULL);
  // returns the alternate string at the specified position
  inline char_32 * Alt(int alt_idx) { return word_alt_[alt_idx]; }
  // print each entry of the altlist, both UTF8 and unichar ids, and
  // their costs, to stderr
  void PrintDebug();
 private:
  // Array of owned, NULL-terminated char_32 strings; allocated lazily by
  // Insert() and released by the destructor.
  char_32 **word_alt_;
};
} // namespace tesseract
#endif // WORD_ALT_LIST_H

View File

@ -1,199 +0,0 @@
/**********************************************************************
* File: word_list_lang_model.cpp
* Description: Implementation of the Word List Language Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <string>
#include <vector>
#include "word_list_lang_model.h"
#include "cube_utils.h"
#include "ratngs.h"
#include "trie.h"
namespace tesseract {
// Constructs an uninitialized model; the underlying trie is built lazily
// by Init() on first use.
WordListLangModel::WordListLangModel(CubeRecoContext *cntxt)
    : cntxt_(cntxt),
      dawg_(NULL),
      init_(false) {
}
WordListLangModel::~WordListLangModel() {
  // Cleanup() deletes the owned trie and resets the init flag.
  Cleanup();
}
// Cleanup
void WordListLangModel::Cleanup() {
  // Drop the trie (if any) and mark the model as uninitialized so a later
  // Init() can rebuild it.
  delete dawg_;  // deleting NULL is a no-op
  dawg_ = NULL;
  init_ = false;
}
// Initialize the language model
bool WordListLangModel::Init() {
  // Idempotent: once initialized, further calls are no-ops.
  if (init_) {
    return true;
  }
  // The last parameter to the Trie constructor (the debug level) is set to
  // false for now, until Cube has a way to express its preferred debug level.
  dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM,
                   cntxt_->CharacterSet()->ClassCount(), false);
  init_ = true;
  return true;
}
// return a pointer to the root
LangModEdge * WordListLangModel::Root() {
  // The root is represented implicitly: passing a NULL edge to GetEdges()
  // starts the walk at the trie's root node.
  return NULL;
}
// return the edges emerging from the current state
LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list,
                                          LangModEdge *edge,
                                          int *edge_cnt) {
  // Returns a heap-allocated array of the edges emerging from "edge" (or
  // from the trie root when edge is NULL); the caller owns the array and the
  // edges in it. Returns NULL on init failure or when the edge is a dead end.
  // initialize if necessary
  if (init_ == false) {
    if (Init() == false) {
      return NULL;
    }
  }
  (*edge_cnt) = 0;
  EDGE_REF edge_ref;
  TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
  if (tess_lm_edge == NULL) {
    // NULL edge means "start at the root" (node 0).
    edge_ref = 0;
  } else {
    edge_ref = tess_lm_edge->EndEdge();
    // advance node
    edge_ref = dawg_->next_node(edge_ref);
    if (edge_ref == 0) {
      // No continuation in the trie.
      return NULL;
    }
  }
  // allocate memory for edges
  LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
  // now get all the emerging edges
  (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
                                                 edge_array + (*edge_cnt));
  return edge_array;
}
// returns true if the char_32 is supported by the language model
// TODO(ahmadab) currently not implemented
bool WordListLangModel::IsValidSequence(const char_32 *sequence,
                                        bool terminal, LangModEdge **edges) {
  // Stub: sequence validation is not implemented for word-list models yet,
  // so every sequence is reported as invalid (see TODO above).
  return false;
}
// Recursive helper function for WordVariants().
void WordListLangModel::WordVariants(const CharSet &char_set,
                                     string_32 prefix_str32,
                                     WERD_CHOICE *word_so_far,
                                     string_32 str32,
                                     vector<WERD_CHOICE *> *word_variants) {
  // Recursively enumerates every segmentation of str32 into graphemes known
  // to char_set (backtracking search). Each complete segmentation is
  // appended to word_variants as a heap-allocated WERD_CHOICE that the
  // caller owns.
  int str_len = str32.length();
  if (str_len == 0) {
    // Entire string consumed: record the accumulated word, if non-empty.
    if (word_so_far->length() > 0) {
      word_variants->push_back(new WERD_CHOICE(*word_so_far));
    }
  } else {
    // Try out all the possible prefixes of the str32.
    for (int len = 1; len <= str_len; len++) {
      // Check if prefix is supported in character set.
      string_32 str_pref32 = str32.substr(0, len);
      int class_id = char_set.ClassID(reinterpret_cast<const char_32 *>(
          str_pref32.c_str()));
      if (class_id <= 0) {
        continue;
      } else {
        string_32 new_prefix_str32 = prefix_str32 + str_pref32;
        string_32 new_str32 = str32.substr(len);
        // Append the grapheme, recurse on the remainder, then undo the
        // append (backtracking) before trying the next prefix length.
        word_so_far->append_unichar_id(class_id, 1, 0.0, 0.0);
        WordVariants(char_set, new_prefix_str32, word_so_far, new_str32,
                     word_variants);
        word_so_far->remove_last_unichar_id();
      }
    }
  }
}
// Compute all the variants of a 32-bit string in terms of the class-ids
// This is needed for languages that have ligatures. A word can then have more
// than one spelling in terms of the class-ids
void WordListLangModel::WordVariants(const CharSet &char_set,
const UNICHARSET *uchset, string_32 str32,
vector<WERD_CHOICE *> *word_variants) {
for (int i = 0; i < word_variants->size(); i++) {
delete (*word_variants)[i];
}
word_variants->clear();
string_32 prefix_str32;
WERD_CHOICE word_so_far(uchset);
WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
}
// add a new UTF-8 string to the lang model
bool WordListLangModel::AddString(const char *char_ptr) {
  // Lazily build the trie before the first insertion.
  if (!init_) {
    if (!Init()) {
      return false;
    }
  }
  // Convert to UTF-32 and delegate; strings that decode to nothing are
  // rejected.
  string_32 str32;
  CubeUtils::UTF8ToUTF32(char_ptr, &str32);
  if (str32.empty()) {
    return false;
  }
  return AddString32(str32.c_str());
}
// add a new UTF-32 string to the lang model
bool WordListLangModel::AddString32(const char_32 *char_32_ptr) {
  if (char_32_ptr == NULL) {
    return false;
  }
  // Enumerate every class-id spelling of the string (ligature handling).
  vector<WERD_CHOICE *> word_variants;
  WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
               char_32_ptr, &word_variants);
  if (!word_variants.empty()) {
    // Pick the variant with the fewest graphemes (the first one wins ties)
    // and add only that interpretation of the string to the word list.
    int shortest = 0;
    for (int var = 1; var < word_variants.size(); var++) {
      if (word_variants[shortest]->length() > word_variants[var]->length()) {
        shortest = var;
      }
    }
    dawg_->add_word_to_dawg(*word_variants[shortest]);
  }
  // The variants were heap-allocated by WordVariants(); free them all.
  for (int var = 0; var < word_variants.size(); var++) {
    delete word_variants[var];
  }
  return true;
}
}

View File

@ -1,89 +0,0 @@
/**********************************************************************
* File: word_list_lang_model.h
* Description: Declaration of the Word List Language Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The WordListLangModel class abstracts a language model that is based on
// a list of words. It inherits from the LangModel abstract class
// Besides providing the methods inherited from the LangModel abstract class,
// the class provided methods to add new strings to the Language Model:
// AddString & AddString32
#ifndef WORD_LIST_LANG_MODEL_H
#define WORD_LIST_LANG_MODEL_H
#include <vector>
#include "cube_reco_context.h"
#include "lang_model.h"
#include "tess_lang_mod_edge.h"
namespace tesseract {
class Trie;
class WordListLangModel : public LangModel {
 public:
  explicit WordListLangModel(CubeRecoContext *cntxt);
  ~WordListLangModel();
  // Returns an edge pointer to the Root
  LangModEdge *Root();
  // Returns the edges that fan-out of the specified edge and their count
  LangModEdge **GetEdges(CharAltList *alt_list,
                         LangModEdge *edge,
                         int *edge_cnt);
  // Returns is a sequence of 32-bit characters are valid within this language
  // model or net. And EndOfWord flag is specified. If true, the sequence has
  // to end on a valid word. The function also optionally returns the list
  // of language model edges traversed to parse the string
  bool IsValidSequence(const char_32 *sequence,
                       bool eow_flag,
                       LangModEdge **edges);
  bool IsLeadingPunc(char_32 ch) { return false; }  // not yet implemented
  bool IsTrailingPunc(char_32 ch) { return false; }  // not yet implemented
  bool IsDigit(char_32 ch) { return false; }  // not yet implemented
  // Adds a new UTF-8 string to the language model
  bool AddString(const char *char_ptr);
  // Adds a new UTF-32 string to the language model
  bool AddString32(const char_32 *char_32_ptr);
  // Compute all the variants of a 32-bit string in terms of the class-ids.
  // This is needed for languages that have ligatures. A word can then have
  // more than one spelling in terms of the class-ids.
  static void WordVariants(const CharSet &char_set, const UNICHARSET *uchset,
                           string_32 str32,
                           vector<WERD_CHOICE *> *word_variants);
 private:
  // constants needed to configure the language model
  // Maximum number of edges a single GetEdges() call can return.
  static const int kMaxEdge = 512;
  // Recognition context (non-owning).
  CubeRecoContext *cntxt_;
  // The trie holding the word list; owned, built lazily by Init().
  Trie *dawg_;
  // True once Init() has successfully created the trie.
  bool init_;
  // Initialize the language model
  bool Init();
  // Cleanup
  void Cleanup();
  // Recursive helper function for WordVariants().
  static void WordVariants(
      const CharSet &char_set,
      string_32 prefix_str32, WERD_CHOICE *word_so_far,
      string_32 str32,
      vector<WERD_CHOICE *> *word_variants);
};
} // tesseract
#endif // WORD_LIST_LANG_MODEL_H

View File

@ -1,286 +0,0 @@
/**********************************************************************
* File: word_size_model.cpp
* Description: Implementation of the Word Size Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <math.h>
#include <string>
#include <vector>
#include "word_size_model.h"
#include "cube_utils.h"
namespace tesseract {
// Builds a size model over the given character set.  `contextual` selects
// position-dependent size classes (start/middle/final/isolated forms).
WordSizeModel::WordSizeModel(CharSet *char_set, bool contextual)
    : contextual_(contextual), char_set_(char_set) {
}
// Frees every font's pair-size table.  Each table is a row-pointer array
// whose first row points at one contiguous allocation holding all rows.
WordSizeModel::~WordSizeModel() {
  for (size_t fnt = 0; fnt < font_pair_size_models_.size(); ++fnt) {
    PairSizeInfo **table = font_pair_size_models_[fnt].pair_size_info;
    delete []table[0];  // the contiguous cell block
    delete []table;     // the row-pointer array
  }
}
// Factory: constructs a WordSizeModel and loads the size-stats file for
// `lang` under `data_file_path`.  Returns NULL on failure.
WordSizeModel *WordSizeModel::Create(const string &data_file_path,
                                     const string &lang,
                                     CharSet *char_set,
                                     bool contextual) {
  WordSizeModel *model = new WordSizeModel(char_set, contextual);
  if (model->Init(data_file_path, lang)) {
    return model;
  }
  delete model;
  return NULL;
}
// Loads <data_file_path><lang>.cube.size and builds the per-font tables of
// pair-size statistics.  Returns false on any read or parse error.
bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
  string stats_file_name;
  stats_file_name = data_file_path + lang;
  stats_file_name += ".cube.size";
  // read file to memory
  string str_data;
  if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {
    return false;
  }
  // split to words
  vector<string> tokens;
  CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);
  if (tokens.size() < 1) {
    fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "
            "file contents: %s\n", stats_file_name.c_str());
    return false;
  }
  font_pair_size_models_.clear();
  // token count per line depends on whether the language is contextual or not
  int token_cnt = contextual_ ?
      (kExpectedTokenCount + 4) : kExpectedTokenCount;
  // the count of size classes depends on whether the language is contextual
  // or not. For non contextual languages (Ex: Eng), it is equal to the class
  // count. For contextual languages (Ex: Ara), it is equal to the class count
  // multiplied by the position count (4: start, middle, final, isolated)
  int size_class_cnt = contextual_ ?
      (char_set_->ClassCount() * 4) : char_set_->ClassCount();
  string fnt_name = "";
  for (int tok = 0; tok < tokens.size(); tok += token_cnt) {
    // Reject a truncated final record up front: the parsing below indexes
    // up to tokens[tok + token_cnt - 1], and vector::operator[] does not
    // bounds-check, so a short file would otherwise read past the end.
    if ((tok + token_cnt) > tokens.size()) {
      fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
              "line %d\n", 1 + (tok / token_cnt));
      return false;
    }
    // a new font, write the old font data and re-init
    if (tok == 0 || fnt_name != tokens[tok]) {
      FontPairSizeInfo fnt_info;
      fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
      fnt_info.pair_size_info[0] =
          new PairSizeInfo[size_class_cnt * size_class_cnt];
      memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
             sizeof(PairSizeInfo));
      // remaining rows point into the single contiguous allocation
      for (int cls = 1; cls < size_class_cnt; cls++) {
        fnt_info.pair_size_info[cls] =
            fnt_info.pair_size_info[cls - 1] + size_class_cnt;
      }
      // strip out path and extension
      string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));
      string::size_type strt_pos = stripped_font_name.find_last_of("/\\");
      if (strt_pos != string::npos) {
        fnt_info.font_name = stripped_font_name.substr(strt_pos);
      } else {
        fnt_info.font_name = stripped_font_name;
      }
      font_pair_size_models_.push_back(fnt_info);
    }
    // parse the data
    int cls_0;
    int cls_1;
    double delta_top;
    double wid_0;
    double hgt_0;
    double wid_1;
    double hgt_1;
    int size_code_0;
    int size_code_1;
    // read and parse the tokens
    if (contextual_) {
      int start_0;
      int end_0;
      int start_1;
      int end_1;
      // The expected format for a character size bigram is as follows:
      // ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)
      // Width0<delim>Height0<delim>
      // ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)
      // HeightDelta<delim>Width1<delim>Height1<delim>
      // In case of non-contextual languages, the Start and End flags are
      // omitted
      if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
          sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 ||
          sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 ||
          sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 ||
          sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 ||
          sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 ||
          sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 ||
          sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 ||
          sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 ||
          sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 ||
          sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 ||
          (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
          (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
                "line %d\n", 1 + (tok / token_cnt));
        return false;
      }
      size_code_0 = SizeCode(cls_0, start_0, end_0);
      size_code_1 = SizeCode(cls_1, start_1, end_1);
    } else {
      if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
          sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 ||
          sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 ||
          sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 ||
          sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 ||
          sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 ||
          sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
                "line %d\n", 1 + (tok / token_cnt));
        return false;
      }
      size_code_0 = cls_0;
      size_code_1 = cls_1;
    }
    // copy the data to the size tables, converting to fixed point.
    // fnt_info is a shallow copy; pair_size_info points at the shared table.
    FontPairSizeInfo fnt_info = font_pair_size_models_.back();
    fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
        static_cast<int>(delta_top * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
        static_cast<int>(wid_0 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
        static_cast<int>(hgt_0 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
        static_cast<int>(wid_1 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
        static_cast<int>(hgt_1 * kShapeModelScale);
    fnt_name = tokens[tok];
  }
  return true;
}
// Returns the size cost of the word given by samp_array: for every font in
// the model, averages PairCost over all ordered sample pairs (i < j), then
// returns the best (lowest) mean, or WORST_COST if no font/pair matched.
int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {
  // a single character has no pair geometry to score
  if (samp_cnt < 2) {
    return 0;
  }
  double best_dist = static_cast<double>(WORST_COST);
  int best_fnt = -1;
  for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
    const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
    double mean_dist = 0;
    int pair_cnt = 0;
    for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
      int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());
      // skip samples whose label has no valid class
      if (cls_0 < 1) {
        continue;
      }
      // compute size code for samp 0 based on class id and position
      int size_code_0;
      if (contextual_) {
        size_code_0 = SizeCode(cls_0,
                               samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
                               samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
      } else {
        size_code_0 = cls_0;
      }
      int char0_height = samp_array[smp_0]->Height();
      int char0_width = samp_array[smp_0]->Width();
      int char0_top = samp_array[smp_0]->Top();
      for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
        int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());
        if (cls_1 < 1) {
          continue;
        }
        // compute size code for samp 1 based on class id and position
        int size_code_1;
        if (contextual_) {
          size_code_1 = SizeCode(cls_1,
                                 samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
                                 samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
        } else {
          size_code_1 = cls_1;
        }
        double dist = PairCost(
            char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
            samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
            fnt_info->pair_size_info[size_code_0][size_code_1]);
        // only pairs with a positive distance contribute to the mean;
        // dist == 0 also covers empty (zeroed) table entries
        if (dist > 0) {
          mean_dist += dist;
          pair_cnt++;
        }
      }  // smp_1
    }  // smp_0
    if (pair_cnt == 0) {
      continue;
    }
    mean_dist /= pair_cnt;
    // track the font with the smallest mean pair distance
    if (best_fnt == -1 || mean_dist < best_dist) {
      best_dist = mean_dist;
      best_fnt = fnt;
    }
  }
  if (best_fnt == -1) {
    return static_cast<int>(WORST_COST);
  } else {
    return static_cast<int>(best_dist);
  }
}
// Returns the accumulated geometric mismatch (in fixed-point model units)
// between an observed character pair and the model's statistics for that
// pair.  Returns 0 when no comparison is possible: a non-positive scale
// factor (e.g. an empty, zeroed table entry) or a degenerate first sample.
double WordSizeModel::PairCost(int width_0, int height_0, int top_0,
                               int width_1, int height_1, int top_1,
                               const PairSizeInfo& pair_info) {
  double dist = 0.0;
  // Guard the division: height_0 == 0 would yield an inf/nan scale factor
  // and poison the distances accumulated below.
  if (height_0 > 0) {
    double scale_factor = static_cast<double>(pair_info.hgt_0) /
        static_cast<double>(height_0);
    if (scale_factor > 0) {
      // normalize the observed pair into the model's coordinate scale
      double norm_width_0 = width_0 * scale_factor;
      double norm_width_1 = width_1 * scale_factor;
      double norm_height_1 = height_1 * scale_factor;
      double norm_delta_top = (top_1 - top_0) * scale_factor;
      // accumulate the distance between the model character and the
      // predicted one on all dimensions of the pair
      dist += fabs(pair_info.wid_0 - norm_width_0);
      dist += fabs(pair_info.wid_1 - norm_width_1);
      dist += fabs(pair_info.hgt_1 - norm_height_1);
      dist += fabs(pair_info.delta_top - norm_delta_top);
    }
  }
  return dist;
}
} // namespace tesseract

View File

@ -1,100 +0,0 @@
/**********************************************************************
* File: word_size_model.h
* Description: Declaration of the Word Size Model Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The WordSizeModel class abstracts the geometrical relationships
// between characters/shapes in the same word (presumably of the same font)
// A non-parametric bigram model describes the three geometrical properties of a
// character pair:
// 1- Normalized Width
// 2- Normalized Top
// 3- Normalized Height
// These dimensions are computed for each character pair in a word. These are
// then compared to the same information for each of the fonts that the size
// model knows about. The WordSizeCost is the cost of the font that matches
// best.
#ifndef WORD_SIZE_MODEL_H
#define WORD_SIZE_MODEL_H
#include <string>
#include "char_samp.h"
#include "char_set.h"
namespace tesseract {
// Geometric statistics for an ordered pair of size classes, stored in fixed
// point (floating-point ratios scaled by WordSizeModel::kShapeModelScale).
struct PairSizeInfo {
  int delta_top;  // expected top offset of char 1 relative to char 0
  int wid_0;      // expected width of the first character
  int hgt_0;      // expected height of the first character
  int wid_1;      // expected width of the second character
  int hgt_1;      // expected height of the second character
};
// Per-font table of pair-size statistics.
struct FontPairSizeInfo {
  string font_name;  // font identifier with path and extension stripped
  // 2-D table indexed [size_code_0][size_code_1]; row 0 points at one
  // contiguous allocation that owns the whole table.
  PairSizeInfo **pair_size_info;
};
// Word-level geometric model: scores how consistently the character sizes
// and positions within a word match each font the model knows about.
class WordSizeModel {
 public:
  WordSizeModel(CharSet *, bool contextual);
  virtual ~WordSizeModel();
  // Factory: loads <data_file_path><lang>.cube.size and returns a new
  // model, or NULL on failure.
  static WordSizeModel *Create(const string &data_file_path,
                               const string &lang,
                               CharSet *char_set,
                               bool contextual);
  // Given a word and number of unichars, return the size cost,
  // minimized over all fonts in the size model.
  int Cost(CharSamp **samp_array, int samp_cnt) const;
  // Given dimensions of a pair of character samples and a font size
  // model for that character pair, return the pair's size cost for
  // the font.
  static double PairCost(int width_0, int height_0, int top_0,
                         int width_1, int height_1, int top_1,
                         const PairSizeInfo& pair_info);
  bool Save(string file_name);
  // Number of fonts in size model.
  inline int FontCount() const {
    return font_pair_size_models_.size();
  }
  // Pointer to the first element of the per-font table array.
  inline const FontPairSizeInfo *FontInfo() const {
    return &font_pair_size_models_[0];
  }
  // Helper functions to convert between size codes, class id and position
  // codes (start/end flags occupy the low two bits).
  static inline int SizeCode(int cls_id, int start, int end) {
    return (cls_id << 2) + (end << 1) + start;
  }
 private:
  // Scaling constant used to convert floating point ratios in size table
  // to fixed point
  static const int kShapeModelScale = 1000;
  // tokens per record in the .cube.size file (non-contextual case)
  static const int kExpectedTokenCount = 10;
  // Language properties
  bool contextual_;
  CharSet *char_set_;
  // Size ratios table
  vector<FontPairSizeInfo> font_pair_size_models_;
  // Initialize the word size model object
  bool Init(const string &data_file_path, const string &lang);
};
}
#endif // WORD_SIZE_MODEL_H

View File

@ -1,252 +0,0 @@
/**********************************************************************
* File: word_unigrams.cpp
* Description: Implementation of the Word Unigrams Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <math.h>
#include <string>
#include <vector>
#include <algorithm>
#include "const.h"
#include "cube_utils.h"
#include "ndminx.h"
#include "word_unigrams.h"
namespace tesseract {
// Constructs an empty unigram table; Create() does the actual loading.
WordUnigrams::WordUnigrams()
    : word_cnt_(0), words_(NULL), costs_(NULL) {
}
// Releases the word table.  words_[0] owns the single character buffer
// backing every entry; words_ itself is just an array of pointers into it.
WordUnigrams::~WordUnigrams() {
  if (words_ != NULL && words_[0] != NULL) {
    delete []words_[0];
  }
  delete []words_;   // delete[] on NULL is a no-op
  words_ = NULL;
  delete []costs_;
}
/**
 * Load the word-list and unigrams from file and create an object.
 * The word list is assumed to be sorted in lexicographic order.
 * Returns NULL on I/O or parse failure.
 */
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
                                   const string &lang) {
  string file_name;
  string str;
  file_name = data_file_path + lang;
  file_name += ".cube.word-freq";
  // load the string into memory
  if (CubeUtils::ReadFileToString(file_name, &str) == false) {
    return NULL;
  }
  // split into (word, cost) tokens
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(str, "\r\n \t", &str_vec);
  if (str_vec.size() < 2) {
    return NULL;
  }
  // allocate memory: one pointer/cost slot per pair, plus a single shared
  // character buffer large enough for all words (full_len bounds the total)
  WordUnigrams *word_unigrams_obj = new WordUnigrams();
  int full_len = str.length();
  int word_cnt = str_vec.size() / 2;
  word_unigrams_obj->words_ = new char*[word_cnt];
  word_unigrams_obj->costs_ = new int[word_cnt];
  word_unigrams_obj->words_[0] = new char[full_len];
  // construct sorted list of words and costs
  word_unigrams_obj->word_cnt_ = 0;
  char *char_buff = word_unigrams_obj->words_[0];
  word_cnt = 0;
  int max_cost = 0;
  // The "wrd + 1 <" guard skips a trailing unpaired token: without it an
  // odd-sized token list read str_vec[wrd + 1] past the end of the vector
  // and wrote words_[word_cnt]/costs_[word_cnt] past the end of the
  // size()/2-element arrays allocated above.
  for (int wrd = 0; wrd + 1 < str_vec.size(); wrd += 2) {
    word_unigrams_obj->words_[word_cnt] = char_buff;
    strcpy(char_buff, str_vec[wrd].c_str());
    char_buff += (str_vec[wrd].length() + 1);
    if (sscanf(str_vec[wrd + 1].c_str(), "%d",
               word_unigrams_obj->costs_ + word_cnt) != 1) {
      fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error reading "
              "word unigram data.\n");
      delete word_unigrams_obj;
      return NULL;
    }
    // update max cost
    max_cost = MAX(max_cost, word_unigrams_obj->costs_[word_cnt]);
    word_cnt++;
  }
  word_unigrams_obj->word_cnt_ = word_cnt;
  // compute the not-in-list cost by modeling the frequency of an unlisted
  // word from the tail of the word-frequency distribution.
  // [ahmadab]: This can be computed as follows:
  // - Given that the distribution of words follow Zipf's law:
  //   (F = K / (rank ^ S)), where s is slightly > 1.0
  // - Number of words in the list is N
  // - The mean frequency of a word that did not appear in the list is the
  //   area under the rest of the Zipf's curve divided by 2 (the mean)
  // - The area would be the bound integral from N to infinity =
  //   (K * S) / (N ^ (S + 1)) ~= K / (N ^ 2)
  // - Given that cost = -LOG(prob), the cost of an unlisted word would be
  //   = max_cost + 2*LOG(N)
  word_unigrams_obj->not_in_list_cost_ = max_cost +
      (2 * CubeUtils::Prob2Cost(1.0 / word_cnt));
  // success
  return word_unigrams_obj;
}
/**
 * Split input into space-separated tokens, strip trailing punctuation
 * from each, determine case properties, call UTF-8 flavor of cost
 * function on each word, and aggregate all into single mean word
 * cost.  Returns 0 for a NULL or whitespace-only input.
 */
int WordUnigrams::Cost(const char_32 *key_str32,
                       LangModel *lang_mod,
                       CharSet *char_set) const {
  if (!key_str32)
    return 0;
  // convert string to UTF8 to split into space-separated words
  string key_str;
  CubeUtils::UTF32ToUTF8(key_str32, &key_str);
  vector<string> words;
  CubeUtils::SplitStringUsing(key_str, " \t", &words);
  // no words => no cost
  if (words.empty()) {
    return 0;
  }
  // aggregate the costs of all the words
  int cost = 0;
  for (int word_idx = 0; word_idx < words.size(); word_idx++) {
    // convert each word back to UTF32 for analyzing case and punctuation
    string_32 str32;
    CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32);
    int len = CubeUtils::StrLen(str32.c_str());
    // strip all trailing punctuation
    string clean_str;  // NOTE(review): unused local, kept as-is
    int clean_len = len;
    bool trunc = false;
    while (clean_len > 0 &&
           lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) {
      --clean_len;
      trunc = true;
    }
    // If either the original string was not truncated (no trailing
    // punctuation) or the entire string was removed (all characters
    // are trailing punctuation), evaluate original word as is;
    // otherwise, copy all but the trailing punctuation characters
    char_32 *clean_str32 = NULL;
    if (clean_len == 0 || !trunc) {
      clean_str32 = CubeUtils::StrDup(str32.c_str());
    } else {
      clean_str32 = new char_32[clean_len + 1];
      for (int i = 0; i < clean_len; ++i) {
        clean_str32[i] = str32[i];
      }
      clean_str32[clean_len] = '\0';
    }
    ASSERT_HOST(clean_str32 != NULL);
    string str8;
    CubeUtils::UTF32ToUTF8(clean_str32, &str8);
    int word_cost = CostInternal(str8.c_str());
    // if case invariant, get costs of all-upper-case and all-lower-case
    // versions and return the min cost
    if (clean_len >= kMinLengthNumOrCaseInvariant &&
        CubeUtils::IsCaseInvariant(clean_str32, char_set)) {
      char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set);
      if (lower_32) {
        string lower_8;
        CubeUtils::UTF32ToUTF8(lower_32, &lower_8);
        word_cost = MIN(word_cost, CostInternal(lower_8.c_str()));
        delete [] lower_32;
      }
      char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set);
      if (upper_32) {
        string upper_8;
        CubeUtils::UTF32ToUTF8(upper_32, &upper_8);
        word_cost = MIN(word_cost, CostInternal(upper_8.c_str()));
        delete [] upper_32;
      }
    }
    if (clean_len >= kMinLengthNumOrCaseInvariant) {
      // if characters are all numeric, incur 0 word cost
      bool is_numeric = true;
      for (int i = 0; i < clean_len; ++i) {
        if (!lang_mod->IsDigit(clean_str32[i]))
          is_numeric = false;
      }
      if (is_numeric)
        word_cost = 0;
    }
    delete [] clean_str32;
    cost += word_cost;
  }  // word_idx
  // return the mean cost
  return static_cast<int>(cost / static_cast<double>(words.size()));
}
/**
 * Binary-searches the lexicographically sorted words_ array for key_str.
 * Returns the stored cost on a hit; not_in_list_cost_ for an empty or
 * absent key.
 */
int WordUnigrams::CostInternal(const char *key_str) const {
  if (strlen(key_str) == 0) {
    return not_in_list_cost_;
  }
  int low = 0;
  int high = word_cnt_ - 1;
  while (low <= high) {
    int mid = (low + high) / 2;
    int cmp = strcmp(key_str, words_[mid]);
    if (cmp == 0) {
      // exact match
      return costs_[mid];
    } else if (cmp < 0) {
      // key sorts before words_[mid]
      high = mid - 1;
    } else {
      // key sorts after words_[mid]
      low = mid + 1;
    }
  }
  return not_in_list_cost_;
}
} // namespace tesseract

View File

@ -1,69 +0,0 @@
/**********************************************************************
* File: word_unigrams.h
* Description: Declaration of the Word Unigrams Class
* Author: Ahmad Abdulkader
* Created: 2008
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
// The WordUnigram class holds the unigrams of the most frequent set of words
// in a language. It is an optional component of the Cube OCR engine. If
// present, the unigram cost of a word is aggregated with the other costs
// (Recognition, Language Model, Size) to compute a cost for a word.
// The word list is assumed to be sorted in lexicographic order.
#ifndef WORD_UNIGRAMS_H
#define WORD_UNIGRAMS_H
#include <string>
#include "char_set.h"
#include "lang_model.h"
namespace tesseract {
// Holds the unigram costs of the most frequent words of a language, loaded
// from <lang>.cube.word-freq and kept sorted for binary search.
class WordUnigrams {
 public:
  WordUnigrams();
  ~WordUnigrams();
  // Load the word-list and unigrams from file and create an object
  // The word list is assumed to be sorted
  static WordUnigrams *Create(const string &data_file_path,
                              const string &lang);
  // Compute the unigram cost of a UTF-32 string. Splits into
  // space-separated tokens, strips trailing punctuation from each
  // token, evaluates case properties, and calls internal Cost()
  // function on UTF-8 version. To avoid unnecessarily penalizing
  // all-one-case words or capitalized words (first-letter
  // upper-case and remaining letters lower-case) when not all
  // versions of the word appear in the <lang>.cube.word-freq file, a
  // case-invariant cost is computed in those cases, assuming the word
  // meets a minimum length.
  int Cost(const char_32 *str32, LangModel *lang_mod,
           CharSet *char_set) const;
 protected:
  // Compute the word unigram cost of a UTF-8 string with binary
  // search of sorted words_ array.
  int CostInternal(const char *str) const;
 private:
  // Only words this length or greater qualify for all-numeric or
  // case-invariant word unigram cost.
  static const int kMinLengthNumOrCaseInvariant = 4;
  int word_cnt_;           // number of entries in words_/costs_
  char **words_;           // sorted pointers into one shared char buffer
  int *costs_;             // per-word cost, parallel to words_
  int not_in_list_cost_;   // fallback cost for out-of-vocabulary words
};
}
#endif // WORD_UNIGRAMS_H

View File

@ -1,25 +0,0 @@
# Preprocessor flags shared by all objects built in this directory.
AM_CPPFLAGS += \
    -DUSE_STD_NAMESPACE \
    -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
    -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
    -I$(top_srcdir)/image -I$(top_srcdir)/viewer

# With visibility enabled, hide everything not explicitly exported.
if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS \
    -fvisibility=hidden -fvisibility-inlines-hidden
endif

noinst_HEADERS = \
    input_file_buffer.h neural_net.h neuron.h

# Single-library build: convenience library only.
# Multi-library build: installed, versioned library.
if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_neural.la
else
lib_LTLIBRARIES = libtesseract_neural.la
libtesseract_neural_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
endif

libtesseract_neural_la_SOURCES = \
    input_file_buffer.cpp neural_net.cpp neuron.cpp sigmoid_table.cpp

View File

@ -1,45 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// input_file_buffer.cpp: Implementation of a class for an object that
// represents an input file buffer.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "input_file_buffer.h"
namespace tesseract {
// Default and only constructor: records the path; the file itself is not
// opened until the first Read() call.
InputFileBuffer::InputFileBuffer(const string &file_name)
    : file_name_(file_name), fp_(NULL) {
}
// Virtual destructor: closes the handle if Read() ever opened it.
InputFileBuffer::~InputFileBuffer() {
  if (fp_ != NULL) fclose(fp_);
}
// Reads up to bytes_to_read bytes into buffer, opening the file lazily on
// first use.  Returns the number of bytes actually read; 0 when the file
// cannot be opened.
int InputFileBuffer::Read(void *buffer, int bytes_to_read) {
  if (fp_ == NULL) {
    fp_ = fopen(file_name_.c_str(), "rb");
  }
  if (fp_ == NULL) {
    return 0;  // open failed; a later call will retry
  }
  return fread(buffer, 1, bytes_to_read, fp_);
}
}

View File

@ -1,40 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// input_file_buffer.h: Declarations of a class for an object that
// represents an input file buffer.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef INPUT_FILE_BUFFER_H
#define INPUT_FILE_BUFFER_H
#include <stdio.h>
#include <string>
#ifdef USE_STD_NAMESPACE
using std::string;
#endif
namespace tesseract {
// Thin wrapper around a C FILE* used to stream bytes from a file.  The
// file is opened lazily by the first Read() and closed by the destructor.
class InputFileBuffer {
 public:
  explicit InputFileBuffer(const string &file_name);
  virtual ~InputFileBuffer();
  // Reads up to bytes_to_read bytes into buffer; returns the count actually
  // read (0 on open failure or at end of file).
  int Read(void *buffer, int bytes_to_read);
 protected:
  string file_name_;  // path of the file to read
  FILE *fp_;          // NULL until the first Read() opens the file
};
}
#endif  // INPUT_FILE_BUFFER_H

View File

@ -1,308 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// neural_net.cpp: Implementation of a class for an object that
// represents an arbitrary network of neurons
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <vector>
#include <string>
#include "neural_net.h"
#include "input_file_buffer.h"
namespace tesseract {
// All real setup happens in Init(); the constructor only resets state.
NeuralNet::NeuralNet() { Init(); }
// Releases all heap state: weight chunks, the neuron array, and the
// per-node fan-in arrays of the fast (read-only) representation.
NeuralNet::~NeuralNet() {
  // clean up the wts chunks vector
  for (int vec = 0; vec < static_cast<int>(wts_vec_.size()); vec++) {
    delete wts_vec_[vec];
  }
  // clean up neurons
  delete []neurons_;
  // clean up nodes.  Iterate over fast_nodes_'s own size rather than
  // neuron_cnt_: if CreateFastNet() was never called, fast_nodes_ is empty
  // and indexing it with neuron_cnt_ would read past the end of the vector.
  for (int node_idx = 0;
       node_idx < static_cast<int>(fast_nodes_.size()); node_idx++) {
    delete []fast_nodes_[node_idx].inputs;
  }
}
// Initialization function: resets the net to an empty, read-only state.
void NeuralNet::Init() {
  read_only_ = true;
  auto_encoder_ = false;
  // weight bookkeeping: nothing allocated yet
  alloc_wgt_cnt_ = 0;
  wts_cnt_ = 0;
  // empty topology
  neuron_cnt_ = 0;
  in_cnt_ = 0;
  out_cnt_ = 0;
  wts_vec_.clear();
  neurons_ = NULL;
  // input normalization statistics
  inputs_mean_.clear();
  inputs_std_dev_.clear();
  inputs_min_.clear();
  inputs_max_.clear();
}
// Does a fast feedforward for read_only nets.
// Templatized for float and double Types.
// Walks fast_nodes_ linearly, which relies on the ordering CreateFastNet()
// enforces: every node's fan-in ids are strictly smaller than its own index
// (inputs first, outputs last).
template <typename Type> bool NeuralNet::FastFeedForward(const Type *inputs,
                                                         Type *outputs) {
  int node_idx = 0;
  Node *node = &fast_nodes_[0];
  // feed inputs in and offset them by the pre-computed bias
  for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
    node->out = inputs[node_idx] - node->bias;
  }
  // compute nodes activations and outputs
  for (;node_idx < neuron_cnt_; node_idx++, node++) {
    double activation = -node->bias;
    // weighted sum over this node's fan-in
    for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
      activation += (node->inputs[fan_in_idx].input_weight *
                     node->inputs[fan_in_idx].input_node->out);
    }
    node->out = Neuron::Sigmoid(activation);
  }
  // copy the outputs (the last out_cnt_ nodes) to the output buffer
  node = &fast_nodes_[neuron_cnt_ - out_cnt_];
  for (node_idx = 0; node_idx < out_cnt_; node_idx++, node++) {
    outputs[node_idx] = node->out;
  }
  return true;
}
// Performs a feedforward for general nets. Used mainly in training mode.
// Templatized for float and double Types.  `outputs` must hold out_cnt_
// elements.
template <typename Type> bool NeuralNet::FeedForward(const Type *inputs,
                                                     Type *outputs) {
  // call the fast version in case of readonly nets
  if (read_only_) {
    return FastFeedForward(inputs, outputs);
  }
  // clear all neurons
  Clear();
  // for auto encoders, apply no input normalization
  if (auto_encoder_) {
    for (int in = 0; in < in_cnt_; in++) {
      neurons_[in].set_output(inputs[in]);
    }
  } else {
    // Input normalization: rescale to [0,1] by min/max, then subtract mean
    // and divide by stddev
    for (int in = 0; in < in_cnt_; in++) {
      neurons_[in].set_output((inputs[in] - inputs_min_[in]) /
                              (inputs_max_[in] - inputs_min_[in]));
      neurons_[in].set_output((neurons_[in].output() - inputs_mean_[in]) /
                              inputs_std_dev_[in]);
    }
  }
  // compute the net outputs: follow a pull model each output pulls the
  // outputs of its input nodes and so on
  for (int out = neuron_cnt_ - out_cnt_; out < neuron_cnt_; out++) {
    neurons_[out].FeedForward();
    // Copy the value to the output buffer, re-basing the absolute neuron
    // index to the start of the output layer.  Callers size `outputs` to
    // out_cnt_ (see GetNetOutput), so the previous code's `outputs[out]`
    // wrote past the end of the buffer whenever the net had any
    // input/hidden nodes.
    outputs[out - (neuron_cnt_ - out_cnt_)] = neurons_[out].output();
  }
  return true;
}
// Registers a directed connection from neuron `from` to neuron `to`,
// backed by one freshly allocated weight.  Returns false if the weight
// allocation fails.
bool NeuralNet::SetConnection(int from, int to) {
  float *wgt = AllocWgt(1);
  if (wgt == NULL) {
    return false;
  }
  neurons_[to].AddFromConnection(neurons_ + from, wgt, 1);
  return true;
}
// Create a fast readonly version of the net: flattens the neuron graph into
// fast_nodes_, folding the input-normalization statistics into per-node
// biases and weights so FastFeedForward() can skip normalization.
// Returns false on an inconsistent net (feedback edge or weight-count
// mismatch).
bool NeuralNet::CreateFastNet() {
  fast_nodes_.resize(neuron_cnt_);
  // build the node structures
  int wts_cnt = 0;
  for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
    Node *node = &fast_nodes_[node_idx];
    if (neurons_[node_idx].node_type() == Neuron::Input) {
      // Input neurons have no fan-in
      node->fan_in_cnt = 0;
      node->inputs = NULL;
      // Input bias is the normalization offset computed from
      // training input stats
      if (fabs(inputs_max_[node_idx] - inputs_min_[node_idx]) <
          kMinInputRange) {
        // if the range approaches zero, the stdev is not defined,
        // this indicates that this input does not change.
        // Set the bias to zero
        node->bias = 0.0f;
      } else {
        node->bias = inputs_min_[node_idx] + (inputs_mean_[node_idx] *
            (inputs_max_[node_idx] - inputs_min_[node_idx]));
      }
    } else {
      node->bias = neurons_[node_idx].bias();
      node->fan_in_cnt = neurons_[node_idx].fan_in_cnt();
      // allocate memory for fan-in nodes
      node->inputs = new WeightedNode[node->fan_in_cnt];
      for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) {
        // identify fan-in neuron
        const int id = neurons_[node_idx].fan_in(fan_in)->id();
        // Feedback connections are not allowed and should never happen
        if (id >= node_idx) {
          return false;
        }
        // add the fan-in neuron and its wgt
        node->inputs[fan_in].input_node = &fast_nodes_[id];
        float wgt_val = neurons_[node_idx].fan_in_wts(fan_in);
        // for input neurons normalize the wgt by the input scaling
        // values to save time during feedforward
        if (neurons_[node_idx].fan_in(fan_in)->node_type() == Neuron::Input) {
          // if the range approaches zero, the stdev is not defined,
          // this indicates that this input does not change.
          // Set the weight to zero
          if (fabs(inputs_max_[id] - inputs_min_[id]) < kMinInputRange) {
            wgt_val = 0.0f;
          } else {
            wgt_val /= ((inputs_max_[id] - inputs_min_[id]) *
                        inputs_std_dev_[id]);
          }
        }
        node->inputs[fan_in].input_weight = wgt_val;
      }
      // incr wgt count to validate against at the end
      wts_cnt += node->fan_in_cnt;
    }
  }
  // sanity check
  return wts_cnt_ == wts_cnt;
}
// Returns a pointer to the requested number of weights, carving them out of
// chunk allocations so individual connections do not each hit the heap.
float * NeuralNet::AllocWgt(int wgt_cnt) {
  // see if we need to allocate a new chunk of wts
  if (wts_vec_.size() == 0 || (alloc_wgt_cnt_ + wgt_cnt) > kWgtChunkSize) {
    // Size the chunk to at least wgt_cnt: a request larger than
    // kWgtChunkSize previously got a standard-sized chunk, and the caller
    // would then write past its end.
    int chunk_size = kWgtChunkSize;
    if (wgt_cnt > chunk_size) {
      chunk_size = wgt_cnt;
    }
    // add the new chunk to the wts_chunks vector
    wts_vec_.push_back(new vector<float> (chunk_size));
    alloc_wgt_cnt_ = 0;
  }
  float *ret_ptr = &((*wts_vec_.back())[alloc_wgt_cnt_]);
  // incr usage counts
  alloc_wgt_cnt_ += wgt_cnt;
  wts_cnt_ += wgt_cnt;
  return ret_ptr;
}
// Creates a new net object from the named file; returns NULL on failure.
NeuralNet *NeuralNet::FromFile(const string file_name) {
  // wrap the file in a buffered reader and delegate
  InputFileBuffer input_buff(file_name);
  return FromInputBuffer(&input_buff);
}
// Creates a net object from an input buffer; returns NULL if the binary
// net data cannot be read.
NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) {
  NeuralNet *net_obj = new NeuralNet();
  if (net_obj->ReadBinary(ib)) {
    return net_obj;
  }
  delete net_obj;
  return NULL;
}
// Compute the output of a specific output node.
// This function is useful for applications that are interested in a single
// output of the net and do not want to waste time on the rest.
// This is the fast-read-only version of this function.
// NOTE(review): only hidden nodes are evaluated before jumping to the
// requested output; this assumes output nodes never feed other output
// nodes — confirm against the net construction code.
template <typename Type> bool NeuralNet::FastGetNetOutput(const Type *inputs,
                                                          int output_id,
                                                          Type *output) {
  // feed inputs in and offset them by the pre-computed bias
  int node_idx = 0;
  Node *node = &fast_nodes_[0];
  for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
    node->out = inputs[node_idx] - node->bias;
  }
  // compute nodes' activations and outputs for hidden nodes if any
  int hidden_node_cnt = neuron_cnt_ - out_cnt_;
  for (;node_idx < hidden_node_cnt; node_idx++, node++) {
    double activation = -node->bias;
    for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
      activation += (node->inputs[fan_in_idx].input_weight *
                     node->inputs[fan_in_idx].input_node->out);
    }
    node->out = Neuron::Sigmoid(activation);
  }
  // compute the output of the required output node only
  node += output_id;
  double activation = -node->bias;
  for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
    activation += (node->inputs[fan_in_idx].input_weight *
                   node->inputs[fan_in_idx].input_node->out);
  }
  (*output) = Neuron::Sigmoid(activation);
  return true;
}
// Performs a feedforward for general nets. Used mainly in training mode
// Templatized for float and double Types
template <typename Type> bool NeuralNet::GetNetOutput(const Type *inputs,
int output_id,
Type *output) {
// validate output id
if (output_id < 0 || output_id >= out_cnt_) {
return false;
}
// call the fast version in case of readonly nets
if (read_only_) {
return FastGetNetOutput(inputs, output_id, output);
}
// For the slow version, we'll just call FeedForward and return the
// appropriate output
vector<Type> outputs(out_cnt_);
if (!FeedForward(inputs, &outputs[0])) {
return false;
}
(*output) = outputs[output_id];
return true;
}
// Instantiate all supported templates now that the functions have been defined.
// Explicit instantiation keeps the template definitions in this .cpp file
// while still letting other translation units link against the float and
// double flavors (and the InputFileBuffer flavor of ReadBinary).
template bool NeuralNet::FeedForward(const float *inputs, float *outputs);
template bool NeuralNet::FeedForward(const double *inputs, double *outputs);
template bool NeuralNet::FastFeedForward(const float *inputs, float *outputs);
template bool NeuralNet::FastFeedForward(const double *inputs,
                                         double *outputs);
template bool NeuralNet::GetNetOutput(const float *inputs, int output_id,
                                      float *output);
template bool NeuralNet::GetNetOutput(const double *inputs, int output_id,
                                      double *output);
template bool NeuralNet::FastGetNetOutput(const float *inputs, int output_id,
                                          float *output);
template bool NeuralNet::FastGetNetOutput(const double *inputs, int output_id,
                                          double *output);
template bool NeuralNet::ReadBinary(InputFileBuffer *input_buffer);
}

View File

@ -1,252 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// neural_net.h: Declarations of a class for an object that
// represents an arbitrary network of neurons
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NEURAL_NET_H
#define NEURAL_NET_H
#include <string>
#include <vector>
#include "neuron.h"
#include "input_file_buffer.h"
namespace tesseract {
// Minimum input range below which we set the input weight to zero
static const float kMinInputRange = 1e-6f;
// A feed-forward network of sigmoid neurons that is deserialized from a
// binary stream. The base class is read-only (inference only); the
// read_only_ flag is expected to be cleared by a trainable subclass.
class NeuralNet {
 public:
  NeuralNet();
  virtual ~NeuralNet();
  // create a net object from a file. Uses stdio.
  // Returns NULL on failure; the caller owns the returned object.
  static NeuralNet *FromFile(const string file_name);
  // create a net object from an input buffer.
  // Returns NULL on failure; the caller owns the returned object.
  static NeuralNet *FromInputBuffer(InputFileBuffer *ib);
  // Different flavors of feed forward function.
  // Runs the whole net on 'inputs' and fills 'outputs'; instantiated for
  // float and double.
  template <typename Type> bool FeedForward(const Type *inputs,
                                            Type *outputs);
  // Compute the output of a specific output node.
  // This function is useful for applications that are interested in a single
  // output of the net and do not want to waste time on the rest.
  template <typename Type> bool GetNetOutput(const Type *inputs,
                                             int output_id,
                                             Type *output);
  // Accessor functions
  int in_cnt() const { return in_cnt_; }
  int out_cnt() const { return out_cnt_; }
 protected:
  struct Node;
  // A node-weight pair: one incoming connection of a fast-net Node.
  struct WeightedNode {
    Node *input_node;
    float input_weight;
  };
  // node struct used for fast feedforward in
  // Read only nets
  struct Node {
    float out;             // last computed output of this node
    float bias;            // bias, subtracted when the activation is summed
    int fan_in_cnt;        // number of incoming connections
    WeightedNode *inputs;  // array of fan_in_cnt incoming connections
  };
  // Read-Only flag (no training: On by default)
  // will presumably be set to false by
  // the inheriting TrainableNeuralNet class
  bool read_only_;
  // input count
  int in_cnt_;
  // output count
  int out_cnt_;
  // Total neuron count (including inputs)
  int neuron_cnt_;
  // count of unique weights
  int wts_cnt_;
  // Neuron vector (array of neuron_cnt_ neurons, allocated in ReadBinary)
  Neuron *neurons_;
  // size of allocated weight chunk (in weights)
  // This is basically the size of the biggest network
  // that I have trained. However, the class will allow
  // a bigger sized net if desired
  static const int kWgtChunkSize = 0x10000;
  // Magic number expected at the beginning of the NN
  // binary file
  static const unsigned int kNetSignature = 0xFEFEABD0;
  // count of allocated wgts in the last chunk
  int alloc_wgt_cnt_;
  // vector of weights buffers, allocated in chunks by AllocWgt
  vector<vector<float> *>wts_vec_;
  // Is the net an auto-encoder type
  bool auto_encoder_;
  // vector of input max values
  vector<float> inputs_max_;
  // vector of input min values
  vector<float> inputs_min_;
  // vector of input mean values
  vector<float> inputs_mean_;
  // vector of input standard deviation values
  vector<float> inputs_std_dev_;
  // vector of input offsets used by fast read-only
  // feedforward function
  vector<Node> fast_nodes_;
  // Network Initialization function
  void Init();
  // Clears all neurons: marks every neuron's activation as stale so the
  // next FeedForward recomputes it.
  void Clear() {
    for (int node = 0; node < neuron_cnt_; node++) {
      neurons_[node].Clear();
    }
  }
  // Reads the net from an input buffer. Stream layout: signature word,
  // auto-encoder flag, neuron count, input count, output count, per-node
  // fan-out connection lists, per-neuron biases and weights, and finally
  // the input mean/std-dev/min/max statistics.
  // Returns false on any short read or validation failure; the net may be
  // left partially initialized in that case.
  template<class ReadBuffType> bool ReadBinary(ReadBuffType *input_buff) {
    // Init vars
    Init();
    // is this an autoencoder
    unsigned int read_val;
    unsigned int auto_encode;
    // read and verify signature
    if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
      return false;
    }
    if (read_val != kNetSignature) {
      return false;
    }
    if (input_buff->Read(&auto_encode, sizeof(auto_encode)) !=
        sizeof(auto_encode)) {
      return false;
    }
    auto_encoder_ = auto_encode;
    // read and validate total # of nodes
    if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
      return false;
    }
    neuron_cnt_ = read_val;
    if (neuron_cnt_ <= 0) {
      return false;
    }
    // set the size of the neurons vector
    neurons_ = new Neuron[neuron_cnt_];
    // read & validate inputs
    if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
      return false;
    }
    in_cnt_ = read_val;
    if (in_cnt_ <= 0) {
      return false;
    }
    // read outputs
    if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
      return false;
    }
    out_cnt_ = read_val;
    if (out_cnt_ <= 0) {
      return false;
    }
    // set neuron ids and types; layout is [inputs | hidden | outputs]
    for (int idx = 0; idx < neuron_cnt_; idx++) {
      neurons_[idx].set_id(idx);
      // input type
      if (idx < in_cnt_) {
        neurons_[idx].set_node_type(Neuron::Input);
      } else if (idx >= (neuron_cnt_ - out_cnt_)) {
        neurons_[idx].set_node_type(Neuron::Output);
      } else {
        neurons_[idx].set_node_type(Neuron::Hidden);
      }
    }
    // read the connections
    for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
      // read fanout
      if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
        return false;
      }
      // read the neuron's info
      int fan_out_cnt = read_val;
      for (int fan_out_idx = 0; fan_out_idx < fan_out_cnt; fan_out_idx++) {
        // read the neuron id
        if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
          return false;
        }
        // create the connection
        if (!SetConnection(node_idx, read_val)) {
          return false;
        }
      }
    }
    // read all the neurons' fan-in connections (biases and weights)
    for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
      // read
      if (!neurons_[node_idx].ReadBinary(input_buff)) {
        return false;
      }
    }
    // size input stats vector to expected input size
    inputs_mean_.resize(in_cnt_);
    inputs_std_dev_.resize(in_cnt_);
    inputs_min_.resize(in_cnt_);
    inputs_max_.resize(in_cnt_);
    // read stats
    if (input_buff->Read(&(inputs_mean_.front()),
                         sizeof(inputs_mean_[0]) * in_cnt_) !=
        sizeof(inputs_mean_[0]) * in_cnt_) {
      return false;
    }
    if (input_buff->Read(&(inputs_std_dev_.front()),
                         sizeof(inputs_std_dev_[0]) * in_cnt_) !=
        sizeof(inputs_std_dev_[0]) * in_cnt_) {
      return false;
    }
    if (input_buff->Read(&(inputs_min_.front()),
                         sizeof(inputs_min_[0]) * in_cnt_) !=
        sizeof(inputs_min_[0]) * in_cnt_) {
      return false;
    }
    if (input_buff->Read(&(inputs_max_.front()),
                         sizeof(inputs_max_[0]) * in_cnt_) !=
        sizeof(inputs_max_[0]) * in_cnt_) {
      return false;
    }
    // create a readonly version for fast feedforward
    if (read_only_) {
      return CreateFastNet();
    }
    return true;
  }
  // creates a connection between two nodes
  bool SetConnection(int from, int to);
  // Create a read only version of the net that
  // has faster feedforward performance
  bool CreateFastNet();
  // internal function to allocate a new set of weights
  // Centralized weight allocation attempts to increase
  // weights locality of reference making it more cache friendly
  float *AllocWgt(int wgt_cnt);
  // different flavors read-only feedforward function
  template <typename Type> bool FastFeedForward(const Type *inputs,
                                                Type *outputs);
  // Compute the output of a specific output node.
  // This function is useful for applications that are interested in a single
  // output of the net and do not want to waste time on the rest.
  // This is the fast-read-only version of this function.
  template <typename Type> bool FastGetNetOutput(const Type *inputs,
                                                 int output_id,
                                                 Type *output);
};
}
#endif  // NEURAL_NET_H

View File

@ -1,103 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// neuron.cpp: The implementation of a class for an object
// that represents a single neuron in a neural network
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "neuron.h"
#include "input_file_buffer.h"
namespace tesseract {
// Instantiate all supported templates
// (ReadBinary is defined in neuron.h; this emits the InputFileBuffer
// flavor so other translation units can link against it.)
template bool Neuron::ReadBinary(InputFileBuffer *input_buffer);
// default and only constructor: starts as a disconnected, typeless neuron
Neuron::Neuron() {
  Init();
}
// destructor; nothing to release explicitly — the fan-in vectors hold
// pointers into storage owned elsewhere (see AddFromConnection)
Neuron::~Neuron() {
}
// Resets the neuron to a pristine, disconnected state.
void Neuron::Init() {
  // identity and topology
  id_ = -1;
  node_type_ = Unknown;
  fan_in_.clear();
  fan_in_weights_.clear();
  // numeric state
  bias_ = 0.0f;
  activation_ = 0.0f;
  output_ = 0.0f;
  // nothing pending to recompute
  frwd_dirty_ = false;
}
// Computes the activation and output of the neuron if not fresh
// by pulling the outputs of all fan-in neurons
void Neuron::FeedForward() {
if (!frwd_dirty_ ) {
return;
}
// nothing to do for input nodes: just pass the input to the o/p
// otherwise, pull the output of all fan-in neurons
if (node_type_ != Input) {
int fan_in_cnt = fan_in_.size();
// sum out the activation
activation_ = -bias_;
for (int in = 0; in < fan_in_cnt; in++) {
if (fan_in_[in]->frwd_dirty_) {
fan_in_[in]->FeedForward();
}
activation_ += ((*(fan_in_weights_[in])) * fan_in_[in]->output_);
}
// sigmoid it
output_ = Sigmoid(activation_);
}
frwd_dirty_ = false;
}
// set the type of the neuron (Input, Hidden, Output, or Unknown)
void Neuron::set_node_type(NeuronTypes Type) {
  node_type_ = Type;
}
// Adds new connections *to* this neuron *from* the first from_cnt entries
// of a neuron array, with weights taken from the parallel weight array.
// Only pointers to the supplied neurons and weights are stored, never
// copies of the values. This is deliberate: neurons and weights are
// allocated centrally, which improves locality of reference and cache
// hit rates (measured as a 2X-10X speedup depending on network size
// and processor).
void Neuron::AddFromConnection(Neuron *neurons,
                               float *wts_offset,
                               int from_cnt) {
  for (int idx = 0; idx < from_cnt; ++idx) {
    fan_in_.push_back(&neurons[idx]);
    fan_in_weights_.push_back(&wts_offset[idx]);
  }
}
// fast computation of the sigmoid function using a lookup table
// defined in sigmoid_table.cpp
float Neuron::Sigmoid(float activation) {
  // saturate outside the table's domain
  if (activation >= 10.0f) {
    return 1.0f;
  }
  if (activation <= -10.0f) {
    return 0.0f;
  }
  // table has one entry per 0.01 step of activation, starting at -10
  return kSigmoidTable[static_cast<int>(100 * (activation + 10.0))];
}
}

View File

@ -1,156 +0,0 @@
// Copyright 2008 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// neuron.h: Declarations of a class for an object that
// represents a single neuron in a neural network
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef NEURON_H
#define NEURON_H
#include <math.h>
#include <vector>
#ifdef USE_STD_NAMESPACE
using std::vector;
#endif
namespace tesseract {
// Input Node bias values
static const float kInputNodeBias = 0.0f;
// A single neuron in the network: holds its bias, pointers to its fan-in
// neurons and their connection weights, and the last computed
// activation/output pair.
class Neuron {
 public:
  // Types of nodes
  enum NeuronTypes {
    Unknown = 0,
    Input,
    Hidden,
    Output
  };
  Neuron();
  ~Neuron();
  // set the forward dirty flag indicating that the
  // activation of the net is not fresh
  void Clear() {
    frwd_dirty_ = true;
  }
  // Read a binary representation of the neuron info from
  // an input buffer: bias first, then the fan-in count (which must match
  // the topology already built via AddFromConnection), then one weight
  // per fan-in connection. Weights are written through the stored
  // pointers, i.e. into the centrally allocated weight buffers.
  // Returns false on a short read or a fan-in count mismatch.
  template <class BuffType> bool ReadBinary(BuffType *input_buff) {
    float val;
    if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
      return false;
    }
    // input nodes should have no biases: the stored value is discarded
    if (node_type_ == Input) {
      bias_ = kInputNodeBias;
    } else {
      bias_ = val;
    }
    // read fanin count
    int fan_in_cnt;
    if (input_buff->Read(&fan_in_cnt, sizeof(fan_in_cnt)) !=
        sizeof(fan_in_cnt)) {
      return false;
    }
    // validate fan-in cnt against the connections created so far
    if (fan_in_cnt != fan_in_.size()) {
      return false;
    }
    // read the weights
    for (int in = 0; in < fan_in_cnt; in++) {
      if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
        return false;
      }
      *(fan_in_weights_[in]) = val;
    }
    return true;
  }
  // Add new connections *to* this neuron *from*
  // a target neuron array using the specified params.
  // Note that what is actually copied in this function are pointers to the
  // specified Neurons and weights and not the actual values. This is by
  // design to centralize the allocation of neurons and weights and so
  // increase the locality of reference and improve cache-hits resulting
  // in a faster net. This technique resulted in a 2X-10X speedup
  // (depending on network size and processor)
  void AddFromConnection(Neuron *neuron_vec,
                         float *wts_offset,
                         int from_cnt);
  // Set the type of a neuron
  void set_node_type(NeuronTypes type);
  // Computes the output of the node by
  // "pulling" the output of the fan-in nodes
  void FeedForward();
  // fast computation of sigmoid function using a lookup table
  // defined in sigmoid_table.cpp
  static float Sigmoid(float activation);
  // Accessor functions
  float output() const {
    return output_;
  }
  void set_output(float out_val) {
    output_ = out_val;
  }
  int id() const {
    return id_;
  }
  int fan_in_cnt() const {
    return fan_in_.size();
  }
  Neuron * fan_in(int idx) const {
    return fan_in_[idx];
  }
  float fan_in_wts(int idx) const {
    return *(fan_in_weights_[idx]);
  }
  void set_id(int id) {
    id_ = id;
  }
  float bias() const {
    return bias_;
  }
  Neuron::NeuronTypes node_type() const {
    return node_type_;
  }
 protected:
  // Type of Neuron
  NeuronTypes node_type_;
  // unique id of the neuron
  int id_;
  // node bias
  float bias_;
  // node net activation
  float activation_;
  // node output
  float output_;
  // pointers to fanin nodes (not owned; see AddFromConnection)
  vector<Neuron *> fan_in_;
  // pointers to fanin weights (not owned; they live in centrally
  // allocated weight buffers)
  vector<float *> fan_in_weights_;
  // Sigmoid function lookup table used for fast computation
  // of sigmoid function
  static const float kSigmoidTable[];
  // flag determining if the activation of the node
  // is fresh or not (dirty)
  bool frwd_dirty_;
  // Initializer
  void Init();
};
}
#endif  // NEURON_H

View File

@ -1,523 +0,0 @@
// Copyright 2007 Google Inc.
// All Rights Reserved.
// Author: ahmadab@google.com (Ahmad Abdulkader)
//
// sigmoid_table.cpp: Sigmoid function lookup table
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "neuron.h"
namespace tesseract {
const float Neuron::kSigmoidTable[] = {
4.53979E-05f, 4.58541E-05f, 4.63149E-05f, 4.67804E-05f,
4.72505E-05f, 4.77254E-05f, 4.8205E-05f, 4.86894E-05f,
4.91787E-05f, 4.9673E-05f, 5.01722E-05f, 5.06764E-05f,
5.11857E-05f, 5.17001E-05f, 5.22196E-05f, 5.27444E-05f,
5.32745E-05f, 5.38099E-05f, 5.43506E-05f, 5.48968E-05f,
5.54485E-05f, 5.60058E-05f, 5.65686E-05f, 5.71371E-05f,
5.77113E-05f, 5.82913E-05f, 5.88771E-05f, 5.94688E-05f,
6.00664E-05f, 6.067E-05f, 6.12797E-05f, 6.18956E-05f,
6.25176E-05f, 6.31459E-05f, 6.37805E-05f, 6.44214E-05f,
6.50688E-05f, 6.57227E-05f, 6.63832E-05f, 6.70503E-05f,
6.77241E-05f, 6.84047E-05f, 6.90922E-05f, 6.97865E-05f,
7.04878E-05f, 7.11962E-05f, 7.19117E-05f, 7.26343E-05f,
7.33643E-05f, 7.41016E-05f, 7.48462E-05f, 7.55984E-05f,
7.63581E-05f, 7.71255E-05f, 7.79005E-05f, 7.86834E-05f,
7.94741E-05f, 8.02728E-05f, 8.10794E-05f, 8.18942E-05f,
8.27172E-05f, 8.35485E-05f, 8.43881E-05f, 8.52361E-05f,
8.60927E-05f, 8.69579E-05f, 8.78317E-05f, 8.87144E-05f,
8.96059E-05f, 9.05064E-05f, 9.14159E-05f, 9.23345E-05f,
9.32624E-05f, 9.41996E-05f, 9.51463E-05f, 9.61024E-05f,
9.70682E-05f, 9.80436E-05f, 9.90289E-05f, 0.000100024f,
0.000101029f, 0.000102044f, 0.00010307f, 0.000104106f,
0.000105152f, 0.000106209f, 0.000107276f, 0.000108354f,
0.000109443f, 0.000110542f, 0.000111653f, 0.000112775f,
0.000113909f, 0.000115053f, 0.000116209f, 0.000117377f,
0.000118557f, 0.000119748f, 0.000120951f, 0.000122167f,
0.000123395f, 0.000124635f, 0.000125887f, 0.000127152f,
0.00012843f, 0.00012972f, 0.000131024f, 0.000132341f,
0.00013367f, 0.000135014f, 0.00013637f, 0.000137741f,
0.000139125f, 0.000140523f, 0.000141935f, 0.000143361f,
0.000144802f, 0.000146257f, 0.000147727f, 0.000149211f,
0.00015071f, 0.000152225f, 0.000153754f, 0.000155299f,
0.00015686f, 0.000158436f, 0.000160028f, 0.000161636f,
0.000163261f, 0.000164901f, 0.000166558f, 0.000168232f,
0.000169922f, 0.00017163f, 0.000173354f, 0.000175096f,
0.000176856f, 0.000178633f, 0.000180428f, 0.000182241f,
0.000184072f, 0.000185922f, 0.00018779f, 0.000189677f,
0.000191583f, 0.000193508f, 0.000195452f, 0.000197416f,
0.0001994f, 0.000201403f, 0.000203427f, 0.000205471f,
0.000207536f, 0.000209621f, 0.000211727f, 0.000213855f,
0.000216003f, 0.000218174f, 0.000220366f, 0.00022258f,
0.000224817f, 0.000227076f, 0.000229357f, 0.000231662f,
0.00023399f, 0.000236341f, 0.000238715f, 0.000241114f,
0.000243537f, 0.000245984f, 0.000248455f, 0.000250951f,
0.000253473f, 0.00025602f, 0.000258592f, 0.00026119f,
0.000263815f, 0.000266465f, 0.000269143f, 0.000271847f,
0.000274578f, 0.000277337f, 0.000280123f, 0.000282938f,
0.000285781f, 0.000288652f, 0.000291552f, 0.000294481f,
0.00029744f, 0.000300429f, 0.000303447f, 0.000306496f,
0.000309575f, 0.000312685f, 0.000315827f, 0.000319f,
0.000322205f, 0.000325442f, 0.000328712f, 0.000332014f,
0.00033535f, 0.000338719f, 0.000342122f, 0.00034556f,
0.000349031f, 0.000352538f, 0.00035608f, 0.000359657f,
0.00036327f, 0.00036692f, 0.000370606f, 0.000374329f,
0.00037809f, 0.000381888f, 0.000385725f, 0.0003896f,
0.000393514f, 0.000397467f, 0.00040146f, 0.000405494f,
0.000409567f, 0.000413682f, 0.000417838f, 0.000422035f,
0.000426275f, 0.000430557f, 0.000434882f, 0.000439251f,
0.000443664f, 0.000448121f, 0.000452622f, 0.000457169f,
0.000461762f, 0.0004664f, 0.000471085f, 0.000475818f,
0.000480597f, 0.000485425f, 0.000490301f, 0.000495226f,
0.000500201f, 0.000505226f, 0.000510301f, 0.000515427f,
0.000520604f, 0.000525833f, 0.000531115f, 0.00053645f,
0.000541839f, 0.000547281f, 0.000552779f, 0.000558331f,
0.000563939f, 0.000569604f, 0.000575325f, 0.000581104f,
0.00058694f, 0.000592836f, 0.00059879f, 0.000604805f,
0.000610879f, 0.000617015f, 0.000623212f, 0.000629472f,
0.000635794f, 0.00064218f, 0.00064863f, 0.000655144f,
0.000661724f, 0.00066837f, 0.000675083f, 0.000681863f,
0.000688711f, 0.000695628f, 0.000702614f, 0.00070967f,
0.000716798f, 0.000723996f, 0.000731267f, 0.000738611f,
0.000746029f, 0.000753521f, 0.000761088f, 0.000768731f,
0.000776451f, 0.000784249f, 0.000792124f, 0.000800079f,
0.000808113f, 0.000816228f, 0.000824425f, 0.000832703f,
0.000841065f, 0.000849511f, 0.000858041f, 0.000866657f,
0.00087536f, 0.000884149f, 0.000893027f, 0.000901994f,
0.000911051f, 0.000920199f, 0.000929439f, 0.000938771f,
0.000948197f, 0.000957717f, 0.000967333f, 0.000977045f,
0.000986855f, 0.000996763f, 0.001006771f, 0.001016879f,
0.001027088f, 0.0010374f, 0.001047815f, 0.001058334f,
0.00106896f, 0.001079691f, 0.00109053f, 0.001101478f,
0.001112536f, 0.001123705f, 0.001134985f, 0.001146379f,
0.001157887f, 0.00116951f, 0.00118125f, 0.001193108f,
0.001205084f, 0.001217181f, 0.001229399f, 0.001241739f,
0.001254203f, 0.001266792f, 0.001279507f, 0.00129235f,
0.001305321f, 0.001318423f, 0.001331655f, 0.001345021f,
0.00135852f, 0.001372155f, 0.001385926f, 0.001399835f,
0.001413884f, 0.001428073f, 0.001442405f, 0.00145688f,
0.001471501f, 0.001486267f, 0.001501182f, 0.001516247f,
0.001531462f, 0.001546829f, 0.001562351f, 0.001578028f,
0.001593862f, 0.001609855f, 0.001626008f, 0.001642323f,
0.001658801f, 0.001675444f, 0.001692254f, 0.001709233f,
0.001726381f, 0.001743701f, 0.001761195f, 0.001778864f,
0.00179671f, 0.001814734f, 0.001832939f, 0.001851326f,
0.001869898f, 0.001888655f, 0.0019076f, 0.001926735f,
0.001946061f, 0.001965581f, 0.001985296f, 0.002005209f,
0.00202532f, 0.002045634f, 0.00206615f, 0.002086872f,
0.002107801f, 0.00212894f, 0.00215029f, 0.002171854f,
0.002193633f, 0.002215631f, 0.002237849f, 0.002260288f,
0.002282953f, 0.002305844f, 0.002328964f, 0.002352316f,
0.002375901f, 0.002399721f, 0.002423781f, 0.00244808f,
0.002472623f, 0.002497411f, 0.002522447f, 0.002547734f,
0.002573273f, 0.002599068f, 0.00262512f, 0.002651433f,
0.002678009f, 0.002704851f, 0.002731961f, 0.002759342f,
0.002786996f, 0.002814927f, 0.002843137f, 0.002871629f,
0.002900406f, 0.00292947f, 0.002958825f, 0.002988472f,
0.003018416f, 0.003048659f, 0.003079205f, 0.003110055f,
0.003141213f, 0.003172683f, 0.003204467f, 0.003236568f,
0.00326899f, 0.003301735f, 0.003334807f, 0.00336821f,
0.003401946f, 0.003436018f, 0.003470431f, 0.003505187f,
0.00354029f, 0.003575744f, 0.003611551f, 0.003647715f,
0.00368424f, 0.003721129f, 0.003758387f, 0.003796016f,
0.00383402f, 0.003872403f, 0.00391117f, 0.003950322f,
0.003989865f, 0.004029802f, 0.004070138f, 0.004110875f,
0.004152019f, 0.004193572f, 0.00423554f, 0.004277925f,
0.004320734f, 0.004363968f, 0.004407633f, 0.004451734f,
0.004496273f, 0.004541256f, 0.004586687f, 0.004632571f,
0.004678911f, 0.004725713f, 0.00477298f, 0.004820718f,
0.004868931f, 0.004917624f, 0.004966802f, 0.005016468f,
0.005066629f, 0.005117289f, 0.005168453f, 0.005220126f,
0.005272312f, 0.005325018f, 0.005378247f, 0.005432006f,
0.005486299f, 0.005541132f, 0.005596509f, 0.005652437f,
0.005708921f, 0.005765966f, 0.005823577f, 0.005881761f,
0.005940522f, 0.005999867f, 0.006059801f, 0.006120331f,
0.006181461f, 0.006243198f, 0.006305547f, 0.006368516f,
0.006432108f, 0.006496332f, 0.006561193f, 0.006626697f,
0.006692851f, 0.006759661f, 0.006827132f, 0.006895273f,
0.006964089f, 0.007033587f, 0.007103774f, 0.007174656f,
0.00724624f, 0.007318533f, 0.007391541f, 0.007465273f,
0.007539735f, 0.007614933f, 0.007690876f, 0.00776757f,
0.007845023f, 0.007923242f, 0.008002235f, 0.008082009f,
0.008162571f, 0.00824393f, 0.008326093f, 0.008409068f,
0.008492863f, 0.008577485f, 0.008662944f, 0.008749246f,
0.0088364f, 0.008924415f, 0.009013299f, 0.009103059f,
0.009193705f, 0.009285246f, 0.009377689f, 0.009471044f,
0.009565319f, 0.009660523f, 0.009756666f, 0.009853756f,
0.009951802f, 0.010050814f, 0.010150801f, 0.010251772f,
0.010353738f, 0.010456706f, 0.010560688f, 0.010665693f,
0.01077173f, 0.01087881f, 0.010986943f, 0.011096138f,
0.011206406f, 0.011317758f, 0.011430203f, 0.011543752f,
0.011658417f, 0.011774206f, 0.011891132f, 0.012009204f,
0.012128435f, 0.012248835f, 0.012370415f, 0.012493186f,
0.012617161f, 0.012742349f, 0.012868764f, 0.012996417f,
0.013125318f, 0.013255481f, 0.013386918f, 0.01351964f,
0.013653659f, 0.013788989f, 0.01392564f, 0.014063627f,
0.014202961f, 0.014343656f, 0.014485724f, 0.014629178f,
0.014774032f, 0.014920298f, 0.01506799f, 0.015217121f,
0.015367706f, 0.015519757f, 0.015673288f, 0.015828314f,
0.015984848f, 0.016142905f, 0.016302499f, 0.016463645f,
0.016626356f, 0.016790648f, 0.016956536f, 0.017124033f,
0.017293157f, 0.01746392f, 0.01763634f, 0.017810432f,
0.01798621f, 0.018163691f, 0.018342891f, 0.018523825f,
0.01870651f, 0.018890962f, 0.019077197f, 0.019265233f,
0.019455085f, 0.01964677f, 0.019840306f, 0.020035709f,
0.020232997f, 0.020432187f, 0.020633297f, 0.020836345f,
0.021041347f, 0.021248323f, 0.02145729f, 0.021668266f,
0.021881271f, 0.022096322f, 0.022313439f, 0.022532639f,
0.022753943f, 0.02297737f, 0.023202938f, 0.023430668f,
0.023660578f, 0.023892689f, 0.024127021f, 0.024363594f,
0.024602428f, 0.024843544f, 0.025086962f, 0.025332703f,
0.025580788f, 0.025831239f, 0.026084075f, 0.02633932f,
0.026596994f, 0.026857119f, 0.027119717f, 0.027384811f,
0.027652422f, 0.027922574f, 0.028195288f, 0.028470588f,
0.028748496f, 0.029029036f, 0.029312231f, 0.029598104f,
0.02988668f, 0.030177981f, 0.030472033f, 0.030768859f,
0.031068484f, 0.031370932f, 0.031676228f, 0.031984397f,
0.032295465f, 0.032609455f, 0.032926395f, 0.033246309f,
0.033569223f, 0.033895164f, 0.034224158f, 0.03455623f,
0.034891409f, 0.035229719f, 0.035571189f, 0.035915846f,
0.036263716f, 0.036614828f, 0.036969209f, 0.037326887f,
0.037687891f, 0.038052247f, 0.038419986f, 0.038791134f,
0.039165723f, 0.03954378f, 0.039925334f, 0.040310415f,
0.040699054f, 0.041091278f, 0.041487119f, 0.041886607f,
0.042289772f, 0.042696644f, 0.043107255f, 0.043521635f,
0.043939815f, 0.044361828f, 0.044787703f, 0.045217473f,
0.045651171f, 0.046088827f, 0.046530475f, 0.046976146f,
0.047425873f, 0.04787969f, 0.048337629f, 0.048799723f,
0.049266006f, 0.049736512f, 0.050211273f, 0.050690325f,
0.051173701f, 0.051661435f, 0.052153563f, 0.052650118f,
0.053151136f, 0.053656652f, 0.0541667f, 0.054681317f,
0.055200538f, 0.055724398f, 0.056252934f, 0.056786181f,
0.057324176f, 0.057866955f, 0.058414556f, 0.058967013f,
0.059524366f, 0.06008665f, 0.060653903f, 0.061226163f,
0.061803466f, 0.062385851f, 0.062973356f, 0.063566018f,
0.064163876f, 0.064766969f, 0.065375333f, 0.065989009f,
0.066608036f, 0.067232451f, 0.067862294f, 0.068497604f,
0.06913842f, 0.069784783f, 0.070436731f, 0.071094304f,
0.071757542f, 0.072426485f, 0.073101173f, 0.073781647f,
0.074467945f, 0.075160109f, 0.07585818f, 0.076562197f,
0.077272202f, 0.077988235f, 0.078710337f, 0.079438549f,
0.080172912f, 0.080913467f, 0.081660255f, 0.082413318f,
0.083172696f, 0.083938432f, 0.084710566f, 0.085489139f,
0.086274194f, 0.087065772f, 0.087863915f, 0.088668663f,
0.089480059f, 0.090298145f, 0.091122961f, 0.09195455f,
0.092792953f, 0.093638212f, 0.094490369f, 0.095349465f,
0.096215542f, 0.097088641f, 0.097968804f, 0.098856073f,
0.099750489f, 0.100652094f, 0.101560928f, 0.102477033f,
0.103400451f, 0.104331223f, 0.10526939f, 0.106214992f,
0.10716807f, 0.108128667f, 0.109096821f, 0.110072574f,
0.111055967f, 0.112047039f, 0.11304583f, 0.114052381f,
0.115066732f, 0.116088922f, 0.117118991f, 0.118156978f,
0.119202922f, 0.120256862f, 0.121318838f, 0.122388887f,
0.123467048f, 0.124553358f, 0.125647857f, 0.12675058f,
0.127861566f, 0.128980852f, 0.130108474f, 0.131244469f,
0.132388874f, 0.133541723f, 0.134703052f, 0.135872897f,
0.137051293f, 0.138238273f, 0.139433873f, 0.140638126f,
0.141851065f, 0.143072723f, 0.144303134f, 0.145542329f,
0.14679034f, 0.148047198f, 0.149312935f, 0.15058758f,
0.151871164f, 0.153163716f, 0.154465265f, 0.15577584f,
0.157095469f, 0.158424179f, 0.159761997f, 0.16110895f,
0.162465063f, 0.163830361f, 0.16520487f, 0.166588614f,
0.167981615f, 0.169383897f, 0.170795482f, 0.172216392f,
0.173646647f, 0.175086268f, 0.176535275f, 0.177993686f,
0.179461519f, 0.180938793f, 0.182425524f, 0.183921727f,
0.185427419f, 0.186942614f, 0.188467325f, 0.190001566f,
0.191545349f, 0.193098684f, 0.194661584f, 0.196234056f,
0.197816111f, 0.199407757f, 0.201009f, 0.202619846f,
0.204240302f, 0.205870372f, 0.207510059f, 0.209159365f,
0.210818293f, 0.212486844f, 0.214165017f, 0.215852811f,
0.217550224f, 0.219257252f, 0.220973892f, 0.222700139f,
0.224435986f, 0.226181426f, 0.227936451f, 0.229701051f,
0.231475217f, 0.233258936f, 0.235052196f, 0.236854984f,
0.238667285f, 0.240489083f, 0.242320361f, 0.244161101f,
0.246011284f, 0.247870889f, 0.249739894f, 0.251618278f,
0.253506017f, 0.255403084f, 0.257309455f, 0.259225101f,
0.261149994f, 0.263084104f, 0.265027401f, 0.266979851f,
0.268941421f, 0.270912078f, 0.272891784f, 0.274880502f,
0.276878195f, 0.278884822f, 0.280900343f, 0.282924715f,
0.284957894f, 0.286999837f, 0.289050497f, 0.291109827f,
0.293177779f, 0.295254302f, 0.297339346f, 0.299432858f,
0.301534784f, 0.30364507f, 0.30576366f, 0.307890496f,
0.310025519f, 0.312168669f, 0.314319886f, 0.316479106f,
0.318646266f, 0.320821301f, 0.323004144f, 0.325194727f,
0.327392983f, 0.32959884f, 0.331812228f, 0.334033073f,
0.336261303f, 0.338496841f, 0.340739612f, 0.342989537f,
0.345246539f, 0.347510538f, 0.349781451f, 0.352059198f,
0.354343694f, 0.356634854f, 0.358932594f, 0.361236825f,
0.36354746f, 0.365864409f, 0.368187582f, 0.370516888f,
0.372852234f, 0.375193526f, 0.377540669f, 0.379893568f,
0.382252125f, 0.384616244f, 0.386985824f, 0.389360766f,
0.391740969f, 0.394126332f, 0.39651675f, 0.398912121f,
0.40131234f, 0.403717301f, 0.406126897f, 0.408541022f,
0.410959566f, 0.413382421f, 0.415809477f, 0.418240623f,
0.420675748f, 0.423114739f, 0.425557483f, 0.428003867f,
0.430453776f, 0.432907095f, 0.435363708f, 0.437823499f,
0.440286351f, 0.442752145f, 0.445220765f, 0.44769209f,
0.450166003f, 0.452642382f, 0.455121108f, 0.457602059f,
0.460085115f, 0.462570155f, 0.465057055f, 0.467545694f,
0.470035948f, 0.472527696f, 0.475020813f, 0.477515175f,
0.48001066f, 0.482507142f, 0.485004498f, 0.487502604f,
0.490001333f, 0.492500562f, 0.495000167f, 0.497500021f,
0.5f, 0.502499979f, 0.504999833f, 0.507499438f,
0.509998667f, 0.512497396f, 0.514995502f, 0.517492858f,
0.51998934f, 0.522484825f, 0.524979187f, 0.527472304f,
0.529964052f, 0.532454306f, 0.534942945f, 0.537429845f,
0.539914885f, 0.542397941f, 0.544878892f, 0.547357618f,
0.549833997f, 0.55230791f, 0.554779235f, 0.557247855f,
0.559713649f, 0.562176501f, 0.564636292f, 0.567092905f,
0.569546224f, 0.571996133f, 0.574442517f, 0.576885261f,
0.579324252f, 0.581759377f, 0.584190523f, 0.586617579f,
0.589040434f, 0.591458978f, 0.593873103f, 0.596282699f,
0.59868766f, 0.601087879f, 0.60348325f, 0.605873668f,
0.608259031f, 0.610639234f, 0.613014176f, 0.615383756f,
0.617747875f, 0.620106432f, 0.622459331f, 0.624806474f,
0.627147766f, 0.629483112f, 0.631812418f, 0.634135591f,
0.63645254f, 0.638763175f, 0.641067406f, 0.643365146f,
0.645656306f, 0.647940802f, 0.650218549f, 0.652489462f,
0.654753461f, 0.657010463f, 0.659260388f, 0.661503159f,
0.663738697f, 0.665966927f, 0.668187772f, 0.67040116f,
0.672607017f, 0.674805273f, 0.676995856f, 0.679178699f,
0.681353734f, 0.683520894f, 0.685680114f, 0.687831331f,
0.689974481f, 0.692109504f, 0.69423634f, 0.69635493f,
0.698465216f, 0.700567142f, 0.702660654f, 0.704745698f,
0.706822221f, 0.708890173f, 0.710949503f, 0.713000163f,
0.715042106f, 0.717075285f, 0.719099657f, 0.721115178f,
0.723121805f, 0.725119498f, 0.727108216f, 0.729087922f,
0.731058579f, 0.733020149f, 0.734972599f, 0.736915896f,
0.738850006f, 0.740774899f, 0.742690545f, 0.744596916f,
0.746493983f, 0.748381722f, 0.750260106f, 0.752129111f,
0.753988716f, 0.755838899f, 0.757679639f, 0.759510917f,
0.761332715f, 0.763145016f, 0.764947804f, 0.766741064f,
0.768524783f, 0.770298949f, 0.772063549f, 0.773818574f,
0.775564014f, 0.777299861f, 0.779026108f, 0.780742748f,
0.782449776f, 0.784147189f, 0.785834983f, 0.787513156f,
0.789181707f, 0.790840635f, 0.792489941f, 0.794129628f,
0.795759698f, 0.797380154f, 0.798991f, 0.800592243f,
0.802183889f, 0.803765944f, 0.805338416f, 0.806901316f,
0.808454651f, 0.809998434f, 0.811532675f, 0.813057386f,
0.814572581f, 0.816078273f, 0.817574476f, 0.819061207f,
0.820538481f, 0.822006314f, 0.823464725f, 0.824913732f,
0.826353353f, 0.827783608f, 0.829204518f, 0.830616103f,
0.832018385f, 0.833411386f, 0.83479513f, 0.836169639f,
0.837534937f, 0.83889105f, 0.840238003f, 0.841575821f,
0.842904531f, 0.84422416f, 0.845534735f, 0.846836284f,
0.848128836f, 0.84941242f, 0.850687065f, 0.851952802f,
0.85320966f, 0.854457671f, 0.855696866f, 0.856927277f,
0.858148935f, 0.859361874f, 0.860566127f, 0.861761727f,
0.862948707f, 0.864127103f, 0.865296948f, 0.866458277f,
0.867611126f, 0.868755531f, 0.869891526f, 0.871019148f,
0.872138434f, 0.87324942f, 0.874352143f, 0.875446642f,
0.876532952f, 0.877611113f, 0.878681162f, 0.879743138f,
0.880797078f, 0.881843022f, 0.882881009f, 0.883911078f,
0.884933268f, 0.885947619f, 0.88695417f, 0.887952961f,
0.888944033f, 0.889927426f, 0.890903179f, 0.891871333f,
0.89283193f, 0.893785008f, 0.89473061f, 0.895668777f,
0.896599549f, 0.897522967f, 0.898439072f, 0.899347906f,
0.900249511f, 0.901143927f, 0.902031196f, 0.902911359f,
0.903784458f, 0.904650535f, 0.905509631f, 0.906361788f,
0.907207047f, 0.90804545f, 0.908877039f, 0.909701855f,
0.910519941f, 0.911331337f, 0.912136085f, 0.912934228f,
0.913725806f, 0.914510861f, 0.915289434f, 0.916061568f,
0.916827304f, 0.917586682f, 0.918339745f, 0.919086533f,
0.919827088f, 0.920561451f, 0.921289663f, 0.922011765f,
0.922727798f, 0.923437803f, 0.92414182f, 0.924839891f,
0.925532055f, 0.926218353f, 0.926898827f, 0.927573515f,
0.928242458f, 0.928905696f, 0.929563269f, 0.930215217f,
0.93086158f, 0.931502396f, 0.932137706f, 0.932767549f,
0.933391964f, 0.934010991f, 0.934624667f, 0.935233031f,
0.935836124f, 0.936433982f, 0.937026644f, 0.937614149f,
0.938196534f, 0.938773837f, 0.939346097f, 0.93991335f,
0.940475634f, 0.941032987f, 0.941585444f, 0.942133045f,
0.942675824f, 0.943213819f, 0.943747066f, 0.944275602f,
0.944799462f, 0.945318683f, 0.9458333f, 0.946343348f,
0.946848864f, 0.947349882f, 0.947846437f, 0.948338565f,
0.948826299f, 0.949309675f, 0.949788727f, 0.950263488f,
0.950733994f, 0.951200277f, 0.951662371f, 0.95212031f,
0.952574127f, 0.953023854f, 0.953469525f, 0.953911173f,
0.954348829f, 0.954782527f, 0.955212297f, 0.955638172f,
0.956060185f, 0.956478365f, 0.956892745f, 0.957303356f,
0.957710228f, 0.958113393f, 0.958512881f, 0.958908722f,
0.959300946f, 0.959689585f, 0.960074666f, 0.96045622f,
0.960834277f, 0.961208866f, 0.961580014f, 0.961947753f,
0.962312109f, 0.962673113f, 0.963030791f, 0.963385172f,
0.963736284f, 0.964084154f, 0.964428811f, 0.964770281f,
0.965108591f, 0.96544377f, 0.965775842f, 0.966104836f,
0.966430777f, 0.966753691f, 0.967073605f, 0.967390545f,
0.967704535f, 0.968015603f, 0.968323772f, 0.968629068f,
0.968931516f, 0.969231141f, 0.969527967f, 0.969822019f,
0.97011332f, 0.970401896f, 0.970687769f, 0.970970964f,
0.971251504f, 0.971529412f, 0.971804712f, 0.972077426f,
0.972347578f, 0.972615189f, 0.972880283f, 0.973142881f,
0.973403006f, 0.97366068f, 0.973915925f, 0.974168761f,
0.974419212f, 0.974667297f, 0.974913038f, 0.975156456f,
0.975397572f, 0.975636406f, 0.975872979f, 0.976107311f,
0.976339422f, 0.976569332f, 0.976797062f, 0.97702263f,
0.977246057f, 0.977467361f, 0.977686561f, 0.977903678f,
0.978118729f, 0.978331734f, 0.97854271f, 0.978751677f,
0.978958653f, 0.979163655f, 0.979366703f, 0.979567813f,
0.979767003f, 0.979964291f, 0.980159694f, 0.98035323f,
0.980544915f, 0.980734767f, 0.980922803f, 0.981109038f,
0.98129349f, 0.981476175f, 0.981657109f, 0.981836309f,
0.98201379f, 0.982189568f, 0.98236366f, 0.98253608f,
0.982706843f, 0.982875967f, 0.983043464f, 0.983209352f,
0.983373644f, 0.983536355f, 0.983697501f, 0.983857095f,
0.984015152f, 0.984171686f, 0.984326712f, 0.984480243f,
0.984632294f, 0.984782879f, 0.98493201f, 0.985079702f,
0.985225968f, 0.985370822f, 0.985514276f, 0.985656344f,
0.985797039f, 0.985936373f, 0.98607436f, 0.986211011f,
0.986346341f, 0.98648036f, 0.986613082f, 0.986744519f,
0.986874682f, 0.987003583f, 0.987131236f, 0.987257651f,
0.987382839f, 0.987506814f, 0.987629585f, 0.987751165f,
0.987871565f, 0.987990796f, 0.988108868f, 0.988225794f,
0.988341583f, 0.988456248f, 0.988569797f, 0.988682242f,
0.988793594f, 0.988903862f, 0.989013057f, 0.98912119f,
0.98922827f, 0.989334307f, 0.989439312f, 0.989543294f,
0.989646262f, 0.989748228f, 0.989849199f, 0.989949186f,
0.990048198f, 0.990146244f, 0.990243334f, 0.990339477f,
0.990434681f, 0.990528956f, 0.990622311f, 0.990714754f,
0.990806295f, 0.990896941f, 0.990986701f, 0.991075585f,
0.9911636f, 0.991250754f, 0.991337056f, 0.991422515f,
0.991507137f, 0.991590932f, 0.991673907f, 0.99175607f,
0.991837429f, 0.991917991f, 0.991997765f, 0.992076758f,
0.992154977f, 0.99223243f, 0.992309124f, 0.992385067f,
0.992460265f, 0.992534727f, 0.992608459f, 0.992681467f,
0.99275376f, 0.992825344f, 0.992896226f, 0.992966413f,
0.993035911f, 0.993104727f, 0.993172868f, 0.993240339f,
0.993307149f, 0.993373303f, 0.993438807f, 0.993503668f,
0.993567892f, 0.993631484f, 0.993694453f, 0.993756802f,
0.993818539f, 0.993879669f, 0.993940199f, 0.994000133f,
0.994059478f, 0.994118239f, 0.994176423f, 0.994234034f,
0.994291079f, 0.994347563f, 0.994403491f, 0.994458868f,
0.994513701f, 0.994567994f, 0.994621753f, 0.994674982f,
0.994727688f, 0.994779874f, 0.994831547f, 0.994882711f,
0.994933371f, 0.994983532f, 0.995033198f, 0.995082376f,
0.995131069f, 0.995179282f, 0.99522702f, 0.995274287f,
0.995321089f, 0.995367429f, 0.995413313f, 0.995458744f,
0.995503727f, 0.995548266f, 0.995592367f, 0.995636032f,
0.995679266f, 0.995722075f, 0.99576446f, 0.995806428f,
0.995847981f, 0.995889125f, 0.995929862f, 0.995970198f,
0.996010135f, 0.996049678f, 0.99608883f, 0.996127597f,
0.99616598f, 0.996203984f, 0.996241613f, 0.996278871f,
0.99631576f, 0.996352285f, 0.996388449f, 0.996424256f,
0.99645971f, 0.996494813f, 0.996529569f, 0.996563982f,
0.996598054f, 0.99663179f, 0.996665193f, 0.996698265f,
0.99673101f, 0.996763432f, 0.996795533f, 0.996827317f,
0.996858787f, 0.996889945f, 0.996920795f, 0.996951341f,
0.996981584f, 0.997011528f, 0.997041175f, 0.99707053f,
0.997099594f, 0.997128371f, 0.997156863f, 0.997185073f,
0.997213004f, 0.997240658f, 0.997268039f, 0.997295149f,
0.997321991f, 0.997348567f, 0.99737488f, 0.997400932f,
0.997426727f, 0.997452266f, 0.997477553f, 0.997502589f,
0.997527377f, 0.99755192f, 0.997576219f, 0.997600279f,
0.997624099f, 0.997647684f, 0.997671036f, 0.997694156f,
0.997717047f, 0.997739712f, 0.997762151f, 0.997784369f,
0.997806367f, 0.997828146f, 0.99784971f, 0.99787106f,
0.997892199f, 0.997913128f, 0.99793385f, 0.997954366f,
0.99797468f, 0.997994791f, 0.998014704f, 0.998034419f,
0.998053939f, 0.998073265f, 0.9980924f, 0.998111345f,
0.998130102f, 0.998148674f, 0.998167061f, 0.998185266f,
0.99820329f, 0.998221136f, 0.998238805f, 0.998256299f,
0.998273619f, 0.998290767f, 0.998307746f, 0.998324556f,
0.998341199f, 0.998357677f, 0.998373992f, 0.998390145f,
0.998406138f, 0.998421972f, 0.998437649f, 0.998453171f,
0.998468538f, 0.998483753f, 0.998498818f, 0.998513733f,
0.998528499f, 0.99854312f, 0.998557595f, 0.998571927f,
0.998586116f, 0.998600165f, 0.998614074f, 0.998627845f,
0.99864148f, 0.998654979f, 0.998668345f, 0.998681577f,
0.998694679f, 0.99870765f, 0.998720493f, 0.998733208f,
0.998745797f, 0.998758261f, 0.998770601f, 0.998782819f,
0.998794916f, 0.998806892f, 0.99881875f, 0.99883049f,
0.998842113f, 0.998853621f, 0.998865015f, 0.998876295f,
0.998887464f, 0.998898522f, 0.99890947f, 0.998920309f,
0.99893104f, 0.998941666f, 0.998952185f, 0.9989626f,
0.998972912f, 0.998983121f, 0.998993229f, 0.999003237f,
0.999013145f, 0.999022955f, 0.999032667f, 0.999042283f,
0.999051803f, 0.999061229f, 0.999070561f, 0.999079801f,
0.999088949f, 0.999098006f, 0.999106973f, 0.999115851f,
0.99912464f, 0.999133343f, 0.999141959f, 0.999150489f,
0.999158935f, 0.999167297f, 0.999175575f, 0.999183772f,
0.999191887f, 0.999199921f, 0.999207876f, 0.999215751f,
0.999223549f, 0.999231269f, 0.999238912f, 0.999246479f,
0.999253971f, 0.999261389f, 0.999268733f, 0.999276004f,
0.999283202f, 0.99929033f, 0.999297386f, 0.999304372f,
0.999311289f, 0.999318137f, 0.999324917f, 0.99933163f,
0.999338276f, 0.999344856f, 0.99935137f, 0.99935782f,
0.999364206f, 0.999370528f, 0.999376788f, 0.999382985f,
0.999389121f, 0.999395195f, 0.99940121f, 0.999407164f,
0.99941306f, 0.999418896f, 0.999424675f, 0.999430396f,
0.999436061f, 0.999441669f, 0.999447221f, 0.999452719f,
0.999458161f, 0.99946355f, 0.999468885f, 0.999474167f,
0.999479396f, 0.999484573f, 0.999489699f, 0.999494774f,
0.999499799f, 0.999504774f, 0.999509699f, 0.999514575f,
0.999519403f, 0.999524182f, 0.999528915f, 0.9995336f,
0.999538238f, 0.999542831f, 0.999547378f, 0.999551879f,
0.999556336f, 0.999560749f, 0.999565118f, 0.999569443f,
0.999573725f, 0.999577965f, 0.999582162f, 0.999586318f,
0.999590433f, 0.999594506f, 0.99959854f, 0.999602533f,
0.999606486f, 0.9996104f, 0.999614275f, 0.999618112f,
0.99962191f, 0.999625671f, 0.999629394f, 0.99963308f,
0.99963673f, 0.999640343f, 0.99964392f, 0.999647462f,
0.999650969f, 0.99965444f, 0.999657878f, 0.999661281f,
0.99966465f, 0.999667986f, 0.999671288f, 0.999674558f,
0.999677795f, 0.999681f, 0.999684173f, 0.999687315f,
0.999690425f, 0.999693504f, 0.999696553f, 0.999699571f,
0.99970256f, 0.999705519f, 0.999708448f, 0.999711348f,
0.999714219f, 0.999717062f, 0.999719877f, 0.999722663f,
0.999725422f, 0.999728153f, 0.999730857f, 0.999733535f,
0.999736185f, 0.99973881f, 0.999741408f, 0.99974398f,
0.999746527f, 0.999749049f, 0.999751545f, 0.999754016f,
0.999756463f, 0.999758886f, 0.999761285f, 0.999763659f,
0.99976601f, 0.999768338f, 0.999770643f, 0.999772924f,
0.999775183f, 0.99977742f, 0.999779634f, 0.999781826f,
0.999783997f, 0.999786145f, 0.999788273f, 0.999790379f,
0.999792464f, 0.999794529f, 0.999796573f, 0.999798597f,
0.9998006f, 0.999802584f, 0.999804548f, 0.999806492f,
0.999808417f, 0.999810323f, 0.99981221f, 0.999814078f,
0.999815928f, 0.999817759f, 0.999819572f, 0.999821367f,
0.999823144f, 0.999824904f, 0.999826646f, 0.99982837f,
0.999830078f, 0.999831768f, 0.999833442f, 0.999835099f,
0.999836739f, 0.999838364f, 0.999839972f, 0.999841564f,
0.99984314f, 0.999844701f, 0.999846246f, 0.999847775f,
0.99984929f, 0.999850789f, 0.999852273f, 0.999853743f,
0.999855198f, 0.999856639f, 0.999858065f, 0.999859477f,
0.999860875f, 0.999862259f, 0.99986363f, 0.999864986f,
0.99986633f, 0.999867659f, 0.999868976f, 0.99987028f,
0.99987157f, 0.999872848f, 0.999874113f, 0.999875365f,
0.999876605f, 0.999877833f, 0.999879049f, 0.999880252f,
0.999881443f, 0.999882623f, 0.999883791f, 0.999884947f,
0.999886091f, 0.999887225f, 0.999888347f, 0.999889458f,
0.999890557f, 0.999891646f, 0.999892724f, 0.999893791f,
0.999894848f, 0.999895894f, 0.99989693f, 0.999897956f,
0.999898971f, 0.999899976f, 0.999900971f, 0.999901956f,
0.999902932f, 0.999903898f, 0.999904854f, 0.9999058f,
0.999906738f, 0.999907665f, 0.999908584f, 0.999909494f,
0.999910394f, 0.999911286f, 0.999912168f, 0.999913042f,
0.999913907f, 0.999914764f, 0.999915612f, 0.999916452f,
0.999917283f, 0.999918106f, 0.999918921f, 0.999919727f,
0.999920526f, 0.999921317f, 0.999922099f, 0.999922875f,
0.999923642f, 0.999924402f, 0.999925154f, 0.999925898f,
0.999926636f, 0.999927366f, 0.999928088f, 0.999928804f,
0.999929512f, 0.999930213f, 0.999930908f, 0.999931595f,
0.999932276f, 0.99993295f, 0.999933617f, 0.999934277f,
0.999934931f, 0.999935579f, 0.99993622f, 0.999936854f,
0.999937482f, 0.999938104f, 0.99993872f, 0.99993933f,
0.999939934f, 0.999940531f, 0.999941123f, 0.999941709f,
0.999942289f, 0.999942863f, 0.999943431f, 0.999943994f,
0.999944551f, 0.999945103f, 0.999945649f, 0.99994619f,
0.999946726f, 0.999947256f, 0.99994778f, 0.9999483f,
0.999948814f, 0.999949324f, 0.999949828f, 0.999950327f,
0.999950821f, 0.999951311f, 0.999951795f, 0.999952275f,
0.999952749f, 0.99995322f, 0.999953685f, 0.999954146f,
0.999954602f
};
} // namespace tesseract