mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Delete cube code
This commit is contained in:
parent
432684dd6e
commit
5c3839bdb4
@ -1,440 +0,0 @@
|
||||
/******************************************************************
|
||||
* File: cube_control.cpp
|
||||
* Description: Tesseract class methods for invoking cube convolutional
|
||||
* neural network word recognizer.
|
||||
* Author: Raquel Romano
|
||||
* Created: September 2009
|
||||
*
|
||||
* (C) Copyright 2009, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* @name convert_prob_to_tess_certainty
|
||||
*
|
||||
* Normalize a probability in the range [0.0, 1.0] to a tesseract
|
||||
* certainty in the range [-20.0, 0.0]
|
||||
*/
|
||||
static float convert_prob_to_tess_certainty(float prob) {
|
||||
return (prob - 1.0) * 20.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name char_box_to_tbox
|
||||
*
|
||||
* Create a TBOX from a character bounding box. If nonzero, the
|
||||
* x_offset accounts for any additional padding of the word box that
|
||||
* should be taken into account.
|
||||
*
|
||||
*/
|
||||
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
l_int32 left;
|
||||
l_int32 top;
|
||||
l_int32 width;
|
||||
l_int32 height;
|
||||
l_int32 right;
|
||||
l_int32 bottom;
|
||||
|
||||
boxGetGeometry(char_box, &left, &top, &width, &height);
|
||||
left += word_box.left() - x_offset;
|
||||
right = left + width;
|
||||
top = word_box.bottom() + word_box.height() - top;
|
||||
bottom = top - height;
|
||||
return TBOX(left, bottom, right, top);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name extract_cube_state
|
||||
*
|
||||
* Extract CharSamp objects and character bounding boxes from the
|
||||
* CubeObject's state. The caller should free both structres.
|
||||
*
|
||||
*/
|
||||
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
int* num_chars,
|
||||
Boxa** char_boxes,
|
||||
CharSamp*** char_samples) {
|
||||
if (!cube_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
|
||||
"passed to extract_cube_state\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that the CubeObject accessors return either the deslanted or
|
||||
// regular objects search object or beam search object, whichever
|
||||
// was used in the last call to Recognize()
|
||||
CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
|
||||
if (!cube_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
BeamSearch *beam_search_obj = cube_obj->BeamObj();
|
||||
if (!beam_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's beam search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the character samples and bounding boxes by backtracking
|
||||
// through the beam search path
|
||||
int best_node_index = beam_search_obj->BestPresortedNodeIndex();
|
||||
*char_samples = beam_search_obj->BackTrack(
|
||||
cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
|
||||
if (!*char_samples)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name create_cube_box_word
|
||||
*
|
||||
* Fill the given BoxWord with boxes from character bounding
|
||||
* boxes. The char_boxes have local coordinates w.r.t. the
|
||||
* word bounding box, i.e., the left-most character bbox of each word
|
||||
* has (0,0) left-top coord, but the BoxWord must be defined in page
|
||||
* coordinates.
|
||||
*/
|
||||
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
int num_chars,
|
||||
TBOX word_box,
|
||||
BoxWord* box_word) {
|
||||
if (!box_word) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the x-coordinate of left-most char_box, which could be
|
||||
// nonzero if the word image was padded before recognition took place.
|
||||
int x_offset = -1;
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
if (x_offset < 0 || char_box->x < x_offset) {
|
||||
x_offset = char_box->x;
|
||||
}
|
||||
boxDestroy(&char_box);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
|
||||
boxDestroy(&char_box);
|
||||
box_word->InsertBox(i, tbox);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name init_cube_objects
|
||||
*
|
||||
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
|
||||
* Returns false if cube context could not be created or if load_combiner is
|
||||
* true, but the combiner could not be loaded.
|
||||
*/
|
||||
bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager) {
|
||||
ASSERT_HOST(cube_cntxt_ == NULL);
|
||||
ASSERT_HOST(tess_cube_combiner_ == NULL);
|
||||
|
||||
// Create the cube context object
|
||||
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
|
||||
if (cube_cntxt_ == NULL) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
|
||||
"instantiate CubeRecoContext\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the combiner object and load the combiner net for target languages.
|
||||
if (load_combiner) {
|
||||
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
|
||||
if (!tess_cube_combiner_->LoadCombinerNet()) {
|
||||
delete cube_cntxt_;
|
||||
cube_cntxt_ = NULL;
|
||||
delete tess_cube_combiner_;
|
||||
tess_cube_combiner_ = NULL;
|
||||
if (cube_debug_level > 0)
|
||||
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name run_cube_combiner
|
||||
*
|
||||
* Iterates through tesseract's results and calls cube on each word,
|
||||
* combining the results with the existing tesseract result.
|
||||
*/
|
||||
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
if (page_res == NULL || tess_cube_combiner_ == NULL)
|
||||
return;
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
// Iterate through the word results and call cube on each word.
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward()) {
|
||||
BLOCK* block = page_res_it.block()->block;
|
||||
if (block->poly_block() != NULL && !block->poly_block()->IsText())
|
||||
continue; // Don't deal with non-text blocks.
|
||||
WERD_RES* word = page_res_it.word();
|
||||
// Skip cube entirely if tesseract's certainty is greater than threshold.
|
||||
int combiner_run_thresh = convert_prob_to_tess_certainty(
|
||||
cube_cntxt_->Params()->CombinerRunThresh());
|
||||
if (word->best_choice->certainty() >= combiner_run_thresh) {
|
||||
continue;
|
||||
}
|
||||
// Use the same language as Tesseract used for the word.
|
||||
Tesseract* lang_tess = word->tesseract;
|
||||
|
||||
// Setup a trial WERD_RES in which to classify with cube.
|
||||
WERD_RES cube_word;
|
||||
cube_word.InitForRetryRecognition(*word);
|
||||
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
|
||||
OEM_CUBE_ONLY,
|
||||
NULL, false, false, false,
|
||||
page_res_it.row()->row,
|
||||
page_res_it.block()->block);
|
||||
CubeObject *cube_obj = lang_tess->cube_recognize_word(
|
||||
page_res_it.block()->block, &cube_word);
|
||||
if (cube_obj != NULL)
|
||||
lang_tess->cube_combine_word(cube_obj, &cube_word, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_word_pass1
|
||||
*
|
||||
* Recognizes a single word using (only) cube. Compatible with
|
||||
* Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
*/
|
||||
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
|
||||
CubeObject *cube_obj = cube_recognize_word(block, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize_word
|
||||
*
|
||||
* Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
* but also returns the cube object in case the combiner is needed.
|
||||
*/
|
||||
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
if (!cube_binary_ || !cube_cntxt_) {
|
||||
if (cube_debug_level > 0 && !cube_binary_)
|
||||
tprintf("Tesseract::run_cube(): NULL binary image.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
TBOX word_box = word->word->bounding_box();
|
||||
if (block != NULL && (block->re_rotation().x() != 1.0f ||
|
||||
block->re_rotation().y() != 0.0f)) {
|
||||
// TODO(rays) We have to rotate the bounding box to get the true coords.
|
||||
// This will be achieved in the future via DENORM.
|
||||
// In the mean time, cube can't process this word.
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube can't process rotated word at:");
|
||||
word_box.print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, cube_binary_, word_box.left(),
|
||||
pixGetHeight(cube_binary_) - word_box.top(),
|
||||
word_box.width(), word_box.height());
|
||||
if (!cube_recognize(cube_obj, block, word)) {
|
||||
delete cube_obj;
|
||||
return NULL;
|
||||
}
|
||||
return cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_combine_word
|
||||
*
|
||||
* Combines the cube and tesseract results for a single word, leaving the
|
||||
* result in tess_word.
|
||||
*/
|
||||
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word) {
|
||||
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
|
||||
cube_obj);
|
||||
// If combiner probability is greater than tess/cube combiner
|
||||
// classifier threshold, i.e. tesseract wins, then just return the
|
||||
// tesseract result unchanged, as the combiner knows nothing about how
|
||||
// correct the answer is. If cube and tesseract agree, then improve the
|
||||
// scores before returning.
|
||||
WERD_CHOICE* tess_best = tess_word->best_choice;
|
||||
WERD_CHOICE* cube_best = cube_word->best_choice;
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Combiner prob = %g vs threshold %g\n",
|
||||
combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
|
||||
}
|
||||
if (combiner_prob >=
|
||||
cube_cntxt_->Params()->CombinerClassifierThresh()) {
|
||||
if (tess_best->unichar_string() == cube_best->unichar_string()) {
|
||||
// Cube and tess agree, so improve the scores.
|
||||
tess_best->set_rating(tess_best->rating() / 2);
|
||||
tess_best->set_certainty(tess_best->certainty() / 2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Cube wins.
|
||||
// It is better for the language combiner to have all tesseract scores,
|
||||
// so put them in the cube result.
|
||||
cube_best->set_rating(tess_best->rating());
|
||||
cube_best->set_certainty(tess_best->certainty());
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
|
||||
tess_best->unichar_string().string(),
|
||||
cube_best->unichar_string().string());
|
||||
}
|
||||
tess_word->ConsumeWordResults(cube_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize
|
||||
*
|
||||
* Call cube on the current word, and write the result to word.
|
||||
* Sets up a fake result and returns false if something goes wrong.
|
||||
*/
|
||||
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
WERD_RES *word) {
|
||||
// Run cube
|
||||
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
|
||||
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube returned nothing for word at:");
|
||||
word->word->bounding_box().print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get cube's best result and its probability, mapped to tesseract's
|
||||
// certainty range
|
||||
char_32 *cube_best_32 = cube_alt_list->Alt(0);
|
||||
double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
|
||||
float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
|
||||
string cube_best_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
|
||||
|
||||
// Retrieve Cube's character bounding boxes and CharSamples,
|
||||
// corresponding to the most recent call to RecognizeWord().
|
||||
Boxa *char_boxes = NULL;
|
||||
CharSamp **char_samples = NULL;;
|
||||
int num_chars;
|
||||
if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
|
||||
&& cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
|
||||
"cube state.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert cube's character bounding boxes to a BoxWord.
|
||||
BoxWord cube_box_word;
|
||||
TBOX tess_word_box = word->word->bounding_box();
|
||||
if (word->denorm.block() != NULL)
|
||||
tess_word_box.rotate(word->denorm.block()->re_rotation());
|
||||
bool box_word_success = create_cube_box_word(char_boxes, num_chars,
|
||||
tess_word_box,
|
||||
&cube_box_word);
|
||||
boxaDestroy(&char_boxes);
|
||||
if (!box_word_success) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
|
||||
"create cube BoxWord\n");
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fill tesseract result's fields with cube results
|
||||
fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
|
||||
|
||||
// Create cube's best choice.
|
||||
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
UNICHAR_ID uch_id =
|
||||
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
|
||||
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
|
||||
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
|
||||
}
|
||||
word->FakeClassifyWord(num_chars, choices);
|
||||
// within a word, cube recognizes the word in reading order.
|
||||
word->best_choice->set_unichars_in_script_order(true);
|
||||
delete [] choices;
|
||||
delete [] char_samples;
|
||||
|
||||
// Some sanity checks
|
||||
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
|
||||
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube result: %s r=%g, c=%g\n",
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->rating(),
|
||||
word->best_choice->certainty());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name fill_werd_res
|
||||
*
|
||||
* Fill Tesseract's word result fields with cube's.
|
||||
*
|
||||
*/
|
||||
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res) {
|
||||
delete tess_werd_res->box_word;
|
||||
tess_werd_res->box_word = new BoxWord(cube_box_word);
|
||||
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
|
||||
tess_werd_res->word);
|
||||
// Fill text and remaining fields
|
||||
tess_werd_res->word->set_text(cube_best_str);
|
||||
tess_werd_res->tess_failed = FALSE;
|
||||
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
|
||||
// There is no output word, so we can' call AdaptableWord, but then I don't
|
||||
// think we need to. Fudge the result with accepted.
|
||||
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
|
||||
|
||||
// Set word to done, i.e., ignore all of tesseract's tests for rejection
|
||||
tess_werd_res->done = tess_werd_res->tess_accepted;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,184 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.cpp
|
||||
* Description: Implementation of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <limits.h>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
#include "classifier_factory.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
* CubeRecoContext will not take ownership of tess_obj, but will
|
||||
* record the pointer to it and will make use of various Tesseract
|
||||
* components (language model, flags, etc). Thus the caller should
|
||||
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
*/
|
||||
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
|
||||
tess_obj_ = tess_obj;
|
||||
lang_ = "";
|
||||
loaded_ = false;
|
||||
lang_mod_ = NULL;
|
||||
params_ = NULL;
|
||||
char_classifier_ = NULL;
|
||||
char_set_ = NULL;
|
||||
word_size_model_ = NULL;
|
||||
char_bigrams_ = NULL;
|
||||
word_unigrams_ = NULL;
|
||||
noisy_input_ = false;
|
||||
size_normalization_ = false;
|
||||
}
|
||||
|
||||
CubeRecoContext::~CubeRecoContext() {
|
||||
delete char_classifier_;
|
||||
char_classifier_ = NULL;
|
||||
|
||||
delete word_size_model_;
|
||||
word_size_model_ = NULL;
|
||||
|
||||
delete char_set_;
|
||||
char_set_ = NULL;
|
||||
|
||||
delete char_bigrams_;
|
||||
char_bigrams_ = NULL;
|
||||
|
||||
delete word_unigrams_;
|
||||
word_unigrams_ = NULL;
|
||||
|
||||
delete lang_mod_;
|
||||
lang_mod_ = NULL;
|
||||
|
||||
delete params_;
|
||||
params_ = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
* environment variable and appending a "tessdata" directory to it
|
||||
*/
|
||||
bool CubeRecoContext::GetDataFilePath(string *path) const {
|
||||
*path = tess_obj_->datadir.string();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The object initialization function that loads all the necessary
|
||||
* components of a RecoContext. TessdataManager is used to load the
|
||||
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
* component is present, Cube will be instantiated with the unicharset
|
||||
* specified in this component and the corresponding dictionary
|
||||
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
* be using Tesseract's unicharset and dawgs, and will load the
|
||||
* unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
* dawgs from TESSDATA_*_DAWG components.
|
||||
*/
|
||||
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
ASSERT_HOST(tess_obj_ != NULL);
|
||||
tess_unicharset_ = tess_unicharset;
|
||||
string data_file_path;
|
||||
|
||||
// Get the data file path.
|
||||
if (GetDataFilePath(&data_file_path) == false) {
|
||||
fprintf(stderr, "Unable to get data file path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the language from the Tesseract object.
|
||||
lang_ = tess_obj_->lang.string();
|
||||
|
||||
// Create the char set.
|
||||
if ((char_set_ =
|
||||
CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharSet\n");
|
||||
return false;
|
||||
}
|
||||
// Create the language model.
|
||||
string lm_file_name = data_file_path + lang_ + ".cube.lm";
|
||||
string lm_params;
|
||||
if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
|
||||
"language model params from %s\n", lm_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
lang_mod_ = new TessLangModel(lm_params, data_file_path,
|
||||
tess_obj_->getDict().load_system_dawg,
|
||||
tessdata_manager, this);
|
||||
|
||||
// Create the optional char bigrams object.
|
||||
char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional word unigrams object.
|
||||
word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional size model.
|
||||
word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
|
||||
char_set_, Contextual());
|
||||
|
||||
// Load tuning params.
|
||||
params_ = CubeTuningParams::Create(data_file_path, lang_);
|
||||
if (params_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
|
||||
"CubeTuningParams from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the char classifier.
|
||||
char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
|
||||
lang_mod_, char_set_,
|
||||
params_);
|
||||
if (char_classifier_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharClassifierFactory object from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
loaded_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Creates a CubeRecoContext object using a tesseract object */
|
||||
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
// create the object
|
||||
CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
|
||||
// load the necessary components
|
||||
if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
|
||||
"CubeRecoContext object\n");
|
||||
delete cntxt;
|
||||
return NULL;
|
||||
}
|
||||
// success
|
||||
return cntxt;
|
||||
}
|
||||
} // tesseract}
|
@ -1,157 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.h
|
||||
* Description: Declaration of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
|
||||
// (or a thread) would create one CubeRecoContext object per language.
|
||||
// The CubeRecoContext object also provides methods to get and set the
|
||||
// different attribues of the Cube OCR Engine.
|
||||
|
||||
#ifndef CUBE_RECO_CONTEXT_H
|
||||
#define CUBE_RECO_CONTEXT_H
|
||||
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "char_set.h"
|
||||
#include "word_size_model.h"
|
||||
#include "char_bigrams.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
class TessdataManager;
|
||||
|
||||
class CubeRecoContext {
|
||||
public:
|
||||
// Reading order enum type
|
||||
enum ReadOrder {
|
||||
L2R,
|
||||
R2L
|
||||
};
|
||||
|
||||
// Instantiate using a Tesseract object
|
||||
CubeRecoContext(Tesseract *tess_obj);
|
||||
|
||||
~CubeRecoContext();
|
||||
|
||||
// accessor functions
|
||||
inline const string & Lang() const { return lang_; }
|
||||
inline CharSet *CharacterSet() const { return char_set_; }
|
||||
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
|
||||
inline CharClassifier *Classifier() const { return char_classifier_; }
|
||||
inline WordSizeModel *SizeModel() const { return word_size_model_; }
|
||||
inline CharBigrams *Bigrams() const { return char_bigrams_; }
|
||||
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
|
||||
inline TuningParams *Params() const { return params_; }
|
||||
inline LangModel *LangMod() const { return lang_mod_; }
|
||||
|
||||
// the reading order of the language
|
||||
inline ReadOrder ReadingOrder() const {
|
||||
return ((lang_ == "ara") ? R2L : L2R);
|
||||
}
|
||||
|
||||
// does the language support case
|
||||
inline bool HasCase() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Cursive() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
inline bool HasItalics() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Contextual() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
// RecoContext runtime flags accessor functions
|
||||
inline bool SizeNormalization() const { return size_normalization_; }
|
||||
inline bool NoisyInput() const { return noisy_input_; }
|
||||
inline bool OOD() const { return lang_mod_->OOD(); }
|
||||
inline bool Numeric() const { return lang_mod_->Numeric(); }
|
||||
inline bool WordList() const { return lang_mod_->WordList(); }
|
||||
inline bool Punc() const { return lang_mod_->Punc(); }
|
||||
inline bool CaseSensitive() const {
|
||||
return char_classifier_->CaseSensitive();
|
||||
}
|
||||
|
||||
inline void SetSizeNormalization(bool size_normalization) {
|
||||
size_normalization_ = size_normalization;
|
||||
}
|
||||
inline void SetNoisyInput(bool noisy_input) {
|
||||
noisy_input_ = noisy_input;
|
||||
}
|
||||
inline void SetOOD(bool ood_enabled) {
|
||||
lang_mod_->SetOOD(ood_enabled);
|
||||
}
|
||||
inline void SetNumeric(bool numeric_enabled) {
|
||||
lang_mod_->SetNumeric(numeric_enabled);
|
||||
}
|
||||
inline void SetWordList(bool word_list_enabled) {
|
||||
lang_mod_->SetWordList(word_list_enabled);
|
||||
}
|
||||
inline void SetPunc(bool punc_enabled) {
|
||||
lang_mod_->SetPunc(punc_enabled);
|
||||
}
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
char_classifier_->SetCaseSensitive(case_sensitive);
|
||||
}
|
||||
inline tesseract::Tesseract *TesseractObject() const {
|
||||
return tess_obj_;
|
||||
}
|
||||
|
||||
// Returns the path of the data files
|
||||
bool GetDataFilePath(string *path) const;
|
||||
// Creates a CubeRecoContext object using a tesseract object. Data
|
||||
// files are loaded via the tessdata_manager, and the tesseract
|
||||
// unicharset is provided in order to map Cube's unicharset to
|
||||
// Tesseract's in the case where the two unicharsets differ.
|
||||
static CubeRecoContext *Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
private:
|
||||
bool loaded_;
|
||||
string lang_;
|
||||
CharSet *char_set_;
|
||||
UNICHARSET *tess_unicharset_;
|
||||
WordSizeModel *word_size_model_;
|
||||
CharClassifier *char_classifier_;
|
||||
CharBigrams *char_bigrams_;
|
||||
WordUnigrams *word_unigrams_;
|
||||
TuningParams *params_;
|
||||
LangModel *lang_mod_;
|
||||
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
|
||||
bool size_normalization_;
|
||||
bool noisy_input_;
|
||||
|
||||
// Loads and initialized all the necessary components of a
|
||||
// CubeRecoContext. See .cpp for more details.
|
||||
bool Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_RECO_CONTEXT_H
|
@ -1,134 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.cpp
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:39:45 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "cubeclassifier.h"
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tessclassifier.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "trainingsample.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
|
||||
: cube_cntxt_(tesseract->GetCubeRecoContext()),
|
||||
shape_table_(*tesseract->shape_table()) {
|
||||
}
|
||||
CubeClassifier::~CubeClassifier() {
|
||||
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
results->clear();
|
||||
if (page_pix == NULL) return 0;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
if (alt_list != NULL) {
|
||||
alt_list->Sort();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
// Convert cube representation to a shape_id.
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id >= 0)
|
||||
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
|
||||
}
|
||||
delete alt_list;
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
|
||||
: cube_cntxt_(tesseract->GetCubeRecoContext()),
|
||||
shape_table_(*tesseract->shape_table()),
|
||||
pruner_(new TessClassifier(true, tesseract)) {
|
||||
}
|
||||
CubeTessClassifier::~CubeTessClassifier() {
|
||||
delete pruner_;
|
||||
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeTessClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
|
||||
keep_this, results);
|
||||
if (page_pix == NULL) return num_results;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
if (alt_list != NULL) {
|
||||
for (int r = 0; r < num_results; ++r) {
|
||||
// Get the best cube probability of the unichar in the result.
|
||||
double best_prob = 0.0;
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id == (*results)[r].unichar_id &&
|
||||
alt_list->AltProb(i) > best_prob) {
|
||||
best_prob = alt_list->AltProb(i);
|
||||
}
|
||||
}
|
||||
(*results)[r].rating = best_prob;
|
||||
}
|
||||
delete alt_list;
|
||||
// Re-sort by rating.
|
||||
results->sort(&UnicharRating::SortDescendingRating);
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
|
@ -1,80 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.h
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:36:32 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
|
||||
#include "shapeclassifier.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Classify;
|
||||
class CubeRecoContext;
|
||||
class ShapeTable;
|
||||
class TessClassifier;
|
||||
class Tesseract;
|
||||
class TrainingSample;
|
||||
struct UnicharRating;
|
||||
|
||||
// Cube implementation of a ShapeClassifier.
|
||||
class CubeClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
};
|
||||
|
||||
// Combination of Tesseract class pruner with scoring by cube.
|
||||
class CubeTessClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeTessClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeTessClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
TessClassifier* pruner_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */
|
@ -1,55 +0,0 @@
|
||||
AM_CPPFLAGS += \
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/ccmain -I$(top_srcdir)/classify \
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/wordrec \
|
||||
-I$(top_srcdir)/neural_networks/runtime \
|
||||
-I$(top_srcdir)/viewer
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
-fvisibility=hidden -fvisibility-inlines-hidden
|
||||
endif
|
||||
|
||||
noinst_HEADERS = \
|
||||
altlist.h beam_search.h bmp_8.h cached_file.h \
|
||||
char_altlist.h char_bigrams.h char_samp.h char_samp_enum.h \
|
||||
char_samp_set.h char_set.h classifier_base.h classifier_factory.h \
|
||||
con_comp.h cube_const.h conv_net_classifier.h cube_line_object.h \
|
||||
cube_line_segmenter.h cube_object.h cube_search_object.h \
|
||||
cube_tuning_params.h cube_utils.h feature_base.h feature_bmp.h \
|
||||
feature_chebyshev.h feature_hybrid.h hybrid_neural_net_classifier.h \
|
||||
lang_mod_edge.h lang_model.h search_column.h search_node.h \
|
||||
search_object.h string_32.h tess_lang_mod_edge.h tess_lang_model.h \
|
||||
tuning_params.h word_altlist.h word_list_lang_model.h word_size_model.h \
|
||||
word_unigrams.h
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
noinst_LTLIBRARIES = libtesseract_cube.la
|
||||
else
|
||||
lib_LTLIBRARIES = libtesseract_cube.la
|
||||
libtesseract_cube_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
libtesseract_cube_la_LIBADD = \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../ccutil/libtesseract_ccutil.la \
|
||||
../neural_networks/runtime/libtesseract_neural.la \
|
||||
../viewer/libtesseract_viewer.la \
|
||||
../wordrec/libtesseract_wordrec.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../classify/libtesseract_classify.la \
|
||||
../dict/libtesseract_dict.la
|
||||
endif
|
||||
|
||||
libtesseract_cube_la_SOURCES = \
|
||||
altlist.cpp beam_search.cpp bmp_8.cpp cached_file.cpp \
|
||||
char_altlist.cpp char_bigrams.cpp char_samp.cpp char_samp_enum.cpp \
|
||||
char_samp_set.cpp char_set.cpp classifier_factory.cpp \
|
||||
con_comp.cpp conv_net_classifier.cpp cube_line_object.cpp \
|
||||
cube_line_segmenter.cpp cube_object.cpp cube_search_object.cpp \
|
||||
cube_tuning_params.cpp cube_utils.cpp feature_bmp.cpp \
|
||||
feature_chebyshev.cpp feature_hybrid.cpp hybrid_neural_net_classifier.cpp \
|
||||
search_column.cpp search_node.cpp \
|
||||
tess_lang_mod_edge.cpp tess_lang_model.cpp \
|
||||
word_altlist.cpp word_list_lang_model.cpp word_size_model.cpp \
|
||||
word_unigrams.cpp
|
@ -1,60 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: alt_list.cpp
|
||||
* Description: Class to abstarct a list of alternate results
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "altlist.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
AltList::AltList(int max_alt) {
|
||||
max_alt_ = max_alt;
|
||||
alt_cnt_ = 0;
|
||||
alt_cost_ = NULL;
|
||||
alt_tag_ = NULL;
|
||||
}
|
||||
|
||||
AltList::~AltList() {
|
||||
if (alt_cost_ != NULL) {
|
||||
delete []alt_cost_;
|
||||
alt_cost_ = NULL;
|
||||
}
|
||||
|
||||
if (alt_tag_ != NULL) {
|
||||
delete []alt_tag_;
|
||||
alt_tag_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// return the best possible cost and index of corresponding alternate
|
||||
int AltList::BestCost(int *best_alt) const {
|
||||
if (alt_cnt_ <= 0) {
|
||||
(*best_alt) = -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int best_alt_idx = 0;
|
||||
for (int alt_idx = 1; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (alt_cost_[alt_idx] < alt_cost_[best_alt_idx]) {
|
||||
best_alt_idx = alt_idx;
|
||||
}
|
||||
}
|
||||
(*best_alt) = best_alt_idx;
|
||||
return alt_cost_[best_alt_idx];
|
||||
}
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: alt_list.h
|
||||
* Description: Class to abstarct a list of alternate results
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The AltList class is the base class for the list of alternate recognition
|
||||
// results. Each alternate has a cost an an optional tag associated with it
|
||||
|
||||
#ifndef ALT_LIST_H
|
||||
#define ALT_LIST_H
|
||||
|
||||
#include <math.h>
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
class AltList {
|
||||
public:
|
||||
explicit AltList(int max_alt);
|
||||
virtual ~AltList();
|
||||
// sort the list of alternates based
|
||||
virtual void Sort() = 0;
|
||||
// return the best possible cost and index of corresponding alternate
|
||||
int BestCost (int *best_alt) const;
|
||||
// return the count of alternates
|
||||
inline int AltCount() const { return alt_cnt_; }
|
||||
// returns the cost (-ve log prob) of an alternate
|
||||
inline int AltCost(int alt_idx) const { return alt_cost_[alt_idx]; }
|
||||
// returns the prob of an alternate
|
||||
inline double AltProb(int alt_idx) const {
|
||||
return CubeUtils::Cost2Prob(AltCost(alt_idx));
|
||||
}
|
||||
// returns the alternate tag
|
||||
inline void *AltTag(int alt_idx) const { return alt_tag_[alt_idx]; }
|
||||
|
||||
protected:
|
||||
// max number of alternates the list can hold
|
||||
int max_alt_;
|
||||
// actual alternate count
|
||||
int alt_cnt_;
|
||||
// array of alternate costs
|
||||
int *alt_cost_;
|
||||
// array of alternate tags
|
||||
void **alt_tag_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ALT_LIST_H
|
@ -1,470 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: beam_search.cpp
|
||||
* Description: Class to implement Beam Word Search Algorithm
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "beam_search.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) {
|
||||
cntxt_ = cntxt;
|
||||
seg_pt_cnt_ = 0;
|
||||
col_cnt_ = 1;
|
||||
col_ = NULL;
|
||||
word_mode_ = word_mode;
|
||||
}
|
||||
|
||||
// Cleanup the lattice corresponding to the last search
|
||||
void BeamSearch::Cleanup() {
|
||||
if (col_ != NULL) {
|
||||
for (int col = 0; col < col_cnt_; col++) {
|
||||
delete col_[col];
|
||||
}
|
||||
delete []col_;
|
||||
}
|
||||
col_ = NULL;
|
||||
}
|
||||
|
||||
BeamSearch::~BeamSearch() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Creates a set of children nodes emerging from a parent node based on
|
||||
// the character alternate list and the language model.
|
||||
void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
|
||||
SearchNode *parent_node,
|
||||
LangModEdge *lm_parent_edge,
|
||||
CharAltList *char_alt_list, int extra_cost) {
|
||||
// get all the edges from this parent
|
||||
int edge_cnt;
|
||||
LangModEdge **lm_edges = lang_mod->GetEdges(char_alt_list,
|
||||
lm_parent_edge, &edge_cnt);
|
||||
if (lm_edges) {
|
||||
// add them to the ending column with the appropriate parent
|
||||
for (int edge = 0; edge < edge_cnt; edge++) {
|
||||
// add a node to the column if the current column is not the
|
||||
// last one, or if the lang model edge indicates it is valid EOW
|
||||
if (!cntxt_->NoisyInput() && out_col->ColIdx() >= seg_pt_cnt_ &&
|
||||
!lm_edges[edge]->IsEOW()) {
|
||||
// free edge since no object is going to own it
|
||||
delete lm_edges[edge];
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute the recognition cost of this node
|
||||
int recognition_cost = MIN_PROB_COST;
|
||||
if (char_alt_list && char_alt_list->AltCount() > 0) {
|
||||
recognition_cost = MAX(0, char_alt_list->ClassCost(
|
||||
lm_edges[edge]->ClassID()));
|
||||
// Add the no space cost. This should zero in word mode
|
||||
recognition_cost += extra_cost;
|
||||
}
|
||||
|
||||
// Note that the edge will be freed inside the column if
|
||||
// AddNode is called
|
||||
if (recognition_cost >= 0) {
|
||||
out_col->AddNode(lm_edges[edge], recognition_cost, parent_node,
|
||||
cntxt_);
|
||||
} else {
|
||||
delete lm_edges[edge];
|
||||
}
|
||||
} // edge
|
||||
// free edge array
|
||||
delete []lm_edges;
|
||||
} // lm_edges
|
||||
}
|
||||
|
||||
// Performs a beam search in the specified search using the specified
|
||||
// language model; returns an alternate list of possible words as a result.
|
||||
WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
|
||||
// verifications
|
||||
if (!lang_mod)
|
||||
lang_mod = cntxt_->LangMod();
|
||||
if (!lang_mod) {
|
||||
fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
|
||||
"LangModel\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// free existing state
|
||||
Cleanup();
|
||||
|
||||
// get seg pt count
|
||||
seg_pt_cnt_ = srch_obj->SegPtCnt();
|
||||
if (seg_pt_cnt_ < 0) {
|
||||
return NULL;
|
||||
}
|
||||
col_cnt_ = seg_pt_cnt_ + 1;
|
||||
|
||||
// disregard suspicious cases
|
||||
if (seg_pt_cnt_ > 128) {
|
||||
fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
|
||||
"suspiciously high; bailing out\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// alloc memory for columns
|
||||
col_ = new SearchColumn *[col_cnt_];
|
||||
memset(col_, 0, col_cnt_ * sizeof(*col_));
|
||||
|
||||
// for all possible segments
|
||||
for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
|
||||
// create a search column
|
||||
col_[end_seg - 1] = new SearchColumn(end_seg - 1,
|
||||
cntxt_->Params()->BeamWidth());
|
||||
|
||||
// for all possible start segments
|
||||
int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
|
||||
for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
|
||||
int parent_nodes_cnt;
|
||||
SearchNode **parent_nodes;
|
||||
|
||||
// for the root segment, we do not have a parent
|
||||
if (strt_seg == 0) {
|
||||
parent_nodes_cnt = 1;
|
||||
parent_nodes = NULL;
|
||||
} else {
|
||||
// for all the existing nodes in the starting column
|
||||
parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
|
||||
parent_nodes = col_[strt_seg - 1]->Nodes();
|
||||
}
|
||||
|
||||
// run the shape recognizer
|
||||
CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
|
||||
end_seg - 1);
|
||||
// for all the possible parents
|
||||
for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
|
||||
// point to the parent node
|
||||
SearchNode *parent_node = !parent_nodes ? NULL
|
||||
: parent_nodes[parent_idx];
|
||||
LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
|
||||
: parent_node->LangModelEdge();
|
||||
|
||||
// compute the cost of not having spaces within the segment range
|
||||
int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
|
||||
|
||||
// In phrase mode, compute the cost of not having a space before
|
||||
// this character
|
||||
int no_space_cost = 0;
|
||||
if (!word_mode_ && strt_seg > 0) {
|
||||
no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
|
||||
}
|
||||
|
||||
// if the no space cost is low enough
|
||||
if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
|
||||
// Add the children nodes
|
||||
CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
|
||||
lm_parent_edge, char_alt_list,
|
||||
contig_cost + no_space_cost);
|
||||
}
|
||||
|
||||
// In phrase mode and if not starting at the root
|
||||
if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL
|
||||
// consider starting a new word for nodes that are valid EOW
|
||||
if (parent_node->LangModelEdge()->IsEOW()) {
|
||||
// get the space cost
|
||||
int space_cost = srch_obj->SpaceCost(strt_seg - 1);
|
||||
// if the space cost is low enough
|
||||
if ((contig_cost + space_cost) < MIN_PROB_COST) {
|
||||
// Restart the language model and add nodes as children to the
|
||||
// space node.
|
||||
CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
|
||||
char_alt_list, contig_cost + space_cost);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // parent
|
||||
} // strt_seg
|
||||
|
||||
// prune the column nodes
|
||||
col_[end_seg - 1]->Prune();
|
||||
|
||||
// Free the column hash table. No longer needed
|
||||
col_[end_seg - 1]->FreeHashTable();
|
||||
} // end_seg
|
||||
|
||||
WordAltList *alt_list = CreateWordAltList(srch_obj);
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Creates a Word alternate list from the results in the lattice.
|
||||
WordAltList *BeamSearch::CreateWordAltList(SearchObject *srch_obj) {
|
||||
// create an alternate list of all the nodes in the last column
|
||||
int node_cnt = col_[col_cnt_ - 1]->NodeCount();
|
||||
SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
|
||||
CharBigrams *bigrams = cntxt_->Bigrams();
|
||||
WordUnigrams *word_unigrams = cntxt_->WordUnigramsObj();
|
||||
|
||||
// Save the index of the best-cost node before the alt list is
|
||||
// sorted, so that we can retrieve it from the node list when backtracking.
|
||||
best_presorted_node_idx_ = 0;
|
||||
int best_cost = -1;
|
||||
|
||||
if (node_cnt <= 0)
|
||||
return NULL;
|
||||
|
||||
// start creating the word alternate list
|
||||
WordAltList *alt_list = new WordAltList(node_cnt + 1);
|
||||
for (int node_idx = 0; node_idx < node_cnt; node_idx++) {
|
||||
// recognition cost
|
||||
int recognition_cost = srch_nodes[node_idx]->BestCost();
|
||||
// compute the size cost of the alternate
|
||||
char_32 *ch_buff = NULL;
|
||||
int size_cost = SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff);
|
||||
// accumulate other costs
|
||||
if (ch_buff) {
|
||||
int cost = 0;
|
||||
// char bigram cost
|
||||
int bigram_cost = !bigrams ? 0 :
|
||||
bigrams->Cost(ch_buff, cntxt_->CharacterSet());
|
||||
// word unigram cost
|
||||
int unigram_cost = !word_unigrams ? 0 :
|
||||
word_unigrams->Cost(ch_buff, cntxt_->LangMod(),
|
||||
cntxt_->CharacterSet());
|
||||
// overall cost
|
||||
cost = static_cast<int>(
|
||||
(size_cost * cntxt_->Params()->SizeWgt()) +
|
||||
(bigram_cost * cntxt_->Params()->CharBigramWgt()) +
|
||||
(unigram_cost * cntxt_->Params()->WordUnigramWgt()) +
|
||||
(recognition_cost * cntxt_->Params()->RecoWgt()));
|
||||
|
||||
// insert into word alt list
|
||||
alt_list->Insert(ch_buff, cost,
|
||||
static_cast<void *>(srch_nodes[node_idx]));
|
||||
// Note that strict < is necessary because WordAltList::Sort()
|
||||
// uses it in a bubble sort to swap entries.
|
||||
if (best_cost < 0 || cost < best_cost) {
|
||||
best_presorted_node_idx_ = node_idx;
|
||||
best_cost = cost;
|
||||
}
|
||||
delete []ch_buff;
|
||||
}
|
||||
}
|
||||
|
||||
// sort the alternates based on cost
|
||||
alt_list->Sort();
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Returns the lattice column corresponding to the specified column index.
|
||||
SearchColumn *BeamSearch::Column(int col) const {
|
||||
if (col < 0 || col >= col_cnt_ || !col_)
|
||||
return NULL;
|
||||
return col_[col];
|
||||
}
|
||||
|
||||
// Returns the best node in the last column of last performed search.
|
||||
SearchNode *BeamSearch::BestNode() const {
|
||||
if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
|
||||
return NULL;
|
||||
|
||||
int node_cnt = col_[col_cnt_ - 1]->NodeCount();
|
||||
SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
|
||||
if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
|
||||
return NULL;
|
||||
return srch_nodes[0];
|
||||
}
|
||||
|
||||
// Returns the string corresponding to the specified alt.
|
||||
char_32 *BeamSearch::Alt(int alt) const {
|
||||
// get the last column of the lattice
|
||||
if (col_cnt_ <= 0)
|
||||
return NULL;
|
||||
|
||||
SearchColumn *srch_col = col_[col_cnt_ - 1];
|
||||
if (!srch_col)
|
||||
return NULL;
|
||||
|
||||
// point to the last node in the selected path
|
||||
if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SearchNode *srch_node = srch_col->Nodes()[alt];
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
|
||||
// get string
|
||||
char_32 *str32 = srch_node->PathString();
|
||||
if (!str32)
|
||||
return NULL;
|
||||
|
||||
return str32;
|
||||
}
|
||||
|
||||
// Backtracks from the specified node index and returns the corresponding
|
||||
// character mapped segments and character count. Optional return
|
||||
// arguments are the char_32 result string and character bounding
|
||||
// boxes, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, int node_index,
|
||||
int *char_cnt, char_32 **str32,
|
||||
Boxa **char_boxes) const {
|
||||
// get the last column of the lattice
|
||||
if (col_cnt_ <= 0)
|
||||
return NULL;
|
||||
SearchColumn *srch_col = col_[col_cnt_ - 1];
|
||||
if (!srch_col)
|
||||
return NULL;
|
||||
|
||||
// point to the last node in the selected path
|
||||
if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
|
||||
return NULL;
|
||||
|
||||
SearchNode *srch_node = srch_col->Nodes()[node_index];
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
|
||||
}
|
||||
|
||||
// Backtracks from the specified node index and returns the corresponding
|
||||
// character mapped segments and character count. Optional return
|
||||
// arguments are the char_32 result string and character bounding
|
||||
// boxes, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node,
|
||||
int *char_cnt, char_32 **str32,
|
||||
Boxa **char_boxes) const {
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
|
||||
if (str32) {
|
||||
delete [](*str32); // clear existing value
|
||||
*str32 = srch_node->PathString();
|
||||
if (!*str32)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (char_boxes && *char_boxes) {
|
||||
boxaDestroy(char_boxes); // clear existing value
|
||||
}
|
||||
|
||||
CharSamp **chars;
|
||||
chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
|
||||
if (!chars && str32)
|
||||
delete []*str32;
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Backtracks from the given lattice node and return the corresponding
|
||||
// char mapped segments and character count. The character bounding
|
||||
// boxes are optional return arguments, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj,
|
||||
SearchNode *srch_node,
|
||||
int *char_cnt,
|
||||
Boxa **char_boxes) const {
|
||||
// Count the characters (could be less than the path length when in
|
||||
// phrase mode)
|
||||
*char_cnt = 0;
|
||||
SearchNode *node = srch_node;
|
||||
while (node) {
|
||||
node = node->ParentNode();
|
||||
(*char_cnt)++;
|
||||
}
|
||||
|
||||
if (*char_cnt == 0)
|
||||
return NULL;
|
||||
|
||||
// Allocate box array
|
||||
if (char_boxes) {
|
||||
if (*char_boxes)
|
||||
boxaDestroy(char_boxes); // clear existing value
|
||||
*char_boxes = boxaCreate(*char_cnt);
|
||||
if (*char_boxes == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Allocate memory for CharSamp array.
|
||||
CharSamp **chars = new CharSamp *[*char_cnt];
|
||||
|
||||
int ch_idx = *char_cnt - 1;
|
||||
int seg_pt_cnt = srch_obj->SegPtCnt();
|
||||
bool success=true;
|
||||
while (srch_node && ch_idx >= 0) {
|
||||
// Parent node (could be null)
|
||||
SearchNode *parent_node = srch_node->ParentNode();
|
||||
|
||||
// Get the seg pts corresponding to the search node
|
||||
int st_col = !parent_node ? 0 : parent_node->ColIdx() + 1;
|
||||
int st_seg_pt = st_col <= 0 ? -1 : st_col - 1;
|
||||
int end_col = srch_node->ColIdx();
|
||||
int end_seg_pt = end_col >= seg_pt_cnt ? seg_pt_cnt : end_col;
|
||||
|
||||
// Get a char sample corresponding to the segmentation points
|
||||
CharSamp *samp = srch_obj->CharSample(st_seg_pt, end_seg_pt);
|
||||
if (!samp) {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
samp->SetLabel(srch_node->NodeString());
|
||||
chars[ch_idx] = samp;
|
||||
if (char_boxes) {
|
||||
// Create the corresponding character bounding box
|
||||
Box *char_box = boxCreate(samp->Left(), samp->Top(),
|
||||
samp->Width(), samp->Height());
|
||||
if (!char_box) {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
boxaAddBox(*char_boxes, char_box, L_INSERT);
|
||||
}
|
||||
srch_node = parent_node;
|
||||
ch_idx--;
|
||||
}
|
||||
if (!success) {
|
||||
delete []chars;
|
||||
if (char_boxes)
|
||||
boxaDestroy(char_boxes);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Reverse the order of boxes.
|
||||
if (char_boxes) {
|
||||
int char_boxa_size = boxaGetCount(*char_boxes);
|
||||
int limit = char_boxa_size / 2;
|
||||
for (int i = 0; i < limit; ++i) {
|
||||
int box1_idx = i;
|
||||
int box2_idx = char_boxa_size - 1 - i;
|
||||
Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE);
|
||||
Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE);
|
||||
boxaReplaceBox(*char_boxes, box2_idx, box1);
|
||||
boxaReplaceBox(*char_boxes, box1_idx, box2);
|
||||
}
|
||||
}
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Returns the size cost of a string for a lattice path that
|
||||
// ends at the specified lattice node.
|
||||
int BeamSearch::SizeCost(SearchObject *srch_obj, SearchNode *node,
|
||||
char_32 **str32) const {
|
||||
CharSamp **chars = NULL;
|
||||
int char_cnt = 0;
|
||||
if (!node)
|
||||
return 0;
|
||||
// Backtrack to get string and character segmentation
|
||||
chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
|
||||
if (!chars)
|
||||
return WORST_COST;
|
||||
int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
|
||||
cntxt_->SizeModel()->Cost(chars, char_cnt);
|
||||
delete []chars;
|
||||
return size_cost;
|
||||
}
|
||||
} // namespace tesesract
|
@ -1,126 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: beam_search.h
|
||||
* Description: Declaration of Beam Word Search Algorithm Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The Beam Search class implements a Beam Search algorithm for the
|
||||
// N-best paths through the lattice of a search object using a language model
|
||||
// The search object is a segmented bitmap of a word image. The language model
|
||||
// is a state machine that defines valid sequences of characters
|
||||
// The cost of each path is the combined (product) probabilities of the
|
||||
// characters along the path. The character probabilities are computed using
|
||||
// the character classifier member of the RecoContext
|
||||
// The BeamSearch class itself holds the state of the last search it performed
|
||||
// using its "Search" method. Subsequent class to the Search method erase the
|
||||
// states of previously done searches
|
||||
|
||||
#ifndef BEAM_SEARCH_H
|
||||
#define BEAM_SEARCH_H
|
||||
|
||||
#include "search_column.h"
|
||||
#include "word_altlist.h"
|
||||
#include "search_object.h"
|
||||
#include "lang_model.h"
|
||||
#include "cube_utils.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BeamSearch {
|
||||
public:
|
||||
explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
|
||||
~BeamSearch();
|
||||
// Performs a beam search in the specified search using the specified
|
||||
// language model; returns an alternate list of possible words as a result.
|
||||
WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
|
||||
// Returns the best node in the last column of last performed search.
|
||||
SearchNode *BestNode() const;
|
||||
// Returns the string corresponding to the specified alt.
|
||||
char_32 *Alt(int alt) const;
|
||||
// Backtracks from the specified lattice node and returns the corresponding
|
||||
// character-mapped segments, character count, char_32 result string, and
|
||||
// character bounding boxes (if char_boxes is not NULL). If the segments
|
||||
// cannot be constructed, returns NULL, and all result arguments
|
||||
// will be NULL.
|
||||
CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
|
||||
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
||||
// Same as above, except it takes a pointer to a search node object
|
||||
// instead of node index.
|
||||
CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
|
||||
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
||||
// Returns the size cost of a specified string of a lattice
|
||||
// path that ends at the specified lattice node.
|
||||
int SizeCost(SearchObject *srch_obj, SearchNode *node,
|
||||
char_32 **str32 = NULL) const;
|
||||
// Returns the word unigram cost of the given string, possibly
|
||||
// stripping out a single trailing punctuation character.
|
||||
int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
|
||||
|
||||
// Supplementary functions needed for visualization
|
||||
// Return column count of the lattice.
|
||||
inline int ColCnt() const { return col_cnt_; }
|
||||
// Returns the lattice column corresponding to the specified column index.
|
||||
SearchColumn *Column(int col_idx) const;
|
||||
// Return the index of the best node in the last column of the
|
||||
// best-cost path before the alternates list is sorted.
|
||||
inline int BestPresortedNodeIndex() const {
|
||||
return best_presorted_node_idx_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Maximum reasonable segmentation point count
|
||||
static const int kMaxSegPointCnt = 128;
|
||||
// Recognition context object; the context holds the character classifier
|
||||
// and the tuning parameters object
|
||||
CubeRecoContext *cntxt_;
|
||||
// Count of segmentation pts
|
||||
int seg_pt_cnt_;
|
||||
// Lattice column count; currently redundant with respect to seg_pt_cnt_
|
||||
// but that might change in the future
|
||||
int col_cnt_;
|
||||
// Array of lattice columns
|
||||
SearchColumn **col_;
|
||||
// Run in word or phrase mode
|
||||
bool word_mode_;
|
||||
// Node index of best-cost node, before alternates are merged and sorted
|
||||
int best_presorted_node_idx_;
|
||||
// Cleans up beam search state
|
||||
void Cleanup();
|
||||
// Creates a Word alternate list from the results in the lattice.
|
||||
// This function computes a cost for each node in the final column
|
||||
// of the lattice, which is a weighted average of several costs:
|
||||
// size cost, character bigram cost, word unigram cost, and
|
||||
// recognition cost from the beam search. The weights are the
|
||||
// CubeTuningParams, which are learned together with the character
|
||||
// classifiers.
|
||||
WordAltList *CreateWordAltList(SearchObject *srch_obj);
|
||||
// Creates a set of children nodes emerging from a parent node based on
|
||||
// the character alternate list and the language model.
|
||||
void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
|
||||
SearchNode *parent_node, LangModEdge *lm_parent_edge,
|
||||
CharAltList *char_alt_list, int extra_cost);
|
||||
// Backtracks from the given lattice node and returns the corresponding
|
||||
// char mapped segments, character count, and character bounding boxes (if
|
||||
// char_boxes is not NULL). If the segments cannot be constructed,
|
||||
// returns NULL, and all result arguments will be NULL.
|
||||
CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
|
||||
int* char_cnt, Boxa **char_boxes) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // BEAM_SEARCH_H
|
1090
cube/bmp_8.cpp
1090
cube/bmp_8.cpp
File diff suppressed because it is too large
Load Diff
122
cube/bmp_8.h
122
cube/bmp_8.h
@ -1,122 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: bmp_8.h
|
||||
* Description: Declaration of an 8-bit Bitmap class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BMP8_H
|
||||
#define BMP8_H
|
||||
|
||||
// The Bmp8 class is an 8-bit bitmap that represents images of
|
||||
// words, characters and segments throughout Cube
|
||||
// It is meant to provide fast access to the bitmap bits and provide
|
||||
// fast scaling, cropping, deslanting, connected components detection,
|
||||
// loading and saving functionality
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "con_comp.h"
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Non-integral deslanting parameters.
|
||||
static const float kMinDeslantAngle = -30.0f;
|
||||
static const float kMaxDeslantAngle = 30.0f;
|
||||
static const float kDeslantAngleDelta = 0.5f;
|
||||
|
||||
class Bmp8 {
|
||||
public:
|
||||
Bmp8(unsigned short wid, unsigned short hgt);
|
||||
~Bmp8();
|
||||
// Clears the bitmap
|
||||
bool Clear();
|
||||
// accessors to bitmap dimensions
|
||||
inline unsigned short Width() const { return wid_; }
|
||||
inline unsigned short Stride() const { return stride_; }
|
||||
inline unsigned short Height() const { return hgt_; }
|
||||
inline unsigned char *RawData() const {
|
||||
return (line_buff_ == NULL ? NULL : line_buff_[0]);
|
||||
}
|
||||
// creates a scaled version of the specified bitmap
|
||||
// Optionally, scaling can be isotropic (preserving aspect ratio) or not
|
||||
bool ScaleFrom(Bmp8 *bmp, bool isotropic = true);
|
||||
// Deslant the bitmap vertically
|
||||
bool Deslant();
|
||||
// Deslant the bitmap horizontally
|
||||
bool HorizontalDeslant(double *deslant_angle);
|
||||
// Create a bitmap object from a file
|
||||
static Bmp8 *FromCharDumpFile(CachedFile *fp);
|
||||
static Bmp8 *FromCharDumpFile(FILE *fp);
|
||||
// are two bitmaps identical
|
||||
bool IsIdentical(Bmp8 *pBmp) const;
|
||||
// Detect connected components
|
||||
ConComp ** FindConComps(int *concomp_cnt, int min_size) const;
|
||||
// compute the foreground ratio
|
||||
float ForegroundRatio() const;
|
||||
// returns the mean horizontal histogram entropy of the bitmap
|
||||
float MeanHorizontalHistogramEntropy() const;
|
||||
// returns the horizontal histogram of the bitmap
|
||||
int *HorizontalHistogram() const;
|
||||
|
||||
private:
|
||||
// Compute a look up tan table that will be used for fast slant computation
|
||||
static bool ComputeTanTable();
|
||||
// create a bitmap buffer (two flavors char & int) and init contents
|
||||
unsigned char ** CreateBmpBuffer(unsigned char init_val = 0xff);
|
||||
static unsigned int ** CreateBmpBuffer(int wid, int hgt,
|
||||
unsigned char init_val = 0xff);
|
||||
// Free a bitmap buffer
|
||||
static void FreeBmpBuffer(unsigned char **buff);
|
||||
static void FreeBmpBuffer(unsigned int **buff);
|
||||
|
||||
// a static array that holds the tan lookup table
|
||||
static float *tan_table_;
|
||||
// bitmap 32-bit-aligned stride
|
||||
unsigned short stride_;
|
||||
// Bmp8 magic number used to validate saved bitmaps
|
||||
static const unsigned int kMagicNumber = 0xdeadbeef;
|
||||
|
||||
protected:
|
||||
// bitmap dimensions
|
||||
unsigned short wid_;
|
||||
unsigned short hgt_;
|
||||
// bitmap contents
|
||||
unsigned char **line_buff_;
|
||||
// deslanting parameters
|
||||
static const int kConCompAllocChunk = 16;
|
||||
static const int kDeslantAngleCount;
|
||||
|
||||
// Load dimensions & contents of bitmap from file
|
||||
bool LoadFromCharDumpFile(CachedFile *fp);
|
||||
bool LoadFromCharDumpFile(FILE *fp);
|
||||
// Load dimensions & contents of bitmap from raw data
|
||||
bool LoadFromCharDumpFile(unsigned char **raw_data);
|
||||
// Load contents of bitmap from raw data
|
||||
bool LoadFromRawData(unsigned char *data);
|
||||
// save bitmap to a file
|
||||
bool SaveBmp2CharDumpFile(FILE *fp) const;
|
||||
// checks if a row or a column are entirely blank
|
||||
bool IsBlankColumn(int x) const;
|
||||
bool IsBlankRow(int y) const;
|
||||
// crop the bitmap returning new dimensions
|
||||
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt);
|
||||
// copy part of the specified bitmap
|
||||
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // BMP8_H
|
@ -1,147 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cached_file.pp
|
||||
* Description: Implementation of an Cached File Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include <cstring>
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CachedFile::CachedFile(string file_name) {
|
||||
file_name_ = file_name;
|
||||
buff_ = NULL;
|
||||
buff_pos_ = 0;
|
||||
buff_size_ = 0;
|
||||
file_pos_ = 0;
|
||||
file_size_ = 0;
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
CachedFile::~CachedFile() {
|
||||
if (fp_ != NULL) {
|
||||
fclose(fp_);
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
if (buff_ != NULL) {
|
||||
delete []buff_;
|
||||
buff_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// free buffers and init vars
|
||||
bool CachedFile::Open() {
|
||||
if (fp_ != NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
fp_ = fopen(file_name_.c_str(), "rb");
|
||||
if (fp_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// seek to the end
|
||||
fseek(fp_, 0, SEEK_END);
|
||||
// get file size
|
||||
file_size_ = ftell(fp_);
|
||||
if (file_size_ < 1) {
|
||||
return false;
|
||||
}
|
||||
// rewind again
|
||||
rewind(fp_);
|
||||
// alloc memory for buffer
|
||||
buff_ = new unsigned char[kCacheSize];
|
||||
// init counters
|
||||
buff_size_ = 0;
|
||||
buff_pos_ = 0;
|
||||
file_pos_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// add a new sample
|
||||
int CachedFile::Read(void *read_buff, int bytes) {
|
||||
int read_bytes = 0;
|
||||
unsigned char *buff = (unsigned char *)read_buff;
|
||||
|
||||
// do we need to read beyond the buffer
|
||||
if ((buff_pos_ + bytes) > buff_size_) {
|
||||
// copy as much bytes from the current buffer if any
|
||||
int copy_bytes = buff_size_ - buff_pos_;
|
||||
|
||||
if (copy_bytes > 0) {
|
||||
memcpy(buff, buff_ + buff_pos_, copy_bytes);
|
||||
buff += copy_bytes;
|
||||
bytes -= copy_bytes;
|
||||
read_bytes += copy_bytes;
|
||||
}
|
||||
|
||||
// determine how much to read
|
||||
buff_size_ = kCacheSize;
|
||||
|
||||
if ((file_pos_ + buff_size_) > file_size_) {
|
||||
buff_size_ = static_cast<int>(file_size_ - file_pos_);
|
||||
}
|
||||
|
||||
// EOF ?
|
||||
if (buff_size_ <= 0 || bytes > buff_size_) {
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
// read the first chunck
|
||||
if (fread(buff_, 1, buff_size_, fp_) != buff_size_) {
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
buff_pos_ = 0;
|
||||
file_pos_ += buff_size_;
|
||||
}
|
||||
|
||||
memcpy(buff, buff_ + buff_pos_, bytes);
|
||||
read_bytes += bytes;
|
||||
buff_pos_ += bytes;
|
||||
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
long CachedFile::Size() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return file_size_;
|
||||
}
|
||||
|
||||
long CachedFile::Tell() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return file_pos_ - buff_size_ + buff_pos_;
|
||||
}
|
||||
|
||||
bool CachedFile::eof() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return (file_pos_ - buff_size_ + buff_pos_) >= file_size_;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,69 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cached_file.h
|
||||
* Description: Declaration of a Cached File class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CACHED_FILE_H
|
||||
#define CACHED_FILE_H
|
||||
|
||||
// The CachedFile class provides a large-cache read access to a file
|
||||
// It is mainly designed for loading large word dump files
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class CachedFile {
|
||||
public:
|
||||
explicit CachedFile(string file_name);
|
||||
~CachedFile();
|
||||
|
||||
// reads a specified number of bytes to the specified buffer and
|
||||
// returns the actual number of bytes read
|
||||
int Read(void *read_buff, int bytes);
|
||||
// Returns the file size
|
||||
long Size();
|
||||
// returns the current position in the file
|
||||
long Tell();
|
||||
// End of file flag
|
||||
bool eof();
|
||||
|
||||
private:
|
||||
static const unsigned int kCacheSize = 0x8000000;
|
||||
// file name
|
||||
string file_name_;
|
||||
// internal file buffer
|
||||
unsigned char *buff_;
|
||||
// file position
|
||||
long file_pos_;
|
||||
// file size
|
||||
long file_size_;
|
||||
// position of file within buffer
|
||||
int buff_pos_;
|
||||
// buffer size
|
||||
int buff_size_;
|
||||
// file handle
|
||||
FILE *fp_;
|
||||
// Opens the file
|
||||
bool Open();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CACHED_FILE_H
|
@ -1,108 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_altlist.cpp
|
||||
* Description: Implementation of a Character Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "char_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The CharSet is not class owned and must exist for
|
||||
// the life time of this class
|
||||
CharAltList::CharAltList(const CharSet *char_set, int max_alt)
|
||||
: AltList(max_alt) {
|
||||
char_set_ = char_set;
|
||||
max_alt_ = max_alt;
|
||||
class_id_alt_ = NULL;
|
||||
class_id_cost_ = NULL;
|
||||
}
|
||||
|
||||
CharAltList::~CharAltList() {
|
||||
if (class_id_alt_ != NULL) {
|
||||
delete []class_id_alt_;
|
||||
class_id_alt_ = NULL;
|
||||
}
|
||||
|
||||
if (class_id_cost_ != NULL) {
|
||||
delete []class_id_cost_;
|
||||
class_id_cost_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a new char alternate
|
||||
bool CharAltList::Insert(int class_id, int cost, void *tag) {
|
||||
// validate class ID
|
||||
if (class_id < 0 || class_id >= char_set_->ClassCount()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// allocate buffers if nedded
|
||||
if (class_id_alt_ == NULL || alt_cost_ == NULL) {
|
||||
class_id_alt_ = new int[max_alt_];
|
||||
alt_cost_ = new int[max_alt_];
|
||||
alt_tag_ = new void *[max_alt_];
|
||||
|
||||
memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
|
||||
}
|
||||
|
||||
if (class_id_cost_ == NULL) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
class_id_cost_ = new int[class_cnt];
|
||||
|
||||
for (int ich = 0; ich < class_cnt; ich++) {
|
||||
class_id_cost_[ich] = WORST_COST;
|
||||
}
|
||||
}
|
||||
|
||||
if (class_id < 0 || class_id >= char_set_->ClassCount()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert the alternate
|
||||
class_id_alt_[alt_cnt_] = class_id;
|
||||
alt_cost_[alt_cnt_] = cost;
|
||||
alt_tag_[alt_cnt_] = tag;
|
||||
|
||||
alt_cnt_++;
|
||||
|
||||
class_id_cost_[class_id] = cost;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// sort the alternate Desc. based on prob
|
||||
void CharAltList::Sort() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||
if (alt_cost_[alt_idx] > alt_cost_[alt]) {
|
||||
int temp = class_id_alt_[alt_idx];
|
||||
class_id_alt_[alt_idx] = class_id_alt_[alt];
|
||||
class_id_alt_[alt] = temp;
|
||||
|
||||
temp = alt_cost_[alt_idx];
|
||||
alt_cost_[alt_idx] = alt_cost_[alt];
|
||||
alt_cost_[alt] = temp;
|
||||
|
||||
void *tag = alt_tag_[alt_idx];
|
||||
alt_tag_[alt_idx] = alt_tag_[alt];
|
||||
alt_tag_[alt] = tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_altlist.h
|
||||
* Description: Declaration of a Character Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CHAR_ALT_LIST_H
|
||||
#define CHAR_ALT_LIST_H
|
||||
|
||||
// The CharAltList class holds the list of class alternates returned from
|
||||
// a character classifier. Each alternate represents a class ID.
|
||||
// It inherits from the AltList class.
|
||||
// The CharAltList owns a CharSet object that maps a class-id to a string.
|
||||
|
||||
#include "altlist.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharAltList : public AltList {
|
||||
public:
|
||||
CharAltList(const CharSet *char_set, int max_alt = kMaxCharAlt);
|
||||
~CharAltList();
|
||||
|
||||
// Sort the alternate list based on cost
|
||||
void Sort();
|
||||
// insert a new alternate with the specified class-id, cost and tag
|
||||
bool Insert(int class_id, int cost, void *tag = NULL);
|
||||
// returns the cost of a specific class ID
|
||||
inline int ClassCost(int class_id) const {
|
||||
if (class_id_cost_ == NULL ||
|
||||
class_id < 0 ||
|
||||
class_id >= char_set_->ClassCount()) {
|
||||
return WORST_COST;
|
||||
}
|
||||
return class_id_cost_[class_id];
|
||||
}
|
||||
// returns the alternate class-id corresponding to an alternate index
|
||||
inline int Alt(int alt_idx) const { return class_id_alt_[alt_idx]; }
|
||||
// set the cost of a certain alternate
|
||||
void SetAltCost(int alt_idx, int cost) {
|
||||
alt_cost_[alt_idx] = cost;
|
||||
class_id_cost_[class_id_alt_[alt_idx]] = cost;
|
||||
}
|
||||
|
||||
private:
|
||||
// character set object. Passed at construction time
|
||||
const CharSet *char_set_;
|
||||
// array of alternate class-ids
|
||||
int *class_id_alt_;
|
||||
// array of alternate costs
|
||||
int *class_id_cost_;
|
||||
// default max count of alternates
|
||||
static const int kMaxCharAlt = 256;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_ALT_LIST_H
|
@ -1,191 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_bigrams.cpp
|
||||
* Description: Implementation of a Character Bigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_bigrams.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharBigrams::CharBigrams() {
|
||||
memset(&bigram_table_, 0, sizeof(bigram_table_));
|
||||
}
|
||||
|
||||
CharBigrams::~CharBigrams() {
|
||||
if (bigram_table_.char_bigram != NULL) {
|
||||
for (int ch1 = 0; ch1 <= bigram_table_.max_char; ch1++) {
|
||||
CharBigram *char_bigram = bigram_table_.char_bigram + ch1;
|
||||
|
||||
if (char_bigram->bigram != NULL) {
|
||||
delete []char_bigram->bigram;
|
||||
}
|
||||
}
|
||||
delete []bigram_table_.char_bigram;
|
||||
}
|
||||
}
|
||||
|
||||
CharBigrams *CharBigrams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string file_name;
|
||||
string str;
|
||||
|
||||
file_name = data_file_path + lang;
|
||||
file_name += ".cube.bigrams";
|
||||
|
||||
// load the string into memory
|
||||
if (!CubeUtils::ReadFileToString(file_name, &str)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// construct a new object
|
||||
CharBigrams *char_bigrams_obj = new CharBigrams();
|
||||
CharBigramTable *table = &char_bigrams_obj->bigram_table_;
|
||||
|
||||
table->total_cnt = 0;
|
||||
table->max_char = -1;
|
||||
table->char_bigram = NULL;
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
|
||||
|
||||
for (int big = 0; big < str_vec.size(); big++) {
|
||||
char_32 ch1;
|
||||
char_32 ch2;
|
||||
int cnt;
|
||||
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
|
||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
|
||||
"reading line: %s\n", str_vec[big].c_str());
|
||||
delete char_bigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// expand the bigram table
|
||||
if (ch1 > table->max_char) {
|
||||
CharBigram *char_bigram = new CharBigram[ch1 + 1];
|
||||
|
||||
if (table->char_bigram != NULL && table->max_char >= 0) {
|
||||
memcpy(char_bigram, table->char_bigram,
|
||||
(table->max_char + 1) * sizeof(*char_bigram));
|
||||
|
||||
delete []table->char_bigram;
|
||||
}
|
||||
table->char_bigram = char_bigram;
|
||||
|
||||
// init
|
||||
for (int new_big = table->max_char + 1; new_big <= ch1; new_big++) {
|
||||
table->char_bigram[new_big].total_cnt = 0;
|
||||
table->char_bigram[new_big].max_char = -1;
|
||||
table->char_bigram[new_big].bigram = NULL;
|
||||
}
|
||||
table->max_char = ch1;
|
||||
}
|
||||
|
||||
if (ch2 > table->char_bigram[ch1].max_char) {
|
||||
Bigram *bigram = new Bigram[ch2 + 1];
|
||||
|
||||
if (table->char_bigram[ch1].bigram != NULL &&
|
||||
table->char_bigram[ch1].max_char >= 0) {
|
||||
memcpy(bigram, table->char_bigram[ch1].bigram,
|
||||
(table->char_bigram[ch1].max_char + 1) * sizeof(*bigram));
|
||||
delete []table->char_bigram[ch1].bigram;
|
||||
}
|
||||
table->char_bigram[ch1].bigram = bigram;
|
||||
|
||||
// init
|
||||
for (int new_big = table->char_bigram[ch1].max_char + 1;
|
||||
new_big <= ch2; new_big++) {
|
||||
table->char_bigram[ch1].bigram[new_big].cnt = 0;
|
||||
}
|
||||
table->char_bigram[ch1].max_char = ch2;
|
||||
}
|
||||
|
||||
table->char_bigram[ch1].bigram[ch2].cnt = cnt;
|
||||
table->char_bigram[ch1].total_cnt += cnt;
|
||||
table->total_cnt += cnt;
|
||||
}
|
||||
|
||||
// compute costs (-log probs)
|
||||
table->worst_cost = static_cast<int>(
|
||||
-PROB2COST_SCALE * log(0.5 / table->total_cnt));
|
||||
for (char_32 ch1 = 0; ch1 <= table->max_char; ch1++) {
|
||||
for (char_32 ch2 = 0; ch2 <= table->char_bigram[ch1].max_char; ch2++) {
|
||||
int cnt = table->char_bigram[ch1].bigram[ch2].cnt;
|
||||
table->char_bigram[ch1].bigram[ch2].cost =
|
||||
static_cast<int>(-PROB2COST_SCALE *
|
||||
log(MAX(0.5, static_cast<double>(cnt)) /
|
||||
table->total_cnt));
|
||||
}
|
||||
}
|
||||
return char_bigrams_obj;
|
||||
}
|
||||
|
||||
int CharBigrams::PairCost(char_32 ch1, char_32 ch2) const {
|
||||
if (ch1 > bigram_table_.max_char) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
if (ch2 > bigram_table_.char_bigram[ch1].max_char) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
return bigram_table_.char_bigram[ch1].bigram[ch2].cost;
|
||||
}
|
||||
|
||||
int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set) const {
|
||||
if (!char_32_ptr || char_32_ptr[0] == 0) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
int cost = MeanCostWithSpaces(char_32_ptr);
|
||||
if (CubeUtils::StrLen(char_32_ptr) >= kMinLengthCaseInvariant &&
|
||||
CubeUtils::IsCaseInvariant(char_32_ptr, char_set)) {
|
||||
char_32 *lower_32 = CubeUtils::ToLower(char_32_ptr, char_set);
|
||||
if (lower_32 && lower_32[0] != 0) {
|
||||
int cost_lower = MeanCostWithSpaces(lower_32);
|
||||
cost = MIN(cost, cost_lower);
|
||||
}
|
||||
delete [] lower_32;
|
||||
char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set);
|
||||
if (upper_32 && upper_32[0] != 0) {
|
||||
int cost_upper = MeanCostWithSpaces(upper_32);
|
||||
cost = MIN(cost, cost_upper);
|
||||
}
|
||||
delete [] upper_32;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
int CharBigrams::MeanCostWithSpaces(const char_32 *char_32_ptr) const {
|
||||
if (!char_32_ptr)
|
||||
return bigram_table_.worst_cost;
|
||||
int len = CubeUtils::StrLen(char_32_ptr);
|
||||
int cost = 0;
|
||||
int c = 0;
|
||||
cost = PairCost(' ', char_32_ptr[0]);
|
||||
for (c = 1; c < len; c++) {
|
||||
cost += PairCost(char_32_ptr[c - 1], char_32_ptr[c]);
|
||||
}
|
||||
cost += PairCost(char_32_ptr[len - 1], ' ');
|
||||
return static_cast<int>(cost / static_cast<double>(len + 1));
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,89 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_bigrams.h
|
||||
* Description: Declaration of a Character Bigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharBigram class represents the interface to the character bigram
|
||||
// table used by Cube
|
||||
// A CharBigram object can be constructed from the Char Bigrams file
|
||||
// Given a sequence of characters, the "Cost" method returns the Char Bigram
|
||||
// cost of the string according to the table
|
||||
|
||||
#ifndef CHAR_BIGRAMS_H
|
||||
#define CHAR_BIGRAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// structure representing a single bigram value
|
||||
struct Bigram {
|
||||
int cnt;
|
||||
int cost;
|
||||
};
|
||||
|
||||
// structure representing the char bigram array of characters
|
||||
// following a specific character
|
||||
struct CharBigram {
|
||||
int total_cnt;
|
||||
char_32 max_char;
|
||||
Bigram *bigram;
|
||||
};
|
||||
|
||||
// structure representing the whole bigram table
|
||||
struct CharBigramTable {
|
||||
int total_cnt;
|
||||
int worst_cost;
|
||||
char_32 max_char;
|
||||
CharBigram *char_bigram;
|
||||
};
|
||||
|
||||
class CharBigrams {
|
||||
public:
|
||||
CharBigrams();
|
||||
~CharBigrams();
|
||||
// Construct the CharBigrams class from a file
|
||||
static CharBigrams *Create(const string &data_file_path,
|
||||
const string &lang);
|
||||
// Top-level function to return the mean character bigram cost of a
|
||||
// sequence of characters. If char_set is not NULL, use
|
||||
// tesseract functions to return a case-invariant cost.
|
||||
// This avoids unnecessarily penalizing all-one-case words or
|
||||
// capitalized words (first-letter upper-case and remaining letters
|
||||
// lower-case).
|
||||
int Cost(const char_32 *str, CharSet *char_set) const;
|
||||
|
||||
protected:
|
||||
// Returns the character bigram cost of two characters.
|
||||
int PairCost(char_32 ch1, char_32 ch2) const;
|
||||
// Returns the mean character bigram cost of a sequence of
|
||||
// characters. Adds a space at the beginning and end to account for
|
||||
// cost of starting and ending characters.
|
||||
int MeanCostWithSpaces(const char_32 *char_32_ptr) const;
|
||||
|
||||
private:
|
||||
// Only words this length or greater qualify for case-invariant character
|
||||
// bigram cost.
|
||||
static const int kMinLengthCaseInvariant = 4;
|
||||
|
||||
|
||||
CharBigramTable bigram_table_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_BIGRAMS_H
|
@ -1,640 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp.cpp
|
||||
* Description: Implementation of a Character Bitmap Sample Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
#define MAX_LINE_LEN 1024
|
||||
|
||||
CharSamp::CharSamp()
|
||||
: Bmp8(0, 0) {
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::CharSamp(int wid, int hgt)
|
||||
: Bmp8(wid, hgt) {
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::CharSamp(int left, int top, int wid, int hgt)
|
||||
: Bmp8(wid, hgt)
|
||||
, left_(left)
|
||||
, top_(top) {
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::~CharSamp() {
|
||||
if (label32_ != NULL) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// returns a UTF-8 version of the string label
|
||||
string CharSamp::stringLabel() const {
|
||||
string str = "";
|
||||
if (label32_ != NULL) {
|
||||
string_32 str32(label32_);
|
||||
CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
// set a the string label using a UTF encoded string
|
||||
void CharSamp::SetLabel(string str) {
|
||||
if (label32_ != NULL) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
}
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
|
||||
SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
|
||||
}
|
||||
|
||||
// creates a CharSamp object from file
|
||||
CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
|
||||
unsigned short left;
|
||||
unsigned short top;
|
||||
unsigned short page;
|
||||
unsigned short first_char;
|
||||
unsigned short last_char;
|
||||
unsigned short norm_top;
|
||||
unsigned short norm_bottom;
|
||||
unsigned short norm_aspect_ratio;
|
||||
unsigned int val32;
|
||||
|
||||
char_32 *label32;
|
||||
|
||||
// read and check 32 bit marker
|
||||
if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
if (fp->Read(label32, val32 * sizeof(*label32)) !=
|
||||
(val32 * sizeof(*label32))) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
// read coordinates
|
||||
if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
|
||||
sizeof(norm_aspect_ratio)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
// init
|
||||
char_samp->label32_ = label32;
|
||||
char_samp->page_ = page;
|
||||
char_samp->left_ = left;
|
||||
char_samp->top_ = top;
|
||||
char_samp->first_char_ = first_char;
|
||||
char_samp->last_char_ = last_char;
|
||||
char_samp->norm_top_ = norm_top;
|
||||
char_samp->norm_bottom_ = norm_bottom;
|
||||
char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(fp) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
|
||||
unsigned short left;
|
||||
unsigned short top;
|
||||
unsigned short page;
|
||||
unsigned short first_char;
|
||||
unsigned short last_char;
|
||||
unsigned short norm_top;
|
||||
unsigned short norm_bottom;
|
||||
unsigned short norm_aspect_ratio;
|
||||
unsigned int val32;
|
||||
char_32 *label32;
|
||||
|
||||
// read and check 32 bit marker
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
|
||||
(val32 * sizeof(*label32))) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
// read coordinates
|
||||
if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
|
||||
fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
|
||||
fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
|
||||
fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
|
||||
fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
|
||||
fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
|
||||
fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
|
||||
fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
|
||||
sizeof(norm_aspect_ratio)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
// init
|
||||
char_samp->label32_ = label32;
|
||||
char_samp->page_ = page;
|
||||
char_samp->left_ = left;
|
||||
char_samp->top_ = top;
|
||||
char_samp->first_char_ = first_char;
|
||||
char_samp->last_char_ = last_char;
|
||||
char_samp->norm_top_ = norm_top;
|
||||
char_samp->norm_bottom_ = norm_bottom;
|
||||
char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(fp) == false) {
|
||||
delete char_samp; // It owns label32.
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// returns a copy of the charsamp that is scaled to the
|
||||
// specified width and height
|
||||
CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
|
||||
CharSamp *scaled_samp = new CharSamp(wid, hgt);
|
||||
if (scaled_samp->ScaleFrom(this, isotropic) == false) {
|
||||
delete scaled_samp;
|
||||
return NULL;
|
||||
}
|
||||
scaled_samp->left_ = left_;
|
||||
scaled_samp->top_ = top_;
|
||||
scaled_samp->page_ = page_;
|
||||
scaled_samp->SetLabel(label32_);
|
||||
scaled_samp->first_char_ = first_char_;
|
||||
scaled_samp->last_char_ = last_char_;
|
||||
scaled_samp->norm_top_ = norm_top_;
|
||||
scaled_samp->norm_bottom_ = norm_bottom_;
|
||||
scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
|
||||
return scaled_samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
|
||||
unsigned char *data) {
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
|
||||
if (char_samp->LoadFromRawData(data) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Saves the charsamp to a dump file
|
||||
bool CharSamp::Save2CharDumpFile(FILE *fp) const {
|
||||
unsigned int val32;
|
||||
// write and check 32 bit marker
|
||||
val32 = 0xabd0fefe;
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
// write label length
|
||||
val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
// write label
|
||||
if (label32_ != NULL) {
|
||||
if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
|
||||
(val32 * sizeof(*label32_))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// write coordinates
|
||||
if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
|
||||
sizeof(first_char_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
|
||||
sizeof(norm_bottom_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
|
||||
sizeof(norm_aspect_ratio_)) {
|
||||
return false;
|
||||
}
|
||||
if (SaveBmp2CharDumpFile(fp) == false) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Crop the char samp such that there are no white spaces on any side.
|
||||
// The norm_top_ and norm_bottom_ fields are the character top/bottom
|
||||
// with respect to whatever context the character is being recognized
|
||||
// in (e.g. word bounding box) normalized to a standard size of
|
||||
// 255. Here they default to 0 and 255 (word box boundaries), but
|
||||
// since they are context dependent, they may need to be reset by the
|
||||
// calling function.
|
||||
CharSamp *CharSamp::Crop() {
|
||||
// get the dimesions of the cropped img
|
||||
int cropped_left = 0;
|
||||
int cropped_top = 0;
|
||||
int cropped_wid = wid_;
|
||||
int cropped_hgt = hgt_;
|
||||
Bmp8::Crop(&cropped_left, &cropped_top,
|
||||
&cropped_wid, &cropped_hgt);
|
||||
|
||||
if (cropped_wid == 0 || cropped_hgt == 0) {
|
||||
return NULL;
|
||||
}
|
||||
// create the cropped char samp
|
||||
CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
|
||||
top_ + cropped_top,
|
||||
cropped_wid, cropped_hgt);
|
||||
cropped_samp->SetLabel(label32_);
|
||||
cropped_samp->SetFirstChar(first_char_);
|
||||
cropped_samp->SetLastChar(last_char_);
|
||||
// the following 3 fields may/should be reset by the calling function
|
||||
// using context information, i.e., location of character box
|
||||
// w.r.t. the word bounding box
|
||||
cropped_samp->SetNormAspectRatio(255 *
|
||||
cropped_wid / (cropped_wid + cropped_hgt));
|
||||
cropped_samp->SetNormTop(0);
|
||||
cropped_samp->SetNormBottom(255);
|
||||
|
||||
// copy the bitmap to the cropped img
|
||||
Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
|
||||
return cropped_samp;
|
||||
}
|
||||
|
||||
// segment the char samp to connected components
|
||||
// based on contiguity and vertical pixel density histogram
|
||||
ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
|
||||
int max_hist_wnd, int min_con_comp_size) const {
|
||||
// init
|
||||
(*segment_cnt) = 0;
|
||||
int concomp_cnt = 0;
|
||||
int seg_cnt = 0;
|
||||
// find the concomps of the image
|
||||
ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
|
||||
if (concomp_cnt <= 0 || !concomp_array) {
|
||||
if (concomp_array)
|
||||
delete []concomp_array;
|
||||
return NULL;
|
||||
}
|
||||
ConComp **seg_array = NULL;
|
||||
// segment each concomp further using vertical histogram
|
||||
for (int concomp = 0; concomp < concomp_cnt; concomp++) {
|
||||
int concomp_seg_cnt = 0;
|
||||
// segment the concomp
|
||||
ConComp **concomp_seg_array = NULL;
|
||||
ConComp **concomp_alloc_seg =
|
||||
concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
|
||||
// no segments, add the whole concomp
|
||||
if (concomp_alloc_seg == NULL) {
|
||||
concomp_seg_cnt = 1;
|
||||
concomp_seg_array = concomp_array + concomp;
|
||||
} else {
|
||||
// delete the original concomp, we no longer need it
|
||||
concomp_seg_array = concomp_alloc_seg;
|
||||
delete concomp_array[concomp];
|
||||
}
|
||||
// add the resulting segments
|
||||
for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
|
||||
// too small of a segment: ignore
|
||||
if (concomp_seg_array[seg_idx]->Width() < 2 &&
|
||||
concomp_seg_array[seg_idx]->Height() < 2) {
|
||||
delete concomp_seg_array[seg_idx];
|
||||
} else {
|
||||
// add the new segment
|
||||
// extend the segment array
|
||||
if ((seg_cnt % kConCompAllocChunk) == 0) {
|
||||
ConComp **temp_segm_array =
|
||||
new ConComp *[seg_cnt + kConCompAllocChunk];
|
||||
if (seg_cnt > 0) {
|
||||
memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
|
||||
delete []seg_array;
|
||||
}
|
||||
seg_array = temp_segm_array;
|
||||
}
|
||||
seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
|
||||
}
|
||||
} // segment
|
||||
if (concomp_alloc_seg != NULL) {
|
||||
delete []concomp_alloc_seg;
|
||||
}
|
||||
} // concomp
|
||||
delete []concomp_array;
|
||||
|
||||
// sort the concomps from Left2Right or Right2Left, based on the reading order
|
||||
if (seg_cnt > 0 && seg_array != NULL) {
|
||||
qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
|
||||
ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
|
||||
}
|
||||
(*segment_cnt) = seg_cnt;
|
||||
return seg_array;
|
||||
}
|
||||
|
||||
// builds a char samp from a set of connected components
|
||||
CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
|
||||
int seg_flags_size, int *seg_flags,
|
||||
bool *left_most, bool *right_most,
|
||||
int word_hgt) {
|
||||
int concomp;
|
||||
int end_concomp;
|
||||
int concomp_cnt = 0;
|
||||
end_concomp = strt_concomp + seg_flags_size;
|
||||
// determine ID range
|
||||
bool once = false;
|
||||
int min_id = -1;
|
||||
int max_id = -1;
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
if (!once) {
|
||||
min_id = concomp_array[concomp]->ID();
|
||||
max_id = concomp_array[concomp]->ID();
|
||||
once = true;
|
||||
} else {
|
||||
UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
|
||||
}
|
||||
concomp_cnt++;
|
||||
}
|
||||
}
|
||||
if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
|
||||
return NULL;
|
||||
}
|
||||
// alloc memo for computing leftmost and right most attributes
|
||||
int id_cnt = max_id - min_id + 1;
|
||||
bool *id_exist = new bool[id_cnt];
|
||||
bool *left_most_exist = new bool[id_cnt];
|
||||
bool *right_most_exist = new bool[id_cnt];
|
||||
memset(id_exist, 0, id_cnt * sizeof(*id_exist));
|
||||
memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
|
||||
memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
|
||||
// find the dimensions of the charsamp
|
||||
once = false;
|
||||
int left = -1;
|
||||
int right = -1;
|
||||
int top = -1;
|
||||
int bottom = -1;
|
||||
int unq_ids = 0;
|
||||
int unq_left_most = 0;
|
||||
int unq_right_most = 0;
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
if (!once) {
|
||||
left = concomp_array[concomp]->Left();
|
||||
right = concomp_array[concomp]->Right();
|
||||
top = concomp_array[concomp]->Top();
|
||||
bottom = concomp_array[concomp]->Bottom();
|
||||
once = true;
|
||||
} else {
|
||||
UpdateRange(concomp_array[concomp]->Left(),
|
||||
concomp_array[concomp]->Right(), &left, &right);
|
||||
UpdateRange(concomp_array[concomp]->Top(),
|
||||
concomp_array[concomp]->Bottom(), &top, &bottom);
|
||||
}
|
||||
// count unq ids, unq left most and right mosts ids
|
||||
int concomp_id = concomp_array[concomp]->ID() - min_id;
|
||||
if (!id_exist[concomp_id]) {
|
||||
id_exist[concomp_id] = true;
|
||||
unq_ids++;
|
||||
}
|
||||
if (concomp_array[concomp]->LeftMost()) {
|
||||
if (left_most_exist[concomp_id] == false) {
|
||||
left_most_exist[concomp_id] = true;
|
||||
unq_left_most++;
|
||||
}
|
||||
}
|
||||
if (concomp_array[concomp]->RightMost()) {
|
||||
if (right_most_exist[concomp_id] == false) {
|
||||
right_most_exist[concomp_id] = true;
|
||||
unq_right_most++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete []id_exist;
|
||||
delete []left_most_exist;
|
||||
delete []right_most_exist;
|
||||
if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
|
||||
return NULL;
|
||||
}
|
||||
(*left_most) = (unq_left_most >= unq_ids);
|
||||
(*right_most) = (unq_right_most >= unq_ids);
|
||||
// create the char sample object
|
||||
CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
|
||||
|
||||
// set the foreground pixels
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
ConCompPt *pt_ptr = concomp_array[concomp]->Head();
|
||||
while (pt_ptr) {
|
||||
samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
}
|
||||
}
|
||||
return samp;
|
||||
}
|
||||
|
||||
// clones the object
|
||||
CharSamp *CharSamp::Clone() const {
|
||||
// create the cropped char samp
|
||||
CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
|
||||
samp->SetLabel(label32_);
|
||||
samp->SetFirstChar(first_char_);
|
||||
samp->SetLastChar(last_char_);
|
||||
samp->SetNormTop(norm_top_);
|
||||
samp->SetNormBottom(norm_bottom_);
|
||||
samp->SetNormAspectRatio(norm_aspect_ratio_);
|
||||
// copy the bitmap to the cropped img
|
||||
Copy(0, 0, wid_, hgt_, samp);
|
||||
return samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
|
||||
unsigned int val32;
|
||||
char_32 *label32;
|
||||
unsigned char *raw_data = *raw_data_ptr;
|
||||
|
||||
// read and check 32 bit marker
|
||||
memcpy(&val32, raw_data, sizeof(val32));
|
||||
raw_data += sizeof(val32);
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
memcpy(&val32, raw_data, sizeof(val32));
|
||||
raw_data += sizeof(val32);
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
memcpy(label32, raw_data, val32 * sizeof(*label32));
|
||||
raw_data += (val32 * sizeof(*label32));
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
|
||||
// read coordinates
|
||||
char_samp->label32_ = label32;
|
||||
memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
|
||||
raw_data += sizeof(char_samp->page_);
|
||||
memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
|
||||
raw_data += sizeof(char_samp->left_);
|
||||
memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
|
||||
raw_data += sizeof(char_samp->top_);
|
||||
memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
|
||||
raw_data += sizeof(char_samp->first_char_);
|
||||
memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
|
||||
raw_data += sizeof(char_samp->last_char_);
|
||||
memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
|
||||
raw_data += sizeof(char_samp->norm_top_);
|
||||
memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
|
||||
raw_data += sizeof(char_samp->norm_bottom_);
|
||||
memcpy(&char_samp->norm_aspect_ratio_, raw_data,
|
||||
sizeof(char_samp->norm_aspect_ratio_));
|
||||
raw_data += sizeof(char_samp->norm_aspect_ratio_);
|
||||
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
(*raw_data_ptr) = raw_data;
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// computes the features corresponding to the char sample
|
||||
bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
|
||||
// Create a scaled BMP
|
||||
CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
|
||||
if (!scaled_bmp) {
|
||||
return false;
|
||||
}
|
||||
// prepare input
|
||||
unsigned char *buff = scaled_bmp->RawData();
|
||||
// bitmap features
|
||||
int input;
|
||||
int bmp_size = conv_grid_size * conv_grid_size;
|
||||
for (input = 0; input < bmp_size; input++) {
|
||||
features[input] = 255.0f - (1.0f * buff[input]);
|
||||
}
|
||||
// word context features
|
||||
features[input++] = FirstChar();
|
||||
features[input++] = LastChar();
|
||||
features[input++] = NormTop();
|
||||
features[input++] = NormBottom();
|
||||
features[input++] = NormAspectRatio();
|
||||
delete scaled_bmp;
|
||||
return true;
|
||||
}
|
||||
} // namespace tesseract
|
158
cube/char_samp.h
158
cube/char_samp.h
@ -1,158 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp.h
|
||||
* Description: Declaration of a Character Bitmap Sample Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSamp inherits the Bmp8 class that represents images of
|
||||
// words, characters and segments throughout Cube
|
||||
// CharSamp adds more data members to hold the physical location of the image
|
||||
// in a page, page number in a book if available.
|
||||
// It also holds the label (GT) of the image that might correspond to a single
|
||||
// character or a word
|
||||
// It also provides methods for segmenting, scaling and cropping of the sample
|
||||
|
||||
#ifndef CHAR_SAMP_H
|
||||
#define CHAR_SAMP_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "bmp_8.h"
|
||||
#include "string_32.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSamp : public Bmp8 {
|
||||
public:
|
||||
CharSamp();
|
||||
CharSamp(int wid, int hgt);
|
||||
CharSamp(int left, int top, int wid, int hgt);
|
||||
~CharSamp();
|
||||
// accessor methods
|
||||
unsigned short Left() const { return left_; }
|
||||
unsigned short Right() const { return left_ + wid_; }
|
||||
unsigned short Top() const { return top_; }
|
||||
unsigned short Bottom() const { return top_ + hgt_; }
|
||||
unsigned short Page() const { return page_; }
|
||||
unsigned short NormTop() const { return norm_top_; }
|
||||
unsigned short NormBottom() const { return norm_bottom_; }
|
||||
unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
|
||||
unsigned short FirstChar() const { return first_char_; }
|
||||
unsigned short LastChar() const { return last_char_; }
|
||||
char_32 Label() const {
|
||||
if (label32_ == NULL || LabelLen() != 1) {
|
||||
return 0;
|
||||
}
|
||||
return label32_[0];
|
||||
}
|
||||
char_32 * StrLabel() const { return label32_; }
|
||||
string stringLabel() const;
|
||||
|
||||
void SetLeft(unsigned short left) { left_ = left; }
|
||||
void SetTop(unsigned short top) { top_ = top; }
|
||||
void SetPage(unsigned short page) { page_ = page; }
|
||||
void SetLabel(char_32 label) {
|
||||
delete []label32_;
|
||||
label32_ = new char_32[2];
|
||||
label32_[0] = label;
|
||||
label32_[1] = 0;
|
||||
}
|
||||
void SetLabel(const char_32 *label32) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
if (label32 != NULL) {
|
||||
// remove any byte order marks if any
|
||||
if (label32[0] == 0xfeff) {
|
||||
label32++;
|
||||
}
|
||||
int len = LabelLen(label32);
|
||||
label32_ = new char_32[len + 1];
|
||||
memcpy(label32_, label32, len * sizeof(*label32));
|
||||
label32_[len] = 0;
|
||||
}
|
||||
}
|
||||
void SetLabel(string str);
|
||||
void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
|
||||
void SetNormBottom(unsigned short norm_bottom) {
|
||||
norm_bottom_ = norm_bottom;
|
||||
}
|
||||
void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
|
||||
norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
}
|
||||
void SetFirstChar(unsigned short first_char) {
|
||||
first_char_ = first_char;
|
||||
}
|
||||
void SetLastChar(unsigned short last_char) {
|
||||
last_char_ = last_char;
|
||||
}
|
||||
|
||||
// Saves the charsamp to a dump file
|
||||
bool Save2CharDumpFile(FILE *fp) const;
|
||||
// Crops the underlying image and returns a new CharSamp with the
|
||||
// same character information but new dimensions. Warning: does not
|
||||
// necessarily set the normalized top and bottom correctly since
|
||||
// those depend on its location within the word (or CubeSearchObject).
|
||||
CharSamp *Crop();
|
||||
// Computes the connected components of the char sample
|
||||
ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
|
||||
int min_con_comp_size) const;
|
||||
// returns a copy of the charsamp that is scaled to the
|
||||
// specified width and height
|
||||
CharSamp *Scale(int wid, int hgt, bool isotropic = true);
|
||||
// returns a Clone of the charsample
|
||||
CharSamp *Clone() const;
|
||||
// computes the features corresponding to the char sample
|
||||
bool ComputeFeatures(int conv_grid_size, float *features);
|
||||
// Load a Char Samp from a dump file
|
||||
static CharSamp *FromCharDumpFile(CachedFile *fp);
|
||||
static CharSamp *FromCharDumpFile(FILE *fp);
|
||||
static CharSamp *FromCharDumpFile(unsigned char **raw_data);
|
||||
static CharSamp *FromRawData(int left, int top, int wid, int hgt,
|
||||
unsigned char *data);
|
||||
static CharSamp *FromConComps(ConComp **concomp_array,
|
||||
int strt_concomp, int seg_flags_size,
|
||||
int *seg_flags, bool *left_most,
|
||||
bool *right_most, int word_hgt);
|
||||
static int AuxFeatureCnt() { return (5); }
|
||||
// Return the length of the label string
|
||||
int LabelLen() const { return LabelLen(label32_); }
|
||||
static int LabelLen(const char_32 *label32) {
|
||||
if (label32 == NULL) {
|
||||
return 0;
|
||||
}
|
||||
int len = 0;
|
||||
while (label32[++len] != 0);
|
||||
return len;
|
||||
}
|
||||
private:
|
||||
char_32 * label32_;
|
||||
unsigned short page_;
|
||||
unsigned short left_;
|
||||
unsigned short top_;
|
||||
// top of sample normalized to a word height of 255
|
||||
unsigned short norm_top_;
|
||||
// bottom of sample normalized to a word height of 255
|
||||
unsigned short norm_bottom_;
|
||||
// 255 * ratio of character width to (width + height)
|
||||
unsigned short norm_aspect_ratio_;
|
||||
unsigned short first_char_;
|
||||
unsigned short last_char_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // CHAR_SAMP_H
|
@ -1,30 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Sample Enumerator Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "char_samp_enum.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSampEnum::CharSampEnum() {
|
||||
}
|
||||
|
||||
CharSampEnum::~CharSampEnum() {
|
||||
}
|
||||
|
||||
} // namespace ocrlib
|
@ -1,38 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.h
|
||||
* Description: Declaration of a Character Sample Enumerator Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSampEnum class provides the base class for CharSamp class
|
||||
// Enumerators. This is typically used to implement dump file readers
|
||||
|
||||
#ifndef CHARSAMP_ENUM_H
|
||||
#define CHARSAMP_ENUM_H
|
||||
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSampEnum {
|
||||
public:
|
||||
CharSampEnum();
|
||||
virtual ~CharSampEnum();
|
||||
virtual bool EnumCharSamp(CharSamp *char_samp, float progress) = 0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHARSAMP_ENUM_H
|
@ -1,170 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Sample Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "char_samp_set.h"
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSampSet::CharSampSet() {
|
||||
cnt_ = 0;
|
||||
samp_buff_ = NULL;
|
||||
own_samples_ = false;
|
||||
}
|
||||
|
||||
CharSampSet::~CharSampSet() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// free buffers and init vars
|
||||
void CharSampSet::Cleanup() {
|
||||
if (samp_buff_ != NULL) {
|
||||
// only free samples if owned by class
|
||||
if (own_samples_ == true) {
|
||||
for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
|
||||
delete samp_buff_[samp_idx];
|
||||
}
|
||||
}
|
||||
delete []samp_buff_;
|
||||
}
|
||||
cnt_ = 0;
|
||||
samp_buff_ = NULL;
|
||||
}
|
||||
|
||||
// add a new sample
|
||||
bool CharSampSet::Add(CharSamp *char_samp) {
|
||||
if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
|
||||
// create an extended buffer
|
||||
CharSamp **new_samp_buff =
|
||||
reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
|
||||
// copy old contents
|
||||
if (cnt_ > 0) {
|
||||
memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
|
||||
delete []samp_buff_;
|
||||
}
|
||||
samp_buff_ = new_samp_buff;
|
||||
}
|
||||
samp_buff_[cnt_++] = char_samp;
|
||||
return true;
|
||||
}
|
||||
|
||||
// load char samples from file
|
||||
bool CharSampSet::LoadCharSamples(FILE *fp) {
|
||||
// free existing
|
||||
Cleanup();
|
||||
// samples are created here and owned by the class
|
||||
own_samples_ = true;
|
||||
// start loading char samples
|
||||
while (feof(fp) == 0) {
|
||||
CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
|
||||
if (new_samp != NULL) {
|
||||
if (Add(new_samp) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// creates a CharSampSet object from file
|
||||
CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
|
||||
FILE *fp;
|
||||
unsigned int val32;
|
||||
// open the file
|
||||
fp = fopen(file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
// read and verify marker
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xfefeabd0) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
// create an object
|
||||
CharSampSet *samp_set = new CharSampSet();
|
||||
if (samp_set->LoadCharSamples(fp) == false) {
|
||||
delete samp_set;
|
||||
samp_set = NULL;
|
||||
}
|
||||
fclose(fp);
|
||||
return samp_set;
|
||||
}
|
||||
|
||||
// Create a new Char Dump file
|
||||
FILE *CharSampSet::CreateCharDumpFile(string file_name) {
|
||||
FILE *fp;
|
||||
unsigned int val32;
|
||||
// create the file
|
||||
fp = fopen(file_name.c_str(), "wb");
|
||||
if (!fp) {
|
||||
return NULL;
|
||||
}
|
||||
// read and verify marker
|
||||
val32 = 0xfefeabd0;
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
return fp;
|
||||
}
|
||||
|
||||
// Enumerate the Samples in the set one-by-one calling the enumertor's
|
||||
// EnumCharSamp method for each sample
|
||||
bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
|
||||
CachedFile *fp_in;
|
||||
unsigned int val32;
|
||||
long i64_size,
|
||||
i64_pos;
|
||||
// open the file
|
||||
fp_in = new CachedFile(file_name);
|
||||
i64_size = fp_in->Size();
|
||||
if (i64_size < 1) {
|
||||
return false;
|
||||
}
|
||||
// read and verify marker
|
||||
if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
if (val32 != 0xfefeabd0) {
|
||||
return false;
|
||||
}
|
||||
// start loading char samples
|
||||
while (fp_in->eof() == false) {
|
||||
CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
|
||||
i64_pos = fp_in->Tell();
|
||||
if (new_samp != NULL) {
|
||||
bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
|
||||
(100.0f * i64_pos / i64_size));
|
||||
delete new_samp;
|
||||
if (ret_flag == false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete fp_in;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ocrlib
|
@ -1,73 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_set.h
|
||||
* Description: Declaration of a Character Sample Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSampSet set encapsulates a set of CharSet objects typically
|
||||
// but not necessarily loaded from a file
|
||||
// It provides methods to load samples from File, Create a new file and
|
||||
// Add new char samples to the set
|
||||
|
||||
#ifndef CHAR_SAMP_SET_H
|
||||
#define CHAR_SAMP_SET_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_samp_enum.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// chunks of samp pointers to allocate
|
||||
#define SAMP_ALLOC_BLOCK 10000
|
||||
|
||||
class CharSampSet {
|
||||
public:
|
||||
CharSampSet();
|
||||
~CharSampSet();
|
||||
// return sample count
|
||||
int SampleCount() const { return cnt_; }
|
||||
// returns samples buffer
|
||||
CharSamp ** Samples() const { return samp_buff_; }
|
||||
// Create a CharSampSet set object from a file
|
||||
static CharSampSet *FromCharDumpFile(string file_name);
|
||||
// Enumerate the Samples in the set one-by-one calling the enumertor's
|
||||
// EnumCharSamp method for each sample
|
||||
static bool EnumSamples(string file_name, CharSampEnum *enumerator);
|
||||
// Create a new Char Dump file
|
||||
static FILE *CreateCharDumpFile(string file_name);
|
||||
// Add a new sample to the set
|
||||
bool Add(CharSamp *char_samp);
|
||||
|
||||
private:
|
||||
// sample count
|
||||
int cnt_;
|
||||
// the char samp array
|
||||
CharSamp **samp_buff_;
|
||||
// Are the samples owned by the set or not.
|
||||
// Determines whether we should cleanup in the end
|
||||
bool own_samples_;
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Load character samples from a file
|
||||
bool LoadCharSamples(FILE *fp);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_SAMP_SET_H
|
@ -1,168 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "char_set.h"
|
||||
#include "cube_utils.h"
|
||||
#include "tessdatamanager.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSet::CharSet() {
|
||||
class_cnt_ = 0;
|
||||
class_strings_ = NULL;
|
||||
unicharset_map_ = NULL;
|
||||
init_ = false;
|
||||
|
||||
// init hash table
|
||||
memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
|
||||
}
|
||||
|
||||
CharSet::~CharSet() {
|
||||
if (class_strings_ != NULL) {
|
||||
for (int cls = 0; cls < class_cnt_; cls++) {
|
||||
if (class_strings_[cls] != NULL) {
|
||||
delete class_strings_[cls];
|
||||
}
|
||||
}
|
||||
delete []class_strings_;
|
||||
class_strings_ = NULL;
|
||||
}
|
||||
delete []unicharset_map_;
|
||||
}
|
||||
|
||||
// Creates CharSet object by reading the unicharset from the
|
||||
// TessDatamanager, and mapping Cube's unicharset to Tesseract's if
|
||||
// they differ.
|
||||
CharSet *CharSet::Create(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
CharSet *char_set = new CharSet();
|
||||
|
||||
// First look for Cube's unicharset; if not there, use tesseract's
|
||||
bool cube_unicharset_exists;
|
||||
if (!(cube_unicharset_exists =
|
||||
tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) &&
|
||||
!tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
|
||||
"either cube or tesseract unicharset\n");
|
||||
return NULL;
|
||||
}
|
||||
FILE *charset_fp = tessdata_manager->GetDataFilePtr();
|
||||
if (!charset_fp) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
|
||||
"a unicharset\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// If we found a cube unicharset separate from tesseract's, load it and
|
||||
// map its unichars to tesseract's; if only one unicharset exists,
|
||||
// just load it.
|
||||
bool loaded;
|
||||
if (cube_unicharset_exists) {
|
||||
char_set->cube_unicharset_.load_from_file(charset_fp);
|
||||
loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
|
||||
loaded = loaded && char_set->LoadSupportedCharList(
|
||||
tessdata_manager->GetDataFilePtr(), tess_unicharset);
|
||||
char_set->unicharset_ = &char_set->cube_unicharset_;
|
||||
} else {
|
||||
loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
|
||||
char_set->unicharset_ = tess_unicharset;
|
||||
}
|
||||
if (!loaded) {
|
||||
delete char_set;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char_set->init_ = true;
|
||||
return char_set;
|
||||
}
|
||||
|
||||
// Load the list of supported chars from the given data file pointer.
|
||||
bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) {
|
||||
if (init_)
|
||||
return true;
|
||||
|
||||
char str_line[256];
|
||||
// init hash table
|
||||
memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
|
||||
// read the char count
|
||||
if (fgets(str_line, sizeof(str_line), fp) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not "
|
||||
"read char count.\n");
|
||||
return false;
|
||||
}
|
||||
class_cnt_ = atoi(str_line);
|
||||
if (class_cnt_ < 2) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::InitMemory): invalid "
|
||||
"class count: %d\n", class_cnt_);
|
||||
return false;
|
||||
}
|
||||
// memory for class strings
|
||||
class_strings_ = new string_32*[class_cnt_];
|
||||
// memory for unicharset map
|
||||
if (tess_unicharset) {
|
||||
unicharset_map_ = new int[class_cnt_];
|
||||
}
|
||||
|
||||
// Read in character strings and add to hash table
|
||||
for (int class_id = 0; class_id < class_cnt_; class_id++) {
|
||||
// Read the class string
|
||||
if (fgets(str_line, sizeof(str_line), fp) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): "
|
||||
"could not read class string with class_id=%d.\n", class_id);
|
||||
return false;
|
||||
}
|
||||
// Terminate at space if any
|
||||
char *p = strchr(str_line, ' ');
|
||||
if (p != NULL)
|
||||
*p = '\0';
|
||||
// Convert to UTF32 and store
|
||||
string_32 str32;
|
||||
// Convert NULL to a space
|
||||
if (strcmp(str_line, "NULL") == 0) {
|
||||
strcpy(str_line, " ");
|
||||
}
|
||||
CubeUtils::UTF8ToUTF32(str_line, &str32);
|
||||
class_strings_[class_id] = new string_32(str32);
|
||||
|
||||
// Add to hash-table
|
||||
int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
|
||||
if (hash_bin_size_[hash_val] >= kMaxHashSize) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::LoadSupportedCharList): hash "
|
||||
"table is full.\n");
|
||||
return false;
|
||||
}
|
||||
hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id;
|
||||
|
||||
if (tess_unicharset != NULL) {
|
||||
// Add class id to unicharset map
|
||||
UNICHAR_ID tess_id = tess_unicharset->unichar_to_id(str_line);
|
||||
if (tess_id == INVALID_UNICHAR_ID) {
|
||||
tess_unicharset->unichar_insert(str_line);
|
||||
tess_id = tess_unicharset->unichar_to_id(str_line);
|
||||
}
|
||||
ASSERT_HOST(tess_id != INVALID_UNICHAR_ID);
|
||||
unicharset_map_[class_id] = tess_id;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // tesseract
|
174
cube/char_set.h
174
cube/char_set.h
@ -1,174 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.h
|
||||
* Description: Declaration of a Character Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSet class encapsulates the list of 32-bit strings/characters that
|
||||
// Cube supports for a specific language. The char set is loaded from the
|
||||
// .unicharset file corresponding to a specific language
|
||||
// Each string has a corresponding int class-id that gets used throughout Cube
|
||||
// The class provides pass back and forth conversion between the class-id
|
||||
// and its corresponding 32-bit string. This is done using a hash table that
|
||||
// maps the string to the class id.
|
||||
|
||||
#ifndef CHAR_SET_H
|
||||
#define CHAR_SET_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "string_32.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "unicharset.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSet {
|
||||
public:
|
||||
CharSet();
|
||||
~CharSet();
|
||||
|
||||
// Returns true if Cube is sharing Tesseract's unicharset.
|
||||
inline bool SharedUnicharset() { return (unicharset_map_ == NULL); }
|
||||
|
||||
// Returns the class id corresponding to a 32-bit string. Returns -1
|
||||
// if the string is not supported. This is done by hashing the
|
||||
// string and then looking up the string in the hash-bin if there
|
||||
// are collisions.
|
||||
inline int ClassID(const char_32 *str) const {
|
||||
int hash_val = Hash(str);
|
||||
if (hash_bin_size_[hash_val] == 0)
|
||||
return -1;
|
||||
for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
|
||||
if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0)
|
||||
return hash_bins_[hash_val][bin];
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Same as above but using a 32-bit char instead of a string
|
||||
inline int ClassID(char_32 ch) const {
|
||||
int hash_val = Hash(ch);
|
||||
if (hash_bin_size_[hash_val] == 0)
|
||||
return -1;
|
||||
for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
|
||||
if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch &&
|
||||
class_strings_[hash_bins_[hash_val][bin]]->length() == 1) {
|
||||
return hash_bins_[hash_val][bin];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Retrieve the unicharid in Tesseract's unicharset corresponding
|
||||
// to a 32-bit string. When Tesseract and Cube share the same
|
||||
// unicharset, this will just be the class id.
|
||||
inline int UnicharID(const char_32 *str) const {
|
||||
int class_id = ClassID(str);
|
||||
if (class_id == INVALID_UNICHAR_ID)
|
||||
return INVALID_UNICHAR_ID;
|
||||
int unichar_id;
|
||||
if (unicharset_map_)
|
||||
unichar_id = unicharset_map_[class_id];
|
||||
else
|
||||
unichar_id = class_id;
|
||||
return unichar_id;
|
||||
}
|
||||
// Same as above but using a 32-bit char instead of a string
|
||||
inline int UnicharID(char_32 ch) const {
|
||||
int class_id = ClassID(ch);
|
||||
if (class_id == INVALID_UNICHAR_ID)
|
||||
return INVALID_UNICHAR_ID;
|
||||
int unichar_id;
|
||||
if (unicharset_map_)
|
||||
unichar_id = unicharset_map_[class_id];
|
||||
else
|
||||
unichar_id = class_id;
|
||||
return unichar_id;
|
||||
}
|
||||
// Returns the 32-bit string corresponding to a class id
|
||||
inline const char_32 * ClassString(int class_id) const {
|
||||
if (class_id < 0 || class_id >= class_cnt_) {
|
||||
return NULL;
|
||||
}
|
||||
return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
|
||||
}
|
||||
// Returns the count of supported strings
|
||||
inline int ClassCount() const { return class_cnt_; }
|
||||
|
||||
// Creates CharSet object by reading the unicharset from the
|
||||
// TessDatamanager, and mapping Cube's unicharset to Tesseract's if
|
||||
// they differ.
|
||||
static CharSet *Create(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
// Return the UNICHARSET cube is using for recognition internally --
|
||||
// ClassId() returns unichar_id's in this unicharset.
|
||||
UNICHARSET *InternalUnicharset() { return unicharset_; }
|
||||
|
||||
private:
|
||||
// Hash table configuration params. Determined emperically on
|
||||
// the supported languages so far (Eng, Ara, Hin). Might need to be
|
||||
// tuned for speed when more languages are supported
|
||||
static const int kHashBins = 3001;
|
||||
static const int kMaxHashSize = 16;
|
||||
|
||||
// Using djb2 hashing function to hash a 32-bit string
|
||||
// introduced in http://www.cse.yorku.ca/~oz/hash.html
|
||||
static inline int Hash(const char_32 *str) {
|
||||
unsigned long hash = 5381;
|
||||
int c;
|
||||
while ((c = *str++))
|
||||
hash = ((hash << 5) + hash) + c;
|
||||
return (hash%kHashBins);
|
||||
}
|
||||
// Same as above but for a single char
|
||||
static inline int Hash(char_32 ch) {
|
||||
char_32 b[2];
|
||||
b[0] = ch;
|
||||
b[1] = 0;
|
||||
return Hash(b);
|
||||
}
|
||||
|
||||
// Load the list of supported chars from the given data file
|
||||
// pointer. If tess_unicharset is non-NULL, mapping each Cube class
|
||||
// id to a tesseract unicharid.
|
||||
bool LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset);
|
||||
|
||||
// class count
|
||||
int class_cnt_;
|
||||
// hash-bin sizes array
|
||||
int hash_bin_size_[kHashBins];
|
||||
// hash bins
|
||||
int hash_bins_[kHashBins][kMaxHashSize];
|
||||
// supported strings array
|
||||
string_32 **class_strings_;
|
||||
// map from class id to secondary (tesseract's) unicharset's ids
|
||||
int *unicharset_map_;
|
||||
// A unicharset which is filled in with a Tesseract-style UNICHARSET for
|
||||
// cube's data if our unicharset is different from tesseract's.
|
||||
UNICHARSET cube_unicharset_;
|
||||
// This points to either the tess_unicharset we're passed or cube_unicharset_,
|
||||
// depending upon whether we just have one unicharset or one for each
|
||||
// tesseract and cube, respectively.
|
||||
UNICHARSET *unicharset_;
|
||||
// has the char set been initialized flag
|
||||
bool init_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_SET_H
|
@ -1,94 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_base.h
|
||||
* Description: Declaration of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharClassifier class is the abstract class for any character/grapheme
|
||||
// classifier.
|
||||
|
||||
#ifndef CHAR_CLASSIFIER_BASE_H
|
||||
#define CHAR_CLASSIFIER_BASE_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "feature_base.h"
|
||||
#include "lang_model.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharClassifier {
|
||||
public:
|
||||
CharClassifier(CharSet *char_set, TuningParams *params,
|
||||
FeatureBase *feat_extract) {
|
||||
char_set_ = char_set;
|
||||
params_ = params;
|
||||
feat_extract_ = feat_extract;
|
||||
fold_sets_ = NULL;
|
||||
fold_set_cnt_ = 0;
|
||||
fold_set_len_ = NULL;
|
||||
init_ = false;
|
||||
case_sensitive_ = true;
|
||||
}
|
||||
|
||||
virtual ~CharClassifier() {
|
||||
if (fold_sets_ != NULL) {
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
delete []fold_sets_[fold_set];
|
||||
}
|
||||
delete []fold_sets_;
|
||||
fold_sets_ = NULL;
|
||||
}
|
||||
delete []fold_set_len_;
|
||||
fold_set_len_ = NULL;
|
||||
delete feat_extract_;
|
||||
feat_extract_ = NULL;
|
||||
}
|
||||
|
||||
// pure virtual functions that need to be implemented by any inheriting class
|
||||
virtual CharAltList * Classify(CharSamp *char_samp) = 0;
|
||||
virtual int CharCost(CharSamp *char_samp) = 0;
|
||||
virtual bool Train(CharSamp *char_samp, int ClassID) = 0;
|
||||
virtual bool SetLearnParam(char *var_name, float val) = 0;
|
||||
virtual bool Init(const string &data_file_path, const string &lang,
|
||||
LangModel *lang_mod) = 0;
|
||||
|
||||
// accessors
|
||||
FeatureBase *FeatureExtractor() {return feat_extract_;}
|
||||
inline bool CaseSensitive() const { return case_sensitive_; }
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
case_sensitive_ = case_sensitive;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void Fold() = 0;
|
||||
virtual bool LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) = 0;
|
||||
FeatureBase *feat_extract_;
|
||||
CharSet *char_set_;
|
||||
TuningParams *params_;
|
||||
int **fold_sets_;
|
||||
int *fold_set_len_;
|
||||
int fold_set_cnt_;
|
||||
bool init_;
|
||||
bool case_sensitive_;
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // CHAR_CLASSIFIER_BASE_H
|
@ -1,85 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_factory.cpp
|
||||
* Description: Implementation of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "classifier_factory.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "feature_chebyshev.h"
|
||||
#include "feature_hybrid.h"
|
||||
#include "hybrid_neural_net_classifier.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Creates a CharClassifier object of the appropriate type depending on the
|
||||
// classifier type in the settings file
|
||||
CharClassifier *CharClassifierFactory::Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set,
|
||||
TuningParams *params) {
|
||||
// create the feature extraction object
|
||||
FeatureBase *feat_extract;
|
||||
|
||||
switch (params->TypeFeature()) {
|
||||
case TuningParams::BMP:
|
||||
feat_extract = new FeatureBmp(params);
|
||||
break;
|
||||
case TuningParams::CHEBYSHEV:
|
||||
feat_extract = new FeatureChebyshev(params);
|
||||
break;
|
||||
case TuningParams::HYBRID:
|
||||
feat_extract = new FeatureHybrid(params);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
|
||||
"feature type.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create the classifier object
|
||||
CharClassifier *classifier_obj;
|
||||
switch (params->TypeClassifier()) {
|
||||
case TuningParams::NN:
|
||||
classifier_obj = new ConvNetCharClassifier(char_set, params,
|
||||
feat_extract);
|
||||
break;
|
||||
case TuningParams::HYBRID_NN:
|
||||
classifier_obj = new HybridNeuralNetCharClassifier(char_set, params,
|
||||
feat_extract);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
|
||||
"classifier type.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Init the classifier
|
||||
if (!classifier_obj->Init(data_file_path, lang, lang_mod)) {
|
||||
delete classifier_obj;
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable "
|
||||
"to Init() character classifier object.\n");
|
||||
return NULL;
|
||||
}
|
||||
return classifier_obj;
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_factory.h
|
||||
* Description: Declaration of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharClassifierFactory provides a single static method to create an
|
||||
// instance of the desired classifier
|
||||
|
||||
#ifndef CHAR_CLASSIFIER_FACTORY_H
|
||||
#define CHAR_CLASSIFIER_FACTORY_H
|
||||
|
||||
#include <string>
|
||||
#include "classifier_base.h"
|
||||
#include "lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharClassifierFactory {
|
||||
public:
|
||||
// Creates a CharClassifier object of the appropriate type depending on the
|
||||
// classifier type in the settings file
|
||||
static CharClassifier *Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set,
|
||||
TuningParams *params);
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // CHAR_CLASSIFIER_FACTORY_H
|
@ -1,268 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: con_comp.cpp
|
||||
* Description: Implementation of a Connected Component class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "con_comp.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
ConComp::ConComp() {
|
||||
head_ = NULL;
|
||||
tail_ = NULL;
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
right_ = 0;
|
||||
bottom_ = 0;
|
||||
left_most_ = false;
|
||||
right_most_ = false;
|
||||
id_ = -1;
|
||||
pt_cnt_ = 0;
|
||||
}
|
||||
|
||||
ConComp::~ConComp() {
|
||||
if (head_ != NULL) {
|
||||
ConCompPt *pt_ptr = head_;
|
||||
while (pt_ptr != NULL) {
|
||||
ConCompPt *pptNext = pt_ptr->Next();
|
||||
delete pt_ptr;
|
||||
pt_ptr = pptNext;
|
||||
}
|
||||
head_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// adds a pt to the conn comp and updates its boundaries
|
||||
bool ConComp::Add(int x, int y) {
|
||||
ConCompPt *pt_ptr = new ConCompPt(x, y);
|
||||
|
||||
if (head_ == NULL) {
|
||||
left_ = x;
|
||||
right_ = x;
|
||||
top_ = y;
|
||||
bottom_ = y;
|
||||
|
||||
head_ = pt_ptr;
|
||||
} else {
|
||||
left_ = left_ <= x ? left_ : x;
|
||||
top_ = top_ <= y ? top_ : y;
|
||||
right_ = right_ >= x ? right_ : x;
|
||||
bottom_ = bottom_ >= y ? bottom_ : y;
|
||||
}
|
||||
|
||||
if (tail_ != NULL) {
|
||||
tail_->SetNext(pt_ptr);
|
||||
}
|
||||
|
||||
tail_ = pt_ptr;
|
||||
pt_cnt_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
// merges two connected components
|
||||
bool ConComp::Merge(ConComp *concomp) {
|
||||
if (head_ == NULL || tail_ == NULL ||
|
||||
concomp->head_ == NULL || concomp->tail_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
tail_->SetNext(concomp->head_);
|
||||
tail_ = concomp->tail_;
|
||||
left_ = left_ <= concomp->left_ ? left_ : concomp->left_;
|
||||
top_ = top_ <= concomp->top_ ? top_ : concomp->top_;
|
||||
right_ = right_ >= concomp->right_ ? right_ : concomp->right_;
|
||||
bottom_ = bottom_ >= concomp->bottom_ ? bottom_ : concomp->bottom_;
|
||||
pt_cnt_ += concomp->pt_cnt_;
|
||||
|
||||
concomp->head_ = NULL;
|
||||
concomp->tail_ = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates the x-coord density histogram after spreading
|
||||
// each x-coord position by the HIST_WND_RATIO fraction of the
|
||||
// height of the ConComp, but limited to max_hist_wnd
|
||||
int *ConComp::CreateHistogram(int max_hist_wnd) {
|
||||
int wid = right_ - left_ + 1,
|
||||
hgt = bottom_ - top_ + 1,
|
||||
hist_wnd = static_cast<int>(hgt * HIST_WND_RATIO);
|
||||
|
||||
if (hist_wnd > max_hist_wnd) {
|
||||
hist_wnd = max_hist_wnd;
|
||||
}
|
||||
|
||||
// alloc memo for histogram
|
||||
int *hist_array = new int[wid];
|
||||
|
||||
memset(hist_array, 0, wid * sizeof(*hist_array));
|
||||
|
||||
// compute windowed histogram
|
||||
ConCompPt *pt_ptr = head_;
|
||||
|
||||
while (pt_ptr != NULL) {
|
||||
int x = pt_ptr->x() - left_,
|
||||
xw = x - hist_wnd;
|
||||
|
||||
for (int xdel = -hist_wnd; xdel <= hist_wnd; xdel++, xw++) {
|
||||
if (xw >= 0 && xw < wid) {
|
||||
hist_array[xw]++;
|
||||
}
|
||||
}
|
||||
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
return hist_array;
|
||||
}
|
||||
|
||||
// find out the seg pts by looking for local minima in the histogram
|
||||
int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) {
|
||||
// init
|
||||
(*seg_pt_cnt) = 0;
|
||||
|
||||
int wid = right_ - left_ + 1,
|
||||
hgt = bottom_ - top_ + 1;
|
||||
|
||||
int *x_seg_pt = new int[wid];
|
||||
|
||||
int seg_pt_wnd = static_cast<int>(hgt * SEG_PT_WND_RATIO);
|
||||
|
||||
if (seg_pt_wnd > 1) {
|
||||
seg_pt_wnd = 1;
|
||||
}
|
||||
|
||||
for (int x = 2; x < (wid - 2); x++) {
|
||||
if (hist_array[x] < hist_array[x - 1] &&
|
||||
hist_array[x] < hist_array[x - 2] &&
|
||||
hist_array[x] <= hist_array[x + 1] &&
|
||||
hist_array[x] <= hist_array[x + 2]) {
|
||||
x_seg_pt[(*seg_pt_cnt)++] = x;
|
||||
x += seg_pt_wnd;
|
||||
} else if (hist_array[x] <= hist_array[x - 1] &&
|
||||
hist_array[x] <= hist_array[x - 2] &&
|
||||
hist_array[x] < hist_array[x + 1] &&
|
||||
hist_array[x] < hist_array[x + 2]) {
|
||||
x_seg_pt[(*seg_pt_cnt)++] = x;
|
||||
x += seg_pt_wnd;
|
||||
}
|
||||
}
|
||||
|
||||
// no segments, nothing to do
|
||||
if ((*seg_pt_cnt) == 0) {
|
||||
delete []x_seg_pt;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return x_seg_pt;
|
||||
}
|
||||
|
||||
// segments a concomp based on pixel density histogram local minima
|
||||
// if there were none found, it returns NULL
|
||||
// this is more useful than creating a clone of itself
|
||||
ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) {
|
||||
// init
|
||||
(*concomp_cnt) = 0;
|
||||
|
||||
// No pts
|
||||
if (head_ == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int seg_pt_cnt = 0;
|
||||
|
||||
// create the histogram
|
||||
int *hist_array = CreateHistogram(max_hist_wnd);
|
||||
if (hist_array == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int *x_seg_pt = SegmentHistogram(hist_array, &seg_pt_cnt);
|
||||
|
||||
// free histogram
|
||||
delete []hist_array;
|
||||
|
||||
// no segments, nothing to do
|
||||
if (seg_pt_cnt == 0) {
|
||||
delete []x_seg_pt;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create concomp array
|
||||
ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1];
|
||||
|
||||
for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) {
|
||||
concomp_array[concomp] = new ConComp();
|
||||
|
||||
// split concomps inherit the ID this concomp
|
||||
concomp_array[concomp]->SetID(id_);
|
||||
}
|
||||
|
||||
// set the left and right most attributes of the
|
||||
// appropriate concomps
|
||||
concomp_array[0]->left_most_ = true;
|
||||
concomp_array[seg_pt_cnt]->right_most_ = true;
|
||||
|
||||
// assign pts to concomps
|
||||
ConCompPt *pt_ptr = head_;
|
||||
while (pt_ptr != NULL) {
|
||||
int seg_pt;
|
||||
|
||||
// find the first seg-pt that exceeds the x value
|
||||
// of the pt
|
||||
for (seg_pt = 0; seg_pt < seg_pt_cnt; seg_pt++) {
|
||||
if ((x_seg_pt[seg_pt] + left_) > pt_ptr->x()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add the pt to the proper concomp
|
||||
if (concomp_array[seg_pt]->Add(pt_ptr->x(), pt_ptr->y()) == false) {
|
||||
delete []x_seg_pt;
|
||||
delete []concomp_array;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
delete []x_seg_pt;
|
||||
|
||||
(*concomp_cnt) = (seg_pt_cnt + 1);
|
||||
|
||||
return concomp_array;
|
||||
}
|
||||
|
||||
// Shifts the co-ordinates of all points by the specified x & y deltas
|
||||
void ConComp::Shift(int dx, int dy) {
|
||||
ConCompPt *pt_ptr = head_;
|
||||
|
||||
while (pt_ptr != NULL) {
|
||||
pt_ptr->Shift(dx, dy);
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
left_ += dx;
|
||||
right_ += dx;
|
||||
top_ += dy;
|
||||
bottom_ += dy;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
124
cube/con_comp.h
124
cube/con_comp.h
@ -1,124 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: con_comp.h
|
||||
* Description: Declaration of a Connected Component class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CONCOMP_H
|
||||
#define CONCOMP_H
|
||||
|
||||
// The ConComp class implements the functionality needed for a
|
||||
// Connected Component object and Connected Component (ConComp) points.
|
||||
// The points consituting a connected component are kept in a linked-list
|
||||
// The Concomp class provided methods to:
|
||||
// 1- Compare components in L2R and R2L reading orders.
|
||||
// 2- Merge ConComps
|
||||
// 3- Compute the windowed vertical pixel density histogram for a specific
|
||||
// windows size
|
||||
// 4- Segment a ConComp based on the local windowed vertical pixel
|
||||
// density histogram local minima
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Implments a ConComp point in a linked list of points
|
||||
class ConCompPt {
|
||||
public:
|
||||
ConCompPt(int x, int y) {
|
||||
x_ = x;
|
||||
y_ = y;
|
||||
next_pt_ = NULL;
|
||||
}
|
||||
inline int x() { return x_; }
|
||||
inline int y() { return y_; }
|
||||
inline void Shift(int dx, int dy) {
|
||||
x_ += dx;
|
||||
y_ += dy;
|
||||
}
|
||||
inline ConCompPt * Next() { return next_pt_; }
|
||||
inline void SetNext(ConCompPt *pt) { next_pt_ = pt; }
|
||||
|
||||
private:
|
||||
int x_;
|
||||
int y_;
|
||||
ConCompPt *next_pt_;
|
||||
};
|
||||
|
||||
class ConComp {
|
||||
public:
|
||||
ConComp();
|
||||
virtual ~ConComp();
|
||||
// accessors
|
||||
inline ConCompPt *Head() { return head_; }
|
||||
inline int Left() const { return left_; }
|
||||
inline int Top() const { return top_; }
|
||||
inline int Right() const { return right_; }
|
||||
inline int Bottom() const { return bottom_; }
|
||||
inline int Width() const { return right_ - left_ + 1; }
|
||||
inline int Height() const { return bottom_ - top_ + 1; }
|
||||
|
||||
// Comparer used for sorting L2R reading order
|
||||
inline static int Left2RightComparer(const void *comp1,
|
||||
const void *comp2) {
|
||||
return (*(reinterpret_cast<ConComp * const *>(comp1)))->left_ +
|
||||
(*(reinterpret_cast<ConComp * const *>(comp1)))->right_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp2)))->left_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp2)))->right_;
|
||||
}
|
||||
|
||||
// Comparer used for sorting R2L reading order
|
||||
inline static int Right2LeftComparer(const void *comp1,
|
||||
const void *comp2) {
|
||||
return (*(reinterpret_cast<ConComp * const *>(comp2)))->right_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp1)))->right_;
|
||||
}
|
||||
|
||||
// accessors for attribues of a ConComp
|
||||
inline bool LeftMost() const { return left_most_; }
|
||||
inline bool RightMost() const { return right_most_; }
|
||||
inline void SetLeftMost(bool left_most) { left_most_ = left_most; }
|
||||
inline void SetRightMost(bool right_most) { right_most_ = right_most;
|
||||
}
|
||||
inline int ID () const { return id_; }
|
||||
inline void SetID(int id) { id_ = id; }
|
||||
inline int PtCnt () const { return pt_cnt_; }
|
||||
// Add a new pt
|
||||
bool Add(int x, int y);
|
||||
// Merge two connected components in-place
|
||||
bool Merge(ConComp *con_comp);
|
||||
// Shifts the co-ordinates of all points by the specified x & y deltas
|
||||
void Shift(int dx, int dy);
|
||||
// segments a concomp based on pixel density histogram local minima
|
||||
ConComp **Segment(int max_hist_wnd, int *concomp_cnt);
|
||||
// creates the vertical pixel density histogram of the concomp
|
||||
int *CreateHistogram(int max_hist_wnd);
|
||||
// find out the seg pts by looking for local minima in the histogram
|
||||
int *SegmentHistogram(int *hist_array, int *seg_pt_cnt);
|
||||
|
||||
private:
|
||||
int id_;
|
||||
bool left_most_;
|
||||
bool right_most_;
|
||||
int left_;
|
||||
int top_;
|
||||
int right_;
|
||||
int bottom_;
|
||||
ConCompPt *head_;
|
||||
ConCompPt *tail_;
|
||||
int pt_cnt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CONCOMP_H
|
@ -1,354 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: charclassifier.cpp
|
||||
* Description: Implementation of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "char_set.h"
|
||||
#include "classifier_base.h"
|
||||
#include "const.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_utils.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
ConvNetCharClassifier::ConvNetCharClassifier(CharSet *char_set,
|
||||
TuningParams *params,
|
||||
FeatureBase *feat_extract)
|
||||
: CharClassifier(char_set, params, feat_extract) {
|
||||
char_net_ = NULL;
|
||||
net_input_ = NULL;
|
||||
net_output_ = NULL;
|
||||
}
|
||||
|
||||
ConvNetCharClassifier::~ConvNetCharClassifier() {
|
||||
if (char_net_ != NULL) {
|
||||
delete char_net_;
|
||||
char_net_ = NULL;
|
||||
}
|
||||
|
||||
if (net_input_ != NULL) {
|
||||
delete []net_input_;
|
||||
net_input_ = NULL;
|
||||
}
|
||||
|
||||
if (net_output_ != NULL) {
|
||||
delete []net_output_;
|
||||
net_output_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The main training function. Given a sample and a class ID the classifier
|
||||
* updates its parameters according to its learning algorithm. This function
|
||||
* is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* A secondary function needed for training. Allows the trainer to set the
|
||||
* value of any train-time parameter. This function is currently not
|
||||
* implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
|
||||
// TODO(ahmadab): implementation of parameter initializing.
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Folds the output of the NeuralNet using the loaded folding sets
|
||||
*/
|
||||
void ConvNetCharClassifier::Fold() {
|
||||
// in case insensitive mode
|
||||
if (case_sensitive_ == false) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
// fold case
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// get class string
|
||||
const char_32 *str32 = char_set_->ClassString(class_id);
|
||||
// get the upper case form of the string
|
||||
string_32 upper_form32 = str32;
|
||||
for (int ch = 0; ch < upper_form32.length(); ch++) {
|
||||
if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
|
||||
upper_form32[ch] = towupper(upper_form32[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
// find out the upperform class-id if any
|
||||
int upper_class_id =
|
||||
char_set_->ClassID(reinterpret_cast<const char_32 *>(
|
||||
upper_form32.c_str()));
|
||||
if (upper_class_id != -1 && class_id != upper_class_id) {
|
||||
float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
|
||||
net_output_[class_id] = max_out;
|
||||
net_output_[upper_class_id] = max_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The folding sets specify how groups of classes should be folded
|
||||
// Folding involved assigning a min-activation to all the members
|
||||
// of the folding set. The min-activation is a fraction of the max-activation
|
||||
// of the members of the folding set
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
if (fold_set_len_[fold_set] == 0)
|
||||
continue;
|
||||
float max_prob = net_output_[fold_sets_[fold_set][0]];
|
||||
for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
|
||||
if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
|
||||
max_prob = net_output_[fold_sets_[fold_set][ch]];
|
||||
}
|
||||
}
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
|
||||
net_output_[fold_sets_[fold_set][ch]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the features of specified charsamp and feedforward the
|
||||
* specified nets
|
||||
*/
|
||||
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
|
||||
if (char_net_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"NeuralNet is NULL\n");
|
||||
return false;
|
||||
}
|
||||
int feat_cnt = char_net_->in_cnt();
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// allocate i/p and o/p buffers if needed
|
||||
if (net_input_ == NULL) {
|
||||
net_input_ = new float[feat_cnt];
|
||||
net_output_ = new float[class_cnt];
|
||||
}
|
||||
|
||||
// compute input features
|
||||
if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"unable to compute features\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (char_net_ != NULL) {
|
||||
if (char_net_->FeedForward(net_input_, net_output_) == false) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"unable to run feed-forward\n");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
Fold();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the cost of being a char
|
||||
*/
|
||||
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
if (RunNets(char_samp) == false) {
|
||||
return 0;
|
||||
}
|
||||
return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* classifies a charsamp and returns an alternate list
|
||||
* of chars sorted by char costs
|
||||
*/
|
||||
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
// run the needed nets
|
||||
if (RunNets(char_samp) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// create an altlist
|
||||
CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
|
||||
|
||||
for (int out = 1; out < class_cnt; out++) {
|
||||
int cost = CubeUtils::Prob2Cost(net_output_[out]);
|
||||
alt_list->Insert(out, cost);
|
||||
}
|
||||
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an external net (for training purposes)
|
||||
*/
|
||||
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
|
||||
if (char_net_ != NULL) {
|
||||
delete char_net_;
|
||||
char_net_ = NULL;
|
||||
}
|
||||
char_net_ = char_net;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function will return true if the file does not exist.
|
||||
* But will fail if the it did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
fold_set_cnt_ = 0;
|
||||
string fold_file_name;
|
||||
fold_file_name = data_file_path + lang;
|
||||
fold_file_name += ".cube.fold";
|
||||
|
||||
// folding sets are optional
|
||||
FILE *fp = fopen(fold_file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
string fold_sets_str;
|
||||
if (!CubeUtils::ReadFileToString(fold_file_name,
|
||||
&fold_sets_str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
|
||||
fold_set_cnt_ = str_vec.size();
|
||||
|
||||
fold_sets_ = new int *[fold_set_cnt_];
|
||||
fold_set_len_ = new int[fold_set_cnt_];
|
||||
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
|
||||
&str_vec[fold_set]);
|
||||
|
||||
// if all or all but one character are invalid, invalidate this set
|
||||
if (str_vec[fold_set].length() <= 1) {
|
||||
fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
|
||||
"invalidating folding set %d\n", fold_set);
|
||||
fold_set_len_[fold_set] = 0;
|
||||
fold_sets_[fold_set] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
|
||||
fold_set_len_[fold_set] = str32.length();
|
||||
fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Init the classifier provided a data-path and a language string
|
||||
*/
|
||||
bool ConvNetCharClassifier::Init(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
if (init_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// load the nets if any. This function will return true if the net file
|
||||
// does not exist. But will fail if the net did not pass the sanity checks
|
||||
if (!LoadNets(data_file_path, lang)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// load the folding sets if any. This function will return true if the
|
||||
// file does not exist. But will fail if the it did not pass the sanity checks
|
||||
if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the classifier's Neural Nets
|
||||
* This function will return true if the net file does not exist.
|
||||
* But will fail if the net did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string char_net_file;
|
||||
|
||||
// add the lang identifier
|
||||
char_net_file = data_file_path + lang;
|
||||
char_net_file += ".cube.nn";
|
||||
|
||||
// neural network is optional
|
||||
FILE *fp = fopen(char_net_file.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
// load main net
|
||||
char_net_ = tesseract::NeuralNet::FromFile(char_net_file);
|
||||
if (char_net_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"could not load %s\n", char_net_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// validate net
|
||||
if (char_net_->in_cnt()!= feat_extract_->FeatureCnt()) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"could not validate net %s\n", char_net_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// alloc net i/o buffers
|
||||
int feat_cnt = char_net_->in_cnt();
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
if (char_net_->out_cnt() != class_cnt) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"output count (%d) and class count (%d) are not equal\n",
|
||||
char_net_->out_cnt(), class_cnt);
|
||||
return false;
|
||||
}
|
||||
|
||||
// allocate i/p and o/p buffers if needed
|
||||
if (net_input_ == NULL) {
|
||||
net_input_ = new float[feat_cnt];
|
||||
net_output_ = new float[class_cnt];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // tesseract
|
@ -1,94 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: conv_net_classifier.h
|
||||
* Description: Declaration of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The ConvNetCharClassifier inherits from the base classifier class:
|
||||
// "CharClassifierBase". It implements a Convolutional Neural Net classifier
|
||||
// instance of the base classifier. It uses the Tesseract Neural Net library
|
||||
// The Neural Net takes a scaled version of a bitmap and feeds it to a
|
||||
// Convolutional Neural Net as input and performs a FeedForward. Each output
|
||||
// of the net corresponds to class_id in the CharSet passed at construction
|
||||
// time.
|
||||
// Afterwards, the outputs of the Net are "folded" using the folding set
|
||||
// (if any)
|
||||
#ifndef CONV_NET_CLASSIFIER_H
|
||||
#define CONV_NET_CLASSIFIER_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "feature_base.h"
|
||||
#include "classifier_base.h"
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Folding Ratio is the ratio of the max-activation of members of a folding
|
||||
// set that is used to compute the min-activation of the rest of the set
|
||||
static const float kFoldingRatio = 0.75;
|
||||
|
||||
class ConvNetCharClassifier : public CharClassifier {
|
||||
public:
|
||||
ConvNetCharClassifier(CharSet *char_set, TuningParams *params,
|
||||
FeatureBase *feat_extract);
|
||||
virtual ~ConvNetCharClassifier();
|
||||
// The main training function. Given a sample and a class ID the classifier
|
||||
// updates its parameters according to its learning algorithm. This function
|
||||
// is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
virtual bool Train(CharSamp *char_samp, int ClassID);
|
||||
// A secondary function needed for training. Allows the trainer to set the
|
||||
// value of any train-time parameter. This function is currently not
|
||||
// implemented. TODO(ahmadab): implement end-2-end training
|
||||
virtual bool SetLearnParam(char *var_name, float val);
|
||||
// Externally sets the Neural Net used by the classifier. Used for training
|
||||
void SetNet(tesseract::NeuralNet *net);
|
||||
|
||||
// Classifies an input charsamp and return a CharAltList object containing
|
||||
// the possible candidates and corresponding scores
|
||||
virtual CharAltList * Classify(CharSamp *char_samp);
|
||||
// Computes the cost of a specific charsamp being a character (versus a
|
||||
// non-character: part-of-a-character OR more-than-one-character)
|
||||
virtual int CharCost(CharSamp *char_samp);
|
||||
|
||||
|
||||
private:
|
||||
// Neural Net object used for classification
|
||||
tesseract::NeuralNet *char_net_;
|
||||
// data buffers used to hold Neural Net inputs and outputs
|
||||
float *net_input_;
|
||||
float *net_output_;
|
||||
|
||||
// Init the classifier provided a data-path and a language string
|
||||
virtual bool Init(const string &data_file_path, const string &lang,
|
||||
LangModel *lang_mod);
|
||||
// Loads the NeuralNets needed for the classifier
|
||||
bool LoadNets(const string &data_file_path, const string &lang);
|
||||
// Loads the folding sets provided a data-path and a language string
|
||||
virtual bool LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod);
|
||||
// Folds the output of the NeuralNet using the loaded folding sets
|
||||
virtual void Fold();
|
||||
// Scales the input char_samp and feeds it to the NeuralNet as input
|
||||
bool RunNets(CharSamp *char_samp);
|
||||
};
|
||||
}
|
||||
#endif // CONV_NET_CLASSIFIER_H
|
@ -1,41 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: const.h
|
||||
* Description: Defintions of constants used by Cube
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CUBE_CONST_H
|
||||
#define CUBE_CONST_H
|
||||
|
||||
// Scale used to normalize a log-prob to a cost
|
||||
#define PROB2COST_SCALE 4096.0
|
||||
// Maximum possible cost (-log prob of MIN_PROB)
|
||||
#define MIN_PROB_COST 65536
|
||||
// Probability corresponding to the max cost MIN_PROB_COST
|
||||
#define MIN_PROB 0.000000113
|
||||
// Worst possible cost (returned on failure)
|
||||
#define WORST_COST 0x40000
|
||||
// Oversegmentation hysteresis thresholds
|
||||
#define HIST_WND_RATIO 0.1f
|
||||
#define SEG_PT_WND_RATIO 0.1f
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef __GNUC__
|
||||
#include <climits>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // CUBE_CONST_H
|
@ -1,249 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_line_object.cpp
|
||||
* Description: Implementation of the Cube Line Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include "cube_line_object.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeLineObject::CubeLineObject(CubeRecoContext *cntxt, Pix *pix) {
|
||||
line_pix_ = pix;
|
||||
own_pix_ = false;
|
||||
processed_ = false;
|
||||
cntxt_ = cntxt;
|
||||
phrase_cnt_ = 0;
|
||||
phrases_ = NULL;
|
||||
}
|
||||
|
||||
CubeLineObject::~CubeLineObject() {
|
||||
if (line_pix_ != NULL && own_pix_ == true) {
|
||||
pixDestroy(&line_pix_);
|
||||
line_pix_ = NULL;
|
||||
}
|
||||
|
||||
if (phrases_ != NULL) {
|
||||
for (int phrase_idx = 0; phrase_idx < phrase_cnt_; phrase_idx++) {
|
||||
if (phrases_[phrase_idx] != NULL) {
|
||||
delete phrases_[phrase_idx];
|
||||
}
|
||||
}
|
||||
|
||||
delete []phrases_;
|
||||
phrases_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Recognize the specified pix as one line returning the recognized
|
||||
bool CubeLineObject::Process() {
|
||||
// do nothing if pix had already been processed
|
||||
if (processed_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// validate data
|
||||
if (line_pix_ == NULL || cntxt_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// create a CharSamp
|
||||
CharSamp *char_samp = CubeUtils::CharSampleFromPix(line_pix_, 0, 0,
|
||||
line_pix_->w,
|
||||
line_pix_->h);
|
||||
if (char_samp == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// compute connected components.
|
||||
int con_comp_cnt = 0;
|
||||
ConComp **con_comps = char_samp->FindConComps(&con_comp_cnt,
|
||||
cntxt_->Params()->MinConCompSize());
|
||||
// no longer need char_samp, delete it
|
||||
delete char_samp;
|
||||
// no connected components, bail out
|
||||
if (con_comp_cnt <= 0 || con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// sort connected components based on reading order
|
||||
bool rtl = (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L);
|
||||
qsort(con_comps, con_comp_cnt, sizeof(*con_comps), rtl ?
|
||||
ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
|
||||
|
||||
// compute work breaking threshold as a ratio of line height
|
||||
bool ret_val = false;
|
||||
int word_break_threshold = ComputeWordBreakThreshold(con_comp_cnt, con_comps,
|
||||
rtl);
|
||||
if (word_break_threshold > 0) {
|
||||
// over-allocate phrases object buffer
|
||||
phrases_ = new CubeObject *[con_comp_cnt];
|
||||
// create a phrase if the horizontal distance between two consecutive
|
||||
// concomps is higher than threshold
|
||||
int start_con_idx = 0;
|
||||
int current_phrase_limit = rtl ? con_comps[0]->Left() :
|
||||
con_comps[0]->Right();
|
||||
|
||||
for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
|
||||
bool create_new_phrase = true;
|
||||
// if not at the end, compute the distance between two consecutive
|
||||
// concomps
|
||||
if (con_idx < con_comp_cnt) {
|
||||
int dist = 0;
|
||||
if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
|
||||
dist = current_phrase_limit - con_comps[con_idx]->Right();
|
||||
} else {
|
||||
dist = con_comps[con_idx]->Left() - current_phrase_limit;
|
||||
}
|
||||
create_new_phrase = (dist > word_break_threshold);
|
||||
}
|
||||
|
||||
// create a new phrase
|
||||
if (create_new_phrase) {
|
||||
// create a phrase corresponding to a range on components
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *phrase_char_samp =
|
||||
CharSamp::FromConComps(con_comps, start_con_idx,
|
||||
con_idx - start_con_idx, NULL,
|
||||
&left_most, &right_most,
|
||||
line_pix_->h);
|
||||
if (phrase_char_samp == NULL) {
|
||||
break;
|
||||
}
|
||||
phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp);
|
||||
// set the ownership of the charsamp to the cube object
|
||||
phrases_[phrase_cnt_]->SetCharSampOwnership(true);
|
||||
phrase_cnt_++;
|
||||
// advance the starting index to the current index
|
||||
start_con_idx = con_idx;
|
||||
// set the limit of the newly starting phrase (if any)
|
||||
if (con_idx < con_comp_cnt) {
|
||||
current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
|
||||
con_comps[con_idx]->Right();
|
||||
}
|
||||
} else {
|
||||
// update the limit of the current phrase
|
||||
if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
|
||||
current_phrase_limit = MIN(current_phrase_limit,
|
||||
con_comps[con_idx]->Left());
|
||||
} else {
|
||||
current_phrase_limit = MAX(current_phrase_limit,
|
||||
con_comps[con_idx]->Right());
|
||||
}
|
||||
}
|
||||
}
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
// clean-up connected comps
|
||||
for (int con_idx = 0; con_idx < con_comp_cnt; con_idx++) {
|
||||
delete con_comps[con_idx];
|
||||
}
|
||||
delete []con_comps;
|
||||
|
||||
// success
|
||||
processed_ = true;
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
// Compute the least word breaking threshold that is required to produce a
|
||||
// valid set of phrases. Phrases are validated using the Aspect ratio
|
||||
// constraints specified in the language specific Params object
|
||||
int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt,
|
||||
ConComp **con_comps, bool rtl) {
|
||||
// initial estimate of word breaking threshold
|
||||
int word_break_threshold =
|
||||
static_cast<int>(line_pix_->h * cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
bool valid = false;
|
||||
|
||||
// compute the resulting words and validate each's aspect ratio
|
||||
do {
|
||||
// group connected components into words based on breaking threshold
|
||||
int start_con_idx = 0;
|
||||
int current_phrase_limit = (rtl ? con_comps[0]->Left() :
|
||||
con_comps[0]->Right());
|
||||
int min_x = con_comps[0]->Left();
|
||||
int max_x = con_comps[0]->Right();
|
||||
int min_y = con_comps[0]->Top();
|
||||
int max_y = con_comps[0]->Bottom();
|
||||
valid = true;
|
||||
for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
|
||||
bool create_new_phrase = true;
|
||||
// if not at the end, compute the distance between two consecutive
|
||||
// concomps
|
||||
if (con_idx < con_comp_cnt) {
|
||||
int dist = 0;
|
||||
if (rtl) {
|
||||
dist = current_phrase_limit - con_comps[con_idx]->Right();
|
||||
} else {
|
||||
dist = con_comps[con_idx]->Left() - current_phrase_limit;
|
||||
}
|
||||
create_new_phrase = (dist > word_break_threshold);
|
||||
}
|
||||
|
||||
// create a new phrase
|
||||
if (create_new_phrase) {
|
||||
// check aspect ratio. Break if invalid
|
||||
if ((max_x - min_x + 1) >
|
||||
(cntxt_->Params()->MaxWordAspectRatio() * (max_y - min_y + 1))) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
// advance the starting index to the current index
|
||||
start_con_idx = con_idx;
|
||||
// set the limit of the newly starting phrase (if any)
|
||||
if (con_idx < con_comp_cnt) {
|
||||
current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
|
||||
con_comps[con_idx]->Right();
|
||||
// re-init bounding box
|
||||
min_x = con_comps[con_idx]->Left();
|
||||
max_x = con_comps[con_idx]->Right();
|
||||
min_y = con_comps[con_idx]->Top();
|
||||
max_y = con_comps[con_idx]->Bottom();
|
||||
}
|
||||
} else {
|
||||
// update the limit of the current phrase
|
||||
if (rtl) {
|
||||
current_phrase_limit = MIN(current_phrase_limit,
|
||||
con_comps[con_idx]->Left());
|
||||
} else {
|
||||
current_phrase_limit = MAX(current_phrase_limit,
|
||||
con_comps[con_idx]->Right());
|
||||
}
|
||||
// update bounding box
|
||||
UpdateRange(con_comps[con_idx]->Left(),
|
||||
con_comps[con_idx]->Right(), &min_x, &max_x);
|
||||
UpdateRange(con_comps[con_idx]->Top(),
|
||||
con_comps[con_idx]->Bottom(), &min_y, &max_y);
|
||||
}
|
||||
}
|
||||
|
||||
// return the breaking threshold if all broken word dimensions are valid
|
||||
if (valid) {
|
||||
return word_break_threshold;
|
||||
}
|
||||
|
||||
// decrease the threshold and try again
|
||||
word_break_threshold--;
|
||||
} while (!valid && word_break_threshold > 0);
|
||||
|
||||
// failed to find a threshold that achieves the target aspect ratio.
|
||||
// Just use the default threshold
|
||||
return static_cast<int>(line_pix_->h *
|
||||
cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
}
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_line_object.h
|
||||
* Description: Declaration of the Cube Line Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeLineObject implements an objects that holds a line of text
|
||||
// Each line is broken into phrases. Phrases are blocks within the line that
|
||||
// are unambiguously separate collections of words
|
||||
|
||||
#ifndef CUBE_LINE_OBJECT_H
|
||||
#define CUBE_LINE_OBJECT_H
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_object.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeLineObject {
|
||||
public:
|
||||
CubeLineObject(CubeRecoContext *cntxt, Pix *pix);
|
||||
~CubeLineObject();
|
||||
|
||||
// accessors
|
||||
inline int PhraseCount() {
|
||||
if (!processed_ && !Process()) {
|
||||
return 0;
|
||||
}
|
||||
return phrase_cnt_;
|
||||
}
|
||||
inline CubeObject **Phrases() {
|
||||
if (!processed_ && !Process()) {
|
||||
return NULL;
|
||||
}
|
||||
return phrases_;
|
||||
}
|
||||
|
||||
private:
|
||||
CubeRecoContext *cntxt_;
|
||||
bool own_pix_;
|
||||
bool processed_;
|
||||
Pix *line_pix_;
|
||||
CubeObject **phrases_;
|
||||
int phrase_cnt_;
|
||||
bool Process();
|
||||
// Compute the least word breaking threshold that is required to produce a
|
||||
// valid set of phrases. Phrases are validated using the Aspect ratio
|
||||
// constraints specified in the language specific Params object
|
||||
int ComputeWordBreakThreshold(int con_comp_cnt, ConComp **con_comps,
|
||||
bool rtl);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_LINE_OBJECT_H
|
@ -1,949 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_page_segmenter.cpp
|
||||
* Description: Implementation of the Cube Page Segmenter Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "cube_line_segmenter.h"
|
||||
#include "ndminx.h"
|
||||
|
||||
namespace tesseract {
|
||||
// constants that worked for Arabic page segmenter
|
||||
const int CubeLineSegmenter::kLineSepMorphMinHgt = 20;
|
||||
const int CubeLineSegmenter::kHgtBins = 20;
|
||||
const double CubeLineSegmenter::kMaxValidLineRatio = 3.2;
|
||||
const int CubeLineSegmenter::kMaxConnCompHgt = 150;
|
||||
const int CubeLineSegmenter::kMaxConnCompWid = 500;
|
||||
const int CubeLineSegmenter::kMaxHorzAspectRatio = 50;
|
||||
const int CubeLineSegmenter::kMaxVertAspectRatio = 20;
|
||||
const int CubeLineSegmenter::kMinWid = 2;
|
||||
const int CubeLineSegmenter::kMinHgt = 2;
|
||||
const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5;
|
||||
|
||||
CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img) {
|
||||
cntxt_ = cntxt;
|
||||
orig_img_ = img;
|
||||
img_ = NULL;
|
||||
lines_pixa_ = NULL;
|
||||
init_ = false;
|
||||
line_cnt_ = 0;
|
||||
columns_ = NULL;
|
||||
con_comps_ = NULL;
|
||||
est_alef_hgt_ = 0.0;
|
||||
est_dot_hgt_ = 0.0;
|
||||
}
|
||||
|
||||
CubeLineSegmenter::~CubeLineSegmenter() {
|
||||
if (img_ != NULL) {
|
||||
pixDestroy(&img_);
|
||||
img_ = NULL;
|
||||
}
|
||||
|
||||
if (lines_pixa_ != NULL) {
|
||||
pixaDestroy(&lines_pixa_);
|
||||
lines_pixa_ = NULL;
|
||||
}
|
||||
|
||||
if (con_comps_ != NULL) {
|
||||
pixaDestroy(&con_comps_);
|
||||
con_comps_ = NULL;
|
||||
}
|
||||
|
||||
if (columns_ != NULL) {
|
||||
pixaaDestroy(&columns_);
|
||||
columns_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// compute validity ratio for a line
|
||||
double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) {
|
||||
return line_box->h / est_alef_hgt_;
|
||||
}
|
||||
|
||||
// validate line
|
||||
bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) {
|
||||
double validity_ratio = ValidityRatio(line_mask_pix, line_box);
|
||||
|
||||
return validity_ratio < kMaxValidLineRatio;
|
||||
}
|
||||
|
||||
// perform a vertical Closing with the specified threshold
|
||||
// returning the resulting conn comps as a pixa
|
||||
Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix,
|
||||
int threshold, Boxa **boxa) {
|
||||
char sequence_str[16];
|
||||
|
||||
// do the morphology
|
||||
sprintf(sequence_str, "c100.%d", threshold);
|
||||
Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0);
|
||||
if (morphed_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// get the resulting lines by computing concomps
|
||||
Pixa *pixac;
|
||||
(*boxa) = pixConnComp(morphed_pix, &pixac, 8);
|
||||
|
||||
pixDestroy(&morphed_pix);
|
||||
|
||||
if ((*boxa) == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// Helper cleans up after CrackLine.
|
||||
static void CleanupCrackLine(int line_cnt, Pixa **lines_pixa,
|
||||
Boxa **line_con_comps,
|
||||
Pixa **line_con_comps_pix) {
|
||||
for (int line = 0; line < line_cnt; line++) {
|
||||
if (lines_pixa[line] != NULL) {
|
||||
pixaDestroy(&lines_pixa[line]);
|
||||
}
|
||||
}
|
||||
|
||||
delete []lines_pixa;
|
||||
boxaDestroy(line_con_comps);
|
||||
pixaDestroy(line_con_comps_pix);
|
||||
}
|
||||
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
|
||||
Box *cracked_line_box, int line_cnt) {
|
||||
// create lines pixa array
|
||||
Pixa **lines_pixa = new Pixa*[line_cnt];
|
||||
|
||||
memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa));
|
||||
|
||||
// compute line conn comps
|
||||
Pixa *line_con_comps_pix;
|
||||
Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix,
|
||||
cracked_line_box, &line_con_comps_pix);
|
||||
|
||||
if (line_con_comps == NULL) {
|
||||
delete []lines_pixa;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// assign each conn comp to the a line based on its centroid
|
||||
for (int con = 0; con < line_con_comps->n; con++) {
|
||||
Box *con_box = line_con_comps->box[con];
|
||||
Pix *con_pix = line_con_comps_pix->pix[con];
|
||||
int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2),
|
||||
line_idx = MIN(line_cnt - 1,
|
||||
(mid_y * line_cnt / cracked_line_box->h));
|
||||
|
||||
// create the line if it has not been created?
|
||||
if (lines_pixa[line_idx] == NULL) {
|
||||
lines_pixa[line_idx] = pixaCreate(line_con_comps->n);
|
||||
if (lines_pixa[line_idx] == NULL) {
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// add the concomp to the line
|
||||
if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 ||
|
||||
pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) {
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// create the lines pixa
|
||||
Pixa *lines = pixaCreate(line_cnt);
|
||||
bool success = true;
|
||||
|
||||
// create and check the validity of the lines
|
||||
for (int line = 0; line < line_cnt; line++) {
|
||||
Pixa *line_pixa = lines_pixa[line];
|
||||
|
||||
// skip invalid lines
|
||||
if (line_pixa == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// merge the pix, check the validity of the line
|
||||
// and add it to the lines pixa
|
||||
Box *line_box;
|
||||
Pix *line_pix = Pixa2Pix(line_pixa, &line_box);
|
||||
if (line_pix == NULL ||
|
||||
line_box == NULL ||
|
||||
ValidLine(line_pix, line_box) == false ||
|
||||
pixaAddPix(lines, line_pix, L_INSERT) != 0 ||
|
||||
pixaAddBox(lines, line_box, L_INSERT) != 0) {
|
||||
if (line_pix != NULL) {
|
||||
pixDestroy(&line_pix);
|
||||
}
|
||||
|
||||
if (line_box != NULL) {
|
||||
boxDestroy(&line_box);
|
||||
}
|
||||
|
||||
success = false;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
|
||||
if (success == false) {
|
||||
pixaDestroy(&lines);
|
||||
lines = NULL;
|
||||
}
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
|
||||
Box *cracked_line_box) {
|
||||
// estimate max line count
|
||||
int max_line_cnt = static_cast<int>((cracked_line_box->h /
|
||||
est_alef_hgt_) + 0.5);
|
||||
if (max_line_cnt < 2) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int line_cnt = 2; line_cnt < max_line_cnt; line_cnt++) {
|
||||
Pixa *lines = CrackLine(cracked_line_pix, cracked_line_box, line_cnt);
|
||||
if (lines != NULL) {
|
||||
return lines;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// split a line continuously until valid or fail
|
||||
Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) {
|
||||
// clone the line mask
|
||||
Pix *line_pix = pixClone(line_mask_pix);
|
||||
|
||||
if (line_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// AND with the image to get the actual line
|
||||
pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
|
||||
PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
|
||||
|
||||
// continue to do rasterop morphology on the line until
|
||||
// it splits to valid lines or we fail
|
||||
int morph_hgt = kLineSepMorphMinHgt - 1,
|
||||
best_threshold = kLineSepMorphMinHgt - 1,
|
||||
max_valid_portion = 0;
|
||||
|
||||
Boxa *boxa;
|
||||
Pixa *pixac;
|
||||
|
||||
do {
|
||||
pixac = VerticalClosing(line_pix, morph_hgt, &boxa);
|
||||
|
||||
// add the box offset to all the lines
|
||||
// and check for the validity of each
|
||||
int line,
|
||||
valid_line_cnt = 0,
|
||||
valid_portion = 0;
|
||||
|
||||
for (line = 0; line < pixac->n; line++) {
|
||||
boxa->box[line]->x += line_box->x;
|
||||
boxa->box[line]->y += line_box->y;
|
||||
|
||||
if (ValidLine(pixac->pix[line], boxa->box[line]) == true) {
|
||||
// count valid lines
|
||||
valid_line_cnt++;
|
||||
|
||||
// and the valid portions
|
||||
valid_portion += boxa->box[line]->h;
|
||||
}
|
||||
}
|
||||
|
||||
// all the lines are valid
|
||||
if (valid_line_cnt == pixac->n) {
|
||||
boxaDestroy(&boxa);
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// a larger valid portion
|
||||
if (valid_portion > max_valid_portion) {
|
||||
max_valid_portion = valid_portion;
|
||||
best_threshold = morph_hgt;
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
pixaDestroy(&pixac);
|
||||
|
||||
morph_hgt--;
|
||||
}
|
||||
while (morph_hgt > 0);
|
||||
|
||||
// failed to break into valid lines
|
||||
// attempt to crack the line
|
||||
pixac = CrackLine(line_pix, line_box);
|
||||
if (pixac != NULL) {
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// try to leverage any of the lines
|
||||
// did the best threshold yield a non zero valid portion
|
||||
if (max_valid_portion > 0) {
|
||||
// use this threshold to break lines
|
||||
pixac = VerticalClosing(line_pix, best_threshold, &boxa);
|
||||
|
||||
// add the box offset to all the lines
|
||||
// and check for the validity of each
|
||||
for (int line = 0; line < pixac->n; line++) {
|
||||
boxa->box[line]->x += line_box->x;
|
||||
boxa->box[line]->y += line_box->y;
|
||||
|
||||
// remove invalid lines from the pixa
|
||||
if (ValidLine(pixac->pix[line], boxa->box[line]) == false) {
|
||||
pixaRemovePix(pixac, line);
|
||||
line--;
|
||||
}
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// last resort: attempt to crack the line
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Checks of a line is too small
|
||||
bool CubeLineSegmenter::SmallLine(Box *line_box) {
|
||||
return line_box->h <= (kMinValidLineHgtRatio * est_dot_hgt_);
|
||||
}
|
||||
|
||||
// Compute the connected components in a line
|
||||
Boxa * CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix,
|
||||
Box *line_box,
|
||||
Pixa **con_comps_pixa) {
|
||||
// clone the line mask
|
||||
Pix *line_pix = pixClone(line_mask_pix);
|
||||
|
||||
if (line_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// AND with the image to get the actual line
|
||||
pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
|
||||
PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
|
||||
|
||||
// compute the connected components of the line to be merged
|
||||
Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8);
|
||||
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
// offset boxes by the bbox of the line
|
||||
for (int con = 0; con < line_con_comps->n; con++) {
|
||||
line_con_comps->box[con]->x += line_box->x;
|
||||
line_con_comps->box[con]->y += line_box->y;
|
||||
}
|
||||
|
||||
return line_con_comps;
|
||||
}
|
||||
|
||||
// create a union of two arbitrary pix
|
||||
Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box,
|
||||
Pix *src_pix, Box *src_box) {
|
||||
// compute dimensions of union rect
|
||||
BOX *union_box = boxBoundingRegion(src_box, dest_box);
|
||||
|
||||
// create the union pix
|
||||
Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d);
|
||||
if (union_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// blt the src and dest pix
|
||||
pixRasterop(union_pix,
|
||||
src_box->x - union_box->x, src_box->y - union_box->y,
|
||||
src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0);
|
||||
|
||||
pixRasterop(union_pix,
|
||||
dest_box->x - union_box->x, dest_box->y - union_box->y,
|
||||
dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0);
|
||||
|
||||
// replace the dest_box
|
||||
*dest_box = *union_box;
|
||||
|
||||
boxDestroy(&union_box);
|
||||
|
||||
return union_pix;
|
||||
}
|
||||
|
||||
// create a union of a number of arbitrary pix
|
||||
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box,
|
||||
int start_pix, int pix_cnt) {
|
||||
// compute union_box
|
||||
int min_x = INT_MAX,
|
||||
max_x = INT_MIN,
|
||||
min_y = INT_MAX,
|
||||
max_y = INT_MIN;
|
||||
|
||||
for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
|
||||
Box *pix_box = pixa->boxa->box[pix_idx];
|
||||
|
||||
UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x);
|
||||
UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y);
|
||||
}
|
||||
|
||||
(*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y);
|
||||
if ((*dest_box) == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create the union pix
|
||||
Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d);
|
||||
if (union_pix == NULL) {
|
||||
boxDestroy(dest_box);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a pix corresponding to the union of all pixs
|
||||
// blt the src and dest pix
|
||||
for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
|
||||
Box *pix_box = pixa->boxa->box[pix_idx];
|
||||
Pix *con_pix = pixa->pix[pix_idx];
|
||||
|
||||
pixRasterop(union_pix,
|
||||
pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y,
|
||||
pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0);
|
||||
}
|
||||
|
||||
return union_pix;
|
||||
}
|
||||
|
||||
// create a union of a number of arbitrary pix
|
||||
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) {
|
||||
return Pixa2Pix(pixa, dest_box, 0, pixa->n);
|
||||
}
|
||||
|
||||
// merges a number of lines into one line given a bounding box and a mask
|
||||
bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa *lines, Boxaa *lines_con_comps) {
|
||||
// compute the connected components of the lines to be merged
|
||||
Pixa *small_con_comps_pix;
|
||||
Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix,
|
||||
line_box, &small_con_comps_pix);
|
||||
|
||||
if (small_line_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// for each connected component
|
||||
for (int con = 0; con < small_line_con_comps->n; con++) {
|
||||
Box *small_con_comp_box = small_line_con_comps->box[con];
|
||||
int best_line = -1,
|
||||
best_dist = INT_MAX,
|
||||
small_box_right = small_con_comp_box->x + small_con_comp_box->w,
|
||||
small_box_bottom = small_con_comp_box->y + small_con_comp_box->h;
|
||||
|
||||
// for each valid line
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
if (SmallLine(lines->boxa->box[line]) == true) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// for all the connected components in the line
|
||||
Boxa *line_con_comps = lines_con_comps->boxa[line];
|
||||
|
||||
for (int lcon = 0; lcon < line_con_comps->n; lcon++) {
|
||||
Box *con_comp_box = line_con_comps->box[lcon];
|
||||
int xdist,
|
||||
ydist,
|
||||
box_right = con_comp_box->x + con_comp_box->w,
|
||||
box_bottom = con_comp_box->y + con_comp_box->h;
|
||||
|
||||
xdist = MAX(small_con_comp_box->x, con_comp_box->x) -
|
||||
MIN(small_box_right, box_right);
|
||||
|
||||
ydist = MAX(small_con_comp_box->y, con_comp_box->y) -
|
||||
MIN(small_box_bottom, box_bottom);
|
||||
|
||||
// if there is an overlap in x-direction
|
||||
if (xdist <= 0) {
|
||||
if (best_line == -1 || ydist < best_dist) {
|
||||
best_dist = ydist;
|
||||
best_line = line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the distance is too big, do not merged
|
||||
if (best_line != -1 && best_dist < est_alef_hgt_) {
|
||||
// add the pix to the best line
|
||||
Pix *new_line = PixUnion(lines->pix[best_line],
|
||||
lines->boxa->box[best_line],
|
||||
small_con_comps_pix->pix[con], small_con_comp_box);
|
||||
|
||||
if (new_line == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&lines->pix[best_line]);
|
||||
lines->pix[best_line] = new_line;
|
||||
}
|
||||
}
|
||||
|
||||
pixaDestroy(&small_con_comps_pix);
|
||||
boxaDestroy(&small_line_con_comps);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates new set of lines from the computed columns
|
||||
bool CubeLineSegmenter::AddLines(Pixa *lines) {
|
||||
// create an array that will hold the bounding boxes
|
||||
// of the concomps belonging to each line
|
||||
Boxaa *lines_con_comps = boxaaCreate(lines->n);
|
||||
if (lines_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
// if the line is not valid
|
||||
if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) {
|
||||
// split it
|
||||
Pixa *split_lines = SplitLine(lines->pix[line],
|
||||
lines->boxa->box[line]);
|
||||
|
||||
// remove the old line
|
||||
if (pixaRemovePix(lines, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line--;
|
||||
|
||||
if (split_lines == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// add the split lines instead and move the pointer
|
||||
for (int s_line = 0; s_line < split_lines->n; s_line++) {
|
||||
Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE);
|
||||
Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE);
|
||||
|
||||
if (sp_line == NULL || sp_box == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert the new line
|
||||
if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// remove the split lines
|
||||
pixaDestroy(&split_lines);
|
||||
}
|
||||
}
|
||||
|
||||
// compute the concomps bboxes of each line
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
Boxa *line_con_comps = ComputeLineConComps(lines->pix[line],
|
||||
lines->boxa->box[line], NULL);
|
||||
|
||||
if (line_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert it into the boxaa array
|
||||
if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// post process the lines:
|
||||
// merge the contents of "small" lines info legitimate lines
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
// a small line detected
|
||||
if (SmallLine(lines->boxa->box[line]) == true) {
|
||||
// merge its components to one of the valid lines
|
||||
if (MergeLine(lines->pix[line], lines->boxa->box[line],
|
||||
lines, lines_con_comps) == true) {
|
||||
// remove the small line
|
||||
if (pixaRemovePix(lines, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (boxaaRemoveBoxa(lines_con_comps, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boxaaDestroy(&lines_con_comps);
|
||||
|
||||
// add the pix masks
|
||||
if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Index the specific pixa using RTL reading order
|
||||
int *CubeLineSegmenter::IndexRTL(Pixa *pixa) {
|
||||
int *pix_index = new int[pixa->n];
|
||||
|
||||
for (int pix = 0; pix < pixa->n; pix++) {
|
||||
pix_index[pix] = pix;
|
||||
}
|
||||
|
||||
for (int ipix = 0; ipix < pixa->n; ipix++) {
|
||||
for (int jpix = ipix + 1; jpix < pixa->n; jpix++) {
|
||||
Box *ipix_box = pixa->boxa->box[pix_index[ipix]],
|
||||
*jpix_box = pixa->boxa->box[pix_index[jpix]];
|
||||
|
||||
// swap?
|
||||
if ((ipix_box->x + ipix_box->w) < (jpix_box->x + jpix_box->w)) {
|
||||
int temp = pix_index[ipix];
|
||||
pix_index[ipix] = pix_index[jpix];
|
||||
pix_index[jpix] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pix_index;
|
||||
}
|
||||
|
||||
// Performs line segmentation
|
||||
bool CubeLineSegmenter::LineSegment() {
|
||||
// Use full image morphology to find columns
|
||||
// This only works for simple layouts where each column
|
||||
// of text extends the full height of the input image.
|
||||
Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0);
|
||||
if (pix_temp1 == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mask with a single component over each column
|
||||
Pixa *pixam;
|
||||
Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8);
|
||||
|
||||
if (boxa == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int init_morph_min_hgt = kLineSepMorphMinHgt;
|
||||
char sequence_str[16];
|
||||
sprintf(sequence_str, "c100.%d", init_morph_min_hgt);
|
||||
|
||||
// Use selective region-based morphology to get the textline mask.
|
||||
Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0);
|
||||
if (pixad == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// for all columns
|
||||
int col_cnt = boxaGetCount(boxa);
|
||||
|
||||
// create columns
|
||||
columns_ = pixaaCreate(col_cnt);
|
||||
if (columns_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// index columns based on readind order (RTL)
|
||||
int *col_order = IndexRTL(pixad);
|
||||
if (col_order == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line_cnt_ = 0;
|
||||
|
||||
for (int col_idx = 0; col_idx < col_cnt; col_idx++) {
|
||||
int col = col_order[col_idx];
|
||||
|
||||
// get the pix and box corresponding to the column
|
||||
Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE);
|
||||
if (pixt3 == NULL) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
|
||||
Box *col_box = pixad->boxa->box[col];
|
||||
|
||||
Pixa *pixac;
|
||||
Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8);
|
||||
if (boxa2 == NULL) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
|
||||
// offset the boxes by the column box
|
||||
for (int line = 0; line < pixac->n; line++) {
|
||||
pixac->boxa->box[line]->x += col_box->x;
|
||||
pixac->boxa->box[line]->y += col_box->y;
|
||||
}
|
||||
|
||||
// add the lines
|
||||
if (AddLines(pixac) == true) {
|
||||
if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pixDestroy(&pixt3);
|
||||
boxaDestroy(&boxa2);
|
||||
|
||||
line_cnt_ += columns_->pixa[col_idx]->n;
|
||||
}
|
||||
|
||||
pixaDestroy(&pixam);
|
||||
pixaDestroy(&pixad);
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
delete []col_order;
|
||||
pixDestroy(&pix_temp1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Estimate the parameters of the font(s) used in the page
|
||||
bool CubeLineSegmenter::EstimateFontParams() {
|
||||
int hgt_hist[kHgtBins];
|
||||
int max_hgt;
|
||||
double mean_hgt;
|
||||
|
||||
// init hgt histogram of concomps
|
||||
memset(hgt_hist, 0, sizeof(hgt_hist));
|
||||
|
||||
// compute max hgt
|
||||
max_hgt = 0;
|
||||
|
||||
for (int con = 0; con < con_comps_->n; con++) {
|
||||
// skip conn comps that are too long or too wide
|
||||
if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
|
||||
con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h);
|
||||
}
|
||||
|
||||
if (max_hgt <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// init hgt histogram of concomps
|
||||
memset(hgt_hist, 0, sizeof(hgt_hist));
|
||||
|
||||
// compute histogram
|
||||
mean_hgt = 0.0;
|
||||
for (int con = 0; con < con_comps_->n; con++) {
|
||||
// skip conn comps that are too long or too wide
|
||||
if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
|
||||
con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int bin = static_cast<int>(kHgtBins * con_comps_->boxa->box[con]->h /
|
||||
max_hgt);
|
||||
bin = MIN(bin, kHgtBins - 1);
|
||||
hgt_hist[bin]++;
|
||||
mean_hgt += con_comps_->boxa->box[con]->h;
|
||||
}
|
||||
|
||||
mean_hgt /= con_comps_->n;
|
||||
|
||||
// find the top 2 bins
|
||||
int idx[kHgtBins];
|
||||
|
||||
for (int bin = 0; bin < kHgtBins; bin++) {
|
||||
idx[bin] = bin;
|
||||
}
|
||||
|
||||
for (int ibin = 0; ibin < 2; ibin++) {
|
||||
for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) {
|
||||
if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) {
|
||||
int swap = idx[ibin];
|
||||
idx[ibin] = idx[jbin];
|
||||
idx[jbin] = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// emperically, we found out that the 2 highest freq bins correspond
|
||||
// respectively to the dot and alef
|
||||
est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins);
|
||||
est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins);
|
||||
|
||||
// as a sanity check the dot hgt must be significanly lower than alef
|
||||
if (est_alef_hgt_ < (est_dot_hgt_ * 2)) {
|
||||
// use max_hgt to estimate instead
|
||||
est_alef_hgt_ = mean_hgt * 1.5;
|
||||
est_dot_hgt_ = est_alef_hgt_ / 5.0;
|
||||
}
|
||||
|
||||
est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// clean up the image
|
||||
Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) {
|
||||
// get rid of long horizontal lines
|
||||
Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0);
|
||||
pixXor(pix_temp0, pix_temp0, orig_img);
|
||||
|
||||
// get rid of long vertical lines
|
||||
Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0);
|
||||
pixXor(pix_temp1, pix_temp1, pix_temp0);
|
||||
|
||||
pixDestroy(&pix_temp0);
|
||||
|
||||
// detect connected components
|
||||
Pixa *con_comps;
|
||||
Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8);
|
||||
if (boxa == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// detect and remove suspicious conn comps
|
||||
for (int con = 0; con < con_comps->n; con++) {
|
||||
Box *box = boxa->box[con];
|
||||
|
||||
// remove if suspc. conn comp
|
||||
if ((box->w > (box->h * kMaxHorzAspectRatio)) ||
|
||||
(box->h > (box->w * kMaxVertAspectRatio)) ||
|
||||
(box->w < kMinWid && box->h < kMinHgt)) {
|
||||
pixRasterop(pix_temp1, box->x, box->y, box->w, box->h,
|
||||
PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
pixaDestroy(&con_comps);
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
return pix_temp1;
|
||||
}
|
||||
|
||||
// Init the page segmenter
|
||||
bool CubeLineSegmenter::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (orig_img_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// call the internal line segmentation
|
||||
return FindLines();
|
||||
}
|
||||
|
||||
// return the pix mask and box of a specific line
|
||||
Pix *CubeLineSegmenter::Line(int line, Box **line_box) {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (line < 0 || line >= line_cnt_) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
(*line_box) = lines_pixa_->boxa->box[line];
|
||||
return lines_pixa_->pix[line];
|
||||
}
|
||||
|
||||
// Implements a basic rudimentary layout analysis based on Leptonica
|
||||
// works OK for Arabic. For other languages, the function TesseractPageAnalysis
|
||||
// should be called instead.
|
||||
bool CubeLineSegmenter::FindLines() {
|
||||
// convert the image to gray scale if necessary
|
||||
Pix *gray_scale_img = NULL;
|
||||
if (orig_img_->d != 2 && orig_img_->d != 8) {
|
||||
gray_scale_img = pixConvertTo8(orig_img_, false);
|
||||
if (gray_scale_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
gray_scale_img = orig_img_;
|
||||
}
|
||||
|
||||
// threshold image
|
||||
Pix *thresholded_img;
|
||||
thresholded_img = pixThresholdToBinary(gray_scale_img, 128);
|
||||
// free the gray scale image if necessary
|
||||
if (gray_scale_img != orig_img_) {
|
||||
pixDestroy(&gray_scale_img);
|
||||
}
|
||||
// bail-out if thresholding failed
|
||||
if (thresholded_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// deskew
|
||||
Pix *deskew_img = pixDeskew(thresholded_img, 2);
|
||||
if (deskew_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&thresholded_img);
|
||||
|
||||
img_ = CleanUp(deskew_img);
|
||||
pixDestroy(&deskew_img);
|
||||
if (img_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&deskew_img);
|
||||
|
||||
// compute connected components
|
||||
Boxa *boxa = pixConnComp(img_, &con_comps_, 8);
|
||||
if (boxa == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
// estimate dot and alef hgts
|
||||
if (EstimateFontParams() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// perform line segmentation
|
||||
if (LineSegment() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// success
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_page_segmenter.h
|
||||
* Description: Declaration of the Cube Page Segmenter Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// TODO(ahmadab)
|
||||
// This is really a makeshift line segmenter that works well for Arabic
|
||||
// This should eventually be replaced by Ray Smith's Page segmenter
|
||||
// There are lots of magic numbers below that were determined empirically
|
||||
// but not thoroughly tested
|
||||
|
||||
#ifndef CUBE_LINE_SEGMENTER_H
|
||||
#define CUBE_LINE_SEGMENTER_H
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CubeLineSegmenter {
|
||||
public:
|
||||
CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
|
||||
~CubeLineSegmenter();
|
||||
|
||||
// Accessor functions
|
||||
Pix *PostProcessedImage() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
return img_;
|
||||
}
|
||||
int ColumnCnt() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return 0;
|
||||
}
|
||||
return columns_->n;
|
||||
}
|
||||
Box *Column(int col) {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return columns_->boxa->box[col];
|
||||
}
|
||||
int LineCnt() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return line_cnt_;
|
||||
}
|
||||
Pixa *ConComps() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return con_comps_;
|
||||
}
|
||||
Pixaa *Columns() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return columns_;
|
||||
}
|
||||
inline double AlefHgtEst() { return est_alef_hgt_; }
|
||||
inline double DotHgtEst() { return est_dot_hgt_; }
|
||||
Pix *Line(int line, Box **line_box);
|
||||
|
||||
private:
|
||||
static const float kMinValidLineHgtRatio;
|
||||
static const int kLineSepMorphMinHgt;
|
||||
static const int kHgtBins;
|
||||
static const int kMaxConnCompHgt;
|
||||
static const int kMaxConnCompWid;
|
||||
static const int kMaxHorzAspectRatio;
|
||||
static const int kMaxVertAspectRatio;
|
||||
static const int kMinWid;
|
||||
static const int kMinHgt;
|
||||
static const double kMaxValidLineRatio;
|
||||
|
||||
// Cube Reco context
|
||||
CubeRecoContext *cntxt_;
|
||||
// Original image
|
||||
Pix *orig_img_;
|
||||
// Post processed image
|
||||
Pix *img_;
|
||||
// Init flag
|
||||
bool init_;
|
||||
// Output Line and column info
|
||||
int line_cnt_;
|
||||
Pixaa *columns_;
|
||||
Pixa *con_comps_;
|
||||
Pixa *lines_pixa_;
|
||||
// Estimates for sizes of ALEF and DOT needed for Arabic analysis
|
||||
double est_alef_hgt_;
|
||||
double est_dot_hgt_;
|
||||
|
||||
// Init the page analysis
|
||||
bool Init();
|
||||
// Performs line segmentation
|
||||
bool LineSegment();
|
||||
// Cleanup function
|
||||
Pix *CleanUp(Pix *pix);
|
||||
// compute validity ratio for a line
|
||||
double ValidityRatio(Pix *line_mask_pix, Box *line_box);
|
||||
// validate line
|
||||
bool ValidLine(Pix *line_mask_pix, Box *line_box);
|
||||
// split a line continuously until valid or fail
|
||||
Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
|
||||
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
|
||||
// Checks of a line is too small
|
||||
bool SmallLine(Box *line_box);
|
||||
// Compute the connected components in a line
|
||||
Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa **con_comps_pixa);
|
||||
// create a union of two arbitrary pix
|
||||
Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
|
||||
// create a union of a pixa subset
|
||||
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
|
||||
// create a union of a pixa
|
||||
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
|
||||
// merges a number of lines into one line given a bounding box and a mask
|
||||
bool MergeLine(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa *lines, Boxaa *lines_con_comps);
|
||||
// Creates new set of lines from the computed columns
|
||||
bool AddLines(Pixa *lines);
|
||||
// Estimate the parameters of the font(s) used in the page
|
||||
bool EstimateFontParams();
|
||||
// perform a vertical Closing with the specified threshold
|
||||
// returning the resulting conn comps as a pixa
|
||||
Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
|
||||
// Index the specific pixa using RTL reading order
|
||||
int *IndexRTL(Pixa *pixa);
|
||||
// Implements a rudimentary page & line segmenter
|
||||
bool FindLines();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_LINE_SEGMENTER_H
|
@ -1,257 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_object.cpp
|
||||
* Description: Implementation of the Cube Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include "cube_object.h"
|
||||
#include "cube_utils.h"
|
||||
#include "word_list_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) {
|
||||
Init();
|
||||
char_samp_ = char_samp;
|
||||
cntxt_ = cntxt;
|
||||
}
|
||||
|
||||
CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix,
|
||||
int left, int top, int wid, int hgt) {
|
||||
Init();
|
||||
char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt);
|
||||
own_char_samp_ = true;
|
||||
cntxt_ = cntxt;
|
||||
}
|
||||
|
||||
// Data member initialization function
|
||||
void CubeObject::Init() {
|
||||
char_samp_ = NULL;
|
||||
own_char_samp_ = false;
|
||||
alt_list_ = NULL;
|
||||
srch_obj_ = NULL;
|
||||
deslanted_alt_list_ = NULL;
|
||||
deslanted_srch_obj_ = NULL;
|
||||
deslanted_ = false;
|
||||
deslanted_char_samp_ = NULL;
|
||||
beam_obj_ = NULL;
|
||||
deslanted_beam_obj_ = NULL;
|
||||
cntxt_ = NULL;
|
||||
}
|
||||
|
||||
// Cleanup function
|
||||
void CubeObject::Cleanup() {
|
||||
delete alt_list_;
|
||||
alt_list_ = NULL;
|
||||
|
||||
delete deslanted_alt_list_;
|
||||
deslanted_alt_list_ = NULL;
|
||||
}
|
||||
|
||||
CubeObject::~CubeObject() {
|
||||
if (own_char_samp_ == true) {
|
||||
delete char_samp_;
|
||||
char_samp_ = NULL;
|
||||
}
|
||||
|
||||
delete srch_obj_;
|
||||
srch_obj_ = NULL;
|
||||
|
||||
delete deslanted_srch_obj_;
|
||||
deslanted_srch_obj_ = NULL;
|
||||
|
||||
delete beam_obj_;
|
||||
beam_obj_ = NULL;
|
||||
|
||||
delete deslanted_beam_obj_;
|
||||
deslanted_beam_obj_ = NULL;
|
||||
|
||||
delete deslanted_char_samp_;
|
||||
deslanted_char_samp_ = NULL;
|
||||
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually do the recognition using the specified language mode. If none
|
||||
* is specified, the default language model in the CubeRecoContext is used.
|
||||
* @return the sorted list of alternate answers
|
||||
* @param word_mode determines whether recognition is done as a word or a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
|
||||
if (char_samp_ == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// clear alt lists
|
||||
Cleanup();
|
||||
|
||||
// no specified language model, use the one in the reco context
|
||||
if (lang_mod == NULL) {
|
||||
lang_mod = cntxt_->LangMod();
|
||||
}
|
||||
|
||||
// normalize if necessary
|
||||
if (cntxt_->SizeNormalization()) {
|
||||
Normalize();
|
||||
}
|
||||
|
||||
// assume not de-slanted by default
|
||||
deslanted_ = false;
|
||||
|
||||
// create a beam search object
|
||||
if (beam_obj_ == NULL) {
|
||||
beam_obj_ = new BeamSearch(cntxt_, word_mode);
|
||||
}
|
||||
|
||||
// create a cube search object
|
||||
if (srch_obj_ == NULL) {
|
||||
srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
|
||||
}
|
||||
|
||||
// run a beam search against the tesslang model
|
||||
alt_list_ = beam_obj_->Search(srch_obj_, lang_mod);
|
||||
|
||||
// deslant (if supported by language) and re-reco if probability is low enough
|
||||
if (cntxt_->HasItalics() == true &&
|
||||
(alt_list_ == NULL || alt_list_->AltCount() < 1 ||
|
||||
alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) {
|
||||
|
||||
if (deslanted_beam_obj_ == NULL) {
|
||||
deslanted_beam_obj_ = new BeamSearch(cntxt_);
|
||||
}
|
||||
|
||||
if (deslanted_srch_obj_ == NULL) {
|
||||
deslanted_char_samp_ = char_samp_->Clone();
|
||||
if (deslanted_char_samp_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
|
||||
"construct deslanted CharSamp\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (deslanted_char_samp_->Deslant() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
|
||||
}
|
||||
|
||||
// run a beam search against the tesslang model
|
||||
deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_,
|
||||
lang_mod);
|
||||
// should we use de-slanted altlist?
|
||||
if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) {
|
||||
if (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
|
||||
deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) {
|
||||
deslanted_ = true;
|
||||
return deslanted_alt_list_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return alt_list_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recognize the member char sample as a word
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recognize the member char sample as a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the cost of a specific string. This is done by performing
|
||||
* recognition of a language model that allows only the specified word
|
||||
*/
|
||||
int CubeObject::WordCost(const char *str) {
|
||||
WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
|
||||
|
||||
if (lang_mod->AddString(str) == false) {
|
||||
delete lang_mod;
|
||||
return WORST_COST;
|
||||
}
|
||||
|
||||
// run a beam search against the single string wordlist model
|
||||
WordAltList *alt_list = RecognizeWord(lang_mod);
|
||||
delete lang_mod;
|
||||
|
||||
int cost = WORST_COST;
|
||||
if (alt_list != NULL) {
|
||||
if (alt_list->AltCount() > 0) {
|
||||
cost = alt_list->AltCost(0);
|
||||
}
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
// Recognizes a single character and returns the list of results.
|
||||
CharAltList *CubeObject::RecognizeChar() {
|
||||
if (char_samp_ == NULL) return NULL;
|
||||
CharAltList* alt_list = NULL;
|
||||
CharClassifier *char_classifier = cntxt_->Classifier();
|
||||
ASSERT_HOST(char_classifier != NULL);
|
||||
alt_list = char_classifier->Classify(char_samp_);
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Normalize the input word bitmap to have a minimum aspect ratio
|
||||
bool CubeObject::Normalize() {
|
||||
// create a cube search object
|
||||
CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
|
||||
// Perform over-segmentation
|
||||
int seg_cnt = srch_obj->SegPtCnt();
|
||||
// Only perform normalization if segment count is large enough
|
||||
if (seg_cnt < kMinNormalizationSegmentCnt) {
|
||||
delete srch_obj;
|
||||
return true;
|
||||
}
|
||||
// compute the mean AR of the segments
|
||||
double ar_mean = 0.0;
|
||||
for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) {
|
||||
CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx);
|
||||
if (seg_samp != NULL && seg_samp->Width() > 0) {
|
||||
ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width());
|
||||
}
|
||||
}
|
||||
ar_mean /= (seg_cnt + 1);
|
||||
// perform normalization if segment AR is too high
|
||||
if (ar_mean > kMinNormalizationAspectRatio) {
|
||||
// scale down the image in the y-direction to attain AR
|
||||
CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(),
|
||||
2.0 * char_samp_->Height() / ar_mean,
|
||||
false);
|
||||
if (new_samp != NULL) {
|
||||
// free existing char samp if owned
|
||||
if (own_char_samp_) {
|
||||
delete char_samp_;
|
||||
}
|
||||
// update with new scaled charsamp and set ownership flag
|
||||
char_samp_ = new_samp;
|
||||
own_char_samp_ = true;
|
||||
}
|
||||
}
|
||||
delete srch_obj;
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,171 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_object.h
|
||||
* Description: Declaration of the Cube Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeObject class is the main class used to perform recognition of
|
||||
// a specific char_samp as a single word.
|
||||
// To recognize a word, a CubeObject is constructed for this word.
|
||||
// A Call to RecognizeWord is then issued specifying the language model that
|
||||
// will be used during recognition. If none is specified, the default language
|
||||
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
|
||||
// construction time
|
||||
//
|
||||
// The typical usage pattern for Cube is shown below:
|
||||
//
|
||||
// // Create and initialize Tesseract object and get its
|
||||
// // CubeRecoContext object (note that Tesseract object owns it,
|
||||
// // so it will be freed when the Tesseract object is freed).
|
||||
// tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
|
||||
// tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
|
||||
// CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
|
||||
// CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
|
||||
// .
|
||||
// .
|
||||
// .
|
||||
// // Do this to recognize a word in pix whose co-ordinates are
|
||||
// // (left,top,width,height)
|
||||
// tesseract::CubeObject *cube_obj;
|
||||
// cube_obj = new tesseract::CubeObject(cntxt, pix,
|
||||
// left, top, width, height);
|
||||
//
|
||||
// // Get back Cube's list of answers
|
||||
// tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
|
||||
// CHECK(alt_list != NULL && alt_list->AltCount() > 0);
|
||||
//
|
||||
// // Get the string and cost of every alternate
|
||||
// for (int alt = 0; alt < alt_list->AltCount(); alt++) {
|
||||
// // Return the result as a UTF-32 string
|
||||
// string_32 res_str32 = alt_list->Alt(alt);
|
||||
// // Convert to UTF8 if need-be
|
||||
// string res_str;
|
||||
// CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
|
||||
// // Get the string cost. This should get bigger as you go deeper
|
||||
// // in the list
|
||||
// int cost = alt_list->AltCost(alt);
|
||||
// }
|
||||
//
|
||||
// // Call this once you are done recognizing this word
|
||||
// delete cube_obj;
|
||||
//
|
||||
// // Call this once you are done recognizing all words with
|
||||
// // for the current language
|
||||
// delete tess_obj;
|
||||
//
|
||||
// Note that if the language supports "Italics" (see the CubeRecoContext), the
|
||||
// RecognizeWord function attempts to de-slant the word.
|
||||
|
||||
#ifndef CUBE_OBJECT_H
|
||||
#define CUBE_OBJECT_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "word_altlist.h"
|
||||
#include "beam_search.h"
|
||||
#include "cube_search_object.h"
|
||||
#include "tess_lang_model.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// minimum aspect ratio needed to normalize a char_samp before recognition
|
||||
static const float kMinNormalizationAspectRatio = 3.5;
|
||||
// minimum probability a top alt choice must meet before having
|
||||
// deslanted processing applied to it
|
||||
static const float kMinProbSkipDeslanted = 0.25;
|
||||
|
||||
class CubeObject {
|
||||
public:
|
||||
// Different flavors of constructor. They just differ in the way the
|
||||
// word image is specified
|
||||
CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
|
||||
CubeObject(CubeRecoContext *cntxt, Pix *pix,
|
||||
int left, int top, int wid, int hgt);
|
||||
~CubeObject();
|
||||
|
||||
// Perform the word recognition using the specified language mode. If none
|
||||
// is specified, the default language model in the CubeRecoContext is used.
|
||||
// Returns the sorted list of alternate word answers
|
||||
WordAltList *RecognizeWord(LangModel *lang_mod = NULL);
|
||||
// Same as RecognizeWord but recognizes as a phrase
|
||||
WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);
|
||||
// Computes the cost of a specific string. This is done by performing
|
||||
// recognition of a language model that allows only the specified word.
|
||||
// The alternate list(s) will be permanently modified.
|
||||
int WordCost(const char *str);
|
||||
// Recognizes a single character and returns the list of results.
|
||||
CharAltList *RecognizeChar();
|
||||
|
||||
// Returns the BeamSearch object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline BeamSearch *BeamObj() const {
|
||||
return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_);
|
||||
}
|
||||
// Returns the WordAltList object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline WordAltList *AlternateList() const {
|
||||
return (deslanted_ == true ? deslanted_alt_list_ : alt_list_);
|
||||
}
|
||||
// Returns the CubeSearchObject object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline CubeSearchObject *SrchObj() const {
|
||||
return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_);
|
||||
}
|
||||
// Returns the CharSamp object that resulted from the last call to
|
||||
// RecognizeWord. Note that this object is not necessarily identical to the
|
||||
// one passed at construction time as normalization might have occurred
|
||||
inline CharSamp *CharSample() const {
|
||||
return (deslanted_ == true ? deslanted_char_samp_ : char_samp_);
|
||||
}
|
||||
|
||||
// Set the ownership of the CharSamp
|
||||
inline void SetCharSampOwnership(bool own_char_samp) {
|
||||
own_char_samp_ = own_char_samp;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Normalize the CharSamp if its aspect ratio exceeds the below constant.
|
||||
bool Normalize();
|
||||
|
||||
private:
|
||||
// minimum segment count needed to normalize a char_samp before recognition
|
||||
static const int kMinNormalizationSegmentCnt = 4;
|
||||
|
||||
// Data member initialization function
|
||||
void Init();
|
||||
// Free alternate lists.
|
||||
void Cleanup();
|
||||
// Perform the actual recognition using the specified language mode. If none
|
||||
// is specified, the default language model in the CubeRecoContext is used.
|
||||
// Returns the sorted list of alternate answers. Called by both
|
||||
// RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false)
|
||||
WordAltList *Recognize(LangModel *lang_mod, bool word_mode);
|
||||
|
||||
CubeRecoContext *cntxt_;
|
||||
BeamSearch *beam_obj_;
|
||||
BeamSearch *deslanted_beam_obj_;
|
||||
bool own_char_samp_;
|
||||
bool deslanted_;
|
||||
CharSamp *char_samp_;
|
||||
CharSamp *deslanted_char_samp_;
|
||||
CubeSearchObject *srch_obj_;
|
||||
CubeSearchObject *deslanted_srch_obj_;
|
||||
WordAltList *alt_list_;
|
||||
WordAltList *deslanted_alt_list_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_OBJECT_H
|
@ -1,421 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_search_object.cpp
|
||||
* Description: Implementation of the Cube Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "cube_search_object.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const bool CubeSearchObject::kUseCroppedChars = true;
|
||||
|
||||
CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp)
|
||||
: SearchObject(cntxt) {
|
||||
init_ = false;
|
||||
reco_cache_ = NULL;
|
||||
samp_cache_ = NULL;
|
||||
segments_ = NULL;
|
||||
segment_cnt_ = 0;
|
||||
samp_ = samp;
|
||||
left_ = 0;
|
||||
itop_ = 0;
|
||||
space_cost_ = NULL;
|
||||
no_space_cost_ = NULL;
|
||||
wid_ = samp_->Width();
|
||||
hgt_ = samp_->Height();
|
||||
max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar();
|
||||
rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L);
|
||||
min_spc_gap_ =
|
||||
static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio());
|
||||
max_spc_gap_ =
|
||||
static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
}
|
||||
|
||||
CubeSearchObject::~CubeSearchObject() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
void CubeSearchObject::Cleanup() {
|
||||
// delete Recognition Cache
|
||||
if (reco_cache_) {
|
||||
for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
|
||||
if (reco_cache_[strt_seg]) {
|
||||
for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
|
||||
if (reco_cache_[strt_seg][end_seg]) {
|
||||
delete reco_cache_[strt_seg][end_seg];
|
||||
}
|
||||
}
|
||||
delete []reco_cache_[strt_seg];
|
||||
}
|
||||
}
|
||||
delete []reco_cache_;
|
||||
reco_cache_ = NULL;
|
||||
}
|
||||
|
||||
// delete CharSamp Cache
|
||||
if (samp_cache_) {
|
||||
for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
|
||||
if (samp_cache_[strt_seg]) {
|
||||
for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
|
||||
if (samp_cache_[strt_seg][end_seg]) {
|
||||
delete samp_cache_[strt_seg][end_seg];
|
||||
}
|
||||
}
|
||||
delete []samp_cache_[strt_seg];
|
||||
}
|
||||
}
|
||||
delete []samp_cache_;
|
||||
samp_cache_ = NULL;
|
||||
}
|
||||
|
||||
// delete segment list
|
||||
if (segments_) {
|
||||
for (int seg = 0; seg < segment_cnt_; seg++) {
|
||||
if (segments_[seg]) {
|
||||
delete segments_[seg];
|
||||
}
|
||||
}
|
||||
delete []segments_;
|
||||
segments_ = NULL;
|
||||
}
|
||||
|
||||
if (space_cost_) {
|
||||
delete []space_cost_;
|
||||
space_cost_ = NULL;
|
||||
}
|
||||
|
||||
if (no_space_cost_) {
|
||||
delete []no_space_cost_;
|
||||
no_space_cost_ = NULL;
|
||||
}
|
||||
|
||||
segment_cnt_ = 0;
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// # of segmentation points. One less than the count of segments
|
||||
int CubeSearchObject::SegPtCnt() {
|
||||
if (!init_ && !Init())
|
||||
return -1;
|
||||
return segment_cnt_ - 1;
|
||||
}
|
||||
|
||||
// init and allocate variables, perform segmentation
|
||||
bool CubeSearchObject::Init() {
|
||||
if (init_)
|
||||
return true;
|
||||
if (!Segment()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// init cache
|
||||
reco_cache_ = new CharAltList **[segment_cnt_];
|
||||
|
||||
samp_cache_ = new CharSamp **[segment_cnt_];
|
||||
|
||||
for (int seg = 0; seg < segment_cnt_; seg++) {
|
||||
reco_cache_[seg] = new CharAltList *[segment_cnt_];
|
||||
memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
|
||||
|
||||
samp_cache_[seg] = new CharSamp *[segment_cnt_];
|
||||
memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns a char sample corresponding to the bitmap between 2 seg pts
|
||||
CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
|
||||
// init if necessary
|
||||
if (!init_ && !Init())
|
||||
return NULL;
|
||||
// validate segment range
|
||||
if (!IsValidSegmentRange(start_pt, end_pt))
|
||||
return NULL;
|
||||
|
||||
// look for the samp in the cache
|
||||
if (samp_cache_ && samp_cache_[start_pt + 1] &&
|
||||
samp_cache_[start_pt + 1][end_pt]) {
|
||||
return samp_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
// create a char samp object from the specified range of segments
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
|
||||
end_pt - start_pt, NULL,
|
||||
&left_most, &right_most, hgt_);
|
||||
if (!samp)
|
||||
return NULL;
|
||||
|
||||
if (kUseCroppedChars) {
|
||||
CharSamp *cropped_samp = samp->Crop();
|
||||
// we no longer need the orig sample
|
||||
delete samp;
|
||||
if (!cropped_samp)
|
||||
return NULL;
|
||||
samp = cropped_samp;
|
||||
}
|
||||
|
||||
// get the dimensions of the new cropped sample
|
||||
int char_top = samp->Top();
|
||||
int char_wid = samp->Width();
|
||||
int char_hgt = samp->Height();
|
||||
|
||||
// for cursive languages, these features correspond to whether
|
||||
// the charsamp is at the beginning or end of conncomp
|
||||
if (cntxt_->Cursive() == true) {
|
||||
// first and last char flags depend on reading order
|
||||
bool first_char = rtl_ ? right_most : left_most;
|
||||
bool last_char = rtl_ ? left_most : right_most;
|
||||
|
||||
samp->SetFirstChar(first_char ? 255 : 0);
|
||||
samp->SetLastChar(last_char ? 255 : 0);
|
||||
} else {
|
||||
// for non cursive languages, these features correspond
|
||||
// to whether the charsamp is at the beginning or end of the word
|
||||
samp->SetFirstChar((start_pt == -1) ? 255 : 0);
|
||||
samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
|
||||
}
|
||||
samp->SetNormTop(255 * char_top / hgt_);
|
||||
samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_);
|
||||
samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt));
|
||||
|
||||
// add to cache & return
|
||||
samp_cache_[start_pt + 1][end_pt] = samp;
|
||||
return samp;
|
||||
}
|
||||
|
||||
Box *CubeSearchObject::CharBox(int start_pt, int end_pt) {
|
||||
if (!init_ && !Init())
|
||||
return NULL;
|
||||
if (!IsValidSegmentRange(start_pt, end_pt)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid "
|
||||
"segment range (%d, %d)\n", start_pt, end_pt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a char samp object from the specified range of segments,
|
||||
// extract its dimensions into a leptonica box, and delete it
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
|
||||
end_pt - start_pt, NULL,
|
||||
&left_most, &right_most, hgt_);
|
||||
if (!samp)
|
||||
return NULL;
|
||||
if (kUseCroppedChars) {
|
||||
CharSamp *cropped_samp = samp->Crop();
|
||||
delete samp;
|
||||
if (!cropped_samp) {
|
||||
return NULL;
|
||||
}
|
||||
samp = cropped_samp;
|
||||
}
|
||||
Box *box = boxCreate(samp->Left(), samp->Top(),
|
||||
samp->Width(), samp->Height());
|
||||
delete samp;
|
||||
return box;
|
||||
}
|
||||
|
||||
// call from Beam Search to return the alt list corresponding to
|
||||
// recognizing the bitmap between two segmentation pts
|
||||
CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
|
||||
// init if necessary
|
||||
if (!init_ && !Init()) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
|
||||
"not initialize CubeSearchObject\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// validate segment range
|
||||
if (!IsValidSegmentRange(start_pt, end_pt)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid "
|
||||
"segment range (%d, %d)\n", start_pt, end_pt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// look for the recognition results in cache in the cache
|
||||
if (reco_cache_ && reco_cache_[start_pt + 1] &&
|
||||
reco_cache_[start_pt + 1][end_pt]) {
|
||||
return reco_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
|
||||
// create the char sample corresponding to the blob
|
||||
CharSamp *samp = CharSample(start_pt, end_pt);
|
||||
if (!samp) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
|
||||
"not construct CharSamp\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// recognize the char sample
|
||||
CharClassifier *char_classifier = cntxt_->Classifier();
|
||||
if (char_classifier) {
|
||||
reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp);
|
||||
} else {
|
||||
// no classifer: all characters are equally probable; add a penalty
|
||||
// that favors 2-segment characters and aspect ratios (w/h) > 1
|
||||
fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube "
|
||||
"context has no character classifier!! Inventing a probability "
|
||||
"distribution.\n");
|
||||
int class_cnt = cntxt_->CharacterSet()->ClassCount();
|
||||
CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
|
||||
int seg_cnt = end_pt - start_pt;
|
||||
double prob_val = (1.0 / class_cnt) *
|
||||
exp(-fabs(seg_cnt - 2.0)) *
|
||||
exp(-samp->Width() / static_cast<double>(samp->Height()));
|
||||
|
||||
for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
|
||||
alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
|
||||
}
|
||||
reco_cache_[start_pt + 1][end_pt] = alt_list;
|
||||
}
|
||||
|
||||
return reco_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
|
||||
// Perform segmentation of the bitmap by detecting connected components,
|
||||
// segmenting each connected component using windowed vertical pixel density
|
||||
// histogram and sorting the resulting segments in reading order
|
||||
bool CubeSearchObject::Segment() {
|
||||
if (!samp_)
|
||||
return false;
|
||||
segment_cnt_ = 0;
|
||||
segments_ = samp_->Segment(&segment_cnt_, rtl_,
|
||||
cntxt_->Params()->HistWindWid(),
|
||||
cntxt_->Params()->MinConCompSize());
|
||||
if (!segments_ || segment_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
if (segment_cnt_ >= kMaxSegmentCnt) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// computes the space and no space costs at gaps between segments
|
||||
bool CubeSearchObject::ComputeSpaceCosts() {
|
||||
// init if necessary
|
||||
if (!init_ && !Init())
|
||||
return false;
|
||||
|
||||
// Already computed
|
||||
if (space_cost_)
|
||||
return true;
|
||||
|
||||
// No segmentation points
|
||||
if (segment_cnt_ < 2)
|
||||
return false;
|
||||
|
||||
// Compute the maximum x to the left of and minimum x to the right of each
|
||||
// segmentation point
|
||||
int *max_left_x = new int[segment_cnt_ - 1];
|
||||
int *min_right_x = new int[segment_cnt_ - 1];
|
||||
if (rtl_) {
|
||||
min_right_x[0] = segments_[0]->Left();
|
||||
max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
|
||||
for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
min_right_x[pt_idx] =
|
||||
MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left());
|
||||
max_left_x[segment_cnt_ - pt_idx - 2] =
|
||||
MAX(max_left_x[segment_cnt_ - pt_idx - 1],
|
||||
segments_[segment_cnt_ - pt_idx - 1]->Right());
|
||||
}
|
||||
} else {
|
||||
min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left();
|
||||
max_left_x[0] = segments_[0]->Right();
|
||||
for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
min_right_x[segment_cnt_ - pt_idx - 2] =
|
||||
MIN(min_right_x[segment_cnt_ - pt_idx - 1],
|
||||
segments_[segment_cnt_ - pt_idx - 1]->Left());
|
||||
max_left_x[pt_idx] =
|
||||
MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right());
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate memory for space and no space costs
|
||||
// trivial cases
|
||||
space_cost_ = new int[segment_cnt_ - 1];
|
||||
no_space_cost_ = new int[segment_cnt_ - 1];
|
||||
|
||||
// go through all segmentation points determining the horizontal gap between
|
||||
// the images on both sides of each break points. Use the gap to estimate
|
||||
// the probability of a space. The probability is modeled a linear function
|
||||
// of the gap width
|
||||
for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
// determine the gap at the segmentation point
|
||||
int gap = min_right_x[pt_idx] - max_left_x[pt_idx];
|
||||
float prob = 0.0;
|
||||
|
||||
// gap is too small => no space
|
||||
if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) {
|
||||
prob = 0.0;
|
||||
} else if (gap > max_spc_gap_) {
|
||||
// gap is too big => definite space
|
||||
prob = 1.0;
|
||||
} else {
|
||||
// gap is somewhere in between, compute probability
|
||||
prob = (gap - min_spc_gap_) /
|
||||
static_cast<double>(max_spc_gap_ - min_spc_gap_);
|
||||
}
|
||||
|
||||
// compute cost of space and non-space
|
||||
space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) +
|
||||
CubeUtils::Prob2Cost(0.1);
|
||||
no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob);
|
||||
}
|
||||
|
||||
delete []min_right_x;
|
||||
delete []max_left_x;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the cost of having a space before the specified segmentation point
|
||||
int CubeSearchObject::SpaceCost(int pt_idx) {
|
||||
if (!space_cost_ && !ComputeSpaceCosts()) {
|
||||
// Failed to compute costs return a zero prob
|
||||
return CubeUtils::Prob2Cost(0.0);
|
||||
}
|
||||
return space_cost_[pt_idx];
|
||||
}
|
||||
|
||||
// Returns the cost of not having a space before the specified
|
||||
// segmentation point
|
||||
int CubeSearchObject::NoSpaceCost(int pt_idx) {
|
||||
// If failed to compute costs, return a 1.0 prob
|
||||
if (!space_cost_ && !ComputeSpaceCosts())
|
||||
return CubeUtils::Prob2Cost(0.0);
|
||||
return no_space_cost_[pt_idx];
|
||||
}
|
||||
|
||||
// Returns the cost of not having any spaces within the specified range
|
||||
// of segmentation points
|
||||
int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) {
|
||||
// If fail to compute costs, return a 1.0 prob
|
||||
if (!space_cost_ && !ComputeSpaceCosts())
|
||||
return CubeUtils::Prob2Cost(1.0);
|
||||
int no_spc_cost = 0;
|
||||
for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++)
|
||||
no_spc_cost += NoSpaceCost(pt_idx);
|
||||
return no_spc_cost;
|
||||
}
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_search_object.h
|
||||
* Description: Declaration of the Cube Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeSearchObject class represents a char_samp (a word bitmap) that is
|
||||
// being searched for characters (or recognizeable entities).
|
||||
// The Class detects the connected components and peforms an oversegmentation
|
||||
// on each ConComp. The result of which is a list of segments that are ordered
|
||||
// in reading order.
|
||||
// The class provided methods that inquire about the number of segments, the
|
||||
// CharSamp corresponding to any segment range and the recognition results
|
||||
// of any segment range
|
||||
// An object of Class CubeSearchObject is used by the BeamSearch algorithm
|
||||
// to recognize a CharSamp into a list of word alternates
|
||||
|
||||
#ifndef CUBE_SEARCH_OBJECT_H
|
||||
#define CUBE_SEARCH_OBJECT_H
|
||||
|
||||
#include "search_object.h"
|
||||
#include "char_samp.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeSearchObject : public SearchObject {
|
||||
public:
|
||||
CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp);
|
||||
~CubeSearchObject();
|
||||
|
||||
// returns the Segmentation Point count of the CharSamp owned by the class
|
||||
int SegPtCnt();
|
||||
// Recognize the set of segments given by the specified range and return
|
||||
// a list of possible alternate answers
|
||||
CharAltList * RecognizeSegment(int start_pt, int end_pt);
|
||||
// Returns the CharSamp corresponding to the specified segment range
|
||||
CharSamp *CharSample(int start_pt, int end_pt);
|
||||
// Returns a leptonica box corresponding to the specified segment range
|
||||
Box *CharBox(int start_pt, int end_pt);
|
||||
// Returns the cost of having a space before the specified segmentation pt
|
||||
int SpaceCost(int seg_pt);
|
||||
// Returns the cost of not having a space before the specified
|
||||
// segmentation pt
|
||||
int NoSpaceCost(int seg_pt);
|
||||
// Returns the cost of not having any spaces within the specified range
|
||||
// of segmentation points
|
||||
int NoSpaceCost(int seg_pt, int end_pt);
|
||||
|
||||
private:
|
||||
// Maximum reasonable segment count
|
||||
static const int kMaxSegmentCnt = 128;
|
||||
// Use cropped samples
|
||||
static const bool kUseCroppedChars;
|
||||
|
||||
// reading order flag
|
||||
bool rtl_;
|
||||
// cached dimensions of char samp
|
||||
int left_;
|
||||
int itop_;
|
||||
int wid_;
|
||||
int hgt_;
|
||||
// minimum and maximum and possible inter-segment gaps for spaces
|
||||
int min_spc_gap_;
|
||||
int max_spc_gap_;
|
||||
// initialization flag
|
||||
bool init_;
|
||||
// maximum segments per character: Cached from tuning parameters object
|
||||
int max_seg_per_char_;
|
||||
// char sample to be processed
|
||||
CharSamp *samp_;
|
||||
// segment count
|
||||
int segment_cnt_;
|
||||
// segments of the processed char samp
|
||||
ConComp **segments_;
|
||||
// Cache data members:
|
||||
// There are two caches kept; a CharSamp cache and a CharAltList cache
|
||||
// Each is a 2-D array of CharSamp and CharAltList pointers respectively
|
||||
// hence the triple pointer.
|
||||
CharAltList ***reco_cache_;
|
||||
CharSamp ***samp_cache_;
|
||||
// Cached costs of space and no-space after every segment. Computed only
|
||||
// in phrase mode
|
||||
int *space_cost_;
|
||||
int *no_space_cost_;
|
||||
|
||||
// init and allocate variables, perform segmentation
|
||||
bool Init();
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Perform segmentation of the bitmap by detecting connected components,
|
||||
// segmenting each connected component using windowed vertical pixel density
|
||||
// histogram and sorting the resulting segments in reading order
|
||||
// Returns true on success
|
||||
bool Segment();
|
||||
// validate the segment ranges.
|
||||
inline bool IsValidSegmentRange(int start_pt, int end_pt) {
|
||||
return (end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ &&
|
||||
end_pt >= 0 && end_pt <= segment_cnt_ &&
|
||||
end_pt <= (start_pt + max_seg_per_char_));
|
||||
}
|
||||
// computes the space and no space costs at gaps between segments
|
||||
// return true on success
|
||||
bool ComputeSpaceCosts();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_SEARCH_OBJECT_H
|
@ -1,213 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_tuning_params.cpp
|
||||
* Description: Implementation of the CubeTuningParameters Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "cube_tuning_params.h"
|
||||
#include "tuning_params.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeTuningParams::CubeTuningParams() {
|
||||
reco_wgt_ = 1.0;
|
||||
size_wgt_ = 1.0;
|
||||
char_bigrams_wgt_ = 1.0;
|
||||
word_unigrams_wgt_ = 0.0;
|
||||
max_seg_per_char_ = 8;
|
||||
beam_width_ = 32;
|
||||
tp_classifier_ = NN;
|
||||
tp_feat_ = BMP;
|
||||
conv_grid_size_ = 32;
|
||||
hist_wind_wid_ = 0;
|
||||
max_word_aspect_ratio_ = 10.0;
|
||||
min_space_height_ratio_ = 0.2;
|
||||
max_space_height_ratio_ = 0.3;
|
||||
min_con_comp_size_ = 0;
|
||||
combiner_run_thresh_ = 1.0;
|
||||
combiner_classifier_thresh_ = 0.5;
|
||||
ood_wgt_ = 1.0;
|
||||
num_wgt_ = 1.0;
|
||||
|
||||
}
|
||||
|
||||
CubeTuningParams::~CubeTuningParams() {
|
||||
}
|
||||
|
||||
// Create an Object given the data file path and the language by loading
|
||||
// the approporiate file
|
||||
CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
CubeTuningParams *obj = new CubeTuningParams();
|
||||
|
||||
string tuning_params_file;
|
||||
tuning_params_file = data_file_path + lang;
|
||||
tuning_params_file += ".cube.params";
|
||||
|
||||
if (!obj->Load(tuning_params_file)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
|
||||
"load tuning parameters from %s\n", tuning_params_file.c_str());
|
||||
delete obj;
|
||||
obj = NULL;
|
||||
}
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
// Loads the params file
|
||||
bool CubeTuningParams::Load(string tuning_params_file) {
|
||||
// load the string into memory
|
||||
string param_str;
|
||||
|
||||
if (CubeUtils::ReadFileToString(tuning_params_file, ¶m_str) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
|
||||
"file %s\n", tuning_params_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
|
||||
if (str_vec.size() < 8) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
|
||||
"in parameter file is too low\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// for all entries
|
||||
for (int entry = 0; entry < str_vec.size(); entry++) {
|
||||
// tokenize
|
||||
vector<string> str_tok;
|
||||
|
||||
// should be only two tokens
|
||||
CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
|
||||
if (str_tok.size() != 2) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
|
||||
"line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
double val = 0;
|
||||
char peekchar = (str_tok[1].c_str())[0];
|
||||
if ((peekchar >= '0' && peekchar <= '9') ||
|
||||
peekchar == '-' || peekchar == '+' ||
|
||||
peekchar == '.') {
|
||||
// read the value
|
||||
if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
|
||||
"in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// token type
|
||||
if (str_tok[0] == "RecoWgt") {
|
||||
reco_wgt_ = val;
|
||||
} else if (str_tok[0] == "SizeWgt") {
|
||||
size_wgt_ = val;
|
||||
} else if (str_tok[0] == "CharBigramsWgt") {
|
||||
char_bigrams_wgt_ = val;
|
||||
} else if (str_tok[0] == "WordUnigramsWgt") {
|
||||
word_unigrams_wgt_ = val;
|
||||
} else if (str_tok[0] == "MaxSegPerChar") {
|
||||
max_seg_per_char_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "BeamWidth") {
|
||||
beam_width_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "Classifier") {
|
||||
if (str_tok[1] == "NN") {
|
||||
tp_classifier_ = TuningParams::NN;
|
||||
} else if (str_tok[1] == "HYBRID_NN") {
|
||||
tp_classifier_ = TuningParams::HYBRID_NN;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
|
||||
"classifier type in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (str_tok[0] == "FeatureType") {
|
||||
if (str_tok[1] == "BMP") {
|
||||
tp_feat_ = TuningParams::BMP;
|
||||
} else if (str_tok[1] == "CHEBYSHEV") {
|
||||
tp_feat_ = TuningParams::CHEBYSHEV;
|
||||
} else if (str_tok[1] == "HYBRID") {
|
||||
tp_feat_ = TuningParams::HYBRID;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
|
||||
"type in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (str_tok[0] == "ConvGridSize") {
|
||||
conv_grid_size_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "HistWindWid") {
|
||||
hist_wind_wid_ = val;
|
||||
} else if (str_tok[0] == "MinConCompSize") {
|
||||
min_con_comp_size_ = val;
|
||||
} else if (str_tok[0] == "MaxWordAspectRatio") {
|
||||
max_word_aspect_ratio_ = val;
|
||||
} else if (str_tok[0] == "MinSpaceHeightRatio") {
|
||||
min_space_height_ratio_ = val;
|
||||
} else if (str_tok[0] == "MaxSpaceHeightRatio") {
|
||||
max_space_height_ratio_ = val;
|
||||
} else if (str_tok[0] == "CombinerRunThresh") {
|
||||
combiner_run_thresh_ = val;
|
||||
} else if (str_tok[0] == "CombinerClassifierThresh") {
|
||||
combiner_classifier_thresh_ = val;
|
||||
} else if (str_tok[0] == "OODWgt") {
|
||||
ood_wgt_ = val;
|
||||
} else if (str_tok[0] == "NumWgt") {
|
||||
num_wgt_ = val;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
|
||||
"in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Save the parameters to a file
|
||||
bool CubeTuningParams::Save(string file_name) {
|
||||
FILE *params_file = fopen(file_name.c_str(), "wb");
|
||||
if (params_file == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
|
||||
"%s for write.\n", file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
|
||||
fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
|
||||
fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
|
||||
fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
|
||||
fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
|
||||
fprintf(params_file, "BeamWidth=%d\n", beam_width_);
|
||||
fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
|
||||
fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
|
||||
fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
|
||||
fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
|
||||
fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
|
||||
fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
|
||||
fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
|
||||
fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
|
||||
combiner_classifier_thresh_);
|
||||
fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
|
||||
fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
|
||||
|
||||
fclose(params_file);
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_tuning_params.h
|
||||
* Description: Declaration of the CubeTuningParameters Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeTuningParams class abstracts all the parameters that are used
|
||||
// in Cube and are tuned/learned during the training process. Inherits
|
||||
// from the TuningParams class.
|
||||
|
||||
#ifndef CUBE_TUNING_PARAMS_H
|
||||
#define CUBE_TUNING_PARAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeTuningParams : public TuningParams {
|
||||
public:
|
||||
CubeTuningParams();
|
||||
~CubeTuningParams();
|
||||
|
||||
// Accessor functions
|
||||
inline double OODWgt() { return ood_wgt_; }
|
||||
inline double NumWgt() { return num_wgt_; }
|
||||
|
||||
inline void SetOODWgt(double wgt) { ood_wgt_ = wgt; }
|
||||
inline void SetNumWgt(double wgt) { num_wgt_ = wgt; }
|
||||
|
||||
// Create an object given the data file path and the language by loading
|
||||
// the approporiate file
|
||||
static CubeTuningParams * Create(const string &data_file,
|
||||
const string &lang);
|
||||
// Save and load the tuning parameters to a specified file
|
||||
bool Save(string file_name);
|
||||
bool Load(string file_name);
|
||||
|
||||
private:
|
||||
double ood_wgt_;
|
||||
double num_wgt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_TUNING_PARAMS_H
|
@ -1,399 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_utils.cpp
|
||||
* Description: Implementation of the Cube Utilities Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "cube_utils.h"
|
||||
#include "char_set.h"
|
||||
#include "unichar.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeUtils::CubeUtils() {
|
||||
}
|
||||
|
||||
CubeUtils::~CubeUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
* convert a prob to a cost (-ve log prob)
|
||||
*/
|
||||
int CubeUtils::Prob2Cost(double prob_val) {
|
||||
if (prob_val < MIN_PROB) {
|
||||
return MIN_PROB_COST;
|
||||
}
|
||||
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
/**
|
||||
* converts a cost to probability
|
||||
*/
|
||||
double CubeUtils::Cost2Prob(int cost) {
|
||||
return exp(-cost / PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
/**
|
||||
* computes the length of a NULL terminated char_32 string
|
||||
*/
|
||||
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
||||
if (char_32_ptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
int len = -1;
|
||||
while (char_32_ptr[++len]);
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* compares two char_32 strings
|
||||
*/
|
||||
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
||||
const char_32 *pch1 = str1;
|
||||
const char_32 *pch2 = str2;
|
||||
|
||||
for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
|
||||
if ((*pch1) != (*pch2)) {
|
||||
return (*pch1) - (*pch2);
|
||||
}
|
||||
}
|
||||
|
||||
if ((*pch1) == 0) {
|
||||
if ((*pch2) == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Duplicates a 32-bit char buffer
|
||||
*/
|
||||
char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
||||
int len = StrLen(str32);
|
||||
char_32 *new_str = new char_32[len + 1];
|
||||
memcpy(new_str, str32, len * sizeof(*str32));
|
||||
new_str[len] = 0;
|
||||
return new_str;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a char samp from a specified portion of the image
|
||||
*/
|
||||
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// get the raw img data from the image
|
||||
unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
|
||||
if (temp_buff == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a char samp from temp buffer
|
||||
CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
|
||||
|
||||
// clean up temp buffer
|
||||
delete []temp_buff;
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a B/W image from a char_sample
|
||||
*/
|
||||
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
||||
// parameter check
|
||||
if (char_samp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// get the raw data
|
||||
int stride = char_samp->Stride();
|
||||
int wid = char_samp->Width();
|
||||
int hgt = char_samp->Height();
|
||||
|
||||
Pix *pix = pixCreate(wid, hgt, 1);
|
||||
if (pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// copy the contents
|
||||
unsigned char *line = char_samp->RawData();
|
||||
for (int y = 0; y < hgt ; y++, line += stride) {
|
||||
for (int x = 0; x < wid; x++) {
|
||||
if (line[x] != 0) {
|
||||
pixSetPixel(pix, x, y, 0);
|
||||
} else {
|
||||
pixSetPixel(pix, x, y, 255);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pix;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a raw buffer from the specified location of the pix
|
||||
*/
|
||||
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// skip invalid dimensions
|
||||
if (left < 0 || top < 0 || wid < 0 || hgt < 0 ||
|
||||
(left + wid) > pix->w || (top + hgt) > pix->h ||
|
||||
pix->d != 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// copy the char img to a temp buffer
|
||||
unsigned char *temp_buff = new unsigned char[wid * hgt];
|
||||
l_int32 w;
|
||||
l_int32 h;
|
||||
l_int32 d;
|
||||
l_int32 wpl;
|
||||
l_uint32 *line;
|
||||
l_uint32 *data;
|
||||
|
||||
pixGetDimensions(pix, &w, &h, &d);
|
||||
wpl = pixGetWpl(pix);
|
||||
data = pixGetData(pix);
|
||||
line = data + (top * wpl);
|
||||
|
||||
for (int y = 0, off = 0; y < hgt ; y++) {
|
||||
for (int x = 0; x < wid; x++, off++) {
|
||||
temp_buff[off] = GET_DATA_BIT(line, x + left) ? 0 : 255;
|
||||
}
|
||||
line += wpl;
|
||||
}
|
||||
return temp_buff;
|
||||
}
|
||||
|
||||
/**
|
||||
* read file contents to a string
|
||||
*/
|
||||
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
||||
str->clear();
|
||||
FILE *fp = fopen(file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// get the size of the size
|
||||
fseek(fp, 0, SEEK_END);
|
||||
int file_size = ftell(fp);
|
||||
if (file_size < 1) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
// adjust string size
|
||||
str->reserve(file_size);
|
||||
// read the contents
|
||||
rewind(fp);
|
||||
char *buff = new char[file_size];
|
||||
int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
|
||||
if (read_bytes == file_size) {
|
||||
str->append(buff, file_size);
|
||||
}
|
||||
delete []buff;
|
||||
fclose(fp);
|
||||
return (read_bytes == file_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* splits a string into vectors based on specified delimiters
|
||||
*/
|
||||
void CubeUtils::SplitStringUsing(const string &str,
|
||||
const string &delims,
|
||||
vector<string> *str_vec) {
|
||||
// Optimize the common case where delims is a single character.
|
||||
if (delims[0] != '\0' && delims[1] == '\0') {
|
||||
char c = delims[0];
|
||||
const char* p = str.data();
|
||||
const char* end = p + str.size();
|
||||
while (p != end) {
|
||||
if (*p == c) {
|
||||
++p;
|
||||
} else {
|
||||
const char* start = p;
|
||||
while (++p != end && *p != c);
|
||||
str_vec->push_back(string(start, p - start));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
string::size_type begin_index, end_index;
|
||||
begin_index = str.find_first_not_of(delims);
|
||||
while (begin_index != string::npos) {
|
||||
end_index = str.find_first_of(delims, begin_index);
|
||||
if (end_index == string::npos) {
|
||||
str_vec->push_back(str.substr(begin_index));
|
||||
return;
|
||||
}
|
||||
str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
|
||||
begin_index = str.find_first_not_of(delims, end_index);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-8 to UTF-32 conversion functions
|
||||
*/
|
||||
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
||||
str32->clear();
|
||||
int len = strlen(utf8_str);
|
||||
int step = 0;
|
||||
for (int ch = 0; ch < len; ch += step) {
|
||||
step = UNICHAR::utf8_step(utf8_str + ch);
|
||||
if (step > 0) {
|
||||
UNICHAR uni_ch(utf8_str + ch, step);
|
||||
(*str32) += uni_ch.first_uni();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-32 to UTF-8 conversion functions
|
||||
*/
|
||||
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
|
||||
str->clear();
|
||||
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
|
||||
UNICHAR uni_ch((*ch_32));
|
||||
char *utf8 = uni_ch.utf8_str();
|
||||
if (utf8 != NULL) {
|
||||
(*str) += utf8;
|
||||
delete []utf8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool CubeUtils::IsCaseInvariant(const char_32 *str32, CharSet *char_set) {
|
||||
bool all_one_case = true;
|
||||
bool capitalized;
|
||||
bool prev_upper;
|
||||
bool prev_lower;
|
||||
bool first_upper;
|
||||
bool first_lower;
|
||||
bool cur_upper;
|
||||
bool cur_lower;
|
||||
|
||||
string str8;
|
||||
if (!char_set) {
|
||||
// If cube char_set is missing, use C-locale-dependent functions
|
||||
// on UTF8 characters to determine case properties.
|
||||
first_upper = isupper(str32[0]);
|
||||
first_lower = islower(str32[0]);
|
||||
if (first_upper)
|
||||
capitalized = true;
|
||||
prev_upper = first_upper;
|
||||
prev_lower = first_lower;
|
||||
for (int c = 1; str32[c] != 0; ++c) {
|
||||
cur_upper = isupper(str32[c]);
|
||||
cur_lower = islower(str32[c]);
|
||||
if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
|
||||
all_one_case = false;
|
||||
if (cur_upper)
|
||||
capitalized = false;
|
||||
prev_upper = cur_upper;
|
||||
prev_lower = cur_lower;
|
||||
}
|
||||
} else {
|
||||
UNICHARSET *unicharset = char_set->InternalUnicharset();
|
||||
// Use UNICHARSET functions to determine case properties
|
||||
first_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
|
||||
first_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
|
||||
if (first_upper)
|
||||
capitalized = true;
|
||||
prev_upper = first_upper;
|
||||
prev_lower = first_lower;
|
||||
|
||||
for (int c = 1; c < StrLen(str32); ++c) {
|
||||
cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
|
||||
cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
|
||||
if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
|
||||
all_one_case = false;
|
||||
if (cur_upper)
|
||||
capitalized = false;
|
||||
prev_upper = cur_upper;
|
||||
prev_lower = cur_lower;
|
||||
}
|
||||
}
|
||||
return all_one_case || capitalized;
|
||||
}
|
||||
|
||||
// Returns a newly allocated lower-case version of str32, or NULL on
// error (NULL char_set, invalid character, or a lower-case mapping that
// is not exactly one character). Caller owns the returned array.
char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *lower = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    const char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] lower;
      return NULL;
    }
    // non-upper-case characters pass through unchanged
    if (!unicharset->get_isupper(char_set->ClassID(ch))) {
      lower[i] = ch;
      continue;
    }
    // map upper-case characters through the unicharset's other-case table
    UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *str32_lower = char_set->ClassString(uid_lower);
    // expect the lower-case form to be exactly one character
    if (!str32_lower || StrLen(str32_lower) != 1) {
      delete [] lower;
      return NULL;
    }
    lower[i] = str32_lower[0];
  }
  lower[len] = 0;
  return lower;
}
|
||||
|
||||
// Returns a newly allocated upper-case version of str32, or NULL on
// error (NULL char_set, invalid character, or an upper-case mapping that
// is not exactly one character). Caller owns the returned array.
char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *upper = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    const char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] upper;
      return NULL;
    }
    // non-lower-case characters pass through unchanged
    if (!unicharset->get_islower(char_set->ClassID(ch))) {
      upper[i] = ch;
      continue;
    }
    // map lower-case characters through the unicharset's other-case table
    UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *str32_upper = char_set->ClassString(uid_upper);
    // expect the upper-case form to be exactly one character
    if (!str32_upper || StrLen(str32_upper) != 1) {
      delete [] upper;
      return NULL;
    }
    upper[i] = str32_upper[0];
  }
  upper[len] = 0;
  return upper;
}
|
||||
} // namespace tesseract
|
@ -1,83 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_utils.h
|
||||
* Description: Declaration of the Cube Utilities Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
*(C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0(the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeUtils class provides miscellaneous utility and helper functions
|
||||
// to the rest of the Cube Engine
|
||||
|
||||
#ifndef CUBE_UTILS_H
|
||||
#define CUBE_UTILS_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "const.h"
|
||||
#include "char_set.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The CubeUtils class provides miscellaneous static utility and helper
// functions to the rest of the Cube Engine. All methods are static.
class CubeUtils {
 public:
  CubeUtils();
  ~CubeUtils();

  // Converts a probability value to a cost by getting the -log() of the
  // probability value to a known base
  static int Prob2Cost(double prob_val);
  // Converts a cost to probability by getting the exp(-normalized cost)
  static double Cost2Prob(int cost);
  // Computes the length of a 32-bit char buffer
  static int StrLen(const char_32 *str);
  // Compares two 32-bit char buffers (strcmp-style result)
  static int StrCmp(const char_32 *str1, const char_32 *str2);
  // Duplicates a 32-bit char buffer; caller owns the returned array
  static char_32 *StrDup(const char_32 *str);
  // Creates a CharSamp from a Pix and a bounding box
  static CharSamp *CharSampleFromPix(Pix *pix,
                                     int left, int top, int wid, int hgt);
  // Creates a Pix from a CharSamp
  static Pix *PixFromCharSample(CharSamp *char_samp);
  // read the contents of a file to a string; returns false on failure
  static bool ReadFileToString(const string &file_name, string *str);
  // split a string into vectors using any of the specified delimiters
  static void SplitStringUsing(const string &str, const string &delims,
                               vector<string> *str_vec);
  // UTF-8 to UTF-32 conversion functions
  static void UTF8ToUTF32(const char *utf8_str, string_32 *str32);
  static void UTF32ToUTF8(const char_32 *utf32_str, string *str);
  // Returns true if input word has either 1) all-one-case, or 2)
  // first character upper-case, and remaining characters lower-case.
  // If char_set is not NULL, uses tesseract's unicharset functions
  // to determine case properties. Otherwise, uses C-locale-dependent
  // functions, which may be unreliable on non-ASCII characters.
  static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the lower-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToLower(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the upper-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToUpper(const char_32 *str32, CharSet *char_set);
 private:
  // Unpacks a rectangular region of a 1-bpp Pix into a byte-per-pixel
  // buffer (0 = foreground, 255 = background); caller owns the result.
  static unsigned char *GetImageData(Pix *pix,
                                     int left, int top, int wid, int hgt);
};
|
||||
} // namespace tesseract
|
||||
#endif // CUBE_UTILS_H
|
@ -1,55 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_base.h
|
||||
* Description: Declaration of the Feature Base Class
|
||||
* Author: Ping Ping (xiupingping), Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureBase class is the base class for any Feature Extraction class
|
||||
// It provided 3 pure virtual functions (to inherit):
|
||||
// 1- FeatureCnt: A method to returns the count of features
|
||||
// 2- ComputeFeatures: A method to compute the features for a given CharSamp
|
||||
// 3- ComputeFeatureBitmap: A method to render a visualization of the features
|
||||
// to a CharSamp. This is mainly used by visual-debuggers
|
||||
|
||||
#ifndef FEATURE_BASE_H
|
||||
#define FEATURE_BASE_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureBase class is the abstract base class for any feature
// extraction class. Subclasses implement FeatureCnt(), ComputeFeatures()
// and ComputeFeatureBitmap().
class FeatureBase {
 public:
  // params is stored but never deleted by this class; it must outlive
  // the feature extractor.
  explicit FeatureBase(TuningParams *params)
      : params_(params) {
  }
  virtual ~FeatureBase() {}

  // Compute the features for a given CharSamp. features must point to
  // an array of at least FeatureCnt() floats.
  virtual bool ComputeFeatures(CharSamp *char_samp, float *features) = 0;
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *char_samp) = 0;
  // Returns the count of features
  virtual int FeatureCnt() = 0;

 protected:
  // tuning parameters shared with the caller (not owned)
  TuningParams *params_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_BASE_H
|
||||
|
@ -1,50 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_bmp.cpp
|
||||
* Description: Implementation of the Bitmap Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Caches the convolution grid size from the tuning parameters so that
// later feature computations need not consult params.
FeatureBmp::FeatureBmp(TuningParams *params)
    : FeatureBase(params) {
  conv_grid_size_ = params->ConvGridSize();
}
|
||||
|
||||
// Nothing to release: this extractor owns no resources.
FeatureBmp::~FeatureBmp() {}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureBmp::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp->Scale(conv_grid_size_, conv_grid_size_);
|
||||
}
|
||||
|
||||
// Compute the features for a given CharSamp
|
||||
bool FeatureBmp::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
return char_samp->ComputeFeatures(conv_grid_size_, features);
|
||||
}
|
||||
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_bmp.h
|
||||
* Description: Declaration of the Bitmap Feature Class
|
||||
* Author: PingPing xiu (xiupingping) & Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureBmp class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// The Bitmap feature vectors is the the bitmap of the specified CharSamp
|
||||
// scaled to a fixed grid size and then augmented by a 5 aux features that
|
||||
// describe the size, aspect ration and placement within a word
|
||||
|
||||
#ifndef FEATURE_BMP_H
|
||||
#define FEATURE_BMP_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_base.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureBmp class implements a Bitmap feature extractor class. It
// inherits from the FeatureBase class.
// The bitmap feature vector is the bitmap of the specified CharSamp
// scaled to a fixed grid size and then augmented by 5 aux features that
// describe the size, aspect ratio and placement within a word.
class FeatureBmp : public FeatureBase {
 public:
  explicit FeatureBmp(TuningParams *params);
  virtual ~FeatureBmp();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the scaled-bitmap pixels plus the
  // 5 auxiliary features
  virtual int FeatureCnt() {
    return 5 + (conv_grid_size_ * conv_grid_size_);
  }

 protected:
  // grid size, cached from the TuningParams object
  int conv_grid_size_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_BMP_H
|
@ -1,138 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.cpp
|
||||
* Description: Implementation of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "feature_base.h"
|
||||
#include "feature_chebyshev.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// All state lives in the FeatureBase part; nothing extra to initialize.
FeatureChebyshev::FeatureChebyshev(TuningParams *params)
    : FeatureBase(params) {}
|
||||
|
||||
// Nothing to release: this extractor owns no resources.
FeatureChebyshev::~FeatureChebyshev() {}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureChebyshev::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Compute Chebyshev coefficients for the specified vector.
// The input is first re-sampled at coeff_cnt Chebyshev-node positions
// mapped onto [0, input.size()-1], then a DCT-style cosine sum turns the
// re-sampled values into coeff_cnt coefficients written to coeff.
// coeff must point to at least coeff_cnt floats.
void FeatureChebyshev::ChebyshevCoefficients(const vector<float> &input,
                                             int coeff_cnt, float *coeff) {
  // re-sample function at the Chebyshev nodes
  int input_range = (input.size() - 1);
  vector<float> resamp(coeff_cnt);
  for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
    // compute sampling position in [0, input_range]
    float samp_pos = input_range *
        (1 + cos(M_PI * (samp_idx + 0.5) / coeff_cnt)) / 2;
    // linearly interpolate between the two neighboring input samples
    // (samp_end is at most input_range, so indexing stays in bounds)
    int samp_start = static_cast<int>(samp_pos);
    int samp_end = static_cast<int>(samp_pos + 0.5);
    float func_delta = input[samp_end] - input[samp_start];
    resamp[samp_idx] = input[samp_start] +
        ((samp_pos - samp_start) * func_delta);
  }
  // compute the coefficients: cosine-transform sum over the re-sampled
  // values, scaled by 2/coeff_cnt
  float normalizer = 2.0 / coeff_cnt;
  for (int coeff_idx = 0; coeff_idx < coeff_cnt; coeff_idx++, coeff++) {
    double sum = 0.0;
    for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
      sum += resamp[samp_idx] * cos(M_PI * coeff_idx * (samp_idx + 0.5) /
                                    coeff_cnt);
    }
    (*coeff) = (normalizer * sum);
  }
}
|
||||
|
||||
// Compute the features of a given CharSamp
|
||||
bool FeatureChebyshev::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
return ComputeChebyshevCoefficients(char_samp, features);
|
||||
}
|
||||
|
||||
// Compute the Chebyshev coefficients of a given CharSamp.
// Builds four profiles of the sample's foreground pixels (left, top,
// right, bottom edge distances, normalized to [0, 1]) and writes the
// kChebychevCoefficientCnt coefficients of each, in that order, into
// features (4 * kChebychevCoefficientCnt floats total).
// Returns false if the sample has no positive normalized bottom.
bool FeatureChebyshev::ComputeChebyshevCoefficients(CharSamp *char_samp,
                                                    float *features) {
  if (char_samp->NormBottom() <= 0) {
    return false;
  }
  unsigned char *raw_data = char_samp->RawData();
  int stride = char_samp->Stride();
  // compute the height of the word; scales (Top + Height) by the
  // 255/NormBottom normalization factor
  int word_hgt = (255 * (char_samp->Top() + char_samp->Height()) /
                  char_samp->NormBottom());
  // compute left & right profiles: per row, the normalized distance from
  // each side to the first foreground (zero) pixel
  vector<float> left_profile(word_hgt, 0.0);
  vector<float> right_profile(word_hgt, 0.0);
  unsigned char *line_data = raw_data;
  for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
    int min_x = char_samp->Width();
    int max_x = -1;
    for (int x = 0; x < char_samp->Width(); x++) {
      if (line_data[x] == 0) {
        UpdateRange(x, &min_x, &max_x);
      }
    }
    // rows with no foreground pixels contribute 0
    left_profile[char_samp->Top() + y] =
        1.0 * (min_x == char_samp->Width() ? 0 : (min_x + 1)) /
        char_samp->Width();
    right_profile[char_samp->Top() + y] =
        1.0 * (max_x == -1 ? 0 : char_samp->Width() - max_x) /
        char_samp->Width();
  }

  // compute top and bottom profiles: per column, the normalized distance
  // from top/bottom of the word to the first foreground pixel
  vector<float> top_profile(char_samp->Width(), 0);
  vector<float> bottom_profile(char_samp->Width(), 0);
  for (int x = 0; x < char_samp->Width(); x++) {
    int min_y = word_hgt;
    int max_y = -1;
    line_data = raw_data;
    for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
      if (line_data[x] == 0) {
        UpdateRange(y + char_samp->Top(), &min_y, &max_y);
      }
    }
    top_profile[x] = 1.0 * (min_y == word_hgt ? 0 : (min_y + 1)) / word_hgt;
    bottom_profile[x] = 1.0 * (max_y == -1 ? 0 : (word_hgt - max_y)) / word_hgt;
  }

  // compute the chebyshev coefficients of each profile, packed in the
  // order: left, top, right, bottom
  ChebyshevCoefficients(left_profile, kChebychevCoefficientCnt, features);
  ChebyshevCoefficients(top_profile, kChebychevCoefficientCnt,
                        features + kChebychevCoefficientCnt);
  ChebyshevCoefficients(right_profile, kChebychevCoefficientCnt,
                        features + (2 * kChebychevCoefficientCnt));
  ChebyshevCoefficients(bottom_profile, kChebychevCoefficientCnt,
                        features + (3 * kChebychevCoefficientCnt));
  return true;
}
|
||||
} // namespace tesseract
|
@ -1,57 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.h
|
||||
* Description: Declaration of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureChebyshev class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// The feature vector is the composed of the chebyshev coefficients of 4 time
|
||||
// sequences. The time sequences are the left, top, right & bottom
|
||||
// bitmap profiles of the input samples
|
||||
|
||||
#ifndef FEATURE_CHEBYSHEV_H
|
||||
#define FEATURE_CHEBYSHEV_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_base.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureChebyshev class implements a feature extractor class. It
// inherits from the FeatureBase class.
// The feature vector is composed of the Chebyshev coefficients of 4
// sequences: the left, top, right & bottom bitmap profiles of the
// input sample (kChebychevCoefficientCnt coefficients per profile).
class FeatureChebyshev : public FeatureBase {
 public:
  explicit FeatureChebyshev(TuningParams *params);
  virtual ~FeatureChebyshev();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features (one block of coefficients per
  // profile: left, top, right, bottom)
  virtual int FeatureCnt() {
    return (4 * kChebychevCoefficientCnt);
  }

 protected:
  // number of Chebyshev coefficients computed per profile
  static const int kChebychevCoefficientCnt = 40;
  // Compute Chebychev coefficients for the specified vector
  void ChebyshevCoefficients(const vector<float> &input,
                             int coeff_cnt, float *coeff);
  // Compute the features for a given CharSamp
  bool ComputeChebyshevCoefficients(CharSamp *samp, float *features);
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_CHEBYSHEV_H
|
@ -1,64 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.cpp
|
||||
* Description: Implementation of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "feature_base.h"
|
||||
#include "feature_hybrid.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Builds the two underlying extractors whose outputs this hybrid
// extractor concatenates.
FeatureHybrid::FeatureHybrid(TuningParams *params)
    : FeatureBase(params) {
  feature_bmp_ = new FeatureBmp(params);
  feature_chebyshev_ = new FeatureChebyshev(params);
}
|
||||
|
||||
// Releases the owned sub-extractors.
FeatureHybrid::~FeatureHybrid() {
  delete feature_bmp_;
  delete feature_chebyshev_;
}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureHybrid::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
|
||||
// Compute the features of a given CharSamp
|
||||
bool FeatureHybrid::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
if (!feature_bmp_->ComputeFeatures(char_samp, features)) {
|
||||
return false;
|
||||
}
|
||||
return feature_chebyshev_->ComputeFeatures(char_samp,
|
||||
features + feature_bmp_->FeatureCnt());
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,56 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.h
|
||||
* Description: Declaration of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureHybrid class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// This class describes the a hybrid feature vector composed by combining
|
||||
// the bitmap and the chebyshev feature vectors
|
||||
|
||||
#ifndef FEATURE_HYBRID_H
|
||||
#define FEATURE_HYBRID_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "feature_chebyshev.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureHybrid class implements a feature extractor class. It
// inherits from the FeatureBase class.
// This class describes a hybrid feature vector composed by combining
// the bitmap and the chebyshev feature vectors.
class FeatureHybrid : public FeatureBase {
 public:
  explicit FeatureHybrid(TuningParams *params);
  virtual ~FeatureHybrid();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the sum of the two sub-extractors'
  // feature counts, or 0 if either is missing
  virtual int FeatureCnt() {
    if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) {
      return 0;
    }
    return feature_bmp_->FeatureCnt() + feature_chebyshev_->FeatureCnt();
  }

 protected:
  // owned sub-extractors, created in the constructor
  FeatureBmp *feature_bmp_;
  FeatureChebyshev *feature_chebyshev_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_HYBRID_H
|
@ -1,346 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: charclassifier.cpp
|
||||
* Description: Implementation of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "classifier_base.h"
|
||||
#include "char_set.h"
|
||||
#include "const.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_utils.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "hybrid_neural_net_classifier.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs the classifier; the net input/output buffers are allocated
// lazily by RunNets() on first use.
HybridNeuralNetCharClassifier::HybridNeuralNetCharClassifier(
    CharSet *char_set,
    TuningParams *params,
    FeatureBase *feat_extract)
    : CharClassifier(char_set, params, feat_extract),
      net_input_(NULL),
      net_output_(NULL) {
}
|
||||
|
||||
// Releases the component nets and the lazily allocated i/o buffers.
HybridNeuralNetCharClassifier::~HybridNeuralNetCharClassifier() {
  // delete on a NULL pointer is a no-op, so no per-element check is needed
  for (int idx = 0; idx < static_cast<int>(nets_.size()); ++idx) {
    delete nets_[idx];
  }
  nets_.clear();

  delete []net_input_;
  net_input_ = NULL;

  delete []net_output_;
  net_output_ = NULL;
}
|
||||
|
||||
// The main training function. Given a sample and a class ID the classifier
|
||||
// updates its parameters according to its learning algorithm. This function
|
||||
// is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
// The main training function. Given a sample and a class ID the classifier
// updates its parameters according to its learning algorithm. This function
// is currently not implemented and always returns false.
// TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
  return false;
}
|
||||
|
||||
// A secondary function needed for training. Allows the trainer to set the
|
||||
// value of any train-time parameter. This function is currently not
|
||||
// implemented. TODO(ahmadab): implement end-2-end training
|
||||
// A secondary function needed for training. Allows the trainer to set the
// value of any train-time parameter. This function is currently not
// implemented and always returns false.
// TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implementation of parameter initializing.
  return false;
}
|
||||
|
||||
// Folds the output of the NeuralNet using the loaded folding sets
|
||||
void HybridNeuralNetCharClassifier::Fold() {
|
||||
// in case insensitive mode
|
||||
if (case_sensitive_ == false) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
// fold case
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// get class string
|
||||
const char_32 *str32 = char_set_->ClassString(class_id);
|
||||
// get the upper case form of the string
|
||||
string_32 upper_form32 = str32;
|
||||
for (int ch = 0; ch < upper_form32.length(); ch++) {
|
||||
if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
|
||||
upper_form32[ch] = towupper(upper_form32[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
// find out the upperform class-id if any
|
||||
int upper_class_id =
|
||||
char_set_->ClassID(reinterpret_cast<const char_32 *>(
|
||||
upper_form32.c_str()));
|
||||
if (upper_class_id != -1 && class_id != upper_class_id) {
|
||||
float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
|
||||
net_output_[class_id] = max_out;
|
||||
net_output_[upper_class_id] = max_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The folding sets specify how groups of classes should be folded
|
||||
// Folding involved assigning a min-activation to all the members
|
||||
// of the folding set. The min-activation is a fraction of the max-activation
|
||||
// of the members of the folding set
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
float max_prob = net_output_[fold_sets_[fold_set][0]];
|
||||
|
||||
for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
|
||||
if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
|
||||
max_prob = net_output_[fold_sets_[fold_set][ch]];
|
||||
}
|
||||
}
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
|
||||
net_output_[fold_sets_[fold_set][ch]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compute the features of specified charsamp and
// feedforward the specified nets
// Returns false if feature extraction or any net's FeedForward fails.
bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) {
  int feat_cnt = feat_extract_->FeatureCnt();
  int class_cnt = char_set_->ClassCount();

  // allocate i/p and o/p buffers if needed (lazily, on first call;
  // both are allocated together so checking one suffices)
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    net_output_ = new float[class_cnt];
  }

  // compute input features
  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
    return false;
  }

  // go through all the nets, accumulating their weighted outputs
  memset(net_output_, 0, class_cnt * sizeof(*net_output_));
  float *inputs = net_input_;
  for (int net_idx = 0; net_idx < nets_.size(); net_idx++) {
    // run each net
    vector<float> net_out(class_cnt, 0.0);
    if (!nets_[net_idx]->FeedForward(inputs, &net_out[0])) {
      return false;
    }
    // add the output values, scaled by this net's combination weight
    for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
      net_output_[class_idx] += (net_out[class_idx] * net_wgts_[net_idx]);
    }
    // increment inputs pointer: each net consumes its own slice of the
    // concatenated feature vector
    inputs += nets_[net_idx]->in_cnt();
  }

  // apply case and folding-set folding to the accumulated outputs
  Fold();

  return true;
}
|
||||
|
||||
// return the cost of being a char
|
||||
int HybridNeuralNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
// it is by design that a character cost is equal to zero
|
||||
// when no nets are present. This is the case during training.
|
||||
if (RunNets(char_samp) == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
|
||||
}
|
||||
|
||||
// classifies a charsamp and returns an alternate list
|
||||
// of chars sorted by char costs
|
||||
CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
// run the needed nets
|
||||
if (RunNets(char_samp) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// create an altlist
|
||||
CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
|
||||
|
||||
for (int out = 1; out < class_cnt; out++) {
|
||||
int cost = CubeUtils::Prob2Cost(net_output_[out]);
|
||||
alt_list->Insert(out, cost);
|
||||
}
|
||||
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// set an external net (for training purposes)
// Intentionally a no-op in this classifier; the nets are loaded from file
// by LoadNets().
void HybridNeuralNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
}
|
||||
|
||||
// Load folding sets
|
||||
// This function returns true on success or if the file can't be read,
|
||||
// returns false if an error is encountered.
|
||||
bool HybridNeuralNetCharClassifier::LoadFoldingSets(
|
||||
const string &data_file_path, const string &lang, LangModel *lang_mod) {
|
||||
fold_set_cnt_ = 0;
|
||||
string fold_file_name;
|
||||
fold_file_name = data_file_path + lang;
|
||||
fold_file_name += ".cube.fold";
|
||||
|
||||
// folding sets are optional
|
||||
FILE *fp = fopen(fold_file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
string fold_sets_str;
|
||||
if (!CubeUtils::ReadFileToString(fold_file_name,
|
||||
&fold_sets_str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
|
||||
fold_set_cnt_ = str_vec.size();
|
||||
fold_sets_ = new int *[fold_set_cnt_];
|
||||
fold_set_len_ = new int[fold_set_cnt_];
|
||||
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
|
||||
&str_vec[fold_set]);
|
||||
|
||||
// if all or all but one character are invalid, invalidate this set
|
||||
if (str_vec[fold_set].length() <= 1) {
|
||||
fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
|
||||
"invalidating folding set %d\n", fold_set);
|
||||
fold_set_len_[fold_set] = 0;
|
||||
fold_sets_[fold_set] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
|
||||
fold_set_len_[fold_set] = str32.length();
|
||||
fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Init the classifier provided a data-path and a language string.
// Idempotent: returns immediately once initialized.
bool HybridNeuralNetCharClassifier::Init(const string &data_file_path,
                                         const string &lang,
                                         LangModel *lang_mod) {
  if (init_ == true) {
    return true;
  }

  // load the nets if any. This function will return true if the net file
  // does not exist. But will fail if the net did not pass the sanity checks
  if (!LoadNets(data_file_path, lang)) {
    return false;
  }

  // load the folding sets if any. This function will return true if the
  // file does not exist. But will fail if the it did not pass the sanity checks
  if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
    return false;
  }

  init_ = true;
  return true;
}
|
||||
|
||||
// Load the classifier's Neural Nets
// This function will return true if the net file does not exist.
// But will fail if the net did not pass the sanity checks
bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path,
                                             const string &lang) {
  string hybrid_net_file;
  string junk_net_file;

  // add the lang identifier
  hybrid_net_file = data_file_path + lang;
  hybrid_net_file += ".cube.hybrid";

  // neural network is optional; absence of the file is not an error
  FILE *fp = fopen(hybrid_net_file.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);

  string str;
  if (!CubeUtils::ReadFileToString(hybrid_net_file, &str)) {
    return false;
  }

  // split into lines; one net spec per line
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
  if (str_vec.empty()) {
    return false;
  }

  // create and add the nets
  nets_.resize(str_vec.size(), NULL);
  net_wgts_.resize(str_vec.size(), 0);
  int total_input_size = 0;
  for (int net_idx = 0; net_idx < str_vec.size(); net_idx++) {
    // parse the string
    vector<string> tokens_vec;
    CubeUtils::SplitStringUsing(str_vec[net_idx], " \t", &tokens_vec);
    // has to be 2 tokens: net file name and combination weight
    if (tokens_vec.size() != 2) {
      return false;
    }
    // load the net
    string net_file_name = data_file_path + tokens_vec[0];
    nets_[net_idx] = tesseract::NeuralNet::FromFile(net_file_name);
    if (nets_[net_idx] == NULL) {
      return false;
    }
    // parse the net's combination weight and validate it is non-negative
    // (NOTE: an earlier comment said "input size"; the second token is the
    // weight used by RunNets to scale this net's outputs)
    net_wgts_[net_idx] = atof(tokens_vec[1].c_str());
    if (net_wgts_[net_idx] < 0.0) {
      return false;
    }
    total_input_size += nets_[net_idx]->in_cnt();
  }
  // validate total input count against the feature extractor's output size
  if (total_input_size != feat_extract_->FeatureCnt()) {
    return false;
  }
  // success
  return true;
}
|
||||
} // tesseract
|
@ -1,90 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: conv_net_classifier.h
|
||||
* Description: Declaration of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef HYBRID_NEURAL_NET_CLASSIFIER_H
|
||||
#define HYBRID_NEURAL_NET_CLASSIFIER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "lang_model.h"
|
||||
#include "neural_net.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Folding Ratio is the ratio of the max-activation of members of a folding
|
||||
// set that is used to compute the min-activation of the rest of the set
|
||||
// static const float kFoldingRatio = 0.75; // see conv_net_classifier.h
|
||||
|
||||
// Character classifier that combines the weighted outputs of several
// NeuralNets, each fed a distinct slice of a hybrid feature vector.
class HybridNeuralNetCharClassifier : public CharClassifier {
 public:
  HybridNeuralNetCharClassifier(CharSet *char_set, TuningParams *params,
                                FeatureBase *feat_extract);
  virtual ~HybridNeuralNetCharClassifier();
  // The main training function. Given a sample and a class ID the classifier
  // updates its parameters according to its learning algorithm. This function
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training
  void SetNet(tesseract::NeuralNet *net);

  // Classifies an input charsamp and return a CharAltList object containing
  // the possible candidates and corresponding scores
  virtual CharAltList *Classify(CharSamp *char_samp);
  // Computes the cost of a specific charsamp being a character (versus a
  // non-character: part-of-a-character OR more-than-one-character)
  virtual int CharCost(CharSamp *char_samp);

 private:
  // Component Neural Nets and their combination weights (parallel vectors)
  vector<tesseract::NeuralNet *> nets_;
  vector<float> net_wgts_;

  // data buffers used to hold Neural Net inputs and outputs;
  // allocated lazily by RunNets()
  float *net_input_;
  float *net_output_;

  // Init the classifier provided a data-path and a language string
  virtual bool Init(const string &data_file_path, const string &lang,
                    LangModel *lang_mod);
  // Loads the NeuralNets needed for the classifier
  bool LoadNets(const string &data_file_path, const string &lang);
  // Load folding sets
  // This function returns true on success or if the file can't be read,
  // returns false if an error is encountered.
  virtual bool LoadFoldingSets(const string &data_file_path,
                               const string &lang,
                               LangModel *lang_mod);
  // Folds the output of the NeuralNet using the loaded folding sets
  virtual void Fold();
  // Scales the input char_samp and feeds it to the NeuralNet as input
  bool RunNets(CharSamp *char_samp);
};
|
||||
}
|
||||
#endif // HYBRID_NEURAL_NET_CLASSIFIER_H
|
@ -1,73 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: lang_mod_edge.h
|
||||
* Description: Declaration of the Language Model Edge Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The LangModEdge abstracts an Edge in the language model trie
|
||||
// This is an abstract class that any Language Model Edge should inherit from
|
||||
// It provides methods for:
|
||||
// 1- Returns the class ID corresponding to the edge
|
||||
// 2- If the edge is a valid EndOfWord (EOW)
|
||||
// 3- If the edge is coming from a OutOfDictionary (OOF) state machine
|
||||
// 4- If the edge is a Terminal (has no children)
|
||||
// 5- A Hash of the edge that will be used to retrieve the edge
|
||||
// quickly from the BeamSearch lattice
|
||||
// 6- If two edges are identcial
|
||||
// 7- Returns a verbal description of the edge (use by debuggers)
|
||||
// 8- the language model cost of the edge (if any)
|
||||
// 9- The string corresponding to this edge
|
||||
// 10- Getting and setting the "Root" status of the edge
|
||||
|
||||
#ifndef LANG_MOD_EDGE_H
|
||||
#define LANG_MOD_EDGE_H
|
||||
|
||||
#include "cube_tuning_params.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Abstract base class for an edge in the language-model trie.
// Concrete language models implement these hooks; the BeamSearch uses
// Hash()/IsIdentical() to merge equivalent lattice paths.
class LangModEdge {
 public:
  LangModEdge() {}
  virtual ~LangModEdge() {}

  // The string corresponding to this edge
  virtual const char_32 * EdgeString() const = 0;
  // Returns the class ID corresponding to the edge
  virtual int ClassID() const = 0;
  // If the edge is the root edge
  virtual bool IsRoot() const = 0;
  // Set the Root flag
  virtual void SetRoot(bool flag) = 0;
  // If the edge is a valid EndOfWord (EOW)
  virtual bool IsEOW() const = 0;
  // is the edge coming from a OutOfDictionary (OOD) state machine
  virtual bool IsOOD() const = 0;
  // Is the edge a Terminal (has no children)
  virtual bool IsTerminal() const = 0;
  // Returns a hash of the edge that will be used to retrieve the edge
  // quickly from the BeamSearch lattice
  virtual unsigned int Hash() const = 0;
  // Are the two edges identical?
  virtual bool IsIdentical(LangModEdge *edge) const = 0;
  // a verbal description of the edge (used by debuggers)
  virtual char *Description() const = 0;
  // the language model cost of the edge (if any)
  virtual int PathCost() const = 0;
};
|
||||
}
|
||||
|
||||
#endif // LANG_MOD_EDGE_H
|
@ -1,78 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: lang_model.h
|
||||
* Description: Declaration of the Language Model Edge Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The LanguageModel class abstracts a State machine that is modeled as a Trie
|
||||
// structure. The state machine models the language being recognized by the OCR
|
||||
// Engine
|
||||
// This is an abstract class that is to be inherited by any language model
|
||||
|
||||
#ifndef LANG_MODEL_H
|
||||
#define LANG_MODEL_H
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
// Abstract language model: a trie-shaped state machine describing the
// language being recognized. All four feature toggles default to enabled.
class LangModel {
 public:
  LangModel() {
    ood_enabled_ = true;
    numeric_enabled_ = true;
    word_list_enabled_ = true;
    punc_enabled_ = true;
  }
  virtual ~LangModel() {}

  // Returns an edge pointer to the Root
  virtual LangModEdge *Root() = 0;
  // Returns the edges that fan-out of the specified edge and their count
  virtual LangModEdge **GetEdges(CharAltList *alt_list,
                                 LangModEdge *parent_edge,
                                 int *edge_cnt) = 0;
  // Returns whether a sequence of 32-bit characters is valid within this
  // language model or not. An EndOfWord flag is specified. If true, the
  // sequence has to end on a valid word. The function also optionally
  // returns the list of language model edges traversed to parse the string
  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
                               LangModEdge **edge_array = NULL) = 0;
  virtual bool IsLeadingPunc(char_32 ch) = 0;
  virtual bool IsTrailingPunc(char_32 ch) = 0;
  virtual bool IsDigit(char_32 ch) = 0;

  // accessor functions for the feature toggles
  inline bool OOD() { return ood_enabled_; }
  inline bool Numeric() { return numeric_enabled_; }
  inline bool WordList() { return word_list_enabled_; }
  inline bool Punc() { return punc_enabled_; }
  inline void SetOOD(bool ood) { ood_enabled_ = ood; }
  inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
  inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
  inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }

 protected:
  bool ood_enabled_;
  bool numeric_enabled_;
  bool word_list_enabled_;
  bool punc_enabled_;
};
|
||||
}
|
||||
|
||||
#endif // LANG_MODEL_H
|
@ -1,217 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_column.cpp
|
||||
* Description: Implementation of the Beam Search Column Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "search_column.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an empty column; the node array and hash table are created
// lazily by Init()/AddNode(). Initializer order follows the member
// declaration order in search_column.h.
SearchColumn::SearchColumn(int col_idx, int max_node)
    : init_(false),
      min_cost_(INT_MAX),
      max_cost_(0),
      max_node_cnt_(max_node),
      node_cnt_(0),
      col_idx_(col_idx),
      node_array_(NULL),
      node_hash_table_(NULL) {
}
|
||||
|
||||
// Cleanup data
|
||||
void SearchColumn::Cleanup() {
|
||||
if (node_array_ != NULL) {
|
||||
for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
|
||||
if (node_array_[node_idx] != NULL) {
|
||||
delete node_array_[node_idx];
|
||||
}
|
||||
}
|
||||
|
||||
delete []node_array_;
|
||||
node_array_ = NULL;
|
||||
}
|
||||
FreeHashTable();
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// Destructor: releases all nodes and the hash table via Cleanup().
SearchColumn::~SearchColumn() {
  Cleanup();
}
|
||||
|
||||
// Initializations
|
||||
bool SearchColumn::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// create hash table
|
||||
if (node_hash_table_ == NULL) {
|
||||
node_hash_table_ = new SearchNodeHashTable();
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prune the nodes if necessary. Pruning is done such that a max
// number of nodes is kept, i.e., the beam width. Uses a histogram of
// node costs to pick a cutoff cost, then compacts survivors in place.
void SearchColumn::Prune() {
  // no need to prune
  if (node_cnt_ <= max_node_cnt_) {
    return;
  }

  // compute the cost histogram over [min_cost_, max_cost_]
  memset(score_bins_, 0, sizeof(score_bins_));
  int cost_range = max_cost_ - min_cost_ + 1;
  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
    int cost_bin = static_cast<int>(
        ((node_array_[node_idx]->BestCost() - min_cost_) *
         kScoreBins) / static_cast<double>(cost_range));
    // clamp into the last bin (guards rounding at the upper edge)
    if (cost_bin >= kScoreBins) {
      cost_bin = kScoreBins - 1;
    }
    score_bins_[cost_bin]++;
  }

  // determine the pruning cost by scanning the cost histogram from
  // least to greatest cost bins and finding the cost at which the
  // max number of nodes is exceeded
  int pruning_cost = 0;
  int new_node_cnt = 0;
  for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) {
    if (new_node_cnt > 0 &&
        (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) {
      pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins);
      break;
    }
    new_node_cnt += score_bins_[cost_bin];
  }

  // prune out all the nodes above this cost, compacting kept nodes to the
  // front of the array (new_node_cnt is reused as the write index)
  for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) {
    // prune this node out
    if (node_array_[node_idx]->BestCost() > pruning_cost ||
        new_node_cnt > max_node_cnt_) {
      delete node_array_[node_idx];
    } else {
      // keep it
      node_array_[new_node_cnt++] = node_array_[node_idx];
    }
  }
  node_cnt_ = new_node_cnt;
}
|
||||
|
||||
// sort all nodes in place using SearchNode's comparison function
// (needed for visualization)
void SearchColumn::Sort() {
  if (node_cnt_ > 0 && node_array_ != NULL) {
    qsort(node_array_, node_cnt_, sizeof(*node_array_),
          SearchNode::SearchNodeComparer);
  }
}
|
||||
|
||||
// add a new node for the given language-model edge, or update the matching
// existing node. Takes ownership of `edge` (deletes it on the merge path).
// Returns the affected node, or NULL if the node was rejected/pruned or no
// update occurred.
SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
                                  SearchNode *parent_node,
                                  CubeRecoContext *cntxt) {
  // init if necessary
  if (init_ == false && Init() == false) {
    return NULL;
  }

  // find out if we have an node with the same edge
  // look in the hash table
  SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node);
  // node does not exist
  if (new_node == NULL) {
    new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);

    // if the max node count has already been reached, check if the cost of
    // the new node exceeds the max cost. This indicates that it will be pruned
    // and so there is no point adding it
    if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) {
      delete new_node;
      return NULL;
    }

    // expand the node buffer if necessary (grows in fixed-size chunks)
    if ((node_cnt_ % kNodeAllocChunk) == 0) {
      // alloc a new buff
      SearchNode **new_node_buff =
          new SearchNode *[node_cnt_ + kNodeAllocChunk];

      // free existing after copying contents
      if (node_array_ != NULL) {
        memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff));
        delete []node_array_;
      }

      node_array_ = new_node_buff;
    }

    // add the node to the hash table only if it is non-OOD edge
    // because the langmod state is not unique
    if (edge->IsOOD() == false) {
      if (!node_hash_table_->Insert(edge, new_node)) {
        tprintf("Hash table full!!!");
        delete new_node;
        return NULL;
      }
    }

    node_array_[node_cnt_++] = new_node;

  } else {
    // node exists before
    // if no update occurred, return NULL
    if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) {
      new_node = NULL;
    }

    // free the edge (the existing node keeps its own)
    delete edge;
  }

  // update Min and Max Costs so Prune() has an accurate cost range
  if (new_node != NULL) {
    if (min_cost_ > new_node->BestCost()) {
      min_cost_ = new_node->BestCost();
    }

    if (max_cost_ < new_node->BestCost()) {
      max_cost_ = new_node->BestCost();
    }
  }

  return new_node;
}
|
||||
|
||||
// Returns the node with the least cost in this column, or NULL when the
// column is empty.
SearchNode *SearchColumn::BestNode() {
  SearchNode *best = NULL;
  for (int idx = 0; idx < node_cnt_; ++idx) {
    SearchNode *candidate = node_array_[idx];
    if (best == NULL || candidate->BestCost() < best->BestCost()) {
      best = candidate;
    }
  }
  return best;
}
|
||||
} // namespace tesseract
|
@ -1,84 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_column.h
|
||||
* Description: Declaration of the Beam Search Column Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchColumn class abstracts a column in the lattice that is created
|
||||
// by the BeamSearch during the recognition process
|
||||
// The class holds the lattice nodes. New nodes are added by calls to AddNode
|
||||
// made from the BeamSearch
|
||||
// The class maintains a hash table of the nodes to be able to lookup nodes
|
||||
// quickly using their lang_mod_edge. This is needed to merge similar paths
|
||||
// in the lattice
|
||||
|
||||
#ifndef SEARCH_COLUMN_H
|
||||
#define SEARCH_COLUMN_H
|
||||
|
||||
#include "search_node.h"
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class SearchColumn {
|
||||
public:
|
||||
SearchColumn(int col_idx, int max_node_cnt);
|
||||
~SearchColumn();
|
||||
// Accessor functions
|
||||
inline int ColIdx() const { return col_idx_; }
|
||||
inline int NodeCount() const { return node_cnt_; }
|
||||
inline SearchNode **Nodes() const { return node_array_; }
|
||||
|
||||
// Prune the nodes if necessary. Pruning is done such that a max
|
||||
// number of nodes is kept, i.e., the beam width
|
||||
void Prune();
|
||||
SearchNode *AddNode(LangModEdge *edge, int score,
|
||||
SearchNode *parent, CubeRecoContext *cntxt);
|
||||
// Returns the node with the least cost
|
||||
SearchNode *BestNode();
|
||||
// Sort the lattice nodes. Needed for visualization
|
||||
void Sort();
|
||||
// Free up the Hash Table. Added to be called by the Beam Search after
|
||||
// a column is pruned to reduce memory foot print
|
||||
void FreeHashTable() {
|
||||
if (node_hash_table_ != NULL) {
|
||||
delete node_hash_table_;
|
||||
node_hash_table_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const int kNodeAllocChunk = 1024;
|
||||
static const int kScoreBins = 1024;
|
||||
bool init_;
|
||||
int min_cost_;
|
||||
int max_cost_;
|
||||
int max_node_cnt_;
|
||||
int node_cnt_;
|
||||
int col_idx_;
|
||||
int score_bins_[kScoreBins];
|
||||
SearchNode **node_array_;
|
||||
SearchNodeHashTable *node_hash_table_;
|
||||
|
||||
// Free node array and hash table
|
||||
void Cleanup();
|
||||
// Create hash table
|
||||
bool Init();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_COLUMN_H
|
@ -1,229 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_node.cpp
|
||||
* Description: Implementation of the Beam Search Node Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "search_node.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The constructor updates the best paths and costs:
|
||||
// mean_char_reco_cost_ (returned by BestRecoCost()) is the mean
|
||||
// char_reco cost of the best_path, including this node.
|
||||
// best_path_reco_cost is the total char_reco_cost of the best_path,
|
||||
// but excludes the char_reco_cost of this node.
|
||||
// best_cost is the mean mixed cost, i.e., mean_char_reco_cost_ +
|
||||
// current language model cost, all weighted by the cube context's
|
||||
// RecoWgt parameter
|
||||
SearchNode::SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
|
||||
int char_reco_cost, LangModEdge *edge, int col_idx) {
|
||||
// copy data members
|
||||
cntxt_ = cntxt;
|
||||
lang_mod_edge_ = edge;
|
||||
col_idx_ = col_idx;
|
||||
parent_node_ = parent_node;
|
||||
char_reco_cost_ = char_reco_cost;
|
||||
|
||||
// the string of this node is the same as that of the language model edge
|
||||
str_ = (edge == NULL ? NULL : edge->EdgeString());
|
||||
|
||||
// compute best path total reco cost
|
||||
best_path_reco_cost_ = (parent_node_ == NULL) ? 0 :
|
||||
parent_node_->CharRecoCost() + parent_node_->BestPathRecoCost();
|
||||
|
||||
// update best path length
|
||||
best_path_len_ = (parent_node_ == NULL) ?
|
||||
1 : parent_node_->BestPathLength() + 1;
|
||||
if (edge != NULL && edge->IsRoot() && parent_node_ != NULL) {
|
||||
best_path_len_++;
|
||||
}
|
||||
|
||||
// compute best reco cost mean cost
|
||||
mean_char_reco_cost_ = static_cast<int>(
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_));
|
||||
|
||||
// get language model cost
|
||||
int lm_cost = LangModCost(lang_mod_edge_, parent_node_);
|
||||
|
||||
// compute aggregate best cost
|
||||
best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_)
|
||||
) + lm_cost;
|
||||
}
|
||||
|
||||
SearchNode::~SearchNode() {
|
||||
if (lang_mod_edge_ != NULL) {
|
||||
delete lang_mod_edge_;
|
||||
}
|
||||
}
|
||||
|
||||
// update the parent_node node if provides a better (less) cost
|
||||
bool SearchNode::UpdateParent(SearchNode *new_parent, int new_reco_cost,
|
||||
LangModEdge *new_edge) {
|
||||
if (lang_mod_edge_ == NULL) {
|
||||
if (new_edge != NULL) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// to update the parent_node, we have to have the same target
|
||||
// state and char
|
||||
if (new_edge == NULL || !lang_mod_edge_->IsIdentical(new_edge) ||
|
||||
!SearchNode::IdenticalPath(parent_node_, new_parent)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// compute the path cost and combined cost of the new path
|
||||
int new_best_path_reco_cost;
|
||||
int new_cost;
|
||||
int new_best_path_len;
|
||||
|
||||
new_best_path_reco_cost = (new_parent == NULL) ?
|
||||
0 : new_parent->BestPathRecoCost() + new_parent->CharRecoCost();
|
||||
|
||||
new_best_path_len =
|
||||
(new_parent == NULL) ? 1 : new_parent->BestPathLength() + 1;
|
||||
|
||||
// compute the new language model cost
|
||||
int new_lm_cost = LangModCost(new_edge, new_parent);
|
||||
|
||||
new_cost = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(new_best_path_reco_cost + new_reco_cost) /
|
||||
static_cast<double>(new_best_path_len)
|
||||
) + new_lm_cost;
|
||||
|
||||
// update if it is better (less) than the current one
|
||||
if (best_cost_ > new_cost) {
|
||||
parent_node_ = new_parent;
|
||||
char_reco_cost_ = new_reco_cost;
|
||||
best_path_reco_cost_ = new_best_path_reco_cost;
|
||||
best_path_len_ = new_best_path_len;
|
||||
mean_char_reco_cost_ = static_cast<int>(
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_));
|
||||
best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_)
|
||||
) + new_lm_cost;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
char_32 *SearchNode::PathString() {
|
||||
SearchNode *node = this;
|
||||
|
||||
// compute string length
|
||||
int len = 0;
|
||||
|
||||
while (node != NULL) {
|
||||
if (node->str_ != NULL) {
|
||||
len += CubeUtils::StrLen(node->str_);
|
||||
}
|
||||
|
||||
// if the edge is a root and does not have a NULL parent, account for space
|
||||
LangModEdge *lm_edge = node->LangModelEdge();
|
||||
if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
|
||||
len++;
|
||||
}
|
||||
|
||||
node = node->parent_node_;
|
||||
}
|
||||
|
||||
char_32 *char_ptr = new char_32[len + 1];
|
||||
|
||||
int ch_idx = len;
|
||||
|
||||
node = this;
|
||||
char_ptr[ch_idx--] = 0;
|
||||
|
||||
while (node != NULL) {
|
||||
int str_len = ((node->str_ == NULL) ? 0 : CubeUtils::StrLen(node->str_));
|
||||
while (str_len > 0) {
|
||||
char_ptr[ch_idx--] = node->str_[--str_len];
|
||||
}
|
||||
|
||||
// if the edge is a root and does not have a NULL parent, insert a space
|
||||
LangModEdge *lm_edge = node->LangModelEdge();
|
||||
if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
|
||||
char_ptr[ch_idx--] = (char_32)' ';
|
||||
}
|
||||
|
||||
node = node->parent_node_;
|
||||
}
|
||||
|
||||
return char_ptr;
|
||||
}
|
||||
|
||||
// compares the path of two nodes and checks if its identical
|
||||
bool SearchNode::IdenticalPath(SearchNode *node1, SearchNode *node2) {
|
||||
if (node1 != NULL && node2 != NULL &&
|
||||
node1->best_path_len_ != node2->best_path_len_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// backtrack until either a root or a NULL edge is reached
|
||||
while (node1 != NULL && node2 != NULL) {
|
||||
if (node1->str_ != node2->str_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// stop if either nodes is a root
|
||||
if (node1->LangModelEdge()->IsRoot() || node2->LangModelEdge()->IsRoot()) {
|
||||
break;
|
||||
}
|
||||
|
||||
node1 = node1->parent_node_;
|
||||
node2 = node2->parent_node_;
|
||||
}
|
||||
|
||||
return ((node1 == NULL && node2 == NULL) ||
|
||||
(node1 != NULL && node1->LangModelEdge()->IsRoot() &&
|
||||
node2 != NULL && node2->LangModelEdge()->IsRoot()));
|
||||
}
|
||||
|
||||
// Computes the language model cost of a path
|
||||
int SearchNode::LangModCost(LangModEdge *current_lm_edge,
|
||||
SearchNode *parent_node) {
|
||||
int lm_cost = 0;
|
||||
int node_cnt = 0;
|
||||
|
||||
do {
|
||||
// check if root
|
||||
bool is_root = ((current_lm_edge != NULL && current_lm_edge->IsRoot()) ||
|
||||
parent_node == NULL);
|
||||
if (is_root) {
|
||||
node_cnt++;
|
||||
lm_cost += (current_lm_edge == NULL ? 0 : current_lm_edge->PathCost());
|
||||
}
|
||||
|
||||
// continue until we hit a null parent
|
||||
if (parent_node == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
// get the previous language model edge
|
||||
current_lm_edge = parent_node->LangModelEdge();
|
||||
// back track
|
||||
parent_node = parent_node->ParentNode();
|
||||
} while (true);
|
||||
|
||||
return static_cast<int>(lm_cost / static_cast<double>(node_cnt));
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,168 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_node.h
|
||||
* Description: Declaration of the Beam Search Node Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchNode class abstracts the search lattice node in the lattice
|
||||
// generated by the BeamSearch class
|
||||
// The SearchNode class holds the lang_mod_edge associated with the lattice
|
||||
// node. It also holds a pointer to the parent SearchNode in the search path
|
||||
// In addition it holds the recognition and the language model costs of the
|
||||
// node and the path leading to this node
|
||||
|
||||
#ifndef SEARCH_NODE_H
|
||||
#define SEARCH_NODE_H
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class SearchNode {
|
||||
public:
|
||||
SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
|
||||
int char_reco_cost, LangModEdge *edge, int col_idx);
|
||||
|
||||
~SearchNode();
|
||||
|
||||
// Updates the parent of the current node if the specified path yields
|
||||
// a better path cost
|
||||
bool UpdateParent(SearchNode *new_parent, int new_reco_cost,
|
||||
LangModEdge *new_edge);
|
||||
// returns the 32-bit string corresponding to the path leading to this node
|
||||
char_32 *PathString();
|
||||
// True if the two input nodes correspond to the same path
|
||||
static bool IdenticalPath(SearchNode *node1, SearchNode *node2);
|
||||
|
||||
inline const char_32 *NodeString() { return str_; }
|
||||
inline void SetString(char_32 *str) { str_ = str; }
|
||||
|
||||
// This node's character recognition cost.
|
||||
inline int CharRecoCost() { return char_reco_cost_; }
|
||||
// Total character recognition cost of the nodes in the best path,
|
||||
// excluding this node.
|
||||
inline int BestPathRecoCost() { return best_path_reco_cost_; }
|
||||
// Number of nodes in best path.
|
||||
inline int BestPathLength() { return best_path_len_; }
|
||||
// Mean mixed cost, i.e., mean character recognition cost +
|
||||
// current language model cost, all weighted by the RecoWgt parameter
|
||||
inline int BestCost() { return best_cost_; }
|
||||
// Mean character recognition cost of the nodes on the best path,
|
||||
// including this node.
|
||||
inline int BestRecoCost() { return mean_char_reco_cost_ ; }
|
||||
|
||||
inline int ColIdx() { return col_idx_; }
|
||||
inline SearchNode *ParentNode() { return parent_node_; }
|
||||
inline LangModEdge *LangModelEdge() { return lang_mod_edge_;}
|
||||
inline int LangModCost() { return LangModCost(lang_mod_edge_, parent_node_); }
|
||||
|
||||
// A comparer function that allows the SearchColumn class to sort the
|
||||
// nodes based on the path cost
|
||||
inline static int SearchNodeComparer(const void *node1, const void *node2) {
|
||||
return (*(reinterpret_cast<SearchNode * const *>(node1)))->best_cost_ -
|
||||
(*(reinterpret_cast<SearchNode * const *>(node2)))->best_cost_;
|
||||
}
|
||||
|
||||
private:
|
||||
CubeRecoContext *cntxt_;
|
||||
// Character code
|
||||
const char_32 *str_;
|
||||
// Recognition cost of most recent character
|
||||
int char_reco_cost_;
|
||||
// Mean mixed cost, i.e., mean character recognition cost +
|
||||
// current language model cost, all weighted by the RecoWgt parameter
|
||||
int best_cost_;
|
||||
// Mean character recognition cost of the nodes on the best path,
|
||||
// including this node.
|
||||
int mean_char_reco_cost_ ;
|
||||
// Total character recognition cost of the nodes in the best path,
|
||||
// excluding this node.
|
||||
int best_path_reco_cost_;
|
||||
// Number of nodes in best path.
|
||||
int best_path_len_;
|
||||
// Column index
|
||||
int col_idx_;
|
||||
// Parent Node
|
||||
SearchNode *parent_node_;
|
||||
// Language model edge
|
||||
LangModEdge *lang_mod_edge_;
|
||||
static int LangModCost(LangModEdge *lang_mod_edge, SearchNode *parent_node);
|
||||
};
|
||||
|
||||
// Implments a SearchNode hash table used to detect if a Search Node exists
|
||||
// or not. This is needed to make sure that identical paths in the BeamSearch
|
||||
// converge
|
||||
class SearchNodeHashTable {
|
||||
public:
|
||||
SearchNodeHashTable() {
|
||||
memset(bin_size_array_, 0, sizeof(bin_size_array_));
|
||||
}
|
||||
|
||||
~SearchNodeHashTable() {
|
||||
}
|
||||
|
||||
// inserts an entry in the hash table
|
||||
inline bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node) {
|
||||
// compute hash based on the edge and its parent node edge
|
||||
unsigned int edge_hash = lang_mod_edge->Hash();
|
||||
unsigned int parent_hash = (srch_node->ParentNode() == NULL ?
|
||||
0 : srch_node->ParentNode()->LangModelEdge()->Hash());
|
||||
unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
|
||||
|
||||
// already maxed out, just fail
|
||||
if (bin_size_array_[hash_bin] >= kMaxSearchNodePerBin) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bin_array_[hash_bin][bin_size_array_[hash_bin]++] = srch_node;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up an entry in the hash table
|
||||
inline SearchNode *Lookup(LangModEdge *lang_mod_edge,
|
||||
SearchNode *parent_node) {
|
||||
// compute hash based on the edge and its parent node edge
|
||||
unsigned int edge_hash = lang_mod_edge->Hash();
|
||||
unsigned int parent_hash = (parent_node == NULL ?
|
||||
0 : parent_node->LangModelEdge()->Hash());
|
||||
unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
|
||||
|
||||
// lookup the entries in the hash bin
|
||||
for (int node_idx = 0; node_idx < bin_size_array_[hash_bin]; node_idx++) {
|
||||
if (lang_mod_edge->IsIdentical(
|
||||
bin_array_[hash_bin][node_idx]->LangModelEdge()) == true &&
|
||||
SearchNode::IdenticalPath(
|
||||
bin_array_[hash_bin][node_idx]->ParentNode(), parent_node) == true) {
|
||||
return bin_array_[hash_bin][node_idx];
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
// Hash bin size parameters. These were determined emperically. These affect
|
||||
// the speed of the beam search but have no impact on accuracy
|
||||
static const int kSearchNodeHashBins = 4096;
|
||||
static const int kMaxSearchNodePerBin = 512;
|
||||
int bin_size_array_[kSearchNodeHashBins];
|
||||
SearchNode *bin_array_[kSearchNodeHashBins][kMaxSearchNodePerBin];
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_NODE_H
|
@ -1,55 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_object.h
|
||||
* Description: Declaration of the Beam Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchObject class represents a char_samp (a word bitmap) that is
|
||||
// being searched for characters (or recognizeable entities).
|
||||
// This is an abstract class that all SearchObjects should inherit from
|
||||
// A SearchObject class provides methods to:
|
||||
// 1- Returns the count of segments
|
||||
// 2- Recognize a segment range
|
||||
// 3- Creates a CharSamp for a segment range
|
||||
|
||||
#ifndef SEARCH_OBJECT_H
|
||||
#define SEARCH_OBJECT_H
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_samp.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
class SearchObject {
|
||||
public:
|
||||
explicit SearchObject(CubeRecoContext *cntxt) { cntxt_ = cntxt; }
|
||||
virtual ~SearchObject() {}
|
||||
|
||||
virtual int SegPtCnt() = 0;
|
||||
virtual CharAltList *RecognizeSegment(int start_pt, int end_pt) = 0;
|
||||
virtual CharSamp *CharSample(int start_pt, int end_pt) = 0;
|
||||
virtual Box* CharBox(int start_pt, int end_pt) = 0;
|
||||
|
||||
virtual int SpaceCost(int seg_pt) = 0;
|
||||
virtual int NoSpaceCost(int seg_pt) = 0;
|
||||
virtual int NoSpaceCost(int start_pt, int end_pt) = 0;
|
||||
|
||||
protected:
|
||||
CubeRecoContext *cntxt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_OBJECT_H
|
@ -1,44 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: string_32.h
|
||||
* Description: Declaration of a 32 Bit string class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// the string_32 class provides the functionality needed
|
||||
// for a 32-bit string class
|
||||
|
||||
#ifndef STRING_32_H
|
||||
#define STRING_32_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::basic_string;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// basic definitions
|
||||
typedef signed int char_32;
|
||||
typedef basic_string<char_32> string_32;
|
||||
}
|
||||
|
||||
#endif // STRING_32_H
|
@ -1,120 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_mod_edge.cpp
|
||||
* Description: Implementation of the Tesseract Language Model Edge Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "tess_lang_mod_edge.h"
|
||||
#include "const.h"
|
||||
#include "unichar.h"
|
||||
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
// OOD constructor
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = NULL;
|
||||
start_edge_ = 0;
|
||||
end_edge_ = 0;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
/**
|
||||
* leading, trailing punc constructor and single byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
||||
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = dawg;
|
||||
start_edge_ = edge_idx;
|
||||
end_edge_ = edge_idx;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
/**
|
||||
* dict constructor: multi byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
|
||||
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||||
int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = dawg;
|
||||
start_edge_ = start_edge_idx;
|
||||
end_edge_ = end_edge_idx;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
char *TessLangModEdge::Description() const {
|
||||
char *char_ptr = new char[256];
|
||||
|
||||
char dawg_str[256];
|
||||
char edge_str[32];
|
||||
if (dawg_ == (Dawg *)DAWG_OOD) {
|
||||
strcpy(dawg_str, "OOD");
|
||||
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
||||
strcpy(dawg_str, "NUM");
|
||||
} else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
|
||||
strcpy(dawg_str, "Main");
|
||||
} else if (dawg_->permuter() == USER_DAWG_PERM) {
|
||||
strcpy(dawg_str, "User");
|
||||
} else if (dawg_->permuter() == DOC_DAWG_PERM) {
|
||||
strcpy(dawg_str, "Doc");
|
||||
} else {
|
||||
strcpy(dawg_str, "N/A");
|
||||
}
|
||||
|
||||
sprintf(edge_str, "%d", static_cast<int>(start_edge_));
|
||||
if (IsLeadingPuncEdge(edge_mask_)) {
|
||||
strcat(edge_str, "-LP");
|
||||
}
|
||||
if (IsTrailingPuncEdge(edge_mask_)) {
|
||||
strcat(edge_str, "-TP");
|
||||
}
|
||||
sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
|
||||
dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
|
||||
|
||||
return char_ptr;
|
||||
}
|
||||
|
||||
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
|
||||
const Dawg *dawg,
|
||||
NODE_REF parent_node,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_cnt = 0;
|
||||
NodeChildVector vec;
|
||||
dawg->unichar_ids_of(parent_node, &vec, false); // find all children
|
||||
for (int i = 0; i < vec.size(); ++i) {
|
||||
const NodeChild &child = vec[i];
|
||||
if (child.unichar_id == INVALID_UNICHAR_ID) continue;
|
||||
edge_array[edge_cnt++] =
|
||||
new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
|
||||
}
|
||||
return edge_cnt;
|
||||
}
|
||||
}
|
@ -1,233 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_mod_edge.h
|
||||
* Description: Declaration of the Tesseract Language Model Edge Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TessLangModEdge models an edge in the Tesseract language models
|
||||
// It inherits from the LangModEdge class
|
||||
|
||||
#ifndef TESS_LANG_MOD_EDGE_H
|
||||
#define TESS_LANG_MOD_EDGE_H
|
||||
|
||||
#include "dawg.h"
|
||||
#include "char_set.h"
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
// Macros needed to identify punctuation in the langmodel state
|
||||
#ifdef _HMSW32_H
|
||||
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000i64
|
||||
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000i64
|
||||
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000i64
|
||||
#else
|
||||
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000ll
|
||||
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000ll
|
||||
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000ll
|
||||
#endif
|
||||
|
||||
// Number state machine macros
|
||||
#define NUMBER_STATE_SHIFT 0
|
||||
#define NUMBER_STATE_MASK 0x0000000fl
|
||||
#define NUMBER_LITERAL_SHIFT 4
|
||||
#define NUMBER_LITERAL_MASK 0x000000f0l
|
||||
#define NUMBER_REPEAT_SHIFT 8
|
||||
#define NUMBER_REPEAT_MASK 0x00000f00l
|
||||
#define NUM_TRM -99
|
||||
#define TRAIL_PUNC_REPEAT_SHIFT 48
|
||||
|
||||
#define IsLeadingPuncEdge(edge_mask) \
|
||||
((edge_mask & LEAD_PUNC_EDGE_REF_MASK) != 0)
|
||||
#define IsTrailingPuncEdge(edge_mask) \
|
||||
((edge_mask & TRAIL_PUNC_EDGE_REF_MASK) != 0)
|
||||
#define TrailingPuncCount(edge_mask) \
|
||||
((edge_mask & TRAIL_PUNC_REPEAT_MASK) >> TRAIL_PUNC_REPEAT_SHIFT)
|
||||
#define TrailingPuncEdgeMask(Cnt) \
|
||||
(TRAIL_PUNC_EDGE_REF_MASK | ((Cnt) << TRAIL_PUNC_REPEAT_SHIFT))
|
||||
|
||||
// State machine IDs
|
||||
#define DAWG_OOD 0
|
||||
#define DAWG_NUMBER 1
|
||||
|
||||
namespace tesseract {
|
||||
class TessLangModEdge : public LangModEdge {
|
||||
public:
|
||||
// Different ways of constructing a TessLangModEdge
|
||||
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
|
||||
EDGE_REF edge, int class_id);
|
||||
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
|
||||
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||||
int class_id);
|
||||
TessLangModEdge(CubeRecoContext *cntxt, int class_id);
|
||||
~TessLangModEdge() {}
|
||||
|
||||
// Accessors
|
||||
inline bool IsRoot() const {
|
||||
return root_;
|
||||
}
|
||||
inline void SetRoot(bool flag) { root_ = flag; }
|
||||
|
||||
inline bool IsOOD() const {
|
||||
return (dawg_ == (Dawg *)DAWG_OOD);
|
||||
}
|
||||
|
||||
inline bool IsNumber() const {
|
||||
return (dawg_ == (Dawg *)DAWG_NUMBER);
|
||||
}
|
||||
|
||||
inline bool IsEOW() const {
|
||||
return (IsTerminal() || (dawg_->end_of_word(end_edge_) != 0));
|
||||
}
|
||||
|
||||
inline const Dawg *GetDawg() const { return dawg_; }
|
||||
inline EDGE_REF StartEdge() const { return start_edge_; }
|
||||
inline EDGE_REF EndEdge() const { return end_edge_; }
|
||||
inline EDGE_REF EdgeMask() const { return edge_mask_; }
|
||||
inline const char_32 * EdgeString() const { return str_; }
|
||||
inline int ClassID () const { return class_id_; }
|
||||
inline int PathCost() const { return path_cost_; }
|
||||
inline void SetEdgeMask(EDGE_REF edge_mask) { edge_mask_ = edge_mask; }
|
||||
inline void SetDawg(Dawg *dawg) { dawg_ = dawg; }
|
||||
inline void SetStartEdge(EDGE_REF edge_idx) { start_edge_ = edge_idx; }
|
||||
inline void SetEndEdge(EDGE_REF edge_idx) { end_edge_ = edge_idx; }
|
||||
|
||||
// is this a terminal node:
|
||||
// we can terminate at any OOD char, trailing punc or
|
||||
// when the dawg terminates
|
||||
inline bool IsTerminal() const {
|
||||
return (IsOOD() || IsNumber() || IsTrailingPuncEdge(start_edge_) ||
|
||||
dawg_->next_node(end_edge_) == 0);
|
||||
}
|
||||
|
||||
// How many signals does the LM provide for tuning. These are flags like:
|
||||
// OOD or not, Number of not that are used by the training to compute
|
||||
// extra costs for each word.
|
||||
inline int SignalCnt() const {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// returns the weight assigned to a specified signal
|
||||
inline double SignalWgt(int signal) const {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (params != NULL) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
return params->OODWgt();
|
||||
break;
|
||||
|
||||
case 1:
|
||||
return params->NumWgt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// sets the weight assigned to a specified signal: Used in training
|
||||
void SetSignalWgt(int signal, double wgt) {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (params != NULL) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
params->SetOODWgt(wgt);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
params->SetNumWgt(wgt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// returns the actual value of a specified signal
|
||||
int Signal(int signal) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
return IsOOD() ? MIN_PROB_COST : 0;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
return IsNumber() ? MIN_PROB_COST : 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// returns the Hash value of the edge. Used by the SearchNode hash table
|
||||
// to quickly lookup exisiting edges to converge during search
|
||||
inline unsigned int Hash() const {
|
||||
return static_cast<unsigned int>(
|
||||
((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
||||
((unsigned int)edge_mask_) ^ class_id_);
|
||||
}
|
||||
|
||||
// A verbal description of the edge: Used by visualizers
|
||||
char *Description() const;
|
||||
|
||||
// Is this edge identical to the specified edge
|
||||
inline bool IsIdentical(LangModEdge *lang_mod_edge) const {
|
||||
return (class_id_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->class_id_ &&
|
||||
str_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->str_ &&
|
||||
dawg_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->dawg_ &&
|
||||
start_edge_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->start_edge_ &&
|
||||
end_edge_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->end_edge_ &&
|
||||
edge_mask_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->edge_mask_);
|
||||
}
|
||||
|
||||
// Creates a set of fan-out edges for the specified edge
|
||||
static int CreateChildren(CubeRecoContext *cntxt,
|
||||
const Dawg *edges,
|
||||
NODE_REF edge_reg,
|
||||
LangModEdge **lm_edges);
|
||||
|
||||
private:
|
||||
bool root_;
|
||||
CubeRecoContext *cntxt_;
|
||||
const Dawg *dawg_;
|
||||
EDGE_REF start_edge_;
|
||||
EDGE_REF end_edge_;
|
||||
EDGE_REF edge_mask_;
|
||||
int path_cost_;
|
||||
int class_id_;
|
||||
const char_32 * str_;
|
||||
// returns the cost of the lang_mod_edge
|
||||
inline int Cost() const {
|
||||
if (cntxt_ != NULL) {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (dawg_ == (Dawg *)DAWG_OOD) {
|
||||
return static_cast<int>(params->OODWgt() * MIN_PROB_COST);
|
||||
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
||||
return static_cast<int>(params->NumWgt() * MIN_PROB_COST);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESS_LANG_MOD_EDGE_H
|
@ -1,506 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_model.cpp
|
||||
* Description: Implementation of the Tesseract Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TessLangModel class abstracts the Tesseract language model. It inherits
|
||||
// from the LangModel class. The Tesseract language model encompasses several
|
||||
// Dawgs (words from training data, punctuation, numbers, document words).
|
||||
// On top of this Cube adds an OOD state machine
|
||||
// The class provides methods to traverse the language model in a generative
|
||||
// fashion. Given any node in the DAWG, the language model can generate a list
|
||||
// of children (or fan-out) edges
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "cube_utils.h"
|
||||
#include "dict.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
// max fan-out (used for preallocation). Initialized here, but modified by
|
||||
// constructor
|
||||
int TessLangModel::max_edge_ = 4096;
|
||||
|
||||
// Language model extra State machines
|
||||
const Dawg *TessLangModel::ood_dawg_ = reinterpret_cast<Dawg *>(DAWG_OOD);
|
||||
const Dawg *TessLangModel::number_dawg_ = reinterpret_cast<Dawg *>(DAWG_NUMBER);
|
||||
|
||||
// number state machine
|
||||
const int TessLangModel::num_state_machine_[kStateCnt][kNumLiteralCnt] = {
|
||||
{0, 1, 1, NUM_TRM, NUM_TRM},
|
||||
{NUM_TRM, 1, 1, 3, 2},
|
||||
{NUM_TRM, NUM_TRM, 1, NUM_TRM, 2},
|
||||
{NUM_TRM, NUM_TRM, 3, NUM_TRM, 2},
|
||||
};
|
||||
const int TessLangModel::num_max_repeat_[kStateCnt] = {3, 32, 8, 3};
|
||||
|
||||
// thresholds and penalties
|
||||
int TessLangModel::max_ood_shape_cost_ = CubeUtils::Prob2Cost(1e-4);
|
||||
|
||||
TessLangModel::TessLangModel(const string &lm_params,
|
||||
const string &data_file_path,
|
||||
bool load_system_dawg,
|
||||
TessdataManager *tessdata_manager,
|
||||
CubeRecoContext *cntxt) {
|
||||
cntxt_ = cntxt;
|
||||
has_case_ = cntxt_->HasCase();
|
||||
// Load the rest of the language model elements from file
|
||||
LoadLangModelElements(lm_params);
|
||||
// Load word_dawgs_ if needed.
|
||||
if (tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
|
||||
word_dawgs_ = new DawgVector();
|
||||
if (load_system_dawg &&
|
||||
tessdata_manager->SeekToStart(TESSDATA_CUBE_SYSTEM_DAWG)) {
|
||||
// The last parameter to the Dawg constructor (the debug level) is set to
|
||||
// false, until Cube has a way to express its preferred debug level.
|
||||
*word_dawgs_ += new SquishedDawg(tessdata_manager->GetDataFilePtr(),
|
||||
DAWG_TYPE_WORD,
|
||||
cntxt_->Lang().c_str(),
|
||||
SYSTEM_DAWG_PERM, false);
|
||||
}
|
||||
} else {
|
||||
word_dawgs_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup an edge array
|
||||
void TessLangModel::FreeEdges(int edge_cnt, LangModEdge **edge_array) {
|
||||
if (edge_array != NULL) {
|
||||
for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) {
|
||||
if (edge_array[edge_idx] != NULL) {
|
||||
delete edge_array[edge_idx];
|
||||
}
|
||||
}
|
||||
delete []edge_array;
|
||||
}
|
||||
}
|
||||
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the specified edge. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool TessLangModel::IsValidSequence(LangModEdge *edge,
|
||||
const char_32 *sequence,
|
||||
bool eow_flag,
|
||||
LangModEdge **final_edge) {
|
||||
// get the edges emerging from this edge
|
||||
int edge_cnt = 0;
|
||||
LangModEdge **edge_array = GetEdges(NULL, edge, &edge_cnt);
|
||||
|
||||
// find the 1st char in the sequence in the children
|
||||
for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) {
|
||||
// found a match
|
||||
if (sequence[0] == edge_array[edge_idx]->EdgeString()[0]) {
|
||||
// if this is the last char
|
||||
if (sequence[1] == 0) {
|
||||
// succeed if we are in prefix mode or this is a terminal edge
|
||||
if (eow_flag == false || edge_array[edge_idx]->IsEOW()) {
|
||||
if (final_edge != NULL) {
|
||||
(*final_edge) = edge_array[edge_idx];
|
||||
edge_array[edge_idx] = NULL;
|
||||
}
|
||||
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// not the last char continue checking
|
||||
if (IsValidSequence(edge_array[edge_idx], sequence + 1, eow_flag,
|
||||
final_edge) == true) {
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the root. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool TessLangModel::IsValidSequence(const char_32 *sequence, bool eow_flag,
|
||||
LangModEdge **final_edge) {
|
||||
if (final_edge != NULL) {
|
||||
(*final_edge) = NULL;
|
||||
}
|
||||
|
||||
return IsValidSequence(NULL, sequence, eow_flag, final_edge);
|
||||
}
|
||||
|
||||
bool TessLangModel::IsLeadingPunc(const char_32 ch) {
|
||||
return lead_punc_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
bool TessLangModel::IsTrailingPunc(const char_32 ch) {
|
||||
return trail_punc_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
bool TessLangModel::IsDigit(const char_32 ch) {
|
||||
return digits_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
// The general fan-out generation function. Returns the list of edges
|
||||
// fanning-out of the specified edge and their count. If an AltList is
|
||||
// specified, only the class-ids with a minimum cost are considered
|
||||
LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *lang_mod_edge,
|
||||
int *edge_cnt) {
|
||||
TessLangModEdge *tess_lm_edge =
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge);
|
||||
LangModEdge **edge_array = NULL;
|
||||
(*edge_cnt) = 0;
|
||||
|
||||
// if we are starting from the root, we'll instantiate every DAWG
|
||||
// and get the all the edges that emerge from the root
|
||||
if (tess_lm_edge == NULL) {
|
||||
// get DAWG count from Tesseract
|
||||
int dawg_cnt = NumDawgs();
|
||||
// preallocate the edge buffer
|
||||
(*edge_cnt) = dawg_cnt * max_edge_;
|
||||
edge_array = new LangModEdge *[(*edge_cnt)];
|
||||
|
||||
for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) {
|
||||
const Dawg *curr_dawg = GetDawg(dawg_idx);
|
||||
// Only look through word Dawgs (since there is a special way of
|
||||
// handling numbers and punctuation).
|
||||
if (curr_dawg->type() == DAWG_TYPE_WORD) {
|
||||
(*edge_cnt) += FanOut(alt_list, curr_dawg, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
}
|
||||
} // dawg
|
||||
|
||||
(*edge_cnt) += FanOut(alt_list, number_dawg_, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
// OOD: it is intentionally not added to the list to make sure it comes
|
||||
// at the end
|
||||
(*edge_cnt) += FanOut(alt_list, ood_dawg_, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
// set the root flag for all root edges
|
||||
for (int edge_idx = 0; edge_idx < (*edge_cnt); edge_idx++) {
|
||||
edge_array[edge_idx]->SetRoot(true);
|
||||
}
|
||||
} else { // not starting at the root
|
||||
// preallocate the edge buffer
|
||||
(*edge_cnt) = max_edge_;
|
||||
// allocate memory for edges
|
||||
edge_array = new LangModEdge *[(*edge_cnt)];
|
||||
|
||||
// get the FanOut edges from the root of each dawg
|
||||
(*edge_cnt) = FanOut(alt_list,
|
||||
tess_lm_edge->GetDawg(),
|
||||
tess_lm_edge->EndEdge(), tess_lm_edge->EdgeMask(),
|
||||
tess_lm_edge->EdgeString(), false, edge_array);
|
||||
}
|
||||
return edge_array;
|
||||
}
|
||||
|
||||
// generate edges from an NULL terminated string
|
||||
// (used for punctuation, operators and digits)
|
||||
int TessLangModel::Edges(const char *strng, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_mask,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_idx,
|
||||
edge_cnt = 0;
|
||||
|
||||
for (edge_idx = 0; strng[edge_idx] != 0; edge_idx++) {
|
||||
int class_id = cntxt_->CharacterSet()->ClassID((char_32)strng[edge_idx]);
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
// create an edge object
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref,
|
||||
class_id);
|
||||
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
|
||||
SetEdgeMask(edge_mask);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// generate OOD edges
|
||||
int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
|
||||
EDGE_REF edge_ref_mask, LangModEdge **edge_array) {
|
||||
int class_cnt = cntxt_->CharacterSet()->ClassCount();
|
||||
int edge_cnt = 0;
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// produce an OOD edge only if the cost of the char is low enough
|
||||
if ((alt_list == NULL ||
|
||||
alt_list->ClassCost(class_id) <= max_ood_shape_cost_)) {
|
||||
// create an edge object
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, class_id);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// computes and returns the edges that fan out of an edge ref
|
||||
int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_mask,
|
||||
const char_32 *str, bool root_flag,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_cnt = 0;
|
||||
NODE_REF next_node = NO_EDGE;
|
||||
|
||||
// OOD
|
||||
if (dawg == reinterpret_cast<Dawg *>(DAWG_OOD)) {
|
||||
if (ood_enabled_ == true) {
|
||||
return OODEdges(alt_list, edge_ref, edge_mask, edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (dawg == reinterpret_cast<Dawg *>(DAWG_NUMBER)) {
|
||||
// Number
|
||||
if (numeric_enabled_ == true) {
|
||||
return NumberEdges(edge_ref, edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (IsTrailingPuncEdge(edge_mask)) {
|
||||
// a TRAILING PUNC MASK, generate more trailing punctuation and return
|
||||
if (punc_enabled_ == true) {
|
||||
EDGE_REF trail_cnt = TrailingPuncCount(edge_mask);
|
||||
return Edges(trail_punc_.c_str(), dawg, edge_ref,
|
||||
TrailingPuncEdgeMask(trail_cnt + 1), edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (root_flag == true || edge_ref == 0) {
|
||||
// Root, generate leading punctuation and continue
|
||||
if (root_flag) {
|
||||
if (punc_enabled_ == true) {
|
||||
edge_cnt += Edges(lead_punc_.c_str(), dawg, 0, LEAD_PUNC_EDGE_REF_MASK,
|
||||
edge_array);
|
||||
}
|
||||
}
|
||||
next_node = 0;
|
||||
} else {
|
||||
// a node in the main trie
|
||||
bool eow_flag = (dawg->end_of_word(edge_ref) != 0);
|
||||
|
||||
// for EOW
|
||||
if (eow_flag == true) {
|
||||
// generate trailing punctuation
|
||||
if (punc_enabled_ == true) {
|
||||
edge_cnt += Edges(trail_punc_.c_str(), dawg, edge_ref,
|
||||
TrailingPuncEdgeMask((EDGE_REF)1), edge_array);
|
||||
// generate a hyphen and go back to the root
|
||||
edge_cnt += Edges("-/", dawg, 0, 0, edge_array + edge_cnt);
|
||||
}
|
||||
}
|
||||
|
||||
// advance node
|
||||
next_node = dawg->next_node(edge_ref);
|
||||
if (next_node == 0 || next_node == NO_EDGE) {
|
||||
return edge_cnt;
|
||||
}
|
||||
}
|
||||
|
||||
// now get all the emerging edges if word list is enabled
|
||||
if (word_list_enabled_ == true && next_node != NO_EDGE) {
|
||||
// create child edges
|
||||
int child_edge_cnt =
|
||||
TessLangModEdge::CreateChildren(cntxt_, dawg, next_node,
|
||||
edge_array + edge_cnt);
|
||||
int strt_cnt = edge_cnt;
|
||||
|
||||
// set the edge mask
|
||||
for (int child = 0; child < child_edge_cnt; child++) {
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt++])->
|
||||
SetEdgeMask(edge_mask);
|
||||
}
|
||||
|
||||
// if we are at the root, create upper case forms of these edges if possible
|
||||
if (root_flag == true) {
|
||||
for (int child = 0; child < child_edge_cnt; child++) {
|
||||
TessLangModEdge *child_edge =
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[strt_cnt + child]);
|
||||
|
||||
if (has_case_ == true) {
|
||||
const char_32 *edge_str = child_edge->EdgeString();
|
||||
if (edge_str != NULL && islower(edge_str[0]) != 0 &&
|
||||
edge_str[1] == 0) {
|
||||
int class_id =
|
||||
cntxt_->CharacterSet()->ClassID(toupper(edge_str[0]));
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
// generate an upper case edge for lower case chars
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg,
|
||||
child_edge->StartEdge(), child_edge->EndEdge(), class_id);
|
||||
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
|
||||
SetEdgeMask(edge_mask);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// Generate the edges fanning-out from an edge in the number state machine
|
||||
int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) {
|
||||
EDGE_REF new_state,
|
||||
state;
|
||||
|
||||
inT64 repeat_cnt,
|
||||
new_repeat_cnt;
|
||||
|
||||
state = ((edge_ref & NUMBER_STATE_MASK) >> NUMBER_STATE_SHIFT);
|
||||
repeat_cnt = ((edge_ref & NUMBER_REPEAT_MASK) >> NUMBER_REPEAT_SHIFT);
|
||||
|
||||
if (state < 0 || state >= kStateCnt) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// go through all valid transitions from the state
|
||||
int edge_cnt = 0;
|
||||
|
||||
EDGE_REF new_edge_ref;
|
||||
|
||||
for (int lit = 0; lit < kNumLiteralCnt; lit++) {
|
||||
// move to the new state
|
||||
new_state = num_state_machine_[state][lit];
|
||||
if (new_state == NUM_TRM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (new_state == state) {
|
||||
new_repeat_cnt = repeat_cnt + 1;
|
||||
} else {
|
||||
new_repeat_cnt = 1;
|
||||
}
|
||||
|
||||
// not allowed to repeat beyond this
|
||||
if (new_repeat_cnt > num_max_repeat_[state]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_edge_ref = (new_state << NUMBER_STATE_SHIFT) |
|
||||
(lit << NUMBER_LITERAL_SHIFT) |
|
||||
(new_repeat_cnt << NUMBER_REPEAT_SHIFT);
|
||||
|
||||
edge_cnt += Edges(literal_str_[lit]->c_str(), number_dawg_,
|
||||
new_edge_ref, 0, edge_array + edge_cnt);
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// Loads Language model elements from contents of the <lang>.cube.lm file
|
||||
bool TessLangModel::LoadLangModelElements(const string &lm_params) {
|
||||
bool success = true;
|
||||
// split into lines, each corresponding to a token type below
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(lm_params, "\r\n", &str_vec);
|
||||
for (int entry = 0; entry < str_vec.size(); entry++) {
|
||||
vector<string> tokens;
|
||||
// should be only two tokens: type and value
|
||||
CubeUtils::SplitStringUsing(str_vec[entry], "=", &tokens);
|
||||
if (tokens.size() != 2)
|
||||
success = false;
|
||||
if (tokens[0] == "LeadPunc") {
|
||||
lead_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "TrailPunc") {
|
||||
trail_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "NumLeadPunc") {
|
||||
num_lead_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "NumTrailPunc") {
|
||||
num_trail_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "Operators") {
|
||||
operators_ = tokens[1];
|
||||
} else if (tokens[0] == "Digits") {
|
||||
digits_ = tokens[1];
|
||||
} else if (tokens[0] == "Alphas") {
|
||||
alphas_ = tokens[1];
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
RemoveInvalidCharacters(&num_lead_punc_);
|
||||
RemoveInvalidCharacters(&num_trail_punc_);
|
||||
RemoveInvalidCharacters(&digits_);
|
||||
RemoveInvalidCharacters(&operators_);
|
||||
RemoveInvalidCharacters(&alphas_);
|
||||
|
||||
// form the array of literal strings needed for number state machine
|
||||
// It is essential that the literal strings go in the order below
|
||||
literal_str_[0] = &num_lead_punc_;
|
||||
literal_str_[1] = &num_trail_punc_;
|
||||
literal_str_[2] = &digits_;
|
||||
literal_str_[3] = &operators_;
|
||||
literal_str_[4] = &alphas_;
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
void TessLangModel::RemoveInvalidCharacters(string *lm_str) {
|
||||
CharSet *char_set = cntxt_->CharacterSet();
|
||||
tesseract::string_32 lm_str32;
|
||||
CubeUtils::UTF8ToUTF32(lm_str->c_str(), &lm_str32);
|
||||
|
||||
int len = CubeUtils::StrLen(lm_str32.c_str());
|
||||
char_32 *clean_str32 = new char_32[len + 1];
|
||||
int clean_len = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int class_id = char_set->ClassID((char_32)lm_str32[i]);
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
clean_str32[clean_len] = lm_str32[i];
|
||||
++clean_len;
|
||||
}
|
||||
}
|
||||
clean_str32[clean_len] = 0;
|
||||
if (clean_len < len) {
|
||||
lm_str->clear();
|
||||
CubeUtils::UTF32ToUTF8(clean_str32, lm_str);
|
||||
}
|
||||
delete [] clean_str32;
|
||||
}
|
||||
|
||||
int TessLangModel::NumDawgs() const {
|
||||
return (word_dawgs_ != NULL) ?
|
||||
word_dawgs_->size() : cntxt_->TesseractObject()->getDict().NumDawgs();
|
||||
}
|
||||
|
||||
// Returns the dawgs with the given index from either the dawgs
|
||||
// stored by the Tesseract object, or the word_dawgs_.
|
||||
const Dawg *TessLangModel::GetDawg(int index) const {
|
||||
if (word_dawgs_ != NULL) {
|
||||
ASSERT_HOST(index < word_dawgs_->size());
|
||||
return (*word_dawgs_)[index];
|
||||
} else {
|
||||
ASSERT_HOST(index < cntxt_->TesseractObject()->getDict().NumDawgs());
|
||||
return cntxt_->TesseractObject()->getDict().GetDawg(index);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_model.h
|
||||
* Description: Declaration of the Tesseract Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESS_LANG_MODEL_H
|
||||
#define TESS_LANG_MODEL_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "lang_model.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tess_lang_mod_edge.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const int kStateCnt = 4;
|
||||
const int kNumLiteralCnt = 5;
|
||||
|
||||
class TessLangModel : public LangModel {
|
||||
public:
|
||||
TessLangModel(const string &lm_params,
|
||||
const string &data_file_path,
|
||||
bool load_system_dawg,
|
||||
TessdataManager *tessdata_manager,
|
||||
CubeRecoContext *cntxt);
|
||||
~TessLangModel() {
|
||||
if (word_dawgs_ != NULL) {
|
||||
word_dawgs_->delete_data_pointers();
|
||||
delete word_dawgs_;
|
||||
}
|
||||
}
|
||||
|
||||
// returns a pointer to the root of the language model
|
||||
inline TessLangModEdge *Root() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The general fan-out generation function. Returns the list of edges
|
||||
// fanning-out of the specified edge and their count. If an AltList is
|
||||
// specified, only the class-ids with a minimum cost are considered
|
||||
LangModEdge **GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt);
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the root. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool IsValidSequence(const char_32 *sequence, bool eow_flag,
|
||||
LangModEdge **final_edge = NULL);
|
||||
bool IsLeadingPunc(char_32 ch);
|
||||
bool IsTrailingPunc(char_32 ch);
|
||||
bool IsDigit(char_32 ch);
|
||||
|
||||
void RemoveInvalidCharacters(string *lm_str);
|
||||
private:
|
||||
// static LM state machines
|
||||
static const Dawg *ood_dawg_;
|
||||
static const Dawg *number_dawg_;
|
||||
static const int num_state_machine_[kStateCnt][kNumLiteralCnt];
|
||||
static const int num_max_repeat_[kStateCnt];
|
||||
// word_dawgs_ should only be loaded if cube has its own version of the
|
||||
// unicharset (different from the one used by tesseract) and therefore
|
||||
// can not use the dawgs loaded for tesseract (since the unichar ids
|
||||
// encoded in the dawgs differ).
|
||||
DawgVector *word_dawgs_;
|
||||
|
||||
static int max_edge_;
|
||||
static int max_ood_shape_cost_;
|
||||
|
||||
// remaining language model elements needed by cube. These get loaded from
|
||||
// the .lm file
|
||||
string lead_punc_;
|
||||
string trail_punc_;
|
||||
string num_lead_punc_;
|
||||
string num_trail_punc_;
|
||||
string operators_;
|
||||
string digits_;
|
||||
string alphas_;
|
||||
// String of characters in RHS of each line of <lang>.cube.lm
|
||||
// Each element is hard-coded to correspond to a specific token type
|
||||
// (see LoadLangModelElements)
|
||||
string *literal_str_[kNumLiteralCnt];
|
||||
// Recognition context needed to access language properties
|
||||
// (case, cursive,..)
|
||||
CubeRecoContext *cntxt_;
|
||||
bool has_case_;
|
||||
|
||||
// computes and returns the edges that fan out of an edge ref
|
||||
int FanOut(CharAltList *alt_list,
|
||||
const Dawg *dawg, EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
|
||||
const char_32 *str, bool root_flag, LangModEdge **edge_array);
|
||||
// generate edges from an NULL terminated string
|
||||
// (used for punctuation, operators and digits)
|
||||
int Edges(const char *strng, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
|
||||
LangModEdge **edge_array);
|
||||
// Generate the edges fanning-out from an edge in the number state machine
|
||||
int NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array);
|
||||
// Generate OOD edges
|
||||
int OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
|
||||
EDGE_REF edge_ref_mask, LangModEdge **edge_array);
|
||||
// Cleanup an edge array
|
||||
void FreeEdges(int edge_cnt, LangModEdge **edge_array);
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the specified edge. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool IsValidSequence(LangModEdge *edge, const char_32 *sequence,
|
||||
bool eow_flag, LangModEdge **final_edge);
|
||||
// Parse language model elements from the given string, which should
|
||||
// have been loaded from <lang>.cube.lm file, e.g. in CubeRecoContext
|
||||
bool LoadLangModelElements(const string &lm_params);
|
||||
|
||||
// Returns the number of word Dawgs in the language model.
|
||||
int NumDawgs() const;
|
||||
|
||||
// Returns the dawgs with the given index from either the dawgs
|
||||
// stored by the Tesseract object, or the word_dawgs_.
|
||||
const Dawg *GetDawg(int index) const;
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // TESS_LANG_MODEL_H
|
@ -1,129 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tuning_params.h
|
||||
* Description: Declaration of the Tuning Parameters Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TuningParams class abstracts all the parameters that can be learned or
|
||||
// tuned during the training process. It is a base class that all TuningParams
|
||||
// classes should inherit from.
|
||||
|
||||
#ifndef TUNING_PARAMS_H
|
||||
#define TUNING_PARAMS_H
|
||||
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class TuningParams {
|
||||
public:
|
||||
enum type_classifer {
|
||||
NN,
|
||||
HYBRID_NN
|
||||
};
|
||||
enum type_feature {
|
||||
BMP,
|
||||
CHEBYSHEV,
|
||||
HYBRID
|
||||
};
|
||||
|
||||
TuningParams() {}
|
||||
virtual ~TuningParams() {}
|
||||
// Accessor functions
|
||||
inline double RecoWgt() const { return reco_wgt_; }
|
||||
inline double SizeWgt() const { return size_wgt_; }
|
||||
inline double CharBigramWgt() const { return char_bigrams_wgt_; }
|
||||
inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
|
||||
inline int MaxSegPerChar() const { return max_seg_per_char_; }
|
||||
inline int BeamWidth() const { return beam_width_; }
|
||||
inline int TypeClassifier() const { return tp_classifier_; }
|
||||
inline int TypeFeature() const { return tp_feat_; }
|
||||
inline int ConvGridSize() const { return conv_grid_size_; }
|
||||
inline int HistWindWid() const { return hist_wind_wid_; }
|
||||
inline int MinConCompSize() const { return min_con_comp_size_; }
|
||||
inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
|
||||
inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
|
||||
inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
|
||||
inline double CombinerRunThresh() const { return combiner_run_thresh_; }
|
||||
inline double CombinerClassifierThresh() const {
|
||||
return combiner_classifier_thresh_; }
|
||||
|
||||
inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
|
||||
inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
|
||||
inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
|
||||
inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
|
||||
inline void SetMaxSegPerChar(int max_seg_per_char) {
|
||||
max_seg_per_char_ = max_seg_per_char;
|
||||
}
|
||||
inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
|
||||
inline void SetTypeClassifier(type_classifer tp_classifier) {
|
||||
tp_classifier_ = tp_classifier;
|
||||
}
|
||||
inline void SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;}
|
||||
inline void SetHistWindWid(int hist_wind_wid) {
|
||||
hist_wind_wid_ = hist_wind_wid;
|
||||
}
|
||||
|
||||
virtual bool Save(string file_name) = 0;
|
||||
virtual bool Load(string file_name) = 0;
|
||||
|
||||
protected:
|
||||
// weight of recognition cost. This includes the language model cost
|
||||
double reco_wgt_;
|
||||
// weight of size cost
|
||||
double size_wgt_;
|
||||
// weight of character bigrams cost
|
||||
double char_bigrams_wgt_;
|
||||
// weight of word unigrams cost
|
||||
double word_unigrams_wgt_;
|
||||
// Maximum number of segments per character
|
||||
int max_seg_per_char_;
|
||||
// Beam width equal to the maximum number of nodes kept in the beam search
|
||||
// trellis column after pruning
|
||||
int beam_width_;
|
||||
// Classifier type: See enum type_classifer for classifier types
|
||||
type_classifer tp_classifier_;
|
||||
// Feature types: See enum type_feature for feature types
|
||||
type_feature tp_feat_;
|
||||
// Grid size to scale a grapheme bitmap used by the BMP feature type
|
||||
int conv_grid_size_;
|
||||
// Histogram window size as a ratio of the word height used in computing
|
||||
// the vertical pixel density histogram in the segmentation algorithm
|
||||
int hist_wind_wid_;
|
||||
// Minimum possible size of a connected component
|
||||
int min_con_comp_size_;
|
||||
// Maximum aspect ratio of a word (width / height)
|
||||
double max_word_aspect_ratio_;
|
||||
// Minimum ratio relative to the line height of a gap to be considered as
|
||||
// a word break
|
||||
double min_space_height_ratio_;
|
||||
// Maximum ratio relative to the line height of a gap to be considered as
|
||||
// a definite word break
|
||||
double max_space_height_ratio_;
|
||||
// When Cube and Tesseract are run in combined mode, only run
|
||||
// combiner classifier when tesseract confidence is below this
|
||||
// threshold. When Cube is run without Tesseract, this is ignored.
|
||||
double combiner_run_thresh_;
|
||||
// When Cube and tesseract are run in combined mode, threshold on
|
||||
// output of combiner binary classifier (chosen from ROC during
|
||||
// combiner training). When Cube is run without Tesseract, this is ignored.
|
||||
double combiner_classifier_thresh_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // TUNING_PARAMS_H
|
@ -1,117 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_altlist.cpp
|
||||
* Description: Implementation of the Word Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "word_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
WordAltList::WordAltList(int max_alt)
|
||||
: AltList(max_alt) {
|
||||
word_alt_ = NULL;
|
||||
}
|
||||
|
||||
WordAltList::~WordAltList() {
|
||||
if (word_alt_ != NULL) {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (word_alt_[alt_idx] != NULL) {
|
||||
delete []word_alt_[alt_idx];
|
||||
}
|
||||
}
|
||||
delete []word_alt_;
|
||||
word_alt_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* insert an alternate word with the specified cost and tag
|
||||
*/
|
||||
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
||||
if (word_alt_ == NULL || alt_cost_ == NULL) {
|
||||
word_alt_ = new char_32*[max_alt_];
|
||||
alt_cost_ = new int[max_alt_];
|
||||
alt_tag_ = new void *[max_alt_];
|
||||
memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
|
||||
} else {
|
||||
// check if alt already exists
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
|
||||
// update the cost if we have a lower one
|
||||
if (cost < alt_cost_[alt_idx]) {
|
||||
alt_cost_[alt_idx] = cost;
|
||||
alt_tag_[alt_idx] = tag;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// determine length of alternate
|
||||
int len = CubeUtils::StrLen(word_str);
|
||||
|
||||
word_alt_[alt_cnt_] = new char_32[len + 1];
|
||||
|
||||
if (len > 0) {
|
||||
memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
|
||||
}
|
||||
|
||||
word_alt_[alt_cnt_][len] = 0;
|
||||
alt_cost_[alt_cnt_] = cost;
|
||||
alt_tag_[alt_cnt_] = tag;
|
||||
|
||||
alt_cnt_++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* sort the alternate in descending order based on the cost
|
||||
*/
|
||||
void WordAltList::Sort() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||
if (alt_cost_[alt_idx] > alt_cost_[alt]) {
|
||||
char_32 *pchTemp = word_alt_[alt_idx];
|
||||
word_alt_[alt_idx] = word_alt_[alt];
|
||||
word_alt_[alt] = pchTemp;
|
||||
|
||||
int temp = alt_cost_[alt_idx];
|
||||
alt_cost_[alt_idx] = alt_cost_[alt];
|
||||
alt_cost_[alt] = temp;
|
||||
|
||||
void *tag = alt_tag_[alt_idx];
|
||||
alt_tag_[alt_idx] = alt_tag_[alt];
|
||||
alt_tag_[alt] = tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WordAltList::PrintDebug() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
char_32 *word_32 = word_alt_[alt_idx];
|
||||
string word_str;
|
||||
CubeUtils::UTF32ToUTF8(word_32, &word_str);
|
||||
int num_unichars = CubeUtils::StrLen(word_32);
|
||||
fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
|
||||
word_str.c_str(), alt_cost_[alt_idx], num_unichars);
|
||||
for (int i = 0; i < num_unichars; ++i)
|
||||
fprintf(stderr, "%d ", word_32[i]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,50 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_altlist.h
|
||||
* Description: Declaration of the Word Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordAltList abstracts a alternate list of words and their corresponding
|
||||
// costs that result from the word recognition process. The class inherits
|
||||
// from the AltList class
|
||||
// It provides methods to add a new word alternate, its corresponding score and
|
||||
// a tag.
|
||||
|
||||
#ifndef WORD_ALT_LIST_H
|
||||
#define WORD_ALT_LIST_H
|
||||
|
||||
#include "altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
class WordAltList : public AltList {
|
||||
public:
|
||||
explicit WordAltList(int max_alt);
|
||||
~WordAltList();
|
||||
// Sort the list of alternates based on cost
|
||||
void Sort();
|
||||
// insert an alternate word with the specified cost and tag
|
||||
bool Insert(char_32 *char_ptr, int cost, void *tag = NULL);
|
||||
// returns the alternate string at the specified position
|
||||
inline char_32 * Alt(int alt_idx) { return word_alt_[alt_idx]; }
|
||||
// print each entry of the altlist, both UTF8 and unichar ids, and
|
||||
// their costs, to stderr
|
||||
void PrintDebug();
|
||||
private:
|
||||
char_32 **word_alt_;
|
||||
};
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // WORD_ALT_LIST_H
|
@ -1,199 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_list_lang_model.cpp
|
||||
* Description: Implementation of the Word List Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "word_list_lang_model.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
#include "ratngs.h"
|
||||
#include "trie.h"
|
||||
|
||||
namespace tesseract {
|
||||
WordListLangModel::WordListLangModel(CubeRecoContext *cntxt) {
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = NULL;
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
WordListLangModel::~WordListLangModel() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
void WordListLangModel::Cleanup() {
|
||||
if (dawg_ != NULL) {
|
||||
delete dawg_;
|
||||
dawg_ = NULL;
|
||||
}
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// Initialize the language model
|
||||
bool WordListLangModel::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
// The last parameter to the Trie constructor (the debug level) is set to
|
||||
// false for now, until Cube has a way to express its preferred debug level.
|
||||
dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM,
|
||||
cntxt_->CharacterSet()->ClassCount(), false);
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// return a pointer to the root
|
||||
LangModEdge * WordListLangModel::Root() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// return the edges emerging from the current state
|
||||
LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt) {
|
||||
// initialize if necessary
|
||||
if (init_ == false) {
|
||||
if (Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
(*edge_cnt) = 0;
|
||||
|
||||
EDGE_REF edge_ref;
|
||||
|
||||
TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
|
||||
|
||||
if (tess_lm_edge == NULL) {
|
||||
edge_ref = 0;
|
||||
} else {
|
||||
edge_ref = tess_lm_edge->EndEdge();
|
||||
|
||||
// advance node
|
||||
edge_ref = dawg_->next_node(edge_ref);
|
||||
if (edge_ref == 0) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// allocate memory for edges
|
||||
LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
|
||||
|
||||
// now get all the emerging edges
|
||||
(*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
return edge_array;
|
||||
}
|
||||
|
||||
// returns true if the char_32 is supported by the language model
|
||||
// TODO(ahmadab) currently not implemented
|
||||
bool WordListLangModel::IsValidSequence(const char_32 *sequence,
|
||||
bool terminal, LangModEdge **edges) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Recursive helper function for WordVariants().
|
||||
void WordListLangModel::WordVariants(const CharSet &char_set,
|
||||
string_32 prefix_str32,
|
||||
WERD_CHOICE *word_so_far,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants) {
|
||||
int str_len = str32.length();
|
||||
if (str_len == 0) {
|
||||
if (word_so_far->length() > 0) {
|
||||
word_variants->push_back(new WERD_CHOICE(*word_so_far));
|
||||
}
|
||||
} else {
|
||||
// Try out all the possible prefixes of the str32.
|
||||
for (int len = 1; len <= str_len; len++) {
|
||||
// Check if prefix is supported in character set.
|
||||
string_32 str_pref32 = str32.substr(0, len);
|
||||
int class_id = char_set.ClassID(reinterpret_cast<const char_32 *>(
|
||||
str_pref32.c_str()));
|
||||
if (class_id <= 0) {
|
||||
continue;
|
||||
} else {
|
||||
string_32 new_prefix_str32 = prefix_str32 + str_pref32;
|
||||
string_32 new_str32 = str32.substr(len);
|
||||
word_so_far->append_unichar_id(class_id, 1, 0.0, 0.0);
|
||||
WordVariants(char_set, new_prefix_str32, word_so_far, new_str32,
|
||||
word_variants);
|
||||
word_so_far->remove_last_unichar_id();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute all the variants of a 32-bit string in terms of the class-ids
|
||||
// This is needed for languages that have ligatures. A word can then have more
|
||||
// than one spelling in terms of the class-ids
|
||||
void WordListLangModel::WordVariants(const CharSet &char_set,
|
||||
const UNICHARSET *uchset, string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants) {
|
||||
for (int i = 0; i < word_variants->size(); i++) {
|
||||
delete (*word_variants)[i];
|
||||
}
|
||||
word_variants->clear();
|
||||
string_32 prefix_str32;
|
||||
WERD_CHOICE word_so_far(uchset);
|
||||
WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
|
||||
}
|
||||
|
||||
// add a new UTF-8 string to the lang model
|
||||
bool WordListLangModel::AddString(const char *char_ptr) {
|
||||
if (!init_ && !Init()) { // initialize if necessary
|
||||
return false;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(char_ptr, &str32);
|
||||
if (str32.length() < 1) {
|
||||
return false;
|
||||
}
|
||||
return AddString32(str32.c_str());
|
||||
}
|
||||
|
||||
// add a new UTF-32 string to the lang model
|
||||
bool WordListLangModel::AddString32(const char_32 *char_32_ptr) {
|
||||
if (char_32_ptr == NULL) {
|
||||
return false;
|
||||
}
|
||||
// get all the word variants
|
||||
vector<WERD_CHOICE *> word_variants;
|
||||
WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
|
||||
char_32_ptr, &word_variants);
|
||||
|
||||
if (word_variants.size() > 0) {
|
||||
// find the shortest variant
|
||||
int shortest_word = 0;
|
||||
for (int word = 1; word < word_variants.size(); word++) {
|
||||
if (word_variants[shortest_word]->length() >
|
||||
word_variants[word]->length()) {
|
||||
shortest_word = word;
|
||||
}
|
||||
}
|
||||
// only add the shortest grapheme interpretation of string to the word list
|
||||
dawg_->add_word_to_dawg(*word_variants[shortest_word]);
|
||||
}
|
||||
for (int i = 0; i < word_variants.size(); i++) { delete word_variants[i]; }
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_list_lang_model.h
|
||||
* Description: Declaration of the Word List Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordListLangModel class abstracts a language model that is based on
|
||||
// a list of words. It inherits from the LangModel abstract class
|
||||
// Besides providing the methods inherited from the LangModel abstract class,
|
||||
// the class provided methods to add new strings to the Language Model:
|
||||
// AddString & AddString32
|
||||
|
||||
#ifndef WORD_LIST_LANG_MODEL_H
|
||||
#define WORD_LIST_LANG_MODEL_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "lang_model.h"
|
||||
#include "tess_lang_mod_edge.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Trie;
|
||||
|
||||
class WordListLangModel : public LangModel {
|
||||
public:
|
||||
explicit WordListLangModel(CubeRecoContext *cntxt);
|
||||
~WordListLangModel();
|
||||
// Returns an edge pointer to the Root
|
||||
LangModEdge *Root();
|
||||
// Returns the edges that fan-out of the specified edge and their count
|
||||
LangModEdge **GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt);
|
||||
// Returns is a sequence of 32-bit characters are valid within this language
|
||||
// model or net. And EndOfWord flag is specified. If true, the sequence has
|
||||
// to end on a valid word. The function also optionally returns the list
|
||||
// of language model edges traversed to parse the string
|
||||
bool IsValidSequence(const char_32 *sequence,
|
||||
bool eow_flag,
|
||||
LangModEdge **edges);
|
||||
bool IsLeadingPunc(char_32 ch) { return false; } // not yet implemented
|
||||
bool IsTrailingPunc(char_32 ch) { return false; } // not yet implemented
|
||||
bool IsDigit(char_32 ch) { return false; } // not yet implemented
|
||||
// Adds a new UTF-8 string to the language model
|
||||
bool AddString(const char *char_ptr);
|
||||
// Adds a new UTF-32 string to the language model
|
||||
bool AddString32(const char_32 *char_32_ptr);
|
||||
// Compute all the variants of a 32-bit string in terms of the class-ids.
|
||||
// This is needed for languages that have ligatures. A word can then have
|
||||
// more than one spelling in terms of the class-ids.
|
||||
static void WordVariants(const CharSet &char_set, const UNICHARSET *uchset,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants);
|
||||
private:
|
||||
// constants needed to configure the language model
|
||||
static const int kMaxEdge = 512;
|
||||
|
||||
CubeRecoContext *cntxt_;
|
||||
Trie *dawg_;
|
||||
bool init_;
|
||||
// Initialize the language model
|
||||
bool Init();
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Recursive helper function for WordVariants().
|
||||
static void WordVariants(
|
||||
const CharSet &char_set,
|
||||
string_32 prefix_str32, WERD_CHOICE *word_so_far,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants);
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // WORD_LIST_LANG_MODEL_H
|
@ -1,286 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_size_model.cpp
|
||||
* Description: Implementation of the Word Size Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "word_size_model.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) {
|
||||
char_set_ = char_set;
|
||||
contextual_ = contextual;
|
||||
}
|
||||
|
||||
WordSizeModel::~WordSizeModel() {
|
||||
for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
|
||||
FontPairSizeInfo fnt_info = font_pair_size_models_[fnt];
|
||||
delete []fnt_info.pair_size_info[0];
|
||||
delete []fnt_info.pair_size_info;
|
||||
}
|
||||
}
|
||||
|
||||
WordSizeModel *WordSizeModel::Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
CharSet *char_set,
|
||||
bool contextual) {
|
||||
WordSizeModel *obj = new WordSizeModel(char_set, contextual);
|
||||
|
||||
if (!obj->Init(data_file_path, lang)) {
|
||||
delete obj;
|
||||
return NULL;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
|
||||
string stats_file_name;
|
||||
stats_file_name = data_file_path + lang;
|
||||
stats_file_name += ".cube.size";
|
||||
|
||||
// read file to memory
|
||||
string str_data;
|
||||
|
||||
if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split to words
|
||||
vector<string> tokens;
|
||||
CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);
|
||||
if (tokens.size() < 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "
|
||||
"file contents: %s\n", stats_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
font_pair_size_models_.clear();
|
||||
|
||||
// token count per line depends on whether the language is contextual or not
|
||||
int token_cnt = contextual_ ?
|
||||
(kExpectedTokenCount + 4) : kExpectedTokenCount;
|
||||
// the count of size classes depends on whether the language is contextual
|
||||
// or not. For non contextual languages (Ex: Eng), it is equal to the class
|
||||
// count. For contextual languages (Ex: Ara), it is equal to the class count
|
||||
// multiplied by the position count (4: start, middle, final, isolated)
|
||||
int size_class_cnt = contextual_ ?
|
||||
(char_set_->ClassCount() * 4) : char_set_->ClassCount();
|
||||
string fnt_name = "";
|
||||
|
||||
for (int tok = 0; tok < tokens.size(); tok += token_cnt) {
|
||||
// a new font, write the old font data and re-init
|
||||
if (tok == 0 || fnt_name != tokens[tok]) {
|
||||
FontPairSizeInfo fnt_info;
|
||||
|
||||
fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
|
||||
|
||||
fnt_info.pair_size_info[0] =
|
||||
new PairSizeInfo[size_class_cnt * size_class_cnt];
|
||||
|
||||
memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
|
||||
sizeof(PairSizeInfo));
|
||||
|
||||
for (int cls = 1; cls < size_class_cnt; cls++) {
|
||||
fnt_info.pair_size_info[cls] =
|
||||
fnt_info.pair_size_info[cls - 1] + size_class_cnt;
|
||||
}
|
||||
|
||||
// strip out path and extension
|
||||
string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));
|
||||
string::size_type strt_pos = stripped_font_name.find_last_of("/\\");
|
||||
if (strt_pos != string::npos) {
|
||||
fnt_info.font_name = stripped_font_name.substr(strt_pos);
|
||||
} else {
|
||||
fnt_info.font_name = stripped_font_name;
|
||||
}
|
||||
font_pair_size_models_.push_back(fnt_info);
|
||||
}
|
||||
|
||||
// parse the data
|
||||
int cls_0;
|
||||
int cls_1;
|
||||
double delta_top;
|
||||
double wid_0;
|
||||
double hgt_0;
|
||||
double wid_1;
|
||||
double hgt_1;
|
||||
int size_code_0;
|
||||
int size_code_1;
|
||||
|
||||
// read and parse the tokens
|
||||
if (contextual_) {
|
||||
int start_0;
|
||||
int end_0;
|
||||
int start_1;
|
||||
int end_1;
|
||||
// The expected format for a character size bigram is as follows:
|
||||
// ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)
|
||||
// Width0<delim>Height0<delim>
|
||||
// ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)
|
||||
// HeightDelta<delim>Width1<delim>Height0<delim>
|
||||
// In case of non-contextual languages, the Start and End flags are
|
||||
// omitted
|
||||
if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
|
||||
sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 ||
|
||||
sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 ||
|
||||
sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 ||
|
||||
sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 ||
|
||||
sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 ||
|
||||
sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 ||
|
||||
sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 ||
|
||||
sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 ||
|
||||
sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 ||
|
||||
sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 ||
|
||||
(start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
|
||||
(start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
|
||||
"line %d\n", 1 + (tok / token_cnt));
|
||||
return false;
|
||||
}
|
||||
size_code_0 = SizeCode(cls_0, start_0, end_0);
|
||||
size_code_1 = SizeCode(cls_1, start_1, end_1);
|
||||
} else {
|
||||
if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
|
||||
sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 ||
|
||||
sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 ||
|
||||
sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 ||
|
||||
sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 ||
|
||||
sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 ||
|
||||
sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
|
||||
"line %d\n", 1 + (tok / token_cnt));
|
||||
return false;
|
||||
}
|
||||
size_code_0 = cls_0;
|
||||
size_code_1 = cls_1;
|
||||
}
|
||||
|
||||
// copy the data to the size tables
|
||||
FontPairSizeInfo fnt_info = font_pair_size_models_.back();
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
|
||||
static_cast<int>(delta_top * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
|
||||
static_cast<int>(wid_0 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
|
||||
static_cast<int>(hgt_0 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
|
||||
static_cast<int>(wid_1 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
|
||||
static_cast<int>(hgt_1 * kShapeModelScale);
|
||||
|
||||
fnt_name = tokens[tok];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {
|
||||
if (samp_cnt < 2) {
|
||||
return 0;
|
||||
}
|
||||
double best_dist = static_cast<double>(WORST_COST);
|
||||
int best_fnt = -1;
|
||||
for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
|
||||
const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
|
||||
double mean_dist = 0;
|
||||
int pair_cnt = 0;
|
||||
|
||||
for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
|
||||
int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());
|
||||
if (cls_0 < 1) {
|
||||
continue;
|
||||
}
|
||||
// compute size code for samp 0 based on class id and position
|
||||
int size_code_0;
|
||||
if (contextual_) {
|
||||
size_code_0 = SizeCode(cls_0,
|
||||
samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
|
||||
samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
|
||||
} else {
|
||||
size_code_0 = cls_0;
|
||||
}
|
||||
|
||||
int char0_height = samp_array[smp_0]->Height();
|
||||
int char0_width = samp_array[smp_0]->Width();
|
||||
int char0_top = samp_array[smp_0]->Top();
|
||||
|
||||
for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
|
||||
int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());
|
||||
if (cls_1 < 1) {
|
||||
continue;
|
||||
}
|
||||
// compute size code for samp 0 based on class id and position
|
||||
int size_code_1;
|
||||
if (contextual_) {
|
||||
size_code_1 = SizeCode(cls_1,
|
||||
samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
|
||||
samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
|
||||
} else {
|
||||
size_code_1 = cls_1;
|
||||
}
|
||||
double dist = PairCost(
|
||||
char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
|
||||
samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
|
||||
fnt_info->pair_size_info[size_code_0][size_code_1]);
|
||||
if (dist > 0) {
|
||||
mean_dist += dist;
|
||||
pair_cnt++;
|
||||
}
|
||||
} // smp_1
|
||||
} // smp_0
|
||||
if (pair_cnt == 0) {
|
||||
continue;
|
||||
}
|
||||
mean_dist /= pair_cnt;
|
||||
if (best_fnt == -1 || mean_dist < best_dist) {
|
||||
best_dist = mean_dist;
|
||||
best_fnt = fnt;
|
||||
}
|
||||
}
|
||||
if (best_fnt == -1) {
|
||||
return static_cast<int>(WORST_COST);
|
||||
} else {
|
||||
return static_cast<int>(best_dist);
|
||||
}
|
||||
}
|
||||
|
||||
double WordSizeModel::PairCost(int width_0, int height_0, int top_0,
|
||||
int width_1, int height_1, int top_1,
|
||||
const PairSizeInfo& pair_info) {
|
||||
double scale_factor = static_cast<double>(pair_info.hgt_0) /
|
||||
static_cast<double>(height_0);
|
||||
double dist = 0.0;
|
||||
if (scale_factor > 0) {
|
||||
double norm_width_0 = width_0 * scale_factor;
|
||||
double norm_width_1 = width_1 * scale_factor;
|
||||
double norm_height_1 = height_1 * scale_factor;
|
||||
double norm_delta_top = (top_1 - top_0) * scale_factor;
|
||||
|
||||
// accumulate the distance between the model character and the
|
||||
// predicted one on all dimensions of the pair
|
||||
dist += fabs(pair_info.wid_0 - norm_width_0);
|
||||
dist += fabs(pair_info.wid_1 - norm_width_1);
|
||||
dist += fabs(pair_info.hgt_1 - norm_height_1);
|
||||
dist += fabs(pair_info.delta_top - norm_delta_top);
|
||||
}
|
||||
return dist;
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,100 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_size_model.h
|
||||
* Description: Declaration of the Word Size Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordSizeModel class abstracts the geometrical relationships
|
||||
// between characters/shapes in the same word (presumeably of the same font)
|
||||
// A non-parametric bigram model describes the three geometrical properties of a
|
||||
// character pair:
|
||||
// 1- Normalized Width
|
||||
// 2- Normalized Top
|
||||
// 3- Normalized Height
|
||||
// These dimensions are computed for each character pair in a word. These are
|
||||
// then compared to the same information for each of the fonts that the size
|
||||
// model knows about. The WordSizeCost is the cost of the font that matches
|
||||
// best.
|
||||
|
||||
#ifndef WORD_SIZE_MODEL_H
|
||||
#define WORD_SIZE_MODEL_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
struct PairSizeInfo {
|
||||
int delta_top;
|
||||
int wid_0;
|
||||
int hgt_0;
|
||||
int wid_1;
|
||||
int hgt_1;
|
||||
};
|
||||
|
||||
struct FontPairSizeInfo {
|
||||
string font_name;
|
||||
PairSizeInfo **pair_size_info;
|
||||
};
|
||||
|
||||
class WordSizeModel {
|
||||
public:
|
||||
WordSizeModel(CharSet *, bool contextual);
|
||||
virtual ~WordSizeModel();
|
||||
static WordSizeModel *Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
CharSet *char_set,
|
||||
bool contextual);
|
||||
// Given a word and number of unichars, return the size cost,
|
||||
// minimized over all fonts in the size model.
|
||||
int Cost(CharSamp **samp_array, int samp_cnt) const;
|
||||
// Given dimensions of a pair of character samples and a font size
|
||||
// model for that character pair, return the pair's size cost for
|
||||
// the font.
|
||||
static double PairCost(int width_0, int height_0, int top_0,
|
||||
int width_1, int height_1, int top_1,
|
||||
const PairSizeInfo& pair_info);
|
||||
bool Save(string file_name);
|
||||
// Number of fonts in size model.
|
||||
inline int FontCount() const {
|
||||
return font_pair_size_models_.size();
|
||||
}
|
||||
inline const FontPairSizeInfo *FontInfo() const {
|
||||
return &font_pair_size_models_[0];
|
||||
}
|
||||
// Helper functions to convert between size codes, class id and position
|
||||
// codes
|
||||
static inline int SizeCode(int cls_id, int start, int end) {
|
||||
return (cls_id << 2) + (end << 1) + start;
|
||||
}
|
||||
|
||||
private:
|
||||
// Scaling constant used to convert floating point ratios in size table
|
||||
// to fixed point
|
||||
static const int kShapeModelScale = 1000;
|
||||
static const int kExpectedTokenCount = 10;
|
||||
|
||||
// Language properties
|
||||
bool contextual_;
|
||||
CharSet *char_set_;
|
||||
// Size ratios table
|
||||
vector<FontPairSizeInfo> font_pair_size_models_;
|
||||
|
||||
// Initialize the word size model object
|
||||
bool Init(const string &data_file_path, const string &lang);
|
||||
};
|
||||
}
|
||||
#endif // WORD_SIZE_MODEL_H
|
@ -1,252 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_unigrams.cpp
|
||||
* Description: Implementation of the Word Unigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "const.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
WordUnigrams::WordUnigrams() {
|
||||
costs_ = NULL;
|
||||
words_ = NULL;
|
||||
word_cnt_ = 0;
|
||||
}
|
||||
|
||||
WordUnigrams::~WordUnigrams() {
|
||||
if (words_ != NULL) {
|
||||
if (words_[0] != NULL) {
|
||||
delete []words_[0];
|
||||
}
|
||||
|
||||
delete []words_;
|
||||
words_ = NULL;
|
||||
}
|
||||
|
||||
if (costs_ != NULL) {
|
||||
delete []costs_;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the word-list and unigrams from file and create an object
|
||||
* The word list is assumed to be sorted in lexicographic order.
|
||||
*/
|
||||
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string file_name;
|
||||
string str;
|
||||
|
||||
file_name = data_file_path + lang;
|
||||
file_name += ".cube.word-freq";
|
||||
|
||||
// load the string into memory
|
||||
if (CubeUtils::ReadFileToString(file_name, &str) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(str, "\r\n \t", &str_vec);
|
||||
if (str_vec.size() < 2) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// allocate memory
|
||||
WordUnigrams *word_unigrams_obj = new WordUnigrams();
|
||||
|
||||
int full_len = str.length();
|
||||
int word_cnt = str_vec.size() / 2;
|
||||
word_unigrams_obj->words_ = new char*[word_cnt];
|
||||
word_unigrams_obj->costs_ = new int[word_cnt];
|
||||
|
||||
word_unigrams_obj->words_[0] = new char[full_len];
|
||||
|
||||
// construct sorted list of words and costs
|
||||
word_unigrams_obj->word_cnt_ = 0;
|
||||
char *char_buff = word_unigrams_obj->words_[0];
|
||||
word_cnt = 0;
|
||||
int max_cost = 0;
|
||||
|
||||
for (int wrd = 0; wrd < str_vec.size(); wrd += 2) {
|
||||
word_unigrams_obj->words_[word_cnt] = char_buff;
|
||||
|
||||
strcpy(char_buff, str_vec[wrd].c_str());
|
||||
char_buff += (str_vec[wrd].length() + 1);
|
||||
|
||||
if (sscanf(str_vec[wrd + 1].c_str(), "%d",
|
||||
word_unigrams_obj->costs_ + word_cnt) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error reading "
|
||||
"word unigram data.\n");
|
||||
delete word_unigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
// update max cost
|
||||
max_cost = MAX(max_cost, word_unigrams_obj->costs_[word_cnt]);
|
||||
word_cnt++;
|
||||
}
|
||||
word_unigrams_obj->word_cnt_ = word_cnt;
|
||||
|
||||
// compute the not-in-list-cost by assuming that a word not in the list
|
||||
// [ahmadab]: This can be computed as follows:
|
||||
// - Given that the distribution of words follow Zipf's law:
|
||||
// (F = K / (rank ^ S)), where s is slightly > 1.0
|
||||
// - Number of words in the list is N
|
||||
// - The mean frequency of a word that did not appear in the list is the
|
||||
// area under the rest of the Zipf's curve divided by 2 (the mean)
|
||||
// - The area would be the bound integral from N to infinity =
|
||||
// (K * S) / (N ^ (S + 1)) ~= K / (N ^ 2)
|
||||
// - Given that cost = -LOG(prob), the cost of an unlisted word would be
|
||||
// = max_cost + 2*LOG(N)
|
||||
word_unigrams_obj->not_in_list_cost_ = max_cost +
|
||||
(2 * CubeUtils::Prob2Cost(1.0 / word_cnt));
|
||||
// success
|
||||
return word_unigrams_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split input into space-separated tokens, strip trailing punctuation
|
||||
* from each, determine case properties, call UTF-8 flavor of cost
|
||||
* function on each word, and aggregate all into single mean word
|
||||
* cost.
|
||||
*/
|
||||
int WordUnigrams::Cost(const char_32 *key_str32,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set) const {
|
||||
if (!key_str32)
|
||||
return 0;
|
||||
// convert string to UTF8 to split into space-separated words
|
||||
string key_str;
|
||||
CubeUtils::UTF32ToUTF8(key_str32, &key_str);
|
||||
vector<string> words;
|
||||
CubeUtils::SplitStringUsing(key_str, " \t", &words);
|
||||
|
||||
// no words => no cost
|
||||
if (words.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// aggregate the costs of all the words
|
||||
int cost = 0;
|
||||
for (int word_idx = 0; word_idx < words.size(); word_idx++) {
|
||||
// convert each word back to UTF32 for analyzing case and punctuation
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32);
|
||||
int len = CubeUtils::StrLen(str32.c_str());
|
||||
|
||||
// strip all trailing punctuation
|
||||
string clean_str;
|
||||
int clean_len = len;
|
||||
bool trunc = false;
|
||||
while (clean_len > 0 &&
|
||||
lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) {
|
||||
--clean_len;
|
||||
trunc = true;
|
||||
}
|
||||
|
||||
// If either the original string was not truncated (no trailing
|
||||
// punctuation) or the entire string was removed (all characters
|
||||
// are trailing punctuation), evaluate original word as is;
|
||||
// otherwise, copy all but the trailing punctuation characters
|
||||
char_32 *clean_str32 = NULL;
|
||||
if (clean_len == 0 || !trunc) {
|
||||
clean_str32 = CubeUtils::StrDup(str32.c_str());
|
||||
} else {
|
||||
clean_str32 = new char_32[clean_len + 1];
|
||||
for (int i = 0; i < clean_len; ++i) {
|
||||
clean_str32[i] = str32[i];
|
||||
}
|
||||
clean_str32[clean_len] = '\0';
|
||||
}
|
||||
ASSERT_HOST(clean_str32 != NULL);
|
||||
|
||||
string str8;
|
||||
CubeUtils::UTF32ToUTF8(clean_str32, &str8);
|
||||
int word_cost = CostInternal(str8.c_str());
|
||||
|
||||
// if case invariant, get costs of all-upper-case and all-lower-case
|
||||
// versions and return the min cost
|
||||
if (clean_len >= kMinLengthNumOrCaseInvariant &&
|
||||
CubeUtils::IsCaseInvariant(clean_str32, char_set)) {
|
||||
char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set);
|
||||
if (lower_32) {
|
||||
string lower_8;
|
||||
CubeUtils::UTF32ToUTF8(lower_32, &lower_8);
|
||||
word_cost = MIN(word_cost, CostInternal(lower_8.c_str()));
|
||||
delete [] lower_32;
|
||||
}
|
||||
char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set);
|
||||
if (upper_32) {
|
||||
string upper_8;
|
||||
CubeUtils::UTF32ToUTF8(upper_32, &upper_8);
|
||||
word_cost = MIN(word_cost, CostInternal(upper_8.c_str()));
|
||||
delete [] upper_32;
|
||||
}
|
||||
}
|
||||
|
||||
if (clean_len >= kMinLengthNumOrCaseInvariant) {
|
||||
// if characters are all numeric, incur 0 word cost
|
||||
bool is_numeric = true;
|
||||
for (int i = 0; i < clean_len; ++i) {
|
||||
if (!lang_mod->IsDigit(clean_str32[i]))
|
||||
is_numeric = false;
|
||||
}
|
||||
if (is_numeric)
|
||||
word_cost = 0;
|
||||
}
|
||||
delete [] clean_str32;
|
||||
cost += word_cost;
|
||||
} // word_idx
|
||||
|
||||
// return the mean cost
|
||||
return static_cast<int>(cost / static_cast<double>(words.size()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for UTF-8 string using binary search of sorted words_ array.
|
||||
*/
|
||||
int WordUnigrams::CostInternal(const char *key_str) const {
|
||||
if (strlen(key_str) == 0)
|
||||
return not_in_list_cost_;
|
||||
int hi = word_cnt_ - 1;
|
||||
int lo = 0;
|
||||
while (lo <= hi) {
|
||||
int current = (hi + lo) / 2;
|
||||
int comp = strcmp(key_str, words_[current]);
|
||||
// a match
|
||||
if (comp == 0) {
|
||||
return costs_[current];
|
||||
}
|
||||
if (comp < 0) {
|
||||
// go lower
|
||||
hi = current - 1;
|
||||
} else {
|
||||
// go higher
|
||||
lo = current + 1;
|
||||
}
|
||||
}
|
||||
return not_in_list_cost_;
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,69 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_unigrams.h
|
||||
* Description: Declaration of the Word Unigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordUnigram class holds the unigrams of the most frequent set of words
|
||||
// in a language. It is an optional component of the Cube OCR engine. If
|
||||
// present, the unigram cost of a word is aggregated with the other costs
|
||||
// (Recognition, Language Model, Size) to compute a cost for a word.
|
||||
// The word list is assumed to be sorted in lexicographic order.
|
||||
|
||||
#ifndef WORD_UNIGRAMS_H
|
||||
#define WORD_UNIGRAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "char_set.h"
|
||||
#include "lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
class WordUnigrams {
|
||||
public:
|
||||
WordUnigrams();
|
||||
~WordUnigrams();
|
||||
// Load the word-list and unigrams from file and create an object
|
||||
// The word list is assumed to be sorted
|
||||
static WordUnigrams *Create(const string &data_file_path,
|
||||
const string &lang);
|
||||
// Compute the unigram cost of a UTF-32 string. Splits into
|
||||
// space-separated tokens, strips trailing punctuation from each
|
||||
// token, evaluates case properties, and calls internal Cost()
|
||||
// function on UTF-8 version. To avoid unnecessarily penalizing
|
||||
// all-one-case words or capitalized words (first-letter
|
||||
// upper-case and remaining letters lower-case) when not all
|
||||
// versions of the word appear in the <lang>.cube.word-freq file, a
|
||||
// case-invariant cost is computed in those cases, assuming the word
|
||||
// meets a minimum length.
|
||||
int Cost(const char_32 *str32, LangModel *lang_mod,
|
||||
CharSet *char_set) const;
|
||||
protected:
|
||||
// Compute the word unigram cost of a UTF-8 string with binary
|
||||
// search of sorted words_ array.
|
||||
int CostInternal(const char *str) const;
|
||||
private:
|
||||
// Only words this length or greater qualify for all-numeric or
|
||||
// case-invariant word unigram cost.
|
||||
static const int kMinLengthNumOrCaseInvariant = 4;
|
||||
|
||||
int word_cnt_;
|
||||
char **words_;
|
||||
int *costs_;
|
||||
int not_in_list_cost_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // WORD_UNIGRAMS_H
|
@ -1,25 +0,0 @@
|
||||
AM_CPPFLAGS += \
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/image -I$(top_srcdir)/viewer
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
-fvisibility=hidden -fvisibility-inlines-hidden
|
||||
endif
|
||||
|
||||
noinst_HEADERS = \
|
||||
input_file_buffer.h neural_net.h neuron.h
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
noinst_LTLIBRARIES = libtesseract_neural.la
|
||||
else
|
||||
lib_LTLIBRARIES = libtesseract_neural.la
|
||||
libtesseract_neural_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
endif
|
||||
|
||||
libtesseract_neural_la_SOURCES = \
|
||||
input_file_buffer.cpp neural_net.cpp neuron.cpp sigmoid_table.cpp
|
||||
|
||||
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// input_file_buffer.h: Declarations of a class for an object that
|
||||
// represents an input file buffer.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string>
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
// default and only constructor
|
||||
InputFileBuffer::InputFileBuffer(const string &file_name)
|
||||
: file_name_(file_name) {
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
// virtual destructor
|
||||
InputFileBuffer::~InputFileBuffer() {
|
||||
if (fp_ != NULL) {
|
||||
fclose(fp_);
|
||||
}
|
||||
}
|
||||
|
||||
// Read the specified number of bytes to the specified input buffer
|
||||
int InputFileBuffer::Read(void *buffer, int bytes_to_read) {
|
||||
// open the file if necessary
|
||||
if (fp_ == NULL) {
|
||||
fp_ = fopen(file_name_.c_str(), "rb");
|
||||
if (fp_ == NULL) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return fread(buffer, 1, bytes_to_read, fp_);
|
||||
}
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// input_file_buffer.h: Declarations of a class for an object that
|
||||
// represents an input file buffer.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef INPUT_FILE_BUFFER_H
|
||||
#define INPUT_FILE_BUFFER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class InputFileBuffer {
|
||||
public:
|
||||
explicit InputFileBuffer(const string &file_name);
|
||||
virtual ~InputFileBuffer();
|
||||
int Read(void *buffer, int bytes_to_read);
|
||||
|
||||
protected:
|
||||
string file_name_;
|
||||
FILE *fp_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // INPUT_FILE_BUFFER_H__
|
@ -1,308 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neural_net.cpp: Declarations of a class for an object that
|
||||
// represents an arbitrary network of neurons
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
NeuralNet::NeuralNet() {
|
||||
Init();
|
||||
}
|
||||
|
||||
NeuralNet::~NeuralNet() {
|
||||
// clean up the wts chunks vector
|
||||
for (int vec = 0; vec < static_cast<int>(wts_vec_.size()); vec++) {
|
||||
delete wts_vec_[vec];
|
||||
}
|
||||
// clean up neurons
|
||||
delete []neurons_;
|
||||
// clean up nodes
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
delete []fast_nodes_[node_idx].inputs;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Initiaization function
|
||||
void NeuralNet::Init() {
|
||||
read_only_ = true;
|
||||
auto_encoder_ = false;
|
||||
alloc_wgt_cnt_ = 0;
|
||||
wts_cnt_ = 0;
|
||||
neuron_cnt_ = 0;
|
||||
in_cnt_ = 0;
|
||||
out_cnt_ = 0;
|
||||
wts_vec_.clear();
|
||||
neurons_ = NULL;
|
||||
inputs_mean_.clear();
|
||||
inputs_std_dev_.clear();
|
||||
inputs_min_.clear();
|
||||
inputs_max_.clear();
|
||||
}
|
||||
|
||||
// Does a fast feedforward for read_only nets
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::FastFeedForward(const Type *inputs,
|
||||
Type *outputs) {
|
||||
int node_idx = 0;
|
||||
Node *node = &fast_nodes_[0];
|
||||
// feed inputs in and offset them by the pre-computed bias
|
||||
for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
|
||||
node->out = inputs[node_idx] - node->bias;
|
||||
}
|
||||
// compute nodes activations and outputs
|
||||
for (;node_idx < neuron_cnt_; node_idx++, node++) {
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
node->out = Neuron::Sigmoid(activation);
|
||||
}
|
||||
// copy the outputs to the output buffers
|
||||
node = &fast_nodes_[neuron_cnt_ - out_cnt_];
|
||||
for (node_idx = 0; node_idx < out_cnt_; node_idx++, node++) {
|
||||
outputs[node_idx] = node->out;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Performs a feedforward for general nets. Used mainly in training mode
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::FeedForward(const Type *inputs,
|
||||
Type *outputs) {
|
||||
// call the fast version in case of readonly nets
|
||||
if (read_only_) {
|
||||
return FastFeedForward(inputs, outputs);
|
||||
}
|
||||
// clear all neurons
|
||||
Clear();
|
||||
// for auto encoders, apply no input normalization
|
||||
if (auto_encoder_) {
|
||||
for (int in = 0; in < in_cnt_; in++) {
|
||||
neurons_[in].set_output(inputs[in]);
|
||||
}
|
||||
} else {
|
||||
// Input normalization : subtract mean and divide by stddev
|
||||
for (int in = 0; in < in_cnt_; in++) {
|
||||
neurons_[in].set_output((inputs[in] - inputs_min_[in]) /
|
||||
(inputs_max_[in] - inputs_min_[in]));
|
||||
neurons_[in].set_output((neurons_[in].output() - inputs_mean_[in]) /
|
||||
inputs_std_dev_[in]);
|
||||
}
|
||||
}
|
||||
// compute the net outputs: follow a pull model each output pulls the
|
||||
// outputs of its input nodes and so on
|
||||
for (int out = neuron_cnt_ - out_cnt_; out < neuron_cnt_; out++) {
|
||||
neurons_[out].FeedForward();
|
||||
// copy the values to the output buffer
|
||||
outputs[out] = neurons_[out].output();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sets a connection between two neurons
|
||||
bool NeuralNet::SetConnection(int from, int to) {
|
||||
// allocate the wgt
|
||||
float *wts = AllocWgt(1);
|
||||
if (wts == NULL) {
|
||||
return false;
|
||||
}
|
||||
// register the connection
|
||||
neurons_[to].AddFromConnection(neurons_ + from, wts, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a fast readonly version of the net
|
||||
bool NeuralNet::CreateFastNet() {
|
||||
fast_nodes_.resize(neuron_cnt_);
|
||||
// build the node structures
|
||||
int wts_cnt = 0;
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
Node *node = &fast_nodes_[node_idx];
|
||||
if (neurons_[node_idx].node_type() == Neuron::Input) {
|
||||
// Input neurons have no fan-in
|
||||
node->fan_in_cnt = 0;
|
||||
node->inputs = NULL;
|
||||
// Input bias is the normalization offset computed from
|
||||
// training input stats
|
||||
if (fabs(inputs_max_[node_idx] - inputs_min_[node_idx]) <
|
||||
kMinInputRange) {
|
||||
// if the range approaches zero, the stdev is not defined,
|
||||
// this indicates that this input does not change.
|
||||
// Set the bias to zero
|
||||
node->bias = 0.0f;
|
||||
} else {
|
||||
node->bias = inputs_min_[node_idx] + (inputs_mean_[node_idx] *
|
||||
(inputs_max_[node_idx] - inputs_min_[node_idx]));
|
||||
}
|
||||
} else {
|
||||
node->bias = neurons_[node_idx].bias();
|
||||
node->fan_in_cnt = neurons_[node_idx].fan_in_cnt();
|
||||
// allocate memory for fan-in nodes
|
||||
node->inputs = new WeightedNode[node->fan_in_cnt];
|
||||
for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) {
|
||||
// identify fan-in neuron
|
||||
const int id = neurons_[node_idx].fan_in(fan_in)->id();
|
||||
// Feedback connections are not allowed and should never happen
|
||||
if (id >= node_idx) {
|
||||
return false;
|
||||
}
|
||||
// add the the fan-in neuron and its wgt
|
||||
node->inputs[fan_in].input_node = &fast_nodes_[id];
|
||||
float wgt_val = neurons_[node_idx].fan_in_wts(fan_in);
|
||||
// for input neurons normalize the wgt by the input scaling
|
||||
// values to save time during feedforward
|
||||
if (neurons_[node_idx].fan_in(fan_in)->node_type() == Neuron::Input) {
|
||||
// if the range approaches zero, the stdev is not defined,
|
||||
// this indicates that this input does not change.
|
||||
// Set the weight to zero
|
||||
if (fabs(inputs_max_[id] - inputs_min_[id]) < kMinInputRange) {
|
||||
wgt_val = 0.0f;
|
||||
} else {
|
||||
wgt_val /= ((inputs_max_[id] - inputs_min_[id]) *
|
||||
inputs_std_dev_[id]);
|
||||
}
|
||||
}
|
||||
node->inputs[fan_in].input_weight = wgt_val;
|
||||
}
|
||||
// incr wgt count to validate against at the end
|
||||
wts_cnt += node->fan_in_cnt;
|
||||
}
|
||||
}
|
||||
// sanity check
|
||||
return wts_cnt_ == wts_cnt;
|
||||
}
|
||||
|
||||
// returns a pointer to the requested set of weights
|
||||
// Allocates in chunks
|
||||
float * NeuralNet::AllocWgt(int wgt_cnt) {
|
||||
// see if need to allocate a new chunk of wts
|
||||
if (wts_vec_.size() == 0 || (alloc_wgt_cnt_ + wgt_cnt) > kWgtChunkSize) {
|
||||
// add the new chunck to the wts_chunks vector
|
||||
wts_vec_.push_back(new vector<float> (kWgtChunkSize));
|
||||
alloc_wgt_cnt_ = 0;
|
||||
}
|
||||
float *ret_ptr = &((*wts_vec_.back())[alloc_wgt_cnt_]);
|
||||
// incr usage counts
|
||||
alloc_wgt_cnt_ += wgt_cnt;
|
||||
wts_cnt_ += wgt_cnt;
|
||||
return ret_ptr;
|
||||
}
|
||||
|
||||
// create a new net object using an input file as a source
|
||||
NeuralNet *NeuralNet::FromFile(const string file_name) {
|
||||
// open the file
|
||||
InputFileBuffer input_buff(file_name);
|
||||
// create a new net object using input buffer
|
||||
NeuralNet *net_obj = FromInputBuffer(&input_buff);
|
||||
return net_obj;
|
||||
}
|
||||
|
||||
// create a net object from an input buffer
|
||||
NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) {
|
||||
// create a new net object
|
||||
NeuralNet *net_obj = new NeuralNet();
|
||||
// load the net
|
||||
if (!net_obj->ReadBinary(ib)) {
|
||||
delete net_obj;
|
||||
net_obj = NULL;
|
||||
}
|
||||
return net_obj;
|
||||
}
|
||||
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
// This is the fast-read-only version of this function
|
||||
template <typename Type> bool NeuralNet::FastGetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output) {
|
||||
// feed inputs in and offset them by the pre-computed bias
|
||||
int node_idx = 0;
|
||||
Node *node = &fast_nodes_[0];
|
||||
for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
|
||||
node->out = inputs[node_idx] - node->bias;
|
||||
}
|
||||
|
||||
// compute nodes' activations and outputs for hidden nodes if any
|
||||
int hidden_node_cnt = neuron_cnt_ - out_cnt_;
|
||||
for (;node_idx < hidden_node_cnt; node_idx++, node++) {
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
node->out = Neuron::Sigmoid(activation);
|
||||
}
|
||||
|
||||
// compute the output of the required output node
|
||||
node += output_id;
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
(*output) = Neuron::Sigmoid(activation);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Performs a feedforward for general nets. Used mainly in training mode
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::GetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output) {
|
||||
// validate output id
|
||||
if (output_id < 0 || output_id >= out_cnt_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// call the fast version in case of readonly nets
|
||||
if (read_only_) {
|
||||
return FastGetNetOutput(inputs, output_id, output);
|
||||
}
|
||||
|
||||
// For the slow version, we'll just call FeedForward and return the
|
||||
// appropriate output
|
||||
vector<Type> outputs(out_cnt_);
|
||||
if (!FeedForward(inputs, &outputs[0])) {
|
||||
return false;
|
||||
}
|
||||
(*output) = outputs[output_id];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Instantiate all supported templates now that the functions have been defined.
|
||||
template bool NeuralNet::FeedForward(const float *inputs, float *outputs);
|
||||
template bool NeuralNet::FeedForward(const double *inputs, double *outputs);
|
||||
template bool NeuralNet::FastFeedForward(const float *inputs, float *outputs);
|
||||
template bool NeuralNet::FastFeedForward(const double *inputs,
|
||||
double *outputs);
|
||||
template bool NeuralNet::GetNetOutput(const float *inputs, int output_id,
|
||||
float *output);
|
||||
template bool NeuralNet::GetNetOutput(const double *inputs, int output_id,
|
||||
double *output);
|
||||
template bool NeuralNet::FastGetNetOutput(const float *inputs, int output_id,
|
||||
float *output);
|
||||
template bool NeuralNet::FastGetNetOutput(const double *inputs, int output_id,
|
||||
double *output);
|
||||
template bool NeuralNet::ReadBinary(InputFileBuffer *input_buffer);
|
||||
|
||||
}
|
@ -1,252 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neural_net.h: Declarations of a class for an object that
|
||||
// represents an arbitrary network of neurons
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef NEURAL_NET_H
|
||||
#define NEURAL_NET_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "neuron.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Minimum input range below which we set the input weight to zero
|
||||
static const float kMinInputRange = 1e-6f;
|
||||
|
||||
class NeuralNet {
|
||||
public:
|
||||
NeuralNet();
|
||||
virtual ~NeuralNet();
|
||||
// create a net object from a file. Uses stdio
|
||||
static NeuralNet *FromFile(const string file_name);
|
||||
// create a net object from an input buffer
|
||||
static NeuralNet *FromInputBuffer(InputFileBuffer *ib);
|
||||
// Different flavors of feed forward function
|
||||
template <typename Type> bool FeedForward(const Type *inputs,
|
||||
Type *outputs);
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
template <typename Type> bool GetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output);
|
||||
// Accessor functions
|
||||
int in_cnt() const { return in_cnt_; }
|
||||
int out_cnt() const { return out_cnt_; }
|
||||
|
||||
protected:
|
||||
struct Node;
|
||||
// A node-weight pair
|
||||
struct WeightedNode {
|
||||
Node *input_node;
|
||||
float input_weight;
|
||||
};
|
||||
// node struct used for fast feedforward in
|
||||
// Read only nets
|
||||
struct Node {
|
||||
float out;
|
||||
float bias;
|
||||
int fan_in_cnt;
|
||||
WeightedNode *inputs;
|
||||
};
|
||||
// Read-Only flag (no training: On by default)
|
||||
// will presumeably be set to false by
|
||||
// the inherting TrainableNeuralNet class
|
||||
bool read_only_;
|
||||
// input count
|
||||
int in_cnt_;
|
||||
// output count
|
||||
int out_cnt_;
|
||||
// Total neuron count (including inputs)
|
||||
int neuron_cnt_;
|
||||
// count of unique weights
|
||||
int wts_cnt_;
|
||||
// Neuron vector
|
||||
Neuron *neurons_;
|
||||
// size of allocated weight chunk (in weights)
|
||||
// This is basically the size of the biggest network
|
||||
// that I have trained. However, the class will allow
|
||||
// a bigger sized net if desired
|
||||
static const int kWgtChunkSize = 0x10000;
|
||||
// Magic number expected at the beginning of the NN
|
||||
// binary file
|
||||
static const unsigned int kNetSignature = 0xFEFEABD0;
|
||||
// count of allocated wgts in the last chunk
|
||||
int alloc_wgt_cnt_;
|
||||
// vector of weights buffers
|
||||
vector<vector<float> *>wts_vec_;
|
||||
// Is the net an auto-encoder type
|
||||
bool auto_encoder_;
|
||||
// vector of input max values
|
||||
vector<float> inputs_max_;
|
||||
// vector of input min values
|
||||
vector<float> inputs_min_;
|
||||
// vector of input mean values
|
||||
vector<float> inputs_mean_;
|
||||
// vector of input standard deviation values
|
||||
vector<float> inputs_std_dev_;
|
||||
// vector of input offsets used by fast read-only
|
||||
// feedforward function
|
||||
vector<Node> fast_nodes_;
|
||||
// Network Initialization function
|
||||
void Init();
|
||||
// Clears all neurons
|
||||
void Clear() {
|
||||
for (int node = 0; node < neuron_cnt_; node++) {
|
||||
neurons_[node].Clear();
|
||||
}
|
||||
}
|
||||
// Reads the net from an input buffer
|
||||
template<class ReadBuffType> bool ReadBinary(ReadBuffType *input_buff) {
|
||||
// Init vars
|
||||
Init();
|
||||
// is this an autoencoder
|
||||
unsigned int read_val;
|
||||
unsigned int auto_encode;
|
||||
// read and verify signature
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
if (read_val != kNetSignature) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&auto_encode, sizeof(auto_encode)) !=
|
||||
sizeof(auto_encode)) {
|
||||
return false;
|
||||
}
|
||||
auto_encoder_ = auto_encode;
|
||||
// read and validate total # of nodes
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
neuron_cnt_ = read_val;
|
||||
if (neuron_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// set the size of the neurons vector
|
||||
neurons_ = new Neuron[neuron_cnt_];
|
||||
// read & validate inputs
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
in_cnt_ = read_val;
|
||||
if (in_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// read outputs
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
out_cnt_ = read_val;
|
||||
if (out_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// set neuron ids and types
|
||||
for (int idx = 0; idx < neuron_cnt_; idx++) {
|
||||
neurons_[idx].set_id(idx);
|
||||
// input type
|
||||
if (idx < in_cnt_) {
|
||||
neurons_[idx].set_node_type(Neuron::Input);
|
||||
} else if (idx >= (neuron_cnt_ - out_cnt_)) {
|
||||
neurons_[idx].set_node_type(Neuron::Output);
|
||||
} else {
|
||||
neurons_[idx].set_node_type(Neuron::Hidden);
|
||||
}
|
||||
}
|
||||
// read the connections
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
// read fanout
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
// read the neuron's info
|
||||
int fan_out_cnt = read_val;
|
||||
for (int fan_out_idx = 0; fan_out_idx < fan_out_cnt; fan_out_idx++) {
|
||||
// read the neuron id
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
// create the connection
|
||||
if (!SetConnection(node_idx, read_val)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// read all the neurons' fan-in connections
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
// read
|
||||
if (!neurons_[node_idx].ReadBinary(input_buff)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// size input stats vector to expected input size
|
||||
inputs_mean_.resize(in_cnt_);
|
||||
inputs_std_dev_.resize(in_cnt_);
|
||||
inputs_min_.resize(in_cnt_);
|
||||
inputs_max_.resize(in_cnt_);
|
||||
// read stats
|
||||
if (input_buff->Read(&(inputs_mean_.front()),
|
||||
sizeof(inputs_mean_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_mean_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_std_dev_.front()),
|
||||
sizeof(inputs_std_dev_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_std_dev_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_min_.front()),
|
||||
sizeof(inputs_min_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_min_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_max_.front()),
|
||||
sizeof(inputs_max_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_max_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
// create a readonly version for fast feedforward
|
||||
if (read_only_) {
|
||||
return CreateFastNet();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// creates a connection between two nodes
|
||||
bool SetConnection(int from, int to);
|
||||
// Create a read only version of the net that
|
||||
// has faster feedforward performance
|
||||
bool CreateFastNet();
|
||||
// internal function to allocate a new set of weights
|
||||
// Centralized weight allocation attempts to increase
|
||||
// weights locality of reference making it more cache friendly
|
||||
float *AllocWgt(int wgt_cnt);
|
||||
// different flavors read-only feedforward function
|
||||
template <typename Type> bool FastFeedForward(const Type *inputs,
|
||||
Type *outputs);
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
// This is the fast-read-only version of this function
|
||||
template <typename Type> bool FastGetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // NEURAL_NET_H__
|
@ -1,103 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neuron.cpp: The implementation of a class for an object
|
||||
// that represents a single neuron in a neural network
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "neuron.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Instantiate all supported templates
|
||||
template bool Neuron::ReadBinary(InputFileBuffer *input_buffer);
|
||||
|
||||
// default and only constructor
|
||||
Neuron::Neuron() {
|
||||
Init();
|
||||
}
|
||||
|
||||
// virtual destructor
|
||||
Neuron::~Neuron() {
|
||||
}
|
||||
|
||||
// Initializer
|
||||
void Neuron::Init() {
|
||||
id_ = -1;
|
||||
frwd_dirty_ = false;
|
||||
fan_in_.clear();
|
||||
fan_in_weights_.clear();
|
||||
activation_ = 0.0f;
|
||||
output_ = 0.0f;
|
||||
bias_ = 0.0f;
|
||||
node_type_ = Unknown;
|
||||
}
|
||||
|
||||
// Computes the activation and output of the neuron if not fresh
|
||||
// by pulling the outputs of all fan-in neurons
|
||||
void Neuron::FeedForward() {
|
||||
if (!frwd_dirty_ ) {
|
||||
return;
|
||||
}
|
||||
// nothing to do for input nodes: just pass the input to the o/p
|
||||
// otherwise, pull the output of all fan-in neurons
|
||||
if (node_type_ != Input) {
|
||||
int fan_in_cnt = fan_in_.size();
|
||||
// sum out the activation
|
||||
activation_ = -bias_;
|
||||
for (int in = 0; in < fan_in_cnt; in++) {
|
||||
if (fan_in_[in]->frwd_dirty_) {
|
||||
fan_in_[in]->FeedForward();
|
||||
}
|
||||
activation_ += ((*(fan_in_weights_[in])) * fan_in_[in]->output_);
|
||||
}
|
||||
// sigmoid it
|
||||
output_ = Sigmoid(activation_);
|
||||
}
|
||||
frwd_dirty_ = false;
|
||||
}
|
||||
|
||||
// set the type of the neuron
|
||||
void Neuron::set_node_type(NeuronTypes Type) {
|
||||
node_type_ = Type;
|
||||
}
|
||||
|
||||
// Adds new connections *to* this neuron *From*
|
||||
// a target neuron using specfied params
|
||||
// Note that what is actually copied in this function are pointers to the
|
||||
// specified Neurons and weights and not the actualt values. This is by
|
||||
// design to centralize the alloction of neurons and weights and so
|
||||
// increase the locality of reference and improve cache-hits resulting
|
||||
// in a faster net. This technique resulted in a 2X-10X speedup
|
||||
// (depending on network size and processor)
|
||||
void Neuron::AddFromConnection(Neuron *neurons,
|
||||
float *wts_offset,
|
||||
int from_cnt) {
|
||||
for (int in = 0; in < from_cnt; in++) {
|
||||
fan_in_.push_back(neurons + in);
|
||||
fan_in_weights_.push_back(wts_offset + in);
|
||||
}
|
||||
}
|
||||
|
||||
// fast computation of sigmoid function using a lookup table
|
||||
// defined in sigmoid_table.cpp
|
||||
float Neuron::Sigmoid(float activation) {
|
||||
if (activation <= -10.0f) {
|
||||
return 0.0f;
|
||||
} else if (activation >= 10.0f) {
|
||||
return 1.0f;
|
||||
} else {
|
||||
return kSigmoidTable[static_cast<int>(100 * (activation + 10.0))];
|
||||
}
|
||||
}
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neuron.h: Declarations of a class for an object that
|
||||
// represents a single neuron in a neural network
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef NEURON_H
|
||||
#define NEURON_H
|
||||
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Input Node bias values
|
||||
static const float kInputNodeBias = 0.0f;
|
||||
|
||||
class Neuron {
|
||||
public:
|
||||
// Types of nodes
|
||||
enum NeuronTypes {
|
||||
Unknown = 0,
|
||||
Input,
|
||||
Hidden,
|
||||
Output
|
||||
};
|
||||
Neuron();
|
||||
~Neuron();
|
||||
// set the forward dirty flag indicating that the
|
||||
// activation of the net is not fresh
|
||||
void Clear() {
|
||||
frwd_dirty_ = true;
|
||||
}
|
||||
// Read a binary representation of the neuron info from
|
||||
// an input buffer.
|
||||
template <class BuffType> bool ReadBinary(BuffType *input_buff) {
|
||||
float val;
|
||||
if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
|
||||
return false;
|
||||
}
|
||||
// input nodes should have no biases
|
||||
if (node_type_ == Input) {
|
||||
bias_ = kInputNodeBias;
|
||||
} else {
|
||||
bias_ = val;
|
||||
}
|
||||
// read fanin count
|
||||
int fan_in_cnt;
|
||||
if (input_buff->Read(&fan_in_cnt, sizeof(fan_in_cnt)) !=
|
||||
sizeof(fan_in_cnt)) {
|
||||
return false;
|
||||
}
|
||||
// validate fan-in cnt
|
||||
if (fan_in_cnt != fan_in_.size()) {
|
||||
return false;
|
||||
}
|
||||
// read the weights
|
||||
for (int in = 0; in < fan_in_cnt; in++) {
|
||||
if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
|
||||
return false;
|
||||
}
|
||||
*(fan_in_weights_[in]) = val;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add a new connection from this neuron *From*
|
||||
// a target neuron using specfied params
|
||||
// Note that what is actually copied in this function are pointers to the
|
||||
// specified Neurons and weights and not the actualt values. This is by
|
||||
// design to centralize the alloction of neurons and weights and so
|
||||
// increase the locality of reference and improve cache-hits resulting
|
||||
// in a faster net. This technique resulted in a 2X-10X speedup
|
||||
// (depending on network size and processor)
|
||||
void AddFromConnection(Neuron *neuron_vec,
|
||||
float *wts_offset,
|
||||
int from_cnt);
|
||||
// Set the type of a neuron
|
||||
void set_node_type(NeuronTypes type);
|
||||
// Computes the output of the node by
|
||||
// "pulling" the output of the fan-in nodes
|
||||
void FeedForward();
|
||||
// fast computation of sigmoid function using a lookup table
|
||||
// defined in sigmoid_table.cpp
|
||||
static float Sigmoid(float activation);
|
||||
// Accessor functions
|
||||
float output() const {
|
||||
return output_;
|
||||
}
|
||||
void set_output(float out_val) {
|
||||
output_ = out_val;
|
||||
}
|
||||
int id() const {
|
||||
return id_;
|
||||
}
|
||||
int fan_in_cnt() const {
|
||||
return fan_in_.size();
|
||||
}
|
||||
Neuron * fan_in(int idx) const {
|
||||
return fan_in_[idx];
|
||||
}
|
||||
float fan_in_wts(int idx) const {
|
||||
return *(fan_in_weights_[idx]);
|
||||
}
|
||||
void set_id(int id) {
|
||||
id_ = id;
|
||||
}
|
||||
float bias() const {
|
||||
return bias_;
|
||||
}
|
||||
Neuron::NeuronTypes node_type() const {
|
||||
return node_type_;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Type of Neuron
|
||||
NeuronTypes node_type_;
|
||||
// unqique id of the neuron
|
||||
int id_;
|
||||
// node bias
|
||||
float bias_;
|
||||
// node net activation
|
||||
float activation_;
|
||||
// node output
|
||||
float output_;
|
||||
// pointers to fanin nodes
|
||||
vector<Neuron *> fan_in_;
|
||||
// pointers to fanin weights
|
||||
vector<float *> fan_in_weights_;
|
||||
// Sigmoid function lookup table used for fast computation
|
||||
// of sigmoid function
|
||||
static const float kSigmoidTable[];
|
||||
// flag determining if the activation of the node
|
||||
// is fresh or not (dirty)
|
||||
bool frwd_dirty_;
|
||||
// Initializer
|
||||
void Init();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // NEURON_H__
|
@ -1,523 +0,0 @@
|
||||
// Copyright 2007 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// sigmoid_table.cpp: Sigmoid function lookup table
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "neuron.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const float Neuron::kSigmoidTable[] = {
|
||||
4.53979E-05f, 4.58541E-05f, 4.63149E-05f, 4.67804E-05f,
|
||||
4.72505E-05f, 4.77254E-05f, 4.8205E-05f, 4.86894E-05f,
|
||||
4.91787E-05f, 4.9673E-05f, 5.01722E-05f, 5.06764E-05f,
|
||||
5.11857E-05f, 5.17001E-05f, 5.22196E-05f, 5.27444E-05f,
|
||||
5.32745E-05f, 5.38099E-05f, 5.43506E-05f, 5.48968E-05f,
|
||||
5.54485E-05f, 5.60058E-05f, 5.65686E-05f, 5.71371E-05f,
|
||||
5.77113E-05f, 5.82913E-05f, 5.88771E-05f, 5.94688E-05f,
|
||||
6.00664E-05f, 6.067E-05f, 6.12797E-05f, 6.18956E-05f,
|
||||
6.25176E-05f, 6.31459E-05f, 6.37805E-05f, 6.44214E-05f,
|
||||
6.50688E-05f, 6.57227E-05f, 6.63832E-05f, 6.70503E-05f,
|
||||
6.77241E-05f, 6.84047E-05f, 6.90922E-05f, 6.97865E-05f,
|
||||
7.04878E-05f, 7.11962E-05f, 7.19117E-05f, 7.26343E-05f,
|
||||
7.33643E-05f, 7.41016E-05f, 7.48462E-05f, 7.55984E-05f,
|
||||
7.63581E-05f, 7.71255E-05f, 7.79005E-05f, 7.86834E-05f,
|
||||
7.94741E-05f, 8.02728E-05f, 8.10794E-05f, 8.18942E-05f,
|
||||
8.27172E-05f, 8.35485E-05f, 8.43881E-05f, 8.52361E-05f,
|
||||
8.60927E-05f, 8.69579E-05f, 8.78317E-05f, 8.87144E-05f,
|
||||
8.96059E-05f, 9.05064E-05f, 9.14159E-05f, 9.23345E-05f,
|
||||
9.32624E-05f, 9.41996E-05f, 9.51463E-05f, 9.61024E-05f,
|
||||
9.70682E-05f, 9.80436E-05f, 9.90289E-05f, 0.000100024f,
|
||||
0.000101029f, 0.000102044f, 0.00010307f, 0.000104106f,
|
||||
0.000105152f, 0.000106209f, 0.000107276f, 0.000108354f,
|
||||
0.000109443f, 0.000110542f, 0.000111653f, 0.000112775f,
|
||||
0.000113909f, 0.000115053f, 0.000116209f, 0.000117377f,
|
||||
0.000118557f, 0.000119748f, 0.000120951f, 0.000122167f,
|
||||
0.000123395f, 0.000124635f, 0.000125887f, 0.000127152f,
|
||||
0.00012843f, 0.00012972f, 0.000131024f, 0.000132341f,
|
||||
0.00013367f, 0.000135014f, 0.00013637f, 0.000137741f,
|
||||
0.000139125f, 0.000140523f, 0.000141935f, 0.000143361f,
|
||||
0.000144802f, 0.000146257f, 0.000147727f, 0.000149211f,
|
||||
0.00015071f, 0.000152225f, 0.000153754f, 0.000155299f,
|
||||
0.00015686f, 0.000158436f, 0.000160028f, 0.000161636f,
|
||||
0.000163261f, 0.000164901f, 0.000166558f, 0.000168232f,
|
||||
0.000169922f, 0.00017163f, 0.000173354f, 0.000175096f,
|
||||
0.000176856f, 0.000178633f, 0.000180428f, 0.000182241f,
|
||||
0.000184072f, 0.000185922f, 0.00018779f, 0.000189677f,
|
||||
0.000191583f, 0.000193508f, 0.000195452f, 0.000197416f,
|
||||
0.0001994f, 0.000201403f, 0.000203427f, 0.000205471f,
|
||||
0.000207536f, 0.000209621f, 0.000211727f, 0.000213855f,
|
||||
0.000216003f, 0.000218174f, 0.000220366f, 0.00022258f,
|
||||
0.000224817f, 0.000227076f, 0.000229357f, 0.000231662f,
|
||||
0.00023399f, 0.000236341f, 0.000238715f, 0.000241114f,
|
||||
0.000243537f, 0.000245984f, 0.000248455f, 0.000250951f,
|
||||
0.000253473f, 0.00025602f, 0.000258592f, 0.00026119f,
|
||||
0.000263815f, 0.000266465f, 0.000269143f, 0.000271847f,
|
||||
0.000274578f, 0.000277337f, 0.000280123f, 0.000282938f,
|
||||
0.000285781f, 0.000288652f, 0.000291552f, 0.000294481f,
|
||||
0.00029744f, 0.000300429f, 0.000303447f, 0.000306496f,
|
||||
0.000309575f, 0.000312685f, 0.000315827f, 0.000319f,
|
||||
0.000322205f, 0.000325442f, 0.000328712f, 0.000332014f,
|
||||
0.00033535f, 0.000338719f, 0.000342122f, 0.00034556f,
|
||||
0.000349031f, 0.000352538f, 0.00035608f, 0.000359657f,
|
||||
0.00036327f, 0.00036692f, 0.000370606f, 0.000374329f,
|
||||
0.00037809f, 0.000381888f, 0.000385725f, 0.0003896f,
|
||||
0.000393514f, 0.000397467f, 0.00040146f, 0.000405494f,
|
||||
0.000409567f, 0.000413682f, 0.000417838f, 0.000422035f,
|
||||
0.000426275f, 0.000430557f, 0.000434882f, 0.000439251f,
|
||||
0.000443664f, 0.000448121f, 0.000452622f, 0.000457169f,
|
||||
0.000461762f, 0.0004664f, 0.000471085f, 0.000475818f,
|
||||
0.000480597f, 0.000485425f, 0.000490301f, 0.000495226f,
|
||||
0.000500201f, 0.000505226f, 0.000510301f, 0.000515427f,
|
||||
0.000520604f, 0.000525833f, 0.000531115f, 0.00053645f,
|
||||
0.000541839f, 0.000547281f, 0.000552779f, 0.000558331f,
|
||||
0.000563939f, 0.000569604f, 0.000575325f, 0.000581104f,
|
||||
0.00058694f, 0.000592836f, 0.00059879f, 0.000604805f,
|
||||
0.000610879f, 0.000617015f, 0.000623212f, 0.000629472f,
|
||||
0.000635794f, 0.00064218f, 0.00064863f, 0.000655144f,
|
||||
0.000661724f, 0.00066837f, 0.000675083f, 0.000681863f,
|
||||
0.000688711f, 0.000695628f, 0.000702614f, 0.00070967f,
|
||||
0.000716798f, 0.000723996f, 0.000731267f, 0.000738611f,
|
||||
0.000746029f, 0.000753521f, 0.000761088f, 0.000768731f,
|
||||
0.000776451f, 0.000784249f, 0.000792124f, 0.000800079f,
|
||||
0.000808113f, 0.000816228f, 0.000824425f, 0.000832703f,
|
||||
0.000841065f, 0.000849511f, 0.000858041f, 0.000866657f,
|
||||
0.00087536f, 0.000884149f, 0.000893027f, 0.000901994f,
|
||||
0.000911051f, 0.000920199f, 0.000929439f, 0.000938771f,
|
||||
0.000948197f, 0.000957717f, 0.000967333f, 0.000977045f,
|
||||
0.000986855f, 0.000996763f, 0.001006771f, 0.001016879f,
|
||||
0.001027088f, 0.0010374f, 0.001047815f, 0.001058334f,
|
||||
0.00106896f, 0.001079691f, 0.00109053f, 0.001101478f,
|
||||
0.001112536f, 0.001123705f, 0.001134985f, 0.001146379f,
|
||||
0.001157887f, 0.00116951f, 0.00118125f, 0.001193108f,
|
||||
0.001205084f, 0.001217181f, 0.001229399f, 0.001241739f,
|
||||
0.001254203f, 0.001266792f, 0.001279507f, 0.00129235f,
|
||||
0.001305321f, 0.001318423f, 0.001331655f, 0.001345021f,
|
||||
0.00135852f, 0.001372155f, 0.001385926f, 0.001399835f,
|
||||
0.001413884f, 0.001428073f, 0.001442405f, 0.00145688f,
|
||||
0.001471501f, 0.001486267f, 0.001501182f, 0.001516247f,
|
||||
0.001531462f, 0.001546829f, 0.001562351f, 0.001578028f,
|
||||
0.001593862f, 0.001609855f, 0.001626008f, 0.001642323f,
|
||||
0.001658801f, 0.001675444f, 0.001692254f, 0.001709233f,
|
||||
0.001726381f, 0.001743701f, 0.001761195f, 0.001778864f,
|
||||
0.00179671f, 0.001814734f, 0.001832939f, 0.001851326f,
|
||||
0.001869898f, 0.001888655f, 0.0019076f, 0.001926735f,
|
||||
0.001946061f, 0.001965581f, 0.001985296f, 0.002005209f,
|
||||
0.00202532f, 0.002045634f, 0.00206615f, 0.002086872f,
|
||||
0.002107801f, 0.00212894f, 0.00215029f, 0.002171854f,
|
||||
0.002193633f, 0.002215631f, 0.002237849f, 0.002260288f,
|
||||
0.002282953f, 0.002305844f, 0.002328964f, 0.002352316f,
|
||||
0.002375901f, 0.002399721f, 0.002423781f, 0.00244808f,
|
||||
0.002472623f, 0.002497411f, 0.002522447f, 0.002547734f,
|
||||
0.002573273f, 0.002599068f, 0.00262512f, 0.002651433f,
|
||||
0.002678009f, 0.002704851f, 0.002731961f, 0.002759342f,
|
||||
0.002786996f, 0.002814927f, 0.002843137f, 0.002871629f,
|
||||
0.002900406f, 0.00292947f, 0.002958825f, 0.002988472f,
|
||||
0.003018416f, 0.003048659f, 0.003079205f, 0.003110055f,
|
||||
0.003141213f, 0.003172683f, 0.003204467f, 0.003236568f,
|
||||
0.00326899f, 0.003301735f, 0.003334807f, 0.00336821f,
|
||||
0.003401946f, 0.003436018f, 0.003470431f, 0.003505187f,
|
||||
0.00354029f, 0.003575744f, 0.003611551f, 0.003647715f,
|
||||
0.00368424f, 0.003721129f, 0.003758387f, 0.003796016f,
|
||||
0.00383402f, 0.003872403f, 0.00391117f, 0.003950322f,
|
||||
0.003989865f, 0.004029802f, 0.004070138f, 0.004110875f,
|
||||
0.004152019f, 0.004193572f, 0.00423554f, 0.004277925f,
|
||||
0.004320734f, 0.004363968f, 0.004407633f, 0.004451734f,
|
||||
0.004496273f, 0.004541256f, 0.004586687f, 0.004632571f,
|
||||
0.004678911f, 0.004725713f, 0.00477298f, 0.004820718f,
|
||||
0.004868931f, 0.004917624f, 0.004966802f, 0.005016468f,
|
||||
0.005066629f, 0.005117289f, 0.005168453f, 0.005220126f,
|
||||
0.005272312f, 0.005325018f, 0.005378247f, 0.005432006f,
|
||||
0.005486299f, 0.005541132f, 0.005596509f, 0.005652437f,
|
||||
0.005708921f, 0.005765966f, 0.005823577f, 0.005881761f,
|
||||
0.005940522f, 0.005999867f, 0.006059801f, 0.006120331f,
|
||||
0.006181461f, 0.006243198f, 0.006305547f, 0.006368516f,
|
||||
0.006432108f, 0.006496332f, 0.006561193f, 0.006626697f,
|
||||
0.006692851f, 0.006759661f, 0.006827132f, 0.006895273f,
|
||||
0.006964089f, 0.007033587f, 0.007103774f, 0.007174656f,
|
||||
0.00724624f, 0.007318533f, 0.007391541f, 0.007465273f,
|
||||
0.007539735f, 0.007614933f, 0.007690876f, 0.00776757f,
|
||||
0.007845023f, 0.007923242f, 0.008002235f, 0.008082009f,
|
||||
0.008162571f, 0.00824393f, 0.008326093f, 0.008409068f,
|
||||
0.008492863f, 0.008577485f, 0.008662944f, 0.008749246f,
|
||||
0.0088364f, 0.008924415f, 0.009013299f, 0.009103059f,
|
||||
0.009193705f, 0.009285246f, 0.009377689f, 0.009471044f,
|
||||
0.009565319f, 0.009660523f, 0.009756666f, 0.009853756f,
|
||||
0.009951802f, 0.010050814f, 0.010150801f, 0.010251772f,
|
||||
0.010353738f, 0.010456706f, 0.010560688f, 0.010665693f,
|
||||
0.01077173f, 0.01087881f, 0.010986943f, 0.011096138f,
|
||||
0.011206406f, 0.011317758f, 0.011430203f, 0.011543752f,
|
||||
0.011658417f, 0.011774206f, 0.011891132f, 0.012009204f,
|
||||
0.012128435f, 0.012248835f, 0.012370415f, 0.012493186f,
|
||||
0.012617161f, 0.012742349f, 0.012868764f, 0.012996417f,
|
||||
0.013125318f, 0.013255481f, 0.013386918f, 0.01351964f,
|
||||
0.013653659f, 0.013788989f, 0.01392564f, 0.014063627f,
|
||||
0.014202961f, 0.014343656f, 0.014485724f, 0.014629178f,
|
||||
0.014774032f, 0.014920298f, 0.01506799f, 0.015217121f,
|
||||
0.015367706f, 0.015519757f, 0.015673288f, 0.015828314f,
|
||||
0.015984848f, 0.016142905f, 0.016302499f, 0.016463645f,
|
||||
0.016626356f, 0.016790648f, 0.016956536f, 0.017124033f,
|
||||
0.017293157f, 0.01746392f, 0.01763634f, 0.017810432f,
|
||||
0.01798621f, 0.018163691f, 0.018342891f, 0.018523825f,
|
||||
0.01870651f, 0.018890962f, 0.019077197f, 0.019265233f,
|
||||
0.019455085f, 0.01964677f, 0.019840306f, 0.020035709f,
|
||||
0.020232997f, 0.020432187f, 0.020633297f, 0.020836345f,
|
||||
0.021041347f, 0.021248323f, 0.02145729f, 0.021668266f,
|
||||
0.021881271f, 0.022096322f, 0.022313439f, 0.022532639f,
|
||||
0.022753943f, 0.02297737f, 0.023202938f, 0.023430668f,
|
||||
0.023660578f, 0.023892689f, 0.024127021f, 0.024363594f,
|
||||
0.024602428f, 0.024843544f, 0.025086962f, 0.025332703f,
|
||||
0.025580788f, 0.025831239f, 0.026084075f, 0.02633932f,
|
||||
0.026596994f, 0.026857119f, 0.027119717f, 0.027384811f,
|
||||
0.027652422f, 0.027922574f, 0.028195288f, 0.028470588f,
|
||||
0.028748496f, 0.029029036f, 0.029312231f, 0.029598104f,
|
||||
0.02988668f, 0.030177981f, 0.030472033f, 0.030768859f,
|
||||
0.031068484f, 0.031370932f, 0.031676228f, 0.031984397f,
|
||||
0.032295465f, 0.032609455f, 0.032926395f, 0.033246309f,
|
||||
0.033569223f, 0.033895164f, 0.034224158f, 0.03455623f,
|
||||
0.034891409f, 0.035229719f, 0.035571189f, 0.035915846f,
|
||||
0.036263716f, 0.036614828f, 0.036969209f, 0.037326887f,
|
||||
0.037687891f, 0.038052247f, 0.038419986f, 0.038791134f,
|
||||
0.039165723f, 0.03954378f, 0.039925334f, 0.040310415f,
|
||||
0.040699054f, 0.041091278f, 0.041487119f, 0.041886607f,
|
||||
0.042289772f, 0.042696644f, 0.043107255f, 0.043521635f,
|
||||
0.043939815f, 0.044361828f, 0.044787703f, 0.045217473f,
|
||||
0.045651171f, 0.046088827f, 0.046530475f, 0.046976146f,
|
||||
0.047425873f, 0.04787969f, 0.048337629f, 0.048799723f,
|
||||
0.049266006f, 0.049736512f, 0.050211273f, 0.050690325f,
|
||||
0.051173701f, 0.051661435f, 0.052153563f, 0.052650118f,
|
||||
0.053151136f, 0.053656652f, 0.0541667f, 0.054681317f,
|
||||
0.055200538f, 0.055724398f, 0.056252934f, 0.056786181f,
|
||||
0.057324176f, 0.057866955f, 0.058414556f, 0.058967013f,
|
||||
0.059524366f, 0.06008665f, 0.060653903f, 0.061226163f,
|
||||
0.061803466f, 0.062385851f, 0.062973356f, 0.063566018f,
|
||||
0.064163876f, 0.064766969f, 0.065375333f, 0.065989009f,
|
||||
0.066608036f, 0.067232451f, 0.067862294f, 0.068497604f,
|
||||
0.06913842f, 0.069784783f, 0.070436731f, 0.071094304f,
|
||||
0.071757542f, 0.072426485f, 0.073101173f, 0.073781647f,
|
||||
0.074467945f, 0.075160109f, 0.07585818f, 0.076562197f,
|
||||
0.077272202f, 0.077988235f, 0.078710337f, 0.079438549f,
|
||||
0.080172912f, 0.080913467f, 0.081660255f, 0.082413318f,
|
||||
0.083172696f, 0.083938432f, 0.084710566f, 0.085489139f,
|
||||
0.086274194f, 0.087065772f, 0.087863915f, 0.088668663f,
|
||||
0.089480059f, 0.090298145f, 0.091122961f, 0.09195455f,
|
||||
0.092792953f, 0.093638212f, 0.094490369f, 0.095349465f,
|
||||
0.096215542f, 0.097088641f, 0.097968804f, 0.098856073f,
|
||||
0.099750489f, 0.100652094f, 0.101560928f, 0.102477033f,
|
||||
0.103400451f, 0.104331223f, 0.10526939f, 0.106214992f,
|
||||
0.10716807f, 0.108128667f, 0.109096821f, 0.110072574f,
|
||||
0.111055967f, 0.112047039f, 0.11304583f, 0.114052381f,
|
||||
0.115066732f, 0.116088922f, 0.117118991f, 0.118156978f,
|
||||
0.119202922f, 0.120256862f, 0.121318838f, 0.122388887f,
|
||||
0.123467048f, 0.124553358f, 0.125647857f, 0.12675058f,
|
||||
0.127861566f, 0.128980852f, 0.130108474f, 0.131244469f,
|
||||
0.132388874f, 0.133541723f, 0.134703052f, 0.135872897f,
|
||||
0.137051293f, 0.138238273f, 0.139433873f, 0.140638126f,
|
||||
0.141851065f, 0.143072723f, 0.144303134f, 0.145542329f,
|
||||
0.14679034f, 0.148047198f, 0.149312935f, 0.15058758f,
|
||||
0.151871164f, 0.153163716f, 0.154465265f, 0.15577584f,
|
||||
0.157095469f, 0.158424179f, 0.159761997f, 0.16110895f,
|
||||
0.162465063f, 0.163830361f, 0.16520487f, 0.166588614f,
|
||||
0.167981615f, 0.169383897f, 0.170795482f, 0.172216392f,
|
||||
0.173646647f, 0.175086268f, 0.176535275f, 0.177993686f,
|
||||
0.179461519f, 0.180938793f, 0.182425524f, 0.183921727f,
|
||||
0.185427419f, 0.186942614f, 0.188467325f, 0.190001566f,
|
||||
0.191545349f, 0.193098684f, 0.194661584f, 0.196234056f,
|
||||
0.197816111f, 0.199407757f, 0.201009f, 0.202619846f,
|
||||
0.204240302f, 0.205870372f, 0.207510059f, 0.209159365f,
|
||||
0.210818293f, 0.212486844f, 0.214165017f, 0.215852811f,
|
||||
0.217550224f, 0.219257252f, 0.220973892f, 0.222700139f,
|
||||
0.224435986f, 0.226181426f, 0.227936451f, 0.229701051f,
|
||||
0.231475217f, 0.233258936f, 0.235052196f, 0.236854984f,
|
||||
0.238667285f, 0.240489083f, 0.242320361f, 0.244161101f,
|
||||
0.246011284f, 0.247870889f, 0.249739894f, 0.251618278f,
|
||||
0.253506017f, 0.255403084f, 0.257309455f, 0.259225101f,
|
||||
0.261149994f, 0.263084104f, 0.265027401f, 0.266979851f,
|
||||
0.268941421f, 0.270912078f, 0.272891784f, 0.274880502f,
|
||||
0.276878195f, 0.278884822f, 0.280900343f, 0.282924715f,
|
||||
0.284957894f, 0.286999837f, 0.289050497f, 0.291109827f,
|
||||
0.293177779f, 0.295254302f, 0.297339346f, 0.299432858f,
|
||||
0.301534784f, 0.30364507f, 0.30576366f, 0.307890496f,
|
||||
0.310025519f, 0.312168669f, 0.314319886f, 0.316479106f,
|
||||
0.318646266f, 0.320821301f, 0.323004144f, 0.325194727f,
|
||||
0.327392983f, 0.32959884f, 0.331812228f, 0.334033073f,
|
||||
0.336261303f, 0.338496841f, 0.340739612f, 0.342989537f,
|
||||
0.345246539f, 0.347510538f, 0.349781451f, 0.352059198f,
|
||||
0.354343694f, 0.356634854f, 0.358932594f, 0.361236825f,
|
||||
0.36354746f, 0.365864409f, 0.368187582f, 0.370516888f,
|
||||
0.372852234f, 0.375193526f, 0.377540669f, 0.379893568f,
|
||||
0.382252125f, 0.384616244f, 0.386985824f, 0.389360766f,
|
||||
0.391740969f, 0.394126332f, 0.39651675f, 0.398912121f,
|
||||
0.40131234f, 0.403717301f, 0.406126897f, 0.408541022f,
|
||||
0.410959566f, 0.413382421f, 0.415809477f, 0.418240623f,
|
||||
0.420675748f, 0.423114739f, 0.425557483f, 0.428003867f,
|
||||
0.430453776f, 0.432907095f, 0.435363708f, 0.437823499f,
|
||||
0.440286351f, 0.442752145f, 0.445220765f, 0.44769209f,
|
||||
0.450166003f, 0.452642382f, 0.455121108f, 0.457602059f,
|
||||
0.460085115f, 0.462570155f, 0.465057055f, 0.467545694f,
|
||||
0.470035948f, 0.472527696f, 0.475020813f, 0.477515175f,
|
||||
0.48001066f, 0.482507142f, 0.485004498f, 0.487502604f,
|
||||
0.490001333f, 0.492500562f, 0.495000167f, 0.497500021f,
|
||||
0.5f, 0.502499979f, 0.504999833f, 0.507499438f,
|
||||
0.509998667f, 0.512497396f, 0.514995502f, 0.517492858f,
|
||||
0.51998934f, 0.522484825f, 0.524979187f, 0.527472304f,
|
||||
0.529964052f, 0.532454306f, 0.534942945f, 0.537429845f,
|
||||
0.539914885f, 0.542397941f, 0.544878892f, 0.547357618f,
|
||||
0.549833997f, 0.55230791f, 0.554779235f, 0.557247855f,
|
||||
0.559713649f, 0.562176501f, 0.564636292f, 0.567092905f,
|
||||
0.569546224f, 0.571996133f, 0.574442517f, 0.576885261f,
|
||||
0.579324252f, 0.581759377f, 0.584190523f, 0.586617579f,
|
||||
0.589040434f, 0.591458978f, 0.593873103f, 0.596282699f,
|
||||
0.59868766f, 0.601087879f, 0.60348325f, 0.605873668f,
|
||||
0.608259031f, 0.610639234f, 0.613014176f, 0.615383756f,
|
||||
0.617747875f, 0.620106432f, 0.622459331f, 0.624806474f,
|
||||
0.627147766f, 0.629483112f, 0.631812418f, 0.634135591f,
|
||||
0.63645254f, 0.638763175f, 0.641067406f, 0.643365146f,
|
||||
0.645656306f, 0.647940802f, 0.650218549f, 0.652489462f,
|
||||
0.654753461f, 0.657010463f, 0.659260388f, 0.661503159f,
|
||||
0.663738697f, 0.665966927f, 0.668187772f, 0.67040116f,
|
||||
0.672607017f, 0.674805273f, 0.676995856f, 0.679178699f,
|
||||
0.681353734f, 0.683520894f, 0.685680114f, 0.687831331f,
|
||||
0.689974481f, 0.692109504f, 0.69423634f, 0.69635493f,
|
||||
0.698465216f, 0.700567142f, 0.702660654f, 0.704745698f,
|
||||
0.706822221f, 0.708890173f, 0.710949503f, 0.713000163f,
|
||||
0.715042106f, 0.717075285f, 0.719099657f, 0.721115178f,
|
||||
0.723121805f, 0.725119498f, 0.727108216f, 0.729087922f,
|
||||
0.731058579f, 0.733020149f, 0.734972599f, 0.736915896f,
|
||||
0.738850006f, 0.740774899f, 0.742690545f, 0.744596916f,
|
||||
0.746493983f, 0.748381722f, 0.750260106f, 0.752129111f,
|
||||
0.753988716f, 0.755838899f, 0.757679639f, 0.759510917f,
|
||||
0.761332715f, 0.763145016f, 0.764947804f, 0.766741064f,
|
||||
0.768524783f, 0.770298949f, 0.772063549f, 0.773818574f,
|
||||
0.775564014f, 0.777299861f, 0.779026108f, 0.780742748f,
|
||||
0.782449776f, 0.784147189f, 0.785834983f, 0.787513156f,
|
||||
0.789181707f, 0.790840635f, 0.792489941f, 0.794129628f,
|
||||
0.795759698f, 0.797380154f, 0.798991f, 0.800592243f,
|
||||
0.802183889f, 0.803765944f, 0.805338416f, 0.806901316f,
|
||||
0.808454651f, 0.809998434f, 0.811532675f, 0.813057386f,
|
||||
0.814572581f, 0.816078273f, 0.817574476f, 0.819061207f,
|
||||
0.820538481f, 0.822006314f, 0.823464725f, 0.824913732f,
|
||||
0.826353353f, 0.827783608f, 0.829204518f, 0.830616103f,
|
||||
0.832018385f, 0.833411386f, 0.83479513f, 0.836169639f,
|
||||
0.837534937f, 0.83889105f, 0.840238003f, 0.841575821f,
|
||||
0.842904531f, 0.84422416f, 0.845534735f, 0.846836284f,
|
||||
0.848128836f, 0.84941242f, 0.850687065f, 0.851952802f,
|
||||
0.85320966f, 0.854457671f, 0.855696866f, 0.856927277f,
|
||||
0.858148935f, 0.859361874f, 0.860566127f, 0.861761727f,
|
||||
0.862948707f, 0.864127103f, 0.865296948f, 0.866458277f,
|
||||
0.867611126f, 0.868755531f, 0.869891526f, 0.871019148f,
|
||||
0.872138434f, 0.87324942f, 0.874352143f, 0.875446642f,
|
||||
0.876532952f, 0.877611113f, 0.878681162f, 0.879743138f,
|
||||
0.880797078f, 0.881843022f, 0.882881009f, 0.883911078f,
|
||||
0.884933268f, 0.885947619f, 0.88695417f, 0.887952961f,
|
||||
0.888944033f, 0.889927426f, 0.890903179f, 0.891871333f,
|
||||
0.89283193f, 0.893785008f, 0.89473061f, 0.895668777f,
|
||||
0.896599549f, 0.897522967f, 0.898439072f, 0.899347906f,
|
||||
0.900249511f, 0.901143927f, 0.902031196f, 0.902911359f,
|
||||
0.903784458f, 0.904650535f, 0.905509631f, 0.906361788f,
|
||||
0.907207047f, 0.90804545f, 0.908877039f, 0.909701855f,
|
||||
0.910519941f, 0.911331337f, 0.912136085f, 0.912934228f,
|
||||
0.913725806f, 0.914510861f, 0.915289434f, 0.916061568f,
|
||||
0.916827304f, 0.917586682f, 0.918339745f, 0.919086533f,
|
||||
0.919827088f, 0.920561451f, 0.921289663f, 0.922011765f,
|
||||
0.922727798f, 0.923437803f, 0.92414182f, 0.924839891f,
|
||||
0.925532055f, 0.926218353f, 0.926898827f, 0.927573515f,
|
||||
0.928242458f, 0.928905696f, 0.929563269f, 0.930215217f,
|
||||
0.93086158f, 0.931502396f, 0.932137706f, 0.932767549f,
|
||||
0.933391964f, 0.934010991f, 0.934624667f, 0.935233031f,
|
||||
0.935836124f, 0.936433982f, 0.937026644f, 0.937614149f,
|
||||
0.938196534f, 0.938773837f, 0.939346097f, 0.93991335f,
|
||||
0.940475634f, 0.941032987f, 0.941585444f, 0.942133045f,
|
||||
0.942675824f, 0.943213819f, 0.943747066f, 0.944275602f,
|
||||
0.944799462f, 0.945318683f, 0.9458333f, 0.946343348f,
|
||||
0.946848864f, 0.947349882f, 0.947846437f, 0.948338565f,
|
||||
0.948826299f, 0.949309675f, 0.949788727f, 0.950263488f,
|
||||
0.950733994f, 0.951200277f, 0.951662371f, 0.95212031f,
|
||||
0.952574127f, 0.953023854f, 0.953469525f, 0.953911173f,
|
||||
0.954348829f, 0.954782527f, 0.955212297f, 0.955638172f,
|
||||
0.956060185f, 0.956478365f, 0.956892745f, 0.957303356f,
|
||||
0.957710228f, 0.958113393f, 0.958512881f, 0.958908722f,
|
||||
0.959300946f, 0.959689585f, 0.960074666f, 0.96045622f,
|
||||
0.960834277f, 0.961208866f, 0.961580014f, 0.961947753f,
|
||||
0.962312109f, 0.962673113f, 0.963030791f, 0.963385172f,
|
||||
0.963736284f, 0.964084154f, 0.964428811f, 0.964770281f,
|
||||
0.965108591f, 0.96544377f, 0.965775842f, 0.966104836f,
|
||||
0.966430777f, 0.966753691f, 0.967073605f, 0.967390545f,
|
||||
0.967704535f, 0.968015603f, 0.968323772f, 0.968629068f,
|
||||
0.968931516f, 0.969231141f, 0.969527967f, 0.969822019f,
|
||||
0.97011332f, 0.970401896f, 0.970687769f, 0.970970964f,
|
||||
0.971251504f, 0.971529412f, 0.971804712f, 0.972077426f,
|
||||
0.972347578f, 0.972615189f, 0.972880283f, 0.973142881f,
|
||||
0.973403006f, 0.97366068f, 0.973915925f, 0.974168761f,
|
||||
0.974419212f, 0.974667297f, 0.974913038f, 0.975156456f,
|
||||
0.975397572f, 0.975636406f, 0.975872979f, 0.976107311f,
|
||||
0.976339422f, 0.976569332f, 0.976797062f, 0.97702263f,
|
||||
0.977246057f, 0.977467361f, 0.977686561f, 0.977903678f,
|
||||
0.978118729f, 0.978331734f, 0.97854271f, 0.978751677f,
|
||||
0.978958653f, 0.979163655f, 0.979366703f, 0.979567813f,
|
||||
0.979767003f, 0.979964291f, 0.980159694f, 0.98035323f,
|
||||
0.980544915f, 0.980734767f, 0.980922803f, 0.981109038f,
|
||||
0.98129349f, 0.981476175f, 0.981657109f, 0.981836309f,
|
||||
0.98201379f, 0.982189568f, 0.98236366f, 0.98253608f,
|
||||
0.982706843f, 0.982875967f, 0.983043464f, 0.983209352f,
|
||||
0.983373644f, 0.983536355f, 0.983697501f, 0.983857095f,
|
||||
0.984015152f, 0.984171686f, 0.984326712f, 0.984480243f,
|
||||
0.984632294f, 0.984782879f, 0.98493201f, 0.985079702f,
|
||||
0.985225968f, 0.985370822f, 0.985514276f, 0.985656344f,
|
||||
0.985797039f, 0.985936373f, 0.98607436f, 0.986211011f,
|
||||
0.986346341f, 0.98648036f, 0.986613082f, 0.986744519f,
|
||||
0.986874682f, 0.987003583f, 0.987131236f, 0.987257651f,
|
||||
0.987382839f, 0.987506814f, 0.987629585f, 0.987751165f,
|
||||
0.987871565f, 0.987990796f, 0.988108868f, 0.988225794f,
|
||||
0.988341583f, 0.988456248f, 0.988569797f, 0.988682242f,
|
||||
0.988793594f, 0.988903862f, 0.989013057f, 0.98912119f,
|
||||
0.98922827f, 0.989334307f, 0.989439312f, 0.989543294f,
|
||||
0.989646262f, 0.989748228f, 0.989849199f, 0.989949186f,
|
||||
0.990048198f, 0.990146244f, 0.990243334f, 0.990339477f,
|
||||
0.990434681f, 0.990528956f, 0.990622311f, 0.990714754f,
|
||||
0.990806295f, 0.990896941f, 0.990986701f, 0.991075585f,
|
||||
0.9911636f, 0.991250754f, 0.991337056f, 0.991422515f,
|
||||
0.991507137f, 0.991590932f, 0.991673907f, 0.99175607f,
|
||||
0.991837429f, 0.991917991f, 0.991997765f, 0.992076758f,
|
||||
0.992154977f, 0.99223243f, 0.992309124f, 0.992385067f,
|
||||
0.992460265f, 0.992534727f, 0.992608459f, 0.992681467f,
|
||||
0.99275376f, 0.992825344f, 0.992896226f, 0.992966413f,
|
||||
0.993035911f, 0.993104727f, 0.993172868f, 0.993240339f,
|
||||
0.993307149f, 0.993373303f, 0.993438807f, 0.993503668f,
|
||||
0.993567892f, 0.993631484f, 0.993694453f, 0.993756802f,
|
||||
0.993818539f, 0.993879669f, 0.993940199f, 0.994000133f,
|
||||
0.994059478f, 0.994118239f, 0.994176423f, 0.994234034f,
|
||||
0.994291079f, 0.994347563f, 0.994403491f, 0.994458868f,
|
||||
0.994513701f, 0.994567994f, 0.994621753f, 0.994674982f,
|
||||
0.994727688f, 0.994779874f, 0.994831547f, 0.994882711f,
|
||||
0.994933371f, 0.994983532f, 0.995033198f, 0.995082376f,
|
||||
0.995131069f, 0.995179282f, 0.99522702f, 0.995274287f,
|
||||
0.995321089f, 0.995367429f, 0.995413313f, 0.995458744f,
|
||||
0.995503727f, 0.995548266f, 0.995592367f, 0.995636032f,
|
||||
0.995679266f, 0.995722075f, 0.99576446f, 0.995806428f,
|
||||
0.995847981f, 0.995889125f, 0.995929862f, 0.995970198f,
|
||||
0.996010135f, 0.996049678f, 0.99608883f, 0.996127597f,
|
||||
0.99616598f, 0.996203984f, 0.996241613f, 0.996278871f,
|
||||
0.99631576f, 0.996352285f, 0.996388449f, 0.996424256f,
|
||||
0.99645971f, 0.996494813f, 0.996529569f, 0.996563982f,
|
||||
0.996598054f, 0.99663179f, 0.996665193f, 0.996698265f,
|
||||
0.99673101f, 0.996763432f, 0.996795533f, 0.996827317f,
|
||||
0.996858787f, 0.996889945f, 0.996920795f, 0.996951341f,
|
||||
0.996981584f, 0.997011528f, 0.997041175f, 0.99707053f,
|
||||
0.997099594f, 0.997128371f, 0.997156863f, 0.997185073f,
|
||||
0.997213004f, 0.997240658f, 0.997268039f, 0.997295149f,
|
||||
0.997321991f, 0.997348567f, 0.99737488f, 0.997400932f,
|
||||
0.997426727f, 0.997452266f, 0.997477553f, 0.997502589f,
|
||||
0.997527377f, 0.99755192f, 0.997576219f, 0.997600279f,
|
||||
0.997624099f, 0.997647684f, 0.997671036f, 0.997694156f,
|
||||
0.997717047f, 0.997739712f, 0.997762151f, 0.997784369f,
|
||||
0.997806367f, 0.997828146f, 0.99784971f, 0.99787106f,
|
||||
0.997892199f, 0.997913128f, 0.99793385f, 0.997954366f,
|
||||
0.99797468f, 0.997994791f, 0.998014704f, 0.998034419f,
|
||||
0.998053939f, 0.998073265f, 0.9980924f, 0.998111345f,
|
||||
0.998130102f, 0.998148674f, 0.998167061f, 0.998185266f,
|
||||
0.99820329f, 0.998221136f, 0.998238805f, 0.998256299f,
|
||||
0.998273619f, 0.998290767f, 0.998307746f, 0.998324556f,
|
||||
0.998341199f, 0.998357677f, 0.998373992f, 0.998390145f,
|
||||
0.998406138f, 0.998421972f, 0.998437649f, 0.998453171f,
|
||||
0.998468538f, 0.998483753f, 0.998498818f, 0.998513733f,
|
||||
0.998528499f, 0.99854312f, 0.998557595f, 0.998571927f,
|
||||
0.998586116f, 0.998600165f, 0.998614074f, 0.998627845f,
|
||||
0.99864148f, 0.998654979f, 0.998668345f, 0.998681577f,
|
||||
0.998694679f, 0.99870765f, 0.998720493f, 0.998733208f,
|
||||
0.998745797f, 0.998758261f, 0.998770601f, 0.998782819f,
|
||||
0.998794916f, 0.998806892f, 0.99881875f, 0.99883049f,
|
||||
0.998842113f, 0.998853621f, 0.998865015f, 0.998876295f,
|
||||
0.998887464f, 0.998898522f, 0.99890947f, 0.998920309f,
|
||||
0.99893104f, 0.998941666f, 0.998952185f, 0.9989626f,
|
||||
0.998972912f, 0.998983121f, 0.998993229f, 0.999003237f,
|
||||
0.999013145f, 0.999022955f, 0.999032667f, 0.999042283f,
|
||||
0.999051803f, 0.999061229f, 0.999070561f, 0.999079801f,
|
||||
0.999088949f, 0.999098006f, 0.999106973f, 0.999115851f,
|
||||
0.99912464f, 0.999133343f, 0.999141959f, 0.999150489f,
|
||||
0.999158935f, 0.999167297f, 0.999175575f, 0.999183772f,
|
||||
0.999191887f, 0.999199921f, 0.999207876f, 0.999215751f,
|
||||
0.999223549f, 0.999231269f, 0.999238912f, 0.999246479f,
|
||||
0.999253971f, 0.999261389f, 0.999268733f, 0.999276004f,
|
||||
0.999283202f, 0.99929033f, 0.999297386f, 0.999304372f,
|
||||
0.999311289f, 0.999318137f, 0.999324917f, 0.99933163f,
|
||||
0.999338276f, 0.999344856f, 0.99935137f, 0.99935782f,
|
||||
0.999364206f, 0.999370528f, 0.999376788f, 0.999382985f,
|
||||
0.999389121f, 0.999395195f, 0.99940121f, 0.999407164f,
|
||||
0.99941306f, 0.999418896f, 0.999424675f, 0.999430396f,
|
||||
0.999436061f, 0.999441669f, 0.999447221f, 0.999452719f,
|
||||
0.999458161f, 0.99946355f, 0.999468885f, 0.999474167f,
|
||||
0.999479396f, 0.999484573f, 0.999489699f, 0.999494774f,
|
||||
0.999499799f, 0.999504774f, 0.999509699f, 0.999514575f,
|
||||
0.999519403f, 0.999524182f, 0.999528915f, 0.9995336f,
|
||||
0.999538238f, 0.999542831f, 0.999547378f, 0.999551879f,
|
||||
0.999556336f, 0.999560749f, 0.999565118f, 0.999569443f,
|
||||
0.999573725f, 0.999577965f, 0.999582162f, 0.999586318f,
|
||||
0.999590433f, 0.999594506f, 0.99959854f, 0.999602533f,
|
||||
0.999606486f, 0.9996104f, 0.999614275f, 0.999618112f,
|
||||
0.99962191f, 0.999625671f, 0.999629394f, 0.99963308f,
|
||||
0.99963673f, 0.999640343f, 0.99964392f, 0.999647462f,
|
||||
0.999650969f, 0.99965444f, 0.999657878f, 0.999661281f,
|
||||
0.99966465f, 0.999667986f, 0.999671288f, 0.999674558f,
|
||||
0.999677795f, 0.999681f, 0.999684173f, 0.999687315f,
|
||||
0.999690425f, 0.999693504f, 0.999696553f, 0.999699571f,
|
||||
0.99970256f, 0.999705519f, 0.999708448f, 0.999711348f,
|
||||
0.999714219f, 0.999717062f, 0.999719877f, 0.999722663f,
|
||||
0.999725422f, 0.999728153f, 0.999730857f, 0.999733535f,
|
||||
0.999736185f, 0.99973881f, 0.999741408f, 0.99974398f,
|
||||
0.999746527f, 0.999749049f, 0.999751545f, 0.999754016f,
|
||||
0.999756463f, 0.999758886f, 0.999761285f, 0.999763659f,
|
||||
0.99976601f, 0.999768338f, 0.999770643f, 0.999772924f,
|
||||
0.999775183f, 0.99977742f, 0.999779634f, 0.999781826f,
|
||||
0.999783997f, 0.999786145f, 0.999788273f, 0.999790379f,
|
||||
0.999792464f, 0.999794529f, 0.999796573f, 0.999798597f,
|
||||
0.9998006f, 0.999802584f, 0.999804548f, 0.999806492f,
|
||||
0.999808417f, 0.999810323f, 0.99981221f, 0.999814078f,
|
||||
0.999815928f, 0.999817759f, 0.999819572f, 0.999821367f,
|
||||
0.999823144f, 0.999824904f, 0.999826646f, 0.99982837f,
|
||||
0.999830078f, 0.999831768f, 0.999833442f, 0.999835099f,
|
||||
0.999836739f, 0.999838364f, 0.999839972f, 0.999841564f,
|
||||
0.99984314f, 0.999844701f, 0.999846246f, 0.999847775f,
|
||||
0.99984929f, 0.999850789f, 0.999852273f, 0.999853743f,
|
||||
0.999855198f, 0.999856639f, 0.999858065f, 0.999859477f,
|
||||
0.999860875f, 0.999862259f, 0.99986363f, 0.999864986f,
|
||||
0.99986633f, 0.999867659f, 0.999868976f, 0.99987028f,
|
||||
0.99987157f, 0.999872848f, 0.999874113f, 0.999875365f,
|
||||
0.999876605f, 0.999877833f, 0.999879049f, 0.999880252f,
|
||||
0.999881443f, 0.999882623f, 0.999883791f, 0.999884947f,
|
||||
0.999886091f, 0.999887225f, 0.999888347f, 0.999889458f,
|
||||
0.999890557f, 0.999891646f, 0.999892724f, 0.999893791f,
|
||||
0.999894848f, 0.999895894f, 0.99989693f, 0.999897956f,
|
||||
0.999898971f, 0.999899976f, 0.999900971f, 0.999901956f,
|
||||
0.999902932f, 0.999903898f, 0.999904854f, 0.9999058f,
|
||||
0.999906738f, 0.999907665f, 0.999908584f, 0.999909494f,
|
||||
0.999910394f, 0.999911286f, 0.999912168f, 0.999913042f,
|
||||
0.999913907f, 0.999914764f, 0.999915612f, 0.999916452f,
|
||||
0.999917283f, 0.999918106f, 0.999918921f, 0.999919727f,
|
||||
0.999920526f, 0.999921317f, 0.999922099f, 0.999922875f,
|
||||
0.999923642f, 0.999924402f, 0.999925154f, 0.999925898f,
|
||||
0.999926636f, 0.999927366f, 0.999928088f, 0.999928804f,
|
||||
0.999929512f, 0.999930213f, 0.999930908f, 0.999931595f,
|
||||
0.999932276f, 0.99993295f, 0.999933617f, 0.999934277f,
|
||||
0.999934931f, 0.999935579f, 0.99993622f, 0.999936854f,
|
||||
0.999937482f, 0.999938104f, 0.99993872f, 0.99993933f,
|
||||
0.999939934f, 0.999940531f, 0.999941123f, 0.999941709f,
|
||||
0.999942289f, 0.999942863f, 0.999943431f, 0.999943994f,
|
||||
0.999944551f, 0.999945103f, 0.999945649f, 0.99994619f,
|
||||
0.999946726f, 0.999947256f, 0.99994778f, 0.9999483f,
|
||||
0.999948814f, 0.999949324f, 0.999949828f, 0.999950327f,
|
||||
0.999950821f, 0.999951311f, 0.999951795f, 0.999952275f,
|
||||
0.999952749f, 0.99995322f, 0.999953685f, 0.999954146f,
|
||||
0.999954602f
|
||||
};
|
||||
} // namespace tesseract
|
Loading…
Reference in New Issue
Block a user