Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.

This commit is contained in:
Ray Smith 2015-07-09 14:28:20 -07:00
parent d00d833b9b
commit a303ab9d00
16 changed files with 19138 additions and 21635 deletions

View File

@ -637,8 +637,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
">>\n" ">>\n"
"stream\n", size, size); "stream\n", size, size);
if (n >= sizeof(buf)) { if (n >= sizeof(buf)) {
delete[] buffer; delete[] buffer;
return false; return false;
} }
AppendString(buf); AppendString(buf);
objsize = strlen(buf); objsize = strlen(buf);

View File

@ -314,6 +314,7 @@ void Tesseract::do_re_display(
image_win->Image(pix_binary_, 0, 0); image_win->Image(pix_binary_, 0, 0);
} }
image_win->Brush(ScrollView::NONE);
PAGE_RES_IT pr_it(current_page_res); PAGE_RES_IT pr_it(current_page_res);
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) { for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
(this->*word_painter)(&pr_it); (this->*word_painter)(&pr_it);

View File

@ -1,8 +1,10 @@
/********************************************************************** /**********************************************************************
* File: tessedit.cpp (Formerly tessedit.c) * File: tessedit.cpp (Formerly tessedit.c)
* Description: Main program for merge of tess and editor. * Description: (Previously) Main program for merge of tess and editor.
* Author: Ray Smith * Now just code to load the language model and various
* Created: Tue Jan 07 15:21:46 GMT 1992 * engine-specific data files.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
* *
* (C) Copyright 1992, Hewlett-Packard Ltd. * (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
int band_width = bandwidth(); int band_width = bandwidth();
MATRIX* result = new MATRIX(dim, band_width); MATRIX* result = new MATRIX(dim, band_width);
for (int col = 0; col < dim; ++col) { for (int col = 0; col < dim; ++col) {
for (int row = col; row < col + band_width; ++row) { for (int row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST* choices = get(col, row); BLOB_CHOICE_LIST* choices = get(col, row);
if (choices != NULL) { if (choices != NULL) {
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST; BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy); copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
result->put(col, row, copy_choices); result->put(col, row, copy_choices);
} }
} }

View File

@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
flags(0), flags(0),
script_id_(0), script_id_(0),
correct(text) { correct(text) {
C_BLOB_IT start_it = blob_list; C_BLOB_IT start_it = &cblobs;
C_BLOB_IT end_it = blob_list;
C_BLOB_IT rej_cblob_it = &rej_cblobs; C_BLOB_IT rej_cblob_it = &rej_cblobs;
C_OUTLINE_IT c_outline_it; C_OUTLINE_IT c_outline_it;
inT16 inverted_vote = 0; inT16 inverted_vote = 0;
inT16 non_inverted_vote = 0; inT16 non_inverted_vote = 0;
// Move blob_list's elements into cblobs. // Move blob_list's elements into cblobs.
while (!end_it.at_last()) start_it.add_list_after(blob_list);
end_it.forward();
cblobs.assign_to_sublist(&start_it, &end_it);
/* /*
Set white on black flag for the WERD, moving any duff blobs onto the Set white on black flag for the WERD, moving any duff blobs onto the

View File

@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
max_bottom = MAX_UINT8; max_bottom = MAX_UINT8;
min_top = 0; min_top = 0;
max_top = MAX_UINT8; max_top = MAX_UINT8;
min_width = 0; width = 0.0f;
max_width = MAX_INT16; width_sd = 0.0f;
min_bearing = 0; bearing = 0.0f;
max_bearing = MAX_INT16; bearing_sd = 0.0f;
min_advance = 0; advance = 0.0f;
max_advance = MAX_INT16; advance_sd = 0.0f;
} }
// Sets all ranges to empty. Used before expanding with font-based data. // Sets all ranges to empty. Used before expanding with font-based data.
@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
max_bottom = 0; max_bottom = 0;
min_top = MAX_UINT8; min_top = MAX_UINT8;
max_top = 0; max_top = 0;
min_width = MAX_INT16; width = 0.0f;
max_width = 0; width_sd = 0.0f;
min_bearing = MAX_INT16; bearing = 0.0f;
max_bearing = 0; bearing_sd = 0.0f;
min_advance = MAX_INT16; advance = 0.0f;
max_advance = 0; advance_sd = 0.0f;
} }
// Returns true if any of the top/bottom/width/bearing/advance ranges is // Returns true if any of the top/bottom/width/bearing/advance ranges/stats
// empty. // is empty.
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const { bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
return min_bottom > max_bottom || min_top > max_top || return width == 0.0f || advance == 0.0f;
min_width > max_width || min_bearing > max_bearing ||
min_advance > max_advance;
} }
// Expands the ranges with the ranges from the src properties. // Expands the ranges with the ranges from the src properties.
@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
UpdateRange(src.max_bottom, &min_bottom, &max_bottom); UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
UpdateRange(src.min_top, &min_top, &max_top); UpdateRange(src.min_top, &min_top, &max_top);
UpdateRange(src.max_top, &min_top, &max_top); UpdateRange(src.max_top, &min_top, &max_top);
UpdateRange(src.min_width, &min_width, &max_width); if (src.width_sd > width_sd) {
UpdateRange(src.max_width, &min_width, &max_width); width = src.width;
UpdateRange(src.min_bearing, &min_bearing, &max_bearing); width_sd = src.width_sd;
UpdateRange(src.max_bearing, &min_bearing, &max_bearing); }
UpdateRange(src.min_advance, &min_advance, &max_advance); if (src.bearing_sd > bearing_sd) {
UpdateRange(src.max_advance, &min_advance, &max_advance); bearing = src.bearing;
bearing_sd = src.bearing_sd;
}
if (src.advance_sd > advance_sd) {
advance = src.advance;
advance_sd = src.advance_sd;
}
} }
// Copies the properties from src into this. // Copies the properties from src into this.
@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
} }
unichars[ch].properties.CopyFrom(properties); unichars[ch].properties.CopyFrom(properties);
set_normed_ids(ch); set_normed_ids(ch);
} else {
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
} }
} }
} }
@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
for (int ch = 0; ch < src.size_used; ++ch) { for (int ch = 0; ch < src.size_used; ++ch) {
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties; const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
const char* utf8 = src.id_to_unichar(ch); const char* utf8 = src.id_to_unichar(ch);
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) { if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
// Only use fully valid entries. // Only use fully valid entries.
tprintf("Bad properties for index %d, char %s: " tprintf("Bad properties for index %d, char %s: "
"%d,%d %d,%d %d,%d %d,%d %d,%d\n", "%d,%d %d,%d %g,%g %g,%g %g,%g\n",
ch, utf8, src_props.min_bottom, src_props.max_bottom, ch, utf8, src_props.min_bottom, src_props.max_bottom,
src_props.min_top, src_props.max_top, src_props.min_top, src_props.max_top,
src_props.min_width, src_props.max_width, src_props.width, src_props.width_sd,
src_props.min_bearing, src_props.max_bearing, src_props.bearing, src_props.bearing_sd,
src_props.min_advance, src_props.max_advance); src_props.advance, src_props.advance_sd);
continue; continue;
} }
int id = size_used; int id = size_used;
@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
UNICHAR_PROPERTIES* props) const { UNICHAR_PROPERTIES* props) const {
props->Init(); props->Init();
props->SetRangesEmpty(); props->SetRangesEmpty();
props->min_advance = 0;
props->max_advance = 0;
int total_unicodes = 0; int total_unicodes = 0;
GenericVector<UNICHAR_ID> encoding; GenericVector<UNICHAR_ID> encoding;
if (!encode_string(utf8_str, true, &encoding, NULL, NULL)) if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom); UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
UpdateRange(src_props.min_top, &props->min_top, &props->max_top); UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
UpdateRange(src_props.max_top, &props->min_top, &props->max_top); UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
int bearing = ClipToRange(props->min_advance + src_props.min_bearing, float bearing = props->advance + src_props.bearing;
-MAX_INT16, MAX_INT16); if (total_unicodes == 0 || bearing < props->bearing) {
if (total_unicodes == 0 || bearing < props->min_bearing) props->bearing = bearing;
props->min_bearing = bearing; props->bearing_sd = props->advance_sd + src_props.bearing_sd;
bearing = ClipToRange(props->max_advance + src_props.max_bearing, }
-MAX_INT16, MAX_INT16); props->advance += src_props.advance;
if (total_unicodes == 0 || bearing < props->max_bearing) props->advance_sd += src_props.advance_sd;
props->max_bearing = bearing;
props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
-MAX_INT16, MAX_INT16);
props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
-MAX_INT16, MAX_INT16);
// With a single width, just use the widths stored in the unicharset. // With a single width, just use the widths stored in the unicharset.
props->min_width = src_props.min_width; props->width = src_props.width;
props->max_width = src_props.max_width; props->width_sd = src_props.width_sd;
// Use the first script id, other_case, mirror, direction. // Use the first script id, other_case, mirror, direction.
// Note that these will need translation, except direction. // Note that these will need translation, except direction.
if (total_unicodes == 0) { if (total_unicodes == 0) {
@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
} }
if (total_unicodes > 1) { if (total_unicodes > 1) {
// Estimate the total widths from the advance - bearing. // Estimate the total widths from the advance - bearing.
props->min_width = ClipToRange(props->min_advance - props->max_bearing, props->width = props->advance - props->bearing;
-MAX_INT16, MAX_INT16); props->width_sd = props->advance_sd + props->bearing_sd;
props->max_width = ClipToRange(props->max_advance - props->min_bearing,
-MAX_INT16, MAX_INT16);
} }
return total_unicodes > 0; return total_unicodes > 0;
} }
@ -707,12 +700,12 @@ bool UNICHARSET::save_to_string(STRING *str) const {
for (UNICHAR_ID id = 0; id < this->size(); ++id) { for (UNICHAR_ID id = 0; id < this->size(); ++id) {
int min_bottom, max_bottom, min_top, max_top; int min_bottom, max_bottom, min_top, max_top;
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top); get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
int min_width, max_width; float width, width_sd;
get_width_range(id, &min_width, &max_width); get_width_stats(id, &width, &width_sd);
int min_bearing, max_bearing; float bearing, bearing_sd;
get_bearing_range(id, &min_bearing, &max_bearing); get_bearing_stats(id, &bearing, &bearing_sd);
int min_advance, max_advance; float advance, advance_sd;
get_advance_range(id, &min_advance, &max_advance); get_advance_stats(id, &advance, &advance_sd);
unsigned int properties = this->get_properties(id); unsigned int properties = this->get_properties(id);
if (strcmp(this->id_to_unichar(id), " ") == 0) { if (strcmp(this->id_to_unichar(id), " ") == 0) {
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties, snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
@ -720,10 +713,10 @@ bool UNICHARSET::save_to_string(STRING *str) const {
this->get_other_case(id)); this->get_other_case(id));
} else { } else {
snprintf(buffer, kFileBufSize, snprintf(buffer, kFileBufSize,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n", "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
this->id_to_unichar(id), properties, this->id_to_unichar(id), properties,
min_bottom, max_bottom, min_top, max_top, min_width, max_width, min_bottom, max_bottom, min_top, max_top, width, width_sd,
min_bearing, max_bearing, min_advance, max_advance, bearing, bearing_sd, advance, advance_sd,
this->get_script_from_script_id(this->get_script(id)), this->get_script_from_script_id(this->get_script(id)),
this->get_other_case(id), this->get_direction(id), this->get_other_case(id), this->get_direction(id),
this->get_mirror(id), this->get_normed_unichar(id), this->get_mirror(id), this->get_normed_unichar(id),
@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
int max_bottom = MAX_UINT8; int max_bottom = MAX_UINT8;
int min_top = 0; int min_top = 0;
int max_top = MAX_UINT8; int max_top = MAX_UINT8;
int min_width = 0; float width = 0.0f;
int max_width = MAX_INT16; float width_sd = 0.0f;
int min_bearing = 0; float bearing = 0.0f;
int max_bearing = MAX_INT16; float bearing_sd = 0.0f;
int min_advance = 0; float advance = 0.0f;
int max_advance = MAX_INT16; float advance_sd = 0.0f;
// TODO(eger): check that this default is ok // TODO(eger): check that this default is ok
// after enabling BiDi iterator for Arabic+Cube. // after enabling BiDi iterator for Arabic+Cube.
int direction = UNICHARSET::U_LEFT_TO_RIGHT; int direction = UNICHARSET::U_LEFT_TO_RIGHT;
@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
int v = -1; int v = -1;
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL || if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
((v = sscanf(buffer, ((v = sscanf(buffer,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s", "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
unichar, &properties, unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top, &min_bottom, &max_bottom, &min_top, &max_top,
&min_width, &max_width, &min_bearing, &max_bearing, &width, &width_sd, &bearing, &bearing_sd,
&min_advance, &max_advance, script, &other_case, &advance, &advance_sd, script, &other_case,
&direction, &mirror, normed)) != 17 && &direction, &mirror, normed)) != 17 &&
(v = sscanf(buffer, (v = sscanf(buffer,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d", "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
unichar, &properties, unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top, &min_bottom, &max_bottom, &min_top, &max_top,
&min_width, &max_width, &min_bearing, &max_bearing, &width, &width_sd, &bearing, &bearing_sd,
&min_advance, &max_advance, &advance, &advance_sd, script, &other_case,
script, &other_case, &direction, &mirror)) != 16 && &direction, &mirror)) != 16 &&
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
unichar, &properties, unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top, &min_bottom, &max_bottom, &min_top, &max_top,
@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
this->set_script(id, script); this->set_script(id, script);
this->unichars[id].properties.enabled = true; this->unichars[id].properties.enabled = true;
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top); this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
this->set_width_range(id, min_width, max_width); this->set_width_stats(id, width, width_sd);
this->set_bearing_range(id, min_bearing, max_bearing); this->set_bearing_stats(id, bearing, bearing_sd);
this->set_advance_range(id, min_advance, max_advance); this->set_advance_stats(id, advance, advance_sd);
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction)); this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
ASSERT_HOST(other_case < unicharset_size); ASSERT_HOST(other_case < unicharset_size);
this->set_other_case(id, (v>3) ? other_case : id); this->set_other_case(id, (v>3) ? other_case : id);

View File

@ -554,68 +554,56 @@ class UNICHARSET {
unichars[unichar_id].properties.max_top = unichars[unichar_id].properties.max_top =
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8)); static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
} }
// Returns the width range of the given unichar in baseline-normalized // Returns the width stats (as mean, sd) of the given unichar relative to the
// coordinates, ie, where the baseline is kBlnBaselineOffset and the // median advance of all characters in the character set.
// meanline is kBlnBaselineOffset + kBlnXHeight. void get_width_stats(UNICHAR_ID unichar_id,
// (See normalis.h for the definitions). float* width, float* width_sd) const {
void get_width_range(UNICHAR_ID unichar_id,
int* min_width, int* max_width) const {
if (INVALID_UNICHAR_ID == unichar_id) { if (INVALID_UNICHAR_ID == unichar_id) {
*min_width = 0; *width = 0.0f;
*max_width = 256; // kBlnCellHeight; *width_sd = 0.0f;;
return; return;
} }
ASSERT_HOST(contains_unichar_id(unichar_id)); ASSERT_HOST(contains_unichar_id(unichar_id));
*min_width = unichars[unichar_id].properties.min_width; *width = unichars[unichar_id].properties.width;
*max_width = unichars[unichar_id].properties.max_width; *width_sd = unichars[unichar_id].properties.width_sd;
} }
void set_width_range(UNICHAR_ID unichar_id, int min_width, int max_width) { void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) {
unichars[unichar_id].properties.min_width = unichars[unichar_id].properties.width = width;
static_cast<inT16>(ClipToRange(min_width, 0, MAX_INT16)); unichars[unichar_id].properties.width_sd = width_sd;
unichars[unichar_id].properties.max_width =
static_cast<inT16>(ClipToRange(max_width, 0, MAX_INT16));
} }
// Returns the range of the x-bearing of the given unichar in // Returns the stats of the x-bearing (as mean, sd) of the given unichar
// baseline-normalized coordinates, ie, where the baseline is // relative to the median advance of all characters in the character set.
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight. void get_bearing_stats(UNICHAR_ID unichar_id,
// (See normalis.h for the definitions). float* bearing, float* bearing_sd) const {
void get_bearing_range(UNICHAR_ID unichar_id,
int* min_bearing, int* max_bearing) const {
if (INVALID_UNICHAR_ID == unichar_id) { if (INVALID_UNICHAR_ID == unichar_id) {
*min_bearing = *max_bearing = 0; *bearing = *bearing_sd = 0.0f;
return; return;
} }
ASSERT_HOST(contains_unichar_id(unichar_id)); ASSERT_HOST(contains_unichar_id(unichar_id));
*min_bearing = unichars[unichar_id].properties.min_bearing; *bearing = unichars[unichar_id].properties.bearing;
*max_bearing = unichars[unichar_id].properties.max_bearing; *bearing_sd = unichars[unichar_id].properties.bearing_sd;
} }
void set_bearing_range(UNICHAR_ID unichar_id, void set_bearing_stats(UNICHAR_ID unichar_id,
int min_bearing, int max_bearing) { float bearing, float bearing_sd) {
unichars[unichar_id].properties.min_bearing = unichars[unichar_id].properties.bearing = bearing;
static_cast<inT16>(ClipToRange(min_bearing, 0, MAX_INT16)); unichars[unichar_id].properties.bearing_sd = bearing_sd;
unichars[unichar_id].properties.max_bearing =
static_cast<inT16>(ClipToRange(max_bearing, 0, MAX_INT16));
} }
// Returns the range of the x-advance of the given unichar in // Returns the stats of the x-advance of the given unichar (as mean, sd)
// baseline-normalized coordinates, ie, where the baseline is // relative to the median advance of all characters in the character set.
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight. void get_advance_stats(UNICHAR_ID unichar_id,
// (See normalis.h for the definitions). float* advance, float* advance_sd) const {
void get_advance_range(UNICHAR_ID unichar_id,
int* min_advance, int* max_advance) const {
if (INVALID_UNICHAR_ID == unichar_id) { if (INVALID_UNICHAR_ID == unichar_id) {
*min_advance = *max_advance = 0; *advance = *advance_sd = 0;
return; return;
} }
ASSERT_HOST(contains_unichar_id(unichar_id)); ASSERT_HOST(contains_unichar_id(unichar_id));
*min_advance = unichars[unichar_id].properties.min_advance; *advance = unichars[unichar_id].properties.advance;
*max_advance = unichars[unichar_id].properties.max_advance; *advance_sd = unichars[unichar_id].properties.advance_sd;
} }
void set_advance_range(UNICHAR_ID unichar_id, void set_advance_stats(UNICHAR_ID unichar_id,
int min_advance, int max_advance) { float advance, float advance_sd) {
unichars[unichar_id].properties.min_advance = unichars[unichar_id].properties.advance = advance;
static_cast<inT16>(ClipToRange(min_advance, 0, MAX_INT16)); unichars[unichar_id].properties.advance_sd = advance_sd;
unichars[unichar_id].properties.max_advance =
static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
} }
// Returns true if the font metrics properties are empty. // Returns true if the font metrics properties are empty.
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const { bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
@ -873,8 +861,8 @@ class UNICHARSET {
void SetRangesOpen(); void SetRangesOpen();
// Sets all ranges to empty. Used before expanding with font-based data. // Sets all ranges to empty. Used before expanding with font-based data.
void SetRangesEmpty(); void SetRangesEmpty();
// Returns true if any of the top/bottom/width/bearing/advance ranges is // Returns true if any of the top/bottom/width/bearing/advance ranges/stats
// empty. // is empty.
bool AnyRangeEmpty() const; bool AnyRangeEmpty() const;
// Expands the ranges with the ranges from the src properties. // Expands the ranges with the ranges from the src properties.
void ExpandRangesFrom(const UNICHAR_PROPERTIES& src); void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
@ -896,14 +884,14 @@ class UNICHARSET {
uinT8 max_bottom; uinT8 max_bottom;
uinT8 min_top; uinT8 min_top;
uinT8 max_top; uinT8 max_top;
// Limits on the widths of bounding box, also in baseline-normalized coords. // Statistics of the widths of bounding box, relative to the median advance.
inT16 min_width; float width;
inT16 max_width; float width_sd;
// Limits on the x-bearing and advance, also in baseline-normalized coords. // Stats of the x-bearing and advance, also relative to the median advance.
inT16 min_bearing; float bearing;
inT16 max_bearing; float bearing_sd;
inT16 min_advance; float advance;
inT16 max_advance; float advance_sd;
int script_id; int script_id;
UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar
Direction direction; // direction of this unichar Direction direction; // direction of this unichar

File diff suppressed because it is too large Load Diff

View File

@ -30,8 +30,6 @@
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#define MIN_INERTIA (0.00001)
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
----------------------------------------------------------------------------*/ ----------------------------------------------------------------------------*/
@ -475,71 +473,6 @@ void ComputeDirection(MFEDGEPT *Start,
Finish->PreviousDirection = Start->Direction; Finish->PreviousDirection = Start->Direction;
} /* ComputeDirection */ } /* ComputeDirection */
/*---------------------------------------------------------------------------*/
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) {
/*
** Parameters:
** OutlineStats statistics about a set of outlines
** Globals: none
** Operation: Use the preliminary statistics accumulated in OutlineStats
** to compute the final statistics.
** (see Dan Johnson's Tesseract lab
** notebook #2, pgs. 74-78).
** Return: none
** Exceptions: none
** History: Fri Dec 14 10:13:36 1990, DSJ, Created.
*/
OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L;
OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L;
OutlineStats->Ix = (OutlineStats->Ix / 3.0 -
OutlineStats->y * OutlineStats->Mx +
OutlineStats->y * OutlineStats->y * OutlineStats->L);
OutlineStats->Iy = (OutlineStats->Iy / 3.0 -
OutlineStats->x * OutlineStats->My +
OutlineStats->x * OutlineStats->x * OutlineStats->L);
/* Ix and/or Iy could possibly be negative due to roundoff error */
if (OutlineStats->Ix < 0.0)
OutlineStats->Ix = MIN_INERTIA;
if (OutlineStats->Iy < 0.0)
OutlineStats->Iy = MIN_INERTIA;
OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L);
OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L);
OutlineStats->Mx *= 0.5;
OutlineStats->My *= 0.5;
} /* FinishOutlineStats */
/*---------------------------------------------------------------------------*/
void InitOutlineStats(OUTLINE_STATS *OutlineStats) {
/*
** Parameters:
** OutlineStats stats data structure to be initialized
** Globals: none
** Operation: Initialize the outline statistics data structure so
** that it is ready to start accumulating statistics.
** Return: none
** Exceptions: none
** History: Fri Dec 14 08:55:22 1990, DSJ, Created.
*/
OutlineStats->Mx = 0.0;
OutlineStats->My = 0.0;
OutlineStats->L = 0.0;
OutlineStats->x = 0.0;
OutlineStats->y = 0.0;
OutlineStats->Ix = 0.0;
OutlineStats->Iy = 0.0;
OutlineStats->Rx = 0.0;
OutlineStats->Ry = 0.0;
} /* InitOutlineStats */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
/* /*
@ -569,51 +502,3 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
return (EdgePoint); return (EdgePoint);
} /* NextDirectionChange */ } /* NextDirectionChange */
/*---------------------------------------------------------------------------*/
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
register FLOAT32 x1,
register FLOAT32 x2,
register FLOAT32 y1,
register FLOAT32 y2) {
/*
** Parameters:
** OutlineStats statistics to add this segment to
** x1, y1, x2, y2 segment to be added to statistics
** Globals: none
** Operation: This routine adds the statistics for the specified
** line segment to OutlineStats. The statistics that are
** kept are:
** sum of length of all segments
** sum of 2*Mx for all segments
** sum of 2*My for all segments
** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments
** sum of 2*My*(x1+x2) - L*x1*x2 for all segments
** These numbers, once collected can later be used to easily
** compute the center of mass, first and second moments,
** and radii of gyration. (see Dan Johnson's Tesseract lab
** notebook #2, pgs. 74-78).
** Return: none
** Exceptions: none
** History: Fri Dec 14 08:59:17 1990, DSJ, Created.
*/
register FLOAT64 L;
register FLOAT64 Mx2;
register FLOAT64 My2;
/* compute length of segment */
L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
OutlineStats->L += L;
/* compute 2Mx and 2My components */
Mx2 = L * (y1 + y2);
My2 = L * (x1 + x2);
OutlineStats->Mx += Mx2;
OutlineStats->My += My2;
/* compute second moment component */
OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2;
OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2;
} /* UpdateOutlineStats */

View File

@ -50,14 +50,6 @@ typedef enum {
outer, hole outer, hole
} OUTLINETYPE; } OUTLINETYPE;
typedef struct {
FLOAT64 Mx, My; /* first moment of all outlines */
FLOAT64 L; /* total length of all outlines */
FLOAT64 x, y; /* center of mass of all outlines */
FLOAT64 Ix, Iy; /* second moments about center of mass axes */
FLOAT64 Rx, Ry; /* radius of gyration about center of mass axes */
} OUTLINE_STATS;
typedef enum { typedef enum {
baseline, character baseline, character
} NORM_METHOD; } NORM_METHOD;
@ -127,16 +119,6 @@ void ComputeDirection(MFEDGEPT *Start,
FLOAT32 MinSlope, FLOAT32 MinSlope,
FLOAT32 MaxSlope); FLOAT32 MaxSlope);
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats);
void InitOutlineStats(OUTLINE_STATS *OutlineStats);
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint); MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
register FLOAT32 x1,
register FLOAT32 x2,
register FLOAT32 y1,
register FLOAT32 y2);
#endif #endif

View File

@ -176,10 +176,9 @@ class TessLangModEdge : public LangModEdge {
// returns the Hash value of the edge. Used by the SearchNode hash table // returns the Hash value of the edge. Used by the SearchNode hash table
// to quickly lookup existing edges to converge during search // to quickly lookup existing edges to converge during search
inline unsigned int Hash() const { inline unsigned int Hash() const {
return static_cast<unsigned int>(((start_edge_ | end_edge_) ^ return static_cast<unsigned int>(
((reinterpret_cast<uintptr_t>(dawg_)))) ^ ((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
((unsigned int)edge_mask_) ^ ((unsigned int)edge_mask_) ^ class_id_);
class_id_);
} }
// A verbal description of the edge: Used by visualizers // A verbal description of the edge: Used by visualizers

View File

@ -2669,7 +2669,8 @@ PERF_COUNT_START("HistogramRectOCL")
int numThreads = block_size*numWorkGroups; int numThreads = block_size*numWorkGroups;
size_t local_work_size[] = {static_cast<size_t>(block_size)}; size_t local_work_size[] = {static_cast<size_t>(block_size)};
size_t global_work_size[] = {static_cast<size_t>(numThreads)}; size_t global_work_size[] = {static_cast<size_t>(numThreads)};
size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)}; size_t red_global_work_size[] = {
static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
/* map histogramAllChannels as write only */ /* map histogramAllChannels as write only */
int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups; int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;

View File

@ -152,11 +152,11 @@ InputBuffer::~InputBuffer() {
} }
} }
bool InputBuffer::Read(string *out) { bool InputBuffer::Read(string* out) {
char buf[BUFSIZ+1]; char buf[BUFSIZ + 1];
int l; int l;
while((l = fread(buf, 1, BUFSIZ, stream_)) > 0) { while ((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
if(ferror(stream_)) { if (ferror(stream_)) {
clearerr(stream_); clearerr(stream_);
return false; return false;
} }

View File

@ -140,6 +140,14 @@ void StringRenderer::set_resolution(const int resolution) {
font_.set_resolution(resolution); font_.set_resolution(resolution);
} }
void StringRenderer::set_underline_start_prob(const double frac) {
underline_start_prob_ = min(max(frac, 0.0), 1.0);
}
void StringRenderer::set_underline_continuation_prob(const double frac) {
underline_continuation_prob_ = min(max(frac, 0.0), 1.0);
}
StringRenderer::~StringRenderer() { StringRenderer::~StringRenderer() {
ClearBoxes(); ClearBoxes();
FreePangoCairo(); FreePangoCairo();

View File

@ -83,14 +83,10 @@ class StringRenderer {
// Sets the probability (value in [0, 1]) of starting to render a word with an // Sets the probability (value in [0, 1]) of starting to render a word with an
// underline. This implementation considers words to be space-delimited // underline. This implementation considers words to be space-delimited
// sequences of characters. // sequences of characters.
void set_underline_start_prob(const double frac) { void set_underline_start_prob(const double frac);
underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
}
// Set the probability (value in [0, 1]) of continuing a started underline to // Set the probability (value in [0, 1]) of continuing a started underline to
// the next word. // the next word.
void set_underline_continuation_prob(const double frac) { void set_underline_continuation_prob(const double frac);
underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
}
void set_underline_style(const PangoUnderline style) { void set_underline_style(const PangoUnderline style) {
underline_style_ = style; underline_style_ = style;
} }

View File

@ -184,9 +184,9 @@ parse_flags() {
TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
fi fi
if [[ -z ${WORDLIST_FILE} ]]; then if [[ -z ${WORDLIST_FILE} ]]; then
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist.clean WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist
fi fi
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams.clean WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams
NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs