mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.
This commit is contained in:
parent
d00d833b9b
commit
a303ab9d00
@ -314,6 +314,7 @@ void Tesseract::do_re_display(
|
||||
image_win->Image(pix_binary_, 0, 0);
|
||||
}
|
||||
|
||||
image_win->Brush(ScrollView::NONE);
|
||||
PAGE_RES_IT pr_it(current_page_res);
|
||||
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
|
||||
(this->*word_painter)(&pr_it);
|
||||
|
@ -1,6 +1,8 @@
|
||||
/**********************************************************************
|
||||
* File: tessedit.cpp (Formerly tessedit.c)
|
||||
* Description: Main program for merge of tess and editor.
|
||||
* Description: (Previously) Main program for merge of tess and editor.
|
||||
* Now just code to load the language model and various
|
||||
* engine-specific data files.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||
*
|
||||
|
@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
|
||||
int band_width = bandwidth();
|
||||
MATRIX* result = new MATRIX(dim, band_width);
|
||||
for (int col = 0; col < dim; ++col) {
|
||||
for (int row = col; row < col + band_width; ++row) {
|
||||
for (int row = col; row < dim && row < col + band_width; ++row) {
|
||||
BLOB_CHOICE_LIST* choices = get(col, row);
|
||||
if (choices != NULL) {
|
||||
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
|
||||
choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
|
||||
copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
|
||||
result->put(col, row, copy_choices);
|
||||
}
|
||||
}
|
||||
|
@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
|
||||
flags(0),
|
||||
script_id_(0),
|
||||
correct(text) {
|
||||
C_BLOB_IT start_it = blob_list;
|
||||
C_BLOB_IT end_it = blob_list;
|
||||
C_BLOB_IT start_it = &cblobs;
|
||||
C_BLOB_IT rej_cblob_it = &rej_cblobs;
|
||||
C_OUTLINE_IT c_outline_it;
|
||||
inT16 inverted_vote = 0;
|
||||
inT16 non_inverted_vote = 0;
|
||||
|
||||
// Move blob_list's elements into cblobs.
|
||||
while (!end_it.at_last())
|
||||
end_it.forward();
|
||||
cblobs.assign_to_sublist(&start_it, &end_it);
|
||||
start_it.add_list_after(blob_list);
|
||||
|
||||
/*
|
||||
Set white on black flag for the WERD, moving any duff blobs onto the
|
||||
|
@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
|
||||
max_bottom = MAX_UINT8;
|
||||
min_top = 0;
|
||||
max_top = MAX_UINT8;
|
||||
min_width = 0;
|
||||
max_width = MAX_INT16;
|
||||
min_bearing = 0;
|
||||
max_bearing = MAX_INT16;
|
||||
min_advance = 0;
|
||||
max_advance = MAX_INT16;
|
||||
width = 0.0f;
|
||||
width_sd = 0.0f;
|
||||
bearing = 0.0f;
|
||||
bearing_sd = 0.0f;
|
||||
advance = 0.0f;
|
||||
advance_sd = 0.0f;
|
||||
}
|
||||
|
||||
// Sets all ranges to empty. Used before expanding with font-based data.
|
||||
@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
|
||||
max_bottom = 0;
|
||||
min_top = MAX_UINT8;
|
||||
max_top = 0;
|
||||
min_width = MAX_INT16;
|
||||
max_width = 0;
|
||||
min_bearing = MAX_INT16;
|
||||
max_bearing = 0;
|
||||
min_advance = MAX_INT16;
|
||||
max_advance = 0;
|
||||
width = 0.0f;
|
||||
width_sd = 0.0f;
|
||||
bearing = 0.0f;
|
||||
bearing_sd = 0.0f;
|
||||
advance = 0.0f;
|
||||
advance_sd = 0.0f;
|
||||
}
|
||||
|
||||
// Returns true if any of the top/bottom/width/bearing/advance ranges is
|
||||
// empty.
|
||||
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
|
||||
// is empty.
|
||||
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
|
||||
return min_bottom > max_bottom || min_top > max_top ||
|
||||
min_width > max_width || min_bearing > max_bearing ||
|
||||
min_advance > max_advance;
|
||||
return width == 0.0f || advance == 0.0f;
|
||||
}
|
||||
|
||||
// Expands the ranges with the ranges from the src properties.
|
||||
@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
|
||||
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
|
||||
UpdateRange(src.min_top, &min_top, &max_top);
|
||||
UpdateRange(src.max_top, &min_top, &max_top);
|
||||
UpdateRange(src.min_width, &min_width, &max_width);
|
||||
UpdateRange(src.max_width, &min_width, &max_width);
|
||||
UpdateRange(src.min_bearing, &min_bearing, &max_bearing);
|
||||
UpdateRange(src.max_bearing, &min_bearing, &max_bearing);
|
||||
UpdateRange(src.min_advance, &min_advance, &max_advance);
|
||||
UpdateRange(src.max_advance, &min_advance, &max_advance);
|
||||
if (src.width_sd > width_sd) {
|
||||
width = src.width;
|
||||
width_sd = src.width_sd;
|
||||
}
|
||||
if (src.bearing_sd > bearing_sd) {
|
||||
bearing = src.bearing;
|
||||
bearing_sd = src.bearing_sd;
|
||||
}
|
||||
if (src.advance_sd > advance_sd) {
|
||||
advance = src.advance;
|
||||
advance_sd = src.advance_sd;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the properties from src into this.
|
||||
@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
|
||||
}
|
||||
unichars[ch].properties.CopyFrom(properties);
|
||||
set_normed_ids(ch);
|
||||
} else {
|
||||
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
|
||||
for (int ch = 0; ch < src.size_used; ++ch) {
|
||||
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
|
||||
const char* utf8 = src.id_to_unichar(ch);
|
||||
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
|
||||
if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
|
||||
// Only use fully valid entries.
|
||||
tprintf("Bad properties for index %d, char %s: "
|
||||
"%d,%d %d,%d %d,%d %d,%d %d,%d\n",
|
||||
"%d,%d %d,%d %g,%g %g,%g %g,%g\n",
|
||||
ch, utf8, src_props.min_bottom, src_props.max_bottom,
|
||||
src_props.min_top, src_props.max_top,
|
||||
src_props.min_width, src_props.max_width,
|
||||
src_props.min_bearing, src_props.max_bearing,
|
||||
src_props.min_advance, src_props.max_advance);
|
||||
src_props.width, src_props.width_sd,
|
||||
src_props.bearing, src_props.bearing_sd,
|
||||
src_props.advance, src_props.advance_sd);
|
||||
continue;
|
||||
}
|
||||
int id = size_used;
|
||||
@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
||||
UNICHAR_PROPERTIES* props) const {
|
||||
props->Init();
|
||||
props->SetRangesEmpty();
|
||||
props->min_advance = 0;
|
||||
props->max_advance = 0;
|
||||
int total_unicodes = 0;
|
||||
GenericVector<UNICHAR_ID> encoding;
|
||||
if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
|
||||
@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
||||
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
|
||||
UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
|
||||
UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
|
||||
int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
if (total_unicodes == 0 || bearing < props->min_bearing)
|
||||
props->min_bearing = bearing;
|
||||
bearing = ClipToRange(props->max_advance + src_props.max_bearing,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
if (total_unicodes == 0 || bearing < props->max_bearing)
|
||||
props->max_bearing = bearing;
|
||||
props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
float bearing = props->advance + src_props.bearing;
|
||||
if (total_unicodes == 0 || bearing < props->bearing) {
|
||||
props->bearing = bearing;
|
||||
props->bearing_sd = props->advance_sd + src_props.bearing_sd;
|
||||
}
|
||||
props->advance += src_props.advance;
|
||||
props->advance_sd += src_props.advance_sd;
|
||||
// With a single width, just use the widths stored in the unicharset.
|
||||
props->min_width = src_props.min_width;
|
||||
props->max_width = src_props.max_width;
|
||||
props->width = src_props.width;
|
||||
props->width_sd = src_props.width_sd;
|
||||
// Use the first script id, other_case, mirror, direction.
|
||||
// Note that these will need translation, except direction.
|
||||
if (total_unicodes == 0) {
|
||||
@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
||||
}
|
||||
if (total_unicodes > 1) {
|
||||
// Estimate the total widths from the advance - bearing.
|
||||
props->min_width = ClipToRange(props->min_advance - props->max_bearing,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
props->max_width = ClipToRange(props->max_advance - props->min_bearing,
|
||||
-MAX_INT16, MAX_INT16);
|
||||
props->width = props->advance - props->bearing;
|
||||
props->width_sd = props->advance_sd + props->bearing_sd;
|
||||
}
|
||||
return total_unicodes > 0;
|
||||
}
|
||||
@ -707,12 +700,12 @@ bool UNICHARSET::save_to_string(STRING *str) const {
|
||||
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
|
||||
int min_bottom, max_bottom, min_top, max_top;
|
||||
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||
int min_width, max_width;
|
||||
get_width_range(id, &min_width, &max_width);
|
||||
int min_bearing, max_bearing;
|
||||
get_bearing_range(id, &min_bearing, &max_bearing);
|
||||
int min_advance, max_advance;
|
||||
get_advance_range(id, &min_advance, &max_advance);
|
||||
float width, width_sd;
|
||||
get_width_stats(id, &width, &width_sd);
|
||||
float bearing, bearing_sd;
|
||||
get_bearing_stats(id, &bearing, &bearing_sd);
|
||||
float advance, advance_sd;
|
||||
get_advance_stats(id, &advance, &advance_sd);
|
||||
unsigned int properties = this->get_properties(id);
|
||||
if (strcmp(this->id_to_unichar(id), " ") == 0) {
|
||||
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
|
||||
@ -720,10 +713,10 @@ bool UNICHARSET::save_to_string(STRING *str) const {
|
||||
this->get_other_case(id));
|
||||
} else {
|
||||
snprintf(buffer, kFileBufSize,
|
||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
|
||||
this->id_to_unichar(id), properties,
|
||||
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
|
||||
min_bearing, max_bearing, min_advance, max_advance,
|
||||
min_bottom, max_bottom, min_top, max_top, width, width_sd,
|
||||
bearing, bearing_sd, advance, advance_sd,
|
||||
this->get_script_from_script_id(this->get_script(id)),
|
||||
this->get_other_case(id), this->get_direction(id),
|
||||
this->get_mirror(id), this->get_normed_unichar(id),
|
||||
@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
|
||||
int max_bottom = MAX_UINT8;
|
||||
int min_top = 0;
|
||||
int max_top = MAX_UINT8;
|
||||
int min_width = 0;
|
||||
int max_width = MAX_INT16;
|
||||
int min_bearing = 0;
|
||||
int max_bearing = MAX_INT16;
|
||||
int min_advance = 0;
|
||||
int max_advance = MAX_INT16;
|
||||
float width = 0.0f;
|
||||
float width_sd = 0.0f;
|
||||
float bearing = 0.0f;
|
||||
float bearing_sd = 0.0f;
|
||||
float advance = 0.0f;
|
||||
float advance_sd = 0.0f;
|
||||
// TODO(eger): check that this default is ok
|
||||
// after enabling BiDi iterator for Arabic+Cube.
|
||||
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
|
||||
@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
|
||||
int v = -1;
|
||||
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
|
||||
((v = sscanf(buffer,
|
||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
&min_width, &max_width, &min_bearing, &max_bearing,
|
||||
&min_advance, &max_advance, script, &other_case,
|
||||
&width, &width_sd, &bearing, &bearing_sd,
|
||||
&advance, &advance_sd, script, &other_case,
|
||||
&direction, &mirror, normed)) != 17 &&
|
||||
(v = sscanf(buffer,
|
||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
&min_width, &max_width, &min_bearing, &max_bearing,
|
||||
&min_advance, &max_advance,
|
||||
script, &other_case, &direction, &mirror)) != 16 &&
|
||||
&width, &width_sd, &bearing, &bearing_sd,
|
||||
&advance, &advance_sd, script, &other_case,
|
||||
&direction, &mirror)) != 16 &&
|
||||
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
|
||||
this->set_script(id, script);
|
||||
this->unichars[id].properties.enabled = true;
|
||||
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
|
||||
this->set_width_range(id, min_width, max_width);
|
||||
this->set_bearing_range(id, min_bearing, max_bearing);
|
||||
this->set_advance_range(id, min_advance, max_advance);
|
||||
this->set_width_stats(id, width, width_sd);
|
||||
this->set_bearing_stats(id, bearing, bearing_sd);
|
||||
this->set_advance_stats(id, advance, advance_sd);
|
||||
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
|
||||
ASSERT_HOST(other_case < unicharset_size);
|
||||
this->set_other_case(id, (v>3) ? other_case : id);
|
||||
|
@ -554,68 +554,56 @@ class UNICHARSET {
|
||||
unichars[unichar_id].properties.max_top =
|
||||
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
|
||||
}
|
||||
// Returns the width range of the given unichar in baseline-normalized
|
||||
// coordinates, ie, where the baseline is kBlnBaselineOffset and the
|
||||
// meanline is kBlnBaselineOffset + kBlnXHeight.
|
||||
// (See normalis.h for the definitions).
|
||||
void get_width_range(UNICHAR_ID unichar_id,
|
||||
int* min_width, int* max_width) const {
|
||||
// Returns the width stats (as mean, sd) of the given unichar relative to the
|
||||
// median advance of all characters in the character set.
|
||||
void get_width_stats(UNICHAR_ID unichar_id,
|
||||
float* width, float* width_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*min_width = 0;
|
||||
*max_width = 256; // kBlnCellHeight;
|
||||
*width = 0.0f;
|
||||
*width_sd = 0.0f;;
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||
*min_width = unichars[unichar_id].properties.min_width;
|
||||
*max_width = unichars[unichar_id].properties.max_width;
|
||||
*width = unichars[unichar_id].properties.width;
|
||||
*width_sd = unichars[unichar_id].properties.width_sd;
|
||||
}
|
||||
void set_width_range(UNICHAR_ID unichar_id, int min_width, int max_width) {
|
||||
unichars[unichar_id].properties.min_width =
|
||||
static_cast<inT16>(ClipToRange(min_width, 0, MAX_INT16));
|
||||
unichars[unichar_id].properties.max_width =
|
||||
static_cast<inT16>(ClipToRange(max_width, 0, MAX_INT16));
|
||||
void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) {
|
||||
unichars[unichar_id].properties.width = width;
|
||||
unichars[unichar_id].properties.width_sd = width_sd;
|
||||
}
|
||||
// Returns the range of the x-bearing of the given unichar in
|
||||
// baseline-normalized coordinates, ie, where the baseline is
|
||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
|
||||
// (See normalis.h for the definitions).
|
||||
void get_bearing_range(UNICHAR_ID unichar_id,
|
||||
int* min_bearing, int* max_bearing) const {
|
||||
// Returns the stats of the x-bearing (as mean, sd) of the given unichar
|
||||
// relative to the median advance of all characters in the character set.
|
||||
void get_bearing_stats(UNICHAR_ID unichar_id,
|
||||
float* bearing, float* bearing_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*min_bearing = *max_bearing = 0;
|
||||
*bearing = *bearing_sd = 0.0f;
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||
*min_bearing = unichars[unichar_id].properties.min_bearing;
|
||||
*max_bearing = unichars[unichar_id].properties.max_bearing;
|
||||
*bearing = unichars[unichar_id].properties.bearing;
|
||||
*bearing_sd = unichars[unichar_id].properties.bearing_sd;
|
||||
}
|
||||
void set_bearing_range(UNICHAR_ID unichar_id,
|
||||
int min_bearing, int max_bearing) {
|
||||
unichars[unichar_id].properties.min_bearing =
|
||||
static_cast<inT16>(ClipToRange(min_bearing, 0, MAX_INT16));
|
||||
unichars[unichar_id].properties.max_bearing =
|
||||
static_cast<inT16>(ClipToRange(max_bearing, 0, MAX_INT16));
|
||||
void set_bearing_stats(UNICHAR_ID unichar_id,
|
||||
float bearing, float bearing_sd) {
|
||||
unichars[unichar_id].properties.bearing = bearing;
|
||||
unichars[unichar_id].properties.bearing_sd = bearing_sd;
|
||||
}
|
||||
// Returns the range of the x-advance of the given unichar in
|
||||
// baseline-normalized coordinates, ie, where the baseline is
|
||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
|
||||
// (See normalis.h for the definitions).
|
||||
void get_advance_range(UNICHAR_ID unichar_id,
|
||||
int* min_advance, int* max_advance) const {
|
||||
// Returns the stats of the x-advance of the given unichar (as mean, sd)
|
||||
// relative to the median advance of all characters in the character set.
|
||||
void get_advance_stats(UNICHAR_ID unichar_id,
|
||||
float* advance, float* advance_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*min_advance = *max_advance = 0;
|
||||
*advance = *advance_sd = 0;
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||
*min_advance = unichars[unichar_id].properties.min_advance;
|
||||
*max_advance = unichars[unichar_id].properties.max_advance;
|
||||
*advance = unichars[unichar_id].properties.advance;
|
||||
*advance_sd = unichars[unichar_id].properties.advance_sd;
|
||||
}
|
||||
void set_advance_range(UNICHAR_ID unichar_id,
|
||||
int min_advance, int max_advance) {
|
||||
unichars[unichar_id].properties.min_advance =
|
||||
static_cast<inT16>(ClipToRange(min_advance, 0, MAX_INT16));
|
||||
unichars[unichar_id].properties.max_advance =
|
||||
static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
|
||||
void set_advance_stats(UNICHAR_ID unichar_id,
|
||||
float advance, float advance_sd) {
|
||||
unichars[unichar_id].properties.advance = advance;
|
||||
unichars[unichar_id].properties.advance_sd = advance_sd;
|
||||
}
|
||||
// Returns true if the font metrics properties are empty.
|
||||
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
|
||||
@ -873,8 +861,8 @@ class UNICHARSET {
|
||||
void SetRangesOpen();
|
||||
// Sets all ranges to empty. Used before expanding with font-based data.
|
||||
void SetRangesEmpty();
|
||||
// Returns true if any of the top/bottom/width/bearing/advance ranges is
|
||||
// empty.
|
||||
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
|
||||
// is empty.
|
||||
bool AnyRangeEmpty() const;
|
||||
// Expands the ranges with the ranges from the src properties.
|
||||
void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
|
||||
@ -896,14 +884,14 @@ class UNICHARSET {
|
||||
uinT8 max_bottom;
|
||||
uinT8 min_top;
|
||||
uinT8 max_top;
|
||||
// Limits on the widths of bounding box, also in baseline-normalized coords.
|
||||
inT16 min_width;
|
||||
inT16 max_width;
|
||||
// Limits on the x-bearing and advance, also in baseline-normalized coords.
|
||||
inT16 min_bearing;
|
||||
inT16 max_bearing;
|
||||
inT16 min_advance;
|
||||
inT16 max_advance;
|
||||
// Statistics of the widths of bounding box, relative to the median advance.
|
||||
float width;
|
||||
float width_sd;
|
||||
// Stats of the x-bearing and advance, also relative to the median advance.
|
||||
float bearing;
|
||||
float bearing_sd;
|
||||
float advance;
|
||||
float advance_sd;
|
||||
int script_id;
|
||||
UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar
|
||||
Direction direction; // direction of this unichar
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -30,8 +30,6 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define MIN_INERTIA (0.00001)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------*/
|
||||
@ -475,71 +473,6 @@ void ComputeDirection(MFEDGEPT *Start,
|
||||
Finish->PreviousDirection = Start->Direction;
|
||||
} /* ComputeDirection */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) {
|
||||
/*
|
||||
** Parameters:
|
||||
** OutlineStats statistics about a set of outlines
|
||||
** Globals: none
|
||||
** Operation: Use the preliminary statistics accumulated in OutlineStats
|
||||
** to compute the final statistics.
|
||||
** (see Dan Johnson's Tesseract lab
|
||||
** notebook #2, pgs. 74-78).
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Dec 14 10:13:36 1990, DSJ, Created.
|
||||
*/
|
||||
OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L;
|
||||
OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L;
|
||||
|
||||
OutlineStats->Ix = (OutlineStats->Ix / 3.0 -
|
||||
OutlineStats->y * OutlineStats->Mx +
|
||||
OutlineStats->y * OutlineStats->y * OutlineStats->L);
|
||||
|
||||
OutlineStats->Iy = (OutlineStats->Iy / 3.0 -
|
||||
OutlineStats->x * OutlineStats->My +
|
||||
OutlineStats->x * OutlineStats->x * OutlineStats->L);
|
||||
|
||||
/* Ix and/or Iy could possibly be negative due to roundoff error */
|
||||
if (OutlineStats->Ix < 0.0)
|
||||
OutlineStats->Ix = MIN_INERTIA;
|
||||
if (OutlineStats->Iy < 0.0)
|
||||
OutlineStats->Iy = MIN_INERTIA;
|
||||
|
||||
OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L);
|
||||
OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L);
|
||||
|
||||
OutlineStats->Mx *= 0.5;
|
||||
OutlineStats->My *= 0.5;
|
||||
|
||||
} /* FinishOutlineStats */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void InitOutlineStats(OUTLINE_STATS *OutlineStats) {
|
||||
/*
|
||||
** Parameters:
|
||||
** OutlineStats stats data structure to be initialized
|
||||
** Globals: none
|
||||
** Operation: Initialize the outline statistics data structure so
|
||||
** that it is ready to start accumulating statistics.
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Dec 14 08:55:22 1990, DSJ, Created.
|
||||
*/
|
||||
OutlineStats->Mx = 0.0;
|
||||
OutlineStats->My = 0.0;
|
||||
OutlineStats->L = 0.0;
|
||||
OutlineStats->x = 0.0;
|
||||
OutlineStats->y = 0.0;
|
||||
OutlineStats->Ix = 0.0;
|
||||
OutlineStats->Iy = 0.0;
|
||||
OutlineStats->Rx = 0.0;
|
||||
OutlineStats->Ry = 0.0;
|
||||
} /* InitOutlineStats */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||
/*
|
||||
@ -569,51 +502,3 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||
|
||||
return (EdgePoint);
|
||||
} /* NextDirectionChange */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
|
||||
register FLOAT32 x1,
|
||||
register FLOAT32 x2,
|
||||
register FLOAT32 y1,
|
||||
register FLOAT32 y2) {
|
||||
/*
|
||||
** Parameters:
|
||||
** OutlineStats statistics to add this segment to
|
||||
** x1, y1, x2, y2 segment to be added to statistics
|
||||
** Globals: none
|
||||
** Operation: This routine adds the statistics for the specified
|
||||
** line segment to OutlineStats. The statistics that are
|
||||
** kept are:
|
||||
** sum of length of all segments
|
||||
** sum of 2*Mx for all segments
|
||||
** sum of 2*My for all segments
|
||||
** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments
|
||||
** sum of 2*My*(x1+x2) - L*x1*x2 for all segments
|
||||
** These numbers, once collected can later be used to easily
|
||||
** compute the center of mass, first and second moments,
|
||||
** and radii of gyration. (see Dan Johnson's Tesseract lab
|
||||
** notebook #2, pgs. 74-78).
|
||||
** Return: none
|
||||
** Exceptions: none
|
||||
** History: Fri Dec 14 08:59:17 1990, DSJ, Created.
|
||||
*/
|
||||
register FLOAT64 L;
|
||||
register FLOAT64 Mx2;
|
||||
register FLOAT64 My2;
|
||||
|
||||
/* compute length of segment */
|
||||
L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
|
||||
OutlineStats->L += L;
|
||||
|
||||
/* compute 2Mx and 2My components */
|
||||
Mx2 = L * (y1 + y2);
|
||||
My2 = L * (x1 + x2);
|
||||
OutlineStats->Mx += Mx2;
|
||||
OutlineStats->My += My2;
|
||||
|
||||
/* compute second moment component */
|
||||
OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2;
|
||||
OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2;
|
||||
|
||||
} /* UpdateOutlineStats */
|
||||
|
@ -50,14 +50,6 @@ typedef enum {
|
||||
outer, hole
|
||||
} OUTLINETYPE;
|
||||
|
||||
typedef struct {
|
||||
FLOAT64 Mx, My; /* first moment of all outlines */
|
||||
FLOAT64 L; /* total length of all outlines */
|
||||
FLOAT64 x, y; /* center of mass of all outlines */
|
||||
FLOAT64 Ix, Iy; /* second moments about center of mass axes */
|
||||
FLOAT64 Rx, Ry; /* radius of gyration about center of mass axes */
|
||||
} OUTLINE_STATS;
|
||||
|
||||
typedef enum {
|
||||
baseline, character
|
||||
} NORM_METHOD;
|
||||
@ -127,16 +119,6 @@ void ComputeDirection(MFEDGEPT *Start,
|
||||
FLOAT32 MinSlope,
|
||||
FLOAT32 MaxSlope);
|
||||
|
||||
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats);
|
||||
|
||||
void InitOutlineStats(OUTLINE_STATS *OutlineStats);
|
||||
|
||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);
|
||||
|
||||
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
|
||||
register FLOAT32 x1,
|
||||
register FLOAT32 x2,
|
||||
register FLOAT32 y1,
|
||||
register FLOAT32 y2);
|
||||
|
||||
#endif
|
||||
|
@ -176,10 +176,9 @@ class TessLangModEdge : public LangModEdge {
|
||||
// returns the Hash value of the edge. Used by the SearchNode hash table
|
||||
// to quickly look up existing edges to converge during search
|
||||
inline unsigned int Hash() const {
|
||||
return static_cast<unsigned int>(((start_edge_ | end_edge_) ^
|
||||
((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
||||
((unsigned int)edge_mask_) ^
|
||||
class_id_);
|
||||
return static_cast<unsigned int>(
|
||||
((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
||||
((unsigned int)edge_mask_) ^ class_id_);
|
||||
}
|
||||
|
||||
// A verbal description of the edge: Used by visualizers
|
||||
|
@ -2669,7 +2669,8 @@ PERF_COUNT_START("HistogramRectOCL")
|
||||
int numThreads = block_size*numWorkGroups;
|
||||
size_t local_work_size[] = {static_cast<size_t>(block_size)};
|
||||
size_t global_work_size[] = {static_cast<size_t>(numThreads)};
|
||||
size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
|
||||
size_t red_global_work_size[] = {
|
||||
static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
|
||||
|
||||
/* map histogramAllChannels as write only */
|
||||
int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
|
||||
|
@ -140,6 +140,14 @@ void StringRenderer::set_resolution(const int resolution) {
|
||||
font_.set_resolution(resolution);
|
||||
}
|
||||
|
||||
void StringRenderer::set_underline_start_prob(const double frac) {
|
||||
underline_start_prob_ = min(max(frac, 0.0), 1.0);
|
||||
}
|
||||
|
||||
void StringRenderer::set_underline_continuation_prob(const double frac) {
|
||||
underline_continuation_prob_ = min(max(frac, 0.0), 1.0);
|
||||
}
|
||||
|
||||
StringRenderer::~StringRenderer() {
|
||||
ClearBoxes();
|
||||
FreePangoCairo();
|
||||
|
@ -83,14 +83,10 @@ class StringRenderer {
|
||||
// Sets the probability (value in [0, 1]) of starting to render a word with an
|
||||
// underline. This implementation considers words to be space-delimited
|
||||
// sequences of characters.
|
||||
void set_underline_start_prob(const double frac) {
|
||||
underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
|
||||
}
|
||||
void set_underline_start_prob(const double frac);
|
||||
// Set the probability (value in [0, 1]) of continuing a started underline to
|
||||
// the next word.
|
||||
void set_underline_continuation_prob(const double frac) {
|
||||
underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
|
||||
}
|
||||
void set_underline_continuation_prob(const double frac);
|
||||
void set_underline_style(const PangoUnderline style) {
|
||||
underline_style_ = style;
|
||||
}
|
||||
|
@ -184,9 +184,9 @@ parse_flags() {
|
||||
TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
|
||||
fi
|
||||
if [[ -z ${WORDLIST_FILE} ]]; then
|
||||
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist.clean
|
||||
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist
|
||||
fi
|
||||
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams.clean
|
||||
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams
|
||||
NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
|
||||
PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
|
||||
BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs
|
||||
|
Loading…
Reference in New Issue
Block a user