mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.
This commit is contained in:
parent
d00d833b9b
commit
a303ab9d00
@ -637,8 +637,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
|||||||
">>\n"
|
">>\n"
|
||||||
"stream\n", size, size);
|
"stream\n", size, size);
|
||||||
if (n >= sizeof(buf)) {
|
if (n >= sizeof(buf)) {
|
||||||
delete[] buffer;
|
delete[] buffer;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
AppendString(buf);
|
AppendString(buf);
|
||||||
objsize = strlen(buf);
|
objsize = strlen(buf);
|
||||||
|
@ -314,6 +314,7 @@ void Tesseract::do_re_display(
|
|||||||
image_win->Image(pix_binary_, 0, 0);
|
image_win->Image(pix_binary_, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
image_win->Brush(ScrollView::NONE);
|
||||||
PAGE_RES_IT pr_it(current_page_res);
|
PAGE_RES_IT pr_it(current_page_res);
|
||||||
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
|
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
|
||||||
(this->*word_painter)(&pr_it);
|
(this->*word_painter)(&pr_it);
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* File: tessedit.cpp (Formerly tessedit.c)
|
* File: tessedit.cpp (Formerly tessedit.c)
|
||||||
* Description: Main program for merge of tess and editor.
|
* Description: (Previously) Main program for merge of tess and editor.
|
||||||
* Author: Ray Smith
|
* Now just code to load the language model and various
|
||||||
* Created: Tue Jan 07 15:21:46 GMT 1992
|
* engine-specific data files.
|
||||||
|
* Author: Ray Smith
|
||||||
|
* Created: Tue Jan 07 15:21:46 GMT 1992
|
||||||
*
|
*
|
||||||
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
||||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
|
|||||||
int band_width = bandwidth();
|
int band_width = bandwidth();
|
||||||
MATRIX* result = new MATRIX(dim, band_width);
|
MATRIX* result = new MATRIX(dim, band_width);
|
||||||
for (int col = 0; col < dim; ++col) {
|
for (int col = 0; col < dim; ++col) {
|
||||||
for (int row = col; row < col + band_width; ++row) {
|
for (int row = col; row < dim && row < col + band_width; ++row) {
|
||||||
BLOB_CHOICE_LIST* choices = get(col, row);
|
BLOB_CHOICE_LIST* choices = get(col, row);
|
||||||
if (choices != NULL) {
|
if (choices != NULL) {
|
||||||
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
|
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
|
||||||
choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
|
copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
|
||||||
result->put(col, row, copy_choices);
|
result->put(col, row, copy_choices);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
|
|||||||
flags(0),
|
flags(0),
|
||||||
script_id_(0),
|
script_id_(0),
|
||||||
correct(text) {
|
correct(text) {
|
||||||
C_BLOB_IT start_it = blob_list;
|
C_BLOB_IT start_it = &cblobs;
|
||||||
C_BLOB_IT end_it = blob_list;
|
|
||||||
C_BLOB_IT rej_cblob_it = &rej_cblobs;
|
C_BLOB_IT rej_cblob_it = &rej_cblobs;
|
||||||
C_OUTLINE_IT c_outline_it;
|
C_OUTLINE_IT c_outline_it;
|
||||||
inT16 inverted_vote = 0;
|
inT16 inverted_vote = 0;
|
||||||
inT16 non_inverted_vote = 0;
|
inT16 non_inverted_vote = 0;
|
||||||
|
|
||||||
// Move blob_list's elements into cblobs.
|
// Move blob_list's elements into cblobs.
|
||||||
while (!end_it.at_last())
|
start_it.add_list_after(blob_list);
|
||||||
end_it.forward();
|
|
||||||
cblobs.assign_to_sublist(&start_it, &end_it);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Set white on black flag for the WERD, moving any duff blobs onto the
|
Set white on black flag for the WERD, moving any duff blobs onto the
|
||||||
|
@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
|
|||||||
max_bottom = MAX_UINT8;
|
max_bottom = MAX_UINT8;
|
||||||
min_top = 0;
|
min_top = 0;
|
||||||
max_top = MAX_UINT8;
|
max_top = MAX_UINT8;
|
||||||
min_width = 0;
|
width = 0.0f;
|
||||||
max_width = MAX_INT16;
|
width_sd = 0.0f;
|
||||||
min_bearing = 0;
|
bearing = 0.0f;
|
||||||
max_bearing = MAX_INT16;
|
bearing_sd = 0.0f;
|
||||||
min_advance = 0;
|
advance = 0.0f;
|
||||||
max_advance = MAX_INT16;
|
advance_sd = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets all ranges to empty. Used before expanding with font-based data.
|
// Sets all ranges to empty. Used before expanding with font-based data.
|
||||||
@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
|
|||||||
max_bottom = 0;
|
max_bottom = 0;
|
||||||
min_top = MAX_UINT8;
|
min_top = MAX_UINT8;
|
||||||
max_top = 0;
|
max_top = 0;
|
||||||
min_width = MAX_INT16;
|
width = 0.0f;
|
||||||
max_width = 0;
|
width_sd = 0.0f;
|
||||||
min_bearing = MAX_INT16;
|
bearing = 0.0f;
|
||||||
max_bearing = 0;
|
bearing_sd = 0.0f;
|
||||||
min_advance = MAX_INT16;
|
advance = 0.0f;
|
||||||
max_advance = 0;
|
advance_sd = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if any of the top/bottom/width/bearing/advance ranges is
|
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
|
||||||
// empty.
|
// is empty.
|
||||||
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
|
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
|
||||||
return min_bottom > max_bottom || min_top > max_top ||
|
return width == 0.0f || advance == 0.0f;
|
||||||
min_width > max_width || min_bearing > max_bearing ||
|
|
||||||
min_advance > max_advance;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expands the ranges with the ranges from the src properties.
|
// Expands the ranges with the ranges from the src properties.
|
||||||
@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
|
|||||||
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
|
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
|
||||||
UpdateRange(src.min_top, &min_top, &max_top);
|
UpdateRange(src.min_top, &min_top, &max_top);
|
||||||
UpdateRange(src.max_top, &min_top, &max_top);
|
UpdateRange(src.max_top, &min_top, &max_top);
|
||||||
UpdateRange(src.min_width, &min_width, &max_width);
|
if (src.width_sd > width_sd) {
|
||||||
UpdateRange(src.max_width, &min_width, &max_width);
|
width = src.width;
|
||||||
UpdateRange(src.min_bearing, &min_bearing, &max_bearing);
|
width_sd = src.width_sd;
|
||||||
UpdateRange(src.max_bearing, &min_bearing, &max_bearing);
|
}
|
||||||
UpdateRange(src.min_advance, &min_advance, &max_advance);
|
if (src.bearing_sd > bearing_sd) {
|
||||||
UpdateRange(src.max_advance, &min_advance, &max_advance);
|
bearing = src.bearing;
|
||||||
|
bearing_sd = src.bearing_sd;
|
||||||
|
}
|
||||||
|
if (src.advance_sd > advance_sd) {
|
||||||
|
advance = src.advance;
|
||||||
|
advance_sd = src.advance_sd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copies the properties from src into this.
|
// Copies the properties from src into this.
|
||||||
@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
|
|||||||
}
|
}
|
||||||
unichars[ch].properties.CopyFrom(properties);
|
unichars[ch].properties.CopyFrom(properties);
|
||||||
set_normed_ids(ch);
|
set_normed_ids(ch);
|
||||||
} else {
|
|
||||||
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
|
|||||||
for (int ch = 0; ch < src.size_used; ++ch) {
|
for (int ch = 0; ch < src.size_used; ++ch) {
|
||||||
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
|
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
|
||||||
const char* utf8 = src.id_to_unichar(ch);
|
const char* utf8 = src.id_to_unichar(ch);
|
||||||
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
|
if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
|
||||||
// Only use fully valid entries.
|
// Only use fully valid entries.
|
||||||
tprintf("Bad properties for index %d, char %s: "
|
tprintf("Bad properties for index %d, char %s: "
|
||||||
"%d,%d %d,%d %d,%d %d,%d %d,%d\n",
|
"%d,%d %d,%d %g,%g %g,%g %g,%g\n",
|
||||||
ch, utf8, src_props.min_bottom, src_props.max_bottom,
|
ch, utf8, src_props.min_bottom, src_props.max_bottom,
|
||||||
src_props.min_top, src_props.max_top,
|
src_props.min_top, src_props.max_top,
|
||||||
src_props.min_width, src_props.max_width,
|
src_props.width, src_props.width_sd,
|
||||||
src_props.min_bearing, src_props.max_bearing,
|
src_props.bearing, src_props.bearing_sd,
|
||||||
src_props.min_advance, src_props.max_advance);
|
src_props.advance, src_props.advance_sd);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int id = size_used;
|
int id = size_used;
|
||||||
@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
|||||||
UNICHAR_PROPERTIES* props) const {
|
UNICHAR_PROPERTIES* props) const {
|
||||||
props->Init();
|
props->Init();
|
||||||
props->SetRangesEmpty();
|
props->SetRangesEmpty();
|
||||||
props->min_advance = 0;
|
|
||||||
props->max_advance = 0;
|
|
||||||
int total_unicodes = 0;
|
int total_unicodes = 0;
|
||||||
GenericVector<UNICHAR_ID> encoding;
|
GenericVector<UNICHAR_ID> encoding;
|
||||||
if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
|
if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
|
||||||
@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
|||||||
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
|
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
|
||||||
UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
|
UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
|
||||||
UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
|
UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
|
||||||
int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
|
float bearing = props->advance + src_props.bearing;
|
||||||
-MAX_INT16, MAX_INT16);
|
if (total_unicodes == 0 || bearing < props->bearing) {
|
||||||
if (total_unicodes == 0 || bearing < props->min_bearing)
|
props->bearing = bearing;
|
||||||
props->min_bearing = bearing;
|
props->bearing_sd = props->advance_sd + src_props.bearing_sd;
|
||||||
bearing = ClipToRange(props->max_advance + src_props.max_bearing,
|
}
|
||||||
-MAX_INT16, MAX_INT16);
|
props->advance += src_props.advance;
|
||||||
if (total_unicodes == 0 || bearing < props->max_bearing)
|
props->advance_sd += src_props.advance_sd;
|
||||||
props->max_bearing = bearing;
|
|
||||||
props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
|
|
||||||
-MAX_INT16, MAX_INT16);
|
|
||||||
props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
|
|
||||||
-MAX_INT16, MAX_INT16);
|
|
||||||
// With a single width, just use the widths stored in the unicharset.
|
// With a single width, just use the widths stored in the unicharset.
|
||||||
props->min_width = src_props.min_width;
|
props->width = src_props.width;
|
||||||
props->max_width = src_props.max_width;
|
props->width_sd = src_props.width_sd;
|
||||||
// Use the first script id, other_case, mirror, direction.
|
// Use the first script id, other_case, mirror, direction.
|
||||||
// Note that these will need translation, except direction.
|
// Note that these will need translation, except direction.
|
||||||
if (total_unicodes == 0) {
|
if (total_unicodes == 0) {
|
||||||
@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
|
|||||||
}
|
}
|
||||||
if (total_unicodes > 1) {
|
if (total_unicodes > 1) {
|
||||||
// Estimate the total widths from the advance - bearing.
|
// Estimate the total widths from the advance - bearing.
|
||||||
props->min_width = ClipToRange(props->min_advance - props->max_bearing,
|
props->width = props->advance - props->bearing;
|
||||||
-MAX_INT16, MAX_INT16);
|
props->width_sd = props->advance_sd + props->bearing_sd;
|
||||||
props->max_width = ClipToRange(props->max_advance - props->min_bearing,
|
|
||||||
-MAX_INT16, MAX_INT16);
|
|
||||||
}
|
}
|
||||||
return total_unicodes > 0;
|
return total_unicodes > 0;
|
||||||
}
|
}
|
||||||
@ -707,12 +700,12 @@ bool UNICHARSET::save_to_string(STRING *str) const {
|
|||||||
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
|
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
|
||||||
int min_bottom, max_bottom, min_top, max_top;
|
int min_bottom, max_bottom, min_top, max_top;
|
||||||
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
|
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||||
int min_width, max_width;
|
float width, width_sd;
|
||||||
get_width_range(id, &min_width, &max_width);
|
get_width_stats(id, &width, &width_sd);
|
||||||
int min_bearing, max_bearing;
|
float bearing, bearing_sd;
|
||||||
get_bearing_range(id, &min_bearing, &max_bearing);
|
get_bearing_stats(id, &bearing, &bearing_sd);
|
||||||
int min_advance, max_advance;
|
float advance, advance_sd;
|
||||||
get_advance_range(id, &min_advance, &max_advance);
|
get_advance_stats(id, &advance, &advance_sd);
|
||||||
unsigned int properties = this->get_properties(id);
|
unsigned int properties = this->get_properties(id);
|
||||||
if (strcmp(this->id_to_unichar(id), " ") == 0) {
|
if (strcmp(this->id_to_unichar(id), " ") == 0) {
|
||||||
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
|
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
|
||||||
@ -720,10 +713,10 @@ bool UNICHARSET::save_to_string(STRING *str) const {
|
|||||||
this->get_other_case(id));
|
this->get_other_case(id));
|
||||||
} else {
|
} else {
|
||||||
snprintf(buffer, kFileBufSize,
|
snprintf(buffer, kFileBufSize,
|
||||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
|
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
|
||||||
this->id_to_unichar(id), properties,
|
this->id_to_unichar(id), properties,
|
||||||
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
|
min_bottom, max_bottom, min_top, max_top, width, width_sd,
|
||||||
min_bearing, max_bearing, min_advance, max_advance,
|
bearing, bearing_sd, advance, advance_sd,
|
||||||
this->get_script_from_script_id(this->get_script(id)),
|
this->get_script_from_script_id(this->get_script(id)),
|
||||||
this->get_other_case(id), this->get_direction(id),
|
this->get_other_case(id), this->get_direction(id),
|
||||||
this->get_mirror(id), this->get_normed_unichar(id),
|
this->get_mirror(id), this->get_normed_unichar(id),
|
||||||
@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
|
|||||||
int max_bottom = MAX_UINT8;
|
int max_bottom = MAX_UINT8;
|
||||||
int min_top = 0;
|
int min_top = 0;
|
||||||
int max_top = MAX_UINT8;
|
int max_top = MAX_UINT8;
|
||||||
int min_width = 0;
|
float width = 0.0f;
|
||||||
int max_width = MAX_INT16;
|
float width_sd = 0.0f;
|
||||||
int min_bearing = 0;
|
float bearing = 0.0f;
|
||||||
int max_bearing = MAX_INT16;
|
float bearing_sd = 0.0f;
|
||||||
int min_advance = 0;
|
float advance = 0.0f;
|
||||||
int max_advance = MAX_INT16;
|
float advance_sd = 0.0f;
|
||||||
// TODO(eger): check that this default is ok
|
// TODO(eger): check that this default is ok
|
||||||
// after enabling BiDi iterator for Arabic+Cube.
|
// after enabling BiDi iterator for Arabic+Cube.
|
||||||
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
|
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
|
||||||
@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
|
|||||||
int v = -1;
|
int v = -1;
|
||||||
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
|
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
|
||||||
((v = sscanf(buffer,
|
((v = sscanf(buffer,
|
||||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
|
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
|
||||||
unichar, &properties,
|
unichar, &properties,
|
||||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||||
&min_width, &max_width, &min_bearing, &max_bearing,
|
&width, &width_sd, &bearing, &bearing_sd,
|
||||||
&min_advance, &max_advance, script, &other_case,
|
&advance, &advance_sd, script, &other_case,
|
||||||
&direction, &mirror, normed)) != 17 &&
|
&direction, &mirror, normed)) != 17 &&
|
||||||
(v = sscanf(buffer,
|
(v = sscanf(buffer,
|
||||||
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
|
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
|
||||||
unichar, &properties,
|
unichar, &properties,
|
||||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||||
&min_width, &max_width, &min_bearing, &max_bearing,
|
&width, &width_sd, &bearing, &bearing_sd,
|
||||||
&min_advance, &max_advance,
|
&advance, &advance_sd, script, &other_case,
|
||||||
script, &other_case, &direction, &mirror)) != 16 &&
|
&direction, &mirror)) != 16 &&
|
||||||
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
|
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
|
||||||
unichar, &properties,
|
unichar, &properties,
|
||||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||||
@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
|
|||||||
this->set_script(id, script);
|
this->set_script(id, script);
|
||||||
this->unichars[id].properties.enabled = true;
|
this->unichars[id].properties.enabled = true;
|
||||||
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
|
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
|
||||||
this->set_width_range(id, min_width, max_width);
|
this->set_width_stats(id, width, width_sd);
|
||||||
this->set_bearing_range(id, min_bearing, max_bearing);
|
this->set_bearing_stats(id, bearing, bearing_sd);
|
||||||
this->set_advance_range(id, min_advance, max_advance);
|
this->set_advance_stats(id, advance, advance_sd);
|
||||||
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
|
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
|
||||||
ASSERT_HOST(other_case < unicharset_size);
|
ASSERT_HOST(other_case < unicharset_size);
|
||||||
this->set_other_case(id, (v>3) ? other_case : id);
|
this->set_other_case(id, (v>3) ? other_case : id);
|
||||||
|
@ -554,68 +554,56 @@ class UNICHARSET {
|
|||||||
unichars[unichar_id].properties.max_top =
|
unichars[unichar_id].properties.max_top =
|
||||||
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
|
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
|
||||||
}
|
}
|
||||||
// Returns the width range of the given unichar in baseline-normalized
|
// Returns the width stats (as mean, sd) of the given unichar relative to the
|
||||||
// coordinates, ie, where the baseline is kBlnBaselineOffset and the
|
// median advance of all characters in the character set.
|
||||||
// meanline is kBlnBaselineOffset + kBlnXHeight.
|
void get_width_stats(UNICHAR_ID unichar_id,
|
||||||
// (See normalis.h for the definitions).
|
float* width, float* width_sd) const {
|
||||||
void get_width_range(UNICHAR_ID unichar_id,
|
|
||||||
int* min_width, int* max_width) const {
|
|
||||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||||
*min_width = 0;
|
*width = 0.0f;
|
||||||
*max_width = 256; // kBlnCellHeight;
|
*width_sd = 0.0f;;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||||
*min_width = unichars[unichar_id].properties.min_width;
|
*width = unichars[unichar_id].properties.width;
|
||||||
*max_width = unichars[unichar_id].properties.max_width;
|
*width_sd = unichars[unichar_id].properties.width_sd;
|
||||||
}
|
}
|
||||||
void set_width_range(UNICHAR_ID unichar_id, int min_width, int max_width) {
|
void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) {
|
||||||
unichars[unichar_id].properties.min_width =
|
unichars[unichar_id].properties.width = width;
|
||||||
static_cast<inT16>(ClipToRange(min_width, 0, MAX_INT16));
|
unichars[unichar_id].properties.width_sd = width_sd;
|
||||||
unichars[unichar_id].properties.max_width =
|
|
||||||
static_cast<inT16>(ClipToRange(max_width, 0, MAX_INT16));
|
|
||||||
}
|
}
|
||||||
// Returns the range of the x-bearing of the given unichar in
|
// Returns the stats of the x-bearing (as mean, sd) of the given unichar
|
||||||
// baseline-normalized coordinates, ie, where the baseline is
|
// relative to the median advance of all characters in the character set.
|
||||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
|
void get_bearing_stats(UNICHAR_ID unichar_id,
|
||||||
// (See normalis.h for the definitions).
|
float* bearing, float* bearing_sd) const {
|
||||||
void get_bearing_range(UNICHAR_ID unichar_id,
|
|
||||||
int* min_bearing, int* max_bearing) const {
|
|
||||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||||
*min_bearing = *max_bearing = 0;
|
*bearing = *bearing_sd = 0.0f;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||||
*min_bearing = unichars[unichar_id].properties.min_bearing;
|
*bearing = unichars[unichar_id].properties.bearing;
|
||||||
*max_bearing = unichars[unichar_id].properties.max_bearing;
|
*bearing_sd = unichars[unichar_id].properties.bearing_sd;
|
||||||
}
|
}
|
||||||
void set_bearing_range(UNICHAR_ID unichar_id,
|
void set_bearing_stats(UNICHAR_ID unichar_id,
|
||||||
int min_bearing, int max_bearing) {
|
float bearing, float bearing_sd) {
|
||||||
unichars[unichar_id].properties.min_bearing =
|
unichars[unichar_id].properties.bearing = bearing;
|
||||||
static_cast<inT16>(ClipToRange(min_bearing, 0, MAX_INT16));
|
unichars[unichar_id].properties.bearing_sd = bearing_sd;
|
||||||
unichars[unichar_id].properties.max_bearing =
|
|
||||||
static_cast<inT16>(ClipToRange(max_bearing, 0, MAX_INT16));
|
|
||||||
}
|
}
|
||||||
// Returns the range of the x-advance of the given unichar in
|
// Returns the stats of the x-advance of the given unichar (as mean, sd)
|
||||||
// baseline-normalized coordinates, ie, where the baseline is
|
// relative to the median advance of all characters in the character set.
|
||||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
|
void get_advance_stats(UNICHAR_ID unichar_id,
|
||||||
// (See normalis.h for the definitions).
|
float* advance, float* advance_sd) const {
|
||||||
void get_advance_range(UNICHAR_ID unichar_id,
|
|
||||||
int* min_advance, int* max_advance) const {
|
|
||||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||||
*min_advance = *max_advance = 0;
|
*advance = *advance_sd = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ASSERT_HOST(contains_unichar_id(unichar_id));
|
ASSERT_HOST(contains_unichar_id(unichar_id));
|
||||||
*min_advance = unichars[unichar_id].properties.min_advance;
|
*advance = unichars[unichar_id].properties.advance;
|
||||||
*max_advance = unichars[unichar_id].properties.max_advance;
|
*advance_sd = unichars[unichar_id].properties.advance_sd;
|
||||||
}
|
}
|
||||||
void set_advance_range(UNICHAR_ID unichar_id,
|
void set_advance_stats(UNICHAR_ID unichar_id,
|
||||||
int min_advance, int max_advance) {
|
float advance, float advance_sd) {
|
||||||
unichars[unichar_id].properties.min_advance =
|
unichars[unichar_id].properties.advance = advance;
|
||||||
static_cast<inT16>(ClipToRange(min_advance, 0, MAX_INT16));
|
unichars[unichar_id].properties.advance_sd = advance_sd;
|
||||||
unichars[unichar_id].properties.max_advance =
|
|
||||||
static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
|
|
||||||
}
|
}
|
||||||
// Returns true if the font metrics properties are empty.
|
// Returns true if the font metrics properties are empty.
|
||||||
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
|
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
|
||||||
@ -873,8 +861,8 @@ class UNICHARSET {
|
|||||||
void SetRangesOpen();
|
void SetRangesOpen();
|
||||||
// Sets all ranges to empty. Used before expanding with font-based data.
|
// Sets all ranges to empty. Used before expanding with font-based data.
|
||||||
void SetRangesEmpty();
|
void SetRangesEmpty();
|
||||||
// Returns true if any of the top/bottom/width/bearing/advance ranges is
|
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
|
||||||
// empty.
|
// is empty.
|
||||||
bool AnyRangeEmpty() const;
|
bool AnyRangeEmpty() const;
|
||||||
// Expands the ranges with the ranges from the src properties.
|
// Expands the ranges with the ranges from the src properties.
|
||||||
void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
|
void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
|
||||||
@ -896,14 +884,14 @@ class UNICHARSET {
|
|||||||
uinT8 max_bottom;
|
uinT8 max_bottom;
|
||||||
uinT8 min_top;
|
uinT8 min_top;
|
||||||
uinT8 max_top;
|
uinT8 max_top;
|
||||||
// Limits on the widths of bounding box, also in baseline-normalized coords.
|
// Statistics of the widths of bounding box, relative to the median advance.
|
||||||
inT16 min_width;
|
float width;
|
||||||
inT16 max_width;
|
float width_sd;
|
||||||
// Limits on the x-bearing and advance, also in baseline-normalized coords.
|
// Stats of the x-bearing and advance, also relative to the median advance.
|
||||||
inT16 min_bearing;
|
float bearing;
|
||||||
inT16 max_bearing;
|
float bearing_sd;
|
||||||
inT16 min_advance;
|
float advance;
|
||||||
inT16 max_advance;
|
float advance_sd;
|
||||||
int script_id;
|
int script_id;
|
||||||
UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar
|
UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar
|
||||||
Direction direction; // direction of this unichar
|
Direction direction; // direction of this unichar
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -30,8 +30,6 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#define MIN_INERTIA (0.00001)
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
Public Code
|
Public Code
|
||||||
----------------------------------------------------------------------------*/
|
----------------------------------------------------------------------------*/
|
||||||
@ -475,71 +473,6 @@ void ComputeDirection(MFEDGEPT *Start,
|
|||||||
Finish->PreviousDirection = Start->Direction;
|
Finish->PreviousDirection = Start->Direction;
|
||||||
} /* ComputeDirection */
|
} /* ComputeDirection */
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) {
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** OutlineStats statistics about a set of outlines
|
|
||||||
** Globals: none
|
|
||||||
** Operation: Use the preliminary statistics accumulated in OutlineStats
|
|
||||||
** to compute the final statistics.
|
|
||||||
** (see Dan Johnson's Tesseract lab
|
|
||||||
** notebook #2, pgs. 74-78).
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Dec 14 10:13:36 1990, DSJ, Created.
|
|
||||||
*/
|
|
||||||
OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L;
|
|
||||||
OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L;
|
|
||||||
|
|
||||||
OutlineStats->Ix = (OutlineStats->Ix / 3.0 -
|
|
||||||
OutlineStats->y * OutlineStats->Mx +
|
|
||||||
OutlineStats->y * OutlineStats->y * OutlineStats->L);
|
|
||||||
|
|
||||||
OutlineStats->Iy = (OutlineStats->Iy / 3.0 -
|
|
||||||
OutlineStats->x * OutlineStats->My +
|
|
||||||
OutlineStats->x * OutlineStats->x * OutlineStats->L);
|
|
||||||
|
|
||||||
/* Ix and/or Iy could possibly be negative due to roundoff error */
|
|
||||||
if (OutlineStats->Ix < 0.0)
|
|
||||||
OutlineStats->Ix = MIN_INERTIA;
|
|
||||||
if (OutlineStats->Iy < 0.0)
|
|
||||||
OutlineStats->Iy = MIN_INERTIA;
|
|
||||||
|
|
||||||
OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L);
|
|
||||||
OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L);
|
|
||||||
|
|
||||||
OutlineStats->Mx *= 0.5;
|
|
||||||
OutlineStats->My *= 0.5;
|
|
||||||
|
|
||||||
} /* FinishOutlineStats */
|
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
void InitOutlineStats(OUTLINE_STATS *OutlineStats) {
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** OutlineStats stats data structure to be initialized
|
|
||||||
** Globals: none
|
|
||||||
** Operation: Initialize the outline statistics data structure so
|
|
||||||
** that it is ready to start accumulating statistics.
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Dec 14 08:55:22 1990, DSJ, Created.
|
|
||||||
*/
|
|
||||||
OutlineStats->Mx = 0.0;
|
|
||||||
OutlineStats->My = 0.0;
|
|
||||||
OutlineStats->L = 0.0;
|
|
||||||
OutlineStats->x = 0.0;
|
|
||||||
OutlineStats->y = 0.0;
|
|
||||||
OutlineStats->Ix = 0.0;
|
|
||||||
OutlineStats->Iy = 0.0;
|
|
||||||
OutlineStats->Rx = 0.0;
|
|
||||||
OutlineStats->Ry = 0.0;
|
|
||||||
} /* InitOutlineStats */
|
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
||||||
/*
|
/*
|
||||||
@ -569,51 +502,3 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
|||||||
|
|
||||||
return (EdgePoint);
|
return (EdgePoint);
|
||||||
} /* NextDirectionChange */
|
} /* NextDirectionChange */
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
|
|
||||||
register FLOAT32 x1,
|
|
||||||
register FLOAT32 x2,
|
|
||||||
register FLOAT32 y1,
|
|
||||||
register FLOAT32 y2) {
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** OutlineStats statistics to add this segment to
|
|
||||||
** x1, y1, x2, y2 segment to be added to statistics
|
|
||||||
** Globals: none
|
|
||||||
** Operation: This routine adds the statistics for the specified
|
|
||||||
** line segment to OutlineStats. The statistics that are
|
|
||||||
** kept are:
|
|
||||||
** sum of length of all segments
|
|
||||||
** sum of 2*Mx for all segments
|
|
||||||
** sum of 2*My for all segments
|
|
||||||
** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments
|
|
||||||
** sum of 2*My*(x1+x2) - L*x1*x2 for all segments
|
|
||||||
** These numbers, once collected can later be used to easily
|
|
||||||
** compute the center of mass, first and second moments,
|
|
||||||
** and radii of gyration. (see Dan Johnson's Tesseract lab
|
|
||||||
** notebook #2, pgs. 74-78).
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Dec 14 08:59:17 1990, DSJ, Created.
|
|
||||||
*/
|
|
||||||
register FLOAT64 L;
|
|
||||||
register FLOAT64 Mx2;
|
|
||||||
register FLOAT64 My2;
|
|
||||||
|
|
||||||
/* compute length of segment */
|
|
||||||
L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
|
|
||||||
OutlineStats->L += L;
|
|
||||||
|
|
||||||
/* compute 2Mx and 2My components */
|
|
||||||
Mx2 = L * (y1 + y2);
|
|
||||||
My2 = L * (x1 + x2);
|
|
||||||
OutlineStats->Mx += Mx2;
|
|
||||||
OutlineStats->My += My2;
|
|
||||||
|
|
||||||
/* compute second moment component */
|
|
||||||
OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2;
|
|
||||||
OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2;
|
|
||||||
|
|
||||||
} /* UpdateOutlineStats */
|
|
||||||
|
@ -50,14 +50,6 @@ typedef enum {
|
|||||||
outer, hole
|
outer, hole
|
||||||
} OUTLINETYPE;
|
} OUTLINETYPE;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
FLOAT64 Mx, My; /* first moment of all outlines */
|
|
||||||
FLOAT64 L; /* total length of all outlines */
|
|
||||||
FLOAT64 x, y; /* center of mass of all outlines */
|
|
||||||
FLOAT64 Ix, Iy; /* second moments about center of mass axes */
|
|
||||||
FLOAT64 Rx, Ry; /* radius of gyration about center of mass axes */
|
|
||||||
} OUTLINE_STATS;
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
baseline, character
|
baseline, character
|
||||||
} NORM_METHOD;
|
} NORM_METHOD;
|
||||||
@ -127,16 +119,6 @@ void ComputeDirection(MFEDGEPT *Start,
|
|||||||
FLOAT32 MinSlope,
|
FLOAT32 MinSlope,
|
||||||
FLOAT32 MaxSlope);
|
FLOAT32 MaxSlope);
|
||||||
|
|
||||||
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats);
|
|
||||||
|
|
||||||
void InitOutlineStats(OUTLINE_STATS *OutlineStats);
|
|
||||||
|
|
||||||
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);
|
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);
|
||||||
|
|
||||||
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
|
|
||||||
register FLOAT32 x1,
|
|
||||||
register FLOAT32 x2,
|
|
||||||
register FLOAT32 y1,
|
|
||||||
register FLOAT32 y2);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -176,10 +176,9 @@ class TessLangModEdge : public LangModEdge {
|
|||||||
// returns the Hash value of the edge. Used by the SearchNode hash table
|
// returns the Hash value of the edge. Used by the SearchNode hash table
|
||||||
// to quickly lookup exisiting edges to converge during search
|
// to quickly lookup exisiting edges to converge during search
|
||||||
inline unsigned int Hash() const {
|
inline unsigned int Hash() const {
|
||||||
return static_cast<unsigned int>(((start_edge_ | end_edge_) ^
|
return static_cast<unsigned int>(
|
||||||
((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
||||||
((unsigned int)edge_mask_) ^
|
((unsigned int)edge_mask_) ^ class_id_);
|
||||||
class_id_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// A verbal description of the edge: Used by visualizers
|
// A verbal description of the edge: Used by visualizers
|
||||||
|
@ -2669,7 +2669,8 @@ PERF_COUNT_START("HistogramRectOCL")
|
|||||||
int numThreads = block_size*numWorkGroups;
|
int numThreads = block_size*numWorkGroups;
|
||||||
size_t local_work_size[] = {static_cast<size_t>(block_size)};
|
size_t local_work_size[] = {static_cast<size_t>(block_size)};
|
||||||
size_t global_work_size[] = {static_cast<size_t>(numThreads)};
|
size_t global_work_size[] = {static_cast<size_t>(numThreads)};
|
||||||
size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
|
size_t red_global_work_size[] = {
|
||||||
|
static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
|
||||||
|
|
||||||
/* map histogramAllChannels as write only */
|
/* map histogramAllChannels as write only */
|
||||||
int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
|
int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;
|
||||||
|
@ -152,11 +152,11 @@ InputBuffer::~InputBuffer() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool InputBuffer::Read(string *out) {
|
bool InputBuffer::Read(string* out) {
|
||||||
char buf[BUFSIZ+1];
|
char buf[BUFSIZ + 1];
|
||||||
int l;
|
int l;
|
||||||
while((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
|
while ((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
|
||||||
if(ferror(stream_)) {
|
if (ferror(stream_)) {
|
||||||
clearerr(stream_);
|
clearerr(stream_);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -140,6 +140,14 @@ void StringRenderer::set_resolution(const int resolution) {
|
|||||||
font_.set_resolution(resolution);
|
font_.set_resolution(resolution);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void StringRenderer::set_underline_start_prob(const double frac) {
|
||||||
|
underline_start_prob_ = min(max(frac, 0.0), 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void StringRenderer::set_underline_continuation_prob(const double frac) {
|
||||||
|
underline_continuation_prob_ = min(max(frac, 0.0), 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
StringRenderer::~StringRenderer() {
|
StringRenderer::~StringRenderer() {
|
||||||
ClearBoxes();
|
ClearBoxes();
|
||||||
FreePangoCairo();
|
FreePangoCairo();
|
||||||
|
@ -83,14 +83,10 @@ class StringRenderer {
|
|||||||
// Sets the probability (value in [0, 1]) of starting to render a word with an
|
// Sets the probability (value in [0, 1]) of starting to render a word with an
|
||||||
// underline. This implementation consider words to be space-delimited
|
// underline. This implementation consider words to be space-delimited
|
||||||
// sequences of characters.
|
// sequences of characters.
|
||||||
void set_underline_start_prob(const double frac) {
|
void set_underline_start_prob(const double frac);
|
||||||
underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
|
|
||||||
}
|
|
||||||
// Set the probability (value in [0, 1]) of continuing a started underline to
|
// Set the probability (value in [0, 1]) of continuing a started underline to
|
||||||
// the next word.
|
// the next word.
|
||||||
void set_underline_continuation_prob(const double frac) {
|
void set_underline_continuation_prob(const double frac);
|
||||||
underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
|
|
||||||
}
|
|
||||||
void set_underline_style(const PangoUnderline style) {
|
void set_underline_style(const PangoUnderline style) {
|
||||||
underline_style_ = style;
|
underline_style_ = style;
|
||||||
}
|
}
|
||||||
|
@ -184,9 +184,9 @@ parse_flags() {
|
|||||||
TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
|
TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
|
||||||
fi
|
fi
|
||||||
if [[ -z ${WORDLIST_FILE} ]]; then
|
if [[ -z ${WORDLIST_FILE} ]]; then
|
||||||
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist.clean
|
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist
|
||||||
fi
|
fi
|
||||||
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams.clean
|
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams
|
||||||
NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
|
NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
|
||||||
PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
|
PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
|
||||||
BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs
|
BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs
|
||||||
|
Loading…
Reference in New Issue
Block a user