Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.

This commit is contained in:
Ray Smith 2015-07-09 14:28:20 -07:00
parent d00d833b9b
commit a303ab9d00
16 changed files with 19138 additions and 21635 deletions

View File

@ -636,9 +636,9 @@ bool TessPDFRenderer::BeginDocumentHandler() {
" /Length1 %ld\n"
">>\n"
"stream\n", size, size);
if (n >= sizeof(buf)) {
delete[] buffer;
return false;
if (n >= sizeof(buf)) {
delete[] buffer;
return false;
}
AppendString(buf);
objsize = strlen(buf);

View File

@ -314,6 +314,7 @@ void Tesseract::do_re_display(
image_win->Image(pix_binary_, 0, 0);
}
image_win->Brush(ScrollView::NONE);
PAGE_RES_IT pr_it(current_page_res);
for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
(this->*word_painter)(&pr_it);

View File

@ -1,8 +1,10 @@
/**********************************************************************
* File: tessedit.cpp (Formerly tessedit.c)
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
* Description: (Previously) Main program for merge of tess and editor.
* Now just code to load the language model and various
* engine-specific data files.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");

View File

@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
int band_width = bandwidth();
MATRIX* result = new MATRIX(dim, band_width);
for (int col = 0; col < dim; ++col) {
for (int row = col; row < col + band_width; ++row) {
for (int row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST* choices = get(col, row);
if (choices != NULL) {
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
result->put(col, row, copy_choices);
}
}

View File

@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
flags(0),
script_id_(0),
correct(text) {
C_BLOB_IT start_it = blob_list;
C_BLOB_IT end_it = blob_list;
C_BLOB_IT start_it = &cblobs;
C_BLOB_IT rej_cblob_it = &rej_cblobs;
C_OUTLINE_IT c_outline_it;
inT16 inverted_vote = 0;
inT16 non_inverted_vote = 0;
// Move blob_list's elements into cblobs.
while (!end_it.at_last())
end_it.forward();
cblobs.assign_to_sublist(&start_it, &end_it);
start_it.add_list_after(blob_list);
/*
Set white on black flag for the WERD, moving any duff blobs onto the

View File

@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
max_bottom = MAX_UINT8;
min_top = 0;
max_top = MAX_UINT8;
min_width = 0;
max_width = MAX_INT16;
min_bearing = 0;
max_bearing = MAX_INT16;
min_advance = 0;
max_advance = MAX_INT16;
width = 0.0f;
width_sd = 0.0f;
bearing = 0.0f;
bearing_sd = 0.0f;
advance = 0.0f;
advance_sd = 0.0f;
}
// Sets all ranges to empty. Used before expanding with font-based data.
@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
max_bottom = 0;
min_top = MAX_UINT8;
max_top = 0;
min_width = MAX_INT16;
max_width = 0;
min_bearing = MAX_INT16;
max_bearing = 0;
min_advance = MAX_INT16;
max_advance = 0;
width = 0.0f;
width_sd = 0.0f;
bearing = 0.0f;
bearing_sd = 0.0f;
advance = 0.0f;
advance_sd = 0.0f;
}
// Returns true if any of the top/bottom/width/bearing/advance ranges is
// empty.
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
// is empty.
bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
return min_bottom > max_bottom || min_top > max_top ||
min_width > max_width || min_bearing > max_bearing ||
min_advance > max_advance;
return width == 0.0f || advance == 0.0f;
}
// Expands the ranges with the ranges from the src properties.
@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
UpdateRange(src.min_top, &min_top, &max_top);
UpdateRange(src.max_top, &min_top, &max_top);
UpdateRange(src.min_width, &min_width, &max_width);
UpdateRange(src.max_width, &min_width, &max_width);
UpdateRange(src.min_bearing, &min_bearing, &max_bearing);
UpdateRange(src.max_bearing, &min_bearing, &max_bearing);
UpdateRange(src.min_advance, &min_advance, &max_advance);
UpdateRange(src.max_advance, &min_advance, &max_advance);
if (src.width_sd > width_sd) {
width = src.width;
width_sd = src.width_sd;
}
if (src.bearing_sd > bearing_sd) {
bearing = src.bearing;
bearing_sd = src.bearing_sd;
}
if (src.advance_sd > advance_sd) {
advance = src.advance;
advance_sd = src.advance_sd;
}
}
// Copies the properties from src into this.
@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
}
unichars[ch].properties.CopyFrom(properties);
set_normed_ids(ch);
} else {
tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
}
}
}
@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
for (int ch = 0; ch < src.size_used; ++ch) {
const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
const char* utf8 = src.id_to_unichar(ch);
if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
// Only use fully valid entries.
tprintf("Bad properties for index %d, char %s: "
"%d,%d %d,%d %d,%d %d,%d %d,%d\n",
"%d,%d %d,%d %g,%g %g,%g %g,%g\n",
ch, utf8, src_props.min_bottom, src_props.max_bottom,
src_props.min_top, src_props.max_top,
src_props.min_width, src_props.max_width,
src_props.min_bearing, src_props.max_bearing,
src_props.min_advance, src_props.max_advance);
src_props.width, src_props.width_sd,
src_props.bearing, src_props.bearing_sd,
src_props.advance, src_props.advance_sd);
continue;
}
int id = size_used;
@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
UNICHAR_PROPERTIES* props) const {
props->Init();
props->SetRangesEmpty();
props->min_advance = 0;
props->max_advance = 0;
int total_unicodes = 0;
GenericVector<UNICHAR_ID> encoding;
if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
-MAX_INT16, MAX_INT16);
if (total_unicodes == 0 || bearing < props->min_bearing)
props->min_bearing = bearing;
bearing = ClipToRange(props->max_advance + src_props.max_bearing,
-MAX_INT16, MAX_INT16);
if (total_unicodes == 0 || bearing < props->max_bearing)
props->max_bearing = bearing;
props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
-MAX_INT16, MAX_INT16);
props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
-MAX_INT16, MAX_INT16);
float bearing = props->advance + src_props.bearing;
if (total_unicodes == 0 || bearing < props->bearing) {
props->bearing = bearing;
props->bearing_sd = props->advance_sd + src_props.bearing_sd;
}
props->advance += src_props.advance;
props->advance_sd += src_props.advance_sd;
// With a single width, just use the widths stored in the unicharset.
props->min_width = src_props.min_width;
props->max_width = src_props.max_width;
props->width = src_props.width;
props->width_sd = src_props.width_sd;
// Use the first script id, other_case, mirror, direction.
// Note that these will need translation, except direction.
if (total_unicodes == 0) {
@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
}
if (total_unicodes > 1) {
// Estimate the total widths from the advance - bearing.
props->min_width = ClipToRange(props->min_advance - props->max_bearing,
-MAX_INT16, MAX_INT16);
props->max_width = ClipToRange(props->max_advance - props->min_bearing,
-MAX_INT16, MAX_INT16);
props->width = props->advance - props->bearing;
props->width_sd = props->advance_sd + props->bearing_sd;
}
return total_unicodes > 0;
}
@ -707,12 +700,12 @@ bool UNICHARSET::save_to_string(STRING *str) const {
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
int min_bottom, max_bottom, min_top, max_top;
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
int min_width, max_width;
get_width_range(id, &min_width, &max_width);
int min_bearing, max_bearing;
get_bearing_range(id, &min_bearing, &max_bearing);
int min_advance, max_advance;
get_advance_range(id, &min_advance, &max_advance);
float width, width_sd;
get_width_stats(id, &width, &width_sd);
float bearing, bearing_sd;
get_bearing_stats(id, &bearing, &bearing_sd);
float advance, advance_sd;
get_advance_stats(id, &advance, &advance_sd);
unsigned int properties = this->get_properties(id);
if (strcmp(this->id_to_unichar(id), " ") == 0) {
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
@ -720,10 +713,10 @@ bool UNICHARSET::save_to_string(STRING *str) const {
this->get_other_case(id));
} else {
snprintf(buffer, kFileBufSize,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
this->id_to_unichar(id), properties,
min_bottom, max_bottom, min_top, max_top, min_width, max_width,
min_bearing, max_bearing, min_advance, max_advance,
min_bottom, max_bottom, min_top, max_top, width, width_sd,
bearing, bearing_sd, advance, advance_sd,
this->get_script_from_script_id(this->get_script(id)),
this->get_other_case(id), this->get_direction(id),
this->get_mirror(id), this->get_normed_unichar(id),
@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
int max_bottom = MAX_UINT8;
int min_top = 0;
int max_top = MAX_UINT8;
int min_width = 0;
int max_width = MAX_INT16;
int min_bearing = 0;
int max_bearing = MAX_INT16;
int min_advance = 0;
int max_advance = MAX_INT16;
float width = 0.0f;
float width_sd = 0.0f;
float bearing = 0.0f;
float bearing_sd = 0.0f;
float advance = 0.0f;
float advance_sd = 0.0f;
// TODO(eger): check that this default it ok
// after enabling BiDi iterator for Arabic+Cube.
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
int v = -1;
if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
((v = sscanf(buffer,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top,
&min_width, &max_width, &min_bearing, &max_bearing,
&min_advance, &max_advance, script, &other_case,
&width, &width_sd, &bearing, &bearing_sd,
&advance, &advance_sd, script, &other_case,
&direction, &mirror, normed)) != 17 &&
(v = sscanf(buffer,
"%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top,
&min_width, &max_width, &min_bearing, &max_bearing,
&min_advance, &max_advance,
script, &other_case, &direction, &mirror)) != 16 &&
&width, &width_sd, &bearing, &bearing_sd,
&advance, &advance_sd, script, &other_case,
&direction, &mirror)) != 16 &&
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
unichar, &properties,
&min_bottom, &max_bottom, &min_top, &max_top,
@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
this->set_script(id, script);
this->unichars[id].properties.enabled = true;
this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
this->set_width_range(id, min_width, max_width);
this->set_bearing_range(id, min_bearing, max_bearing);
this->set_advance_range(id, min_advance, max_advance);
this->set_width_stats(id, width, width_sd);
this->set_bearing_stats(id, bearing, bearing_sd);
this->set_advance_stats(id, advance, advance_sd);
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
ASSERT_HOST(other_case < unicharset_size);
this->set_other_case(id, (v>3) ? other_case : id);

View File

@ -554,68 +554,56 @@ class UNICHARSET {
unichars[unichar_id].properties.max_top =
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
}
// Returns the width range of the given unichar in baseline-normalized
// coordinates, ie, where the baseline is kBlnBaselineOffset and the
// meanline is kBlnBaselineOffset + kBlnXHeight.
// (See normalis.h for the definitions).
void get_width_range(UNICHAR_ID unichar_id,
int* min_width, int* max_width) const {
// Returns the width stats (as mean, sd) of the given unichar relative to the
// median advance of all characters in the character set.
void get_width_stats(UNICHAR_ID unichar_id,
float* width, float* width_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*min_width = 0;
*max_width = 256; // kBlnCellHeight;
*width = 0.0f;
*width_sd = 0.0f;;
return;
}
ASSERT_HOST(contains_unichar_id(unichar_id));
*min_width = unichars[unichar_id].properties.min_width;
*max_width = unichars[unichar_id].properties.max_width;
*width = unichars[unichar_id].properties.width;
*width_sd = unichars[unichar_id].properties.width_sd;
}
void set_width_range(UNICHAR_ID unichar_id, int min_width, int max_width) {
unichars[unichar_id].properties.min_width =
static_cast<inT16>(ClipToRange(min_width, 0, MAX_INT16));
unichars[unichar_id].properties.max_width =
static_cast<inT16>(ClipToRange(max_width, 0, MAX_INT16));
void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) {
unichars[unichar_id].properties.width = width;
unichars[unichar_id].properties.width_sd = width_sd;
}
// Returns the range of the x-bearing of the given unichar in
// baseline-normalized coordinates, ie, where the baseline is
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
// (See normalis.h for the definitions).
void get_bearing_range(UNICHAR_ID unichar_id,
int* min_bearing, int* max_bearing) const {
// Returns the stats of the x-bearing (as mean, sd) of the given unichar
// relative to the median advance of all characters in the character set.
void get_bearing_stats(UNICHAR_ID unichar_id,
float* bearing, float* bearing_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*min_bearing = *max_bearing = 0;
*bearing = *bearing_sd = 0.0f;
return;
}
ASSERT_HOST(contains_unichar_id(unichar_id));
*min_bearing = unichars[unichar_id].properties.min_bearing;
*max_bearing = unichars[unichar_id].properties.max_bearing;
*bearing = unichars[unichar_id].properties.bearing;
*bearing_sd = unichars[unichar_id].properties.bearing_sd;
}
void set_bearing_range(UNICHAR_ID unichar_id,
int min_bearing, int max_bearing) {
unichars[unichar_id].properties.min_bearing =
static_cast<inT16>(ClipToRange(min_bearing, 0, MAX_INT16));
unichars[unichar_id].properties.max_bearing =
static_cast<inT16>(ClipToRange(max_bearing, 0, MAX_INT16));
void set_bearing_stats(UNICHAR_ID unichar_id,
float bearing, float bearing_sd) {
unichars[unichar_id].properties.bearing = bearing;
unichars[unichar_id].properties.bearing_sd = bearing_sd;
}
// Returns the range of the x-advance of the given unichar in
// baseline-normalized coordinates, ie, where the baseline is
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
// (See normalis.h for the definitions).
void get_advance_range(UNICHAR_ID unichar_id,
int* min_advance, int* max_advance) const {
// Returns the stats of the x-advance of the given unichar (as mean, sd)
// relative to the median advance of all characters in the character set.
void get_advance_stats(UNICHAR_ID unichar_id,
float* advance, float* advance_sd) const {
if (INVALID_UNICHAR_ID == unichar_id) {
*min_advance = *max_advance = 0;
*advance = *advance_sd = 0;
return;
}
ASSERT_HOST(contains_unichar_id(unichar_id));
*min_advance = unichars[unichar_id].properties.min_advance;
*max_advance = unichars[unichar_id].properties.max_advance;
*advance = unichars[unichar_id].properties.advance;
*advance_sd = unichars[unichar_id].properties.advance_sd;
}
void set_advance_range(UNICHAR_ID unichar_id,
int min_advance, int max_advance) {
unichars[unichar_id].properties.min_advance =
static_cast<inT16>(ClipToRange(min_advance, 0, MAX_INT16));
unichars[unichar_id].properties.max_advance =
static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
void set_advance_stats(UNICHAR_ID unichar_id,
float advance, float advance_sd) {
unichars[unichar_id].properties.advance = advance;
unichars[unichar_id].properties.advance_sd = advance_sd;
}
// Returns true if the font metrics properties are empty.
bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
@ -873,8 +861,8 @@ class UNICHARSET {
void SetRangesOpen();
// Sets all ranges to empty. Used before expanding with font-based data.
void SetRangesEmpty();
// Returns true if any of the top/bottom/width/bearing/advance ranges is
// empty.
// Returns true if any of the top/bottom/width/bearing/advance ranges/stats
// is empty.
bool AnyRangeEmpty() const;
// Expands the ranges with the ranges from the src properties.
void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
@ -896,14 +884,14 @@ class UNICHARSET {
uinT8 max_bottom;
uinT8 min_top;
uinT8 max_top;
// Limits on the widths of bounding box, also in baseline-normalized coords.
inT16 min_width;
inT16 max_width;
// Limits on the x-bearing and advance, also in baseline-normalized coords.
inT16 min_bearing;
inT16 max_bearing;
inT16 min_advance;
inT16 max_advance;
// Statistics of the widths of bounding box, relative to the median advance.
float width;
float width_sd;
// Stats of the x-bearing and advance, also relative to the median advance.
float bearing;
float bearing_sd;
float advance;
float advance_sd;
int script_id;
UNICHAR_ID other_case; // id of the corresponding upper/lower case unichar
Direction direction; // direction of this unichar

File diff suppressed because it is too large Load Diff

View File

@ -30,8 +30,6 @@
#include <math.h>
#include <stdio.h>
#define MIN_INERTIA (0.00001)
/*----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------*/
@ -475,71 +473,6 @@ void ComputeDirection(MFEDGEPT *Start,
Finish->PreviousDirection = Start->Direction;
} /* ComputeDirection */
/*---------------------------------------------------------------------------*/
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) {
/*
** Parameters:
** OutlineStats statistics about a set of outlines
** Globals: none
** Operation: Use the preliminary statistics accumulated in OutlineStats
** to compute the final statistics.
** (see Dan Johnson's Tesseract lab
** notebook #2, pgs. 74-78).
** Return: none
** Exceptions: none
** History: Fri Dec 14 10:13:36 1990, DSJ, Created.
*/
OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L;
OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L;
OutlineStats->Ix = (OutlineStats->Ix / 3.0 -
OutlineStats->y * OutlineStats->Mx +
OutlineStats->y * OutlineStats->y * OutlineStats->L);
OutlineStats->Iy = (OutlineStats->Iy / 3.0 -
OutlineStats->x * OutlineStats->My +
OutlineStats->x * OutlineStats->x * OutlineStats->L);
/* Ix and/or Iy could possibly be negative due to roundoff error */
if (OutlineStats->Ix < 0.0)
OutlineStats->Ix = MIN_INERTIA;
if (OutlineStats->Iy < 0.0)
OutlineStats->Iy = MIN_INERTIA;
OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L);
OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L);
OutlineStats->Mx *= 0.5;
OutlineStats->My *= 0.5;
} /* FinishOutlineStats */
/*---------------------------------------------------------------------------*/
void InitOutlineStats(OUTLINE_STATS *OutlineStats) {
  /*
   ** Parameters:
   **   OutlineStats  stats data structure to be initialized
   ** Globals: none
   ** Operation: Reset every field of the outline statistics structure
   **   to zero so that it is ready to start accumulating
   **   statistics.
   ** Return: none
   ** Exceptions: none
   ** History: Fri Dec 14 08:55:22 1990, DSJ, Created.
   */
  OUTLINE_STATS &stats = *OutlineStats;

  /* total length and first moments */
  stats.L = 0.0;
  stats.Mx = stats.My = 0.0;
  /* center of mass */
  stats.x = stats.y = 0.0;
  /* second moments and radii of gyration */
  stats.Ix = stats.Iy = 0.0;
  stats.Rx = stats.Ry = 0.0;
} /* InitOutlineStats */
/*---------------------------------------------------------------------------*/
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
/*
@ -569,51 +502,3 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
return (EdgePoint);
} /* NextDirectionChange */
/*---------------------------------------------------------------------------*/
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
register FLOAT32 x1,
register FLOAT32 x2,
register FLOAT32 y1,
register FLOAT32 y2) {
/*
** Parameters:
** OutlineStats statistics to add this segment to
** x1, y1, x2, y2 segment to be added to statistics
** Globals: none
** Operation: This routine adds the statistics for the specified
** line segment to OutlineStats. The statistics that are
** kept are:
** sum of length of all segments
** sum of 2*Mx for all segments
** sum of 2*My for all segments
** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments
** sum of 2*My*(x1+x2) - L*x1*x2 for all segments
** These numbers, once collected can later be used to easily
** compute the center of mass, first and second moments,
** and radii of gyration. (see Dan Johnson's Tesseract lab
** notebook #2, pgs. 74-78).
** Return: none
** Exceptions: none
** History: Fri Dec 14 08:59:17 1990, DSJ, Created.
*/
register FLOAT64 L;
register FLOAT64 Mx2;
register FLOAT64 My2;
/* compute length of segment */
L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
OutlineStats->L += L;
/* compute 2Mx and 2My components */
Mx2 = L * (y1 + y2);
My2 = L * (x1 + x2);
OutlineStats->Mx += Mx2;
OutlineStats->My += My2;
/* compute second moment component */
OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2;
OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2;
} /* UpdateOutlineStats */

View File

@ -50,14 +50,6 @@ typedef enum {
outer, hole
} OUTLINETYPE;
/* Accumulated statistics for a set of outlines. */
typedef struct {
  /* first moment of all outlines */
  FLOAT64 Mx;
  FLOAT64 My;
  /* total length of all outlines */
  FLOAT64 L;
  /* center of mass of all outlines */
  FLOAT64 x;
  FLOAT64 y;
  /* second moments about center of mass axes */
  FLOAT64 Ix;
  FLOAT64 Iy;
  /* radius of gyration about center of mass axes */
  FLOAT64 Rx;
  FLOAT64 Ry;
} OUTLINE_STATS;
/* Normalization method selector (presumably baseline- vs character-based
   normalization -- confirm against the users of this type). */
typedef enum {
  baseline = 0,
  character = 1
} NORM_METHOD;
@ -127,16 +119,6 @@ void ComputeDirection(MFEDGEPT *Start,
FLOAT32 MinSlope,
FLOAT32 MaxSlope);
void FinishOutlineStats(register OUTLINE_STATS *OutlineStats);
void InitOutlineStats(OUTLINE_STATS *OutlineStats);
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);
void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
register FLOAT32 x1,
register FLOAT32 x2,
register FLOAT32 y1,
register FLOAT32 y2);
#endif

View File

@ -176,10 +176,9 @@ class TessLangModEdge : public LangModEdge {
// returns the Hash value of the edge. Used by the SearchNode hash table
// to quickly look up existing edges to converge during search
inline unsigned int Hash() const {
return static_cast<unsigned int>(((start_edge_ | end_edge_) ^
((reinterpret_cast<uintptr_t>(dawg_)))) ^
((unsigned int)edge_mask_) ^
class_id_);
return static_cast<unsigned int>(
((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
((unsigned int)edge_mask_) ^ class_id_);
}
// A verbal description of the edge: Used by visualizers

View File

@ -2669,7 +2669,8 @@ PERF_COUNT_START("HistogramRectOCL")
int numThreads = block_size*numWorkGroups;
size_t local_work_size[] = {static_cast<size_t>(block_size)};
size_t global_work_size[] = {static_cast<size_t>(numThreads)};
size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
size_t red_global_work_size[] = {
static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
/* map histogramAllChannels as write only */
int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;

View File

@ -152,11 +152,11 @@ InputBuffer::~InputBuffer() {
}
}
bool InputBuffer::Read(string *out) {
char buf[BUFSIZ+1];
bool InputBuffer::Read(string* out) {
char buf[BUFSIZ + 1];
int l;
while((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
if(ferror(stream_)) {
while ((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
if (ferror(stream_)) {
clearerr(stream_);
return false;
}

View File

@ -140,6 +140,14 @@ void StringRenderer::set_resolution(const int resolution) {
font_.set_resolution(resolution);
}
// Stores the probability of starting to render a word with an underline,
// after limiting frac to the range [0, 1].
void StringRenderer::set_underline_start_prob(const double frac) {
  const double at_least_zero = max(frac, 0.0);
  underline_start_prob_ = min(at_least_zero, 1.0);
}
// Stores the probability of continuing a started underline to the next word,
// after limiting frac to the range [0, 1].
void StringRenderer::set_underline_continuation_prob(const double frac) {
  const double at_least_zero = max(frac, 0.0);
  underline_continuation_prob_ = min(at_least_zero, 1.0);
}
StringRenderer::~StringRenderer() {
ClearBoxes();
FreePangoCairo();

View File

@ -83,14 +83,10 @@ class StringRenderer {
// Sets the probability (value in [0, 1]) of starting to render a word with an
// underline. This implementation considers words to be space-delimited
// sequences of characters.
void set_underline_start_prob(const double frac) {
underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
}
void set_underline_start_prob(const double frac);
// Set the probability (value in [0, 1]) of continuing a started underline to
// the next word.
void set_underline_continuation_prob(const double frac) {
underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
}
void set_underline_continuation_prob(const double frac);
void set_underline_style(const PangoUnderline style) {
underline_style_ = style;
}

View File

@ -184,9 +184,9 @@ parse_flags() {
TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
fi
if [[ -z ${WORDLIST_FILE} ]]; then
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist.clean
WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist
fi
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams.clean
WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams
NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs