mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 18:02:40 +08:00
Fixed issues 1081-1090
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1046 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
790a3da22f
commit
2fcea93846
@ -1428,10 +1428,10 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
break;
|
||||
}
|
||||
hocr_str += ">";
|
||||
const char *font_name;
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
// TODO(rays): Is hOCR interested in the font name?
|
||||
(void) res_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
&monospace, &serif, &smallcaps,
|
||||
&pointsize, &font_id);
|
||||
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
||||
|
@ -659,12 +659,6 @@ class TESS_API TessBaseAPI {
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
|
||||
/** Sets Dict::params_model_classify_ function to point to the given
|
||||
* function.
|
||||
*/
|
||||
void SetParamsModelClassifyFunc(ParamsModelClassifyFunc f);
|
||||
|
||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
||||
void SetFillLatticeFunc(FillLatticeFunc f);
|
||||
|
||||
@ -704,9 +698,6 @@ class TESS_API TessBaseAPI {
|
||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||
int NumDawgs() const;
|
||||
|
||||
/** Return the language used in the last valid initialization. */
|
||||
const char* GetLastInitLanguage() const;
|
||||
|
||||
/** Returns a ROW object created from the input row specification. */
|
||||
static ROW *MakeTessOCRRow(float baseline, float xheight,
|
||||
float descender, float ascender);
|
||||
|
@ -322,7 +322,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
|
||||
|
||||
/* Walk rows in block testing for row rejection */
|
||||
row_no = 0;
|
||||
while ((word = page_res_it.word()) != NULL &&
|
||||
while (page_res_it.word() != NULL &&
|
||||
page_res_it.block() == current_block) {
|
||||
current_row = page_res_it.row();
|
||||
row_no++;
|
||||
|
@ -96,7 +96,6 @@ class ScriptDetector {
|
||||
ScriptDetector(const GenericVector<int>* allowed_scripts,
|
||||
OSResults* osr, tesseract::Tesseract* tess);
|
||||
void detect_blob(BLOB_CHOICE_LIST* scores);
|
||||
void get_script() ;
|
||||
bool must_stop(int orientation);
|
||||
private:
|
||||
OSResults* osr_;
|
||||
|
@ -728,15 +728,14 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
|
||||
* Normalize word and display in word window
|
||||
*/
|
||||
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
TWERD *bln_word = word_res->chopped_word;
|
||||
if (bln_word == NULL) {
|
||||
if (word_res->chopped_word == NULL) {
|
||||
// Setup word normalization parameters.
|
||||
word_res->SetupForRecognition(unicharset, this, BestPix(),
|
||||
tessedit_ocr_engine_mode, NULL,
|
||||
classify_bln_numeric_mode,
|
||||
textord_use_cjk_fp_model,
|
||||
poly_allow_detailed_fx,
|
||||
row, block);
|
||||
bln_word = word_res->chopped_word;
|
||||
}
|
||||
bln_word_window_handle()->Clear();
|
||||
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
|
||||
|
@ -130,7 +130,8 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
|
||||
// Test for long space-delimited string label.
|
||||
if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
|
||||
(buffptr = strchr(buffptr, '#')) != NULL) {
|
||||
strncpy(uch, buffptr + 1, kBoxReadBufSize);
|
||||
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
|
||||
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
|
||||
chomp_string(uch);
|
||||
uch_len = strlen(uch);
|
||||
}
|
||||
|
@ -1146,15 +1146,12 @@ void Classify::ExpandShapesAndApplyCorrections(
|
||||
if (classes != NULL) {
|
||||
// Adapted result.
|
||||
fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config);
|
||||
if (int_result.Config2 >= 0)
|
||||
fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2);
|
||||
} else {
|
||||
// Pre-trained result.
|
||||
fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config);
|
||||
if (int_result.Config2 >= 0) {
|
||||
fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id,
|
||||
int_result.Config2);
|
||||
}
|
||||
if (shape_table_ != NULL) {
|
||||
// Actually fontinfo_id is an index into the shape_table_ and it
|
||||
// contains a list of unichar_id/font_id pairs.
|
||||
|
@ -42,6 +42,8 @@ struct INT_RESULT_STRUCT {
|
||||
INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {}
|
||||
|
||||
FLOAT32 Rating;
|
||||
// TODO(rays) It might be desirable for these to be able to represent a
|
||||
// null config.
|
||||
uinT8 Config;
|
||||
uinT8 Config2;
|
||||
uinT16 FeatureMisses;
|
||||
|
@ -462,6 +462,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
|
||||
if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
|
||||
tprintf("Bad format of font spacing file %s\n", filename);
|
||||
fclose(fontinfo_file);
|
||||
delete spacing;
|
||||
return false;
|
||||
}
|
||||
if (!valid || !unicharset_.contains_unichar(kerned_uch)) continue;
|
||||
|
@ -223,6 +223,7 @@ Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) {
|
||||
|
||||
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
||||
delete bmp_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return bmp_obj;
|
||||
@ -320,6 +321,7 @@ Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) {
|
||||
|
||||
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
||||
delete bmp_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return bmp_obj;
|
||||
|
@ -83,6 +83,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
|
||||
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
|
||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
|
||||
"reading line: %s\n", str_vec[big].c_str());
|
||||
delete char_bigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -117,6 +118,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
|
||||
if (bigram == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
|
||||
"memory for bigram.\n");
|
||||
delete char_bigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -280,7 +280,11 @@ double StructuredTable::CalculateCellFilledPercentage(int row, int column) {
|
||||
if (text->IsTextType())
|
||||
area_covered += text->bounding_box().intersection(kCellBox).area();
|
||||
}
|
||||
return MIN(1.0, area_covered / kCellBox.area());
|
||||
const inT32 current_area = kCellBox.area();
|
||||
if (current_area == 0) {
|
||||
return 1.0;
|
||||
}
|
||||
return MIN(1.0, area_covered / current_area);
|
||||
}
|
||||
|
||||
void StructuredTable::Display(ScrollView* window, ScrollView::Color color) {
|
||||
|
@ -1037,6 +1037,7 @@ BOOL8 fixed_pitch_row(TO_ROW *row, // row to do
|
||||
break;
|
||||
case PITCH_MAYBE_FIXED:
|
||||
res_string = "MF";
|
||||
break;
|
||||
default:
|
||||
res_string = "??";
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user