mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-10 20:23:12 +08:00
Fixed issues 1081-1090
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1046 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
790a3da22f
commit
2fcea93846
@ -1428,10 +1428,10 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
hocr_str += ">";
|
hocr_str += ">";
|
||||||
const char *font_name;
|
|
||||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||||
int pointsize, font_id;
|
int pointsize, font_id;
|
||||||
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
|
// TODO(rays): Is hOCR interested in the font name?
|
||||||
|
(void) res_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||||
&monospace, &serif, &smallcaps,
|
&monospace, &serif, &smallcaps,
|
||||||
&pointsize, &font_id);
|
&pointsize, &font_id);
|
||||||
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
||||||
|
@ -659,12 +659,6 @@ class TESS_API TessBaseAPI {
|
|||||||
*/
|
*/
|
||||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||||
|
|
||||||
|
|
||||||
/** Sets Dict::params_model_classify_ function to point to the given
|
|
||||||
* function.
|
|
||||||
*/
|
|
||||||
void SetParamsModelClassifyFunc(ParamsModelClassifyFunc f);
|
|
||||||
|
|
||||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
||||||
void SetFillLatticeFunc(FillLatticeFunc f);
|
void SetFillLatticeFunc(FillLatticeFunc f);
|
||||||
|
|
||||||
@ -704,9 +698,6 @@ class TESS_API TessBaseAPI {
|
|||||||
/** Return the number of dawgs loaded into tesseract_ object. */
|
/** Return the number of dawgs loaded into tesseract_ object. */
|
||||||
int NumDawgs() const;
|
int NumDawgs() const;
|
||||||
|
|
||||||
/** Return the language used in the last valid initialization. */
|
|
||||||
const char* GetLastInitLanguage() const;
|
|
||||||
|
|
||||||
/** Returns a ROW object created from the input row specification. */
|
/** Returns a ROW object created from the input row specification. */
|
||||||
static ROW *MakeTessOCRRow(float baseline, float xheight,
|
static ROW *MakeTessOCRRow(float baseline, float xheight,
|
||||||
float descender, float ascender);
|
float descender, float ascender);
|
||||||
|
@ -322,7 +322,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
|
|||||||
|
|
||||||
/* Walk rows in block testing for row rejection */
|
/* Walk rows in block testing for row rejection */
|
||||||
row_no = 0;
|
row_no = 0;
|
||||||
while ((word = page_res_it.word()) != NULL &&
|
while (page_res_it.word() != NULL &&
|
||||||
page_res_it.block() == current_block) {
|
page_res_it.block() == current_block) {
|
||||||
current_row = page_res_it.row();
|
current_row = page_res_it.row();
|
||||||
row_no++;
|
row_no++;
|
||||||
|
@ -96,7 +96,6 @@ class ScriptDetector {
|
|||||||
ScriptDetector(const GenericVector<int>* allowed_scripts,
|
ScriptDetector(const GenericVector<int>* allowed_scripts,
|
||||||
OSResults* osr, tesseract::Tesseract* tess);
|
OSResults* osr, tesseract::Tesseract* tess);
|
||||||
void detect_blob(BLOB_CHOICE_LIST* scores);
|
void detect_blob(BLOB_CHOICE_LIST* scores);
|
||||||
void get_script() ;
|
|
||||||
bool must_stop(int orientation);
|
bool must_stop(int orientation);
|
||||||
private:
|
private:
|
||||||
OSResults* osr_;
|
OSResults* osr_;
|
||||||
|
@ -728,15 +728,14 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
|
|||||||
* Normalize word and display in word window
|
* Normalize word and display in word window
|
||||||
*/
|
*/
|
||||||
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||||
TWERD *bln_word = word_res->chopped_word;
|
if (word_res->chopped_word == NULL) {
|
||||||
if (bln_word == NULL) {
|
// Setup word normalization parameters.
|
||||||
word_res->SetupForRecognition(unicharset, this, BestPix(),
|
word_res->SetupForRecognition(unicharset, this, BestPix(),
|
||||||
tessedit_ocr_engine_mode, NULL,
|
tessedit_ocr_engine_mode, NULL,
|
||||||
classify_bln_numeric_mode,
|
classify_bln_numeric_mode,
|
||||||
textord_use_cjk_fp_model,
|
textord_use_cjk_fp_model,
|
||||||
poly_allow_detailed_fx,
|
poly_allow_detailed_fx,
|
||||||
row, block);
|
row, block);
|
||||||
bln_word = word_res->chopped_word;
|
|
||||||
}
|
}
|
||||||
bln_word_window_handle()->Clear();
|
bln_word_window_handle()->Clear();
|
||||||
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
|
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
|
||||||
|
@ -130,7 +130,8 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
|
|||||||
// Test for long space-delimited string label.
|
// Test for long space-delimited string label.
|
||||||
if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
|
if (strcmp(uch, kMultiBlobLabelCode) == 0 &&
|
||||||
(buffptr = strchr(buffptr, '#')) != NULL) {
|
(buffptr = strchr(buffptr, '#')) != NULL) {
|
||||||
strncpy(uch, buffptr + 1, kBoxReadBufSize);
|
strncpy(uch, buffptr + 1, kBoxReadBufSize - 1);
|
||||||
|
uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun.
|
||||||
chomp_string(uch);
|
chomp_string(uch);
|
||||||
uch_len = strlen(uch);
|
uch_len = strlen(uch);
|
||||||
}
|
}
|
||||||
|
@ -1146,15 +1146,12 @@ void Classify::ExpandShapesAndApplyCorrections(
|
|||||||
if (classes != NULL) {
|
if (classes != NULL) {
|
||||||
// Adapted result.
|
// Adapted result.
|
||||||
fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config);
|
fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config);
|
||||||
if (int_result.Config2 >= 0)
|
|
||||||
fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2);
|
fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2);
|
||||||
} else {
|
} else {
|
||||||
// Pre-trained result.
|
// Pre-trained result.
|
||||||
fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config);
|
fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config);
|
||||||
if (int_result.Config2 >= 0) {
|
|
||||||
fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id,
|
fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id,
|
||||||
int_result.Config2);
|
int_result.Config2);
|
||||||
}
|
|
||||||
if (shape_table_ != NULL) {
|
if (shape_table_ != NULL) {
|
||||||
// Actually fontinfo_id is an index into the shape_table_ and it
|
// Actually fontinfo_id is an index into the shape_table_ and it
|
||||||
// contains a list of unchar_id/font_id pairs.
|
// contains a list of unchar_id/font_id pairs.
|
||||||
|
@ -42,6 +42,8 @@ struct INT_RESULT_STRUCT {
|
|||||||
INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {}
|
INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {}
|
||||||
|
|
||||||
FLOAT32 Rating;
|
FLOAT32 Rating;
|
||||||
|
// TODO(rays) It might be desirable for these to be able to represent a
|
||||||
|
// null config.
|
||||||
uinT8 Config;
|
uinT8 Config;
|
||||||
uinT8 Config2;
|
uinT8 Config2;
|
||||||
uinT16 FeatureMisses;
|
uinT16 FeatureMisses;
|
||||||
|
@ -462,6 +462,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
|
|||||||
if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
|
if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
|
||||||
tprintf("Bad format of font spacing file %s\n", filename);
|
tprintf("Bad format of font spacing file %s\n", filename);
|
||||||
fclose(fontinfo_file);
|
fclose(fontinfo_file);
|
||||||
|
delete spacing;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!valid || !unicharset_.contains_unichar(kerned_uch)) continue;
|
if (!valid || !unicharset_.contains_unichar(kerned_uch)) continue;
|
||||||
|
@ -223,6 +223,7 @@ Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) {
|
|||||||
|
|
||||||
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
||||||
delete bmp_obj;
|
delete bmp_obj;
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return bmp_obj;
|
return bmp_obj;
|
||||||
@ -320,6 +321,7 @@ Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) {
|
|||||||
|
|
||||||
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
if (bmp_obj->LoadFromCharDumpFile(fp) == false) {
|
||||||
delete bmp_obj;
|
delete bmp_obj;
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return bmp_obj;
|
return bmp_obj;
|
||||||
|
@ -83,6 +83,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
|
|||||||
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
|
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
|
||||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
|
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
|
||||||
"reading line: %s\n", str_vec[big].c_str());
|
"reading line: %s\n", str_vec[big].c_str());
|
||||||
|
delete char_bigrams_obj;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,6 +118,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path,
|
|||||||
if (bigram == NULL) {
|
if (bigram == NULL) {
|
||||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
|
fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
|
||||||
"memory for bigram.\n");
|
"memory for bigram.\n");
|
||||||
|
delete char_bigrams_obj;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,7 +280,11 @@ double StructuredTable::CalculateCellFilledPercentage(int row, int column) {
|
|||||||
if (text->IsTextType())
|
if (text->IsTextType())
|
||||||
area_covered += text->bounding_box().intersection(kCellBox).area();
|
area_covered += text->bounding_box().intersection(kCellBox).area();
|
||||||
}
|
}
|
||||||
return MIN(1.0, area_covered / kCellBox.area());
|
const inT32 current_area = kCellBox.area();
|
||||||
|
if (current_area == 0) {
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
return MIN(1.0, area_covered / current_area);
|
||||||
}
|
}
|
||||||
|
|
||||||
void StructuredTable::Display(ScrollView* window, ScrollView::Color color) {
|
void StructuredTable::Display(ScrollView* window, ScrollView::Color color) {
|
||||||
|
@ -1037,6 +1037,7 @@ BOOL8 fixed_pitch_row(TO_ROW *row, // row to do
|
|||||||
break;
|
break;
|
||||||
case PITCH_MAYBE_FIXED:
|
case PITCH_MAYBE_FIXED:
|
||||||
res_string = "MF";
|
res_string = "MF";
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
res_string = "??";
|
res_string = "??";
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user