diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 87d00ac25..5be3232ea 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1428,12 +1428,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) { break; } hocr_str += ">"; - const char *font_name; bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + // TODO(rays): Is hOCR interested in the font name? + (void) res_it->WordFontAttributes(&bold, &italic, &underlined, + &monospace, &serif, &smallcaps, + &pointsize, &font_id); bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); diff --git a/api/baseapi.h b/api/baseapi.h index 835625dba..40e77632b 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -659,12 +659,6 @@ class TESS_API TessBaseAPI { */ void SetProbabilityInContextFunc(ProbabilityInContextFunc f); - - /** Sets Dict::params_model_classify_ function to point to the given - * function. - */ - void SetParamsModelClassifyFunc(ParamsModelClassifyFunc f); - /** Sets Wordrec::fill_lattice_ function to point to the given function. */ void SetFillLatticeFunc(FillLatticeFunc f); @@ -704,9 +698,6 @@ class TESS_API TessBaseAPI { /** Return the number of dawgs loaded into tesseract_ object. */ int NumDawgs() const; - /** Return the language used in the last valid initialization. */ - const char* GetLastInitLanguage() const; - /** Returns a ROW object created from the input row specification. */ static ROW *MakeTessOCRRow(float baseline, float xheight, float descender, float ascender); diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index 16141d82b..30bf193cc 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -322,7 +322,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks /* Walk rows in block testing for row rejection */ row_no = 0; - while ((word = page_res_it.word()) != NULL && + while (page_res_it.word() != NULL && page_res_it.block() == current_block) { current_row = page_res_it.row(); row_no++; diff --git a/ccmain/osdetect.h b/ccmain/osdetect.h index 1972c3095..dec07d584 100644 --- a/ccmain/osdetect.h +++ b/ccmain/osdetect.h @@ -96,7 +96,6 @@ class ScriptDetector { ScriptDetector(const GenericVector* allowed_scripts, OSResults* osr, tesseract::Tesseract* tess); void detect_blob(BLOB_CHOICE_LIST* scores); - void get_script() ; bool must_stop(int orientation); private: OSResults* osr_; diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp index 9a9de439d..99ab51468 100644 --- a/ccmain/pgedit.cpp +++ b/ccmain/pgedit.cpp @@ -728,15 +728,14 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row, * Normalize word and display in word window */ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) { - TWERD *bln_word = word_res->chopped_word; - if (bln_word == NULL) { + if (word_res->chopped_word == NULL) { + // Setup word normalization parameters. word_res->SetupForRecognition(unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, classify_bln_numeric_mode, textord_use_cjk_fp_model, poly_allow_detailed_fx, row, block); - bln_word = word_res->chopped_word; } bln_word_window_handle()->Clear(); display_bln_lines(bln_word_window_handle(), ScrollView::CYAN, diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp index cdf63e535..2dcadbed1 100644 --- a/ccstruct/boxread.cpp +++ b/ccstruct/boxread.cpp @@ -130,7 +130,8 @@ bool ParseBoxFileStr(const char* boxfile_str, int* page_number, // Test for long space-delimited string label. if (strcmp(uch, kMultiBlobLabelCode) == 0 && (buffptr = strchr(buffptr, '#')) != NULL) { - strncpy(uch, buffptr + 1, kBoxReadBufSize); + strncpy(uch, buffptr + 1, kBoxReadBufSize - 1); + uch[kBoxReadBufSize - 1] = '\0'; // Prevent buffer overrun. chomp_string(uch); uch_len = strlen(uch); } diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index a4dbed9d9..1aef80613 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -1146,15 +1146,12 @@ void Classify::ExpandShapesAndApplyCorrections( if (classes != NULL) { // Adapted result. fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config); - if (int_result.Config2 >= 0) - fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2); + fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2); } else { // Pre-trained result. fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config); - if (int_result.Config2 >= 0) { - fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id, - int_result.Config2); - } + fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id, + int_result.Config2); if (shape_table_ != NULL) { // Actually fontinfo_id is an index into the shape_table_ and it // contains a list of unchar_id/font_id pairs. diff --git a/classify/intmatcher.h b/classify/intmatcher.h index d2d4c7e7c..8df6d6fdb 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -42,6 +42,8 @@ struct INT_RESULT_STRUCT { INT_RESULT_STRUCT() : Rating(0.0f), Config(0), Config2(0), FeatureMisses(0) {} FLOAT32 Rating; + // TODO(rays) It might be desirable for these to be able to represent a + // null config. uinT8 Config; uinT8 Config2; uinT16 FeatureMisses; diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp index 67c0e62c0..965e996a1 100644 --- a/classify/mastertrainer.cpp +++ b/classify/mastertrainer.cpp @@ -462,6 +462,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) { if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) { tprintf("Bad format of font spacing file %s\n", filename); fclose(fontinfo_file); + delete spacing; return false; } if (!valid || !unicharset_.contains_unichar(kerned_uch)) continue; diff --git a/cube/bmp_8.cpp b/cube/bmp_8.cpp index dba957a99..c2d86b5ae 100644 --- a/cube/bmp_8.cpp +++ b/cube/bmp_8.cpp @@ -223,6 +223,7 @@ Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) { if (bmp_obj->LoadFromCharDumpFile(fp) == false) { delete bmp_obj; + return NULL; } return bmp_obj; @@ -320,6 +321,7 @@ Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) { if (bmp_obj->LoadFromCharDumpFile(fp) == false) { delete bmp_obj; + return NULL; } return bmp_obj; diff --git a/cube/char_bigrams.cpp b/cube/char_bigrams.cpp index b565a2f44..b36b1f6cd 100644 --- a/cube/char_bigrams.cpp +++ b/cube/char_bigrams.cpp @@ -83,6 +83,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path, if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) { fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format " "reading line: %s\n", str_vec[big].c_str()); + delete char_bigrams_obj; return NULL; } @@ -117,6 +118,7 @@ CharBigrams *CharBigrams::Create(const string &data_file_path, if (bigram == NULL) { fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating " "memory for bigram.\n"); + delete char_bigrams_obj; return NULL; } diff --git a/textord/tablerecog.cpp b/textord/tablerecog.cpp index 09d84c788..598436aaf 100644 --- a/textord/tablerecog.cpp +++ b/textord/tablerecog.cpp @@ -280,7 +280,11 @@ double StructuredTable::CalculateCellFilledPercentage(int row, int column) { if (text->IsTextType()) area_covered += text->bounding_box().intersection(kCellBox).area(); } - return MIN(1.0, area_covered / kCellBox.area()); + const inT32 current_area = kCellBox.area(); + if (current_area == 0) { + return 1.0; + } + return MIN(1.0, area_covered / current_area); } void StructuredTable::Display(ScrollView* window, ScrollView::Color color) { diff --git a/textord/topitch.cpp b/textord/topitch.cpp index aa3b4aa80..78fcf77d3 100644 --- a/textord/topitch.cpp +++ b/textord/topitch.cpp @@ -1037,6 +1037,7 @@ BOOL8 fixed_pitch_row(TO_ROW *row, // row to do break; case PITCH_MAYBE_FIXED: res_string = "MF"; + break; default: res_string = "??"; }