Replace remaining STRING by std::string in src/ccmain

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-03-14 10:48:06 +01:00
parent d7823a71c2
commit db9f963411
21 changed files with 112 additions and 135 deletions

View File

@ -1658,7 +1658,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se
/* Single Leading punctuation char*/
if (s[offset] != '\0' && STRING(chs_leading_punct).contains(s[offset]))
if (s[offset] != '\0' && chs_leading_punct.contains(s[offset]))
offset += lengths[i++];
leading_punct_count = i;
@ -1705,10 +1705,10 @@ Allow a single hyphen in a lower case word
}
/* Up to two different, constrained trailing punctuation chars */
if (lengths[i] == 1 && s[offset] != '\0' && STRING(chs_trailing_punct1).contains(s[offset]))
if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset]))
offset += lengths[i++];
if (lengths[i] == 1 && s[offset] != '\0' && i > 0 && s[offset - lengths[i - 1]] != s[offset] &&
STRING(chs_trailing_punct2).contains(s[offset]))
chs_trailing_punct2.contains(s[offset]))
offset += lengths[i++];
if (s[offset] != '\0')

View File

@ -107,9 +107,9 @@ void Tesseract::unrej_good_chs(WERD_RES *word) {
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
int expected_outline_count;
if (STRING(outlines_odd).contains(c))
if (outlines_odd.contains(c))
return 0; // Don't use this char
else if (STRING(outlines_2).contains(c))
else if (outlines_2.contains(c))
expected_outline_count = 2;
else
expected_outline_count = 1;

View File

@ -216,7 +216,7 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th
BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(const UNICHARSET &unicharset,
const UNICHAR_ID id) const {
const STRING s = unicharset.id_to_unichar(id);
const std::string s = unicharset.id_to_unichar(id);
if (unicharset.get_isalpha(id)) {
return BSTT_NONE;
}
@ -237,8 +237,8 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(const UNICHARSET &uni
// Check if it is digit. In addition to the isdigit attribute, we also check
// if this character belongs to those likely to be confused with a digit.
static const STRING kDigitsChars = "|";
if (unicharset.get_isdigit(id) || (s.length() == 1 && kDigitsChars.contains(s[0]))) {
static const char kDigitsChars[] = "|";
if (unicharset.get_isdigit(id) || (s.length() == 1 && strchr(kDigitsChars, s[0]) != nullptr)) {
return BSTT_DIGIT;
} else {
return BSTT_MATH;
@ -286,8 +286,8 @@ void EquationDetect::IdentifySpecialText() {
lang_tesseract_->classify_integer_matcher_multiplier.set_value(classify_integer_matcher);
if (equationdetect_save_spt_image) { // For debug.
STRING outfile;
GetOutputTiffName("_spt", &outfile);
std::string outfile;
GetOutputTiffName("_spt", outfile);
PaintSpecialTexts(outfile);
}
}
@ -351,11 +351,11 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
part_grid_ = part_grid;
best_columns_ = best_columns;
resolution_ = lang_tesseract_->source_resolution();
STRING outfile;
std::string outfile;
page_count_++;
if (equationdetect_save_bi_image) {
GetOutputTiffName("_bi", &outfile);
GetOutputTiffName("_bi", outfile);
pixWrite(outfile.c_str(), lang_tesseract_->pix_binary(), IFF_TIFF_G4);
}
@ -371,7 +371,7 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
IdentifyInlineParts();
if (equationdetect_save_seed_image) {
GetOutputTiffName("_seed", &outfile);
GetOutputTiffName("_seed", outfile);
PaintColParts(outfile);
}
@ -396,7 +396,7 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
ProcessMathBlockSatelliteParts();
if (equationdetect_save_merged_image) { // For debug.
GetOutputTiffName("_merged", &outfile);
GetOutputTiffName("_merged", outfile);
PaintColParts(outfile);
}
@ -1383,14 +1383,14 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
}
void EquationDetect::GetOutputTiffName(const char *name, STRING *image_name) const {
ASSERT_HOST(image_name && name);
void EquationDetect::GetOutputTiffName(const char *name, std::string &image_name) const {
ASSERT_HOST(name);
char page[50];
snprintf(page, sizeof(page), "%04d", page_count_);
*image_name = STRING(lang_tesseract_->imagebasename) + page + name + ".tif";
image_name = (lang_tesseract_->imagebasename) + page + name + ".tif";
}
void EquationDetect::PaintSpecialTexts(const STRING &outfile) const {
void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary();
pix = pixConvertTo32(pixBi);
ColPartitionGridSearch gsearch(part_grid_);
@ -1407,7 +1407,7 @@ void EquationDetect::PaintSpecialTexts(const STRING &outfile) const {
pixDestroy(&pix);
}
void EquationDetect::PaintColParts(const STRING &outfile) const {
void EquationDetect::PaintColParts(const std::string &outfile) const {
Pix *pix = pixConvertTo32(lang_tesseract_->BestPix());
ColPartitionGridSearch gsearch(part_grid_);
gsearch.StartFullSearch();

View File

@ -202,16 +202,16 @@ protected:
bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;
// Generate the tiff file name for output/debug file.
void GetOutputTiffName(const char *name, STRING *image_name) const;
void GetOutputTiffName(const char *name, std::string &image_name) const;
// Debugger function that renders ColPartitions on the input image, where:
// parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
// will be painted in green, and other parts will be painted in blue.
void PaintColParts(const STRING &outfile) const;
void PaintColParts(const std::string &outfile) const;
// Debugger function that renders the blobs in part_grid_ over the input
// image.
void PaintSpecialTexts(const STRING &outfile) const;
void PaintSpecialTexts(const std::string &outfile) const;
// Debugger function that prints the math blobs density values for a
// ColPartition object.

View File

@ -37,7 +37,6 @@
#include <tesseract/ocrclass.h> // for ETEXT_DESC
#include <tesseract/unichar.h> // for UNICHAR_ID
#include "strngs.h" // for STRING
#include <cstdint> // for INT16_MAX, int16_t, int32_t
@ -288,7 +287,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
((word_done && word->best_choice->unichar_lengths().c_str()[0] == 1 &&
word->best_choice->unichar_string()[0] == '1') ||
(!word_done &&
STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[0])))))) {
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[0])))))) {
total_score += prev_word_score;
if (prev_word_done)
done_word_count++;
@ -330,7 +329,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
prev_char_1 =
((word_done && (word->best_choice->unichar_string()[offset] == '1')) ||
(!word_done &&
STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[offset])));
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[offset])));
}
/* Find next word */
do {
@ -356,7 +355,7 @@ bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
word->uch_set->get_isdigit(word->best_choice->unichar_string().c_str() + offset,
word->best_choice->unichar_lengths()[i]) ||
(word->best_choice->permuter() == NUMBER_PERM &&
STRING(numeric_punctuation).contains(word->best_choice->unichar_string().c_str()[offset])));
numeric_punctuation.contains(word->best_choice->unichar_string().c_str()[offset])));
}
/**

View File

@ -24,8 +24,6 @@
#include <allheaders.h>
#include "strngs.h"
namespace tesseract {
LTRResultIterator::LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
@ -46,7 +44,7 @@ LTRResultIterator::~LTRResultIterator() = default;
char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == nullptr)
return nullptr; // Already at the end!
STRING text;
std::string text;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE *best_choice = res_it.word()->best_choice;
ASSERT_HOST(best_choice != nullptr);
@ -306,7 +304,7 @@ bool LTRResultIterator::EquivalentToTruth(const char *str) const {
char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString())
return nullptr;
STRING truth_text = it_->word()->blamer_bundle->TruthString();
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char *result = new char[length];
strncpy(result, truth_text.c_str(), length);
@ -318,7 +316,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
char *LTRResultIterator::WordNormedUTF8Text() const {
if (it_->word() == nullptr)
return nullptr; // Already at the end!
STRING ocr_text;
std::string ocr_text;
WERD_CHOICE *best_choice = it_->word()->best_choice;
const UNICHARSET *unicharset = it_->word()->uch_set;
ASSERT_HOST(best_choice != nullptr);

View File

@ -32,7 +32,6 @@
#include "ratngs.h" // for WERD_CHOICE
#include "rect.h" // for TBOX
#include "statistc.h" // for STATS
#include "strngs.h" // for STRING
#include "tprintf.h" // for tprintf
#include "unicharset.h" // for UNICHARSET
#include "werd.h" // for WERD, W_REP_CHAR
@ -91,16 +90,9 @@ static bool AcceptableRowArgs(int debug_level, int min_num_rows, const char *fun
// =============================== Debug Code ================================
// Convert an integer to a decimal string.
static STRING StrOf(int num) {
char buffer[30];
snprintf(buffer, sizeof(buffer), "%d", num);
return STRING(buffer);
}
// Given a row-major matrix of unicode text and a column separator, print
// a formatted table. For ASCII, we get good column alignment.
static void PrintTable(const std::vector<std::vector<STRING>> &rows, const STRING &colsep) {
static void PrintTable(const std::vector<std::vector<std::string>> &rows, const char *colsep) {
std::vector<int> max_col_widths;
for (const auto &row : rows) {
int num_columns = row.size();
@ -119,56 +111,56 @@ static void PrintTable(const std::vector<std::vector<STRING>> &rows, const STRIN
}
}
std::vector<STRING> col_width_patterns;
std::vector<std::string> col_width_patterns;
for (int c = 0; c < max_col_widths.size(); c++) {
col_width_patterns.push_back(STRING("%-") + StrOf(max_col_widths[c]) + "s");
col_width_patterns.push_back(std::string("%-") + std::to_string(max_col_widths[c]) + "s");
}
for (int r = 0; r < rows.size(); r++) {
for (int c = 0; c < rows[r].size(); c++) {
if (c > 0)
tprintf("%s", colsep.c_str());
tprintf("%s", colsep);
tprintf(col_width_patterns[c].c_str(), rows[r][c].c_str());
}
tprintf("\n");
}
}
static STRING RtlEmbed(const STRING &word, bool rtlify) {
static std::string RtlEmbed(const std::string &word, bool rtlify) {
if (rtlify)
return STRING(kRLE) + word + STRING(kPDF);
return std::string(kRLE) + word + std::string(kPDF);
return word;
}
// Print the current thoughts of the paragraph detector.
static void PrintDetectorState(const ParagraphTheory &theory,
const GenericVector<RowScratchRegisters> &rows) {
std::vector<std::vector<STRING>> output;
output.push_back(std::vector<STRING>());
std::vector<std::vector<std::string>> output;
output.push_back(std::vector<std::string>());
output.back().push_back("#row");
output.back().push_back("space");
output.back().push_back("..");
output.back().push_back("lword[widthSEL]");
output.back().push_back("rword[widthSEL]");
RowScratchRegisters::AppendDebugHeaderFields(&output.back());
RowScratchRegisters::AppendDebugHeaderFields(output.back());
output.back().push_back("text");
for (int i = 0; i < rows.size(); i++) {
output.push_back(std::vector<STRING>());
std::vector<STRING> &row = output.back();
output.push_back(std::vector<std::string>());
std::vector<std::string> &row = output.back();
const RowInfo &ri = *rows[i].ri_;
row.push_back(StrOf(i));
row.push_back(StrOf(ri.average_interword_space));
row.push_back(std::to_string(i));
row.push_back(std::to_string(ri.average_interword_space));
row.push_back(ri.has_leaders ? ".." : " ");
row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + "[" + StrOf(ri.lword_box.width()) +
row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + "[" + std::to_string(ri.lword_box.width()) +
(ri.lword_likely_starts_idea ? "S" : "s") +
(ri.lword_likely_ends_idea ? "E" : "e") +
(ri.lword_indicates_list_item ? "L" : "l") + "]");
row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + "[" + StrOf(ri.rword_box.width()) +
row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + "[" + std::to_string(ri.rword_box.width()) +
(ri.rword_likely_starts_idea ? "S" : "s") +
(ri.rword_likely_ends_idea ? "E" : "e") +
(ri.rword_indicates_list_item ? "L" : "l") + "]");
rows[i].AppendDebugInfo(theory, &row);
rows[i].AppendDebugInfo(theory, row);
row.push_back(RtlEmbed(ri.text, !ri.ltr));
}
PrintTable(output, " ");
@ -180,11 +172,11 @@ static void PrintDetectorState(const ParagraphTheory &theory,
}
}
static void DebugDump(bool should_print, const STRING &phase, const ParagraphTheory &theory,
static void DebugDump(bool should_print, const char *phase, const ParagraphTheory &theory,
const GenericVector<RowScratchRegisters> &rows) {
if (!should_print)
return;
tprintf("# %s\n", phase.c_str());
tprintf("# %s\n", phase);
PrintDetectorState(theory, rows);
}
@ -240,7 +232,7 @@ static const char *SkipOne(const char *str, const char *toskip) {
// Return whether it is very likely that this is a numeral marker that could
// start a list item. Some examples include:
// A I iii. VI (2) 3.5. [C-4]
static bool LikelyListNumeral(const STRING &word) {
static bool LikelyListNumeral(const std::string &word) {
const char *kRomans = "ivxlmdIVXLMD";
const char *kDigits = "012345789";
const char *kOpen = "[{(";
@ -274,12 +266,12 @@ static bool LikelyListNumeral(const STRING &word) {
return *pos == '\0';
}
static bool LikelyListMark(const STRING &word) {
static bool LikelyListMark(const std::string &word) {
const char *kListMarks = "0Oo*.,+.";
return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr;
}
bool AsciiLikelyListItem(const STRING &word) {
bool AsciiLikelyListItem(const std::string &word) {
return LikelyListMark(word) || LikelyListNumeral(word);
}
@ -348,7 +340,7 @@ int UnicodeSpanSkipper::SkipAlpha(int pos) {
static bool LikelyListMarkUnicode(int ch) {
if (ch < 0x80) {
STRING single_ch;
std::string single_ch;
single_ch += ch;
return LikelyListMark(single_ch);
}
@ -413,7 +405,7 @@ static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
// is_list - this word might be a list number or bullet.
// starts_idea - this word is likely to start a sentence.
// ends_idea - this word is likely to end a sentence.
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea) {
*is_list = false;
*starts_idea = false;
@ -459,7 +451,7 @@ void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, c
// is_list - this word might be a list number or bullet.
// starts_idea - this word is likely to start a sentence.
// ends_idea - this word is likely to end a sentence.
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea) {
*is_list = false;
*starts_idea = false;
@ -492,17 +484,17 @@ void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
// =============== Implementation of RowScratchRegisters =====================
/* static */
void RowScratchRegisters::AppendDebugHeaderFields(std::vector<STRING> *header) {
header->push_back("[lmarg,lind;rind,rmarg]");
header->push_back("model");
void RowScratchRegisters::AppendDebugHeaderFields(std::vector<std::string> &header) {
header.push_back("[lmarg,lind;rind,rmarg]");
header.push_back("model");
}
void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
std::vector<STRING> *dbg) const {
std::vector<std::string> &dbg) const {
char s[30];
snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, rmargin_);
dbg->push_back(s);
STRING model_string;
dbg.push_back(s);
std::string model_string;
model_string += static_cast<char>(GetLineType());
model_string += ":";
@ -513,7 +505,7 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
if (model_numbers > 0)
model_string += ",";
if (StrongModel(hypotheses_[h].model)) {
model_string += StrOf(1 + theory.IndexOf(hypotheses_[h].model));
model_string += std::to_string(1 + theory.IndexOf(hypotheses_[h].model));
} else if (hypotheses_[h].model == kCrownLeft) {
model_string += "CrL";
} else if (hypotheses_[h].model == kCrownRight) {
@ -524,7 +516,7 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
if (model_numbers == 0)
model_string += "0";
dbg->push_back(model_string);
dbg.push_back(model_string);
}
void RowScratchRegisters::Init(const RowInfo &row) {
@ -2323,7 +2315,7 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowInfo *info) {
// Set up text, lword_text, and rword_text (mostly for debug printing).
STRING fake_text;
std::string fake_text;
PageIterator pit(static_cast<const PageIterator &>(it));
bool first_word = true;
if (!pit.Empty(RIL_WORD)) {

View File

@ -21,8 +21,8 @@
#define TESSERACT_CCMAIN_PARAGRAPHS_H_
#include <list>
#include <string>
#include "rect.h" // for TBOX
#include "strngs.h" // for STRING
namespace tesseract {
@ -41,7 +41,7 @@ class GenericVector;
class RowInfo {
public:
// Constant data derived from Tesseract output.
STRING text; // the full UTF-8 text of the line.
std::string text; // the full UTF-8 text of the line.
bool ltr; // whether the majority of the text is left-to-right
// TODO(eger) make this more fine-grained.
@ -56,8 +56,8 @@ public:
TBOX lword_box; // in normalized (horiz text rows) space
TBOX rword_box; // in normalized (horiz text rows) space
STRING lword_text; // the UTF-8 text of the leftmost werd
STRING rword_text; // the UTF-8 text of the rightmost werd
std::string lword_text; // the UTF-8 text of the leftmost werd
std::string rword_text; // the UTF-8 text of the rightmost werd
// The text of a paragraph typically starts with the start of an idea and
// ends with the end of an idea. Here we define paragraph as something that

View File

@ -32,7 +32,7 @@ class WERD_CHOICE;
// Return whether the given word is likely to be a list item start word.
TESS_API
bool AsciiLikelyListItem(const STRING &word);
bool AsciiLikelyListItem(const std::string &word);
// Return the first Unicode Codepoint from werd[pos].
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
@ -40,12 +40,12 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
// Set right word attributes given either a unicharset and werd or a utf8
// string.
TESS_API
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);
// Set left word attributes given either a unicharset and werd or a utf8 string.
TESS_API
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);
enum LineType {
@ -171,10 +171,10 @@ public:
}
// Append header fields to a vector of row headings.
static void AppendDebugHeaderFields(std::vector<STRING> *header);
static void AppendDebugHeaderFields(std::vector<std::string> &header);
// Append data for this row to a vector of debug strings.
void AppendDebugInfo(const ParagraphTheory &theory, std::vector<STRING> *dbg) const;
void AppendDebugInfo(const ParagraphTheory &theory, std::vector<std::string> &dbg) const;
const RowInfo *ri_;

View File

@ -151,11 +151,7 @@ std::string ParamContent::GetValue() const {
} else if (param_type_ == VT_DOUBLE) {
result += std::to_string(*dIt);
} else if (param_type_ == VT_STRING) {
if (STRING(*(sIt)).c_str() != nullptr) {
result = sIt->c_str();
} else {
result = "Null";
}
result = sIt->c_str();
}
return result;
}
@ -183,8 +179,8 @@ void ParamContent::SetValue(const char *val) {
// Gets up to the first 3 prefixes from s (split by _).
// For example, tesseract_foo_bar will be split into tesseract, foo and bar.
void ParamsEditor::GetPrefixes(const char *s, STRING *level_one, STRING *level_two,
STRING *level_three) {
void ParamsEditor::GetPrefixes(const char *s, std::string *level_one, std::string *level_two,
std::string *level_three) {
std::unique_ptr<char[]> p(new char[1024]);
GetFirstWords(s, 1, p.get());
*level_one = p.get();
@ -234,9 +230,9 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
// Count the # of entries starting with a specific prefix.
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
ParamContent *vc = vc_it.data();
STRING tag;
STRING tag2;
STRING tag3;
std::string tag;
std::string tag2;
std::string tag3;
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
amount[tag.c_str()]++;
@ -252,9 +248,9 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
vc_it.move_to_first();
for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
ParamContent *vc = vc_it.data();
STRING tag;
STRING tag2;
STRING tag3;
std::string tag;
std::string tag2;
std::string tag3;
GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
if (amount[tag.c_str()] == 1) {
@ -304,7 +300,7 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {
SVMenuNode *svMenuRoot = BuildListOfAllLeaves(tess);
STRING paramfile;
std::string paramfile;
paramfile = tess->datadir;
paramfile += VARDIR; // parameters dir
paramfile += "edited"; // actual name

View File

@ -25,7 +25,6 @@
# include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
# include "scrollview.h" // for ScrollView (ptr only), SVEvent (ptr only)
# include "strngs.h" // for STRING
namespace tesseract {
@ -107,7 +106,7 @@ public:
private:
// Gets up to the first 3 prefixes from s (split by _).
// For example, tesseract_foo_bar will be split into tesseract, foo and bar.
void GetPrefixes(const char *s, STRING *level_one, STRING *level_two, STRING *level_three);
void GetPrefixes(const char *s, std::string *level_one, std::string *level_two, std::string *level_three);
// Gets the first n words (split by _) and puts them in t.
// For example, tesseract_foo_bar with N=2 will yield tesseract_foo_.

View File

@ -794,8 +794,8 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
}
// Display correct text and blamer information.
STRING text;
STRING blame;
std::string text;
std::string blame;
if (word->display_flag(DF_TEXT) && word->text() != nullptr) {
text = word->text();
}
@ -810,7 +810,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
text = blamer_bundle->TruthString();
}
text += " -> ";
STRING best_choice_str;
std::string best_choice_str;
if (word_res->best_choice == nullptr) {
best_choice_str = "NULL";
} else {

View File

@ -98,7 +98,7 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
PAGE_RES_IT page_res_it;
page_res_it.page_res = page_res;
page_res_it.restart_page();
STRING label;
std::string label;
// Process all the words on this page.
TBOX tbox; // tesseract-identified box
@ -108,14 +108,14 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
int examined_words = 0;
do {
keep_going = read_t(&page_res_it, &tbox);
keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
keep_going &= ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
// Align bottom left points of the TBOXes.
while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
if (bbox.bottom() < tbox.bottom()) {
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
keep_going = ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
}
}
while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
@ -123,7 +123,7 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
page_res_it.forward();
keep_going = read_t(&page_res_it, &tbox);
} else {
keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
keep_going = ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
}
}
// OCR the word if top right points of the TBOXes are similar.

View File

@ -53,9 +53,6 @@ int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) {
namespace tesseract {
CLISTIZEH(STRING)
CLISTIZE(STRING)
/*************************************************************************
* set_done()
*
@ -196,7 +193,7 @@ void Tesseract::reject_I_1_L(WERD_RES *word) {
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
offset += word->best_choice->unichar_lengths()[i], i += 1) {
if (STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[offset])) {
if (conflict_set_I_l_1.contains(word->best_choice->unichar_string()[offset])) {
// rej 1Il conflict
word->reject_map[i].setrej_1Il_conflict();
}
@ -316,7 +313,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
offset += lengths[i++])
non_conflict_set_char = (word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
!STRING(conflict_set_I_l_1).contains(word[offset]);
!conflict_set_I_l_1.contains(word[offset]);
if (!non_conflict_set_char) {
if (update_map)
reject_I_1_L(word_res);
@ -409,7 +406,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
for (i = 0, offset = 0; word[offset] != '\0';
offset += word_res->best_choice->unichar_lengths()[i++]) {
if ((!allow_1s || (word[offset] != '1')) &&
STRING(conflict_set_I_l_1).contains(word[offset])) {
conflict_set_I_l_1.contains(word[offset])) {
if (update_map)
word_res->reject_map[i].setrej_1Il_conflict();
conflict = true;
@ -425,7 +422,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) {
first_alphanum_index_ = first_alphanum_index(word, lengths);
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (STRING(conflict_set_I_l_1).contains(word[first_alphanum_offset_])) {
if (conflict_set_I_l_1.contains(word[first_alphanum_offset_])) {
if (update_map)
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
return true;
@ -502,7 +499,7 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {
for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
if (word->reject_map[i].accepted()) {
if (STRING(conflict_set_I_l_1).contains(s[offset])) {
if (conflict_set_I_l_1.contains(s[offset])) {
accepted_1Il = true;
} else {
if (word->uch_set->get_isalpha(s + offset, lengths[i]) ||
@ -515,7 +512,7 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {
return; // Nothing to worry about
for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
if (STRING(conflict_set_I_l_1).contains(s[offset]) && word->reject_map[i].accepted())
if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted())
word->reject_map[i].setrej_postNN_1Il();
}
}
@ -549,7 +546,7 @@ bool Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
if (word->best_choice->unichar_lengths().length() <= 1)
return false;
if (!STRING(ok_repeated_ch_non_alphanum_wds).contains(word->best_choice->unichar_string()[0]))
if (!ok_repeated_ch_non_alphanum_wds.contains(word->best_choice->unichar_string()[0]))
return false;
UNICHAR_ID uch_id = word->best_choice->unichar_id(0);

View File

@ -440,7 +440,7 @@ Dict &Tesseract::getDict() {
}
void Tesseract::Clear() {
STRING debug_name = imagebasename + "_debug.pdf";
std::string debug_name = imagebasename + "_debug.pdf";
pixa_debug_.WritePDF(debug_name.c_str());
pixDestroy(&pix_binary_);
pixDestroy(&pix_grey_);

View File

@ -98,9 +98,9 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
int num_boxes = 0;
for (int i = 0; i < lines.size(); ++i) {
int page = 0;
STRING utf8_str;
std::string utf8_str;
TBOX box;
if (!ParseBoxFileStr(lines[i].c_str(), &page, &utf8_str, &box)) {
if (!ParseBoxFileStr(lines[i].c_str(), &page, utf8_str, &box)) {
if (continue_on_failure)
continue;
else
@ -137,14 +137,14 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box) {
bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box) {
return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
}
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8_str,
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
TBOX *bounding_box) {
int page = 0;
char buff[kBoxReadBufSize]; // boxfile read buffer
@ -185,10 +185,10 @@ bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8
// and for word/line-level boxes:
// WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
// See applybox.cpp for more information.
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str,
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
TBOX *bounding_box) {
*bounding_box = TBOX(); // Initialize it to empty.
*utf8_str = "";
utf8_str = "";
char uch[kBoxReadBufSize];
const char *buffptr = boxfile_str;
// Read the unichar without messing up on Tibetan.
@ -245,7 +245,7 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
}
used += new_used;
}
*utf8_str = uch;
utf8_str = uch;
if (x_min > x_max)
std::swap(x_min, x_max);
if (y_min > y_max)

View File

@ -64,18 +64,18 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
TESS_API
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box);
bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
TESS_API
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8_str,
bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
TBOX *bounding_box);
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
TESS_API
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str,
bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
TBOX *bounding_box);
// Creates a box file string from a unichar string, TBOX and page number.

View File

@ -91,7 +91,7 @@ public:
}
}
}
// Fetches the value of the named param as a STRING. Returns false if not
// Fetches the value of the named param as a string. Returns false if not
// found.
static bool GetParamAsString(const char *name, const ParamsVectors *member_params,
std::string *value);
@ -242,6 +242,9 @@ public:
// Returns the stored value as a NUL-terminated C string. The pointer is
// obtained from value_.c_str(), so it refers to storage owned by value_.
const char *c_str() const { return value_.c_str(); }
bool contains(char c) {
return value_.find(c) != std::string::npos;
}
bool empty() {
return value_.length() <= 0;
}

View File

@ -96,10 +96,6 @@ bool STRING::SkipDeSerialize(TFile *fp) {
return fp->Skip(len);
}
bool STRING::contains(const char c) const {
return (c != '\0') && (strchr(c_str(), c) != nullptr);
}
void STRING::split(const char c, std::vector<STRING> *splited) {
int start_index = 0;
const int len = length();

View File

@ -59,9 +59,6 @@ public:
TESS_API
static bool SkipDeSerialize(tesseract::TFile *fp);
TESS_API
bool contains(char c) const;
TESS_API
void split(char c, std::vector<STRING> *splited);
};

View File

@ -149,9 +149,9 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
if (font_id < 0)
font_id = 0;
int page_number;
STRING unichar;
std::string unichar;
TBOX bounding_box;
if (!ParseBoxFileStr(space, &page_number, &unichar, &bounding_box)) {
if (!ParseBoxFileStr(space, &page_number, unichar, &bounding_box)) {
tprintf("Bad format in tr file, reading box coords\n");
continue;
}