mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Added std:: to some stl types
This commit is contained in:
parent
1cc511188d
commit
500bfaf315
@ -207,7 +207,7 @@ void BoxChar::ReorderRTLText(std::vector<BoxChar*>* boxes) {
|
|||||||
for (int start = 0; start < boxes->size(); start = end + 1) {
|
for (int start = 0; start < boxes->size(); start = end + 1) {
|
||||||
end = start + 1;
|
end = start + 1;
|
||||||
while (end < boxes->size() && (*boxes)[end]->ch_ != "\t") ++end;
|
while (end < boxes->size() && (*boxes)[end]->ch_ != "\t") ++end;
|
||||||
sort(boxes->begin() + start, boxes->begin() + end, sorter);
|
std::sort(boxes->begin() + start, boxes->begin() + end, sorter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,27 +66,27 @@ class BoxChar {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void TranslateBoxes(int xshift, int yshift,
|
static void TranslateBoxes(int xshift, int yshift,
|
||||||
vector<BoxChar*>* boxes);
|
std::vector<BoxChar*>* boxes);
|
||||||
|
|
||||||
// Prepares for writing the boxes to a file by inserting newlines, spaces,
|
// Prepares for writing the boxes to a file by inserting newlines, spaces,
|
||||||
// and re-ordering so the boxes are strictly left-to-right.
|
// and re-ordering so the boxes are strictly left-to-right.
|
||||||
static void PrepareToWrite(vector<BoxChar*>* boxes);
|
static void PrepareToWrite(std::vector<BoxChar*>* boxes);
|
||||||
// Inserts newline (tab) characters into the vector at newline positions.
|
// Inserts newline (tab) characters into the vector at newline positions.
|
||||||
static void InsertNewlines(bool rtl_rules, bool vertical_rules,
|
static void InsertNewlines(bool rtl_rules, bool vertical_rules,
|
||||||
vector<BoxChar*>* boxes);
|
std::vector<BoxChar*>* boxes);
|
||||||
// Converts nullptr boxes to space characters, with appropriate bounding
|
// Converts nullptr boxes to space characters, with appropriate bounding
|
||||||
// boxes.
|
// boxes.
|
||||||
static void InsertSpaces(bool rtl_rules, bool vertical_rules,
|
static void InsertSpaces(bool rtl_rules, bool vertical_rules,
|
||||||
vector<BoxChar*>* boxes);
|
std::vector<BoxChar*>* boxes);
|
||||||
// Reorders text in a right-to-left script in left-to-right order.
|
// Reorders text in a right-to-left script in left-to-right order.
|
||||||
static void ReorderRTLText(vector<BoxChar*>* boxes);
|
static void ReorderRTLText(std::vector<BoxChar*>* boxes);
|
||||||
// Returns true if the vector contains mostly RTL characters.
|
// Returns true if the vector contains mostly RTL characters.
|
||||||
static bool ContainsMostlyRTL(const vector<BoxChar*>& boxes);
|
static bool ContainsMostlyRTL(const std::vector<BoxChar*>& boxes);
|
||||||
// Returns true if the text is mostly laid out vertically.
|
// Returns true if the text is mostly laid out vertically.
|
||||||
static bool MostlyVertical(const vector<BoxChar*>& boxes);
|
static bool MostlyVertical(const std::vector<BoxChar*>& boxes);
|
||||||
|
|
||||||
// Returns the total length of all the strings in the boxes.
|
// Returns the total length of all the strings in the boxes.
|
||||||
static int TotalByteLength(const vector<BoxChar*>& boxes);
|
static int TotalByteLength(const std::vector<BoxChar*>& boxes);
|
||||||
|
|
||||||
// Rotate the vector of boxes between start and end by the given rotation.
|
// Rotate the vector of boxes between start and end by the given rotation.
|
||||||
// The rotation is in radians clockwise about the given center.
|
// The rotation is in radians clockwise about the given center.
|
||||||
@ -95,15 +95,16 @@ class BoxChar {
|
|||||||
int ycenter,
|
int ycenter,
|
||||||
int start_box,
|
int start_box,
|
||||||
int end_box,
|
int end_box,
|
||||||
vector<BoxChar*>* boxes);
|
std::vector<BoxChar*>* boxes);
|
||||||
|
|
||||||
// Create a tesseract box file from the vector of boxes. The image height
|
// Create a tesseract box file from the vector of boxes. The image height
|
||||||
// is needed to convert to tesseract coordinates.
|
// is needed to convert to tesseract coordinates.
|
||||||
static void WriteTesseractBoxFile(const string& name, int height,
|
static void WriteTesseractBoxFile(const string& name, int height,
|
||||||
const vector<BoxChar*>& boxes);
|
const std::vector<BoxChar*>& boxes);
|
||||||
// Gets the tesseract box file as a string from the vector of boxes.
|
// Gets the tesseract box file as a string from the vector of boxes.
|
||||||
// The image height is needed to convert to tesseract coordinates.
|
// The image height is needed to convert to tesseract coordinates.
|
||||||
static string GetTesseractBoxStr(int height, const vector<BoxChar*>& boxes);
|
static string GetTesseractBoxStr(int height,
|
||||||
|
const std::vector<BoxChar*>& boxes);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string ch_;
|
string ch_;
|
||||||
|
@ -387,12 +387,12 @@ bool PangoFontInfo::GetSpacingProperties(const string& utf8_char,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool PangoFontInfo::CanRenderString(const char* utf8_word, int len) const {
|
bool PangoFontInfo::CanRenderString(const char* utf8_word, int len) const {
|
||||||
vector<string> graphemes;
|
std::vector<string> graphemes;
|
||||||
return CanRenderString(utf8_word, len, &graphemes);
|
return CanRenderString(utf8_word, len, &graphemes);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
|
bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
|
||||||
vector<string>* graphemes) const {
|
std::vector<string>* graphemes) const {
|
||||||
if (graphemes) graphemes->clear();
|
if (graphemes) graphemes->clear();
|
||||||
// We check for font coverage of the text first, as otherwise Pango could
|
// We check for font coverage of the text first, as otherwise Pango could
|
||||||
// (undesirably) fall back to another font that does have the required
|
// (undesirably) fall back to another font that does have the required
|
||||||
@ -508,7 +508,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
|
|||||||
|
|
||||||
|
|
||||||
// ------------------------ FontUtils ------------------------------------
|
// ------------------------ FontUtils ------------------------------------
|
||||||
vector<string> FontUtils::available_fonts_; // cache list
|
std::vector<string> FontUtils::available_fonts_; // cache list
|
||||||
|
|
||||||
// Returns whether the specified font description is available in the fonts
|
// Returns whether the specified font description is available in the fonts
|
||||||
// directory.
|
// directory.
|
||||||
@ -591,7 +591,7 @@ static bool ShouldIgnoreFontFamilyName(const char* query) {
|
|||||||
|
|
||||||
// Outputs description names of available fonts.
|
// Outputs description names of available fonts.
|
||||||
/* static */
|
/* static */
|
||||||
const vector<string>& FontUtils::ListAvailableFonts() {
|
const std::vector<string>& FontUtils::ListAvailableFonts() {
|
||||||
if (!available_fonts_.empty()) {
|
if (!available_fonts_.empty()) {
|
||||||
return available_fonts_;
|
return available_fonts_;
|
||||||
}
|
}
|
||||||
@ -634,13 +634,13 @@ const vector<string>& FontUtils::ListAvailableFonts() {
|
|||||||
g_free(faces);
|
g_free(faces);
|
||||||
}
|
}
|
||||||
g_free(families);
|
g_free(families);
|
||||||
sort(available_fonts_.begin(), available_fonts_.end());
|
std::sort(available_fonts_.begin(), available_fonts_.end());
|
||||||
return available_fonts_;
|
return available_fonts_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void CharCoverageMapToBitmap(PangoCoverage* coverage,
|
static void CharCoverageMapToBitmap(PangoCoverage* coverage,
|
||||||
vector<bool>* unichar_bitmap) {
|
std::vector<bool>* unichar_bitmap) {
|
||||||
const int kMinUnicodeValue = 33;
|
const int kMinUnicodeValue = 33;
|
||||||
const int kMaxUnicodeValue = 0x10FFFF;
|
const int kMaxUnicodeValue = 0x10FFFF;
|
||||||
unichar_bitmap->resize(kMaxUnicodeValue + 1, false);
|
unichar_bitmap->resize(kMaxUnicodeValue + 1, false);
|
||||||
@ -654,14 +654,14 @@ static void CharCoverageMapToBitmap(PangoCoverage* coverage,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
void FontUtils::GetAllRenderableCharacters(vector<bool>* unichar_bitmap) {
|
void FontUtils::GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap) {
|
||||||
const vector<string>& all_fonts = ListAvailableFonts();
|
const std::vector<string>& all_fonts = ListAvailableFonts();
|
||||||
return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
|
return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
void FontUtils::GetAllRenderableCharacters(const string& font_name,
|
void FontUtils::GetAllRenderableCharacters(const string& font_name,
|
||||||
vector<bool>* unichar_bitmap) {
|
std::vector<bool>* unichar_bitmap) {
|
||||||
PangoFontInfo font_info(font_name);
|
PangoFontInfo font_info(font_name);
|
||||||
PangoCoverage* coverage =
|
PangoCoverage* coverage =
|
||||||
pango_font_get_coverage(font_info.ToPangoFont(), nullptr);
|
pango_font_get_coverage(font_info.ToPangoFont(), nullptr);
|
||||||
@ -669,8 +669,8 @@ void FontUtils::GetAllRenderableCharacters(const string& font_name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
|
void FontUtils::GetAllRenderableCharacters(const std::vector<string>& fonts,
|
||||||
vector<bool>* unichar_bitmap) {
|
std::vector<bool>* unichar_bitmap) {
|
||||||
// Form the union of coverage maps from the fonts
|
// Form the union of coverage maps from the fonts
|
||||||
PangoCoverage* all_coverage = pango_coverage_new();
|
PangoCoverage* all_coverage = pango_coverage_new();
|
||||||
tlog(1, "Processing %d fonts\n", fonts.size());
|
tlog(1, "Processing %d fonts\n", fonts.size());
|
||||||
@ -691,7 +691,7 @@ void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
|
|||||||
/* static */
|
/* static */
|
||||||
int FontUtils::FontScore(const std::unordered_map<char32, inT64>& ch_map,
|
int FontUtils::FontScore(const std::unordered_map<char32, inT64>& ch_map,
|
||||||
const string& fontname, int* raw_score,
|
const string& fontname, int* raw_score,
|
||||||
vector<bool>* ch_flags) {
|
std::vector<bool>* ch_flags) {
|
||||||
PangoFontInfo font_info;
|
PangoFontInfo font_info;
|
||||||
if (!font_info.ParseFontDescriptionName(fontname)) {
|
if (!font_info.ParseFontDescriptionName(fontname)) {
|
||||||
tprintf("ERROR: Could not parse %s\n", fontname.c_str());
|
tprintf("ERROR: Could not parse %s\n", fontname.c_str());
|
||||||
@ -723,22 +723,23 @@ int FontUtils::FontScore(const std::unordered_map<char32, inT64>& ch_map,
|
|||||||
|
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
string FontUtils::BestFonts(const std::unordered_map<char32, inT64>& ch_map,
|
string FontUtils::BestFonts(
|
||||||
vector<pair<const char*, vector<bool> > >* fonts) {
|
const std::unordered_map<char32, inT64>& ch_map,
|
||||||
|
std::vector<std::pair<const char*, std::vector<bool> > >* fonts) {
|
||||||
const double kMinOKFraction = 0.99;
|
const double kMinOKFraction = 0.99;
|
||||||
// Weighted fraction of characters that must be renderable in a font to make
|
// Weighted fraction of characters that must be renderable in a font to make
|
||||||
// it OK even if the raw count is not good.
|
// it OK even if the raw count is not good.
|
||||||
const double kMinWeightedFraction = 0.99995;
|
const double kMinWeightedFraction = 0.99995;
|
||||||
|
|
||||||
fonts->clear();
|
fonts->clear();
|
||||||
vector<vector<bool> > font_flags;
|
std::vector<std::vector<bool> > font_flags;
|
||||||
vector<int> font_scores;
|
std::vector<int> font_scores;
|
||||||
vector<int> raw_scores;
|
std::vector<int> raw_scores;
|
||||||
int most_ok_chars = 0;
|
int most_ok_chars = 0;
|
||||||
int best_raw_score = 0;
|
int best_raw_score = 0;
|
||||||
const vector<string>& font_names = FontUtils::ListAvailableFonts();
|
const std::vector<string>& font_names = FontUtils::ListAvailableFonts();
|
||||||
for (int i = 0; i < font_names.size(); ++i) {
|
for (int i = 0; i < font_names.size(); ++i) {
|
||||||
vector<bool> ch_flags;
|
std::vector<bool> ch_flags;
|
||||||
int raw_score = 0;
|
int raw_score = 0;
|
||||||
int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
|
int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
|
||||||
most_ok_chars = MAX(ok_chars, most_ok_chars);
|
most_ok_chars = MAX(ok_chars, most_ok_chars);
|
||||||
@ -767,7 +768,7 @@ string FontUtils::BestFonts(const std::unordered_map<char32, inT64>& ch_map,
|
|||||||
int raw_score = raw_scores[i];
|
int raw_score = raw_scores[i];
|
||||||
if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
|
if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
|
||||||
score >= override_enough) {
|
score >= override_enough) {
|
||||||
fonts->push_back(make_pair(font_names[i].c_str(), font_flags[i]));
|
fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
|
||||||
tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
|
tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
|
||||||
font_names[i].c_str(),
|
font_names[i].c_str(),
|
||||||
100.0 * score / most_ok_chars,
|
100.0 * score / most_ok_chars,
|
||||||
@ -786,20 +787,20 @@ string FontUtils::BestFonts(const std::unordered_map<char32, inT64>& ch_map,
|
|||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
|
bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
|
||||||
string* font_name, vector<string>* graphemes) {
|
string* font_name, std::vector<string>* graphemes) {
|
||||||
return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
|
return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
|
||||||
graphemes);
|
graphemes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
|
bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
|
||||||
const vector<string>& all_fonts,
|
const std::vector<string>& all_fonts,
|
||||||
string* font_name, vector<string>* graphemes) {
|
string* font_name, std::vector<string>* graphemes) {
|
||||||
if (font_name) font_name->clear();
|
if (font_name) font_name->clear();
|
||||||
if (graphemes) graphemes->clear();
|
if (graphemes) graphemes->clear();
|
||||||
for (int i = 0; i < all_fonts.size(); ++i) {
|
for (int i = 0; i < all_fonts.size(); ++i) {
|
||||||
PangoFontInfo font;
|
PangoFontInfo font;
|
||||||
vector<string> found_graphemes;
|
std::vector<string> found_graphemes;
|
||||||
ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
|
ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
|
||||||
"Could not parse font desc name %s\n",
|
"Could not parse font desc name %s\n",
|
||||||
all_fonts[i].c_str());
|
all_fonts[i].c_str());
|
||||||
|
@ -326,7 +326,7 @@ int StringRenderer::FindFirstPageBreakOffset(const char* text,
|
|||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<BoxChar*>& StringRenderer::GetBoxes() const {
|
const std::vector<BoxChar*>& StringRenderer::GetBoxes() const {
|
||||||
return boxchars_;
|
return boxchars_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -358,8 +358,8 @@ void StringRenderer::WriteAllBoxes(const string& filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns cluster strings in logical order.
|
// Returns cluster strings in logical order.
|
||||||
bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
|
bool StringRenderer::GetClusterStrings(std::vector<string>* cluster_text) {
|
||||||
map<int, string> start_byte_to_text;
|
std::map<int, string> start_byte_to_text;
|
||||||
PangoLayoutIter* run_iter = pango_layout_get_iter(layout_);
|
PangoLayoutIter* run_iter = pango_layout_get_iter(layout_);
|
||||||
const char* full_text = pango_layout_get_text(layout_);
|
const char* full_text = pango_layout_get_text(layout_);
|
||||||
do {
|
do {
|
||||||
@ -396,7 +396,7 @@ bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
|
|||||||
pango_layout_iter_free(run_iter);
|
pango_layout_iter_free(run_iter);
|
||||||
|
|
||||||
cluster_text->clear();
|
cluster_text->clear();
|
||||||
for (map<int, string>::const_iterator it = start_byte_to_text.begin();
|
for (std::map<int, string>::const_iterator it = start_byte_to_text.begin();
|
||||||
it != start_byte_to_text.end(); ++it) {
|
it != start_byte_to_text.end(); ++it) {
|
||||||
cluster_text->push_back(it->second);
|
cluster_text->push_back(it->second);
|
||||||
}
|
}
|
||||||
@ -413,8 +413,8 @@ bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
|
|||||||
// hyphens. When this is detected the word is split at that location into
|
// hyphens. When this is detected the word is split at that location into
|
||||||
// multiple BoxChars. Otherwise, each resulting BoxChar will contain a word and
|
// multiple BoxChars. Otherwise, each resulting BoxChar will contain a word and
|
||||||
// its bounding box.
|
// its bounding box.
|
||||||
static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) {
|
static void MergeBoxCharsToWords(std::vector<BoxChar*>* boxchars) {
|
||||||
vector<BoxChar*> result;
|
std::vector<BoxChar*> result;
|
||||||
bool started_word = false;
|
bool started_word = false;
|
||||||
for (int i = 0; i < boxchars->size(); ++i) {
|
for (int i = 0; i < boxchars->size(); ++i) {
|
||||||
if (boxchars->at(i)->ch() == " " || boxchars->at(i)->box() == nullptr) {
|
if (boxchars->at(i)->ch() == " " || boxchars->at(i)->box() == nullptr) {
|
||||||
@ -469,7 +469,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_);
|
PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_);
|
||||||
|
|
||||||
// Do a first pass to store cluster start indexes.
|
// Do a first pass to store cluster start indexes.
|
||||||
vector<int> cluster_start_indices;
|
std::vector<int> cluster_start_indices;
|
||||||
do {
|
do {
|
||||||
cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter));
|
cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter));
|
||||||
tlog(3, "Added %d\n", cluster_start_indices.back());
|
tlog(3, "Added %d\n", cluster_start_indices.back());
|
||||||
@ -478,8 +478,8 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
cluster_start_indices.push_back(strlen(text));
|
cluster_start_indices.push_back(strlen(text));
|
||||||
tlog(3, "Added last index %d\n", cluster_start_indices.back());
|
tlog(3, "Added last index %d\n", cluster_start_indices.back());
|
||||||
// Sort the indices and create a map from start to end indices.
|
// Sort the indices and create a map from start to end indices.
|
||||||
sort(cluster_start_indices.begin(), cluster_start_indices.end());
|
std::sort(cluster_start_indices.begin(), cluster_start_indices.end());
|
||||||
map<int, int> cluster_start_to_end_index;
|
std::map<int, int> cluster_start_to_end_index;
|
||||||
for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
|
for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
|
||||||
cluster_start_to_end_index[cluster_start_indices[i]]
|
cluster_start_to_end_index[cluster_start_indices[i]]
|
||||||
= cluster_start_indices[i + 1];
|
= cluster_start_indices[i + 1];
|
||||||
@ -489,7 +489,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
// cluster extent information.
|
// cluster extent information.
|
||||||
cluster_iter = pango_layout_get_iter(layout_);
|
cluster_iter = pango_layout_get_iter(layout_);
|
||||||
// Store BoxChars* sorted by their byte start positions
|
// Store BoxChars* sorted by their byte start positions
|
||||||
map<int, BoxChar*> start_byte_to_box;
|
std::map<int, BoxChar*> start_byte_to_box;
|
||||||
do {
|
do {
|
||||||
PangoRectangle cluster_rect;
|
PangoRectangle cluster_rect;
|
||||||
pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect, nullptr);
|
pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect, nullptr);
|
||||||
@ -548,21 +548,21 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
// accurate.
|
// accurate.
|
||||||
// TODO(ranjith): Revisit whether this is still needed in newer versions of
|
// TODO(ranjith): Revisit whether this is still needed in newer versions of
|
||||||
// pango.
|
// pango.
|
||||||
vector<string> cluster_text;
|
std::vector<string> cluster_text;
|
||||||
if (GetClusterStrings(&cluster_text)) {
|
if (GetClusterStrings(&cluster_text)) {
|
||||||
ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
|
ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
|
||||||
int ind = 0;
|
int ind = 0;
|
||||||
for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
|
for (std::map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
|
||||||
it != start_byte_to_box.end(); ++it, ++ind) {
|
it != start_byte_to_box.end(); ++it, ++ind) {
|
||||||
it->second->mutable_ch()->swap(cluster_text[ind]);
|
it->second->mutable_ch()->swap(cluster_text[ind]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append to the boxchars list in byte order.
|
// Append to the boxchars list in byte order.
|
||||||
vector<BoxChar*> page_boxchars;
|
std::vector<BoxChar*> page_boxchars;
|
||||||
page_boxchars.reserve(start_byte_to_box.size());
|
page_boxchars.reserve(start_byte_to_box.size());
|
||||||
string last_ch;
|
string last_ch;
|
||||||
for (map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin();
|
for (std::map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin();
|
||||||
it != start_byte_to_box.end(); ++it) {
|
it != start_byte_to_box.end(); ++it) {
|
||||||
if (it->second->ch() == kWordJoinerUTF8) {
|
if (it->second->ch() == kWordJoinerUTF8) {
|
||||||
// Skip zero-width joiner characters (ZWJs) here.
|
// Skip zero-width joiner characters (ZWJs) here.
|
||||||
@ -574,7 +574,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
CorrectBoxPositionsToLayout(&page_boxchars);
|
CorrectBoxPositionsToLayout(&page_boxchars);
|
||||||
|
|
||||||
if (render_fullwidth_latin_) {
|
if (render_fullwidth_latin_) {
|
||||||
for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
|
for (std::map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
|
||||||
it != start_byte_to_box.end(); ++it) {
|
it != start_byte_to_box.end(); ++it) {
|
||||||
// Convert fullwidth Latin characters to their halfwidth forms.
|
// Convert fullwidth Latin characters to their halfwidth forms.
|
||||||
string half(ConvertFullwidthLatinToBasicLatin(it->second->ch()));
|
string half(ConvertFullwidthLatinToBasicLatin(it->second->ch()));
|
||||||
@ -606,7 +606,8 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void StringRenderer::CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars) {
|
void StringRenderer::CorrectBoxPositionsToLayout(
|
||||||
|
std::vector<BoxChar*>* boxchars) {
|
||||||
if (vertical_text_) {
|
if (vertical_text_) {
|
||||||
const double rotation = - pango_gravity_to_rotation(
|
const double rotation = - pango_gravity_to_rotation(
|
||||||
pango_context_get_base_gravity(pango_layout_get_context(layout_)));
|
pango_context_get_base_gravity(pango_layout_get_context(layout_)));
|
||||||
@ -864,7 +865,7 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
|||||||
}
|
}
|
||||||
tprintf("Total chars = %d\n", total_chars_);
|
tprintf("Total chars = %d\n", total_chars_);
|
||||||
}
|
}
|
||||||
const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
||||||
for (int i = font_index_; i < all_fonts.size(); ++i) {
|
for (int i = font_index_; i < all_fonts.size(); ++i) {
|
||||||
++font_index_;
|
++font_index_;
|
||||||
int raw_score = 0;
|
int raw_score = 0;
|
||||||
|
@ -179,7 +179,7 @@ struct SpacingProperties {
|
|||||||
// used by the FreeType font engine.
|
// used by the FreeType font engine.
|
||||||
int x_gap_before; // horizontal x bearing
|
int x_gap_before; // horizontal x bearing
|
||||||
int x_gap_after; // horizontal advance - x_gap_before - width
|
int x_gap_after; // horizontal advance - x_gap_before - width
|
||||||
map<string, int> kerned_x_gaps;
|
std::map<string, int> kerned_x_gaps;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool IsWhitespaceBox(const BoxChar* boxchar) {
|
static bool IsWhitespaceBox(const BoxChar* boxchar) {
|
||||||
@ -215,9 +215,9 @@ static string StringReplace(const string& in,
|
|||||||
void ExtractFontProperties(const string &utf8_text,
|
void ExtractFontProperties(const string &utf8_text,
|
||||||
StringRenderer *render,
|
StringRenderer *render,
|
||||||
const string &output_base) {
|
const string &output_base) {
|
||||||
map<string, SpacingProperties> spacing_map;
|
std::map<string, SpacingProperties> spacing_map;
|
||||||
map<string, SpacingProperties>::iterator spacing_map_it0;
|
std::map<string, SpacingProperties>::iterator spacing_map_it0;
|
||||||
map<string, SpacingProperties>::iterator spacing_map_it1;
|
std::map<string, SpacingProperties>::iterator spacing_map_it1;
|
||||||
int x_bearing, x_advance;
|
int x_bearing, x_advance;
|
||||||
int len = utf8_text.length();
|
int len = utf8_text.length();
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
@ -225,7 +225,7 @@ void ExtractFontProperties(const string &utf8_text,
|
|||||||
while (offset < len) {
|
while (offset < len) {
|
||||||
offset +=
|
offset +=
|
||||||
render->RenderToImage(text + offset, strlen(text + offset), nullptr);
|
render->RenderToImage(text + offset, strlen(text + offset), nullptr);
|
||||||
const vector<BoxChar*> &boxes = render->GetBoxes();
|
const std::vector<BoxChar*> &boxes = render->GetBoxes();
|
||||||
|
|
||||||
// If the page break split a bigram, correct the offset so we try the bigram
|
// If the page break split a bigram, correct the offset so we try the bigram
|
||||||
// on the next iteration.
|
// on the next iteration.
|
||||||
@ -291,7 +291,7 @@ void ExtractFontProperties(const string &utf8_text,
|
|||||||
char buf[kBufSize];
|
char buf[kBufSize];
|
||||||
snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
|
snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
|
||||||
output_string.append(buf);
|
output_string.append(buf);
|
||||||
map<string, SpacingProperties>::const_iterator spacing_map_it;
|
std::map<string, SpacingProperties>::const_iterator spacing_map_it;
|
||||||
for (spacing_map_it = spacing_map.begin();
|
for (spacing_map_it = spacing_map.begin();
|
||||||
spacing_map_it != spacing_map.end(); ++spacing_map_it) {
|
spacing_map_it != spacing_map.end(); ++spacing_map_it) {
|
||||||
snprintf(buf, kBufSize,
|
snprintf(buf, kBufSize,
|
||||||
@ -300,7 +300,7 @@ void ExtractFontProperties(const string &utf8_text,
|
|||||||
spacing_map_it->second.x_gap_after,
|
spacing_map_it->second.x_gap_after,
|
||||||
static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
|
static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
|
||||||
output_string.append(buf);
|
output_string.append(buf);
|
||||||
map<string, int>::const_iterator kern_it;
|
std::map<string, int>::const_iterator kern_it;
|
||||||
for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
|
for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
|
||||||
kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
|
kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
|
||||||
snprintf(buf, kBufSize,
|
snprintf(buf, kBufSize,
|
||||||
@ -313,7 +313,7 @@ void ExtractFontProperties(const string &utf8_text,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool MakeIndividualGlyphs(Pix* pix,
|
bool MakeIndividualGlyphs(Pix* pix,
|
||||||
const vector<BoxChar*>& vbox,
|
const std::vector<BoxChar*>& vbox,
|
||||||
const int input_tiff_page) {
|
const int input_tiff_page) {
|
||||||
// If checks fail, return false without exiting text2image
|
// If checks fail, return false without exiting text2image
|
||||||
if (!pix) {
|
if (!pix) {
|
||||||
@ -421,7 +421,7 @@ int main(int argc, char** argv) {
|
|||||||
tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
|
tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
|
||||||
|
|
||||||
if (FLAGS_list_available_fonts) {
|
if (FLAGS_list_available_fonts) {
|
||||||
const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
||||||
for (int i = 0; i < all_fonts.size(); ++i) {
|
for (int i = 0; i < all_fonts.size(); ++i) {
|
||||||
printf("%3d: %s\n", i, all_fonts[i].c_str());
|
printf("%3d: %s\n", i, all_fonts[i].c_str());
|
||||||
ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
|
ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
|
||||||
@ -536,11 +536,11 @@ int main(int argc, char** argv) {
|
|||||||
const char *str8 = src_utf8.c_str();
|
const char *str8 = src_utf8.c_str();
|
||||||
int len = src_utf8.length();
|
int len = src_utf8.length();
|
||||||
int step;
|
int step;
|
||||||
vector<pair<int, int> > offsets;
|
std::vector<std::pair<int, int> > offsets;
|
||||||
int offset = SpanUTF8Whitespace(str8);
|
int offset = SpanUTF8Whitespace(str8);
|
||||||
while (offset < len) {
|
while (offset < len) {
|
||||||
step = SpanUTF8NotWhitespace(str8 + offset);
|
step = SpanUTF8NotWhitespace(str8 + offset);
|
||||||
offsets.push_back(make_pair(offset, step));
|
offsets.push_back(std::make_pair(offset, step));
|
||||||
offset += step;
|
offset += step;
|
||||||
offset += SpanUTF8Whitespace(str8 + offset);
|
offset += SpanUTF8Whitespace(str8 + offset);
|
||||||
}
|
}
|
||||||
@ -575,12 +575,12 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int im = 0;
|
int im = 0;
|
||||||
vector<float> page_rotation;
|
std::vector<float> page_rotation;
|
||||||
const char* to_render_utf8 = src_utf8.c_str();
|
const char* to_render_utf8 = src_utf8.c_str();
|
||||||
|
|
||||||
tesseract::TRand randomizer;
|
tesseract::TRand randomizer;
|
||||||
randomizer.set_seed(kRandomSeed);
|
randomizer.set_seed(kRandomSeed);
|
||||||
vector<string> font_names;
|
std::vector<string> font_names;
|
||||||
// We use a two pass mechanism to rotate images in both direction.
|
// We use a two pass mechanism to rotate images in both direction.
|
||||||
// The first pass(0) will rotate the images in random directions and
|
// The first pass(0) will rotate the images in random directions and
|
||||||
// the second pass(1) will mirror those rotations.
|
// the second pass(1) will mirror those rotations.
|
||||||
|
Loading…
Reference in New Issue
Block a user