mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-18 03:19:15 +08:00
Added the option to get the timesteps separated by the suggested segmentation
Signed-off-by: Noah Metzger <noah.metzger@bib.uni-mannheim.de>
This commit is contained in:
parent
d2c3309df9
commit
754e38d2b4
@ -130,7 +130,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
|
||||
return nullptr;
|
||||
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, gcnt = 1;
|
||||
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
|
||||
bool para_is_ltr = true; // Default direction is LTR
|
||||
const char* paragraph_lang = nullptr;
|
||||
@ -215,8 +215,11 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
// Now, process the word...
|
||||
std::vector<std::vector<std::pair<const char*, float>>>* confidencemap =
|
||||
nullptr;
|
||||
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
|
||||
symbolMap = nullptr;
|
||||
if (tesseract_->lstm_choice_mode) {
|
||||
confidencemap = res_it->GetBestLSTMSymbolChoices();
|
||||
symbolMap = res_it->GetBestSegmentedLSTMSymbolChoices();
|
||||
}
|
||||
hocr_str << "\n <span class='ocrx_word'"
|
||||
<< " id='"
|
||||
@ -324,6 +327,38 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
tcnt++;
|
||||
}
|
||||
}
|
||||
} else if (tesseract_->lstm_choice_mode == 3 && symbolMap != nullptr) {
|
||||
for (size_t j = 0; j < symbolMap->size(); j++) {
|
||||
std::vector<std::vector<std::pair<const char*, float>>> timesteps =
|
||||
(*symbolMap)[j];
|
||||
hocr_str << "\n <span class='ocr_symbol'"
|
||||
<< " id='"
|
||||
<< "symbolstep_" << page_id << "_" << wcnt << "_" << scnt
|
||||
<< "'>"
|
||||
<< timesteps[0][0].first;
|
||||
for (size_t i = 1; i < timesteps.size(); i++) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "timestep_" << page_id << "_" << wcnt << "_" << tcnt
|
||||
<< "'"
|
||||
<< ">";
|
||||
std::vector<std::pair<const char*, float>> timestep =
|
||||
timesteps[i];
|
||||
for (std::pair<const char*, float> conf : timestep) {
|
||||
hocr_str << "<span class='ocr_glyph'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << gcnt
|
||||
<< "'"
|
||||
<< " title='x_confs " << int(conf.second * 100) << "'>"
|
||||
<< conf.first << "</span>";
|
||||
gcnt++;
|
||||
}
|
||||
hocr_str << "</span>";
|
||||
tcnt++;
|
||||
}
|
||||
hocr_str << "</span>";
|
||||
scnt++;
|
||||
}
|
||||
}
|
||||
hocr_str << "</span>";
|
||||
tcnt = 1;
|
||||
|
@ -605,7 +605,8 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetBestLSTMSymbolChoices() const {
|
||||
std::vector<std::vector<std::pair<const char*, float>>>*
|
||||
ResultIterator::GetBestLSTMSymbolChoices() const {
|
||||
if (it_->word() != nullptr) {
|
||||
return &it_->word()->timesteps;
|
||||
} else {
|
||||
@ -613,6 +614,15 @@ std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetBest
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the LSTM timestep choices of the current word, grouped per symbol.
// The result is a pointer to the word's own symbol_steps member (not a copy),
// or nullptr when the underlying iterator has no current word.
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
|
||||
ResultIterator::GetBestSegmentedLSTMSymbolChoices() const {
|
||||
if (it_->word() != nullptr) {
|
||||
// Hand out the address of the member stored on the word result.
return &it_->word()->symbol_steps;
|
||||
} else {
|
||||
// No current word: there is nothing to report.
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void ResultIterator::AppendUTF8WordText(STRING *text) const {
|
||||
if (!it_->word()) return;
|
||||
ASSERT_HOST(it_->word()->best_choice != nullptr);
|
||||
|
@ -100,7 +100,10 @@ class TESS_API ResultIterator : public LTRResultIterator {
|
||||
/**
|
||||
* Returns the LSTM choices for every LSTM timestep for the current word.
|
||||
*/
|
||||
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetBestLSTMSymbolChoices() const;
|
||||
virtual std::vector<std::vector<std::pair<const char*, float>>>*
|
||||
GetBestLSTMSymbolChoices() const;
|
||||
/**
 * Returns the LSTM choices for every LSTM timestep of the current word,
 * grouped by symbol, or nullptr if there is no current word.
 */
virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
|
||||
GetBestSegmentedLSTMSymbolChoices() const;
|
||||
|
||||
/**
|
||||
* Return whether the current paragraph's dominant reading direction
|
||||
|
@ -526,7 +526,9 @@ Tesseract::Tesseract()
|
||||
"Allows to include alternative symbols choices in the hOCR output. "
|
||||
"Valid input values are 0, 1 and 2. 0 is the default value. "
|
||||
"With 1 the alternative symbol choices per timestep are included. "
|
||||
"With 2 the alternative symbol choices are accumulated per character.",
|
||||
"With 2 the alternative symbol choices are accumulated per character."
|
||||
"With 3 the alternative symbol choices per timestep are included and "
|
||||
"separated by the suggested segmentation of Tesseract",
|
||||
this->params()),
|
||||
|
||||
backup_config_file_(nullptr),
|
||||
|
@ -1127,7 +1127,9 @@ class Tesseract : public Wordrec {
|
||||
"Allows to include alternative symbols choices in the hOCR output. "
|
||||
"Valid input values are 0, 1 and 2. 0 is the default value. "
|
||||
"With 1 the alternative symbol choices per timestep are included. "
|
||||
"With 2 the alternative symbol choices are accumulated per character.");
|
||||
"With 2 the alternative symbol choices are accumulated per character."
|
||||
"With 3 the alternative symbol choices per timestep are included and "
|
||||
"separated by the suggested segmentation of Tesseract");
|
||||
|
||||
//// ambigsrecog.cpp /////////////////////////////////////////////////////////
|
||||
FILE *init_recog_training(const STRING &fname);
|
||||
|
@ -222,6 +222,8 @@ class WERD_RES : public ELIST_LINK {
|
||||
GenericVector<int> blob_gaps;
|
||||
// Stores the lstm choices of every timestep
|
||||
std::vector<std::vector<std::pair<const char*, float>>> timesteps;
|
||||
// Stores the lstm choices of every timestep, grouped per symbol: the outer
// vector has one entry per symbol, each holding that symbol's timesteps.
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>
|
||||
symbol_steps;
|
||||
// Ratings matrix contains classifier choices for each classified combination
|
||||
// of blobs. The dimension is the same as the number of blobs in chopped_word
|
||||
// and the leading diagonal corresponds to classifier results of the blobs
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <algorithm>
|
||||
@ -187,7 +188,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
|
||||
GenericVector<int> xcoords;
|
||||
GenericVector<const RecodeNode*> best_nodes;
|
||||
GenericVector<const RecodeNode*> second_nodes;
|
||||
std::deque<std::pair<int,int>> best_choices;
|
||||
std::deque<std::tuple<int, int, double>> best_choices;
|
||||
ExtractBestPaths(&best_nodes, &second_nodes);
|
||||
if (debug) {
|
||||
DebugPath(unicharset, best_nodes);
|
||||
@ -201,12 +202,13 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
|
||||
int timestepEnd = 0;
|
||||
// If lstm choice mode is required at granularity level 2 or 3, store the x
|
||||
// coordinates of every chosen character to match the alternative choices to it.
|
||||
if (lstm_choice_mode == 2) {
|
||||
if (lstm_choice_mode == 2 || lstm_choice_mode == 3) {
|
||||
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
|
||||
&xcoords, &best_choices);
|
||||
if (best_choices.size() > 0) {
|
||||
current_char = best_choices.front().first;
|
||||
timestepEnd = best_choices.front().second;
|
||||
current_char = std::get<0>(best_choices.front());
|
||||
timestepEnd = std::get<1>(best_choices.front());
|
||||
if(lstm_choice_mode == 2)
|
||||
best_choices.pop_front();
|
||||
}
|
||||
} else {
|
||||
@ -258,7 +260,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
|
||||
choice_pairs.push_back(choice);
|
||||
}
|
||||
}
|
||||
if ((best_choices.size() > 0 && i == best_choices.front().second - 1)
|
||||
if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
|
||||
|| i == xcoords[word_end]-1) {
|
||||
std::map<const char*, float> summed_propabilities;
|
||||
for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
|
||||
@ -283,7 +285,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
|
||||
it->second));
|
||||
}
|
||||
if (best_choices.size() > 0) {
|
||||
current_char = best_choices.front().first;
|
||||
current_char = std::get<0>(best_choices.front());
|
||||
best_choices.pop_front();
|
||||
}
|
||||
choice_pairs.clear();
|
||||
@ -292,6 +294,25 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
|
||||
}
|
||||
}
|
||||
timestepEnd = xcoords[word_end];
|
||||
} else if (lstm_choice_mode == 3) {
|
||||
std::vector<std::vector<std::pair<const char*, float>>> currentSymbol;
|
||||
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
|
||||
if (i == std::get<1>(best_choices.front())) {
|
||||
if (currentSymbol.size() > 0) {
|
||||
word_res->symbol_steps.push_back(currentSymbol);
|
||||
currentSymbol.clear();
|
||||
}
|
||||
std::vector<std::pair<const char*, float>> choice_Header;
|
||||
choice_Header.push_back(std::pair<const char*, float>(
|
||||
unicharset->id_to_unichar_ext(std::get<0>(best_choices.front())),
|
||||
2.0));
|
||||
currentSymbol.push_back(choice_Header);
|
||||
if(best_choices.size()>1) best_choices.pop_front();
|
||||
}
|
||||
currentSymbol.push_back(timesteps[i]);
|
||||
}
|
||||
word_res->symbol_steps.push_back(currentSymbol);
|
||||
timestepEnd = xcoords[word_end];
|
||||
}
|
||||
for (int i = word_start; i < word_end; ++i) {
|
||||
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
|
||||
@ -366,7 +387,7 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
|
||||
const GenericVector<const RecodeNode*>& best_nodes,
|
||||
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
|
||||
GenericVector<float>* ratings, GenericVector<int>* xcoords,
|
||||
std::deque<std::pair<int, int>>* best_choices) {
|
||||
std::deque<std::tuple<int, int, double>>* best_choices) {
|
||||
unichar_ids->truncate(0);
|
||||
certs->truncate(0);
|
||||
ratings->truncate(0);
|
||||
@ -375,6 +396,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
|
||||
int t = 0;
|
||||
int width = best_nodes.size();
|
||||
while (t < width) {
|
||||
int id;
|
||||
int tposition;
|
||||
double certainty = 0.0;
|
||||
double rating = 0.0;
|
||||
while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
|
||||
@ -396,7 +419,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
|
||||
unichar_ids->push_back(unichar_id);
|
||||
xcoords->push_back(t);
|
||||
if (best_choices != nullptr) {
|
||||
best_choices->push_back(std::pair<int, int>(unichar_id, t));
|
||||
tposition = t;
|
||||
id = unichar_id;
|
||||
}
|
||||
do {
|
||||
double cert = best_nodes[t++]->certainty;
|
||||
@ -414,6 +438,10 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
|
||||
if (certainty < certs->back()) certs->back() = certainty;
|
||||
ratings->back() += rating;
|
||||
}
|
||||
if (best_choices != nullptr) {
|
||||
best_choices->push_back(
|
||||
std::tuple<int, int, double>(id, tposition, rating));
|
||||
}
|
||||
}
|
||||
xcoords->push_back(width);
|
||||
}
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "unicharcompress.h"
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
@ -281,7 +282,7 @@ class RecodeBeamSearch {
|
||||
const GenericVector<const RecodeNode*>& best_nodes,
|
||||
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
|
||||
GenericVector<float>* ratings, GenericVector<int>* xcoords,
|
||||
std::deque<std::pair<int,int>>* best_choices = nullptr);
|
||||
std::deque<std::tuple<int,int,double>>* best_choices = nullptr);
|
||||
|
||||
// Sets up a word with the ratings matrix and fake blobs with boxes in the
|
||||
// right places.
|
||||
|
Loading…
Reference in New Issue
Block a user