mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 15:39:05 +08:00
commit
c1180a8bc0
@ -772,6 +772,8 @@ void LSTM::CountAlternators(const Network &other, TFloat *same, TFloat *changed)
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUG_DETAIL > 3
|
||||
|
||||
// Prints the weights for debug purposes.
|
||||
void LSTM::PrintW() {
|
||||
tprintf("Weight state:%s\n", name_.c_str());
|
||||
@ -834,6 +836,8 @@ void LSTM::PrintDW() {
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Resizes forward data to cope with an input image of the given width.
|
||||
void LSTM::ResizeForward(const NetworkIO &input) {
|
||||
int rounded_inputs = gate_weights_[CI].RoundInputs(na_);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/**********************************************************************
|
||||
* File: drawfx.cpp (Formerly drawfx.c)
|
||||
* File: drawfx.cpp
|
||||
* Description: Draw things to do with feature extraction.
|
||||
* Author: Ray Smith
|
||||
*
|
||||
@ -40,7 +40,6 @@ namespace tesseract {
|
||||
# define DEBUG_WIN_NAME "FXDebug"
|
||||
|
||||
ScrollView *fx_win = nullptr;
|
||||
FILE *fx_debug = nullptr;
|
||||
|
||||
/**********************************************************************
|
||||
* create_fx_win
|
||||
|
@ -1,5 +1,5 @@
|
||||
/**********************************************************************
|
||||
* File: drawfx.h (Formerly drawfx.h)
|
||||
* File: drawfx.h
|
||||
* Description: Draw things to do with feature extraction.
|
||||
* Author: Ray Smith
|
||||
*
|
||||
@ -27,7 +27,6 @@ namespace tesseract {
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
extern ScrollView *fx_win;
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
extern FILE *fx_debug;
|
||||
void create_fx_win(); // make features win
|
||||
void clear_fx_win(); // make features win
|
||||
void create_fxdebug_win(); // make gradients win
|
||||
|
@ -1,6 +1,6 @@
|
||||
/******************************************************************************
|
||||
*
|
||||
* File: pieces.cpp (Formerly pieces.c)
|
||||
* File: pieces.cpp
|
||||
* Description:
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
*
|
||||
@ -86,239 +86,4 @@ int SortByRating(const void *void1, const void *void2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
* fill_filtered_fragment_list
|
||||
*
|
||||
* Filter the fragment list so that the filtered_choices only contain
|
||||
* fragments that are in the correct position. choices is the list
|
||||
* that we are going to filter. fragment_pos is the position in the
|
||||
* fragment that we are looking for and num_frag_parts is the
|
||||
* total number of pieces. The result will be appended to
|
||||
* filtered_choices.
|
||||
**********************************************************************/
|
||||
// Copies into filtered_choices every entry of choices whose unichar is a
// fragment at position fragment_pos of a character split into num_frag_parts
// pieces. Each copy is relabelled with the unichar id of the character the
// fragment belongs to, and filtered_choices is finally sorted with
// SortByUnicharID.
void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos,
                                          int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) {
  BLOB_CHOICE_IT dst_it(filtered_choices);
  BLOB_CHOICE_IT src_it(choices);

  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
    auto *candidate = src_it.data();
    const CHAR_FRAGMENT *fragment = unicharset.get_fragment(candidate->unichar_id());

    // Skip entries that are not fragments at all.
    if (fragment == nullptr) {
      continue;
    }
    // Skip fragments at another position or from a different split count.
    if (fragment->get_pos() != fragment_pos || fragment->get_total() != num_frag_parts) {
      continue;
    }

    // The destination list receives its own copy, carrying the id of the
    // unichar that this fragment is a part of.
    auto *relabelled = new BLOB_CHOICE(*candidate);
    relabelled->set_unichar_id(unicharset.unichar_to_id(fragment->get_unichar()));
    dst_it.add_to_end(relabelled);
  }

  filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
}
|
||||
|
||||
/**********************************************************************
|
||||
* merge_and_put_fragment_lists
|
||||
*
|
||||
* Merge the fragment lists in choice_lists and append it to the
|
||||
* ratings matrix.
|
||||
**********************************************************************/
|
||||
// Merges the fragment lists choice_lists[0 .. num_frag_parts) into
// whole-character choices and appends them to the list held in the ratings
// matrix at (row, column), creating that list when the cell is empty.
//
// Parameters:
//   row, column     - cell of ratings that receives the merged choices.
//   num_frag_parts  - number of lists in choice_lists; nothing is done when
//                     it is not positive.
//   choice_lists    - one list per fragment position. Every list must be
//                     non-empty (the first entry of each is read
//                     unconditionally). The walk below assumes the lists are
//                     ordered by ascending unichar id, as produced by the
//                     SortByUnicharID sort in fill_filtered_fragment_list
//                     -- TODO confirm the sort direction.
//   ratings         - matrix that is read with get() and updated with put().
//
// For every unichar id present in all lists a new BLOB_CHOICE is built with:
//   rating     = sum of the part ratings,
//   certainty  = minimum of the part certainties,
//   x-height   = intersection of the part ranges (IntersectRange),
//   yshift     = the largest positive or the most negative part shift, or 0
//                when both a positive and a negative shift occur,
//   fonts      = union of the part fonts, keeping the lowest score per font,
//   script id and classifier taken from the first part.
void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts,
                                           BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) {
  // Without any list there is nothing to merge, and the code below would
  // dereference choice_lists_it[0].
  if (num_frag_parts <= 0) {
    return;
  }

  auto *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];

  for (int i = 0; i < num_frag_parts; i++) {
    choice_lists_it[i].set_to_list(&choice_lists[i]);
    choice_lists_it[i].mark_cycle_pt();
  }

  BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
  // Remember whether the list is ours: a list obtained from the matrix must
  // never be deleted here, because the matrix would keep pointing at it.
  const bool created_list = (merged_choice == nullptr);
  if (created_list) {
    merged_choice = new BLOB_CHOICE_LIST;
  }

  bool end_of_list = false;
  BLOB_CHOICE_IT merged_choice_it(merged_choice);
  while (!end_of_list) {
    // Find the maximum unichar_id of the entries the iterators are
    // currently pointing at.
    UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
    for (int i = 1; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (max_unichar_id < unichar_id) {
        max_unichar_id = unichar_id;
      }
    }

    // Move each iterator until it gets to an entry that has a value
    // greater than or equal to max_unichar_id. Running off the end of any
    // list means no further character can be completed.
    for (int i = 0; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      while (!choice_lists_it[i].cycled_list() && unichar_id < max_unichar_id) {
        choice_lists_it[i].forward();
        unichar_id = choice_lists_it[i].data()->unichar_id();
      }
      if (choice_lists_it[i].cycled_list()) {
        end_of_list = true;
        break;
      }
    }

    if (end_of_list) {
      break;
    }

    // Checks if the fragments are parts of the same character.
    UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
    bool same_unichar = true;
    for (int i = 1; i < num_frag_parts; i++) {
      UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
      if (unichar_id != first_unichar_id) {
        same_unichar = false;
        break;
      }
    }

    if (same_unichar) {
      // Add the merged character to the result.
      auto *first_part = choice_lists_it[0].data();
      UNICHAR_ID merged_unichar_id = first_unichar_id;
      auto merged_fonts = first_part->fonts(); // Deliberate copy; extended below.
      float merged_min_xheight = first_part->min_xheight();
      float merged_max_xheight = first_part->max_xheight();
      float positive_yshift = 0, negative_yshift = 0;
      int merged_script_id = first_part->script_id();
      BlobChoiceClassifier classifier = first_part->classifier();

      float merged_rating = 0, merged_certainty = 0;
      for (int i = 0; i < num_frag_parts; i++) {
        // Read every attribute from the entry that is being merged BEFORE
        // the iterator is advanced. (Previously the iterator was advanced
        // first, so x-height, yshift and fonts were taken from the following
        // entry, or from the first entry after wrap-around.)
        auto *part = choice_lists_it[i].data();

        float rating = part->rating();
        float certainty = part->certainty();
        if (i == 0 || certainty < merged_certainty) {
          merged_certainty = certainty;
        }
        merged_rating += rating;

        IntersectRange(part->min_xheight(), part->max_xheight(), &merged_min_xheight,
                       &merged_max_xheight);

        float yshift = part->yshift();
        if (yshift > positive_yshift) {
          positive_yshift = yshift;
        }
        if (yshift < negative_yshift) {
          negative_yshift = yshift;
        }

        // Use the min font rating over the parts.
        // TODO(rays) font lists are unsorted. Need to be faster?
        const auto &frag_fonts = part->fonts();
        for (const auto &frag_font : frag_fonts) {
          bool known_font = false;
          for (auto &merged_font : merged_fonts) {
            if (merged_font.fontinfo_id == frag_font.fontinfo_id) {
              if (merged_font.score > frag_font.score) {
                merged_font.score = frag_font.score;
              }
              known_font = true;
              break;
            }
          }
          if (!known_font) {
            merged_fonts.push_back(frag_font);
          }
        }

        // Step past the consumed entry; once any list is exhausted the outer
        // loop stops after this character has been emitted.
        choice_lists_it[i].forward();
        if (choice_lists_it[i].cycled_list()) {
          end_of_list = true;
        }
      }

      // Shifts in both directions cancel out to "no shift".
      float merged_yshift =
          positive_yshift != 0 ? (negative_yshift != 0 ? 0 : positive_yshift) : negative_yshift;
      auto *choice =
          new BLOB_CHOICE(merged_unichar_id, merged_rating, merged_certainty, merged_script_id,
                          merged_min_xheight, merged_max_xheight, merged_yshift, classifier);
      choice->set_fonts(merged_fonts);
      merged_choice_it.add_to_end(choice);
    }
  }

  if (classify_debug_level) {
    print_ratings_list("Merged Fragments", merged_choice, unicharset);
  }

  if (merged_choice->empty()) {
    // Only discard a list that was allocated here. An (empty) list that came
    // from the matrix stays where it is, so the matrix cell never dangles.
    if (created_list) {
      delete merged_choice;
    }
  } else {
    ratings->put(row, column, merged_choice);
  }

  delete[] choice_lists_it;
}
|
||||
|
||||
/**********************************************************************
|
||||
* get_fragment_lists
|
||||
*
|
||||
* Recursively go through the ratings matrix to find lists of fragments
|
||||
* to be merged in the function merge_and_put_fragment_lists.
|
||||
* current_frag is the position of the piece we are looking for.
|
||||
* current_row is the row in the rating matrix we are currently at.
|
||||
* start is the row we started initially, so that we can know where
|
||||
* to append the results to the matrix. num_frag_parts is the total
|
||||
* number of pieces we are looking for and num_blobs is the size of the
|
||||
* ratings matrix.
|
||||
**********************************************************************/
|
||||
// Recursive search for runs of fragments in the ratings matrix. At depth
// current_frag the cells (current_row, current_row .. num_blobs - 1) are
// filtered for fragments at that position; every non-empty result is kept in
// choice_lists[current_frag] while the search continues with the next
// position. Once all num_frag_parts positions are filled the lists are handed
// to merge_and_put_fragment_lists for the cell (start, current_row - 1).
void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start,
                                 int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings,
                                 BLOB_CHOICE_LIST *choice_lists) {
  // All positions collected: merge and stop descending.
  if (current_frag == num_frag_parts) {
    merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts, choice_lists, ratings);
    return;
  }

  BLOB_CHOICE_LIST &slot = choice_lists[current_frag];
  int16_t last = current_row;
  while (last < num_blobs) {
    BLOB_CHOICE_LIST *cell = ratings->get(current_row, last);
    if (cell != nullptr) {
      fill_filtered_fragment_list(cell, current_frag, num_frag_parts, &slot);
      if (!slot.empty()) {
        get_fragment_lists(current_frag + 1, last + 1, start, num_frag_parts, num_blobs, ratings,
                           choice_lists);
        // Reset the slot so the next cell starts from an empty list.
        slot.clear();
      }
    }
    last++;
  }
}
|
||||
|
||||
/**********************************************************************
|
||||
* merge_fragments
|
||||
*
|
||||
* Try to merge fragments in the ratings matrix and put the result in
|
||||
* the corresponding row and column
|
||||
**********************************************************************/
|
||||
// Runs the fragment search from every starting index and for every fragment
// count from 2 to CHAR_FRAGMENT::kMaxChunks, then removes and deletes every
// remaining fragment entry from the visited cells of the ratings matrix.
void Wordrec::merge_fragments(MATRIX *ratings, int16_t num_blobs) {
  BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];

  for (int16_t first = 0; first < num_blobs; first++) {
    int parts = 2;
    while (parts <= CHAR_FRAGMENT::kMaxChunks) {
      get_fragment_lists(0, first, first, parts, num_blobs, ratings, choice_lists);
      parts++;
    }
  }

  // Delete fragments from the rating matrix.
  for (int16_t first = 0; first < num_blobs; first++) {
    for (int16_t last = first; last < num_blobs; last++) {
      BLOB_CHOICE_LIST *cell = ratings->get(first, last);
      if (cell == nullptr) {
        continue;
      }
      BLOB_CHOICE_IT cell_it(cell);
      for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
        const CHAR_FRAGMENT *fragment = unicharset.get_fragment(cell_it.data()->unichar_id());
        if (fragment == nullptr) {
          continue;
        }
        // extract() unlinks the entry from the list; it is then freed here.
        delete cell_it.extract();
      }
    }
  }
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -30,12 +30,6 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Convenience entry point: runs SegSearch on word_res with a locally created
// BestChoiceBundle (sized from the dimension of the word's ratings matrix)
// and without a blamer bundle.
void Wordrec::DoSegSearch(WERD_RES *word_res) {
  const auto ratings_dimension = word_res->ratings->dimension();
  BestChoiceBundle local_bundle(ratings_dimension);
  SegSearch(word_res, &local_bundle, nullptr);
}
|
||||
|
||||
void Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
|
||||
BlamerBundle *blamer_bundle) {
|
||||
LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
|
||||
|
@ -318,10 +318,6 @@ public:
|
||||
std::vector<SegSearchPending> *pending,
|
||||
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
|
||||
|
||||
// Runs SegSearch() function (above) without needing a best_choice_bundle
|
||||
// or blamer_bundle. Used for testing.
|
||||
void DoSegSearch(WERD_RES *word_res);
|
||||
|
||||
// chop.cpp
|
||||
PRIORITY point_priority(EDGEPT *point);
|
||||
void add_point_to_list(PointHeap *point_heap, EDGEPT *point);
|
||||
@ -380,32 +376,6 @@ public:
|
||||
virtual BLOB_CHOICE_LIST *classify_piece(const std::vector<SEAM *> &seams, int16_t start,
|
||||
int16_t end, const char *description, TWERD *word,
|
||||
BlamerBundle *blamer_bundle);
|
||||
// Try to merge fragments in the ratings matrix and put the result in
|
||||
// the corresponding row and column
|
||||
void merge_fragments(MATRIX *ratings, int16_t num_blobs);
|
||||
// Recursively go through the ratings matrix to find lists of fragments
|
||||
// to be merged in the function merge_and_put_fragment_lists.
|
||||
// current_frag is the position of the piece we are looking for.
|
||||
// current_row is the row in the rating matrix we are currently at.
|
||||
// start is the row we started initially, so that we can know where
|
||||
// to append the results to the matrix. num_frag_parts is the total
|
||||
// number of pieces we are looking for and num_blobs is the size of the
|
||||
// ratings matrix.
|
||||
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start,
|
||||
int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings,
|
||||
BLOB_CHOICE_LIST *choice_lists);
|
||||
// Merge the fragment lists in choice_lists and append it to the
|
||||
// ratings matrix
|
||||
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts,
|
||||
BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings);
|
||||
// Filter the fragment list so that the filtered_choices only contain
|
||||
// fragments that are in the correct position. choices is the list
|
||||
// that we are going to filter. fragment_pos is the position in the
|
||||
// fragment that we are looking for and num_frag_parts is the
|
||||
// total number of pieces. The result will be appended to
|
||||
// filtered_choices.
|
||||
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts,
|
||||
BLOB_CHOICE_LIST *filtered_choices);
|
||||
|
||||
// Member variables.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user