mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 23:49:05 +08:00
commit
c1180a8bc0
@ -772,6 +772,8 @@ void LSTM::CountAlternators(const Network &other, TFloat *same, TFloat *changed)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if DEBUG_DETAIL > 3
|
||||||
|
|
||||||
// Prints the weights for debug purposes.
|
// Prints the weights for debug purposes.
|
||||||
void LSTM::PrintW() {
|
void LSTM::PrintW() {
|
||||||
tprintf("Weight state:%s\n", name_.c_str());
|
tprintf("Weight state:%s\n", name_.c_str());
|
||||||
@ -834,6 +836,8 @@ void LSTM::PrintDW() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
// Resizes forward data to cope with an input image of the given width.
|
// Resizes forward data to cope with an input image of the given width.
|
||||||
void LSTM::ResizeForward(const NetworkIO &input) {
|
void LSTM::ResizeForward(const NetworkIO &input) {
|
||||||
int rounded_inputs = gate_weights_[CI].RoundInputs(na_);
|
int rounded_inputs = gate_weights_[CI].RoundInputs(na_);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* File: drawfx.cpp (Formerly drawfx.c)
|
* File: drawfx.cpp
|
||||||
* Description: Draw things to do with feature extraction.
|
* Description: Draw things to do with feature extraction.
|
||||||
* Author: Ray Smith
|
* Author: Ray Smith
|
||||||
*
|
*
|
||||||
@ -40,7 +40,6 @@ namespace tesseract {
|
|||||||
# define DEBUG_WIN_NAME "FXDebug"
|
# define DEBUG_WIN_NAME "FXDebug"
|
||||||
|
|
||||||
ScrollView *fx_win = nullptr;
|
ScrollView *fx_win = nullptr;
|
||||||
FILE *fx_debug = nullptr;
|
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* create_fx_win
|
* create_fx_win
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
* File: drawfx.h (Formerly drawfx.h)
|
* File: drawfx.h
|
||||||
* Description: Draw things to do with feature extraction.
|
* Description: Draw things to do with feature extraction.
|
||||||
* Author: Ray Smith
|
* Author: Ray Smith
|
||||||
*
|
*
|
||||||
@ -27,7 +27,6 @@ namespace tesseract {
|
|||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
extern ScrollView *fx_win;
|
extern ScrollView *fx_win;
|
||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
extern FILE *fx_debug;
|
|
||||||
void create_fx_win(); // make features win
|
void create_fx_win(); // make features win
|
||||||
void clear_fx_win(); // make features win
|
void clear_fx_win(); // make features win
|
||||||
void create_fxdebug_win(); // make gradients win
|
void create_fxdebug_win(); // make gradients win
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
*
|
*
|
||||||
* File: pieces.cpp (Formerly pieces.c)
|
* File: pieces.cpp
|
||||||
* Description:
|
* Description:
|
||||||
* Author: Mark Seaman, OCR Technology
|
* Author: Mark Seaman, OCR Technology
|
||||||
*
|
*
|
||||||
@ -86,239 +86,4 @@ int SortByRating(const void *void1, const void *void2) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* fill_filtered_fragment_list
|
|
||||||
*
|
|
||||||
* Filter the fragment list so that the filtered_choices only contain
|
|
||||||
* fragments that are in the correct position. choices is the list
|
|
||||||
* that we are going to filter. fragment_pos is the position in the
|
|
||||||
* fragment that we are looking for and num_frag_parts is the the
|
|
||||||
* total number of pieces. The result will be appended to
|
|
||||||
* filtered_choices.
|
|
||||||
**********************************************************************/
|
|
||||||
void Wordrec::fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos,
|
|
||||||
int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) {
|
|
||||||
BLOB_CHOICE_IT filtered_choices_it(filtered_choices);
|
|
||||||
BLOB_CHOICE_IT choices_it(choices);
|
|
||||||
|
|
||||||
for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
|
|
||||||
UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
|
|
||||||
const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);
|
|
||||||
|
|
||||||
if (frag != nullptr && frag->get_pos() == fragment_pos && frag->get_total() == num_frag_parts) {
|
|
||||||
// Recover the unichar_id of the unichar that this fragment is
|
|
||||||
// a part of
|
|
||||||
auto *b = new BLOB_CHOICE(*choices_it.data());
|
|
||||||
int original_unichar = unicharset.unichar_to_id(frag->get_unichar());
|
|
||||||
b->set_unichar_id(original_unichar);
|
|
||||||
filtered_choices_it.add_to_end(b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
filtered_choices->sort(SortByUnicharID<BLOB_CHOICE>);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* merge_and_put_fragment_lists
|
|
||||||
*
|
|
||||||
* Merge the fragment lists in choice_lists and append it to the
|
|
||||||
* ratings matrix.
|
|
||||||
**********************************************************************/
|
|
||||||
void Wordrec::merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts,
|
|
||||||
BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) {
|
|
||||||
auto *choice_lists_it = new BLOB_CHOICE_IT[num_frag_parts];
|
|
||||||
|
|
||||||
for (int i = 0; i < num_frag_parts; i++) {
|
|
||||||
choice_lists_it[i].set_to_list(&choice_lists[i]);
|
|
||||||
choice_lists_it[i].mark_cycle_pt();
|
|
||||||
}
|
|
||||||
|
|
||||||
BLOB_CHOICE_LIST *merged_choice = ratings->get(row, column);
|
|
||||||
if (merged_choice == nullptr) {
|
|
||||||
merged_choice = new BLOB_CHOICE_LIST;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool end_of_list = false;
|
|
||||||
BLOB_CHOICE_IT merged_choice_it(merged_choice);
|
|
||||||
while (!end_of_list) {
|
|
||||||
// Find the maximum unichar_id of the current entry the iterators
|
|
||||||
// are pointing at
|
|
||||||
UNICHAR_ID max_unichar_id = choice_lists_it[0].data()->unichar_id();
|
|
||||||
for (int i = 0; i < num_frag_parts; i++) {
|
|
||||||
UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
|
|
||||||
if (max_unichar_id < unichar_id) {
|
|
||||||
max_unichar_id = unichar_id;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Move the each iterators until it gets to an entry that has a
|
|
||||||
// value greater than or equal to max_unichar_id
|
|
||||||
for (int i = 0; i < num_frag_parts; i++) {
|
|
||||||
UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
|
|
||||||
while (!choice_lists_it[i].cycled_list() && unichar_id < max_unichar_id) {
|
|
||||||
choice_lists_it[i].forward();
|
|
||||||
unichar_id = choice_lists_it[i].data()->unichar_id();
|
|
||||||
}
|
|
||||||
if (choice_lists_it[i].cycled_list()) {
|
|
||||||
end_of_list = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end_of_list) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Checks if the fragments are parts of the same character
|
|
||||||
UNICHAR_ID first_unichar_id = choice_lists_it[0].data()->unichar_id();
|
|
||||||
bool same_unichar = true;
|
|
||||||
for (int i = 1; i < num_frag_parts; i++) {
|
|
||||||
UNICHAR_ID unichar_id = choice_lists_it[i].data()->unichar_id();
|
|
||||||
if (unichar_id != first_unichar_id) {
|
|
||||||
same_unichar = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (same_unichar) {
|
|
||||||
// Add the merged character to the result
|
|
||||||
UNICHAR_ID merged_unichar_id = first_unichar_id;
|
|
||||||
auto merged_fonts = choice_lists_it[0].data()->fonts();
|
|
||||||
float merged_min_xheight = choice_lists_it[0].data()->min_xheight();
|
|
||||||
float merged_max_xheight = choice_lists_it[0].data()->max_xheight();
|
|
||||||
float positive_yshift = 0, negative_yshift = 0;
|
|
||||||
int merged_script_id = choice_lists_it[0].data()->script_id();
|
|
||||||
BlobChoiceClassifier classifier = choice_lists_it[0].data()->classifier();
|
|
||||||
|
|
||||||
float merged_rating = 0, merged_certainty = 0;
|
|
||||||
for (int i = 0; i < num_frag_parts; i++) {
|
|
||||||
float rating = choice_lists_it[i].data()->rating();
|
|
||||||
float certainty = choice_lists_it[i].data()->certainty();
|
|
||||||
|
|
||||||
if (i == 0 || certainty < merged_certainty) {
|
|
||||||
merged_certainty = certainty;
|
|
||||||
}
|
|
||||||
merged_rating += rating;
|
|
||||||
|
|
||||||
choice_lists_it[i].forward();
|
|
||||||
if (choice_lists_it[i].cycled_list()) {
|
|
||||||
end_of_list = true;
|
|
||||||
}
|
|
||||||
IntersectRange(choice_lists_it[i].data()->min_xheight(),
|
|
||||||
choice_lists_it[i].data()->max_xheight(), &merged_min_xheight,
|
|
||||||
&merged_max_xheight);
|
|
||||||
float yshift = choice_lists_it[i].data()->yshift();
|
|
||||||
if (yshift > positive_yshift) {
|
|
||||||
positive_yshift = yshift;
|
|
||||||
}
|
|
||||||
if (yshift < negative_yshift) {
|
|
||||||
negative_yshift = yshift;
|
|
||||||
}
|
|
||||||
// Use the min font rating over the parts.
|
|
||||||
// TODO(rays) font lists are unsorted. Need to be faster?
|
|
||||||
const auto &frag_fonts = choice_lists_it[i].data()->fonts();
|
|
||||||
for (auto frag_font : frag_fonts) {
|
|
||||||
int merged_f = 0;
|
|
||||||
for (; merged_f < merged_fonts.size() &&
|
|
||||||
merged_fonts[merged_f].fontinfo_id != frag_font.fontinfo_id;
|
|
||||||
++merged_f) {
|
|
||||||
}
|
|
||||||
if (merged_f == merged_fonts.size()) {
|
|
||||||
merged_fonts.push_back(frag_font);
|
|
||||||
} else if (merged_fonts[merged_f].score > frag_font.score) {
|
|
||||||
merged_fonts[merged_f].score = frag_font.score;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
float merged_yshift =
|
|
||||||
positive_yshift != 0 ? (negative_yshift != 0 ? 0 : positive_yshift) : negative_yshift;
|
|
||||||
auto *choice =
|
|
||||||
new BLOB_CHOICE(merged_unichar_id, merged_rating, merged_certainty, merged_script_id,
|
|
||||||
merged_min_xheight, merged_max_xheight, merged_yshift, classifier);
|
|
||||||
choice->set_fonts(merged_fonts);
|
|
||||||
merged_choice_it.add_to_end(choice);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (classify_debug_level) {
|
|
||||||
print_ratings_list("Merged Fragments", merged_choice, unicharset);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (merged_choice->empty()) {
|
|
||||||
delete merged_choice;
|
|
||||||
} else {
|
|
||||||
ratings->put(row, column, merged_choice);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete[] choice_lists_it;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* get_fragment_lists
|
|
||||||
*
|
|
||||||
* Recursively go through the ratings matrix to find lists of fragments
|
|
||||||
* to be merged in the function merge_and_put_fragment_lists.
|
|
||||||
* current_frag is the position of the piece we are looking for.
|
|
||||||
* current_row is the row in the rating matrix we are currently at.
|
|
||||||
* start is the row we started initially, so that we can know where
|
|
||||||
* to append the results to the matrix. num_frag_parts is the total
|
|
||||||
* number of pieces we are looking for and num_blobs is the size of the
|
|
||||||
* ratings matrix.
|
|
||||||
**********************************************************************/
|
|
||||||
void Wordrec::get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start,
|
|
||||||
int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings,
|
|
||||||
BLOB_CHOICE_LIST *choice_lists) {
|
|
||||||
if (current_frag == num_frag_parts) {
|
|
||||||
merge_and_put_fragment_lists(start, current_row - 1, num_frag_parts, choice_lists, ratings);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int16_t x = current_row; x < num_blobs; x++) {
|
|
||||||
BLOB_CHOICE_LIST *choices = ratings->get(current_row, x);
|
|
||||||
if (choices == nullptr) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
fill_filtered_fragment_list(choices, current_frag, num_frag_parts, &choice_lists[current_frag]);
|
|
||||||
if (!choice_lists[current_frag].empty()) {
|
|
||||||
get_fragment_lists(current_frag + 1, x + 1, start, num_frag_parts, num_blobs, ratings,
|
|
||||||
choice_lists);
|
|
||||||
choice_lists[current_frag].clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**********************************************************************
|
|
||||||
* merge_fragments
|
|
||||||
*
|
|
||||||
* Try to merge fragments in the ratings matrix and put the result in
|
|
||||||
* the corresponding row and column
|
|
||||||
**********************************************************************/
|
|
||||||
void Wordrec::merge_fragments(MATRIX *ratings, int16_t num_blobs) {
|
|
||||||
BLOB_CHOICE_LIST choice_lists[CHAR_FRAGMENT::kMaxChunks];
|
|
||||||
for (int16_t start = 0; start < num_blobs; start++) {
|
|
||||||
for (int frag_parts = 2; frag_parts <= CHAR_FRAGMENT::kMaxChunks; frag_parts++) {
|
|
||||||
get_fragment_lists(0, start, start, frag_parts, num_blobs, ratings, choice_lists);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete fragments from the rating matrix
|
|
||||||
for (int16_t x = 0; x < num_blobs; x++) {
|
|
||||||
for (int16_t y = x; y < num_blobs; y++) {
|
|
||||||
BLOB_CHOICE_LIST *choices = ratings->get(x, y);
|
|
||||||
if (choices != nullptr) {
|
|
||||||
BLOB_CHOICE_IT choices_it(choices);
|
|
||||||
for (choices_it.mark_cycle_pt(); !choices_it.cycled_list(); choices_it.forward()) {
|
|
||||||
UNICHAR_ID choice_unichar_id = choices_it.data()->unichar_id();
|
|
||||||
const CHAR_FRAGMENT *frag = unicharset.get_fragment(choice_unichar_id);
|
|
||||||
if (frag != nullptr) {
|
|
||||||
delete choices_it.extract();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace tesseract
|
} // namespace tesseract
|
||||||
|
@ -30,12 +30,6 @@
|
|||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
// Runs SegSearch() on word_res with a locally constructed BestChoiceBundle,
// sized from the dimension of the word's ratings matrix, and no blamer
// bundle.
void Wordrec::DoSegSearch(WERD_RES *word_res) {
  BestChoiceBundle bundle(word_res->ratings->dimension());
  SegSearch(word_res, &bundle, nullptr);
}
|
|
||||||
|
|
||||||
void Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
|
void Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
|
||||||
BlamerBundle *blamer_bundle) {
|
BlamerBundle *blamer_bundle) {
|
||||||
LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
|
LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
|
||||||
|
@ -318,10 +318,6 @@ public:
|
|||||||
std::vector<SegSearchPending> *pending,
|
std::vector<SegSearchPending> *pending,
|
||||||
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
|
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
|
||||||
|
|
||||||
// Runs SegSearch() function (above) without needing a best_choice_bundle
|
|
||||||
// or blamer_bundle. Used for testing.
|
|
||||||
void DoSegSearch(WERD_RES *word_res);
|
|
||||||
|
|
||||||
// chop.cpp
|
// chop.cpp
|
||||||
PRIORITY point_priority(EDGEPT *point);
|
PRIORITY point_priority(EDGEPT *point);
|
||||||
void add_point_to_list(PointHeap *point_heap, EDGEPT *point);
|
void add_point_to_list(PointHeap *point_heap, EDGEPT *point);
|
||||||
@ -380,32 +376,6 @@ public:
|
|||||||
virtual BLOB_CHOICE_LIST *classify_piece(const std::vector<SEAM *> &seams, int16_t start,
|
virtual BLOB_CHOICE_LIST *classify_piece(const std::vector<SEAM *> &seams, int16_t start,
|
||||||
int16_t end, const char *description, TWERD *word,
|
int16_t end, const char *description, TWERD *word,
|
||||||
BlamerBundle *blamer_bundle);
|
BlamerBundle *blamer_bundle);
|
||||||
// Try to merge fragments in the ratings matrix and put the result in
|
|
||||||
// the corresponding row and column
|
|
||||||
void merge_fragments(MATRIX *ratings, int16_t num_blobs);
|
|
||||||
// Recursively go through the ratings matrix to find lists of fragments
|
|
||||||
// to be merged in the function merge_and_put_fragment_lists.
|
|
||||||
// current_frag is the position of the piece we are looking for.
|
|
||||||
// current_row is the row in the rating matrix we are currently at.
|
|
||||||
// start is the row we started initially, so that we can know where
|
|
||||||
// to append the results to the matrix. num_frag_parts is the total
|
|
||||||
// number of pieces we are looking for and num_blobs is the size of the
|
|
||||||
// ratings matrix.
|
|
||||||
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start,
|
|
||||||
int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings,
|
|
||||||
BLOB_CHOICE_LIST *choice_lists);
|
|
||||||
// Merge the fragment lists in choice_lists and append it to the
|
|
||||||
// ratings matrix
|
|
||||||
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts,
|
|
||||||
BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings);
|
|
||||||
// Filter the fragment list so that the filtered_choices only contain
|
|
||||||
// fragments that are in the correct position. choices is the list
|
|
||||||
// that we are going to filter. fragment_pos is the position in the
|
|
||||||
// fragment that we are looking for and num_frag_parts is the
|
|
||||||
// total number of pieces. The result will be appended to
|
|
||||||
// filtered_choices.
|
|
||||||
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts,
|
|
||||||
BLOB_CHOICE_LIST *filtered_choices);
|
|
||||||
|
|
||||||
// Member variables.
|
// Member variables.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user