From ded24d036712577b7b181b4b9d79b156558b1888 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 1 Jul 2019 21:59:48 +0200 Subject: [PATCH] ccmain: Use C++-11 code instead of TessCallback1 Signed-off-by: Stefan Weil --- src/ccmain/control.cpp | 9 ++- src/ccmain/docqual.cpp | 123 +++++++++++++++--------------------- src/ccmain/docqual.h | 3 +- src/ccmain/reject.cpp | 2 +- src/ccmain/tesseractclass.h | 6 +- src/ccstruct/boxword.cpp | 5 +- src/ccstruct/boxword.h | 5 +- 7 files changed, 65 insertions(+), 88 deletions(-) diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index b786bca2..ebe60505 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -91,11 +91,11 @@ bool Tesseract::recog_interactive(PAGE_RES_IT* pr_it) { #ifndef DISABLED_LEGACY_ENGINE if (tessedit_debug_quality_metrics) { WERD_RES* word_res = pr_it->word(); - word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual); + word_char_quality(word_res, &char_qual, &good_char_qual); tprintf("\n%d chars; word_blob_quality: %d; outline_errs: %d; " "char_quality: %d; good_char_quality: %d\n", word_res->reject_map.length(), - word_blob_quality(word_res, pr_it->row()->row), + word_blob_quality(word_res), word_outline_errs(word_res), char_qual, good_char_qual); } #endif // ndef DISABLED_LEGACY_ENGINE @@ -647,14 +647,13 @@ void Tesseract::rejection_passes(PAGE_RES* page_res, const int chars_in_word = word->reject_map.length(); const int rejects_in_word = word->reject_map.reject_count(); - const int blob_quality = word_blob_quality(word, page_res_it.row()->row); + const int blob_quality = word_blob_quality(word); stats_.doc_blob_quality += blob_quality; const int outline_errs = word_outline_errs(word); stats_.doc_outline_errs += outline_errs; int16_t all_char_quality; int16_t accepted_all_char_quality; - word_char_quality(word, page_res_it.row()->row, - &all_char_quality, &accepted_all_char_quality); + word_char_quality(word, &all_char_quality, &accepted_all_char_quality); 
stats_.doc_char_quality += all_char_quality; const uint8_t permuter_type = word->best_choice->permuter(); if ((permuter_type == SYSTEM_DAWG_PERM) || diff --git a/src/ccmain/docqual.cpp b/src/ccmain/docqual.cpp index c8b32a5f..5e86d324 100644 --- a/src/ccmain/docqual.cpp +++ b/src/ccmain/docqual.cpp @@ -1,8 +1,7 @@ /****************************************************************** * File: docqual.cpp (Formerly docqual.c) * Description: Document Quality Metrics - * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 + * Author: Phil Cheatle * * (C) Copyright 1994, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,39 +16,31 @@ * **********************************************************************/ -#include -#include "docqual.h" -#include "reject.h" -#include "tesscallback.h" -#include "tessvars.h" -#include "tesseractclass.h" +#include +#include "docqual.h" +#include "reject.h" +#include "tessvars.h" +#include "tesseractclass.h" namespace tesseract{ -// A little class to provide the callbacks as we have no pre-bound args. 
-struct DocQualCallbacks { - explicit DocQualCallbacks(WERD_RES* word0) - : word(word0), match_count(0), accepted_match_count(0) {} +static void countMatchingBlobs(int16_t& match_count, int /*index*/) { + ++match_count; +} - void CountMatchingBlobs(int index) { - ++match_count; +static void countAcceptedBlobs(WERD_RES* word, int16_t& match_count, + int16_t& accepted_match_count, int index) { + if (word->reject_map[index].accepted()) { + ++accepted_match_count; } + ++match_count; +} - void CountAcceptedBlobs(int index) { - if (word->reject_map[index].accepted()) - ++accepted_match_count; - ++match_count; +static void acceptIfGoodQuality(WERD_RES* word, int index) { + if (word->reject_map[index].accept_if_good_quality()) { + word->reject_map[index].setrej_quality_accept(); } - - void AcceptIfGoodQuality(int index) { - if (word->reject_map[index].accept_if_good_quality()) - word->reject_map[index].setrej_quality_accept(); - } - - WERD_RES* word; - int16_t match_count; - int16_t accepted_match_count; -}; +} /************************************************************************* * word_blob_quality() @@ -57,16 +48,16 @@ struct DocQualCallbacks { * ASSUME blobs in both initial word and box_word are in ascending order of * left hand blob edge. 
*************************************************************************/
-int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) {
-  if (word->bln_boxes == nullptr ||
-      word->rebuild_word == nullptr || word->rebuild_word->blobs.empty())
-    return 0;
-
-  DocQualCallbacks cb(word);
-  word->bln_boxes->ProcessMatchedBlobs(
-      *word->rebuild_word,
-      NewPermanentTessCallback(&cb, &DocQualCallbacks::CountMatchingBlobs));
-  return cb.match_count;
+int16_t Tesseract::word_blob_quality(WERD_RES* word) {
+  int16_t match_count = 0;  // incremented by the callback through std::ref
+  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
+      !word->rebuild_word->blobs.empty()) {
+    using namespace std::placeholders;  // for _1
+    word->bln_boxes->ProcessMatchedBlobs(
+        *word->rebuild_word,
+        std::bind(countMatchingBlobs, std::ref(match_count), _1));
+  }
+  return match_count;
 }
 
 int16_t Tesseract::word_outline_errs(WERD_RES *word) {
@@ -89,38 +80,31 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) {
  * Combination of blob quality and outline quality - how many good chars are
  * there? - I.e chars which pass the blob AND outline tests.
*************************************************************************/
-void Tesseract::word_char_quality(WERD_RES *word,
-                                  ROW *row,
-                                  int16_t *match_count,
-                                  int16_t *accepted_match_count) {
-  if (word->bln_boxes == nullptr || word->rebuild_word == nullptr ||
-      word->rebuild_word->blobs.empty()) {
-    *match_count = 0;
-    *accepted_match_count = 0;
-    return;
+void Tesseract::word_char_quality(WERD_RES* word, int16_t* match_count,
+                                  int16_t* accepted_match_count) {
+  *match_count = 0;  // both counters are bound by reference below
+  *accepted_match_count = 0;
+  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
+      !word->rebuild_word->blobs.empty()) {
+    using namespace std::placeholders;  // for _1
+    word->bln_boxes->ProcessMatchedBlobs(
+        *word->rebuild_word,
+        std::bind(countAcceptedBlobs, word, std::ref(*match_count),
+                  std::ref(*accepted_match_count), _1));
   }
-
-  DocQualCallbacks cb(word);
-  word->bln_boxes->ProcessMatchedBlobs(
-      *word->rebuild_word,
-      NewPermanentTessCallback(&cb, &DocQualCallbacks::CountAcceptedBlobs));
-  *match_count = cb.match_count;
-  *accepted_match_count = cb.accepted_match_count;
 }
 
 /*************************************************************************
  * unrej_good_chs()
  * Unreject POTENTIAL rejects if the blob passes the blob and outline checks
  *************************************************************************/
-void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) {
-  if (word->bln_boxes == nullptr ||
-      word->rebuild_word == nullptr || word->rebuild_word->blobs.empty())
-    return;
-
-  DocQualCallbacks cb(word);
-  word->bln_boxes->ProcessMatchedBlobs(
-      *word->rebuild_word,
-      NewPermanentTessCallback(&cb, &DocQualCallbacks::AcceptIfGoodQuality));
+void Tesseract::unrej_good_chs(WERD_RES* word) {
+  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
+      !word->rebuild_word->blobs.empty()) {  // skip words without blobs
+    using namespace std::placeholders;  // for _1
+    word->bln_boxes->ProcessMatchedBlobs(
+        *word->rebuild_word, std::bind(acceptIfGoodQuality, word, _1));
+  }
 }
 
 int16_t
Tesseract::count_outline_errs(char c, int16_t outline_count) { @@ -186,12 +170,12 @@ void Tesseract::unrej_good_quality_words( //unreject potential word->best_choice->unichar_string().string(), word->best_choice->unichar_lengths().string()) != AC_UNACCEPTABLE)) { - unrej_good_chs(word, page_res_it.row ()->row); + unrej_good_chs(word); } page_res_it.forward (); } else { - /* Skip to end of dodgy row */ + // Skip to end of dodgy row. current_row = page_res_it.row (); while ((page_res_it.word () != nullptr) && (page_res_it.row () == current_row)) @@ -285,10 +269,8 @@ void Tesseract::doc_and_block_rejection( //reject big chunks word->best_choice->unichar_string().string(), word->best_choice->unichar_lengths().string()) != AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, - &accepted_char_quality); - rej_word = char_quality != word->reject_map.length(); + word_char_quality(word, &char_quality, &accepted_char_quality); + rej_word = char_quality != word->reject_map.length(); } } else { rej_word = true; @@ -356,8 +338,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks word->best_choice->unichar_string().string(), word->best_choice->unichar_lengths().string()) != AC_UNACCEPTABLE) { - word_char_quality(word, page_res_it.row()->row, - &char_quality, + word_char_quality(word, &char_quality, &accepted_char_quality); rej_word = char_quality != word->reject_map.length(); } diff --git a/src/ccmain/docqual.h b/src/ccmain/docqual.h index 22f40e21..1a92bd5b 100644 --- a/src/ccmain/docqual.h +++ b/src/ccmain/docqual.h @@ -2,7 +2,6 @@ * File: docqual.h (Formerly docqual.h) * Description: Document Quality Metrics * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -34,7 +33,7 @@ enum GARBAGE_LEVEL G_TERRIBLE }; -int16_t word_blob_quality(WERD_RES *word, ROW *row); +int16_t word_blob_quality(WERD_RES* word); void reject_whole_page(PAGE_RES_IT &page_res_it); #endif diff --git a/src/ccmain/reject.cpp b/src/ccmain/reject.cpp index 1df83309..a62996f6 100644 --- a/src/ccmain/reject.cpp +++ b/src/ccmain/reject.cpp @@ -596,7 +596,7 @@ bool Tesseract::repeated_nonalphanum_wd(WERD_RES* word, ROW* row) { if (word->best_choice->unichar_id(i) != uch_id) return false; } - word_char_quality(word, row, &char_quality, &accepted_char_quality); + word_char_quality(word, &char_quality, &accepted_char_quality); if ((word->best_choice->unichar_lengths().length () == char_quality) && (char_quality == accepted_char_quality)) diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index dd723cc8..c742a587 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -648,10 +648,10 @@ class Tesseract : public Wordrec { void quality_based_rejection(PAGE_RES_IT& page_res_it, bool good_quality_doc); void convert_bad_unlv_chs(WERD_RES* word_res); void tilde_delete(PAGE_RES_IT& page_res_it); - int16_t word_blob_quality(WERD_RES* word, ROW* row); - void word_char_quality(WERD_RES* word, ROW* row, int16_t* match_count, + int16_t word_blob_quality(WERD_RES* word); + void word_char_quality(WERD_RES* word, int16_t* match_count, int16_t* accepted_match_count); - void unrej_good_chs(WERD_RES* word, ROW* row); + void unrej_good_chs(WERD_RES* word); int16_t count_outline_errs(char c, int16_t outline_count); int16_t word_outline_errs(WERD_RES* word); #ifndef DISABLED_LEGACY_ENGINE diff --git a/src/ccstruct/boxword.cpp b/src/ccstruct/boxword.cpp index eb37b2c0..b1d513b1 100644 --- a/src/ccstruct/boxword.cpp +++ b/src/ccstruct/boxword.cpp @@ -188,13 +188,12 @@ void BoxWord::ComputeBoundingBox() { // for each blob index where the bounding boxes match. 
-// The callback is deleted on completion.
+// The callback is only invoked during this call; it is never deleted here.
 void BoxWord::ProcessMatchedBlobs(const TWERD& other,
-                                  TessCallback1<int>* cb) const {
+                                  std::function<void(int)> cb) const {
   for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
     TBOX blob_box = other.blobs[i]->bounding_box();
     if (blob_box == boxes_[i])
-      cb->Run(i);
+      cb(i);
   }
-  delete cb;
 }
 
 }  // namespace tesseract.
diff --git a/src/ccstruct/boxword.h b/src/ccstruct/boxword.h
index a1e6352c..9953c605 100644
--- a/src/ccstruct/boxword.h
+++ b/src/ccstruct/boxword.h
@@ -20,6 +20,7 @@
 #ifndef TESSERACT_CSTRUCT_BOXWORD_H_
 #define TESSERACT_CSTRUCT_BOXWORD_H_
 
+#include <functional>       // for std::function
 #include "genericvector.h"  // for GenericVector
 #include "rect.h"           // for TBOX
 
@@ -28,8 +29,6 @@
 class WERD;
 struct TWERD;
 
-template <typename T> class TessCallback1;
-
 namespace tesseract {
 
 // Class to hold an array of bounding boxes for an output word and
@@ -75,7 +74,7 @@ class BoxWord {
   // This and other putatively are the same, so call the (permanent) callback
   // for each blob index where the bounding boxes match.
-  // The callback is deleted on completion.
-  void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
+  // The callback is only invoked during this call; it is never deleted here.
+  void ProcessMatchedBlobs(const TWERD& other, std::function<void(int)> cb) const;
 
   const TBOX& bounding_box() const {
     return bbox_;