mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-04 01:39:16 +08:00
ccmain: Use C++-11 code instead of TessCallback1
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
eeec9c66d4
commit
ded24d0367
@ -91,11 +91,11 @@ bool Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
if (tessedit_debug_quality_metrics) {
|
||||
WERD_RES* word_res = pr_it->word();
|
||||
word_char_quality(word_res, pr_it->row()->row, &char_qual, &good_char_qual);
|
||||
word_char_quality(word_res, &char_qual, &good_char_qual);
|
||||
tprintf("\n%d chars; word_blob_quality: %d; outline_errs: %d; "
|
||||
"char_quality: %d; good_char_quality: %d\n",
|
||||
word_res->reject_map.length(),
|
||||
word_blob_quality(word_res, pr_it->row()->row),
|
||||
word_blob_quality(word_res),
|
||||
word_outline_errs(word_res), char_qual, good_char_qual);
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
@ -647,14 +647,13 @@ void Tesseract::rejection_passes(PAGE_RES* page_res,
|
||||
const int chars_in_word = word->reject_map.length();
|
||||
const int rejects_in_word = word->reject_map.reject_count();
|
||||
|
||||
const int blob_quality = word_blob_quality(word, page_res_it.row()->row);
|
||||
const int blob_quality = word_blob_quality(word);
|
||||
stats_.doc_blob_quality += blob_quality;
|
||||
const int outline_errs = word_outline_errs(word);
|
||||
stats_.doc_outline_errs += outline_errs;
|
||||
int16_t all_char_quality;
|
||||
int16_t accepted_all_char_quality;
|
||||
word_char_quality(word, page_res_it.row()->row,
|
||||
&all_char_quality, &accepted_all_char_quality);
|
||||
word_char_quality(word, &all_char_quality, &accepted_all_char_quality);
|
||||
stats_.doc_char_quality += all_char_quality;
|
||||
const uint8_t permuter_type = word->best_choice->permuter();
|
||||
if ((permuter_type == SYSTEM_DAWG_PERM) ||
|
||||
|
@ -1,8 +1,7 @@
|
||||
/******************************************************************
|
||||
* File: docqual.cpp (Formerly docqual.c)
|
||||
* Description: Document Quality Metrics
|
||||
* Author: Phil Cheatle
|
||||
* Created: Mon May 9 11:27:28 BST 1994
|
||||
* Author: Phil Cheatle
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -17,39 +16,31 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <cctype>
|
||||
#include "docqual.h"
|
||||
#include "reject.h"
|
||||
#include "tesscallback.h"
|
||||
#include "tessvars.h"
|
||||
#include "tesseractclass.h"
|
||||
#include <cctype>
|
||||
#include "docqual.h"
|
||||
#include "reject.h"
|
||||
#include "tessvars.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract{
|
||||
|
||||
// A little class to provide the callbacks as we have no pre-bound args.
|
||||
struct DocQualCallbacks {
|
||||
explicit DocQualCallbacks(WERD_RES* word0)
|
||||
: word(word0), match_count(0), accepted_match_count(0) {}
|
||||
static void countMatchingBlobs(int16_t& match_count, int /*index*/) {
|
||||
++match_count;
|
||||
}
|
||||
|
||||
void CountMatchingBlobs(int index) {
|
||||
++match_count;
|
||||
static void countAcceptedBlobs(WERD_RES* word, int16_t& match_count,
|
||||
int16_t& accepted_match_count, int index) {
|
||||
if (word->reject_map[index].accepted()) {
|
||||
++accepted_match_count;
|
||||
}
|
||||
++match_count;
|
||||
}
|
||||
|
||||
void CountAcceptedBlobs(int index) {
|
||||
if (word->reject_map[index].accepted())
|
||||
++accepted_match_count;
|
||||
++match_count;
|
||||
static void acceptIfGoodQuality(WERD_RES* word, int index) {
|
||||
if (word->reject_map[index].accept_if_good_quality()) {
|
||||
word->reject_map[index].setrej_quality_accept();
|
||||
}
|
||||
|
||||
void AcceptIfGoodQuality(int index) {
|
||||
if (word->reject_map[index].accept_if_good_quality())
|
||||
word->reject_map[index].setrej_quality_accept();
|
||||
}
|
||||
|
||||
WERD_RES* word;
|
||||
int16_t match_count;
|
||||
int16_t accepted_match_count;
|
||||
};
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
* word_blob_quality()
|
||||
@ -57,16 +48,16 @@ struct DocQualCallbacks {
|
||||
* ASSUME blobs in both initial word and box_word are in ascending order of
|
||||
* left hand blob edge.
|
||||
*************************************************************************/
|
||||
int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) {
|
||||
if (word->bln_boxes == nullptr ||
|
||||
word->rebuild_word == nullptr || word->rebuild_word->blobs.empty())
|
||||
return 0;
|
||||
|
||||
DocQualCallbacks cb(word);
|
||||
word->bln_boxes->ProcessMatchedBlobs(
|
||||
*word->rebuild_word,
|
||||
NewPermanentTessCallback(&cb, &DocQualCallbacks::CountMatchingBlobs));
|
||||
return cb.match_count;
|
||||
// Returns the number of blobs in word->rebuild_word whose bounding box matches
// the box at the same index in word->bln_boxes.
// Returns 0 if bln_boxes or rebuild_word is null, or if there are no blobs.
int16_t Tesseract::word_blob_quality(WERD_RES* word) {
  int16_t match_count = 0;
  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
      !word->rebuild_word->blobs.empty()) {
    using namespace std::placeholders;  // for _1
    // std::bind stores copies of its bound arguments. Without std::ref the
    // callback would increment a private copy of the counter and this
    // function would always return 0.
    word->bln_boxes->ProcessMatchedBlobs(
        *word->rebuild_word,
        std::bind(countMatchingBlobs, std::ref(match_count), _1));
  }
  return match_count;
}
|
||||
|
||||
int16_t Tesseract::word_outline_errs(WERD_RES *word) {
|
||||
@ -89,38 +80,31 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) {
|
||||
* Combination of blob quality and outline quality - how many good chars are
|
||||
* there? - I.e chars which pass the blob AND outline tests.
|
||||
*************************************************************************/
|
||||
void Tesseract::word_char_quality(WERD_RES *word,
|
||||
ROW *row,
|
||||
int16_t *match_count,
|
||||
int16_t *accepted_match_count) {
|
||||
if (word->bln_boxes == nullptr || word->rebuild_word == nullptr ||
|
||||
word->rebuild_word->blobs.empty()) {
|
||||
*match_count = 0;
|
||||
*accepted_match_count = 0;
|
||||
return;
|
||||
// Counts blobs whose bounding boxes match between word->bln_boxes and
// word->rebuild_word.
//   *match_count          receives the number of matching blobs.
//   *accepted_match_count receives the number of matching blobs whose
//                         reject_map entry is accepted.
// Both outputs are set to 0 if bln_boxes or rebuild_word is null, or if there
// are no blobs.
void Tesseract::word_char_quality(WERD_RES* word, int16_t* match_count,
                                  int16_t* accepted_match_count) {
  *match_count = 0;
  *accepted_match_count = 0;
  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
      !word->rebuild_word->blobs.empty()) {
    using namespace std::placeholders;  // for _1
    // std::bind stores copies of its bound arguments. The counters must be
    // passed through std::ref so that the callback updates the caller's
    // variables instead of copies held inside the bind object.
    word->bln_boxes->ProcessMatchedBlobs(
        *word->rebuild_word,
        std::bind(countAcceptedBlobs, word, std::ref(*match_count),
                  std::ref(*accepted_match_count), _1));
  }
}
|
||||
|
||||
/*************************************************************************
|
||||
* unrej_good_chs()
|
||||
* Unreject POTENTIAL rejects if the blob passes the blob and outline checks
|
||||
*************************************************************************/
|
||||
void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) {
|
||||
if (word->bln_boxes == nullptr ||
|
||||
word->rebuild_word == nullptr || word->rebuild_word->blobs.empty())
|
||||
return;
|
||||
|
||||
DocQualCallbacks cb(word);
|
||||
word->bln_boxes->ProcessMatchedBlobs(
|
||||
*word->rebuild_word,
|
||||
NewPermanentTessCallback(&cb, &DocQualCallbacks::AcceptIfGoodQuality));
|
||||
// Unrejects potential rejects: for every blob whose bounding box matches
// between word->bln_boxes and word->rebuild_word, the reject_map entry is
// marked quality-accepted if accept_if_good_quality() holds for it.
// Does nothing if bln_boxes or rebuild_word is null, or if there are no blobs.
void Tesseract::unrej_good_chs(WERD_RES* word) {
  // The blobs list must be NON-empty for there to be anything to process.
  // The pre-refactor code returned early when blobs.empty() was true.
  if (word->bln_boxes != nullptr && word->rebuild_word != nullptr &&
      !word->rebuild_word->blobs.empty()) {
    using namespace std::placeholders;  // for _1
    word->bln_boxes->ProcessMatchedBlobs(
        *word->rebuild_word, std::bind(acceptIfGoodQuality, word, _1));
  }
}
|
||||
|
||||
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
|
||||
@ -186,12 +170,12 @@ void Tesseract::unrej_good_quality_words( //unreject potential
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->unichar_lengths().string())
|
||||
!= AC_UNACCEPTABLE)) {
|
||||
unrej_good_chs(word, page_res_it.row ()->row);
|
||||
unrej_good_chs(word);
|
||||
}
|
||||
page_res_it.forward ();
|
||||
}
|
||||
else {
|
||||
/* Skip to end of dodgy row */
|
||||
// Skip to end of dodgy row.
|
||||
current_row = page_res_it.row ();
|
||||
while ((page_res_it.word () != nullptr) &&
|
||||
(page_res_it.row () == current_row))
|
||||
@ -285,10 +269,8 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->unichar_lengths().string()) !=
|
||||
AC_UNACCEPTABLE) {
|
||||
word_char_quality(word, page_res_it.row()->row,
|
||||
&char_quality,
|
||||
&accepted_char_quality);
|
||||
rej_word = char_quality != word->reject_map.length();
|
||||
word_char_quality(word, &char_quality, &accepted_char_quality);
|
||||
rej_word = char_quality != word->reject_map.length();
|
||||
}
|
||||
} else {
|
||||
rej_word = true;
|
||||
@ -356,8 +338,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->unichar_lengths().string()) !=
|
||||
AC_UNACCEPTABLE) {
|
||||
word_char_quality(word, page_res_it.row()->row,
|
||||
&char_quality,
|
||||
word_char_quality(word, &char_quality,
|
||||
&accepted_char_quality);
|
||||
rej_word = char_quality != word->reject_map.length();
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
* File: docqual.h (Formerly docqual.h)
|
||||
* Description: Document Quality Metrics
|
||||
* Author: Phil Cheatle
|
||||
* Created: Mon May 9 11:27:28 BST 1994
|
||||
*
|
||||
* (C) Copyright 1994, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -34,7 +33,7 @@ enum GARBAGE_LEVEL
|
||||
G_TERRIBLE
|
||||
};
|
||||
|
||||
int16_t word_blob_quality(WERD_RES *word, ROW *row);
|
||||
int16_t word_blob_quality(WERD_RES* word);
|
||||
void reject_whole_page(PAGE_RES_IT &page_res_it);
|
||||
|
||||
#endif
|
||||
|
@ -596,7 +596,7 @@ bool Tesseract::repeated_nonalphanum_wd(WERD_RES* word, ROW* row) {
|
||||
if (word->best_choice->unichar_id(i) != uch_id) return false;
|
||||
}
|
||||
|
||||
word_char_quality(word, row, &char_quality, &accepted_char_quality);
|
||||
word_char_quality(word, &char_quality, &accepted_char_quality);
|
||||
|
||||
if ((word->best_choice->unichar_lengths().length () == char_quality) &&
|
||||
(char_quality == accepted_char_quality))
|
||||
|
@ -648,10 +648,10 @@ class Tesseract : public Wordrec {
|
||||
void quality_based_rejection(PAGE_RES_IT& page_res_it, bool good_quality_doc);
|
||||
void convert_bad_unlv_chs(WERD_RES* word_res);
|
||||
void tilde_delete(PAGE_RES_IT& page_res_it);
|
||||
int16_t word_blob_quality(WERD_RES* word, ROW* row);
|
||||
void word_char_quality(WERD_RES* word, ROW* row, int16_t* match_count,
|
||||
int16_t word_blob_quality(WERD_RES* word);
|
||||
void word_char_quality(WERD_RES* word, int16_t* match_count,
|
||||
int16_t* accepted_match_count);
|
||||
void unrej_good_chs(WERD_RES* word, ROW* row);
|
||||
void unrej_good_chs(WERD_RES* word);
|
||||
int16_t count_outline_errs(char c, int16_t outline_count);
|
||||
int16_t word_outline_errs(WERD_RES* word);
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
@ -188,13 +188,12 @@ void BoxWord::ComputeBoundingBox() {
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is taken by value, so the caller has nothing to delete.
|
||||
void BoxWord::ProcessMatchedBlobs(const TWERD& other,
|
||||
TessCallback1<int>* cb) const {
|
||||
std::function<void(int)> cb) const {
|
||||
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
|
||||
TBOX blob_box = other.blobs[i]->bounding_box();
|
||||
if (blob_box == boxes_[i])
|
||||
cb->Run(i);
|
||||
cb(i);
|
||||
}
|
||||
delete cb;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -20,6 +20,7 @@
|
||||
#ifndef TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
#define TESSERACT_CSTRUCT_BOXWORD_H_
|
||||
|
||||
#include <functional> // for std::function
|
||||
#include "genericvector.h" // for GenericVector
|
||||
#include "rect.h" // for TBOX
|
||||
|
||||
@ -28,8 +29,6 @@ class WERD;
|
||||
|
||||
struct TWERD;
|
||||
|
||||
template <class A1> class TessCallback1;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Class to hold an array of bounding boxes for an output word and
|
||||
@ -75,7 +74,7 @@ class BoxWord {
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is taken by value, so the caller has nothing to delete.
|
||||
void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
|
||||
void ProcessMatchedBlobs(const TWERD& other, std::function<void(int)> cb) const;
|
||||
|
||||
const TBOX& bounding_box() const {
|
||||
return bbox_;
|
||||
|
Loading…
Reference in New Issue
Block a user