Remove old code which was used for Ocropus

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2020-04-27 16:33:34 +02:00
parent cdebe13d81
commit 1188e0a516
4 changed files with 0 additions and 528 deletions

View File

@ -738,54 +738,6 @@ class TESS_API TessBaseAPI {
void GetBlockTextOrientations(int** block_orientation,
bool** vertical_writing);
#ifndef DISABLED_LEGACY_ENGINE
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
/** Find lines from the image making the BLOCK_LIST. */
BLOCK_LIST* FindLinesCreateBlockList();
/**
* Delete a block list.
* This is to keep BLOCK_LIST pointer opaque
* and let go of including the other headers.
*/
static void DeleteBlockList(BLOCK_LIST* block_list);
/** Returns a ROW object created from the input row specification. */
static ROW* MakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
/** Returns a TBLOB corresponding to the entire input image. */
static TBLOB* MakeTBLOB(Pix* pix);
/**
* This method baseline normalizes a TBLOB in-place. The input row is used
* for normalization. The numeric_mode argument is accepted but is not used
* by the implementation.
*/
static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode);
/** This method returns the features associated with the input image. */
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* feature_outline_index);
/**
* This method returns the row to which a box of specified dimensions would
* belong. If no good match is found, it returns nullptr.
*/
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
int bottom);
/**
* Method to run adaptive classifier on a blob.
* It returns at most num_max_matches results.
*/
void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids,
float* ratings, int* num_matches_returned);
#endif // ndef DISABLED_LEGACY_ENGINE
/** This method returns the string form of the specified unichar. */
const char* GetUnichar(int unichar_id);
@ -848,40 +800,6 @@ class TESS_API TessBaseAPI {
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
#ifndef DISABLED_LEGACY_ENGINE
/** @defgroup ocropusAddOns ocropus add-ons */
/* @{ */
/**
* Adapt to recognize the current image as the given character.
* The image must be preloaded and be just an image of a single character.
*/
TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length,
float baseline, float xheight,
float descender, float ascender);
/** Recognize text doing one pass only, using settings for a given pass. */
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
PAGE_RES* pass1_result);
/**
* Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters.
*/
TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths,
float** costs, int** x0,
int** y0, int** x1, int** y1,
PAGE_RES* page_res);
/** Read-only access to the page results currently held in page_res_. */
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
/* @} */
#endif // ndef DISABLED_LEGACY_ENGINE
protected:
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.

View File

@ -524,40 +524,6 @@ TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor,
TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline);
#ifndef DISABLED_LEGACY_ENGINE
# ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle,
TessFillLatticeFunc f);
TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
INT_FEATURE_STRUCT* int_features,
int* num_features,
int* FeatureOutlineIndex);
TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
int num_max_matches,
int* unichar_ids, float* ratings,
int* num_matches_returned);
TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
TESS_API TBLOB* TessMakeTBLOB(Pix* pix);
TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list);
# endif // def TESS_CAPI_INCLUDE_BASEAPI
#endif // ndef DISABLED_LEGACY_ENGINE
#ifdef __cplusplus
}
#endif

View File

@ -2001,13 +2001,6 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
}
}
#ifndef DISABLED_LEGACY_ENGINE
/**
 * Points Wordrec::fill_lattice_ of the underlying Tesseract instance at f.
 * Does nothing when no Tesseract instance exists (tesseract_ is nullptr).
 */
void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
  if (tesseract_ == nullptr) {
    return;
  }
  tesseract_->fill_lattice_ = f;
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** Common code for setting the image. */
bool TessBaseAPI::InternalSetImage() {
if (tesseract_ == nullptr) {
@ -2338,361 +2331,4 @@ STRING HOcrEscape(const char* text) {
return ret;
}
#ifndef DISABLED_LEGACY_ENGINE
// ____________________________________________________________________________
// Ocropus add-ons.
/**
 * Runs FindLines() (asserting that it returns 0) and hands the resulting
 * BLOCK_LIST to the caller. The internal block_list_ pointer is reset to
 * nullptr, so this object no longer refers to the returned list.
 */
BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
  ASSERT_HOST(FindLines() == 0);
  BLOCK_LIST* detached = block_list_;
  block_list_ = nullptr;
  return detached;
}
/**
 * Destroys a BLOCK_LIST with delete.
 * Exists so that callers can treat BLOCK_LIST as an opaque pointer
 * without including the headers that define it.
 */
void TessBaseAPI::DeleteBlockList(BLOCK_LIST* block_list) {
  delete block_list;
}
/**
 * Returns a newly allocated ROW built from the given line metrics.
 * The ROW receives xheight directly, plus two derived offsets:
 * ascender - (baseline + xheight) and descender - baseline.
 * The result is created with new; the caller is expected to delete it.
 */
ROW* TessBaseAPI::MakeTessOCRRow(float baseline, float xheight,
                                 float descender, float ascender) {
  // Single spline segment; presumably the coefficients describe a constant
  // baseline at y = baseline (quadratic and linear terms are zero).
  int32_t segment_starts[] = {-32000};
  double baseline_coeffs[] = {0, 0, baseline};
  const float ascender_rise = ascender - (baseline + xheight);
  const float descender_drop = descender - baseline;
  return new ROW(1, segment_starts, baseline_coeffs, xheight, ascender_rise,
                 descender_drop, 0, 0);
}
/**
 * Creates a TBLOB* from the whole pix.
 * All outlines found in the image are merged into a single blob before the
 * polygonal copy is made. Returns nullptr when no blob is found.
 */
TBLOB* TessBaseAPI::MakeTBLOB(Pix* pix) {
  const int width = pixGetWidth(pix);
  const int height = pixGetHeight(pix);
  BLOCK block("a character", true, 0, 0, 0, 0, width, height);

  // Populate the block with C_BLOBs extracted from the image.
  extract_edges(pix, &block);

  C_BLOB_IT blob_it(block.blob_list());
  if (blob_it.empty()) {
    return nullptr;
  }

  // Append the outlines of every other blob to the first blob's list.
  C_OUTLINE_IT outline_it(blob_it.data()->out_list());
  blob_it.forward();
  while (!blob_it.at_first()) {
    outline_it.add_list_after(blob_it.data()->out_list());
    blob_it.forward();
  }

  // The iterator is back at the first blob, which now owns all outlines.
  return TBLOB::PolygonalCopy(false, blob_it.data());
}
/**
 * Baseline-normalizes a TBLOB in-place using the given row.
 * The blob is centred on the horizontal middle of its bounding box, the
 * row's baseline at that x is used as the vertical origin, and both axes are
 * scaled by kBlnXHeight / row->x_height().
 * numeric_mode is accepted but not used here.
 */
void TessBaseAPI::NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode) {
  const TBOX bounds = tblob->bounding_box();
  const float mid_x = (bounds.left() + bounds.right()) / 2.0f;
  const float baseline_y = row->base_line(mid_x);
  const float bln_scale = kBlnXHeight / row->x_height();
  const float final_y_offset = static_cast<float>(kBlnBaselineOffset);
  tblob->Normalize(nullptr, nullptr, nullptr, mid_x, baseline_y, bln_scale,
                   bln_scale, 0.0f, final_y_offset, false, nullptr);
}
/**
 * Return a baseline-normalized TBLOB * made from the whole pix.
 * Returns nullptr when MakeTBLOB finds no blob in the image; otherwise the
 * result is to be freed later with delete.
 */
static TBLOB *make_tesseract_blob(float baseline, float xheight,
                                  float descender, float ascender,
                                  bool numeric_mode, Pix* pix) {
  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
  // MakeTBLOB returns nullptr for an image without blobs; NormalizeTBLOB
  // dereferences its blob argument, so bail out before calling it.
  if (tblob == nullptr) {
    return nullptr;
  }
  // Normalize TBLOB against a temporary row built from the given metrics.
  ROW *row =
      TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
  delete row;
  return tblob;
}

/**
 * Adapt to recognize the current image as the given character.
 * The image must be preloaded into pix_binary_ and be just an image
 * of a single character. If no blob can be extracted from the image the
 * call is a no-op.
 *
 * unichar_repr/length identify the character; baseline, xheight, descender
 * and ascender are the line metrics used to normalize the blob.
 */
void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
                                   int length,
                                   float baseline,
                                   float xheight,
                                   float descender,
                                   float ascender) {
  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
                                    tesseract_->classify_bln_numeric_mode,
                                    tesseract_->pix_binary());
  if (blob == nullptr) {
    return;  // Nothing in the image to adapt to.
  }

  // Classify to get a raw choice. The choices themselves are not inspected.
  BLOB_CHOICE_LIST choices;
  tesseract_->AdaptiveClassifier(blob, &choices);

  float threshold = tesseract_->matcher_good_threshold;
  if (blob->outlines)
    tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
                            tesseract_->AdaptedTemplates);
  delete blob;
}
/**
 * Builds a fresh PAGE_RES for block_list and runs recog_all_words on it with
 * the pass selector set to 1. The PAGE_RES is allocated with new and
 * returned to the caller.
 */
PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
  PAGE_RES* pass1_result = new PAGE_RES(
      false, block_list, &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(pass1_result, nullptr, nullptr, nullptr, 1);
  return pass1_result;
}
/**
 * Runs recog_all_words with the pass selector set to 2.
 * When pass1_result is nullptr a new PAGE_RES is built from block_list
 * first; otherwise pass1_result is reused. The PAGE_RES that was processed
 * is returned.
 */
PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
                                        PAGE_RES* pass1_result) {
  PAGE_RES* page_res =
      (pass1_result != nullptr)
          ? pass1_result
          : new PAGE_RES(false, block_list,
                         &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(page_res, nullptr, nullptr, nullptr, 2);
  return page_res;
}
/**
 * List element describing one extracted character: its text, a cost and a
 * bounding box. The text buffer is owned by the element and released in the
 * destructor.
 */
struct TESS_CHAR : ELIST_LINK {
  char *unicode_repr;  // Owned copy of the character's bytes, NUL-terminated.
  int length;          // of unicode_repr, not counting the terminating NUL
  float cost;
  TBOX box;

  /**
   * Copies len bytes of repr (or strlen(repr) bytes when len is -1) into a
   * newly allocated buffer.
   */
  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
    length = (len == -1 ? static_cast<int>(strlen(repr)) : len);
    unicode_repr = new char[length + 1];
    strncpy(unicode_repr, repr, length);
    // strncpy writes no terminator when repr holds at least length bytes,
    // so fill the extra byte that was allocated for it.
    unicode_repr[length] = '\0';
  }

  TESS_CHAR()
    : unicode_repr(nullptr),
      length(0),
      cost(0.0f)
  {  // Satisfies ELISTIZE.
  }

  ~TESS_CHAR() {
    delete [] unicode_repr;
  }
};

ELISTIZEH(TESS_CHAR)
ELISTIZE(TESS_CHAR)
/** Appends a zero-cost " " element after the iterator and moves onto it. */
static void add_space(TESS_CHAR_IT* it) {
  it->add_after_then_move(new TESS_CHAR(0, " "));
}
/**
 * Converts a rating into a non-negative cost: 100 + rating, clamped at 0.
 * Ratings below -100 are not expected, but the clamp keeps the result from
 * going negative if one shows up.
 */
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  return (shifted < 0) ? 0 : shifted;
}
/**
 * Walks every word of page_res and appends one TESS_CHAR per character of
 * the word's best choice to out. Each character gets the cost derived from
 * the word's rating and the intersection of the word box with that
 * character's blob box. A space element is inserted between consecutive
 * words.
 */
static void extract_result(TESS_CHAR_IT* out,
                           PAGE_RES* page_res) {
  PAGE_RES_IT page_res_it(page_res);
  for (int word_index = 0; page_res_it.word() != nullptr;
       page_res_it.forward(), ++word_index) {
    WERD_RES* word = page_res_it.word();
    const char* utf8 = word->best_choice->unichar_string().c_str();
    // One byte per character: the byte length of that character in utf8.
    const char* step = word->best_choice->unichar_lengths().c_str();
    TBOX word_box = word->word->bounding_box();

    if (word_index != 0) {
      add_space(out);
    }

    const int char_count = strlen(step);
    for (int i = 0; i < char_count; ++i, ++step) {
      auto* tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
                               utf8, *step);
      tc->box = word_box.intersection(word->box_word->BlobBox(i));
      out->add_after_then_move(tc);
      utf8 += *step;
    }
  }
}
/**
 * Extract the OCR results, costs (penalty points for uncertainty),
 * and the bounding boxes of the characters.
 *
 * On return every output pointer refers to an array allocated with new[];
 * ownership passes to the caller. lengths, costs, x0, y0, x1 and y1 each
 * hold one entry per extracted character. text holds the characters' bytes
 * concatenated in order (entry i occupies (*lengths)[i] bytes) followed by
 * a terminating NUL.
 *
 * Returns the number of extracted characters.
 */
int TessBaseAPI::TesseractExtractResult(char** text,
                                        int** lengths,
                                        float** costs,
                                        int** x0,
                                        int** y0,
                                        int** x1,
                                        int** y1,
                                        PAGE_RES* page_res) {
  TESS_CHAR_LIST tess_chars;
  TESS_CHAR_IT tess_chars_it(&tess_chars);
  extract_result(&tess_chars_it, page_res);
  tess_chars_it.move_to_first();
  int n = tess_chars.length();
  int text_len = 0;
  *lengths = new int[n];
  *costs = new float[n];
  *x0 = new int[n];
  *y0 = new int[n];
  *x1 = new int[n];
  *y1 = new int[n];

  // First pass: per-character metadata and the total text size.
  int i = 0;
  for (tess_chars_it.mark_cycle_pt();
       !tess_chars_it.cycled_list();
       tess_chars_it.forward(), i++) {
    TESS_CHAR *tc = tess_chars_it.data();
    text_len += (*lengths)[i] = tc->length;
    (*costs)[i] = tc->cost;
    (*x0)[i] = tc->box.left();
    (*y0)[i] = tc->box.bottom();
    (*x1)[i] = tc->box.right();
    (*y1)[i] = tc->box.top();
  }

  // Second pass: concatenate the character bytes. One extra byte is
  // reserved so the buffer can also be used as a C string.
  char *p = *text = new char[text_len + 1];
  tess_chars_it.move_to_first();
  for (tess_chars_it.mark_cycle_pt();
       !tess_chars_it.cycled_list();
       tess_chars_it.forward()) {
    TESS_CHAR *tc = tess_chars_it.data();
    strncpy(p, tc->unicode_repr, tc->length);
    p += tc->length;
  }
  *p = '\0';
  return n;
}
/**
 * Returns the features associated with the input blob.
 *
 * The features are copied into int_features, which must have room for
 * MAX_NUM_INT_FEATURES entries. num_features receives the number of
 * features copied, or 0 when extraction produced no features or more than
 * MAX_NUM_INT_FEATURES.
 *
 * When feature_outline_index is not nullptr, entry f is set to the index of
 * the first outline whose outline_counts value exceeds f.
 */
// TODO(rays) Fix the caller to use outline_counts instead.
void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
                                     INT_FEATURE_STRUCT* int_features,
                                     int* num_features,
                                     int* feature_outline_index) {
  GenericVector<int> outline_counts;
  GenericVector<INT_FEATURE_STRUCT> bl_features;
  GenericVector<INT_FEATURE_STRUCT> cn_features;
  INT_FX_RESULT_STRUCT fx_info;
  tesseract_->ExtractFeatures(*blob, false, &bl_features,
                              &cn_features, &fx_info, &outline_counts);

  const bool extraction_ok =
      !cn_features.empty() && cn_features.size() <= MAX_NUM_INT_FEATURES;
  if (!extraction_ok) {
    *num_features = 0;
    return;  // Feature extraction failed.
  }

  *num_features = cn_features.size();
  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));

  // TODO(rays) Pass outline_counts back and simplify the calling code.
  if (feature_outline_index == nullptr) {
    return;
  }
  int next_feature = 0;
  for (int outline = 0; outline < outline_counts.size(); ++outline) {
    for (; next_feature < outline_counts[outline]; ++next_feature) {
      feature_outline_index[next_feature] = outline;
    }
  }
}
// Returns the row to which a box of the given dimensions would belong.
// A row qualifies when the box has a major overlap with the row's block,
// with the row itself, and with at least one word in the row. The first
// qualifying row is returned; nullptr is returned when there is none.
ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
                                int left, int top, int right, int bottom) {
  TBOX target(left, bottom, right, top);
  BLOCK_IT block_it(blocks);
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
    BLOCK* block = block_it.data();
    if (target.major_overlap(block->pdblk.bounding_box())) {
      ROW_IT row_it(block->row_list());
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
        ROW* row = row_it.data();
        if (target.major_overlap(row->bounding_box())) {
          WERD_IT word_it(row->word_list());
          for (word_it.mark_cycle_pt(); !word_it.cycled_list();
               word_it.forward()) {
            WERD* word = word_it.data();
            if (target.major_overlap(word->bounding_box())) {
              return row;
            }
          }
        }
      }
    }
  }
  return nullptr;
}
/**
 * Method to run adaptive classifier on a blob.
 *
 * Copies the unichar id and rating of at most num_max_matches choices, in
 * the order the classifier's choice list yields them, into unichar_ids and
 * ratings. Both arrays must have room for num_max_matches entries.
 * num_matches_returned receives the number of entries written.
 */
void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
                                        int num_max_matches,
                                        int* unichar_ids,
                                        float* ratings,
                                        int* num_matches_returned) {
  // Automatic storage: the list is released on every exit path without an
  // explicit delete.
  BLOB_CHOICE_LIST choices;
  tesseract_->AdaptiveClassifier(blob, &choices);
  BLOB_CHOICE_IT choices_it(&choices);
  int count = 0;
  for (choices_it.mark_cycle_pt();
       !choices_it.cycled_list() && count < num_max_matches;
       choices_it.forward()) {
    BLOB_CHOICE* choice = choices_it.data();
    unichar_ids[count] = choice->unichar_id();
    ratings[count] = choice->rating();
    ++count;
  }
  *num_matches_returned = count;
}
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.

View File

@ -41,12 +41,6 @@ void TessDeleteIntArray(const int* arr) {
delete[] arr;
}
#ifndef DISABLED_LEGACY_ENGINE
/** C API: forwards to TessBaseAPI::DeleteBlockList. */
void TessDeleteBlockList(BLOCK_LIST* block_list) {
  TessBaseAPI::DeleteBlockList(block_list);
}
#endif
TessResultRenderer*
TessTextRendererCreate(const char* outputbase) {
return new tesseract::TessTextRenderer(outputbase);
@ -597,25 +591,6 @@ BOOL TessBaseAPIDetectOrientationScript(
return static_cast<BOOL>(success);
}
/** C API: forwards to handle->GetFeaturesForBlob with the same arguments. */
void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
                                   INT_FEATURE_STRUCT* int_features,
                                   int* num_features,
                                   int* FeatureOutlineIndex) {
  handle->GetFeaturesForBlob(blob, int_features, num_features,
                             FeatureOutlineIndex);
}
/** C API: forwards to TessBaseAPI::FindRowForBox and returns its result. */
ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
                       int bottom) {
  ROW* row = TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom);
  return row;
}
/** C API: forwards to handle->RunAdaptiveClassifier with the same arguments. */
void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
                                      int num_max_matches, int* unichar_ids,
                                      float* ratings,
                                      int* num_matches_returned) {
  handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings,
                                num_matches_returned);
}
#endif // ndef DISABLED_LEGACY_ENGINE
const char* TessBaseAPIGetUnichar(TessBaseAPI* handle,
@ -632,22 +607,6 @@ int TessBaseAPINumDawgs(const TessBaseAPI* handle) {
return handle->NumDawgs();
}
#ifndef DISABLED_LEGACY_ENGINE
/** C API: forwards to TessBaseAPI::MakeTessOCRRow and returns its result. */
ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
                        float ascender) {
  ROW* row =
      TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
  return row;
}
/** C API: forwards to TessBaseAPI::MakeTBLOB and returns its result. */
TBLOB* TessMakeTBLOB(struct Pix* pix) {
  TBLOB* blob = TessBaseAPI::MakeTBLOB(pix);
  return blob;
}
/**
 * C API: forwards to TessBaseAPI::NormalizeTBLOB, converting the C BOOL
 * flag to a C++ bool first.
 */
void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode) {
  const bool numeric = static_cast<bool>(numeric_mode);
  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric);
}
#endif // ndef DISABLED_LEGACY_ENGINE
/** C API: returns the value of handle->oem(). */
TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle) {
  const TessOcrEngineMode mode = handle->oem();
  return mode;
}
@ -667,13 +626,6 @@ void TessBaseGetBlockTextOrientations(
handle->GetBlockTextOrientations(block_orientation, vertical_writing);
}
#ifndef DISABLED_LEGACY_ENGINE
/** C API: forwards to handle->FindLinesCreateBlockList and returns its result. */
BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) {
  BLOCK_LIST* blocks = handle->FindLinesCreateBlockList();
  return blocks;
}
#endif
/** C API: destroys the page iterator with delete. */
void TessPageIteratorDelete(TessPageIterator* handle) {
  delete handle;
}