mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 18:13:42 +08:00
Remove old code which was used for Ocropus
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
cdebe13d81
commit
1188e0a516
@ -738,54 +738,6 @@ class TESS_API TessBaseAPI {
|
|||||||
void GetBlockTextOrientations(int** block_orientation,
|
void GetBlockTextOrientations(int** block_orientation,
|
||||||
bool** vertical_writing);
|
bool** vertical_writing);
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
|
||||||
void SetFillLatticeFunc(FillLatticeFunc f);
|
|
||||||
|
|
||||||
/** Find lines from the image making the BLOCK_LIST. */
|
|
||||||
BLOCK_LIST* FindLinesCreateBlockList();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete a block list.
|
|
||||||
* This is to keep BLOCK_LIST pointer opaque
|
|
||||||
* and let go of including the other headers.
|
|
||||||
*/
|
|
||||||
static void DeleteBlockList(BLOCK_LIST* block_list);
|
|
||||||
|
|
||||||
/** Returns a ROW object created from the input row specification. */
|
|
||||||
static ROW* MakeTessOCRRow(float baseline, float xheight, float descender,
|
|
||||||
float ascender);
|
|
||||||
|
|
||||||
/** Returns a TBLOB corresponding to the entire input image. */
|
|
||||||
static TBLOB* MakeTBLOB(Pix* pix);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method baseline normalizes a TBLOB in-place. The input row is used
|
|
||||||
* for normalization. The denorm is an optional parameter in which the
|
|
||||||
* normalization-antidote is returned.
|
|
||||||
*/
|
|
||||||
static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode);
|
|
||||||
|
|
||||||
/** This method returns the features associated with the input image. */
|
|
||||||
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
|
|
||||||
int* num_features, int* feature_outline_index);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method returns the row to which a box of specified dimensions would
|
|
||||||
* belong. If no good match is found, it returns nullptr.
|
|
||||||
*/
|
|
||||||
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
|
|
||||||
int bottom);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Method to run adaptive classifier on a blob.
|
|
||||||
* It returns at max num_max_matches results.
|
|
||||||
*/
|
|
||||||
void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids,
|
|
||||||
float* ratings, int* num_matches_returned);
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
/** This method returns the string form of the specified unichar. */
|
/** This method returns the string form of the specified unichar. */
|
||||||
const char* GetUnichar(int unichar_id);
|
const char* GetUnichar(int unichar_id);
|
||||||
|
|
||||||
@ -848,40 +800,6 @@ class TESS_API TessBaseAPI {
|
|||||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||||
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
|
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
/** @defgroup ocropusAddOns ocropus add-ons */
|
|
||||||
/* @{ */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adapt to recognize the current image as the given character.
|
|
||||||
* The image must be preloaded and be just an image of a single character.
|
|
||||||
*/
|
|
||||||
TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length,
|
|
||||||
float baseline, float xheight,
|
|
||||||
float descender, float ascender);
|
|
||||||
|
|
||||||
/** Recognize text doing one pass only, using settings for a given pass. */
|
|
||||||
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
|
|
||||||
|
|
||||||
TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
|
|
||||||
PAGE_RES* pass1_result);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract the OCR results, costs (penalty points for uncertainty),
|
|
||||||
* and the bounding boxes of the characters.
|
|
||||||
*/
|
|
||||||
TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths,
|
|
||||||
float** costs, int** x0,
|
|
||||||
int** y0, int** x1, int** y1,
|
|
||||||
PAGE_RES* page_res);
|
|
||||||
|
|
||||||
/** Returns the page_res_ member as a pointer-to-const. */
TESS_LOCAL const PAGE_RES* GetPageRes() const
{
  return page_res_;
}
|
|
||||||
/* @} */
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Tesseract* tesseract_; ///< The underlying data object.
|
Tesseract* tesseract_; ///< The underlying data object.
|
||||||
Tesseract* osd_tesseract_; ///< For orientation & script detection.
|
Tesseract* osd_tesseract_; ///< For orientation & script detection.
|
||||||
|
@ -524,40 +524,6 @@ TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor,
|
|||||||
TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor);
|
TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor);
|
||||||
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline);
|
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline);
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
# ifdef TESS_CAPI_INCLUDE_BASEAPI
|
|
||||||
TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle,
|
|
||||||
TessFillLatticeFunc f);
|
|
||||||
|
|
||||||
TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
|
|
||||||
INT_FEATURE_STRUCT* int_features,
|
|
||||||
int* num_features,
|
|
||||||
int* FeatureOutlineIndex);
|
|
||||||
|
|
||||||
TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
|
|
||||||
int right, int bottom);
|
|
||||||
|
|
||||||
TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
|
|
||||||
int num_max_matches,
|
|
||||||
int* unichar_ids, float* ratings,
|
|
||||||
int* num_matches_returned);
|
|
||||||
|
|
||||||
TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
|
|
||||||
float ascender);
|
|
||||||
|
|
||||||
TESS_API TBLOB* TessMakeTBLOB(Pix* pix);
|
|
||||||
|
|
||||||
TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
|
|
||||||
|
|
||||||
TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
|
|
||||||
|
|
||||||
TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list);
|
|
||||||
|
|
||||||
# endif // def TESS_CAPI_INCLUDE_BASEAPI
|
|
||||||
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2001,13 +2001,6 @@ void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
|
||||||
void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
|
|
||||||
if (tesseract_ != nullptr) tesseract_->fill_lattice_ = f;
|
|
||||||
}
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
/** Common code for setting the image. */
|
/** Common code for setting the image. */
|
||||||
bool TessBaseAPI::InternalSetImage() {
|
bool TessBaseAPI::InternalSetImage() {
|
||||||
if (tesseract_ == nullptr) {
|
if (tesseract_ == nullptr) {
|
||||||
@ -2338,361 +2331,4 @@ STRING HOcrEscape(const char* text) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
|
|
||||||
// ____________________________________________________________________________
|
|
||||||
// Ocropus add-ons.
|
|
||||||
|
|
||||||
/** Find lines from the image making the BLOCK_LIST. */
|
|
||||||
BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
|
|
||||||
ASSERT_HOST(FindLines() == 0);
|
|
||||||
BLOCK_LIST* result = block_list_;
|
|
||||||
block_list_ = nullptr;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete a block list.
|
|
||||||
* This is to keep BLOCK_LIST pointer opaque
|
|
||||||
* and let go of including the other headers.
|
|
||||||
*/
|
|
||||||
void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
|
|
||||||
delete block_list;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Returns a newly allocated ROW built from the given row specification.
 * The baseline becomes the constant term of a single quadratic segment
 * ({0, 0, baseline}) starting at x = -32000. The ascender is passed to the
 * ROW as ascender - (baseline + xheight) and the descender as
 * descender - baseline.
 * The ROW is created with new; the caller is responsible for deleting it.
 */
ROW* TessBaseAPI::MakeTessOCRRow(float baseline, float xheight,
                                 float descender, float ascender) {
  int32_t segment_starts[] = {-32000};
  double baseline_coeffs[] = {0, 0, baseline};
  return new ROW(1, segment_starts, baseline_coeffs, xheight,
                 ascender - (baseline + xheight), descender - baseline, 0, 0);
}
|
|
||||||
|
|
||||||
/** Creates a TBLOB* from the whole pix. */
|
|
||||||
TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
|
|
||||||
int width = pixGetWidth(pix);
|
|
||||||
int height = pixGetHeight(pix);
|
|
||||||
BLOCK block("a character", true, 0, 0, 0, 0, width, height);
|
|
||||||
|
|
||||||
// Create C_BLOBs from the page
|
|
||||||
extract_edges(pix, &block);
|
|
||||||
|
|
||||||
// Merge all C_BLOBs
|
|
||||||
C_BLOB_LIST *list = block.blob_list();
|
|
||||||
C_BLOB_IT c_blob_it(list);
|
|
||||||
if (c_blob_it.empty())
|
|
||||||
return nullptr;
|
|
||||||
// Move all the outlines to the first blob.
|
|
||||||
C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
|
|
||||||
for (c_blob_it.forward();
|
|
||||||
!c_blob_it.at_first();
|
|
||||||
c_blob_it.forward()) {
|
|
||||||
C_BLOB *c_blob = c_blob_it.data();
|
|
||||||
ol_it.add_list_after(c_blob->out_list());
|
|
||||||
}
|
|
||||||
// Convert the first blob to the output TBLOB.
|
|
||||||
return TBLOB::PolygonalCopy(false, c_blob_it.data());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method baseline normalizes a TBLOB in-place. The input row is used
|
|
||||||
* for normalization. The denorm is an optional parameter in which the
|
|
||||||
* normalization-antidote is returned.
|
|
||||||
*/
|
|
||||||
void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
|
|
||||||
TBOX box = tblob->bounding_box();
|
|
||||||
float x_center = (box.left() + box.right()) / 2.0f;
|
|
||||||
float baseline = row->base_line(x_center);
|
|
||||||
float scale = kBlnXHeight / row->x_height();
|
|
||||||
tblob->Normalize(nullptr, nullptr, nullptr, x_center, baseline, scale, scale,
|
|
||||||
0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return a TBLOB * from the whole pix.
|
|
||||||
* To be freed later with delete.
|
|
||||||
*/
|
|
||||||
static TBLOB *make_tesseract_blob(float baseline, float xheight,
|
|
||||||
float descender, float ascender,
|
|
||||||
bool numeric_mode, Pix* pix) {
|
|
||||||
TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
|
|
||||||
|
|
||||||
// Normalize TBLOB
|
|
||||||
ROW *row =
|
|
||||||
TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
|
|
||||||
TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
|
|
||||||
delete row;
|
|
||||||
return tblob;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adapt to recognize the current image as the given character.
|
|
||||||
* The image must be preloaded into pix_binary_ and be just an image
|
|
||||||
* of a single character.
|
|
||||||
*/
|
|
||||||
void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
|
|
||||||
int length,
|
|
||||||
float baseline,
|
|
||||||
float xheight,
|
|
||||||
float descender,
|
|
||||||
float ascender) {
|
|
||||||
UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
|
|
||||||
TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
|
|
||||||
tesseract_->classify_bln_numeric_mode,
|
|
||||||
tesseract_->pix_binary());
|
|
||||||
float threshold;
|
|
||||||
float best_rating = -100;
|
|
||||||
|
|
||||||
|
|
||||||
// Classify to get a raw choice.
|
|
||||||
BLOB_CHOICE_LIST choices;
|
|
||||||
tesseract_->AdaptiveClassifier(blob, &choices);
|
|
||||||
BLOB_CHOICE_IT choice_it;
|
|
||||||
choice_it.set_to_list(&choices);
|
|
||||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
|
|
||||||
choice_it.forward()) {
|
|
||||||
if (choice_it.data()->rating() > best_rating) {
|
|
||||||
best_rating = choice_it.data()->rating();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
threshold = tesseract_->matcher_good_threshold;
|
|
||||||
|
|
||||||
if (blob->outlines)
|
|
||||||
tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
|
|
||||||
tesseract_->AdaptedTemplates);
|
|
||||||
delete blob;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Runs recognition pass 1 on block_list.
 * The blocks are wrapped in a newly allocated PAGE_RES, which is handed to
 * recog_all_words() with 1 as its last argument and then returned. The
 * PAGE_RES is created with new and not freed here.
 */
PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
  PAGE_RES* results =
      new PAGE_RES(false, block_list, &(tesseract_->prev_word_best_choice_));
  tesseract_->recog_all_words(results, nullptr, nullptr, nullptr, 1);
  return results;
}
|
|
||||||
|
|
||||||
/**
 * Runs recognition pass 2.
 * When pass1_result is null a fresh PAGE_RES is allocated from block_list;
 * otherwise pass1_result is reused. The PAGE_RES is handed to
 * recog_all_words() with 2 as its last argument and returned.
 */
PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
                                        PAGE_RES* pass1_result) {
  PAGE_RES* results = pass1_result;
  if (results == nullptr) {
    results =
        new PAGE_RES(false, block_list, &(tesseract_->prev_word_best_choice_));
  }
  tesseract_->recog_all_words(results, nullptr, nullptr, nullptr, 2);
  return results;
}
|
|
||||||
|
|
||||||
struct TESS_CHAR : ELIST_LINK {
|
|
||||||
char *unicode_repr;
|
|
||||||
int length; // of unicode_repr
|
|
||||||
float cost;
|
|
||||||
TBOX box;
|
|
||||||
|
|
||||||
TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
|
|
||||||
length = (len == -1 ? strlen(repr) : len);
|
|
||||||
unicode_repr = new char[length + 1];
|
|
||||||
strncpy(unicode_repr, repr, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
TESS_CHAR()
|
|
||||||
: unicode_repr(nullptr),
|
|
||||||
length(0),
|
|
||||||
cost(0.0f)
|
|
||||||
{ // Satisfies ELISTIZE.
|
|
||||||
}
|
|
||||||
~TESS_CHAR() {
|
|
||||||
delete [] unicode_repr;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
ELISTIZEH(TESS_CHAR)
|
|
||||||
ELISTIZE(TESS_CHAR)
|
|
||||||
|
|
||||||
/**
 * Adds a zero-cost TESS_CHAR holding a single space after the iterator's
 * current position and moves the iterator onto it.
 */
static void add_space(TESS_CHAR_IT* it) {
  it->add_after_then_move(new TESS_CHAR(0, " "));
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Converts a rating to a cost by adding 100 and clamping at zero.
 * The clamp only matters for ratings below -100.
 */
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  return shifted < 0 ? 0 : shifted;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract the OCR results, costs (penalty points for uncertainty),
|
|
||||||
* and the bounding boxes of the characters.
|
|
||||||
*/
|
|
||||||
static void extract_result(TESS_CHAR_IT* out,
|
|
||||||
PAGE_RES* page_res) {
|
|
||||||
PAGE_RES_IT page_res_it(page_res);
|
|
||||||
int word_count = 0;
|
|
||||||
while (page_res_it.word() != nullptr) {
|
|
||||||
WERD_RES *word = page_res_it.word();
|
|
||||||
const char *str = word->best_choice->unichar_string().c_str();
|
|
||||||
const char *len = word->best_choice->unichar_lengths().c_str();
|
|
||||||
TBOX real_rect = word->word->bounding_box();
|
|
||||||
|
|
||||||
if (word_count)
|
|
||||||
add_space(out);
|
|
||||||
int n = strlen(len);
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
auto *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
|
|
||||||
str, *len);
|
|
||||||
tc->box = real_rect.intersection(word->box_word->BlobBox(i));
|
|
||||||
out->add_after_then_move(tc);
|
|
||||||
str += *len;
|
|
||||||
len++;
|
|
||||||
}
|
|
||||||
page_res_it.forward();
|
|
||||||
word_count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract the OCR results, costs (penalty points for uncertainty),
|
|
||||||
* and the bounding boxes of the characters.
|
|
||||||
*/
|
|
||||||
int TessBaseAPI::TesseractExtractResult(char** text,
|
|
||||||
int** lengths,
|
|
||||||
float** costs,
|
|
||||||
int** x0,
|
|
||||||
int** y0,
|
|
||||||
int** x1,
|
|
||||||
int** y1,
|
|
||||||
PAGE_RES* page_res) {
|
|
||||||
TESS_CHAR_LIST tess_chars;
|
|
||||||
TESS_CHAR_IT tess_chars_it(&tess_chars);
|
|
||||||
extract_result(&tess_chars_it, page_res);
|
|
||||||
tess_chars_it.move_to_first();
|
|
||||||
int n = tess_chars.length();
|
|
||||||
int text_len = 0;
|
|
||||||
*lengths = new int[n];
|
|
||||||
*costs = new float[n];
|
|
||||||
*x0 = new int[n];
|
|
||||||
*y0 = new int[n];
|
|
||||||
*x1 = new int[n];
|
|
||||||
*y1 = new int[n];
|
|
||||||
int i = 0;
|
|
||||||
for (tess_chars_it.mark_cycle_pt();
|
|
||||||
!tess_chars_it.cycled_list();
|
|
||||||
tess_chars_it.forward(), i++) {
|
|
||||||
TESS_CHAR *tc = tess_chars_it.data();
|
|
||||||
text_len += (*lengths)[i] = tc->length;
|
|
||||||
(*costs)[i] = tc->cost;
|
|
||||||
(*x0)[i] = tc->box.left();
|
|
||||||
(*y0)[i] = tc->box.bottom();
|
|
||||||
(*x1)[i] = tc->box.right();
|
|
||||||
(*y1)[i] = tc->box.top();
|
|
||||||
}
|
|
||||||
char *p = *text = new char[text_len];
|
|
||||||
|
|
||||||
tess_chars_it.move_to_first();
|
|
||||||
for (tess_chars_it.mark_cycle_pt();
|
|
||||||
!tess_chars_it.cycled_list();
|
|
||||||
tess_chars_it.forward()) {
|
|
||||||
TESS_CHAR *tc = tess_chars_it.data();
|
|
||||||
strncpy(p, tc->unicode_repr, tc->length);
|
|
||||||
p += tc->length;
|
|
||||||
}
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** This method returns the features associated with the input blob. */
|
|
||||||
// The resulting features are returned in int_features, which must be
|
|
||||||
// of size MAX_NUM_INT_FEATURES. The number of features is returned in
|
|
||||||
// num_features (or 0 if there was a failure).
|
|
||||||
// On return feature_outline_index is filled with an index of the outline
|
|
||||||
// corresponding to each feature in int_features.
|
|
||||||
// TODO(rays) Fix the caller to out outline_counts instead.
|
|
||||||
void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob,
|
|
||||||
INT_FEATURE_STRUCT* int_features,
|
|
||||||
int* num_features,
|
|
||||||
int* feature_outline_index) {
|
|
||||||
GenericVector<int> outline_counts;
|
|
||||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
|
||||||
GenericVector<INT_FEATURE_STRUCT> cn_features;
|
|
||||||
INT_FX_RESULT_STRUCT fx_info;
|
|
||||||
tesseract_->ExtractFeatures(*blob, false, &bl_features,
|
|
||||||
&cn_features, &fx_info, &outline_counts);
|
|
||||||
if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
|
|
||||||
*num_features = 0;
|
|
||||||
return; // Feature extraction failed.
|
|
||||||
}
|
|
||||||
*num_features = cn_features.size();
|
|
||||||
memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
|
|
||||||
// TODO(rays) Pass outline_counts back and simplify the calling code.
|
|
||||||
if (feature_outline_index != nullptr) {
|
|
||||||
int f = 0;
|
|
||||||
for (int i = 0; i < outline_counts.size(); ++i) {
|
|
||||||
while (f < outline_counts[i])
|
|
||||||
feature_outline_index[f++] = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This method returns the row to which a box of specified dimensions would
|
|
||||||
// belong. If no good match is found, it returns nullptr.
|
|
||||||
ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
|
|
||||||
int left, int top, int right, int bottom) {
|
|
||||||
TBOX box(left, bottom, right, top);
|
|
||||||
BLOCK_IT b_it(blocks);
|
|
||||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
|
||||||
BLOCK* block = b_it.data();
|
|
||||||
if (!box.major_overlap(block->pdblk.bounding_box()))
|
|
||||||
continue;
|
|
||||||
ROW_IT r_it(block->row_list());
|
|
||||||
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
|
|
||||||
ROW* row = r_it.data();
|
|
||||||
if (!box.major_overlap(row->bounding_box()))
|
|
||||||
continue;
|
|
||||||
WERD_IT w_it(row->word_list());
|
|
||||||
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
|
|
||||||
WERD* word = w_it.data();
|
|
||||||
if (box.major_overlap(word->bounding_box()))
|
|
||||||
return row;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Method to run adaptive classifier on a blob. */
|
|
||||||
void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
|
|
||||||
int num_max_matches,
|
|
||||||
int* unichar_ids,
|
|
||||||
float* ratings,
|
|
||||||
int* num_matches_returned) {
|
|
||||||
auto* choices = new BLOB_CHOICE_LIST;
|
|
||||||
tesseract_->AdaptiveClassifier(blob, choices);
|
|
||||||
BLOB_CHOICE_IT choices_it(choices);
|
|
||||||
int& index = *num_matches_returned;
|
|
||||||
index = 0;
|
|
||||||
for (choices_it.mark_cycle_pt();
|
|
||||||
!choices_it.cycled_list() && index < num_max_matches;
|
|
||||||
choices_it.forward()) {
|
|
||||||
BLOB_CHOICE* choice = choices_it.data();
|
|
||||||
unichar_ids[index] = choice->unichar_id();
|
|
||||||
ratings[index] = choice->rating();
|
|
||||||
++index;
|
|
||||||
}
|
|
||||||
*num_matches_returned = index;
|
|
||||||
delete choices;
|
|
||||||
}
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
@ -41,12 +41,6 @@ void TessDeleteIntArray(const int* arr) {
|
|||||||
delete[] arr;
|
delete[] arr;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
/** C API wrapper: forwards to TessBaseAPI::DeleteBlockList(). */
void TessDeleteBlockList(BLOCK_LIST* block_list)
{
  TessBaseAPI::DeleteBlockList(block_list);
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
TessResultRenderer*
|
TessResultRenderer*
|
||||||
TessTextRendererCreate(const char* outputbase) {
|
TessTextRendererCreate(const char* outputbase) {
|
||||||
return new tesseract::TessTextRenderer(outputbase);
|
return new tesseract::TessTextRenderer(outputbase);
|
||||||
@ -597,25 +591,6 @@ BOOL TessBaseAPIDetectOrientationScript(
|
|||||||
return static_cast<BOOL>(success);
|
return static_cast<BOOL>(success);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * C API wrapper: forwards to handle->GetFeaturesForBlob() with the
 * arguments unchanged.
 */
void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
                                   INT_FEATURE_STRUCT* int_features,
                                   int* num_features,
                                   int* FeatureOutlineIndex)
{
  handle->GetFeaturesForBlob(blob, int_features, num_features,
                             FeatureOutlineIndex);
}
|
|
||||||
|
|
||||||
/** C API wrapper: forwards to TessBaseAPI::FindRowForBox(). */
ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
                       int bottom)
{
  ROW* match = TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom);
  return match;
}
|
|
||||||
|
|
||||||
/**
 * C API wrapper: forwards to handle->RunAdaptiveClassifier() with the
 * arguments unchanged.
 */
void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
                                      int num_max_matches, int* unichar_ids,
                                      float* ratings,
                                      int* num_matches_returned)
{
  handle->RunAdaptiveClassifier(blob, num_max_matches, unichar_ids, ratings,
                                num_matches_returned);
}
|
|
||||||
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||||
|
|
||||||
const char* TessBaseAPIGetUnichar(TessBaseAPI* handle,
|
const char* TessBaseAPIGetUnichar(TessBaseAPI* handle,
|
||||||
@ -632,22 +607,6 @@ int TessBaseAPINumDawgs(const TessBaseAPI* handle) {
|
|||||||
return handle->NumDawgs();
|
return handle->NumDawgs();
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
/** C API wrapper: forwards to TessBaseAPI::MakeTessOCRRow(). */
ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
                        float ascender)
{
  ROW* row = TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
  return row;
}
|
|
||||||
|
|
||||||
/** C API wrapper: forwards to TessBaseAPI::MakeTBLOB(). */
TBLOB* TessMakeTBLOB(struct Pix* pix)
{
  TBLOB* blob = TessBaseAPI::MakeTBLOB(pix);
  return blob;
}
|
|
||||||
|
|
||||||
/**
 * C API wrapper: converts numeric_mode from BOOL to bool and forwards to
 * TessBaseAPI::NormalizeTBLOB().
 */
void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode)
{
  const bool numeric = static_cast<bool>(numeric_mode);
  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric);
}
|
|
||||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
||||||
|
|
||||||
TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle) {
|
TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle) {
|
||||||
return handle->oem();
|
return handle->oem();
|
||||||
}
|
}
|
||||||
@ -667,13 +626,6 @@ void TessBaseGetBlockTextOrientations(
|
|||||||
handle->GetBlockTextOrientations(block_orientation, vertical_writing);
|
handle->GetBlockTextOrientations(block_orientation, vertical_writing);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef DISABLED_LEGACY_ENGINE
|
|
||||||
/** C API wrapper: forwards to handle->FindLinesCreateBlockList(). */
BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle)
{
  BLOCK_LIST* blocks = handle->FindLinesCreateBlockList();
  return blocks;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void TessPageIteratorDelete(TessPageIterator* handle) {
|
void TessPageIteratorDelete(TessPageIterator* handle) {
|
||||||
delete handle;
|
delete handle;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user