Allow for text angle/gradient to be retrieved (#4070)

* Added GetGradient function
This commit is contained in:
Balearica 2024-05-12 06:24:02 -07:00 committed by GitHub
parent 6a31e36e0c
commit c23792bc31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 31 additions and 8 deletions

View File

@ -337,6 +337,11 @@ public:
*/
Pix *GetThresholdedImage();
/**
 * Return average gradient of lines on page.
 *
 * The value is read from the underlying recognizer, where it is
 * initialised to 0, overwritten while the page is being segmented into
 * text rows, and reset to 0 when the recognizer state is cleared.
 * NOTE(review): presumably a slope rather than an angle in degrees or
 * radians -- confirm against the row-making code before relying on units.
 */
float GetGradient();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.

View File

@ -273,6 +273,7 @@ TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API float TessBaseAPIGetGradient(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,

View File

@ -2189,6 +2189,13 @@ int TessBaseAPI::FindLines() {
return 0;
}
/**
 * Return average gradient of lines on page.
 *
 * The value is the one stored on the underlying Tesseract instance, which
 * starts at 0 and is reset to 0 whenever that instance is cleared.
 *
 * @return the stored gradient, or 0.0f when no Tesseract instance exists
 *         yet (for example, when called before the API was initialised).
 */
float TessBaseAPI::GetGradient() {
  // tesseract_ may legitimately be null (ClearResults() guards against it
  // too); report the same neutral value a freshly cleared engine would.
  if (tesseract_ == nullptr) {
    return 0.0f;
  }
  return tesseract_->gradient();
}
/** Delete the pageres and clear the block list ready for a new page. */
void TessBaseAPI::ClearResults() {
if (tesseract_ != nullptr) {

View File

@ -327,6 +327,10 @@ struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle) {
return handle->GetThresholdedImage();
}
// C binding for TessBaseAPI::GetGradient(): forwards the call to the C++
// object behind `handle` and hands its result back unchanged.
// `handle` is dereferenced without a check, so it must not be null.
float TessBaseAPIGetGradient(TessBaseAPI *handle) {
  const float page_gradient = handle->GetGradient();
  return page_gradient;
}
// C binding for the static TessBaseAPI::ClearPersistentCache().
// The handle parameter is accepted for signature uniformity only; its name
// is commented out because the call does not use any particular instance.
void TessBaseAPIClearPersistentCache(TessBaseAPI * /*handle*/) {
TessBaseAPI::ClearPersistentCache();
}

View File

@ -168,7 +168,7 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_,
pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks);
pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks, &gradient_);
return auto_page_seg_ret_val;
}

View File

@ -461,6 +461,7 @@ Tesseract::Tesseract()
, scaled_factor_(-1)
, deskew_(1.0f, 0.0f)
, reskew_(1.0f, 0.0f)
, gradient_(0.0f)
, most_recently_used_(this)
, font_table_size_(0)
#ifndef DISABLED_LEGACY_ENGINE
@ -498,6 +499,7 @@ void Tesseract::Clear() {
scaled_color_.destroy();
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
gradient_ = 0.0f;
splitter_.Clear();
scaled_factor_ = -1;
for (auto &sub_lang : sub_langs_) {

View File

@ -200,6 +200,9 @@ public:
// Read-only access to the stored reskew_ vector. The reference stays bound
// to the member, so later changes to reskew_ (e.g. the reset to (1, 0) in
// Clear()) are visible through it.
const FCOORD &reskew() const {
return reskew_;
}
// Returns the stored gradient_ by value. gradient_ starts at 0, is filled
// in by Textord::TextordPage() through the pointer passed from
// SegmentPage(), and is set back to 0 by Clear().
float gradient() const {
return gradient_;
}
// Destroy any existing pix and return a pointer to the pointer.
Image *mutable_pix_binary() {
pix_binary_.destroy();
@ -1004,6 +1007,7 @@ private:
int scaled_factor_;
FCOORD deskew_;
FCOORD reskew_;
float gradient_;
TesseractStats stats_;
// Sub-languages to be tried in addition to this.
std::vector<Tesseract *> sub_langs_;

View File

@ -177,7 +177,7 @@ Textord::Textord(CCStruct *ccstruct)
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
TO_BLOCK_LIST *to_blocks, float *gradient) {
page_tr_.set_x(width);
page_tr_.set_y(height);
if (to_blocks->empty()) {
@ -219,15 +219,14 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
TO_BLOCK_IT to_block_it(to_blocks);
TO_BLOCK *to_block = to_block_it.data();
// Make the rows in the block.
float gradient;
// Do it the old fashioned way.
if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
gradient = make_rows(page_tr_, to_blocks);
*gradient = make_rows(page_tr_, to_blocks);
} else if (!PSM_SPARSE(pageseg_mode)) {
// RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
*gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks);
} else {
gradient = 0.0f;
*gradient = 0.0f;
}
BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks);
baseline_detector.ComputeStraightBaselines(use_box_bottoms);
@ -236,7 +235,7 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi
// Now make the words in the lines.
if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
// SINGLE_LINE uses the old word maker on the single line.
make_words(this, page_tr_, gradient, blocks, to_blocks);
make_words(this, page_tr_, *gradient, blocks, to_blocks);
} else {
// SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
// single word, and in SINGLE_CHAR mode, all the outlines

View File

@ -89,7 +89,8 @@ public:
// to the appropriate word(s) in case they are really diacritics.
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
float *gradient);
// If we were supposed to return only a single textline, and there is more
// than one, clean up and leave only the best.