From c23792bc3164d3190677c8bf2273d53f32b57caa Mon Sep 17 00:00:00 2001 From: Balearica Date: Sun, 12 May 2024 06:24:02 -0700 Subject: [PATCH] Allow for text angle/gradient to be retrieved (#4070) * Added GetGradient function --- include/tesseract/baseapi.h | 5 +++++ include/tesseract/capi.h | 1 + src/api/baseapi.cpp | 7 +++++++ src/api/capi.cpp | 4 ++++ src/ccmain/pagesegmain.cpp | 2 +- src/ccmain/tesseractclass.cpp | 2 ++ src/ccmain/tesseractclass.h | 4 ++++ src/textord/textord.cpp | 11 +++++------ src/textord/textord.h | 3 ++- 9 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 6ed9c187..9475fb27 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -337,6 +337,11 @@ public: */ Pix *GetThresholdedImage(); + /** + * Return average gradient of lines on page. + */ + float GetGradient(); + /** * Get the result of page layout analysis as a leptonica-style * Boxa, Pixa pair, in reading order. diff --git a/include/tesseract/capi.h b/include/tesseract/capi.h index 5e8adecb..589be0a7 100644 --- a/include/tesseract/capi.h +++ b/include/tesseract/capi.h @@ -273,6 +273,7 @@ TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top, int width, int height); TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle); +TESS_API float TessBaseAPIGetGradient(TessBaseAPI *handle); TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle, struct Pixa **pixa); TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle, diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 11892772..3a134980 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -2189,6 +2189,13 @@ int TessBaseAPI::FindLines() { return 0; } +/** + * Return average gradient of lines on page. + */ +float TessBaseAPI::GetGradient() { + return tesseract_->gradient(); +} + /** Delete the pageres and clear the block list ready for a new page. 
*/ void TessBaseAPI::ClearResults() { if (tesseract_ != nullptr) { diff --git a/src/api/capi.cpp b/src/api/capi.cpp index 91391a6d..445f0832 100644 --- a/src/api/capi.cpp +++ b/src/api/capi.cpp @@ -327,6 +327,10 @@ struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle) { return handle->GetThresholdedImage(); } +float TessBaseAPIGetGradient(TessBaseAPI *handle) { + return handle->GetGradient(); +} + void TessBaseAPIClearPersistentCache(TessBaseAPI * /*handle*/) { TessBaseAPI::ClearPersistentCache(); } diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp index c613badb..5ebcb3f7 100644 --- a/src/ccmain/pagesegmain.cpp +++ b/src/ccmain/pagesegmain.cpp @@ -168,7 +168,7 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract bool cjk_mode = textord_use_cjk_fp_model; textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_, pix_thresholds_, - pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks); + pix_grey_, splitting || cjk_mode, &diacritic_blobs, blocks, &to_blocks, &gradient_); return auto_page_seg_ret_val; } diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index bb645aba..3f63ea01 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -461,6 +461,7 @@ Tesseract::Tesseract() , scaled_factor_(-1) , deskew_(1.0f, 0.0f) , reskew_(1.0f, 0.0f) + , gradient_(0.0f) , most_recently_used_(this) , font_table_size_(0) #ifndef DISABLED_LEGACY_ENGINE @@ -498,6 +499,7 @@ void Tesseract::Clear() { scaled_color_.destroy(); deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); + gradient_ = 0.0f; splitter_.Clear(); scaled_factor_ = -1; for (auto &sub_lang : sub_langs_) { diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index c03e0457..42f8febc 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -200,6 +200,9 @@ public: const FCOORD &reskew() const { return reskew_; } + float gradient() const { + 
return gradient_; + } // Destroy any existing pix and return a pointer to the pointer. Image *mutable_pix_binary() { pix_binary_.destroy(); @@ -1004,6 +1007,7 @@ private: int scaled_factor_; FCOORD deskew_; FCOORD reskew_; + float gradient_; TesseractStats stats_; // Sub-languages to be tried in addition to this. std::vector<Tesseract *> sub_langs_; diff --git a/src/textord/textord.cpp b/src/textord/textord.cpp index 3abbb7a8..9526fe19 100644 --- a/src/textord/textord.cpp +++ b/src/textord/textord.cpp @@ -177,7 +177,7 @@ Textord::Textord(CCStruct *ccstruct) void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, - TO_BLOCK_LIST *to_blocks) { + TO_BLOCK_LIST *to_blocks, float *gradient) { page_tr_.set_x(width); page_tr_.set_y(height); if (to_blocks->empty()) { @@ -219,15 +219,14 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi TO_BLOCK_IT to_block_it(to_blocks); TO_BLOCK *to_block = to_block_it.data(); // Make the rows in the block. - float gradient; // Do it the old fashioned way. if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { - gradient = make_rows(page_tr_, to_blocks); + *gradient = make_rows(page_tr_, to_blocks); } else if (!PSM_SPARSE(pageseg_mode)) { // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. - gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks); + *gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, to_block, to_blocks); } else { - gradient = 0.0f; + *gradient = 0.0f; } BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); baseline_detector.ComputeStraightBaselines(use_box_bottoms); @@ -236,7 +235,7 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int wi // Now make the words in the lines. 
if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { // SINGLE_LINE uses the old word maker on the single line. - make_words(this, page_tr_, gradient, blocks, to_blocks); + make_words(this, page_tr_, *gradient, blocks, to_blocks); } else { // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a // single word, and in SINGLE_CHAR mode, all the outlines diff --git a/src/textord/textord.h b/src/textord/textord.h index df6750a7..308096b4 100644 --- a/src/textord/textord.h +++ b/src/textord/textord.h @@ -89,7 +89,8 @@ public: // to the appropriate word(s) in case they are really diacritics. void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, - BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); + BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, + float *gradient); // If we were supposed to return only a single textline, and there is more // than one, clean up and leave only the best.