mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 18:02:40 +08:00
Important fix for RTL languages: saves the last space on each line, which was previously lost
This commit is contained in:
parent
3f7735492f
commit
4e8018d013
@ -549,6 +549,12 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the number of blanks before the current word.
// When CurrentParagraphIsLtr() is true, the count is delegated to
// LTRResultIterator::BlanksBeforeWord(). Otherwise the result is 0 when the
// iterator is at the beginning of a text line (RIL_TEXTLINE) and 1 for any
// other position.
|
||||
int ResultIterator::BlanksBeforeWord() const {
|
||||
if (CurrentParagraphIsLtr()) return LTRResultIterator::BlanksBeforeWord();
|
||||
return IsAtBeginningOf(RIL_TEXTLINE) ? 0 : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the null terminated UTF-8 encoded text string for the current
|
||||
* object at the given level. Use delete [] to free after use.
|
||||
@ -585,7 +591,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
if (at_beginning_of_minor_run_) {
|
||||
text += reading_direction_is_ltr ? kLRM : kRLM;
|
||||
}
|
||||
text = it_->word()->BestUTF8(blob_index_, !reading_direction_is_ltr);
|
||||
text = it_->word()->BestUTF8(blob_index_, false);
|
||||
if (IsAtFinalSymbolOfWord()) AppendSuffixMarks(&text);
|
||||
}
|
||||
break;
|
||||
@ -608,7 +614,7 @@ void ResultIterator::AppendUTF8WordText(STRING *text) const {
|
||||
GenericVector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
for (int i = 0; i < blob_order.size(); i++) {
|
||||
*text += it_->word()->BestUTF8(blob_order[i], !reading_direction_is_ltr);
|
||||
*text += it_->word()->BestUTF8(blob_order[i], false);
|
||||
}
|
||||
AppendSuffixMarks(text);
|
||||
}
|
||||
@ -643,6 +649,9 @@ void ResultIterator::IterateAndAppendUTF8TextlineText(STRING *text) {
|
||||
}
|
||||
AppendUTF8WordText(text);
|
||||
words_appended++;
|
||||
if (BidiDebug(2)) {
|
||||
tprintf("Num spaces=%d, text=%s\n", numSpaces, text->string());
|
||||
}
|
||||
} while (Next(RIL_WORD) && !IsAtBeginningOf(RIL_TEXTLINE));
|
||||
if (BidiDebug(1)) {
|
||||
tprintf("%d words printed\n", words_appended);
|
||||
|
@ -82,6 +82,10 @@ class TESS_API ResultIterator : public LTRResultIterator {
|
||||
virtual bool IsAtFinalElement(PageIteratorLevel level,
|
||||
PageIteratorLevel element) const;
|
||||
|
||||
// ============= Functions that refer to words only ============.
|
||||
// Returns the number of blanks before the current word.
// In a left-to-right paragraph this is the LTRResultIterator count; otherwise
// it is 0 at the beginning of a text line and 1 elsewhere.
|
||||
int BlanksBeforeWord() const;
|
||||
|
||||
// ============= Accessing data ==============.
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user