mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
fix #1900: intraword spacing for slightly better pdf copy-paste performance
This commit is contained in:
parent
137e6de56f
commit
546a9e81eb
@@ -466,6 +466,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
}
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
if (res_it->IsAtBeginningOf(RIL_WORD)) {
|
||||
pdf_word += "0020";
|
||||
pdf_word_len++;
|
||||
}
|
||||
if (word_length > 0 && pdf_word_len > 0) {
|
||||
double h_stretch =
|
||||
kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
|
||||
|
Loading…
Reference in New Issue
Block a user