mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-23 02:24:09 +08:00
hocrrenderer: write scan_res property to the ocr_page
This will make Tesseract emit the DPI of the document, if known at OCR time. This is required to properly interpret the x_fsize (font size) property of words, since Tesseract scales the font size to the DPI. See issue #3326 (https://github.com/tesseract-ocr/tesseract/issues/3326)
This commit is contained in:
parent
19cc9afb25
commit
ca177e72f3
@ -173,8 +173,11 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
} else {
|
||||
hocr_str << "unknown";
|
||||
}
|
||||
|
||||
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " "
|
||||
<< rect_height_ << "; ppageno " << page_number << "'>\n";
|
||||
<< rect_height_ << "; ppageno " << page_number
|
||||
<< "; scan_res " << GetSourceYResolution() << " "
|
||||
<< GetSourceYResolution() << "'>\n";
|
||||
|
||||
std::unique_ptr<ResultIterator> res_it(GetIterator());
|
||||
while (!res_it->Empty(RIL_BLOCK)) {
|
||||
|
Loading…
Reference in New Issue
Block a user