mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-23 06:57:50 +08:00
Fix bug with line breaking in hOCR
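The hOCR output could incorrectly close span, p, and div tags early. This commit computes the last_word_in_line, last_word_in_para, and last_word_in_block flags right after the word's opening tag is written (before its <strong>/<em> markup), instead of after its closing </span>. Oops, my bad.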
This commit is contained in:
parent
d71133a769
commit
78ae2cc073
@ -1584,6 +1584,10 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
|
|||||||
}
|
}
|
||||||
hocr_str += "'>";
|
hocr_str += "'>";
|
||||||
|
|
||||||
|
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
||||||
|
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
|
||||||
|
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
|
||||||
|
|
||||||
if (bold) hocr_str += "<strong>";
|
if (bold) hocr_str += "<strong>";
|
||||||
if (italic) hocr_str += "<em>";
|
if (italic) hocr_str += "<em>";
|
||||||
|
|
||||||
@ -1614,9 +1618,6 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
|
|||||||
if (bold) hocr_str += "</strong>";
|
if (bold) hocr_str += "</strong>";
|
||||||
hocr_str += "</span> ";
|
hocr_str += "</span> ";
|
||||||
|
|
||||||
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
|
||||||
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
|
|
||||||
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
|
|
||||||
wcnt++;
|
wcnt++;
|
||||||
// Close any ending block/paragraph/textline.
|
// Close any ending block/paragraph/textline.
|
||||||
if (last_word_in_line) {
|
if (last_word_in_line) {
|
||||||
|
Loading…
Reference in New Issue
Block a user