Fix bug with line breaking in hOCR output

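The hOCR output could incorrectly close span, p, and div tags
early. The last-word-in-line/paragraph/block checks are now made
before the word's text is written rather than after it. Oops, my bad.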
This commit is contained in:
Nick White 2016-06-29 09:22:48 +01:00
parent d71133a769
commit 78ae2cc073

View File

@ -1584,6 +1584,10 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
} }
hocr_str += "'>"; hocr_str += "'>";
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
if (bold) hocr_str += "<strong>"; if (bold) hocr_str += "<strong>";
if (italic) hocr_str += "<em>"; if (italic) hocr_str += "<em>";
@ -1614,9 +1618,6 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
if (bold) hocr_str += "</strong>"; if (bold) hocr_str += "</strong>";
hocr_str += "</span> "; hocr_str += "</span> ";
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
wcnt++; wcnt++;
// Close any ending block/paragraph/textline. // Close any ending block/paragraph/textline.
if (last_word_in_line) { if (last_word_in_line) {