mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 18:13:42 +08:00
Merge pull request #2705 from stweil/title
Add new parameter "document_title" to set the title in OCR output files
This commit is contained in:
commit
247cd0edc4
@ -20,6 +20,7 @@
|
||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||
#endif
|
||||
#include "renderer.h"
|
||||
#include "strngs.h" // for STRING
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -68,7 +69,7 @@ bool TessAltoRenderer::BeginDocumentHandler() {
|
||||
AppendString(title());
|
||||
|
||||
AppendString(
|
||||
"\t\t\t</fileName>\n"
|
||||
"</fileName>\n"
|
||||
"\t\t</sourceImageInformation>\n"
|
||||
"\t\t<OCRProcessing ID=\"OCR_0\">\n"
|
||||
"\t\t\t<ocrProcessingStep>\n"
|
||||
|
@ -96,6 +96,7 @@
|
||||
#include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP
|
||||
|
||||
static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
|
||||
static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -993,7 +994,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
|
||||
}
|
||||
|
||||
// Begin producing output
|
||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
||||
if (renderer && !renderer->BeginDocument(document_title.c_str())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1208,7 +1209,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
||||
}
|
||||
|
||||
// Begin the output
|
||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
||||
if (renderer && !renderer->BeginDocument(document_title.c_str())) {
|
||||
pixDestroy(&pix);
|
||||
return false;
|
||||
}
|
||||
|
@ -923,12 +923,6 @@ class TESS_API TessBaseAPI {
|
||||
int timeout_millisec,
|
||||
TessResultRenderer* renderer,
|
||||
int tessedit_page_number);
|
||||
// There's currently no way to pass a document title from the
|
||||
// Tesseract command line, and we have multiple places that choose
|
||||
// to set the title to an empty string. Using a single named
|
||||
// variable will hopefully reduce confusion if the situation changes
|
||||
// in the future.
|
||||
const char *unknown_title_ = "";
|
||||
}; // class TessBaseAPI.
|
||||
|
||||
/** Escape a char string - remove &<>"' with HTML codes. */
|
||||
|
Loading…
Reference in New Issue
Block a user