mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 18:13:42 +08:00
Merge pull request #2705 from stweil/title
Add new parameter "document_title" to set the title in OCR output files
This commit is contained in:
commit
247cd0edc4
@ -20,6 +20,7 @@
|
|||||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||||
#endif
|
#endif
|
||||||
#include "renderer.h"
|
#include "renderer.h"
|
||||||
|
#include "strngs.h" // for STRING
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
@ -68,7 +69,7 @@ bool TessAltoRenderer::BeginDocumentHandler() {
|
|||||||
AppendString(title());
|
AppendString(title());
|
||||||
|
|
||||||
AppendString(
|
AppendString(
|
||||||
"\t\t\t</fileName>\n"
|
"</fileName>\n"
|
||||||
"\t\t</sourceImageInformation>\n"
|
"\t\t</sourceImageInformation>\n"
|
||||||
"\t\t<OCRProcessing ID=\"OCR_0\">\n"
|
"\t\t<OCRProcessing ID=\"OCR_0\">\n"
|
||||||
"\t\t\t<ocrProcessingStep>\n"
|
"\t\t\t<ocrProcessingStep>\n"
|
||||||
|
@ -96,6 +96,7 @@
|
|||||||
#include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP
|
#include "werd.h" // for WERD, WERD_IT, W_FUZZY_NON, W_FUZZY_SP
|
||||||
|
|
||||||
static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
|
static BOOL_VAR(stream_filelist, false, "Stream a filelist from stdin");
|
||||||
|
static STRING_VAR(document_title, "", "Title of output document (used for hOCR and PDF output)");
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
@ -993,7 +994,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin producing output
|
// Begin producing output
|
||||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
if (renderer && !renderer->BeginDocument(document_title.c_str())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1208,7 +1209,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin the output
|
// Begin the output
|
||||||
if (renderer && !renderer->BeginDocument(unknown_title_)) {
|
if (renderer && !renderer->BeginDocument(document_title.c_str())) {
|
||||||
pixDestroy(&pix);
|
pixDestroy(&pix);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -923,12 +923,6 @@ class TESS_API TessBaseAPI {
|
|||||||
int timeout_millisec,
|
int timeout_millisec,
|
||||||
TessResultRenderer* renderer,
|
TessResultRenderer* renderer,
|
||||||
int tessedit_page_number);
|
int tessedit_page_number);
|
||||||
// There's currently no way to pass a document title from the
|
|
||||||
// Tesseract command line, and we have multiple places that choose
|
|
||||||
// to set the title to an empty string. Using a single named
|
|
||||||
// variable will hopefully reduce confusion if the situation changes
|
|
||||||
// in the future.
|
|
||||||
const char *unknown_title_ = "";
|
|
||||||
}; // class TessBaseAPI.
|
}; // class TessBaseAPI.
|
||||||
|
|
||||||
/** Escape a char string - remove &<>"' with HTML codes. */
|
/** Escape a char string - remove &<>"' with HTML codes. */
|
||||||
|
Loading…
Reference in New Issue
Block a user