diff --git a/ChangeLog b/ChangeLog index a5d86224..d33fd9b8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,6 @@ 2013-09-20 v3.03 +* Tesseract executable: support for output to stdout; limited support for one + page images from stdin (especially on Windows) * Added Renderer to API to allow document-level processing and output of document formats, like hOCR, PDF. * Major refactor of word-level recognition, beam search, eliminating dead code. diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 3e3f721a..a8cc6a63 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -29,6 +29,14 @@ #include "tprintf.h" #include "openclwrapper.h" +#include +#include + +#ifdef _WIN32 +#include +#include +#endif // _WIN32 + /********************************************************************** * main() * @@ -119,7 +127,7 @@ int main(int argc, char **argv) { } if (output == NULL && noocr == false) { - fprintf(stderr, "Usage:\n %s imagename outputbase|stdout [options...] " + fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout [options...] " "[configfile...]\n\n", argv[0]); fprintf(stderr, "OCR options:\n"); @@ -215,14 +223,7 @@ int main(int argc, char **argv) { // It would be simpler if we could set the value before Init, // but that doesn't work. if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api.SetPageSegMode(pagesegmode); - - FILE* fin = fopen(image, "rb"); - if (fin == NULL) { - fprintf(stderr, "Cannot open input file: %s\n", image); - exit(2); - } - fclose(fin); + api.SetPageSegMode(pagesegmode); tesseract::TessResultRenderer* renderer = NULL; bool b; @@ -234,7 +235,47 @@ int main(int argc, char **argv) { if (renderer == NULL) renderer = new tesseract::TessTextRenderer(); - if (!api.ProcessPages(image, NULL, 0, renderer)) { + bool stdInput = false; + if (!strcmp(image, "stdin") || !strcmp(image, "-")) + stdInput = true; + + if (stdInput) { + char byt; + PIX *pixd = NULL; + std::vector ch_data; + std::istream file(std::cin.rdbuf()); + +#ifdef WIN32 + if (_setmode(_fileno(stdin), _O_BINARY) == -1) + tprintf("ERROR: cin to binary: %s", strerror(errno)); +#endif // WIN32 + + while (file.get(byt)) { + ch_data.push_back(byt); + } + std::cin.ignore(std::cin.rdbuf()->in_avail() + 1); + + size_t size = ch_data.size(); + l_uint8 *data; + if ( (data = (l_uint8 *) malloc( size )) != NULL ) { + memcpy(data, &(ch_data)[0], size); + } else { + tprintf("Memory allocation error\n"); + exit(1); + } + + pixd = pixReadMem(data, size); + api.ProcessPage(pixd, 0, NULL, NULL, 0, renderer); + } else { + FILE* fin = fopen(image, "rb"); + if (fin == NULL) { + fprintf(stderr, "Cannot open input file: %s\n", image); + exit(2); + } + fclose(fin); + } + + if (!stdInput && !api.ProcessPages(image, NULL, 0, renderer)) { fprintf(stderr, "Error during processing.\n"); } else { for (tesseract::TessResultRenderer* r = renderer; r != NULL;