Add a jpg_quality parameter (default 85) that controls the JPEG quality
used by TessPDFRenderer when it turns page images into PDF objects.

Review notes:
  * TessPDFRenderer's new constructor argument defaults to 85, the value
    that used to be hard-coded, so three-argument callers keep compiling.
  * The C API leaves TessPDFRendererCreate unchanged and adds
    TessPDFRendererCreate2, mirroring TessHOcrRendererCreate2.
  * tesseractmain.cpp initialises jpg_quality before calling
    GetIntVariable, so a failed lookup cannot pass an indeterminate value.
  * NOTE(review): line breaks and context indentation were reconstructed
    from the hunk counts, and the post-image hashes on the "index" lines
    were not recomputed; verify against the tree before applying.

diff --git a/api/capi.cpp b/api/capi.cpp
index 4f6973140..3c84698fc 100644
--- a/api/capi.cpp
+++ b/api/capi.cpp
@@ -70,4 +70,10 @@
     return new TessPDFRenderer(outputbase, datadir, textonly);
 }
 
+TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate2(const char* outputbase, const char* datadir,
+                                                              BOOL textonly, int jpg_quality)
+{
+    return new TessPDFRenderer(outputbase, datadir, textonly, jpg_quality);
+}
+
 TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
diff --git a/api/capi.h b/api/capi.h
index f3fc3833c..13a686c3f 100644
--- a/api/capi.h
+++ b/api/capi.h
@@ -120,7 +120,9 @@ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* output
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
 TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
                                                              BOOL textonly);
+TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate2(const char* outputbase, const char* datadir,
+                                                              BOOL textonly, int jpg_quality);
 TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
 TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
 
diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp
index 14eac7ec5..3e6b44170 100644
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@@ -181,11 +181,12 @@ static const int kMaxBytesPerCodepoint = 20;
  **********************************************************************/
 
 TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
-                                 bool textonly)
+                                 bool textonly, int jpg_quality)
     : TessResultRenderer(outputbase, "pdf") {
   obj_ = 0;
   datadir_ = datadir;
   textonly_ = textonly;
+  jpg_quality_ = jpg_quality;
   offsets_.push_back(0);
 }
 
@@ -695,7 +696,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
                                     char *filename,
                                     long int objnum,
                                     char **pdf_object,
-                                    long int *pdf_object_size) {
+                                    long int *pdf_object_size, int jpg_quality) {
   size_t n;
   char b0[kBasicBufSize];
   char b1[kBasicBufSize];
@@ -708,8 +709,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
     return false;
 
   L_Compressed_Data *cid = NULL;
-  const int kJpegQuality = 85;
-
+  const int kJpegQuality = jpg_quality;
   int format, sad;
   findFileFormat(filename, &format);
   if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
@@ -908,7 +908,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
 
   if (!textonly_) {
     char *pdf_object = nullptr;
-    if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
+    if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality_)) {
       return false;
     }
     AppendData(pdf_object, objsize);
diff --git a/api/renderer.h b/api/renderer.h
index a6f6d1e7f..e36d31d88 100644
--- a/api/renderer.h
+++ b/api/renderer.h
@@ -187,7 +187,10 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
  public:
   // datadir is the location of the TESSDATA. We need it because
   // we load a custom PDF font from this location.
-  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
+  // jpg_quality is the JPEG quality passed on to imageToPDFObj; it
+  // defaults to 85, the value that was previously hard-coded there.
+  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly,
+                  int jpg_quality = 85);
 
  protected:
   virtual bool BeginDocumentHandler();
@@ -206,5 +209,6 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
   const char *datadir_;    // where to find the custom font
   bool textonly_;          // skip images if set
+  int jpg_quality_;        // set JPEG quality
   // Bookkeeping only. DIY = Do It Yourself.
   void AppendPDFObjectDIY(size_t objectsize);
   // Bookkeeping + emit data.
@@ -213,7 +217,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
   char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
   // Turn an image into a PDF object. Only transcode if we have to.
   static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
-                            char **pdf_object, long int *pdf_object_size);
+                            char **pdf_object, long int *pdf_object_size, int jpg_quality);
 };
 
 
diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp
index 783e26278..a24c6116a 100644
--- a/api/tesseractmain.cpp
+++ b/api/tesseractmain.cpp
@@ -366,8 +366,10 @@ void PreloadRenderers(
     if (b) {
       bool textonly;
       api->GetBoolVariable("textonly_pdf", &textonly);
+      int jpg_quality = 85;  // keep the default if the variable is missing
+      api->GetIntVariable("jpg_quality", &jpg_quality);
       renderers->push_back(new tesseract::TessPDFRenderer(
-          outputbase, api->GetDatapath(), textonly));
+          outputbase, api->GetDatapath(), textonly, jpg_quality));
     }
 
     api->GetBoolVariable("tessedit_write_unlv", &b);
diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp
index 17140ccb5..dd56a2f35 100644
--- a/ccmain/tesseractclass.cpp
+++ b/ccmain/tesseractclass.cpp
@@ -394,6 +394,7 @@ Tesseract::Tesseract()
       BOOL_MEMBER(textonly_pdf, false,
                   "Create PDF with only one invisible text layer",
                   this->params()),
+      INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
       STRING_MEMBER(unrecognised_char, "|",
                     "Output char for unidentified blobs", this->params()),
       INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h
index d59466472..755a6cf8a 100644
--- a/ccmain/tesseractclass.h
+++ b/ccmain/tesseractclass.h
@@ -1032,6 +1032,7 @@ class Tesseract : public Wordrec {
   BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
   BOOL_VAR_H(textonly_pdf, false,
              "Create PDF with only one invisible text layer");
+  INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
   STRING_VAR_H(unrecognised_char, "|",
                "Output char for unidentified blobs");
   INT_VAR_H(suspect_level, 99, "Suspect marker level");