Added a JPEG quality option for images embedded in PDF output (-c jpg_quality=n, default 85)

This commit is contained in:
Thijs Leegwater 2018-01-11 09:11:30 +01:00
parent c4d8f27019
commit f061503a14
7 changed files with 16 additions and 11 deletions

View File

@ -65,9 +65,9 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
} }
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
BOOL textonly) BOOL textonly, int jpg_quality)
{ {
return new TessPDFRenderer(outputbase, datadir, textonly); return new TessPDFRenderer(outputbase, datadir, textonly, jpg_quality);
} }
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase) TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)

View File

@ -120,7 +120,7 @@ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* output
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir, TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
BOOL textonly); BOOL textonly, int jpg_quality);
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase); TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);

View File

@ -181,11 +181,12 @@ static const int kMaxBytesPerCodepoint = 20;
**********************************************************************/ **********************************************************************/
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly) bool textonly, int jpg_quality)
: TessResultRenderer(outputbase, "pdf") { : TessResultRenderer(outputbase, "pdf") {
obj_ = 0; obj_ = 0;
datadir_ = datadir; datadir_ = datadir;
textonly_ = textonly; textonly_ = textonly;
jpg_quality_ = jpg_quality;
offsets_.push_back(0); offsets_.push_back(0);
} }
@ -695,7 +696,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
char *filename, char *filename,
long int objnum, long int objnum,
char **pdf_object, char **pdf_object,
long int *pdf_object_size) { long int *pdf_object_size, int jpg_quality) {
size_t n; size_t n;
char b0[kBasicBufSize]; char b0[kBasicBufSize];
char b1[kBasicBufSize]; char b1[kBasicBufSize];
@ -708,8 +709,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
return false; return false;
L_Compressed_Data *cid = NULL; L_Compressed_Data *cid = NULL;
const int kJpegQuality = 85; const int kJpegQuality = jpg_quality;
int format, sad; int format, sad;
findFileFormat(filename, &format); findFileFormat(filename, &format);
if (pixGetSpp(pix) == 4 && format == IFF_PNG) { if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
@ -908,7 +908,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
if (!textonly_) { if (!textonly_) {
char *pdf_object = nullptr; char *pdf_object = nullptr;
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) { if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality_)) {
return false; return false;
} }
AppendData(pdf_object, objsize); AppendData(pdf_object, objsize);

View File

@ -187,7 +187,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
public: public:
// datadir is the location of the TESSDATA. We need it because // datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location. // we load a custom PDF font from this location.
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly); TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly, int jpg_quality);
protected: protected:
virtual bool BeginDocumentHandler(); virtual bool BeginDocumentHandler();
@ -205,6 +205,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
GenericVector<long int> pages_; // object number for every /Page object GenericVector<long int> pages_; // object number for every /Page object
const char *datadir_; // where to find the custom font const char *datadir_; // where to find the custom font
bool textonly_; // skip images if set bool textonly_; // skip images if set
int jpg_quality_; // set JPEG quality
// Bookkeeping only. DIY = Do It Yourself. // Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize); void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data. // Bookkeeping + emit data.
@ -213,7 +214,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height); char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to. // Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum, static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size); char **pdf_object, long int *pdf_object_size, int jpg_quality);
}; };

View File

@ -366,8 +366,10 @@ void PreloadRenderers(
if (b) { if (b) {
bool textonly; bool textonly;
api->GetBoolVariable("textonly_pdf", &textonly); api->GetBoolVariable("textonly_pdf", &textonly);
int jpg_quality;
api->GetIntVariable("jpg_quality", &jpg_quality);
renderers->push_back(new tesseract::TessPDFRenderer( renderers->push_back(new tesseract::TessPDFRenderer(
outputbase, api->GetDatapath(), textonly)); outputbase, api->GetDatapath(), textonly, jpg_quality));
} }
api->GetBoolVariable("tessedit_write_unlv", &b); api->GetBoolVariable("tessedit_write_unlv", &b);

View File

@ -394,6 +394,7 @@ Tesseract::Tesseract()
BOOL_MEMBER(textonly_pdf, false, BOOL_MEMBER(textonly_pdf, false,
"Create PDF with only one invisible text layer", "Create PDF with only one invisible text layer",
this->params()), this->params()),
INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
STRING_MEMBER(unrecognised_char, "|", STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()), "Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()), INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),

View File

@ -1032,6 +1032,7 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file"); BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
BOOL_VAR_H(textonly_pdf, false, BOOL_VAR_H(textonly_pdf, false,
"Create PDF with only one invisible text layer"); "Create PDF with only one invisible text layer");
INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
STRING_VAR_H(unrecognised_char, "|", STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs"); "Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level"); INT_VAR_H(suspect_level, 99, "Suspect marker level");