mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-23 15:07:49 +08:00
Added JPEG quality option parameter (-c jpg_quality=n)
This commit is contained in:
parent
c4d8f27019
commit
f061503a14
@ -65,9 +65,9 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
|
||||
}
|
||||
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||
BOOL textonly)
|
||||
BOOL textonly, int jpg_quality)
|
||||
{
|
||||
return new TessPDFRenderer(outputbase, datadir, textonly);
|
||||
return new TessPDFRenderer(outputbase, datadir, textonly, jpg_quality);
|
||||
}
|
||||
|
||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
|
||||
|
@ -120,7 +120,7 @@ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* output
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||
BOOL textonly);
|
||||
BOOL textonly, int jpg_quality);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
|
||||
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
|
||||
|
||||
|
@ -181,11 +181,12 @@ static const int kMaxBytesPerCodepoint = 20;
|
||||
**********************************************************************/
|
||||
|
||||
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly)
|
||||
bool textonly, int jpg_quality)
|
||||
: TessResultRenderer(outputbase, "pdf") {
|
||||
obj_ = 0;
|
||||
datadir_ = datadir;
|
||||
textonly_ = textonly;
|
||||
jpg_quality_ = jpg_quality;
|
||||
offsets_.push_back(0);
|
||||
}
|
||||
|
||||
@ -695,7 +696,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
||||
char *filename,
|
||||
long int objnum,
|
||||
char **pdf_object,
|
||||
long int *pdf_object_size) {
|
||||
long int *pdf_object_size, int jpg_quality) {
|
||||
size_t n;
|
||||
char b0[kBasicBufSize];
|
||||
char b1[kBasicBufSize];
|
||||
@ -708,8 +709,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
||||
return false;
|
||||
|
||||
L_Compressed_Data *cid = NULL;
|
||||
const int kJpegQuality = 85;
|
||||
|
||||
const int kJpegQuality = jpg_quality;
|
||||
int format, sad;
|
||||
findFileFormat(filename, &format);
|
||||
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
|
||||
@ -908,7 +908,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
||||
|
||||
if (!textonly_) {
|
||||
char *pdf_object = nullptr;
|
||||
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
|
||||
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality_)) {
|
||||
return false;
|
||||
}
|
||||
AppendData(pdf_object, objsize);
|
||||
|
@ -187,7 +187,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
public:
|
||||
// datadir is the location of the TESSDATA. We need it because
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
|
||||
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly, int jpg_quality);
|
||||
|
||||
protected:
|
||||
virtual bool BeginDocumentHandler();
|
||||
@ -205,6 +205,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
GenericVector<long int> pages_; // object number for every /Page object
|
||||
const char *datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
int jpg_quality_; // set JPEG quality
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
@ -213,7 +214,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
||||
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size);
|
||||
char **pdf_object, long int *pdf_object_size, int jpg_quality);
|
||||
};
|
||||
|
||||
|
||||
|
@ -366,8 +366,10 @@ void PreloadRenderers(
|
||||
if (b) {
|
||||
bool textonly;
|
||||
api->GetBoolVariable("textonly_pdf", &textonly);
|
||||
int jpg_quality;
|
||||
api->GetIntVariable("jpg_quality", &jpg_quality);
|
||||
renderers->push_back(new tesseract::TessPDFRenderer(
|
||||
outputbase, api->GetDatapath(), textonly));
|
||||
outputbase, api->GetDatapath(), textonly, jpg_quality));
|
||||
}
|
||||
|
||||
api->GetBoolVariable("tessedit_write_unlv", &b);
|
||||
|
@ -394,6 +394,7 @@ Tesseract::Tesseract()
|
||||
BOOL_MEMBER(textonly_pdf, false,
|
||||
"Create PDF with only one invisible text layer",
|
||||
this->params()),
|
||||
INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
|
||||
STRING_MEMBER(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs", this->params()),
|
||||
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
||||
|
@ -1032,6 +1032,7 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
|
||||
BOOL_VAR_H(textonly_pdf, false,
|
||||
"Create PDF with only one invisible text layer");
|
||||
INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
|
||||
STRING_VAR_H(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs");
|
||||
INT_VAR_H(suspect_level, 99, "Suspect marker level");
|
||||
|
Loading…
Reference in New Issue
Block a user