mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-23 23:17:49 +08:00
Added JPEG quality option parameter (-c jpg_quality=n)
This commit is contained in:
parent
c4d8f27019
commit
f061503a14
@ -65,9 +65,9 @@ TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outpu
|
|||||||
}
|
}
|
||||||
|
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||||
BOOL textonly)
|
BOOL textonly, int jpg_quality)
|
||||||
{
|
{
|
||||||
return new TessPDFRenderer(outputbase, datadir, textonly);
|
return new TessPDFRenderer(outputbase, datadir, textonly, jpg_quality);
|
||||||
}
|
}
|
||||||
|
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
|
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase)
|
||||||
|
@ -120,7 +120,7 @@ TESS_API TessResultRenderer* TESS_CALL TessTextRendererCreate(const char* output
|
|||||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
|
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate(const char* outputbase);
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
|
TESS_API TessResultRenderer* TESS_CALL TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
TESS_API TessResultRenderer* TESS_CALL TessPDFRendererCreate(const char* outputbase, const char* datadir,
|
||||||
BOOL textonly);
|
BOOL textonly, int jpg_quality);
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
|
TESS_API TessResultRenderer* TESS_CALL TessUnlvRendererCreate(const char* outputbase);
|
||||||
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
|
TESS_API TessResultRenderer* TESS_CALL TessBoxTextRendererCreate(const char* outputbase);
|
||||||
|
|
||||||
|
@ -181,11 +181,12 @@ static const int kMaxBytesPerCodepoint = 20;
|
|||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
|
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||||
bool textonly)
|
bool textonly, int jpg_quality)
|
||||||
: TessResultRenderer(outputbase, "pdf") {
|
: TessResultRenderer(outputbase, "pdf") {
|
||||||
obj_ = 0;
|
obj_ = 0;
|
||||||
datadir_ = datadir;
|
datadir_ = datadir;
|
||||||
textonly_ = textonly;
|
textonly_ = textonly;
|
||||||
|
jpg_quality_ = jpg_quality;
|
||||||
offsets_.push_back(0);
|
offsets_.push_back(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -695,7 +696,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
|||||||
char *filename,
|
char *filename,
|
||||||
long int objnum,
|
long int objnum,
|
||||||
char **pdf_object,
|
char **pdf_object,
|
||||||
long int *pdf_object_size) {
|
long int *pdf_object_size, int jpg_quality) {
|
||||||
size_t n;
|
size_t n;
|
||||||
char b0[kBasicBufSize];
|
char b0[kBasicBufSize];
|
||||||
char b1[kBasicBufSize];
|
char b1[kBasicBufSize];
|
||||||
@ -708,8 +709,7 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
L_Compressed_Data *cid = NULL;
|
L_Compressed_Data *cid = NULL;
|
||||||
const int kJpegQuality = 85;
|
const int kJpegQuality = jpg_quality;
|
||||||
|
|
||||||
int format, sad;
|
int format, sad;
|
||||||
findFileFormat(filename, &format);
|
findFileFormat(filename, &format);
|
||||||
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
|
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
|
||||||
@ -908,7 +908,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
|
|||||||
|
|
||||||
if (!textonly_) {
|
if (!textonly_) {
|
||||||
char *pdf_object = nullptr;
|
char *pdf_object = nullptr;
|
||||||
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
|
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality_)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
AppendData(pdf_object, objsize);
|
AppendData(pdf_object, objsize);
|
||||||
|
@ -187,7 +187,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
|||||||
public:
|
public:
|
||||||
// datadir is the location of the TESSDATA. We need it because
|
// datadir is the location of the TESSDATA. We need it because
|
||||||
// we load a custom PDF font from this location.
|
// we load a custom PDF font from this location.
|
||||||
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
|
// jpg_quality is stored in jpg_quality_ and forwarded to imageToPDFObj
// when the page image is added to the PDF.
TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly, int jpg_quality);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual bool BeginDocumentHandler();
|
virtual bool BeginDocumentHandler();
|
||||||
@ -205,6 +205,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
|||||||
GenericVector<long int> pages_; // object number for every /Page object
|
GenericVector<long int> pages_; // object number for every /Page object
|
||||||
const char *datadir_; // where to find the custom font
|
const char *datadir_; // where to find the custom font
|
||||||
bool textonly_; // skip images if set
|
bool textonly_; // skip images if set
|
||||||
|
int jpg_quality_; // JPEG quality handed to imageToPDFObj for page images
|
||||||
// Bookkeeping only. DIY = Do It Yourself.
|
// Bookkeeping only. DIY = Do It Yourself.
|
||||||
void AppendPDFObjectDIY(size_t objectsize);
|
void AppendPDFObjectDIY(size_t objectsize);
|
||||||
// Bookkeeping + emit data.
|
// Bookkeeping + emit data.
|
||||||
@ -213,7 +214,7 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
|
|||||||
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
|
char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
|
||||||
// Turn an image into a PDF object. Only transcode if we have to.
|
// Turn an image into a PDF object. Only transcode if we have to.
|
||||||
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
|
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
|
||||||
char **pdf_object, long int *pdf_object_size);
|
char **pdf_object, long int *pdf_object_size, int jpg_quality);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -366,8 +366,10 @@ void PreloadRenderers(
|
|||||||
if (b) {
|
if (b) {
|
||||||
bool textonly;
|
bool textonly;
|
||||||
api->GetBoolVariable("textonly_pdf", &textonly);
|
api->GetBoolVariable("textonly_pdf", &textonly);
|
||||||
|
int jpg_quality;
|
||||||
|
api->GetIntVariable("jpg_quality", &jpg_quality);
|
||||||
renderers->push_back(new tesseract::TessPDFRenderer(
|
renderers->push_back(new tesseract::TessPDFRenderer(
|
||||||
outputbase, api->GetDatapath(), textonly));
|
outputbase, api->GetDatapath(), textonly, jpg_quality));
|
||||||
}
|
}
|
||||||
|
|
||||||
api->GetBoolVariable("tessedit_write_unlv", &b);
|
api->GetBoolVariable("tessedit_write_unlv", &b);
|
||||||
|
@ -394,6 +394,7 @@ Tesseract::Tesseract()
|
|||||||
BOOL_MEMBER(textonly_pdf, false,
|
BOOL_MEMBER(textonly_pdf, false,
|
||||||
"Create PDF with only one invisible text layer",
|
"Create PDF with only one invisible text layer",
|
||||||
this->params()),
|
this->params()),
|
||||||
|
INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
|
||||||
STRING_MEMBER(unrecognised_char, "|",
|
STRING_MEMBER(unrecognised_char, "|",
|
||||||
"Output char for unidentified blobs", this->params()),
|
"Output char for unidentified blobs", this->params()),
|
||||||
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
||||||
|
@ -1032,6 +1032,7 @@ class Tesseract : public Wordrec {
|
|||||||
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
|
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
|
||||||
BOOL_VAR_H(textonly_pdf, false,
|
BOOL_VAR_H(textonly_pdf, false,
|
||||||
"Create PDF with only one invisible text layer");
|
"Create PDF with only one invisible text layer");
|
||||||
|
INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
|
||||||
STRING_VAR_H(unrecognised_char, "|",
|
STRING_VAR_H(unrecognised_char, "|",
|
||||||
"Output char for unidentified blobs");
|
"Output char for unidentified blobs");
|
||||||
INT_VAR_H(suspect_level, 99, "Suspect marker level");
|
INT_VAR_H(suspect_level, 99, "Suspect marker level");
|
||||||
|
Loading…
Reference in New Issue
Block a user