mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
pdf: move the rendering image and resolution into the PDF renderer only.
pdf: tests: add the Leptonica library dependency to the make toolchain
This commit is contained in:
parent
94b95b1b4a
commit
09b6875853
@ -1472,7 +1472,7 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
pdfrenderer_test_SOURCES = unittest/pdfrenderer_test.cc
|
||||
pdfrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS)
|
||||
pdfrenderer_test_LDADD = $(TESS_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
qrsequence_test_SOURCES = unittest/qrsequence_test.cc
|
||||
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
|
@ -106,23 +106,6 @@ public:
|
||||
return imagenum_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies an alternate image to render with the extracted text.
|
||||
* It must be called after BeginDocument and before AddImage.
|
||||
*/
|
||||
void SetRenderingImage(Pix *rendering_image) {
|
||||
rendering_image_ = rendering_image;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies the expected rendering resolution.
|
||||
* If not set, rendering_dpi api params will be used, else the source image
|
||||
* resolution.
|
||||
*/
|
||||
void SetRenderingResolution(int rendering_dpi) {
|
||||
rendering_dpi_ = rendering_dpi;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Called by concrete classes.
|
||||
@ -156,21 +139,6 @@ protected:
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
// Renderers can call this to get the actual image to render with extracted
|
||||
// text. This method returns:
|
||||
// - the rendering image set by the caller or
|
||||
// - the input image scaled to the rendering_dpi field if defined or
|
||||
// - the input image from the api otherwise
|
||||
Pix *GetRenderingImage(TessBaseAPI *api);
|
||||
|
||||
// Resolution of the rendering image either set manually by the caller or with
|
||||
// the rendering_dpi api parameter.
|
||||
int GetRenderingResolution(TessBaseAPI *api);
|
||||
|
||||
// Reset rendering image and dpi to previous state. Destroy scaled rendered
|
||||
// image if exists.
|
||||
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);
|
||||
|
||||
template <typename T>
|
||||
auto AppendData(T &&d) {
|
||||
AppendData(d.data(), d.size());
|
||||
@ -183,8 +151,6 @@ private:
|
||||
const char *file_extension_; // standard extension for generated output
|
||||
std::string title_; // title of document being rendered
|
||||
int imagenum_; // index of last image added
|
||||
Pix *rendering_image_; // Image to render with the extracted text
|
||||
int rendering_dpi_; // Resolution of the rendering_image
|
||||
bool happy_; // I get grumpy when the disk fills up, etc.
|
||||
};
|
||||
|
||||
@ -275,6 +241,26 @@ public:
|
||||
// we load a custom PDF font from this location.
|
||||
TessPDFRenderer(const char *outputbase, const char *datadir,
|
||||
bool textonly = false);
|
||||
// Reset rendering image and dpi to previous state. Destroy scaled rendered
|
||||
// image if exists.
|
||||
void ResetRenderingState(Pix *rendering_image_prev, int rendering_dpi_prev);
|
||||
|
||||
/**
|
||||
* Specifies an alternate image to render with the extracted text.
|
||||
* It must be called after BeginDocument and before AddImage.
|
||||
*/
|
||||
void SetRenderingImage(Pix *rendering_image) {
|
||||
rendering_image_ = rendering_image;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies the expected rendering resolution.
|
||||
* If not set, rendering_dpi api params will be used, else the source image
|
||||
* resolution.
|
||||
*/
|
||||
void SetRenderingResolution(int rendering_dpi) {
|
||||
rendering_dpi_ = rendering_dpi;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool BeginDocumentHandler() override;
|
||||
@ -292,12 +278,24 @@ private:
|
||||
std::vector<long int> pages_; // object number for every /Page object
|
||||
std::string datadir_; // where to find the custom font
|
||||
bool textonly_; // skip images if set
|
||||
Pix *rendering_image_; // Image to render with the extracted text
|
||||
int rendering_dpi_; // Resolution of the rendering_image
|
||||
// Bookkeeping only. DIY = Do It Yourself.
|
||||
void AppendPDFObjectDIY(size_t objectsize);
|
||||
// Bookkeeping + emit data.
|
||||
void AppendPDFObject(const char *data);
|
||||
// Create the /Contents object for an entire page.
|
||||
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
|
||||
// Renderers can call this to get the actual image to render with extracted
|
||||
// text. This method returns:
|
||||
// - the rendering image set by the caller or
|
||||
// - the input image scaled to the rendering_dpi field if defined or
|
||||
// - the input image from the api otherwise
|
||||
Pix *GetRenderingImage(TessBaseAPI *api);
|
||||
// Resolution of the rendering image either set manually by the caller or with
|
||||
// the rendering_dpi api parameter.
|
||||
int GetRenderingResolution(TessBaseAPI *api);
|
||||
|
||||
// Turn an image into a PDF object. Only transcode if we have to.
|
||||
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
|
@ -192,7 +192,10 @@ static const int kMaxBytesPerCodepoint = 20;
|
||||
* PDF Renderer interface implementation
|
||||
**********************************************************************/
|
||||
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly)
|
||||
: TessResultRenderer(outputbase, "pdf"), datadir_(datadir) {
|
||||
: TessResultRenderer(outputbase, "pdf")
|
||||
, datadir_(datadir)
|
||||
, rendering_image_(nullptr)
|
||||
, rendering_dpi_(0) {
|
||||
obj_ = 0;
|
||||
textonly_ = textonly;
|
||||
offsets_.push_back(0);
|
||||
@ -834,7 +837,60 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int obj
|
||||
return true;
|
||||
}
|
||||
|
||||
// Puts the rendering image and dpi back to the values captured before a page
// was processed. If the current rendering image is not the captured one, it
// is handed to pixDestroy before the captured pointer is restored.
void TessPDFRenderer::ResetRenderingState(Pix *rendering_image_prev,
                                          int rendering_dpi_prev) {
  const bool image_was_replaced = (rendering_image_ != rendering_image_prev);
  if (image_was_replaced) {
    pixDestroy(&rendering_image_);
    rendering_image_ = rendering_image_prev;
  }
  rendering_dpi_ = rendering_dpi_prev;
}
|
||||
|
||||
// Returns the image to place in the PDF alongside the extracted text:
//  - the rendering image already stored in this renderer, if any;
//  - otherwise the api input image itself when the rendering resolution
//    equals the source resolution;
//  - otherwise the input image scaled by rendering_dpi / source_dpi, which is
//    also stored in rendering_image_.
// Returns nullptr when the api has no input image or reports a source
// resolution <= 0.
Pix *TessPDFRenderer::GetRenderingImage(TessBaseAPI *api) {
  if (rendering_image_) {
    return rendering_image_;
  }

  Pix *input = api->GetInputImage();
  const int input_dpi = api->GetSourceYResolution();
  if (input == nullptr || input_dpi <= 0) {
    return nullptr;
  }

  const int target_dpi = GetRenderingResolution(api);
  if (target_dpi == input_dpi) {
    // No scaling required: hand back the api's image without storing it.
    return input;
  }

  const float factor =
      static_cast<float>(target_dpi) / static_cast<float>(input_dpi);
  rendering_image_ = pixScale(input, factor, factor);
  return rendering_image_;
}
|
||||
|
||||
// Returns the resolution to render at. A non-zero rendering_dpi_ wins.
// Otherwise a positive "rendering_dpi" api variable that differs from the
// source resolution is stored in rendering_dpi_ and returned. In every other
// case the source Y resolution reported by the api is returned.
int TessPDFRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
  if (rendering_dpi_ != 0) {
    return rendering_dpi_;
  }

  const int native_dpi = api->GetSourceYResolution();
  int requested_dpi = 0;
  const bool has_override =
      api->GetIntVariable("rendering_dpi", &requested_dpi) &&
      requested_dpi > 0 && requested_dpi != native_dpi;
  if (!has_override) {
    return native_dpi;
  }

#if !defined(NDEBUG)
  // Out-of-range values are only reported, never rejected.
  const bool credible = requested_dpi >= kMinCredibleResolution &&
                        requested_dpi <= kMaxCredibleResolution;
  if (!credible) {
    tprintf(
        "Warning: User defined rendering dpi %d is outside of expected range "
        "(%d - %d)!\n",
        requested_dpi, kMinCredibleResolution, kMaxCredibleResolution);
  }
#endif

  rendering_dpi_ = requested_dpi;
  return rendering_dpi_;
}
|
||||
|
||||
bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
Pix *rendering_image_prev = rendering_image_;
|
||||
int rendering_dpi_prev = rendering_dpi_;
|
||||
Pix *pix = GetRenderingImage(api);
|
||||
const char *filename = api->GetInputName();
|
||||
int ppi = GetRenderingResolution(api);
|
||||
@ -920,12 +976,14 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
int jpg_quality;
|
||||
api->GetIntVariable("jpg_quality", &jpg_quality);
|
||||
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize, jpg_quality)) {
|
||||
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
|
||||
return false;
|
||||
}
|
||||
AppendData(pdf_object, objsize);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
delete[] pdf_object;
|
||||
}
|
||||
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -18,14 +18,12 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h>
|
||||
#include <tesseract/renderer.h>
|
||||
#include <cstring>
|
||||
#include <memory> // std::unique_ptr
|
||||
#include <string> // std::string
|
||||
#include "serialis.h" // Serialize
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -38,8 +36,6 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
|
||||
, file_extension_(extension)
|
||||
, title_("")
|
||||
, imagenum_(-1)
|
||||
, rendering_image_(nullptr)
|
||||
, rendering_dpi_(0)
|
||||
, happy_(true) {
|
||||
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
|
||||
std::string outfile = std::string(outputbase) + "." + extension;
|
||||
@ -94,71 +90,13 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
|
||||
return false;
|
||||
}
|
||||
++imagenum_;
|
||||
Pix *rendering_image_prev = rendering_image_;
|
||||
int rendering_dpi_prev = rendering_dpi_;
|
||||
bool ok = AddImageHandler(api);
|
||||
ResetRenderingState(rendering_image_prev, rendering_dpi_prev);
|
||||
if (next_) {
|
||||
ok = next_->AddImage(api) && ok;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
// Puts the rendering image and dpi back to the values captured before a page
// was processed. If the current rendering image is not the captured one, it
// is handed to pixDestroy before the captured pointer is restored.
void TessResultRenderer::ResetRenderingState(Pix *rendering_image_prev,
                                             int rendering_dpi_prev) {
  const bool image_was_replaced = (rendering_image_ != rendering_image_prev);
  if (image_was_replaced) {
    pixDestroy(&rendering_image_);
    rendering_image_ = rendering_image_prev;
  }

  const bool dpi_was_changed = (rendering_dpi_ != rendering_dpi_prev);
  if (dpi_was_changed) {
    rendering_dpi_ = rendering_dpi_prev;
  }
}
|
||||
|
||||
// Returns the image to render alongside the extracted text:
//  - the rendering image already stored in this renderer, if any;
//  - otherwise the api input image itself when the rendering resolution
//    equals the source resolution;
//  - otherwise the input image scaled by rendering_dpi / source_dpi, which is
//    also stored in rendering_image_.
// When the api has no input image or reports a source resolution <= 0, the
// renderer is marked unhappy and nullptr is returned.
Pix *TessResultRenderer::GetRenderingImage(TessBaseAPI *api) {
  if (rendering_image_) {
    return rendering_image_;
  }

  Pix *input = api->GetInputImage();
  const int input_dpi = api->GetSourceYResolution();
  if (input == nullptr || input_dpi <= 0) {
    happy_ = false;
    return nullptr;
  }

  const int target_dpi = GetRenderingResolution(api);
  if (target_dpi == input_dpi) {
    // No scaling required: hand back the api's image without storing it.
    return input;
  }

  const float factor =
      static_cast<float>(target_dpi) / static_cast<float>(input_dpi);
  rendering_image_ = pixScale(input, factor, factor);
  return rendering_image_;
}
|
||||
|
||||
// Returns the resolution to render at. A non-zero rendering_dpi_ wins.
// Otherwise a positive "rendering_dpi" api variable that differs from the
// source resolution is stored in rendering_dpi_ and returned. In every other
// case the source Y resolution reported by the api is returned.
int TessResultRenderer::GetRenderingResolution(tesseract::TessBaseAPI *api) {
  if (rendering_dpi_ != 0) {
    return rendering_dpi_;
  }

  const int native_dpi = api->GetSourceYResolution();
  int requested_dpi = 0;
  const bool has_override =
      api->GetIntVariable("rendering_dpi", &requested_dpi) &&
      requested_dpi > 0 && requested_dpi != native_dpi;
  if (!has_override) {
    return native_dpi;
  }

#if !defined(NDEBUG)
  // Out-of-range values are only reported, never rejected.
  const bool credible = requested_dpi >= kMinCredibleResolution &&
                        requested_dpi <= kMaxCredibleResolution;
  if (!credible) {
    tprintf(
        "Warning: User defined rendering dpi %d is outside of expected range "
        "(%d - %d)!\n",
        requested_dpi, kMinCredibleResolution, kMaxCredibleResolution);
  }
#endif

  rendering_dpi_ = requested_dpi;
  return rendering_dpi_;
}
|
||||
|
||||
bool TessResultRenderer::EndDocument() {
|
||||
if (!happy_) {
|
||||
return false;
|
||||
|
Loading…
Reference in New Issue
Block a user