mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-12 07:29:07 +08:00
Avoid tesseract writing Pix out/reading them back.
By default, when we call ImageData::SetPix, we write the data out as a PNG, just to read it back in to get a compressed buffer of data. We then use this to generate a new Pix. In builds of Tesseract on systems where we don't have temp files, writing files out is problematic. Not only that, but compressing/uncompressing is slow, and on minimal builds of leptonica, where we've disabled the format writers to reduce memory footprint, we get no compression anyway. In such cases, it'd be far nicer just to keep the original Pix as the internal data. Also, when recovering the pixmap from the ImageData, if we know we're only going to read from the data, we can avoid duplicating it and just use the original. This is exactly the case when GRAPHICS_DISABLED is set. So, introduce a TESSERACT_IMAGEDATA_AS_PIX predefine that we can use to cause the internal data to be a Pix rather than a compressed buffer. Given we don't do compression, and they were writing to memory, this was all just more effort than we needed. Also, if we're using GRAPHICS_DISABLED, we might as well just pixClone rather than pixCopy, as only the scaler uses this.
This commit is contained in:
parent
cdebe13d81
commit
6bcb941bcf
@ -207,12 +207,28 @@ bool ImageData::SkipDeSerialize(TFile* fp) {
|
||||
// In case of missing PNG support in Leptonica use PNM format,
|
||||
// which requires more memory.
|
||||
void ImageData::SetPix(Pix* pix) {
#ifndef TESSERACT_IMAGEDATA_AS_PIX
  // Default build: pass the image together with the image_data_ buffer to
  // SetPixInternal, which is the encoded-buffer storage path.
  SetPixInternal(pix, &image_data_);
#else
  // TESSERACT_IMAGEDATA_AS_PIX build: keep the Pix pointer itself as the
  // stored image; no encoding step and no copy is made here.
  // NOTE(review): a Pix already held in internal_pix_ is not released by
  // this assignment -- confirm callers only set the image once.
  internal_pix_ = pix;
#endif
}
|
||||
|
||||
// Returns the Pix image for *this. Must be pixDestroyed after use.
|
||||
Pix* ImageData::GetPix() const {
|
||||
#ifdef TESSERACT_IMAGEDATA_AS_PIX
|
||||
#ifdef GRAPHICS_DISABLED
|
||||
/* The only caller of this is the scaling functions to prescale the
|
||||
* source. Thus we can just return a new pointer to the same data. */
|
||||
return pixClone(internal_pix_);
|
||||
#else
|
||||
/* pixCopy always does an actual copy, so the caller can modify the
|
||||
* changed data. */
|
||||
return pixCopy(NULL, internal_pix_);
|
||||
#endif
|
||||
#else
|
||||
return GetPixInternal(image_data_);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Gets anything and everything with a non-nullptr pointer, prescaled to a
|
||||
@ -320,6 +336,7 @@ void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef TESSERACT_IMAGEDATA_AS_PIX
|
||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||
// In case of missing PNG support in Leptonica use PNM format,
|
||||
// which requires more memory.
|
||||
@ -348,6 +365,7 @@ Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Parses the text string as a box file and adds any discovered boxes that
|
||||
// match the page number. Returns false on error.
|
||||
|
@ -196,6 +196,9 @@ class ImageData {
|
||||
private:
|
||||
STRING imagefilename_; // File to read image from.
|
||||
int32_t page_number_; // Page number if multi-page tif or -1.
|
||||
#ifdef TESSERACT_IMAGEDATA_AS_PIX
|
||||
Pix *internal_pix_;
|
||||
#endif
|
||||
GenericVector<char> image_data_; // PNG/PNM file data.
|
||||
STRING language_; // Language code for image.
|
||||
STRING transcription_; // UTF-8 ground truth of image.
|
||||
|
Loading…
Reference in New Issue
Block a user