mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
191 lines
7.7 KiB
C++
191 lines
7.7 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: thresholder.h
|
|
// Description: Base API for thresholding images in tesseract.
|
|
// Author: Ray Smith
|
|
// Created: Mon May 12 11:00:15 PDT 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
|
|
#define TESSERACT_CCMAIN_THRESHOLDER_H_
|
|
|
|
#include "platform.h"
|
|
#include "publictypes.h"
|
|
|
|
// Opaque image type, forward-declared because this header only ever uses
// pointers to it. NOTE(review): presumably Leptonica's Pix (the comments in
// this file refer to pixDestroy and Leptonica) -- confirm against the .cpp.
struct Pix;
|
|
|
|
namespace tesseract {
|
|
|
|
/// Base class for all tesseract image thresholding classes.
|
|
/// Specific classes can add new thresholding methods by
|
|
/// overriding ThresholdToPix.
|
|
/// Each instance deals with a single image, but the design is intended to
|
|
/// be useful for multiple calls to SetRectangle and ThresholdTo* if
|
|
/// desired.
|
|
class TESS_API ImageThresholder {
|
|
public:
|
|
ImageThresholder();
|
|
virtual ~ImageThresholder();
|
|
|
|
/// Destroy the Pix if there is one, freeing memory.
|
|
virtual void Clear();
|
|
|
|
/// Return true if no image has been set.
|
|
bool IsEmpty() const;
|
|
|
|
/// SetImage makes a copy of all the image data, so it may be deleted
|
|
/// immediately after this call.
|
|
/// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
|
|
/// Palette color images will not work properly and must be converted to
|
|
/// 24 bit.
|
|
/// Binary images of 1 bit per pixel may also be given but they must be
|
|
/// byte packed with the MSB of the first byte being the first pixel, and a
|
|
/// one pixel is WHITE. For binary images set bytes_per_pixel=0.
|
|
void SetImage(const unsigned char* imagedata, int width, int height,
|
|
int bytes_per_pixel, int bytes_per_line);
|
|
|
|
/// Store the coordinates of the rectangle to process for later use.
|
|
/// Doesn't actually do any thresholding.
|
|
void SetRectangle(int left, int top, int width, int height);
|
|
|
|
/// Get enough parameters to be able to rebuild bounding boxes in the
|
|
/// original image (not just within the rectangle).
|
|
/// Left and top are enough with top-down coordinates, but
|
|
/// the height of the rectangle and the image are needed for bottom-up.
|
|
virtual void GetImageSizes(int* left, int* top, int* width, int* height,
|
|
int* imagewidth, int* imageheight);
|
|
|
|
/// Return true if the source image is color.
|
|
bool IsColor() const {
|
|
return pix_channels_ >= 3;
|
|
}
|
|
|
|
/// Returns true if the source image is binary.
|
|
bool IsBinary() const {
|
|
return pix_channels_ == 0;
|
|
}
|
|
|
|
int GetScaleFactor() const {
|
|
return scale_;
|
|
}
|
|
|
|
// Set the resolution of the source image in pixels per inch.
|
|
// This should be called right after SetImage(), and will let us return
|
|
// appropriate font sizes for the text.
|
|
void SetSourceYResolution(int ppi) {
|
|
yres_ = ppi;
|
|
estimated_res_ = ppi;
|
|
}
|
|
int GetSourceYResolution() const {
|
|
return yres_;
|
|
}
|
|
int GetScaledYResolution() const {
|
|
return scale_ * yres_;
|
|
}
|
|
// Set the resolution of the source image in pixels per inch, as estimated
|
|
// by the thresholder from the text size found during thresholding.
|
|
// This value will be used to set internal size thresholds during recognition
|
|
// and will not influence the output "point size." The default value is
|
|
// the same as the source resolution. (yres_)
|
|
void SetEstimatedResolution(int ppi) {
|
|
estimated_res_ = ppi;
|
|
}
|
|
// Returns the estimated resolution, including any active scaling.
|
|
// This value will be used to set internal size thresholds during recognition.
|
|
int GetScaledEstimatedResolution() const {
|
|
return scale_ * estimated_res_;
|
|
}
|
|
|
|
/// Pix vs raw, which to use? Pix is the preferred input for efficiency,
|
|
/// since raw buffers are copied.
|
|
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
|
|
/// immediately after, but may not go away until after the Thresholder has
|
|
/// finished with it.
|
|
void SetImage(const Pix* pix);
|
|
|
|
/// Threshold the source image as efficiently as possible to the output Pix.
|
|
/// Creates a Pix and sets pix to point to the resulting pointer.
|
|
/// Caller must use pixDestroy to free the created Pix.
|
|
/// Returns false on error.
|
|
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix** pix);
|
|
|
|
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
|
// returned pix may be an integer reduction of the binary image such that
|
|
// the scale factor may be inferred from the ratio of the sizes, even down
|
|
// to the extreme of a 1x1 pixel thresholds image.
|
|
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
|
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
|
// Returns NULL if the input is binary. PixDestroy after use.
|
|
virtual Pix* GetPixRectThresholds();
|
|
|
|
/// Get a clone/copy of the source image rectangle.
|
|
/// The returned Pix must be pixDestroyed.
|
|
/// This function will be used in the future by the page layout analysis, and
|
|
/// the layout analysis that uses it will only be available with Leptonica,
|
|
/// so there is no raw equivalent.
|
|
Pix* GetPixRect();
|
|
|
|
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
|
// and at the same resolution as the output binary.
|
|
// The returned Pix must be pixDestroyed.
|
|
// Provided to the classifier to extract features from the greyscale image.
|
|
virtual Pix* GetPixRectGrey();
|
|
|
|
protected:
|
|
// ----------------------------------------------------------------------
|
|
// Utility functions that may be useful components for other thresholders.
|
|
|
|
/// Common initialization shared between SetImage methods.
|
|
virtual void Init();
|
|
|
|
/// Return true if we are processing the full image.
|
|
bool IsFullImage() const {
|
|
return rect_left_ == 0 && rect_top_ == 0 &&
|
|
rect_width_ == image_width_ && rect_height_ == image_height_;
|
|
}
|
|
|
|
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
|
void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
|
|
|
|
/// Threshold the rectangle, taking everything except the src_pix
|
|
/// from the class, using thresholds/hi_values to the output pix.
|
|
/// NOTE that num_channels is the size of the thresholds and hi_values
|
|
// arrays and also the bytes per pixel in src_pix.
|
|
void ThresholdRectToPix(Pix* src_pix, int num_channels,
|
|
const int* thresholds, const int* hi_values,
|
|
Pix** pix) const;
|
|
|
|
protected:
|
|
/// Clone or other copy of the source Pix.
|
|
/// The pix will always be PixDestroy()ed on destruction of the class.
|
|
Pix* pix_;
|
|
|
|
int image_width_; //< Width of source pix_.
|
|
int image_height_; //< Height of source pix_.
|
|
int pix_channels_; //< Number of 8-bit channels in pix_.
|
|
int pix_wpl_; //< Words per line of pix_.
|
|
// Limits of image rectangle to be processed.
|
|
int scale_; //< Scale factor from original image.
|
|
int yres_; //< y pixels/inch in source image.
|
|
int estimated_res_; //< Resolution estimate from text size.
|
|
int rect_left_;
|
|
int rect_top_;
|
|
int rect_width_;
|
|
int rect_height_;
|
|
};
|
|
|
|
} // namespace tesseract.
|
|
|
|
#endif // TESSERACT_CCMAIN_THRESHOLDER_H_
|