mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 09:52:40 +08:00
Merge 5fb2b623a4
into 3b7c70e34d
This commit is contained in:
commit
86e52c7fb8
@ -317,6 +317,17 @@ public:
|
||||
*/
|
||||
void SetImage(Pix *pix);
|
||||
|
||||
/**
|
||||
* Preprocessing the InputImage
|
||||
* Grayscale normalization based on nlbin (Thomas Breuel)
|
||||
* Current modes:
|
||||
* - 0 = No normalization
|
||||
* - 1 = Thresholding+Recognition (normalized image is used for both)
|
||||
* - 2 = Thresholding (normalized image is used by the thresholder only)
|
||||
* - 3 = Recognition (normalized image replaces the input image only)
|
||||
*/
|
||||
bool NormalizeImage(int mode);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
|
@ -849,6 +849,25 @@ Pix *TessBaseAPI::GetInputImage() {
|
||||
return tesseract_->pix_original();
|
||||
}
|
||||
|
||||
// Grayscale normalization (preprocessing)
|
||||
bool TessBaseAPI::NormalizeImage(int mode){
|
||||
if (!GetInputImage()){
|
||||
tprintf("Please use SetImage before applying the image pre-processing steps.");
|
||||
return false;
|
||||
}
|
||||
if (mode == 1) {
|
||||
SetInputImage(thresholder_->GetPixNormRectGrey());
|
||||
thresholder_->SetImage(GetInputImage());
|
||||
} else if (mode == 2) {
|
||||
thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
|
||||
} else if (mode == 3) {
|
||||
SetInputImage(thresholder_->GetPixNormRectGrey());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const char *TessBaseAPI::GetInputName() {
|
||||
if (!input_file_.empty()) {
|
||||
return input_file_.c_str();
|
||||
@ -1192,8 +1211,31 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
|
||||
bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
const char *retry_config, int timeout_millisec,
|
||||
TessResultRenderer *renderer) {
|
||||
|
||||
SetInputName(filename);
|
||||
|
||||
SetImage(pix);
|
||||
|
||||
// Image preprocessing on image
|
||||
// Grayscale normalization
|
||||
int graynorm_mode;
|
||||
GetIntVariable("preprocess_graynorm_mode", &graynorm_mode);
|
||||
if (graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) {
|
||||
// Write normalized image
|
||||
std::string output_filename = output_file_ + ".preprocessed";
|
||||
if (page_index > 0) {
|
||||
output_filename += std::to_string(page_index);
|
||||
}
|
||||
output_filename += ".tif";
|
||||
if (graynorm_mode == 2) {
|
||||
pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4);
|
||||
} else {
|
||||
pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4);
|
||||
}
|
||||
}
|
||||
|
||||
// Recognition
|
||||
|
||||
bool failed = false;
|
||||
|
||||
if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
|
||||
@ -1240,6 +1282,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
// Switch to alternate mode for retry.
|
||||
ReadConfigFile(retry_config);
|
||||
SetImage(pix);
|
||||
|
||||
// Apply image preprocessing
|
||||
NormalizeImage(graynorm_mode);
|
||||
|
||||
//if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
|
||||
Recognize(nullptr);
|
||||
// Restore saved config variables.
|
||||
ReadConfigFile(kOldVarsFile);
|
||||
@ -1248,7 +1295,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
if (renderer && !failed) {
|
||||
failed = !renderer->AddImage(this);
|
||||
}
|
||||
|
||||
//pixDestroy(&pixs);
|
||||
return !failed;
|
||||
}
|
||||
|
||||
|
@ -81,6 +81,11 @@ Tesseract::Tesseract()
|
||||
"11=sparse_text, 12=sparse_text+osd, 13=raw_line"
|
||||
" (Values from PageSegMode enum in tesseract/publictypes.h)",
|
||||
this->params())
|
||||
, INT_MEMBER(preprocess_graynorm_mode, 0,
|
||||
"Grayscale normalization mode: 0=no normalization, 1=tresholding+recognition, "
|
||||
"2=tresholding_only, 3=recognition_only "
|
||||
"The modes 1–3 are applied on the fullimage",
|
||||
this->params())
|
||||
, INT_MEMBER(thresholding_method,
|
||||
static_cast<int>(ThresholdMethod::Otsu),
|
||||
"Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = "
|
||||
|
@ -762,6 +762,7 @@ public:
|
||||
BOOL_VAR_H(tessedit_do_invert);
|
||||
double_VAR_H(invert_threshold);
|
||||
INT_VAR_H(tessedit_pageseg_mode);
|
||||
INT_VAR_H(preprocess_graynorm_mode);
|
||||
INT_VAR_H(thresholding_method);
|
||||
BOOL_VAR_H(thresholding_debug);
|
||||
double_VAR_H(thresholding_window_size);
|
||||
|
@ -179,6 +179,103 @@ void ImageThresholder::SetImage(const Image pix) {
|
||||
Init();
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------*
|
||||
* Non-linear contrast normalization *
|
||||
*----------------------------------------------------------------------*/
|
||||
/*!
|
||||
* \brief pixNLNorm()
|
||||
*
|
||||
* \param[in] pixs 8 or 32 bpp
|
||||
* \param[out] ptresh l_int32 global threshold value
|
||||
* \return pixd 8 bpp grayscale, or NULL on error
|
||||
*
|
||||
* <pre>
|
||||
* Notes:
|
||||
* (1) This composite operation is good for adaptively removing
|
||||
* dark background. Adaption of Thomas Breuel's nlbin version
|
||||
* from ocropus.
|
||||
* (2) A good thresholder together NLNorm is WAN
|
||||
* </pre>
|
||||
*/
|
||||
Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) {
|
||||
l_int32 d, thresh, w1, h1, w2, h2, fgval, bgval;
|
||||
l_uint32 black_val, white_val;
|
||||
l_float32 factor, threshpos, avefg, avebg;
|
||||
PIX *pixg, *pixd, *pixd2;
|
||||
BOX *pixbox;
|
||||
NUMA *na;
|
||||
|
||||
PROCNAME("pixNLNorm");
|
||||
|
||||
if (!pixs || (d = pixGetDepth(pixs)) < 8) {
|
||||
return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", procName, NULL);
|
||||
}
|
||||
if (d == 32) {
|
||||
// ITU-R 601-2 luma
|
||||
pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114);
|
||||
// Legacy converting
|
||||
// pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3);
|
||||
} else {
|
||||
pixg = pixConvertTo8(pixs, 0);
|
||||
}
|
||||
|
||||
/// Normalize contrast
|
||||
// pixGetBlackOrWhiteVal(pixg, L_GET_BLACK_VAL, &black_val);
|
||||
// if (black_val>0) pixAddConstantGray(pixg, -1 * black_val);
|
||||
// pixGetBlackOrWhiteVal(pixg, L_GET_WHITE_VAL, &white_val);
|
||||
// if (white_val<255) pixMultConstantGray(pixg, (255. / white_val));
|
||||
pixd = pixMaxDynamicRange(pixg, L_LINEAR_SCALE);
|
||||
pixDestroy(&pixg);
|
||||
pixg = pixCopy(nullptr, pixd);
|
||||
pixDestroy(&pixd);
|
||||
|
||||
/// Calculate flat version
|
||||
pixGetDimensions(pixg, &w1, &h1, NULL);
|
||||
pixd = pixScaleGeneral(pixg, 0.5, 0.5, 0.0, 0);
|
||||
pixd2 = pixRankFilter(pixd, 20, 2, 0.8);
|
||||
pixDestroy(&pixd);
|
||||
pixd = pixRankFilter(pixd2, 2, 20, 0.8);
|
||||
pixDestroy(&pixd2);
|
||||
pixGetDimensions(pixd, &w2, &h2, NULL);
|
||||
pixd2 = pixScaleGrayLI(pixd, (l_float32)w1 / (l_float32)w2,
|
||||
(l_float32)h1 / (l_float32)h2);
|
||||
pixDestroy(&pixd);
|
||||
pixInvert(pixd2, pixd2);
|
||||
pixAddGray(pixg, pixg, pixd2);
|
||||
pixDestroy(&pixd2);
|
||||
|
||||
/// Local contrast enhancement
|
||||
// Ignore a border of 10 % and get a mean threshold,
|
||||
// background and foreground value
|
||||
pixbox = boxCreate(w1 * 0.1, h1 * 0.1, w1 * 0.9, h1 * 0.9);
|
||||
na = pixGetGrayHistogramInRect(pixg, pixbox, 1);
|
||||
numaSplitDistribution(na, 0.1, &thresh, &avefg, &avebg, NULL, NULL, NULL);
|
||||
boxDestroy(&pixbox);
|
||||
numaDestroy(&na);
|
||||
|
||||
/// Subtract by a foreground value and multiply by factor to
|
||||
// set a background value to 255
|
||||
fgval = (l_int32)(avefg + 0.5);
|
||||
bgval = (l_int32)(avebg + 0.5);
|
||||
threshpos = (l_float32)(thresh - fgval) / (bgval - fgval);
|
||||
// Todo: fgval or fgval + slightly offset
|
||||
fgval = fgval; // + (l_int32) ((thresh - fgval)*.25);
|
||||
bgval = bgval +
|
||||
(l_int32)std::min((l_int32)((bgval - thresh) * .5), (255 - bgval));
|
||||
factor = 255. / (bgval - fgval);
|
||||
if (pthresh) {
|
||||
*pthresh = (l_int32)threshpos * factor - threshpos * .1;
|
||||
}
|
||||
pixAddConstantGray(pixg, -1 * fgval);
|
||||
pixMultConstantGray(pixg, factor);
|
||||
|
||||
return pixg;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------*
|
||||
* Thresholding *
|
||||
*----------------------------------------------------------------------*/
|
||||
|
||||
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
|
||||
TessBaseAPI *api,
|
||||
ThresholdMethod method) {
|
||||
@ -199,7 +296,7 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
|
||||
int r;
|
||||
|
||||
l_int32 pix_w, pix_h;
|
||||
pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
|
||||
pixGetDimensions(pix_, &pix_w, &pix_h, nullptr);
|
||||
|
||||
bool thresholding_debug;
|
||||
api->GetBoolVariable("thresholding_debug", &thresholding_debug);
|
||||
@ -374,6 +471,17 @@ Image ImageThresholder::GetPixRectGrey() {
|
||||
return pix;
|
||||
}
|
||||
|
||||
// Get a clone/copy of the source image rectangle, reduced to normalized greyscale,
|
||||
// and at the same resolution as the output binary.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
Image ImageThresholder::GetPixNormRectGrey() {
|
||||
auto pix = GetPixRect();
|
||||
auto result = ImageThresholder::pixNLNorm(pix, nullptr);
|
||||
pix.destroy();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
||||
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
|
||||
std::vector<int> thresholds;
|
||||
|
@ -154,6 +154,12 @@ public:
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
virtual Image GetPixRectGrey();
|
||||
|
||||
// Get a clone/copy of the source image rectangle, reduced to normalized greyscale,
|
||||
// and at the same resolution as the output binary.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
virtual Image GetPixNormRectGrey();
|
||||
|
||||
protected:
|
||||
// ----------------------------------------------------------------------
|
||||
// Utility functions that may be useful components for other thresholders.
|
||||
@ -170,6 +176,9 @@ protected:
|
||||
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
||||
void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
|
||||
|
||||
// Return non-linear normalized grayscale
|
||||
Pix *pixNLNorm(Pix *pixs, int *pthresh);
|
||||
|
||||
/// Threshold the rectangle, taking everything except the src_pix
|
||||
/// from the class, using thresholds/hi_values to the output pix.
|
||||
/// NOTE that num_channels is the size of the thresholds and hi_values
|
||||
|
Loading…
Reference in New Issue
Block a user