This commit is contained in:
JKamlah 2025-05-26 10:14:18 +02:00 committed by GitHub
commit 86e52c7fb8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 183 additions and 2 deletions

View File

@ -317,6 +317,17 @@ public:
*/
void SetImage(Pix *pix);
/**
 * Preprocess the input image with a grayscale normalization based on
 * nlbin (Thomas Breuel). An image must have been set (e.g. via SetImage())
 * before calling this function.
 *
 * Current modes:
 * - 0 = No normalization (nothing is changed; the function returns false)
 * - 1 = Thresholding+Recognition: the normalized image replaces the input
 *       image and is also handed to the thresholder
 * - 2 = Thresholding: the normalized image is handed to the thresholder only
 * - 3 = Recognition: the normalized image replaces the input image only
 *
 * @param mode normalization mode, see the list above
 * @return true if mode is 1, 2 or 3 and an input image is available;
 *         false if no input image is set or for any other mode value
 */
bool NormalizeImage(int mode);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().

View File

@ -849,6 +849,25 @@ Pix *TessBaseAPI::GetInputImage() {
return tesseract_->pix_original();
}
// Grayscale normalization (preprocessing)
bool TessBaseAPI::NormalizeImage(int mode){
if (!GetInputImage()){
tprintf("Please use SetImage before applying the image pre-processing steps.");
return false;
}
if (mode == 1) {
SetInputImage(thresholder_->GetPixNormRectGrey());
thresholder_->SetImage(GetInputImage());
} else if (mode == 2) {
thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
} else if (mode == 3) {
SetInputImage(thresholder_->GetPixNormRectGrey());
} else {
return false;
}
return true;
}
const char *TessBaseAPI::GetInputName() {
if (!input_file_.empty()) {
return input_file_.c_str();
@ -1192,8 +1211,31 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer) {
SetInputName(filename);
SetImage(pix);
// Image preprocessing on image
// Grayscale normalization
int graynorm_mode;
GetIntVariable("preprocess_graynorm_mode", &graynorm_mode);
if (graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) {
// Write normalized image
std::string output_filename = output_file_ + ".preprocessed";
if (page_index > 0) {
output_filename += std::to_string(page_index);
}
output_filename += ".tif";
if (graynorm_mode == 2) {
pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4);
} else {
pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4);
}
}
// Recognition
bool failed = false;
if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
@ -1240,6 +1282,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
// Switch to alternate mode for retry.
ReadConfigFile(retry_config);
SetImage(pix);
// Apply image preprocessing
NormalizeImage(graynorm_mode);
//if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
Recognize(nullptr);
// Restore saved config variables.
ReadConfigFile(kOldVarsFile);
@ -1248,7 +1295,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
if (renderer && !failed) {
failed = !renderer->AddImage(this);
}
//pixDestroy(&pixs);
return !failed;
}

View File

@ -81,6 +81,11 @@ Tesseract::Tesseract()
"11=sparse_text, 12=sparse_text+osd, 13=raw_line"
" (Values from PageSegMode enum in tesseract/publictypes.h)",
this->params())
, INT_MEMBER(preprocess_graynorm_mode, 0,
"Grayscale normalization mode: 0=no normalization, 1=tresholding+recognition, "
"2=tresholding_only, 3=recognition_only "
"The modes 13 are applied on the fullimage",
this->params())
, INT_MEMBER(thresholding_method,
static_cast<int>(ThresholdMethod::Otsu),
"Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = "

View File

@ -762,6 +762,7 @@ public:
BOOL_VAR_H(tessedit_do_invert);
double_VAR_H(invert_threshold);
INT_VAR_H(tessedit_pageseg_mode);
INT_VAR_H(preprocess_graynorm_mode);
INT_VAR_H(thresholding_method);
BOOL_VAR_H(thresholding_debug);
double_VAR_H(thresholding_window_size);

View File

@ -179,6 +179,103 @@ void ImageThresholder::SetImage(const Image pix) {
Init();
}
/*----------------------------------------------------------------------*
* Non-linear contrast normalization *
*----------------------------------------------------------------------*/
/*!
 * \brief   pixNLNorm()
 *
 * \param[in]    pixs     8 or 32 bpp (sources with less than 8 bpp are rejected)
 * \param[out]   pthresh  [optional] l_int32 global threshold value, expressed
 *                        on the gray scale of the returned image; may be nullptr
 * \return  pixd 8 bpp grayscale, or nullptr on error
 *
 * <pre>
 * Notes:
 *      (1) This composite operation is good for adaptively removing
 *          dark background. Adaptation of Thomas Breuel's nlbin version
 *          from ocropus.
 *      (2) A good thresholder to use together with NLNorm is WAN.
 *      (3) Steps: convert to 8 bpp gray, stretch to the full dynamic range,
 *          flatten the background with a rank-filtered estimate computed at
 *          half resolution, then stretch the contrast so that the mean
 *          foreground maps to 0 and the (slightly raised) mean background
 *          maps to 255.
 *      (4) The returned pix is newly allocated and owned by the caller.
 * </pre>
 */
Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) {
  l_int32 depth = 0;
  if (!pixs || (depth = pixGetDepth(pixs)) < 8) {
    return static_cast<PIX *>(ERROR_PTR("pixs undefined or d < 8 bpp", __func__, nullptr));
  }

  // Reduce to 8 bpp gray; RGB uses the ITU-R 601-2 luma weights.
  PIX *gray = (depth == 32) ? pixConvertRGBToGray(pixs, 0.299f, 0.587f, 0.114f)
                            : pixConvertTo8(pixs, 0);
  if (!gray) {
    return static_cast<PIX *>(ERROR_PTR("gray conversion failed", __func__, nullptr));
  }

  /// Normalize contrast: stretch the gray values to the full dynamic range.
  PIX *stretched = pixMaxDynamicRange(gray, L_LINEAR_SCALE);
  pixDestroy(&gray);
  if (!stretched) {
    return static_cast<PIX *>(ERROR_PTR("dynamic range stretch failed", __func__, nullptr));
  }
  gray = stretched;

  /// Calculate flat version: estimate the background at half resolution with
  /// two perpendicular rank filters, scale the estimate back to full size and
  /// add its inverse so that the background is pushed towards white.
  l_int32 w1 = 0, h1 = 0;
  pixGetDimensions(gray, &w1, &h1, nullptr);
  PIX *half = pixScaleGeneral(gray, 0.5f, 0.5f, 0.0f, 0);
  PIX *ranked_h = half ? pixRankFilter(half, 20, 2, 0.8f) : nullptr;
  pixDestroy(&half);
  PIX *ranked_hv = ranked_h ? pixRankFilter(ranked_h, 2, 20, 0.8f) : nullptr;
  pixDestroy(&ranked_h);
  PIX *background = nullptr;
  if (ranked_hv) {
    l_int32 w2 = 0, h2 = 0;
    pixGetDimensions(ranked_hv, &w2, &h2, nullptr);
    if (w2 > 0 && h2 > 0) {
      background = pixScaleGrayLI(ranked_hv, static_cast<l_float32>(w1) / static_cast<l_float32>(w2),
                                  static_cast<l_float32>(h1) / static_cast<l_float32>(h2));
    }
    pixDestroy(&ranked_hv);
  }
  if (!background) {
    pixDestroy(&gray);
    return static_cast<PIX *>(ERROR_PTR("background estimation failed", __func__, nullptr));
  }
  pixInvert(background, background);
  pixAddGray(gray, gray, background);
  pixDestroy(&background);

  /// Local contrast enhancement
  // Ignore a border of 10 % on every side and get a mean threshold,
  // background and foreground value. boxCreate() takes x, y, width, height,
  // so the inner region spans 80 % of each dimension.
  BOX *inner = boxCreate(static_cast<l_int32>(w1 * 0.1), static_cast<l_int32>(h1 * 0.1),
                         static_cast<l_int32>(w1 * 0.8), static_cast<l_int32>(h1 * 0.8));
  NUMA *histogram = pixGetGrayHistogramInRect(gray, inner, 1);
  boxDestroy(&inner);
  l_int32 thresh = 0;
  l_float32 avefg = 0.0f, avebg = 0.0f;
  const bool have_stats =
      histogram != nullptr &&
      numaSplitDistribution(histogram, 0.1f, &thresh, &avefg, &avebg, nullptr, nullptr, nullptr) == 0;
  numaDestroy(&histogram);
  if (!have_stats) {
    pixDestroy(&gray);
    return static_cast<PIX *>(ERROR_PTR("histogram statistics not available", __func__, nullptr));
  }

  /// Subtract by a foreground value and multiply by factor to
  // set a background value to 255
  const l_int32 fgval = static_cast<l_int32>(avefg + 0.5);
  l_int32 bgval = static_cast<l_int32>(avebg + 0.5);
  // Raise the background target halfway beyond the threshold distance,
  // but never above 255.
  bgval += std::min(static_cast<l_int32>((bgval - thresh) * .5), 255 - bgval);
  if (bgval <= fgval) {
    // No separation between foreground and background (e.g. a uniform image):
    // a stretch factor cannot be computed, so return the flattened image.
    if (pthresh) {
      *pthresh = thresh;
    }
    return gray;
  }
  const l_float32 factor = 255.f / (bgval - fgval);
  if (pthresh) {
    // Report the split threshold on the scale of the returned image, i.e.
    // after the same shift and stretch that is applied to the pixels below.
    // NOTE(review): the previous expression truncated a 0..1 fraction to an
    // integer before scaling and therefore always produced ~0 -- confirm that
    // the mapped threshold is the intended value.
    const l_int32 mapped = static_cast<l_int32>((thresh - fgval) * factor + 0.5f);
    *pthresh = std::max(0, std::min(255, mapped));
  }
  pixAddConstantGray(gray, -1 * fgval);
  pixMultConstantGray(gray, factor);

  return gray;
}
/*----------------------------------------------------------------------*
* Thresholding *
*----------------------------------------------------------------------*/
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
TessBaseAPI *api,
ThresholdMethod method) {
@ -199,7 +296,7 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
int r;
l_int32 pix_w, pix_h;
pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
pixGetDimensions(pix_, &pix_w, &pix_h, nullptr);
bool thresholding_debug;
api->GetBoolVariable("thresholding_debug", &thresholding_debug);
@ -374,6 +471,17 @@ Image ImageThresholder::GetPixRectGrey() {
return pix;
}
// Returns a non-linearly normalized greyscale version of the source image
// rectangle (see pixNLNorm). The temporary rectangle obtained from
// GetPixRect() is released here; the returned Pix must be pixDestroyed
// by the caller.
Image ImageThresholder::GetPixNormRectGrey() {
  auto source_rect = GetPixRect();
  Pix *normalized = pixNLNorm(source_rect, nullptr);
  source_rect.destroy();
  return normalized;
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
std::vector<int> thresholds;

View File

@ -154,6 +154,12 @@ public:
// Provided to the classifier to extract features from the greyscale image.
virtual Image GetPixRectGrey();
  // Get a non-linearly normalized greyscale version of the source image
  // rectangle: the rectangle is fetched with GetPixRect() and passed through
  // pixNLNorm().
  // The returned Pix must be pixDestroyed.
  // NOTE(review): the result can be null when the normalization fails
  // (pixNLNorm reports an error for sources with less than 8 bpp) -- callers
  // should check before using it.
  virtual Image GetPixNormRectGrey();
protected:
// ----------------------------------------------------------------------
// Utility functions that may be useful components for other thresholders.
@ -170,6 +176,9 @@ protected:
// Otsu thresholds the rectangle, taking the rectangle from *this.
void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
  // Return a non-linearly normalized 8 bpp grayscale version of pixs
  // (adaptation of Thomas Breuel's nlbin from ocropus), or null on error,
  // e.g. if pixs is null or has less than 8 bpp.
  // pthresh is optional; if it is not null it receives a global threshold
  // estimate.
  Pix *pixNLNorm(Pix *pixs, int *pthresh);
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values