Merge 5fb2b623a4 into 3b7c70e34d

2025-06-07 09:52:40 +08:00 · 2025-05-26 10:14:18 +02:00 · 2025-05-26 10:14:18 +02:00 · 86e52c7fb8
commit 86e52c7fb8
parent 3b7c70e34d 5fb2b623a4
6 changed files with 183 additions and 2 deletions
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@ -317,6 +317,17 @@ public:
   */
  void SetImage(Pix *pix);

+  /**
+   * Preprocess the input image.
+   * Grayscale normalization based on nlbin (Thomas Breuel).
+   * An input image must already be set (see SetImage()), otherwise the
+   * call fails.
+   * Current modes:
+   *  - 0 = No normalization
+   *  - 1 = Thresholding+Recognition (the normalized image replaces both the
+   *        input image and the image used by the thresholder)
+   *  - 2 = Thresholding (only the thresholder image is replaced)
+   *  - 3 = Recognition (only the input image is replaced)
+   * @param mode One of the modes listed above.
+   * @return true if the normalization was applied; false otherwise
+   *         (e.g. no input image is set, or mode is not 1, 2 or 3).
+   */
+  bool NormalizeImage(int mode);
+
  /**
   * Set the resolution of the source image in pixels per inch so font size
   * information can be calculated in results.  Call this after SetImage().
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@ -849,6 +849,25 @@ Pix *TessBaseAPI::GetInputImage() {
  return tesseract_->pix_original();
 }

+// Grayscale normalization (preprocessing).
+//
+// Replaces the image used for thresholding and/or recognition with the
+// contrast-normalized greyscale image returned by
+// thresholder_->GetPixNormRectGrey():
+//   mode 1: the normalized image becomes the input image and is also handed
+//           to the thresholder,
+//   mode 2: only the thresholder image is replaced,
+//   mode 3: only the input image is replaced.
+// Returns true if the normalization was applied. Returns false (and leaves
+// all images untouched) if no input image is set, if mode is not 1, 2 or 3,
+// or if the normalization itself failed.
+bool TessBaseAPI::NormalizeImage(int mode) {
+  if (!GetInputImage()) {
+    tprintf("Please use SetImage before applying the image pre-processing steps.\n");
+    return false;
+  }
+  // Reject unknown modes before doing any (expensive) image work.
+  if (mode < 1 || mode > 3) {
+    return false;
+  }
+
+  // The normalization returns nullptr on error (e.g. a source image with
+  // less than 8 bpp). Installing a null image would discard the current
+  // input image, so bail out instead.
+  Pix *normalized = thresholder_->GetPixNormRectGrey();
+  if (normalized == nullptr) {
+    tprintf("Grayscale normalization failed, the image is left unchanged.\n");
+    return false;
+  }
+
+  if (mode == 2) {
+    // The thresholder does not take ownership of the pix it is given (mode 1
+    // passes it the pix that is owned as the input image), so the temporary
+    // has to be released here to avoid leaking it.
+    thresholder_->SetImage(normalized);
+    pixDestroy(&normalized);
+    return true;
+  }
+
+  // Modes 1 and 3: the normalized pix is handed over as the new input image.
+  // NOTE(review): SetInputImage() is presumed to take ownership of the pix,
+  // as the previous implementation relied on - confirm.
+  SetInputImage(normalized);
+  if (mode == 1) {
+    thresholder_->SetImage(GetInputImage());
+  }
+  return true;
+}
+
 const char *TessBaseAPI::GetInputName() {
  if (!input_file_.empty()) {
    return input_file_.c_str();
@ -1192,8 +1211,31 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
 bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
                              const char *retry_config, int timeout_millisec,
                              TessResultRenderer *renderer) {
+
  SetInputName(filename);
+
  SetImage(pix);
+
+  // Image preprocessing on image
+  // Grayscale normalization
+  int graynorm_mode;
+  GetIntVariable("preprocess_graynorm_mode", &graynorm_mode);
+  if (graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) {
+    // Write normalized image 
+    std::string output_filename = output_file_ + ".preprocessed";
+    if (page_index > 0) {
+      output_filename += std::to_string(page_index);
+    }
+    output_filename += ".tif";
+    if (graynorm_mode == 2) {
+      pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4);
+    } else {
+      pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4);
+    }
+  }
+
+  // Recognition
+
  bool failed = false;

  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
@ -1240,6 +1282,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
    // Switch to alternate mode for retry.
    ReadConfigFile(retry_config);
    SetImage(pix);
+    
+    // Apply image preprocessing
+    NormalizeImage(graynorm_mode);
+
+    //if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey());
    Recognize(nullptr);
    // Restore saved config variables.
    ReadConfigFile(kOldVarsFile);
@ -1248,7 +1295,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
  if (renderer && !failed) {
    failed = !renderer->AddImage(this);
  }
-
+  //pixDestroy(&pixs);
  return !failed;
 }

--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@ -81,6 +81,11 @@ Tesseract::Tesseract()
               "11=sparse_text, 12=sparse_text+osd, 13=raw_line"
               " (Values from PageSegMode enum in tesseract/publictypes.h)",
               this->params())
+    , INT_MEMBER(preprocess_graynorm_mode, 0, 
+                "Grayscale normalization mode: 0=no normalization, 1=tresholding+recognition, "
+                "2=tresholding_only, 3=recognition_only "
+                "The modes 1–3 are applied on the fullimage", 
+                this->params())
    , INT_MEMBER(thresholding_method,
                 static_cast<int>(ThresholdMethod::Otsu),
                 "Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = "
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@ -762,6 +762,7 @@ public:
  BOOL_VAR_H(tessedit_do_invert);
  double_VAR_H(invert_threshold);
  INT_VAR_H(tessedit_pageseg_mode);
+  INT_VAR_H(preprocess_graynorm_mode);
  INT_VAR_H(thresholding_method);
  BOOL_VAR_H(thresholding_debug);
  double_VAR_H(thresholding_window_size);
--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
@ -179,6 +179,103 @@ void ImageThresholder::SetImage(const Image pix) {
  Init();
 }

+/*----------------------------------------------------------------------*
+ *                  Non-linear contrast normalization                   *
+ *----------------------------------------------------------------------*/
+/*!
+ * \brief   pixNLNorm()
+ *
+ * \param[in]    pixs          source image with a depth of at least 8 bpp;
+ *                             32 bpp input is converted to gray with
+ *                             ITU-R 601-2 luma weights, every other depth
+ *                             via pixConvertTo8(). pixs is not modified.
+ * \param[out]   pthresh       [optional] global threshold value, expressed
+ *                             in the value range of the returned image;
+ *                             may be nullptr. Only written on success.
+ * \return       pixd          8 bpp grayscale owned by the caller,
+ *                             or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This composite operation is good for adaptively removing
+ *          dark background. Adaptation of Thomas Breuel's nlbin version
+ *          from ocropus.
+ *      (2) A good thresholder to use together with NLNorm is WAN.
+ *      (3) Steps: stretch to the full dynamic range, estimate the
+ *          background with two rank filters on a half-size copy and
+ *          flatten it, then stretch the contrast between the mean
+ *          foreground and (raised) mean background value.
+ * </pre>
+ */
+Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) {
+  // __func__ is used instead of PROCNAME/procName, which newer Leptonica
+  // releases no longer provide.
+  const l_int32 depth = (pixs != nullptr) ? pixGetDepth(pixs) : 0;
+  if (depth < 8) {
+    return static_cast<PIX *>(
+        ERROR_PTR("pixs undefined or d < 8 bpp", __func__, nullptr));
+  }
+
+  /// Reduce to 8 bpp grayscale (ITU-R 601-2 luma for RGB input)
+  PIX *gray = (depth == 32)
+                  ? pixConvertRGBToGray(pixs, 0.299f, 0.587f, 0.114f)
+                  : pixConvertTo8(pixs, 0);
+  if (gray == nullptr) {
+    return static_cast<PIX *>(
+        ERROR_PTR("grayscale conversion failed", __func__, nullptr));
+  }
+
+  /// Normalize contrast
+  PIX *stretched = pixMaxDynamicRange(gray, L_LINEAR_SCALE);
+  pixDestroy(&gray);
+  if (stretched == nullptr) {
+    return static_cast<PIX *>(
+        ERROR_PTR("dynamic range stretch failed", __func__, nullptr));
+  }
+  gray = stretched;
+
+  /// Calculate flat version
+  //  The background is estimated on a half-size copy with a horizontal and
+  //  a vertical rank filter, scaled back to full size, inverted and added
+  //  to the image. pixDestroy() accepts a pointer to a null pix, so the
+  //  chain below can be cleaned up unconditionally.
+  l_int32 w1 = 0;
+  l_int32 h1 = 0;
+  pixGetDimensions(gray, &w1, &h1, nullptr);
+  PIX *half = pixScaleGeneral(gray, 0.5f, 0.5f, 0.0f, 0);
+  PIX *rank_h = (half != nullptr) ? pixRankFilter(half, 20, 2, 0.8f) : nullptr;
+  pixDestroy(&half);
+  PIX *rank_v = (rank_h != nullptr) ? pixRankFilter(rank_h, 2, 20, 0.8f) : nullptr;
+  pixDestroy(&rank_h);
+  PIX *background = nullptr;
+  if (rank_v != nullptr) {
+    l_int32 w2 = 0;
+    l_int32 h2 = 0;
+    pixGetDimensions(rank_v, &w2, &h2, nullptr);
+    if (w2 > 0 && h2 > 0) {
+      background = pixScaleGrayLI(rank_v, static_cast<l_float32>(w1) / w2,
+                                  static_cast<l_float32>(h1) / h2);
+    }
+    pixDestroy(&rank_v);
+  }
+  if (background == nullptr) {
+    pixDestroy(&gray);
+    return static_cast<PIX *>(
+        ERROR_PTR("background estimation failed", __func__, nullptr));
+  }
+  pixInvert(background, background);
+  pixAddGray(gray, gray, background);
+  pixDestroy(&background);
+
+  /// Local contrast enhancement
+  //  Ignore a border of 10 % and get a mean threshold,
+  //  background and foreground value.
+  //  boxCreate() takes (x, y, width, height): skipping 10 % on every side
+  //  leaves 80 % of each dimension.
+  BOX *inner = boxCreate(static_cast<l_int32>(w1 * 0.1),
+                         static_cast<l_int32>(h1 * 0.1),
+                         static_cast<l_int32>(w1 * 0.8),
+                         static_cast<l_int32>(h1 * 0.8));
+  NUMA *histogram = pixGetGrayHistogramInRect(gray, inner, 1);
+  boxDestroy(&inner);
+  if (histogram == nullptr) {
+    // The inner box can be unusable for very small images; fall back to the
+    // histogram of the whole image.
+    histogram = pixGetGrayHistogram(gray, 1);
+  }
+  l_int32 thresh = 0;
+  l_float32 avefg = 0.0f;
+  l_float32 avebg = 0.0f;
+  const bool split_failed =
+      histogram == nullptr ||
+      numaSplitDistribution(histogram, 0.1f, &thresh, &avefg, &avebg, nullptr,
+                            nullptr, nullptr) != 0;
+  numaDestroy(&histogram);
+  if (split_failed) {
+    pixDestroy(&gray);
+    return static_cast<PIX *>(
+        ERROR_PTR("histogram split failed", __func__, nullptr));
+  }
+
+  /// Subtract by a foreground value and multiply by factor to
+  //  set a background value to 255
+  // Todo: fgval or fgval + slightly offset
+  const l_int32 fgval = static_cast<l_int32>(avefg + 0.5f);
+  l_int32 bgval = static_cast<l_int32>(avebg + 0.5f);
+  // Move the white point by half the distance between the threshold and
+  // the mean background, but never beyond 255.
+  bgval += std::min(static_cast<l_int32>((bgval - thresh) * .5), 255 - bgval);
+  if (bgval <= fgval) {
+    // No usable contrast between foreground and background: stretching
+    // would divide by zero (or use a negative factor), so return the
+    // flattened image as it is.
+    if (pthresh != nullptr) {
+      *pthresh = thresh;
+    }
+    return gray;
+  }
+  const l_float32 factor = 255.0f / (bgval - fgval);
+  if (pthresh != nullptr) {
+    // Map the threshold through the same transform that is applied to the
+    // pixels below. The former expression cast the relative threshold
+    // position (a fraction) to an integer before multiplying, which made
+    // the reported threshold collapse to about 0.
+    const l_float32 mapped = (thresh - fgval) * factor;
+    *pthresh = static_cast<int>(std::max(0.0f, std::min(255.0f, mapped)) + 0.5f);
+  }
+  pixAddConstantGray(gray, -fgval);
+  pixMultConstantGray(gray, factor);
+
+  return gray;
+}
+
+/*----------------------------------------------------------------------*
+ *                          Thresholding                                *
+ *----------------------------------------------------------------------*/
+
 std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
                                                      TessBaseAPI *api,
                                                      ThresholdMethod method) {
@ -199,7 +296,7 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
  int r;

  l_int32 pix_w, pix_h;
-  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
+  pixGetDimensions(pix_, &pix_w, &pix_h, nullptr);

  bool thresholding_debug;
  api->GetBoolVariable("thresholding_debug", &thresholding_debug);
@ -374,6 +471,17 @@ Image ImageThresholder::GetPixRectGrey() {
  return pix;
 }

+// Returns the source image rectangle (as delivered by GetPixRect()) after it
+// has been run through pixNLNorm(), i.e. as normalized greyscale.
+// The intermediate rectangle image is released here; the returned Pix is
+// owned by the caller and must be pixDestroyed. The result is null if
+// pixNLNorm() fails.
+Image ImageThresholder::GetPixNormRectGrey() {
+  Image source_rect = GetPixRect();
+  Pix *normalized = pixNLNorm(source_rect, nullptr);
+  source_rect.destroy();
+  return normalized;
+}
+
 // Otsu thresholds the rectangle, taking the rectangle from *this.
 void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
  std::vector<int> thresholds;
--- a/src/ccmain/thresholder.h
+++ b/src/ccmain/thresholder.h
@ -154,6 +154,12 @@ public:
  // Provided to the classifier to extract features from the greyscale image.
  virtual Image GetPixRectGrey();

+  // Get a copy of the source image rectangle, reduced to greyscale and
+  // contrast-normalized with pixNLNorm().
+  // The returned Pix must be pixDestroyed; it is null if the normalization
+  // fails.
+  // NOTE(review): presumably intended as a drop-in alternative to
+  // GetPixRectGrey() for preprocessing - confirm the intended consumers.
+  virtual Image GetPixNormRectGrey();
+
 protected:
  // ----------------------------------------------------------------------
  // Utility functions that may be useful components for other thresholders.
@ -170,6 +176,9 @@ protected:
  // Otsu thresholds the rectangle, taking the rectangle from *this.
  void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;

+  // Return a non-linearly contrast-normalized 8 bpp grayscale version of pixs
+  // (which must have a depth of at least 8 bpp), or nullptr on error.
+  // The caller owns the returned Pix. If pthresh is not nullptr it receives
+  // a global threshold value computed during the normalization.
+  Pix *pixNLNorm(Pix *pixs, int *pthresh);
+
  /// Threshold the rectangle, taking everything except the src_pix
  /// from the class, using thresholds/hi_values to the output pix.
  /// NOTE that num_channels is the size of the thresholds and hi_values