Generalized feature extractor to allow fx from greyscale

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@875 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2025-01-18 14:41:36 +08:00 · 2013-09-23 15:19:50 +00:00 · 2013-09-23 15:19:50 +00:00 · b0fb616299
commit b0fb616299
parent dfc1a92628
5 changed files with 121 additions and 31 deletions
--- a/ccmain/osdetect.cpp
+++ b/ccmain/osdetect.cpp
@ -326,7 +326,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
  tess->tess_cn_matching.set_value(true); // turn it on
  tess->tess_bn_matching.set_value(false);
  C_BLOB* blob = bbox->cblob();
-  TBLOB* tblob = TBLOB::PolygonalCopy(blob);
+  TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
  TBOX box = tblob->bounding_box();
  FCOORD current_rotation(1.0f, 0.0f);
  FCOORD rotation90(0.0f, 1.0f);
@ -347,13 +347,12 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
      scaling = static_cast<float>(kBlnXHeight) / box.width();
      x_origin = i == 1 ? box.left() : box.right();
    }
-    DENORM denorm;
-    denorm.SetupNormalization(NULL, NULL, &current_rotation, NULL, NULL, 0,
-                              x_origin, y_origin, scaling, scaling,
-                              0.0f, static_cast<float>(kBlnBaselineOffset));
    TBLOB* rotated_blob = new TBLOB(*tblob);
-    rotated_blob->Normalize(denorm);
-    tess->AdaptiveClassifier(rotated_blob, denorm, ratings + i, NULL);
+    rotated_blob->Normalize(NULL, &current_rotation, NULL,
+                            x_origin, y_origin, scaling, scaling,
+                            0.0f, static_cast<float>(kBlnBaselineOffset),
+                            false, NULL);
+    tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL);
    delete rotated_blob;
    current_rotation.rotate(rotation90);
  }
--- a/ccmain/pagesegmain.cpp
+++ b/ccmain/pagesegmain.cpp
@ -163,8 +163,12 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
      tprintf("Empty page\n");
    return 0;  // AutoPageSeg found an empty page.
  }
+  bool splitting =
+      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
+  bool cjk_mode = textord_use_cjk_fp_model;

-  textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
+  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
+                       pix_thresholds_, pix_grey_, splitting || cjk_mode,
                       blocks, &to_blocks);
  return auto_page_seg_ret_val;
 }
@ -243,6 +247,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode,
    }
    result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
                                to_block, photomask_pix,
+                                pix_thresholds_, pix_grey_,
                                &found_blocks, to_blocks);
    if (result >= 0)
      finder->GetDeskewVectors(&deskew_, &reskew_);
--- a/ccmain/pgedit.cpp
+++ b/ccmain/pgedit.cpp
@ -41,6 +41,7 @@
 #include "svmnode.h"
 #include "statistc.h"
 #include "tesseractclass.h"
+#include "werdit.h"

 #ifndef GRAPHICS_DISABLED
 #define ASC_HEIGHT     (2 * kBlnBaselineOffset + kBlnXHeight)
@ -73,6 +74,7 @@ enum CMD_EVENTS
  QUIT_CMD_EVENT,
  RECOG_WERDS,
  RECOG_PSEUDO,
+  SHOW_BLOB_FEATURES,
  SHOW_SUBSCRIPT_CMD_EVENT,
  SHOW_SUPERSCRIPT_CMD_EVENT,
  SHOW_ITALIC_CMD_EVENT,
@ -266,6 +268,7 @@ SVMenuNode *Tesseract::build_menu_new() {
  modes_menu_item->AddChild("Config Words", DEBUG_WERD_CMD_EVENT);
  modes_menu_item->AddChild("Recog Words", RECOG_WERDS);
  modes_menu_item->AddChild("Recog Blobs", RECOG_PSEUDO);
+  modes_menu_item->AddChild("Show Blob Features", SHOW_BLOB_FEATURES);

  parent_menu = root_menu_item->AddChild("DISPLAY");

@ -433,6 +436,7 @@ BOOL8 Tesseract::process_cmd_win_event(                 // UI command semantics
    case SHOW_BLN_WERD_CMD_EVENT:
    case RECOG_WERDS:
    case RECOG_PSEUDO:
+    case SHOW_BLOB_FEATURES:
      mode =(CMD_EVENTS) cmd_event;
      break;
    case DEBUG_WERD_CMD_EVENT:
@ -617,6 +621,9 @@ void Tesseract::process_image_event( // action in image win
          image_win->AddMessage("Recogging selected blobs");
          recog_pseudo_word(current_page_res, selection_box);
          break;
+        case SHOW_BLOB_FEATURES:
+          blob_feature_display(current_page_res, selection_box);
+          break;

        default:
          sprintf(msg, "Mode %d not yet implemented", mode);
@ -725,14 +732,21 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
  TWERD *bln_word = word_res->chopped_word;
  if (bln_word == NULL) {
    word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
-                                      this->textord_use_cjk_fp_model,
+                                      textord_use_cjk_fp_model,
+                                      poly_allow_detailed_fx,
                                      row, block);
    bln_word = word_res->chopped_word;
  }
  bln_word_window_handle()->Clear();
  display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
                     1.0, 0.0f, -1000.0f, 1000.0f);
-  bln_word->plot(bln_word_window_handle());
+  C_BLOB_IT it(word_res->word->cblob_list());
+  ScrollView::Color color = WERD::NextColor(ScrollView::BLACK);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->plot_normed(word_res->denorm, color, ScrollView::BROWN,
+                           bln_word_window_handle());
+    color = WERD::NextColor(color);
+  }
  bln_word_window_handle()->Update();
  return TRUE;
 }
@ -754,6 +768,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {

  if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
    BoxWord* box_word = word_res->box_word;
+    WERD_CHOICE* best_choice = word_res->best_choice;
    int length = box_word->length();
    if (word_res->fontinfo == NULL) return false;
    const FontInfo& font_info = *word_res->fontinfo;
@ -761,11 +776,11 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
      ScrollView::Color color = ScrollView::GREEN;
      switch (color_mode) {
        case CM_SUBSCRIPT:
-          if (box_word->BlobPosition(i) == SP_SUBSCRIPT)
+          if (best_choice->BlobPosition(i) == SP_SUBSCRIPT)
            color = ScrollView::RED;
          break;
        case CM_SUPERSCRIPT:
-          if (box_word->BlobPosition(i) == SP_SUPERSCRIPT)
+          if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
            color = ScrollView::RED;
          break;
        case CM_ITALIC:
@ -789,7 +804,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
            color = ScrollView::RED;
          break;
        case CM_DROPCAPS:
-          if (box_word->BlobPosition(i) == SP_DROPCAP)
+          if (best_choice->BlobPosition(i) == SP_DROPCAP)
            color = ScrollView::RED;
          break;
          // TODO(rays) underline is currently completely unsupported.
@ -833,7 +848,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
                                 // display poly approx
  if (word->display_flag(DF_POLYGONAL)) {
                                 // need to convert
-    TWERD* tword = TWERD::PolygonalCopy(word);
+    TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
    tword->plot(image_win);
    delete tword;
    displayed_something = TRUE;
@ -847,15 +862,13 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
  }
  if (word->display_flag(DF_BLAMER) &&
      !(word_res->blamer_bundle != NULL &&
-        word_res->blamer_bundle->incorrect_result_reason == IRR_CORRECT)) {
+        word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
    text = "";
    const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
    if (blamer_bundle == NULL) {
      text += "NULL";
    } else {
-      for (int i = 0; i < blamer_bundle->truth_text.length(); ++i) {
-        text += blamer_bundle->truth_text[i];
-      }
+      text = blamer_bundle->TruthString();
    }
    text += " -> ";
    STRING best_choice_str;
@ -866,7 +879,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
    }
    text += best_choice_str;
    IncorrectResultReason reason = (blamer_bundle == NULL) ?
-        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason;
+        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
    ASSERT_HOST(reason < IRR_NUM_REASONS)
    blame += " [";
    blame += BlamerBundle::IncorrectReasonName(reason);
@ -915,9 +928,9 @@ BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
  tprintf("\nWord data...\n");
  word_res->word->print();
  if (word_res->blamer_bundle != NULL && wordrec_debug_blamer &&
-      word_res->blamer_bundle->incorrect_result_reason != IRR_CORRECT) {
+      word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) {
    tprintf("Current blamer debug: %s\n",
-            word_res->blamer_bundle->debug.string());
+            word_res->blamer_bundle->debug().string());
  }
  return TRUE;
 }
@ -939,6 +952,46 @@ BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
  word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER));
  return word_display(block, row, word_res);
 }
+
+// Draws the BL and CN int features of blob 0 of the word at selection_box.
+// page_res is only read; it is non-const because the iterator requires it.
+void Tesseract::blob_feature_display(PAGE_RES* page_res,
+                                     const TBOX& selection_box) {
+  ROW* row;               // row of word
+  BLOCK* block;           // block of word
+  WERD* word = make_pseudo_word(page_res, selection_box, block, row);
+  if (word != NULL) {  // with no pseudo-word nothing is displayed
+    WERD_RES word_res(word);
+    word_res.x_height = row->x_height();
+    word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
+                                     textord_use_cjk_fp_model,
+                                     poly_allow_detailed_fx,
+                                     row, block);
+    TWERD* bln_word = word_res.chopped_word;  // NOTE(review): NULL unchecked
+    TBLOB* bln_blob = bln_word->blobs[0];  // only the first blob is shown
+    INT_FX_RESULT_STRUCT fx_info;  // filled by ExtractFeatures, unused here
+    GenericVector<INT_FEATURE_STRUCT> bl_features;
+    GenericVector<INT_FEATURE_STRUCT> cn_features;
+    Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features,
+                              &cn_features, &fx_info, NULL);
+    // Draw the baseline (BL) features in green in a newly created window.
+    ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
+    ClearFeatureSpaceWindow(baseline, bl_win);
+    for (int f = 0; f < bl_features.size(); ++f)
+      RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN);
+    bl_win->Update();
+    // Draw the CN features in green in a second newly created window.
+    ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
+    ClearFeatureSpaceWindow(character, cn_win);
+    for (int f = 0; f < cn_features.size(); ++f)
+      RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
+    cn_win->Update();
+
+    delete word;  // the pseudo-word made above is destroyed here
+  }
+}
+
+
 #endif  // GRAPHICS_DISABLED

 }  // namespace tesseract
--- a/ccmain/thresholder.cpp
+++ b/ccmain/thresholder.cpp
@ -169,6 +169,32 @@ void ImageThresholder::ThresholdToPix(Pix** pix) {
  }
 }

+// Gets a pix that contains an 8 bit threshold value at each pixel. The
+// returned pix may be an integer reduction of the binary image such that
+// the scale factor may be inferred from the ratio of the sizes, even down
+// to the extreme of a 1x1 pixel thresholds image.
+// Ideally the 8 bit threshold should be the exact threshold used to generate
+// the binary image in ThresholdToPix, but this is not a hard constraint.
+// Returns NULL if the input is binary. pixDestroy after use.
+Pix* ImageThresholder::GetPixRectThresholds() {
+  if (IsBinary()) return NULL;
+  Pix* pix_grey = GetPixRectGrey();  // destroyed below, after Otsu
+  int width = pixGetWidth(pix_grey);
+  int height = pixGetHeight(pix_grey);
+  int* thresholds;  // set via OtsuThreshold, delete[]d below
+  int* hi_values;  // likewise; its contents are not read here
+  OtsuThreshold(reinterpret_cast<const unsigned char*>(pixGetData(pix_grey)),
+                1, pixGetWpl(pix_grey) * sizeof(l_uint32),
+                0, 0, width, height, &thresholds, &hi_values);
+  pixDestroy(&pix_grey);  // NOTE(review): Otsu got word data as bytes
+  Pix* pix_thresholds = pixCreate(width, height, 8);  // size of pix_grey
+  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;  // 128 if not > 0
+  pixSetAllArbitrary(pix_thresholds, threshold);  // one value everywhere
+  delete [] thresholds;
+  delete [] hi_values;
+  return pix_thresholds;
+}
+
 // Common initialization shared between SetImage methods.
 void ImageThresholder::Init() {
  SetRectangle(0, 0, image_width_, image_height_);
@ -198,11 +224,10 @@ Pix* ImageThresholder::GetPixRect() {
  return raw_pix;
 }

-// Get a clone/copy of the source image rectangle, reduced to greyscale.
+// Get a clone/copy of the source image rectangle, reduced to greyscale,
+// and at the same resolution as the output binary.
 // The returned Pix must be pixDestroyed.
-// This function will be used in the future by the page layout analysis, and
-// the layout analysis that uses it will only be available with Leptonica,
-// so there is no raw equivalent.
+// Provided to the classifier to extract features from the greyscale image.
 Pix* ImageThresholder::GetPixRectGrey() {
  Pix* pix = GetPixRect();  // May have to be reduced to grey.
  int depth = pixGetDepth(pix);
--- a/ccmain/thresholder.h
+++ b/ccmain/thresholder.h
@ -122,6 +122,15 @@ class ImageThresholder {
  /// Caller must use pixDestroy to free the created Pix.
  virtual void ThresholdToPix(Pix** pix);

+  // Gets a pix that contains an 8 bit threshold value at each pixel. The
+  // returned pix may be an integer reduction of the binary image such that
+  // the scale factor may be inferred from the ratio of the sizes, even down
+  // to the extreme of a 1x1 pixel thresholds image.
+  // Ideally the 8 bit threshold should be the exact threshold used to generate
+  // the binary image in ThresholdToPix, but this is not a hard constraint.
+  // Returns NULL if the input is binary. pixDestroy after use.
+  virtual Pix* GetPixRectThresholds();
+
  /// Get a clone/copy of the source image rectangle.
  /// The returned Pix must be pixDestroyed.
  /// This function will be used in the future by the page layout analysis, and
@ -129,12 +138,11 @@ class ImageThresholder {
  /// so there is no raw equivalent.
  Pix* GetPixRect();

-  /// Get a clone/copy of the source image rectangle, reduced to greyscale.
-  /// The returned Pix must be pixDestroyed.
-  /// This function will be used in the future by the page layout analysis, and
-  /// the layout analysis that uses it will only be available with Leptonica,
-  /// so there is no raw equivalent.
-  Pix* GetPixRectGrey();
+  // Get a clone/copy of the source image rectangle, reduced to greyscale,
+  // and at the same resolution as the output binary.
+  // The returned Pix must be pixDestroyed.
+  // Provided to the classifier to extract features from the greyscale image.
+  virtual Pix* GetPixRectGrey();

 protected:
  // ----------------------------------------------------------------------