Generalized feature extractor to allow feature extraction (fx) from greyscale

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@877 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2025-06-07 09:52:40 +08:00 · 2013-09-23 15:22:37 +00:00 · 2013-09-23 15:22:37 +00:00 · 2c909702c9
commit 2c909702c9
parent ec026cadfe
5 changed files with 36 additions and 25 deletions
--- a/textord/colfind.cpp
+++ b/textord/colfind.cpp
@@ -262,7 +262,7 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
  // Setup the denormalization.
  ASSERT_HOST(denorm_ == NULL);
  denorm_ = new DENORM;
-  denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0,
+  denorm_->SetupNormalization(NULL, &rotation_, NULL,
                              0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
 }

@@ -279,11 +279,17 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
 // it is still divided into blocks of equal line spacing/text size.
 // scaled_color is scaled down by scaled_factor from the input color image,
 // and may be NULL if the input was not color.
+// grey_pix is optional, but if present must match the photo_mask_pix in size,
+// and must be a *real* grey image instead of binary_pix * 255.
+// thresholds_pix is expected to be present iff grey_pix is present and
+// can be an integer factor reduction of the grey_pix. It represents the
+// thresholds that were used to create the binary_pix from the grey_pix.
 // Returns -1 if the user hits the 'd' key in the blocks window while running
 // in debug mode, which requests a retry with more debug info.
 int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
                             Pix* scaled_color, int scaled_factor,
                             TO_BLOCK* input_block, Pix* photo_mask_pix,
+                             Pix* thresholds_pix, Pix* grey_pix,
                             BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
@@ -311,6 +317,11 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
  big_parts_.clear();
  delete stroke_width_;
  stroke_width_ = NULL;
+  // Compute the edge offsets whether or not there is a grey_pix. It is done
+  // here as the c_blobs haven't been touched by rotation or anything yet,
+  // so no denorm is required, yet the text has been separated from image, so
+  // no time is wasted running it on image blobs.
+  input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);

  // A note about handling right-to-left scripts (Hebrew/Arabic):
  // The columns must be reversed and come out in right-to-left instead of
@@ -347,7 +358,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
                     min_gutter_width_, &part_grid_, &deskew_, &reskew_);
      // Add the deskew to the denorm_.
      DENORM* new_denorm = new DENORM;
-      new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL, 0,
+      new_denorm->SetupNormalization(NULL, &deskew_, denorm_,
                                     0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
      denorm_ = new_denorm;
    }
@@ -357,6 +368,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
    // Make the column_sets_.
    if (!MakeColumns(false)) {
      tprintf("Empty page!!\n");
+      part_grid_.DeleteParts();
      return 0;  // This is an empty page.
    }

@@ -581,24 +593,23 @@ bool ColumnFinder::MakeColumns(bool single_column) {
  }
  if (textord_debug_tabfind)
    PrintColumnCandidates("Final Columns");
-  if (!column_sets_.empty()) {
+  bool has_columns = !column_sets_.empty();
+  if (has_columns) {
    // Divide the page into sections of uniform column layout.
    AssignColumns(part_sets);
    if (textord_tabfind_show_columns) {
      DisplayColumnBounds(&part_sets);
    }
    ComputeMeanColumnGap();
-    ColPartition_LIST parts;
-    for (int i = 0; i < part_sets.size(); ++i) {
-      ColPartitionSet* line_set = part_sets.get(i);
-      if (line_set != NULL) {
-        line_set->RelinquishParts();
-        delete line_set;
-      }
-    }
-    return true;
  }
-  return false;
+  for (int i = 0; i < part_sets.size(); ++i) {
+    ColPartitionSet* line_set = part_sets.get(i);
+    if (line_set != NULL) {
+      line_set->RelinquishParts();
+      delete line_set;
+    }
+  }
+  return has_columns;
 }

 // Attempt to improve the column_candidates by expanding the columns
@@ -1464,7 +1475,7 @@ void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
  ReflectBlobList(&input_block->large_blobs);
  // Update the denorm with the reflection.
  DENORM* new_denorm = new DENORM;
-  new_denorm->SetupNormalization(NULL, NULL, NULL, denorm_, NULL, 0,
+  new_denorm->SetupNormalization(NULL, NULL, denorm_,
                                 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
  denorm_ = new_denorm;
 }
@@ -1605,6 +1616,7 @@ FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
            block->index(), block->poly_block()->isA(),
            block->re_rotation().x(), block->re_rotation().y(),
            classify_rotation.x(), classify_rotation.y());
+    block->bounding_box().print();
  }
  return blob_rotation;
 }
--- a/textord/colfind.h
+++ b/textord/colfind.h
@@ -141,11 +141,17 @@ class ColumnFinder : public TabFind {
  // it is still divided into blocks of equal line spacing/text size.
  // scaled_color is scaled down by scaled_factor from the input color image,
  // and may be NULL if the input was not color.
+  // grey_pix is optional, but if present must match the photo_mask_pix in size,
+  // and must be a *real* grey image instead of binary_pix * 255.
+  // thresholds_pix is expected to be present iff grey_pix is present and
+  // can be an integer factor reduction of the grey_pix. It represents the
+  // thresholds that were used to create the binary_pix from the grey_pix.
  // Returns -1 if the user hits the 'd' key in the blocks window while running
  // in debug mode, which requests a retry with more debug info.
  int FindBlocks(PageSegMode pageseg_mode,
                 Pix* scaled_color, int scaled_factor,
                 TO_BLOCK* block, Pix* photo_mask_pix,
+                 Pix* thresholds_pix, Pix* grey_pix,
                 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);

  // Get the rotation required to deskew, and its inverse rotation.
--- a/textord/edgloop.cpp
+++ b/textord/edgloop.cpp
@@ -28,8 +28,6 @@

 #define MINEDGELENGTH   8        // min decent length

-INT_VAR(edges_maxedgelength, 16000, "Max steps in any outline");
-
 /**********************************************************************
 * complete_edge
 *
@@ -94,7 +92,7 @@ ScrollView::Color check_path_legal(                  //certify outline
    }
    edgept = edgept->next;
  }
-  while (edgept != start && length < edges_maxedgelength);
+  while (edgept != start && length < C_OUTLINE::kMaxOutlineLength);

  if ((chainsum != 4 && chainsum != -4)
  || edgept != start || length < MINEDGELENGTH) {
--- a/textord/edgloop.h
+++ b/textord/edgloop.h
@@ -30,15 +30,10 @@
 #define BUCKETSIZE      16


-extern double_VAR_H (edges_threshold_greyfraction, 0.07,
-"Min edge diff for grad vector");
-extern BOOL_VAR_H (edges_show_paths, FALSE, "Draw raw outlines");
-extern BOOL_VAR_H (edges_show_needles, FALSE, "Draw edge needles");
 extern INT_VAR_H (edges_children_per_grandchild, 10,
 "Importance ratio for chucking outlines");
 extern INT_VAR_H (edges_children_count_limit, 45,
 "Max holes allowed in blob");
-extern INT_VAR_H (edges_maxedgelength, 16000, "Max steps in any outline");
 extern double_VAR_H (edges_childarea, 0.5,
 "Max area fraction of child outline");
 extern double_VAR_H (edges_boxarea, 0.8,
--- a/textord/textlineprojection.cpp
+++ b/textord/textlineprojection.cpp
@@ -237,8 +237,8 @@ int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
    if (denorm != NULL) {
      // Denormalize the start and end.
-      denorm->DenormTransform(start_pt, &start_pt);
-      denorm->DenormTransform(end_pt, &end_pt);
+      denorm->DenormTransform(NULL, start_pt, &start_pt);
+      denorm->DenormTransform(NULL, end_pt, &end_pt);
    }
    if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
      perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
@@ -741,7 +741,7 @@ void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
                                              TPOINT* pt) const {
  if (denorm != NULL) {
    // Denormalize the point.
-    denorm->DenormTransform(*pt, pt);
+    denorm->DenormTransform(NULL, *pt, pt);
  }
  pt->x = ImageXToProjectionX(pt->x);
  pt->y = ImageYToProjectionY(pt->y);