Generalized the feature extractor to allow feature extraction (fx) from greyscale images

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@877 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2013-09-23 15:22:37 +00:00
parent ec026cadfe
commit 2c909702c9
5 changed files with 36 additions and 25 deletions

View File

@ -262,7 +262,7 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
// Setup the denormalization. // Setup the denormalization.
ASSERT_HOST(denorm_ == NULL); ASSERT_HOST(denorm_ == NULL);
denorm_ = new DENORM; denorm_ = new DENORM;
denorm_->SetupNormalization(NULL, NULL, &rotation_, NULL, NULL, 0, denorm_->SetupNormalization(NULL, &rotation_, NULL,
0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
} }
@ -279,11 +279,17 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
// it is still divided into blocks of equal line spacing/text size. // it is still divided into blocks of equal line spacing/text size.
// scaled_color is scaled down by scaled_factor from the input color image, // scaled_color is scaled down by scaled_factor from the input color image,
// and may be NULL if the input was not color. // and may be NULL if the input was not color.
// grey_pix is optional, but if present must match the photo_mask_pix in size,
// and must be a *real* grey image instead of binary_pix * 255.
// thresholds_pix is expected to be present iff grey_pix is present and
// can be an integer factor reduction of the grey_pix. It represents the
// thresholds that were used to create the binary_pix from the grey_pix.
// Returns -1 if the user hits the 'd' key in the blocks window while running // Returns -1 if the user hits the 'd' key in the blocks window while running
// in debug mode, which requests a retry with more debug info. // in debug mode, which requests a retry with more debug info.
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
Pix* scaled_color, int scaled_factor, Pix* scaled_color, int scaled_factor,
TO_BLOCK* input_block, Pix* photo_mask_pix, TO_BLOCK* input_block, Pix* photo_mask_pix,
Pix* thresholds_pix, Pix* grey_pix,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
stroke_width_->FindLeaderPartitions(input_block, &part_grid_); stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
@ -311,6 +317,11 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
big_parts_.clear(); big_parts_.clear();
delete stroke_width_; delete stroke_width_;
stroke_width_ = NULL; stroke_width_ = NULL;
// Compute the edge offsets whether or not there is a grey_pix. This is done
// here because the c_blobs have not yet been rotated or otherwise transformed,
// so no denorm is required, and the text has already been separated from the
// image, so no time is wasted running it on image blobs.
input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
// A note about handling right-to-left scripts (Hebrew/Arabic): // A note about handling right-to-left scripts (Hebrew/Arabic):
// The columns must be reversed and come out in right-to-left instead of // The columns must be reversed and come out in right-to-left instead of
@ -347,7 +358,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
min_gutter_width_, &part_grid_, &deskew_, &reskew_); min_gutter_width_, &part_grid_, &deskew_, &reskew_);
// Add the deskew to the denorm_. // Add the deskew to the denorm_.
DENORM* new_denorm = new DENORM; DENORM* new_denorm = new DENORM;
new_denorm->SetupNormalization(NULL, NULL, &deskew_, denorm_, NULL, 0, new_denorm->SetupNormalization(NULL, &deskew_, denorm_,
0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
denorm_ = new_denorm; denorm_ = new_denorm;
} }
@ -357,6 +368,7 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode,
// Make the column_sets_. // Make the column_sets_.
if (!MakeColumns(false)) { if (!MakeColumns(false)) {
tprintf("Empty page!!\n"); tprintf("Empty page!!\n");
part_grid_.DeleteParts();
return 0; // This is an empty page. return 0; // This is an empty page.
} }
@ -581,24 +593,23 @@ bool ColumnFinder::MakeColumns(bool single_column) {
} }
if (textord_debug_tabfind) if (textord_debug_tabfind)
PrintColumnCandidates("Final Columns"); PrintColumnCandidates("Final Columns");
if (!column_sets_.empty()) { bool has_columns = !column_sets_.empty();
if (has_columns) {
// Divide the page into sections of uniform column layout. // Divide the page into sections of uniform column layout.
AssignColumns(part_sets); AssignColumns(part_sets);
if (textord_tabfind_show_columns) { if (textord_tabfind_show_columns) {
DisplayColumnBounds(&part_sets); DisplayColumnBounds(&part_sets);
} }
ComputeMeanColumnGap(); ComputeMeanColumnGap();
ColPartition_LIST parts;
for (int i = 0; i < part_sets.size(); ++i) {
ColPartitionSet* line_set = part_sets.get(i);
if (line_set != NULL) {
line_set->RelinquishParts();
delete line_set;
}
}
return true;
} }
return false; for (int i = 0; i < part_sets.size(); ++i) {
ColPartitionSet* line_set = part_sets.get(i);
if (line_set != NULL) {
line_set->RelinquishParts();
delete line_set;
}
}
return has_columns;
} }
// Attempt to improve the column_candidates by expanding the columns // Attempt to improve the column_candidates by expanding the columns
@ -1464,7 +1475,7 @@ void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
ReflectBlobList(&input_block->large_blobs); ReflectBlobList(&input_block->large_blobs);
// Update the denorm with the reflection. // Update the denorm with the reflection.
DENORM* new_denorm = new DENORM; DENORM* new_denorm = new DENORM;
new_denorm->SetupNormalization(NULL, NULL, NULL, denorm_, NULL, 0, new_denorm->SetupNormalization(NULL, NULL, denorm_,
0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
denorm_ = new_denorm; denorm_ = new_denorm;
} }
@ -1605,6 +1616,7 @@ FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
block->index(), block->poly_block()->isA(), block->index(), block->poly_block()->isA(),
block->re_rotation().x(), block->re_rotation().y(), block->re_rotation().x(), block->re_rotation().y(),
classify_rotation.x(), classify_rotation.y()); classify_rotation.x(), classify_rotation.y());
block->bounding_box().print();
} }
return blob_rotation; return blob_rotation;
} }

View File

@ -141,11 +141,17 @@ class ColumnFinder : public TabFind {
// it is still divided into blocks of equal line spacing/text size. // it is still divided into blocks of equal line spacing/text size.
// scaled_color is scaled down by scaled_factor from the input color image, // scaled_color is scaled down by scaled_factor from the input color image,
// and may be NULL if the input was not color. // and may be NULL if the input was not color.
// grey_pix is optional, but if present must match the photo_mask_pix in size,
// and must be a *real* grey image instead of binary_pix * 255.
// thresholds_pix is expected to be present iff grey_pix is present and
// can be an integer factor reduction of the grey_pix. It represents the
// thresholds that were used to create the binary_pix from the grey_pix.
// Returns -1 if the user hits the 'd' key in the blocks window while running // Returns -1 if the user hits the 'd' key in the blocks window while running
// in debug mode, which requests a retry with more debug info. // in debug mode, which requests a retry with more debug info.
int FindBlocks(PageSegMode pageseg_mode, int FindBlocks(PageSegMode pageseg_mode,
Pix* scaled_color, int scaled_factor, Pix* scaled_color, int scaled_factor,
TO_BLOCK* block, Pix* photo_mask_pix, TO_BLOCK* block, Pix* photo_mask_pix,
Pix* thresholds_pix, Pix* grey_pix,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
// Get the rotation required to deskew, and its inverse rotation. // Get the rotation required to deskew, and its inverse rotation.

View File

@ -28,8 +28,6 @@
#define MINEDGELENGTH 8 // min decent length #define MINEDGELENGTH 8 // min decent length
INT_VAR(edges_maxedgelength, 16000, "Max steps in any outline");
/********************************************************************** /**********************************************************************
* complete_edge * complete_edge
* *
@ -94,7 +92,7 @@ ScrollView::Color check_path_legal( //certify outline
} }
edgept = edgept->next; edgept = edgept->next;
} }
while (edgept != start && length < edges_maxedgelength); while (edgept != start && length < C_OUTLINE::kMaxOutlineLength);
if ((chainsum != 4 && chainsum != -4) if ((chainsum != 4 && chainsum != -4)
|| edgept != start || length < MINEDGELENGTH) { || edgept != start || length < MINEDGELENGTH) {

View File

@ -30,15 +30,10 @@
#define BUCKETSIZE 16 #define BUCKETSIZE 16
extern double_VAR_H (edges_threshold_greyfraction, 0.07,
"Min edge diff for grad vector");
extern BOOL_VAR_H (edges_show_paths, FALSE, "Draw raw outlines");
extern BOOL_VAR_H (edges_show_needles, FALSE, "Draw edge needles");
extern INT_VAR_H (edges_children_per_grandchild, 10, extern INT_VAR_H (edges_children_per_grandchild, 10,
"Importance ratio for chucking outlines"); "Importance ratio for chucking outlines");
extern INT_VAR_H (edges_children_count_limit, 45, extern INT_VAR_H (edges_children_count_limit, 45,
"Max holes allowed in blob"); "Max holes allowed in blob");
extern INT_VAR_H (edges_maxedgelength, 16000, "Max steps in any outline");
extern double_VAR_H (edges_childarea, 0.5, extern double_VAR_H (edges_childarea, 0.5,
"Max area fraction of child outline"); "Max area fraction of child outline");
extern double_VAR_H (edges_boxarea, 0.8, extern double_VAR_H (edges_boxarea, 0.8,

View File

@ -237,8 +237,8 @@ int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
if (denorm != NULL) { if (denorm != NULL) {
// Denormalize the start and end. // Denormalize the start and end.
denorm->DenormTransform(start_pt, &start_pt); denorm->DenormTransform(NULL, start_pt, &start_pt);
denorm->DenormTransform(end_pt, &end_pt); denorm->DenormTransform(NULL, end_pt, &end_pt);
} }
if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
@ -741,7 +741,7 @@ void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
TPOINT* pt) const { TPOINT* pt) const {
if (denorm != NULL) { if (denorm != NULL) {
// Denormalize the point. // Denormalize the point.
denorm->DenormTransform(*pt, pt); denorm->DenormTransform(NULL, *pt, pt);
} }
pt->x = ImageXToProjectionX(pt->x); pt->x = ImageXToProjectionX(pt->x);
pt->y = ImageYToProjectionY(pt->y); pt->y = ImageYToProjectionY(pt->y);