Fixed occurrence of small rotated blocks in loosely spaced text

This commit is contained in:
Ray Smith 2015-06-12 11:05:00 -07:00
parent d74c625e52
commit 78b5e1a77d
6 changed files with 125 additions and 67 deletions

View File

@ -242,12 +242,9 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode);
bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode);
bool osd_only = pageseg_mode == PSM_OSD_ONLY;
ColumnFinder* finder = SetupPageSegAndDetectOrientation(
single_column, osd_enabled, osd_only, blocks, osd_tess, osr,
&temp_blocks, &photomask_pix, &musicmask_pix);
pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
&musicmask_pix);
int result = 0;
if (finder != NULL) {
TO_BLOCK_IT to_block_it(&temp_blocks);
@ -310,9 +307,9 @@ static void AddAllScriptsConverted(const UNICHARSET& sid_set,
* The returned ColumnFinder must be deleted after use.
*/
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
bool single_column, bool osd, bool only_osd,
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
@ -334,8 +331,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
if (tessedit_dump_pageseg_images)
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
if (single_column)
v_lines.clear();
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);
@ -355,7 +351,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
textord_tabfind_aligned_gap_fraction,
&v_lines, &h_lines, vertical_x, vertical_y);
finder->SetupAndFilterNoise(*photo_mask_pix, to_block);
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
if (equ_detect_) {
equ_detect_->LabelSpecialText(to_block);
@ -367,13 +363,15 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
// We want the text lines horizontal, (vertical text indicates vertical
// textlines) which may conflict (eg vertically written CJK).
int osd_orientation = 0;
bool vertical_text = textord_tabfind_force_vertical_text;
if (!vertical_text && textord_tabfind_vertical_text) {
bool vertical_text = textord_tabfind_force_vertical_text ||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
if (!vertical_text && textord_tabfind_vertical_text &&
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
vertical_text =
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
to_block, &osd_blobs);
}
if (osd && osd_tess != NULL && osr != NULL) {
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
GenericVector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
@ -385,7 +383,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
}
}
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
if (only_osd) {
if (pageseg_mode == PSM_OSD_ONLY) {
delete finder;
return NULL;
}

View File

@ -287,9 +287,9 @@ class Tesseract : public Wordrec {
TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs,
Tesseract* osd_tess, OSResults* osr);
ColumnFinder* SetupPageSegAndDetectOrientation(
bool single_column, bool osd, bool only_osd,
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
Pix** music_mask_pix);
// par_control.cpp
void PrerecAllWordsPar(const GenericVector<WordData>& words);

View File

@ -150,7 +150,8 @@ ColumnFinder::~ColumnFinder() {
// direction, so the textline projection_ map can be setup.
// On return, IsVerticallyAlignedText may be called (now optionally) to
// determine the gross textline alignment of the page.
void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode,
Pix* photo_mask_pix,
TO_BLOCK* input_block) {
part_grid_.Init(gridsize(), bleft(), tright());
if (stroke_width_ != NULL)
@ -172,7 +173,8 @@ void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
// Remove obvious noise and make the initial non-text map.
nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
photo_mask_pix, input_block);
stroke_width_->FindTextlineDirectionAndFixBrokenCJK(cjk_script_, input_block);
stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
input_block);
// Clear the strokewidth grid ready for rotation or leader finding.
stroke_width_->Clear();
}
@ -305,8 +307,8 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
input_block);
SetBlockRuleEdges(input_block);
stroke_width_->GradeBlobsIntoPartitions(
rerotate_, input_block, nontext_map_, denorm_, cjk_script_, &projection_,
diacritic_blobs, &part_grid_, &big_parts_);
pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
&projection_, diacritic_blobs, &part_grid_, &big_parts_);
if (!PSM_SPARSE(pageseg_mode)) {
ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
input_block, this, &part_grid_, &big_parts_);

View File

@ -110,7 +110,8 @@ class ColumnFinder : public TabFind {
// direction, so the textline projection_ map can be setup.
// On return, IsVerticallyAlignedText may be called (now optionally) to
// determine the gross textline alignment of the page.
void SetupAndFilterNoise(Pix* photo_mask_pix, TO_BLOCK* input_block);
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix,
TO_BLOCK* input_block);
// Tests for vertical alignment of text (returning true if so), and generates
// a list of blobs (in osd_blobs) for orientation and script detection.

View File

@ -164,14 +164,15 @@ void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) {
// and large blobs with optional repair of broken CJK characters first.
// Repair of broken CJK is needed here because broken CJK characters
// can fool the textline direction detection algorithm.
void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
bool cjk_merge,
TO_BLOCK* input_block) {
// Setup the grid with the remaining (non-noise) blobs.
InsertBlobs(input_block);
// Repair broken CJK characters if needed.
while (cjk_merge && FixBrokenCJK(input_block));
// Grade blobs by inspection of neighbours.
FindTextlineFlowDirection(false);
FindTextlineFlowDirection(pageseg_mode, false);
// Clear the grid ready for rotation or leader finding.
Clear();
}
@ -351,10 +352,10 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
// Large blobs that cause overlap are put in separate partitions and added
// to the big_parts list.
void StrokeWidth::GradeBlobsIntoPartitions(
const FCOORD& rerotation, TO_BLOCK* block, Pix* nontext_pix,
const DENORM* denorm, bool cjk_script, TextlineProjection* projection,
BLOBNBOX_LIST* diacritic_blobs, ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts) {
PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block,
Pix* nontext_pix, const DENORM* denorm, bool cjk_script,
TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs,
ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) {
nontext_map_ = nontext_pix;
projection_ = projection;
denorm_ = denorm;
@ -367,7 +368,7 @@ void StrokeWidth::GradeBlobsIntoPartitions(
if (cjk_script) {
FixBrokenCJK(block);
}
FindTextlineFlowDirection(false);
FindTextlineFlowDirection(pageseg_mode, false);
projection_->ConstructProjection(block, rerotation, nontext_map_);
if (textord_tabfind_show_strokewidths) {
ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
@ -380,17 +381,18 @@ void StrokeWidth::GradeBlobsIntoPartitions(
Clear();
InsertBlobs(block);
FCOORD skew;
FindTextlineFlowDirection(true);
PartitionFindResult r = FindInitialPartitions(
rerotation, true, block, diacritic_blobs, part_grid, big_parts, &skew);
FindTextlineFlowDirection(pageseg_mode, true);
PartitionFindResult r =
FindInitialPartitions(pageseg_mode, rerotation, true, block,
diacritic_blobs, part_grid, big_parts, &skew);
if (r == PFR_NOISE) {
tprintf("Detected %d diacritics\n", diacritic_blobs->length());
// Noise was found, and removed.
Clear();
InsertBlobs(block);
FindTextlineFlowDirection(true);
r = FindInitialPartitions(rerotation, false, block, diacritic_blobs,
part_grid, big_parts, &skew);
FindTextlineFlowDirection(pageseg_mode, true);
r = FindInitialPartitions(pageseg_mode, rerotation, false, block,
diacritic_blobs, part_grid, big_parts, &skew);
}
nontext_map_ = NULL;
projection_ = NULL;
@ -805,7 +807,8 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
// flags in the BLOBNBOXes currently in this grid.
// This function is called more than once if page orientation is uncertain,
// so display_if_debugging is true on the final call to display the results.
void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode,
bool display_if_debugging) {
BlobGridSearch gsearch(this);
BLOBNBOX* bbox;
// For every bbox in the grid, set its neighbours.
@ -821,7 +824,15 @@ void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
// Now try to make the blobs only vertical or horizontal using neighbours.
gsearch.StartFullSearch();
while ((bbox = gsearch.NextFullSearch()) != NULL) {
SetNeighbourFlows(bbox);
if (FindingVerticalOnly(pageseg_mode)) {
bbox->set_vert_possible(true);
bbox->set_horz_possible(false);
} else if (FindingHorizontalOnly(pageseg_mode)) {
bbox->set_vert_possible(false);
bbox->set_horz_possible(true);
} else {
SetNeighbourFlows(bbox);
}
}
if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
textord_tabfind_show_strokewidths > 1) {
@ -830,17 +841,17 @@ void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
// Improve flow direction with neighbours.
gsearch.StartFullSearch();
while ((bbox = gsearch.NextFullSearch()) != NULL) {
SmoothNeighbourTypes(bbox, false);
SmoothNeighbourTypes(pageseg_mode, false, bbox);
}
// Now allow reset of firm values to fix renegades.
gsearch.StartFullSearch();
while ((bbox = gsearch.NextFullSearch()) != NULL) {
SmoothNeighbourTypes(bbox, true);
SmoothNeighbourTypes(pageseg_mode, true, bbox);
}
// Repeat.
gsearch.StartFullSearch();
while ((bbox = gsearch.NextFullSearch()) != NULL) {
SmoothNeighbourTypes(bbox, true);
SmoothNeighbourTypes(pageseg_mode, true, bbox);
}
if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
textord_tabfind_show_strokewidths > 1) {
@ -1198,7 +1209,8 @@ void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) {
// Smooths the vertical/horizontal type of the blob based on the
// 2nd-order neighbours. If reset_all is true, then all blobs are
// changed. Otherwise, only ambiguous blobs are processed.
void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all,
BLOBNBOX* blob) {
if ((blob->vert_possible() && blob->horz_possible()) || reset_all) {
// There are both horizontal and vertical so try to fix it.
BLOBNBOX_CLIST neighbours;
@ -1214,11 +1226,12 @@ void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
tprintf("pure_h=%d, pure_v=%d\n",
pure_h_count, pure_v_count);
}
if (pure_h_count > pure_v_count) {
if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
// Horizontal gaps are clear winners. Clear vertical neighbours.
blob->set_vert_possible(false);
blob->set_horz_possible(true);
} else if (pure_v_count > pure_h_count) {
} else if (pure_v_count > pure_h_count &&
!FindingHorizontalOnly(pageseg_mode)) {
// Vertical gaps are clear winners. Clear horizontal neighbours.
blob->set_horz_possible(false);
blob->set_vert_possible(true);
@ -1244,11 +1257,12 @@ void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
// called again after cleaning up the partly done work.
PartitionFindResult StrokeWidth::FindInitialPartitions(
const FCOORD& rerotation, bool find_problems, TO_BLOCK* block,
BLOBNBOX_LIST* diacritic_blobs, ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts, FCOORD* skew_angle) {
FindVerticalTextChains(part_grid);
FindHorizontalTextChains(part_grid);
PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems,
TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
FCOORD* skew_angle) {
if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
if (textord_tabfind_show_strokewidths) {
chains_win_ = MakeWindow(0, 400, "Initial text chains");
part_grid->DisplayBoxes(chains_win_);
@ -1279,7 +1293,7 @@ PartitionFindResult StrokeWidth::FindInitialPartitions(
part_grid->DisplayBoxes(textlines_win_);
diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block);
}
PartitionRemainingBlobs(part_grid);
PartitionRemainingBlobs(pageseg_mode, part_grid);
part_grid->SplitOverlappingPartitions(big_parts);
EasyMerges(part_grid);
while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box,
@ -1363,6 +1377,9 @@ static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob,
// Finds vertical chains of text-like blobs and puts them in ColPartitions.
void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
// A PageSegMode that forces vertical textlines with the current rotation.
PageSegMode pageseg_mode =
rerotation_.y() == 0.0f ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN;
BlobGridSearch gsearch(this);
BLOBNBOX* bbox;
gsearch.StartFullSearch();
@ -1384,7 +1401,7 @@ void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
part->AddBox(blob);
blob = MutualUnusedVNeighbour(blob, BND_BELOW);
}
CompletePartition(part, part_grid);
CompletePartition(pageseg_mode, part, part_grid);
}
}
}
@ -1406,6 +1423,9 @@ static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob,
// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
// A PageSegMode that forces horizontal textlines with the current rotation.
PageSegMode pageseg_mode =
rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT;
BlobGridSearch gsearch(this);
BLOBNBOX* bbox;
gsearch.StartFullSearch();
@ -1425,7 +1445,7 @@ void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
part->AddBox(blob);
blob = MutualUnusedVNeighbour(blob, BND_LEFT);
}
CompletePartition(part, part_grid);
CompletePartition(pageseg_mode, part, part_grid);
}
}
}
@ -1769,7 +1789,8 @@ void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block,
}
// All remaining unused blobs are put in individual ColPartitions.
void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode,
ColPartitionGrid* part_grid) {
BlobGridSearch gsearch(this);
BLOBNBOX* bbox;
int prev_grid_x = -1;
@ -1783,7 +1804,8 @@ void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
int grid_y = gsearch.GridY();
if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
// New cell. Process old cell.
MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
&cell_list);
cell_it.set_to_list(&cell_list);
prev_grid_x = grid_x;
prev_grid_y = grid_y;
@ -1797,12 +1819,14 @@ void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
cell_all_noise = false;
}
}
MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
&cell_list);
}
// If combine, put all blobs in the cell_list into a single partition, otherwise
// put each one into its own partition.
void StrokeWidth::MakePartitionsFromCellList(bool combine,
void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode,
bool combine,
ColPartitionGrid* part_grid,
BLOBNBOX_CLIST* cell_list) {
if (cell_list->empty())
@ -1816,27 +1840,34 @@ void StrokeWidth::MakePartitionsFromCellList(bool combine,
for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
part->AddBox(cell_it.extract());
}
CompletePartition(part, part_grid);
CompletePartition(pageseg_mode, part, part_grid);
} else {
for (; !cell_it.empty(); cell_it.forward()) {
BLOBNBOX* bbox = cell_it.extract();
ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1));
part->set_flow(bbox->flow());
part->AddBox(bbox);
CompletePartition(part, part_grid);
CompletePartition(pageseg_mode, part, part_grid);
}
}
}
// Helper function to finish setting up a ColPartition and insert into
// part_grid.
void StrokeWidth::CompletePartition(ColPartition* part,
void StrokeWidth::CompletePartition(PageSegMode pageseg_mode,
ColPartition* part,
ColPartitionGrid* part_grid) {
part->ComputeLimits();
TBOX box = part->bounding_box();
bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
box.bottom());
int value = projection_->EvaluateColPartition(*part, denorm_, debug);
// Override value if pageseg_mode disagrees.
if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
value = part->boxes_count() == 1 ? 0 : -2;
} else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
value = part->boxes_count() == 1 ? 0 : 2;
}
part->SetRegionAndFlowTypesFromProjectionValue(value);
part->ClaimBoxes();
part_grid->InsertBBox(true, true, part);

View File

@ -68,7 +68,8 @@ class StrokeWidth : public BlobGrid {
// and large blobs with optional repair of broken CJK characters first.
// Repair of broken CJK is needed here because broken CJK characters
// can fool the textline direction detection algorithm.
void FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
bool cjk_merge,
TO_BLOCK* input_block);
// To save computation, the process of generating partitions is broken
@ -118,7 +119,8 @@ class StrokeWidth : public BlobGrid {
// part_grid is the output grid of textline partitions.
// Large blobs that cause overlap are put in separate partitions and added
// to the big_parts list.
void GradeBlobsIntoPartitions(const FCOORD& rerotation, TO_BLOCK* block,
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
const FCOORD& rerotation, TO_BLOCK* block,
Pix* nontext_pix, const DENORM* denorm,
bool cjk_script, TextlineProjection* projection,
BLOBNBOX_LIST* diacritic_blobs,
@ -171,7 +173,8 @@ class StrokeWidth : public BlobGrid {
// flags in the BLOBNBOXes currently in this grid.
// This function is called more than once if page orientation is uncertain,
// so display_if_debugging is true on the final call to display the results.
void FindTextlineFlowDirection(bool display_if_debugging);
void FindTextlineFlowDirection(PageSegMode pageseg_mode,
bool display_if_debugging);
// Sets the neighbours and good_stroke_neighbours members of the blob by
// searching close on all 4 sides.
@ -199,7 +202,8 @@ class StrokeWidth : public BlobGrid {
// Smooths the vertical/horizontal type of the blob based on the
// 2nd-order neighbours. If reset_all is true, then all blobs are
// changed. Otherwise, only ambiguous blobs are processed.
void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate);
void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
BLOBNBOX* blob);
// Checks the left or right side of the given leader partition and sets the
// (opposite) leader_on_right or leader_on_left flags for blobs
@ -218,7 +222,8 @@ class StrokeWidth : public BlobGrid {
// the components, saves the skew_angle and returns PFR_SKEW.] If the return
// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
// called again after cleaning up the partly done work.
PartitionFindResult FindInitialPartitions(const FCOORD& rerotation,
PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
const FCOORD& rerotation,
bool find_problems, TO_BLOCK* block,
BLOBNBOX_LIST* diacritic_blobs,
ColPartitionGrid* part_grid,
@ -269,17 +274,38 @@ class StrokeWidth : public BlobGrid {
ColPartition_LIST* big_parts);
// All remaining unused blobs are put in individual ColPartitions.
void PartitionRemainingBlobs(ColPartitionGrid* part_grid);
void PartitionRemainingBlobs(PageSegMode pageseg_mode,
ColPartitionGrid* part_grid);
// If combine, put all blobs in the cell_list into a single partition,
// otherwise put each one into its own partition.
void MakePartitionsFromCellList(bool combine,
void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
ColPartitionGrid* part_grid,
BLOBNBOX_CLIST* cell_list);
// Helper function to finish setting up a ColPartition and insert into
// part_grid.
void CompletePartition(ColPartition* part, ColPartitionGrid* part_grid);
void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
ColPartitionGrid* part_grid);
// Helper returns true if we are looking only for vertical textlines,
// taking into account any rotation that has been done.
bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
  // The answer depends on whether rerotation_ has a zero y component:
  //  - zero y: only PSM_SINGLE_BLOCK_VERT_TEXT means vertical-only.
  //  - non-zero y: the roles swap, so vertical-only means a mode that has
  //    orientation detection disabled and is not PSM_SINGLE_BLOCK_VERT_TEXT.
  const bool vert_text_mode = pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
  if (rerotation_.y() != 0.0f) {
    return !(PSM_ORIENTATION_ENABLED(pageseg_mode) || vert_text_mode);
  }
  return vert_text_mode;
}
// Helper returns true if we are looking only for horizontal textlines,
// taking into account any rotation that has been done.
bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
  // The answer depends on whether rerotation_ has a zero y component:
  //  - zero y: horizontal-only means a mode that has orientation detection
  //    disabled and is not PSM_SINGLE_BLOCK_VERT_TEXT.
  //  - non-zero y: the roles swap, so only PSM_SINGLE_BLOCK_VERT_TEXT means
  //    horizontal-only.
  const bool vert_text_mode = pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
  if (rerotation_.y() != 0.0f) {
    return vert_text_mode;
  }
  return !(PSM_ORIENTATION_ENABLED(pageseg_mode) || vert_text_mode);
}
// Merge partitions where the merge appears harmless.
void EasyMerges(ColPartitionGrid* part_grid);