mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Fixed occurrence of small rotated blocks in loosely spaced text
This commit is contained in:
parent
d74c625e52
commit
78b5e1a77d
@ -242,12 +242,9 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
||||
BLOCK_LIST found_blocks;
|
||||
TO_BLOCK_LIST temp_blocks;
|
||||
|
||||
bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode);
|
||||
bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode);
|
||||
bool osd_only = pageseg_mode == PSM_OSD_ONLY;
|
||||
ColumnFinder* finder = SetupPageSegAndDetectOrientation(
|
||||
single_column, osd_enabled, osd_only, blocks, osd_tess, osr,
|
||||
&temp_blocks, &photomask_pix, &musicmask_pix);
|
||||
pageseg_mode, blocks, osd_tess, osr, &temp_blocks, &photomask_pix,
|
||||
&musicmask_pix);
|
||||
int result = 0;
|
||||
if (finder != NULL) {
|
||||
TO_BLOCK_IT to_block_it(&temp_blocks);
|
||||
@ -310,9 +307,9 @@ static void AddAllScriptsConverted(const UNICHARSET& sid_set,
|
||||
* The returned ColumnFinder must be deleted after use.
|
||||
*/
|
||||
ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
bool single_column, bool osd, bool only_osd,
|
||||
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
|
||||
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {
|
||||
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
|
||||
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
|
||||
Pix** music_mask_pix) {
|
||||
int vertical_x = 0;
|
||||
int vertical_y = 1;
|
||||
TabVector_LIST v_lines;
|
||||
@ -334,8 +331,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
|
||||
if (tessedit_dump_pageseg_images)
|
||||
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
|
||||
if (single_column)
|
||||
v_lines.clear();
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
|
||||
|
||||
// The rest of the algorithm uses the usual connected components.
|
||||
textord_.find_components(pix_binary_, blocks, to_blocks);
|
||||
@ -355,7 +351,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
textord_tabfind_aligned_gap_fraction,
|
||||
&v_lines, &h_lines, vertical_x, vertical_y);
|
||||
|
||||
finder->SetupAndFilterNoise(*photo_mask_pix, to_block);
|
||||
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
|
||||
|
||||
if (equ_detect_) {
|
||||
equ_detect_->LabelSpecialText(to_block);
|
||||
@ -367,13 +363,15 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
// We want the text lines horizontal, (vertical text indicates vertical
|
||||
// textlines) which may conflict (eg vertically written CJK).
|
||||
int osd_orientation = 0;
|
||||
bool vertical_text = textord_tabfind_force_vertical_text;
|
||||
if (!vertical_text && textord_tabfind_vertical_text) {
|
||||
bool vertical_text = textord_tabfind_force_vertical_text ||
|
||||
pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
if (!vertical_text && textord_tabfind_vertical_text &&
|
||||
PSM_ORIENTATION_ENABLED(pageseg_mode)) {
|
||||
vertical_text =
|
||||
finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,
|
||||
to_block, &osd_blobs);
|
||||
}
|
||||
if (osd && osd_tess != NULL && osr != NULL) {
|
||||
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {
|
||||
GenericVector<int> osd_scripts;
|
||||
if (osd_tess != this) {
|
||||
// We are running osd as part of layout analysis, so constrain the
|
||||
@ -385,7 +383,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
||||
}
|
||||
}
|
||||
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
|
||||
if (only_osd) {
|
||||
if (pageseg_mode == PSM_OSD_ONLY) {
|
||||
delete finder;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -287,9 +287,9 @@ class Tesseract : public Wordrec {
|
||||
TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs,
|
||||
Tesseract* osd_tess, OSResults* osr);
|
||||
ColumnFinder* SetupPageSegAndDetectOrientation(
|
||||
bool single_column, bool osd, bool only_osd,
|
||||
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
|
||||
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
|
||||
PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,
|
||||
OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,
|
||||
Pix** music_mask_pix);
|
||||
// par_control.cpp
|
||||
void PrerecAllWordsPar(const GenericVector<WordData>& words);
|
||||
|
||||
|
@ -150,7 +150,8 @@ ColumnFinder::~ColumnFinder() {
|
||||
// direction, so the textline projection_ map can be setup.
|
||||
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
||||
// determine the gross textline alignment of the page.
|
||||
void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
|
||||
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode,
|
||||
Pix* photo_mask_pix,
|
||||
TO_BLOCK* input_block) {
|
||||
part_grid_.Init(gridsize(), bleft(), tright());
|
||||
if (stroke_width_ != NULL)
|
||||
@ -172,7 +173,8 @@ void ColumnFinder::SetupAndFilterNoise(Pix* photo_mask_pix,
|
||||
// Remove obvious noise and make the initial non-text map.
|
||||
nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
|
||||
photo_mask_pix, input_block);
|
||||
stroke_width_->FindTextlineDirectionAndFixBrokenCJK(cjk_script_, input_block);
|
||||
stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
|
||||
input_block);
|
||||
// Clear the strokewidth grid ready for rotation or leader finding.
|
||||
stroke_width_->Clear();
|
||||
}
|
||||
@ -305,8 +307,8 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
|
||||
input_block);
|
||||
SetBlockRuleEdges(input_block);
|
||||
stroke_width_->GradeBlobsIntoPartitions(
|
||||
rerotate_, input_block, nontext_map_, denorm_, cjk_script_, &projection_,
|
||||
diacritic_blobs, &part_grid_, &big_parts_);
|
||||
pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
|
||||
&projection_, diacritic_blobs, &part_grid_, &big_parts_);
|
||||
if (!PSM_SPARSE(pageseg_mode)) {
|
||||
ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
|
||||
input_block, this, &part_grid_, &big_parts_);
|
||||
|
@ -110,7 +110,8 @@ class ColumnFinder : public TabFind {
|
||||
// direction, so the textline projection_ map can be setup.
|
||||
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
||||
// determine the gross textline alignment of the page.
|
||||
void SetupAndFilterNoise(Pix* photo_mask_pix, TO_BLOCK* input_block);
|
||||
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix,
|
||||
TO_BLOCK* input_block);
|
||||
|
||||
// Tests for vertical alignment of text (returning true if so), and generates
|
||||
// a list of blobs (in osd_blobs) for orientation and script detection.
|
||||
|
@ -164,14 +164,15 @@ void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) {
|
||||
// and large blobs with optional repair of broken CJK characters first.
|
||||
// Repair of broken CJK is needed here because broken CJK characters
|
||||
// can fool the textline direction detection algorithm.
|
||||
void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
|
||||
void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
|
||||
bool cjk_merge,
|
||||
TO_BLOCK* input_block) {
|
||||
// Setup the grid with the remaining (non-noise) blobs.
|
||||
InsertBlobs(input_block);
|
||||
// Repair broken CJK characters if needed.
|
||||
while (cjk_merge && FixBrokenCJK(input_block));
|
||||
// Grade blobs by inspection of neighbours.
|
||||
FindTextlineFlowDirection(false);
|
||||
FindTextlineFlowDirection(pageseg_mode, false);
|
||||
// Clear the grid ready for rotation or leader finding.
|
||||
Clear();
|
||||
}
|
||||
@ -351,10 +352,10 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
|
||||
// Large blobs that cause overlap are put in separate partitions and added
|
||||
// to the big_parts list.
|
||||
void StrokeWidth::GradeBlobsIntoPartitions(
|
||||
const FCOORD& rerotation, TO_BLOCK* block, Pix* nontext_pix,
|
||||
const DENORM* denorm, bool cjk_script, TextlineProjection* projection,
|
||||
BLOBNBOX_LIST* diacritic_blobs, ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts) {
|
||||
PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block,
|
||||
Pix* nontext_pix, const DENORM* denorm, bool cjk_script,
|
||||
TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs,
|
||||
ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) {
|
||||
nontext_map_ = nontext_pix;
|
||||
projection_ = projection;
|
||||
denorm_ = denorm;
|
||||
@ -367,7 +368,7 @@ void StrokeWidth::GradeBlobsIntoPartitions(
|
||||
if (cjk_script) {
|
||||
FixBrokenCJK(block);
|
||||
}
|
||||
FindTextlineFlowDirection(false);
|
||||
FindTextlineFlowDirection(pageseg_mode, false);
|
||||
projection_->ConstructProjection(block, rerotation, nontext_map_);
|
||||
if (textord_tabfind_show_strokewidths) {
|
||||
ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
|
||||
@ -380,17 +381,18 @@ void StrokeWidth::GradeBlobsIntoPartitions(
|
||||
Clear();
|
||||
InsertBlobs(block);
|
||||
FCOORD skew;
|
||||
FindTextlineFlowDirection(true);
|
||||
PartitionFindResult r = FindInitialPartitions(
|
||||
rerotation, true, block, diacritic_blobs, part_grid, big_parts, &skew);
|
||||
FindTextlineFlowDirection(pageseg_mode, true);
|
||||
PartitionFindResult r =
|
||||
FindInitialPartitions(pageseg_mode, rerotation, true, block,
|
||||
diacritic_blobs, part_grid, big_parts, &skew);
|
||||
if (r == PFR_NOISE) {
|
||||
tprintf("Detected %d diacritics\n", diacritic_blobs->length());
|
||||
// Noise was found, and removed.
|
||||
Clear();
|
||||
InsertBlobs(block);
|
||||
FindTextlineFlowDirection(true);
|
||||
r = FindInitialPartitions(rerotation, false, block, diacritic_blobs,
|
||||
part_grid, big_parts, &skew);
|
||||
FindTextlineFlowDirection(pageseg_mode, true);
|
||||
r = FindInitialPartitions(pageseg_mode, rerotation, false, block,
|
||||
diacritic_blobs, part_grid, big_parts, &skew);
|
||||
}
|
||||
nontext_map_ = NULL;
|
||||
projection_ = NULL;
|
||||
@ -805,7 +807,8 @@ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
|
||||
// flags in the BLOBNBOXes currently in this grid.
|
||||
// This function is called more than once if page orientation is uncertain,
|
||||
// so display_if_debugging is true on the final call to display the results.
|
||||
void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
|
||||
void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode,
|
||||
bool display_if_debugging) {
|
||||
BlobGridSearch gsearch(this);
|
||||
BLOBNBOX* bbox;
|
||||
// For every bbox in the grid, set its neighbours.
|
||||
@ -821,7 +824,15 @@ void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
|
||||
// Now try to make the blobs only vertical or horizontal using neighbours.
|
||||
gsearch.StartFullSearch();
|
||||
while ((bbox = gsearch.NextFullSearch()) != NULL) {
|
||||
SetNeighbourFlows(bbox);
|
||||
if (FindingVerticalOnly(pageseg_mode)) {
|
||||
bbox->set_vert_possible(true);
|
||||
bbox->set_horz_possible(false);
|
||||
} else if (FindingHorizontalOnly(pageseg_mode)) {
|
||||
bbox->set_vert_possible(false);
|
||||
bbox->set_horz_possible(true);
|
||||
} else {
|
||||
SetNeighbourFlows(bbox);
|
||||
}
|
||||
}
|
||||
if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
|
||||
textord_tabfind_show_strokewidths > 1) {
|
||||
@ -830,17 +841,17 @@ void StrokeWidth::FindTextlineFlowDirection(bool display_if_debugging) {
|
||||
// Improve flow direction with neighbours.
|
||||
gsearch.StartFullSearch();
|
||||
while ((bbox = gsearch.NextFullSearch()) != NULL) {
|
||||
SmoothNeighbourTypes(bbox, false);
|
||||
SmoothNeighbourTypes(pageseg_mode, false, bbox);
|
||||
}
|
||||
// Now allow reset of firm values to fix renegades.
|
||||
gsearch.StartFullSearch();
|
||||
while ((bbox = gsearch.NextFullSearch()) != NULL) {
|
||||
SmoothNeighbourTypes(bbox, true);
|
||||
SmoothNeighbourTypes(pageseg_mode, true, bbox);
|
||||
}
|
||||
// Repeat.
|
||||
gsearch.StartFullSearch();
|
||||
while ((bbox = gsearch.NextFullSearch()) != NULL) {
|
||||
SmoothNeighbourTypes(bbox, true);
|
||||
SmoothNeighbourTypes(pageseg_mode, true, bbox);
|
||||
}
|
||||
if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
|
||||
textord_tabfind_show_strokewidths > 1) {
|
||||
@ -1198,7 +1209,8 @@ void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) {
|
||||
// Smoothes the vertical/horizontal type of the blob based on the
|
||||
// 2nd-order neighbours. If reset_all is true, then all blobs are
|
||||
// changed. Otherwise, only ambiguous blobs are processed.
|
||||
void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
|
||||
void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all,
|
||||
BLOBNBOX* blob) {
|
||||
if ((blob->vert_possible() && blob->horz_possible()) || reset_all) {
|
||||
// There are both horizontal and vertical so try to fix it.
|
||||
BLOBNBOX_CLIST neighbours;
|
||||
@ -1214,11 +1226,12 @@ void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
|
||||
tprintf("pure_h=%d, pure_v=%d\n",
|
||||
pure_h_count, pure_v_count);
|
||||
}
|
||||
if (pure_h_count > pure_v_count) {
|
||||
if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
|
||||
// Horizontal gaps are clear winners. Clear vertical neighbours.
|
||||
blob->set_vert_possible(false);
|
||||
blob->set_horz_possible(true);
|
||||
} else if (pure_v_count > pure_h_count) {
|
||||
} else if (pure_v_count > pure_h_count &&
|
||||
!FindingHorizontalOnly(pageseg_mode)) {
|
||||
// Vertical gaps are clear winners. Clear horizontal neighbours.
|
||||
blob->set_horz_possible(false);
|
||||
blob->set_vert_possible(true);
|
||||
@ -1244,11 +1257,12 @@ void StrokeWidth::SmoothNeighbourTypes(BLOBNBOX* blob, bool reset_all) {
|
||||
// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
|
||||
// called again after cleaning up the partly done work.
|
||||
PartitionFindResult StrokeWidth::FindInitialPartitions(
|
||||
const FCOORD& rerotation, bool find_problems, TO_BLOCK* block,
|
||||
BLOBNBOX_LIST* diacritic_blobs, ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts, FCOORD* skew_angle) {
|
||||
FindVerticalTextChains(part_grid);
|
||||
FindHorizontalTextChains(part_grid);
|
||||
PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems,
|
||||
TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
|
||||
ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
|
||||
FCOORD* skew_angle) {
|
||||
if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
|
||||
if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
|
||||
if (textord_tabfind_show_strokewidths) {
|
||||
chains_win_ = MakeWindow(0, 400, "Initial text chains");
|
||||
part_grid->DisplayBoxes(chains_win_);
|
||||
@ -1279,7 +1293,7 @@ PartitionFindResult StrokeWidth::FindInitialPartitions(
|
||||
part_grid->DisplayBoxes(textlines_win_);
|
||||
diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block);
|
||||
}
|
||||
PartitionRemainingBlobs(part_grid);
|
||||
PartitionRemainingBlobs(pageseg_mode, part_grid);
|
||||
part_grid->SplitOverlappingPartitions(big_parts);
|
||||
EasyMerges(part_grid);
|
||||
while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box,
|
||||
@ -1363,6 +1377,9 @@ static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob,
|
||||
|
||||
// Finds vertical chains of text-like blobs and puts them in ColPartitions.
// Only the head and tail of the function are visible in this diff; the search
// loop that builds each chain lies in the elided lines between the two hunks.
|
||||
void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
|
||||
// A PageSegMode that forces vertical textlines with the current rotation.
// It is a local value chosen from rerotation_ alone, not the caller's mode:
// PSM_SINGLE_BLOCK_VERT_TEXT when rerotation_.y() is 0, PSM_SINGLE_COLUMN
// otherwise. It is handed to CompletePartition below.
// NOTE(review): presumably picked so that FindingVerticalOnly() holds for it
// in both cases — confirm against the PSM_ORIENTATION_ENABLED definition.
|
||||
PageSegMode pageseg_mode =
|
||||
rerotation_.y() == 0.0f ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN;
|
||||
BlobGridSearch gsearch(this);
|
||||
BLOBNBOX* bbox;
|
||||
gsearch.StartFullSearch();
|
||||
@ -1384,7 +1401,7 @ void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
|
||||
part->AddBox(blob);
|
||||
// Step to the next mutual, unused neighbour below and keep extending.
blob = MutualUnusedVNeighbour(blob, BND_BELOW);
|
||||
}
|
||||
CompletePartition(part, part_grid);
|
||||
CompletePartition(pageseg_mode, part, part_grid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1406,6 +1423,9 @@ static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob,
|
||||
|
||||
// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
// Only the head and tail of the function are visible in this diff; the search
// loop that builds each chain lies in the elided lines between the two hunks.
|
||||
void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
|
||||
// A PageSegMode that forces horizontal textlines with the current rotation.
// It is a local value chosen from rerotation_ alone, not the caller's mode:
// PSM_SINGLE_COLUMN when rerotation_.y() is 0, PSM_SINGLE_BLOCK_VERT_TEXT
// otherwise (the reverse of the choice in FindVerticalTextChains). It is
// handed to CompletePartition below.
|
||||
PageSegMode pageseg_mode =
|
||||
rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
BlobGridSearch gsearch(this);
|
||||
BLOBNBOX* bbox;
|
||||
gsearch.StartFullSearch();
|
||||
@ -1425,7 +1445,7 @ void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
|
||||
part->AddBox(blob);
|
||||
// NOTE(review): this is the horizontal chain finder walking in direction
// BND_LEFT, yet it calls the vertical-neighbour helper. A sibling helper
// MutualUnusedHNeighbour exists in this file and looks like the intended
// call — confirm against the elided lines of this loop before changing it.
blob = MutualUnusedVNeighbour(blob, BND_LEFT);
|
||||
}
|
||||
CompletePartition(part, part_grid);
|
||||
CompletePartition(pageseg_mode, part, part_grid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1769,7 +1789,8 @@ void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block,
|
||||
}
|
||||
|
||||
// All remaining unused blobs are put in individual ColPartitions.
|
||||
void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
|
||||
void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode,
|
||||
ColPartitionGrid* part_grid) {
|
||||
BlobGridSearch gsearch(this);
|
||||
BLOBNBOX* bbox;
|
||||
int prev_grid_x = -1;
|
||||
@ -1783,7 +1804,8 @@ void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
|
||||
int grid_y = gsearch.GridY();
|
||||
if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
|
||||
// New cell. Process old cell.
|
||||
MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
|
||||
MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
|
||||
&cell_list);
|
||||
cell_it.set_to_list(&cell_list);
|
||||
prev_grid_x = grid_x;
|
||||
prev_grid_y = grid_y;
|
||||
@ -1797,12 +1819,14 @@ void StrokeWidth::PartitionRemainingBlobs(ColPartitionGrid* part_grid) {
|
||||
cell_all_noise = false;
|
||||
}
|
||||
}
|
||||
MakePartitionsFromCellList(cell_all_noise, part_grid, &cell_list);
|
||||
MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
|
||||
&cell_list);
|
||||
}
|
||||
|
||||
// If combine, put all blobs in the cell_list into a single partition, otherwise
|
||||
// put each one into its own partition.
|
||||
void StrokeWidth::MakePartitionsFromCellList(bool combine,
|
||||
void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode,
|
||||
bool combine,
|
||||
ColPartitionGrid* part_grid,
|
||||
BLOBNBOX_CLIST* cell_list) {
|
||||
if (cell_list->empty())
|
||||
@ -1816,27 +1840,34 @@ void StrokeWidth::MakePartitionsFromCellList(bool combine,
|
||||
for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
|
||||
part->AddBox(cell_it.extract());
|
||||
}
|
||||
CompletePartition(part, part_grid);
|
||||
CompletePartition(pageseg_mode, part, part_grid);
|
||||
} else {
|
||||
for (; !cell_it.empty(); cell_it.forward()) {
|
||||
BLOBNBOX* bbox = cell_it.extract();
|
||||
ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1));
|
||||
part->set_flow(bbox->flow());
|
||||
part->AddBox(bbox);
|
||||
CompletePartition(part, part_grid);
|
||||
CompletePartition(pageseg_mode, part, part_grid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to finish setting up a ColPartition and insert into
|
||||
// part_grid.
|
||||
void StrokeWidth::CompletePartition(ColPartition* part,
|
||||
void StrokeWidth::CompletePartition(PageSegMode pageseg_mode,
|
||||
ColPartition* part,
|
||||
ColPartitionGrid* part_grid) {
|
||||
part->ComputeLimits();
|
||||
TBOX box = part->bounding_box();
|
||||
bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
|
||||
box.bottom());
|
||||
int value = projection_->EvaluateColPartition(*part, denorm_, debug);
|
||||
// Override value if pageseg_mode disagrees.
|
||||
if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
|
||||
value = part->boxes_count() == 1 ? 0 : -2;
|
||||
} else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
|
||||
value = part->boxes_count() == 1 ? 0 : 2;
|
||||
}
|
||||
part->SetRegionAndFlowTypesFromProjectionValue(value);
|
||||
part->ClaimBoxes();
|
||||
part_grid->InsertBBox(true, true, part);
|
||||
|
@ -68,7 +68,8 @@ class StrokeWidth : public BlobGrid {
|
||||
// and large blobs with optional repair of broken CJK characters first.
|
||||
// Repair of broken CJK is needed here because broken CJK characters
|
||||
// can fool the textline direction detection algorithm.
|
||||
void FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
|
||||
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
|
||||
bool cjk_merge,
|
||||
TO_BLOCK* input_block);
|
||||
|
||||
// To save computation, the process of generating partitions is broken
|
||||
@ -118,7 +119,8 @@ class StrokeWidth : public BlobGrid {
|
||||
// part_grid is the output grid of textline partitions.
|
||||
// Large blobs that cause overlap are put in separate partitions and added
|
||||
// to the big_parts list.
|
||||
void GradeBlobsIntoPartitions(const FCOORD& rerotation, TO_BLOCK* block,
|
||||
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
|
||||
const FCOORD& rerotation, TO_BLOCK* block,
|
||||
Pix* nontext_pix, const DENORM* denorm,
|
||||
bool cjk_script, TextlineProjection* projection,
|
||||
BLOBNBOX_LIST* diacritic_blobs,
|
||||
@ -171,7 +173,8 @@ class StrokeWidth : public BlobGrid {
|
||||
// flags in the BLOBNBOXes currently in this grid.
|
||||
// This function is called more than once if page orientation is uncertain,
|
||||
// so display_if_debugging is true on the final call to display the results.
|
||||
void FindTextlineFlowDirection(bool display_if_debugging);
|
||||
void FindTextlineFlowDirection(PageSegMode pageseg_mode,
|
||||
bool display_if_debugging);
|
||||
|
||||
// Sets the neighbours and good_stroke_neighbours members of the blob by
|
||||
// searching close on all 4 sides.
|
||||
@ -199,7 +202,8 @@ class StrokeWidth : public BlobGrid {
|
||||
// Smoothes the vertical/horizontal type of the blob based on the
|
||||
// 2nd-order neighbours. If reset_all is true, then all blobs are
|
||||
// changed. Otherwise, only ambiguous blobs are processed.
|
||||
void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate);
|
||||
void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
|
||||
BLOBNBOX* blob);
|
||||
|
||||
// Checks the left or right side of the given leader partition and sets the
|
||||
// (opposite) leader_on_right or leader_on_left flags for blobs
|
||||
@ -218,7 +222,8 @@ class StrokeWidth : public BlobGrid {
|
||||
// the components, saves the skew_angle and returns PFR_SKEW.] If the return
|
||||
// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
|
||||
// called again after cleaning up the partly done work.
|
||||
PartitionFindResult FindInitialPartitions(const FCOORD& rerotation,
|
||||
PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
|
||||
const FCOORD& rerotation,
|
||||
bool find_problems, TO_BLOCK* block,
|
||||
BLOBNBOX_LIST* diacritic_blobs,
|
||||
ColPartitionGrid* part_grid,
|
||||
@ -269,17 +274,38 @@ class StrokeWidth : public BlobGrid {
|
||||
ColPartition_LIST* big_parts);
|
||||
|
||||
// All remaining unused blobs are put in individual ColPartitions.
|
||||
void PartitionRemainingBlobs(ColPartitionGrid* part_grid);
|
||||
void PartitionRemainingBlobs(PageSegMode pageseg_mode,
|
||||
ColPartitionGrid* part_grid);
|
||||
|
||||
// If combine, put all blobs in the cell_list into a single partition,
|
||||
// otherwise put each one into its own partition.
|
||||
void MakePartitionsFromCellList(bool combine,
|
||||
void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
|
||||
ColPartitionGrid* part_grid,
|
||||
BLOBNBOX_CLIST* cell_list);
|
||||
|
||||
// Helper function to finish setting up a ColPartition and insert into
|
||||
// part_grid.
|
||||
void CompletePartition(ColPartition* part, ColPartitionGrid* part_grid);
|
||||
void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
|
||||
ColPartitionGrid* part_grid);
|
||||
|
||||
// Helper returns true if we are looking only for vertical textlines,
|
||||
// taking into account any rotation that has been done.
// With rerotation_.y() == 0 only PSM_SINGLE_BLOCK_VERT_TEXT qualifies.
// With any other rerotation_ the answer is true for every mode in which
// PSM_ORIENTATION_ENABLED is false, except PSM_SINGLE_BLOCK_VERT_TEXT; that is
// the same test FindingHorizontalOnly applies in the unrotated case.
|
||||
bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
|
||||
// NOTE(review): presumably a non-zero rerotation_.y() means the blobs have
// been turned by a quarter turn, so "vertical" here is relative to the rotated
// grid rather than the page — confirm. The comparison is an exact float
// equality, which assumes rerotation_ holds an exact 0 when unrotated.
if (rerotation_.y() == 0.0f) {
|
||||
return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
}
|
||||
// Rotated case: modes that may detect orientation are never forced.
return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
|
||||
pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
}
|
||||
// Helper returns true if we are looking only for horizontal textlines,
|
||||
// taking into account any rotation that has been done.
// With rerotation_.y() == 0 the answer is true for every mode in which
// PSM_ORIENTATION_ENABLED is false, except PSM_SINGLE_BLOCK_VERT_TEXT.
// With any other rerotation_ only PSM_SINGLE_BLOCK_VERT_TEXT qualifies; that is
// the same test FindingVerticalOnly applies in the unrotated case.
|
||||
bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
|
||||
// NOTE(review): presumably a non-zero rerotation_.y() means the blobs have
// been turned by a quarter turn, so "horizontal" here is relative to the
// rotated grid rather than the page — confirm. The comparison is an exact
// float equality, which assumes rerotation_ holds an exact 0 when unrotated.
if (rerotation_.y() == 0.0f) {
|
||||
// Unrotated case: modes that may detect orientation are never forced.
return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
|
||||
pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
}
|
||||
return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
|
||||
}
|
||||
|
||||
// Merge partitions where the merge appears harmless.
|
||||
void EasyMerges(ColPartitionGrid* part_grid);
|
||||
|
Loading…
Reference in New Issue
Block a user