tesseract/textord/textord.h

404 lines
19 KiB
C
Raw Normal View History

///////////////////////////////////////////////////////////////////////
// File: textord.h
// Description: The Textord class definition gathers text line and word
// finding functionality.
// Author: Ray Smith
// Created: Fri Mar 13 14:29:01 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TEXTORD_TEXTORD_H_
#define TESSERACT_TEXTORD_TEXTORD_H_
#include "ccstruct.h"
#include "bbgrid.h"
#include "blobbox.h"
#include "gap_map.h"
#include "publictypes.h" // For PageSegMode.
class FCOORD;
class BLOCK_LIST;
class PAGE_RES;
class TO_BLOCK;
class TO_BLOCK_LIST;
class ScrollView;
namespace tesseract {
// Lightweight element type for BBGrid: pairs a word with an enlarged bounding
// box so that the words diacritics should be put on are easy to find.
class WordWithBox {
 public:
  // Creates an element that refers to no word.
  WordWithBox() : word_(NULL) {}

  // Wraps word, caching its bounding box and then padding that box by the
  // box's own height via pad(height, height).
  explicit WordWithBox(WERD* word)
      : word_(word), bounding_box_(word->bounding_box()) {
    const int pad_size = bounding_box_.height();
    bounding_box_.pad(pad_size, pad_size);
  }

  // Returns the cached, padded bounding box.
  const TBOX& bounding_box() const { return bounding_box_; }

  // Returns the bounding box of only the good blobs, as reported by the word.
  TBOX true_bounding_box() const { return word_->true_bounding_box(); }

  // Forwards to the word's rej_cblob_list().
  C_BLOB_LIST* RejBlobs() const { return word_->rej_cblob_list(); }

  // Returns the wrapped word (read-only).
  const WERD* word() const { return word_; }

 private:
  // Borrowed, not owned: points at a real word held elsewhere, which must
  // outlive this object.
  WERD* word_;
  // Expanded bounding box of the word, padded all round by its height and
  // cached at construction time.
  TBOX bounding_box_;
};
// Make it usable by BBGrid.
// NOTE(review): CLISTIZEH presumably declares the WordWithBox_CLIST and
// WordWithBox_C_IT types that the typedefs below rely on -- confirm against
// the macro's definition.
CLISTIZEH(WordWithBox)
// BBGrid instantiation whose elements are WordWithBox objects.
typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> WordGrid;
// GridSearch instantiation over the same element, list and iterator types as
// WordGrid.
typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT> WordSearch;
// Gathers the text line and word finding functionality. Section headers
// below name the source file associated with each group of declarations.
class Textord {
 public:
  explicit Textord(CCStruct* ccstruct);
  ~Textord();

  // Builds the textlines and words inside each block.
  //   binary_pix:      mandatory; the binarized input after line removal.
  //   grey_pix:        optional; if present it must match binary_pix in size
  //                    and be a *real* grey image rather than
  //                    binary_pix * 255.
  //   thresholds_pix:  expected to be present iff grey_pix is present; may be
  //                    an integer factor reduction of grey_pix. It holds the
  //                    thresholds that were used to create binary_pix from
  //                    grey_pix.
  //   diacritic_blobs: small confusing components that should be added to
  //                    the appropriate word(s) in case they are really
  //                    diacritics.
  void TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, int width,
                   int height, Pix* binary_pix, Pix* thresholds_pix,
                   Pix* grey_pix, bool use_box_bottoms,
                   BLOBNBOX_LIST* diacritic_blobs, BLOCK_LIST* blocks,
                   TO_BLOCK_LIST* to_blocks);

  // When only a single textline was supposed to be returned and there is
  // more than one, cleans up and leaves only the best.
  void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);

  // Getter and setter for the use_cjk_fp_model_ flag.
  bool use_cjk_fp_model() const { return use_cjk_fp_model_; }
  void set_use_cjk_fp_model(bool flag) { use_cjk_fp_model_ = flag; }

  // ---- tospace.cpp ----------------------------------------------------
  void to_spacing(ICOORD page_tr,          // Top right of page.
                  TO_BLOCK_LIST* blocks);  // Blocks on page.
  ROW* make_prop_words(TO_ROW* row,        // Row to make.
                       FCOORD rotation);   // For drawing.
  ROW* make_blob_words(TO_ROW* row,        // Row to make.
                       FCOORD rotation);   // For drawing.

  // ---- tordmain.cpp ---------------------------------------------------
  void find_components(Pix* pix, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
  void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, BOOL8 testing_on);

 private:
  // Used for underlying memory management and other utilities.
  CCStruct* ccstruct_;

  // Size of the input image.
  ICOORD page_tr_;

  bool use_cjk_fp_model_;

  // ---- makerow.cpp ----------------------------------------------------
  // Makes the textlines inside each block.
  void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, int width,
                int height, TO_BLOCK_LIST* to_blocks);
  // Makes the textlines inside a single block.
  void MakeBlockRows(int min_spacing, int max_spacing, const FCOORD& skew,
                     TO_BLOCK* block, ScrollView* win);

 public:
  void compute_block_xheight(TO_BLOCK* block, float gradient);
  void compute_row_xheight(TO_ROW* row,  // Row to do.
                           const FCOORD& rotation,
                           float gradient,  // Global skew.
                           int block_line_size);
  void make_spline_rows(TO_BLOCK* block,  // Block to do.
                        float gradient,   // Gradient to fit.
                        BOOL8 testing_on);

 private:
  // ---- oldbasel.cpp ---------------------------------------------------
  void make_old_baselines(TO_BLOCK* block,   // Block to do.
                          BOOL8 testing_on,  // Correct orientation.
                          float gradient);
  void correlate_lines(TO_BLOCK* block, float gradient);
  void correlate_neighbours(TO_BLOCK* block,  // Block rows are in.
                            TO_ROW** rows,    // Rows of block.
                            int rowcount);    // No of rows to do.
  int correlate_with_stats(TO_ROW** rows,  // Rows of block.
                           int rowcount,   // No of rows to do.
                           TO_BLOCK* block);
  void find_textlines(TO_BLOCK* block,   // Block row is in.
                      TO_ROW* row,       // Row to do.
                      int degree,        // Required approximation.
                      QSPLINE* spline);  // Starting spline.

  // ---- tospace.cpp ----------------------------------------------------
  // DEBUG USE ONLY
  void block_spacing_stats(TO_BLOCK* block, GAPMAP* gapmap,
                           BOOL8& old_text_ord_proportional,
                           // Resulting estimate.
                           inT16& block_space_gap_width,
                           // Resulting estimate.
                           inT16& block_non_space_gap_width);
  void row_spacing_stats(TO_ROW* row, GAPMAP* gapmap, inT16 block_idx,
                         inT16 row_idx,
                         // Estimate for block.
                         inT16 block_space_gap_width,
                         // Estimate for block.
                         inT16 block_non_space_gap_width);
  void old_to_method(TO_ROW* row, STATS* all_gap_stats,
                     STATS* space_gap_stats, STATS* small_gap_stats,
                     inT16 block_space_gap_width,
                     // Estimate for block.
                     inT16 block_non_space_gap_width);
  BOOL8 isolated_row_stats(TO_ROW* row, GAPMAP* gapmap, STATS* all_gap_stats,
                           BOOL8 suspected_table, inT16 block_idx,
                           inT16 row_idx);
  inT16 stats_count_under(STATS* stats, inT16 threshold);
  void improve_row_threshold(TO_ROW* row, STATS* all_gap_stats);
  BOOL8 make_a_word_break(TO_ROW* row,    // Row being made.
                          TBOX blob_box,  // For next_blob. // How many blanks?
                          inT16 prev_gap, TBOX prev_blob_box,
                          inT16 real_current_gap,
                          inT16 within_xht_current_gap, TBOX next_blob_box,
                          inT16 next_gap, uinT8& blanks, BOOL8& fuzzy_sp,
                          BOOL8& fuzzy_non, BOOL8& prev_gap_was_a_space,
                          BOOL8& break_at_next_gap);
  BOOL8 narrow_blob(TO_ROW* row, TBOX blob_box);
  BOOL8 wide_blob(TO_ROW* row, TBOX blob_box);
  BOOL8 suspected_punct_blob(TO_ROW* row, TBOX box);
  void peek_at_next_gap(TO_ROW* row, BLOBNBOX_IT box_it, TBOX& next_blob_box,
                        inT16& next_gap, inT16& next_within_xht_gap);
  void mark_gap(TBOX blob,   // Blob following gap.
                inT16 rule,  // Heuristic id.
                inT16 prev_gap, inT16 prev_blob_width, inT16 current_gap,
                inT16 next_blob_width, inT16 next_gap);
  float find_mean_blob_spacing(WERD* word);
  BOOL8 ignore_big_gap(TO_ROW* row, inT32 row_length, GAPMAP* gapmap,
                       inT16 left, inT16 right);
  // Get bounding box.
  TBOX reduced_box_next(TO_ROW* row,       // Current row.
                        BLOBNBOX_IT* it);  // Iterator to blobs.
  TBOX reduced_box_for_blob(BLOBNBOX* blob, TO_ROW* row,
                            inT16* left_above_xht);

  // ---- tordmain.cpp ---------------------------------------------------
  float filter_noise_blobs(BLOBNBOX_LIST* src_list, BLOBNBOX_LIST* noise_list,
                           BLOBNBOX_LIST* small_list,
                           BLOBNBOX_LIST* large_list);
  // Fixes the block so that it obeys all the rules:
  //   - it must have at least one ROW;
  //   - it must have at least one WERD;
  //   - WERDs contain a fake blob.
  void cleanup_nontext_block(BLOCK* block);
  void cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks);
  BOOL8 clean_noise_from_row(ROW* row);
  void clean_noise_from_words(ROW* row);
  // Removes outlines that are a tiny fraction, in either width or height, of
  // the word height.
  void clean_small_noise_from_words(ROW* row);
  // Groups blocks by rotation and then, for each group, makes a WordGrid and
  // calls TransferDiacriticsToWords to copy the diacritic blobs to the most
  // appropriate words in that group of blocks. Source blobs are not touched.
  void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
                                       BLOCK_LIST* blocks);
  // Places a copy of each blob that is near a word (after applying rotation
  // to the blob) in the most appropriate word. When there is doubt, a blob
  // can end up in two words. Source blobs are not touched.
  void TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs,
                                 const FCOORD& rotation, WordGrid* word_grid);

 public:
  // ---- makerow.cpp ----------------------------------------------------
  BOOL_VAR_H(textord_single_height_mode, false,
             "Script has no xheight, so use a single mode for horizontal text");

  // ---- tospace.cpp ----------------------------------------------------
  BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?");
  BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false,
             "Constrain relative values of inter and intra-word gaps for "
             "old_to_method.");
  BOOL_VAR_H(tosp_only_use_prop_rows, true,
             "Block stats to use fixed pitch rows?");
  BOOL_VAR_H(tosp_force_wordbreak_on_punct, false,
             "Force word breaks on punct to break long lines in non-space "
             "delimited langs");
  BOOL_VAR_H(tosp_use_pre_chopping, false,
             "Space stats use prechopping?");
  BOOL_VAR_H(tosp_old_to_bug_fix, false,
             "Fix suspected bug in old code");
  BOOL_VAR_H(tosp_block_use_cert_spaces, true,
             "Only stat OBVIOUS spaces");
  BOOL_VAR_H(tosp_row_use_cert_spaces, true,
             "Only stat OBVIOUS spaces");
  BOOL_VAR_H(tosp_narrow_blobs_not_cert, true,
             "Only stat OBVIOUS spaces");
  BOOL_VAR_H(tosp_row_use_cert_spaces1, true,
             "Only stat OBVIOUS spaces");
  BOOL_VAR_H(tosp_recovery_isolated_row_stats, true,
             "Use row alone when inadequate cert spaces");
  BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
  BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
  BOOL_VAR_H(tosp_fuzzy_limit_all, true,
             "Don't restrict kn->sp fuzzy limit to tables");
  BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
             "Use within xht gap for wd breaks");
  BOOL_VAR_H(tosp_use_xht_gaps, true,
             "Use within xht gap for wd breaks");
  BOOL_VAR_H(tosp_only_use_xht_gaps, false,
             "Only use within xht gap for wd breaks");
  BOOL_VAR_H(tosp_rule_9_test_punct, false,
             "Don't chng kn to space next to punct");
  BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
  BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
  BOOL_VAR_H(tosp_improve_thresh, false,
             "Enable improvement heuristic");
  INT_VAR_H(tosp_debug_level, 0, "Debug data");
  INT_VAR_H(tosp_enough_space_samples_for_median, 3,
            "or should we use mean");
  INT_VAR_H(tosp_redo_kern_limit, 10,
            "No.samples reqd to reestimate for row");
  INT_VAR_H(tosp_few_samples, 40,
            "No.gaps reqd with 1 large gap to treat as a table");
  INT_VAR_H(tosp_short_row, 20,
            "No.gaps reqd with few cert spaces to use certs");
  INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly");
  double_VAR_H(tosp_old_sp_kn_th_factor, 2.0,
               "Factor for defining space threshold in terms of space and "
               "kern sizes");
  double_VAR_H(tosp_threshold_bias1, 0,
               "how far between kern and space?");
  double_VAR_H(tosp_threshold_bias2, 0,
               "how far between kern and space?");
  double_VAR_H(tosp_narrow_fraction, 0.3,
               "Fract of xheight for narrow");
  double_VAR_H(tosp_narrow_aspect_ratio, 0.48,
               "narrow if w/h less than this");
  double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide");
  double_VAR_H(tosp_wide_aspect_ratio, 0.0,
               "wide if w/h less than this");
  double_VAR_H(tosp_fuzzy_space_factor, 0.6,
               "Fract of xheight for fuzz sp");
  double_VAR_H(tosp_fuzzy_space_factor1, 0.5,
               "Fract of xheight for fuzz sp");
  double_VAR_H(tosp_fuzzy_space_factor2, 0.72,
               "Fract of xheight for fuzz sp");
  double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
  double_VAR_H(tosp_kern_gap_factor1, 2.0,
               "gap ratio to flip kern->sp");
  double_VAR_H(tosp_kern_gap_factor2, 1.3,
               "gap ratio to flip kern->sp");
  double_VAR_H(tosp_kern_gap_factor3, 2.5,
               "gap ratio to flip kern->sp");
  double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier");
  double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
  double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space");
  double_VAR_H(tosp_enough_small_gaps, 0.65,
               "Fract of kerns reqd for isolated row stats");
  double_VAR_H(tosp_table_kn_sp_ratio, 2.25,
               "Min difference of kn & sp in table");
  double_VAR_H(tosp_table_xht_sp_ratio, 0.33,
               "Expect spaces bigger than this");
  double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0,
               "Fuzzy if less than this");
  double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
  double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
  double_VAR_H(tosp_min_sane_kn_sp, 1.5,
               "Don't trust spaces less than this time kn");
  double_VAR_H(tosp_init_guess_kn_mult, 2.2,
               "Thresh guess - mult kn by this");
  double_VAR_H(tosp_init_guess_xht_mult, 0.28,
               "Thresh guess - mult xht by this");
  double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
               "Multiplier on kn to limit thresh");
  double_VAR_H(tosp_flip_caution, 0.0,
               "Don't autoflip kn to sp when large separation");
  double_VAR_H(tosp_large_kerning, 0.19,
               "Limit use of xht gap with large kns");
  double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
               "Limit use of xht gap with odd small kns");
  double_VAR_H(tosp_near_lh_edge, 0,
               "Don't reduce box if the top left is non blank");
  double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
               "Don't let sp minus kn get too small");
  double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
               "How wide fuzzies need context");

  // ---- tordmain.cpp ---------------------------------------------------
  BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs");
  BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
  BOOL_VAR_H(textord_show_boxes, false, "Display boxes");
  INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise");
  INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level");
  double_VAR_H(textord_blob_size_bigile, 95, "Percentile for large blobs");
  double_VAR_H(textord_noise_area_ratio, 0.7,
               "Fraction of bounding box for noise");
  double_VAR_H(textord_blob_size_smallile, 20, "Percentile for small blobs");
  double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess");
  double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess");
  INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima");
  double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count");
  INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob");
  double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion");
  BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words");
  BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows");
  double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs");
  double_VAR_H(textord_noise_sxfract, 0.4,
               "xh fract width error for norm blobs");
  double_VAR_H(textord_noise_hfract, 1.0/64,
               "Height fraction to discard outlines as speckle noise");
  INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row");
  double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion");
  BOOL_VAR_H(textord_noise_debug, FALSE, "Debug row garbage detector");
  double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift");
  double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift");
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_TEXTORD_H_