Fixed blob division params to fix CJK training speed.

This commit is contained in:
Ray Smith 2015-06-12 10:59:26 -07:00
parent 4c7ab0caea
commit d74c625e52
3 changed files with 128 additions and 114 deletions

View File

@ -33,12 +33,14 @@
namespace tesseract {
Classify::Classify()
: BOOL_MEMBER(prioritize_division, FALSE,
: BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
this->params()),
BOOL_MEMBER(prioritize_division, FALSE,
"Prioritize blob division over chopping", this->params()),
INT_MEMBER(tessedit_single_match, FALSE,
"Top choice only from CP", this->params()),
BOOL_MEMBER(classify_enable_learning, true,
"Enable adaptive classifier", this->params()),
INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP",
this->params()),
BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
this->params()),
INT_MEMBER(classify_debug_level, 0, "Classify debug level",
this->params()),
INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
@ -47,11 +49,13 @@ Classify::Classify()
"Character Normalization Range ...", this->params()),
double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
this->params()), /* PREV DEFAULT 0.1 */
double_MEMBER(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...",
double_MEMBER(classify_max_norm_scale_x, 0.325,
"Max char x-norm scale ...",
this->params()), /* PREV DEFAULT 0.3 */
double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
this->params()), /* PREV DEFAULT 0.1 */
double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...",
double_MEMBER(classify_max_norm_scale_y, 0.325,
"Max char y-norm scale ...",
this->params()), /* PREV DEFAULT 0.3 */
double_MEMBER(classify_max_rating_ratio, 1.5,
"Veto ratio between classifier ratings", this->params()),
@ -63,8 +67,7 @@ Classify::Classify()
BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
this->params()),
BOOL_MEMBER(classify_enable_adaptive_matcher, 1,
"Enable adaptive classifier",
this->params()),
"Enable adaptive classifier", this->params()),
BOOL_MEMBER(classify_use_pre_adapted_templates, 0,
"Use pre-adapted classifier templates", this->params()),
BOOL_MEMBER(classify_save_adapted_templates, 0,
@ -79,7 +82,7 @@ Classify::Classify()
this->params()),
double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
this->params()),
double_MEMBER(matcher_great_threshold, 0.0, "Great Match (0-1)",
double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
this->params()),
double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
this->params()),
@ -108,7 +111,8 @@ Classify::Classify()
this->params()),
double_MEMBER(tessedit_class_miss_scale, 0.00390625,
"Scale factor for features not used", this->params()),
double_MEMBER(classify_adapted_pruning_factor, 2.5,
double_MEMBER(
classify_adapted_pruning_factor, 2.5,
"Prune poor adapted results this much worse than best result",
this->params()),
double_MEMBER(classify_adapted_pruning_threshold, -1.0,
@ -122,16 +126,20 @@ Classify::Classify()
this->params()),
BOOL_MEMBER(disable_character_fragments, TRUE,
"Do not include character fragments in the"
" results of the classifier", this->params()),
" results of the classifier",
this->params()),
double_MEMBER(classify_character_fragments_garbage_certainty_threshold,
-3.0, "Exclude fragments that do not look like whole"
" characters from training and adaption", this->params()),
-3.0,
"Exclude fragments that do not look like whole"
" characters from training and adaption",
this->params()),
BOOL_MEMBER(classify_debug_character_fragments, FALSE,
"Bring up graphical debugging windows for fragments training",
this->params()),
BOOL_MEMBER(matcher_debug_separate_windows, FALSE,
"Use two different windows for debugging the matching: "
"One for the protos and one for the features.", this->params()),
"One for the protos and one for the features.",
this->params()),
STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
this->params()),
INT_MEMBER(classify_class_pruner_threshold, 229,

View File

@ -374,6 +374,12 @@ class Classify : public CCStruct {
// Member variables.
// Parameters.
// Set during training (in lang.config) to indicate whether the divisible
// blobs chopper should be used (true for Latin script).
BOOL_VAR_H(allow_blob_division, true, "Use divisible blobs chopping");
// Set during training (in lang.config) to indicate whether the divisible
// blobs chopper should be used in preference to chopping. Set to true for
// southern Indic scripts.
BOOL_VAR_H(prioritize_division, FALSE,
"Prioritize blob division over chopping");
INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP");

View File

@ -200,7 +200,7 @@ SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
if (seam == NULL) {
if (repair_unchopped_blobs)
restore_outline_tree(blob->outlines);
if (word->latin_script) {
if (allow_blob_division && !prioritize_division) {
// If the blob can simply be divided into outlines, then do that.
TPOINT location;
if (divisible_blob(blob, italic_blob, &location)) {