diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index 9dfe5a76..7d6016bc 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -94,7 +94,7 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, PROTOTYPE *Proto; int ProtoId; - if (ClassId > NormProtos->NumProtos) { + if (ClassId >= NormProtos->NumProtos) { ClassId = NO_CLASS; } diff --git a/dict/trie.h b/dict/trie.h index 211e2f02..ba60b4e2 100644 --- a/dict/trie.h +++ b/dict/trie.h @@ -68,7 +68,7 @@ class Trie : public Dawg { }; // Minimum number of concrete characters at the beginning of user patterns. - static const int kSaneNumConcreteChars = 4; + static const int kSaneNumConcreteChars = 0; // Various unicode whitespace characters are used to denote unichar patterns, // (character classifier would never produce these whitespace characters as a // valid classification). diff --git a/tessdata/configs/bazaar b/tessdata/configs/bazaar new file mode 100644 index 00000000..1b2ee831 --- /dev/null +++ b/tessdata/configs/bazaar @@ -0,0 +1,4 @@ +load_system_dawg F +load_freq_dawg F +user_words_suffix user-words +user_patterns_suffix user-patterns diff --git a/tessdata/eng.user-patterns b/tessdata/eng.user-patterns new file mode 100644 index 00000000..5daba44d --- /dev/null +++ b/tessdata/eng.user-patterns @@ -0,0 +1,2 @@ +1-\d\d\d-GOOG-411 +www.\n\\\*.com diff --git a/tessdata/eng.user-words b/tessdata/eng.user-words new file mode 100644 index 00000000..e0c5a630 --- /dev/null +++ b/tessdata/eng.user-words @@ -0,0 +1,5 @@ +the +quick +brown +fox +jumped