From 53a3e0f88af8506cf174debf8318e5dbdeb960fa Mon Sep 17 00:00:00 2001 From: "zdenop@gmail.com" Date: Sun, 20 Oct 2013 20:20:10 +0000 Subject: [PATCH] fix issue 755; add example config files from tesseract manpage git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@894 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- classify/normmatch.cpp | 2 +- dict/trie.h | 2 +- tessdata/configs/bazaar | 4 ++++ tessdata/eng.user-patterns | 2 ++ tessdata/eng.user-words | 5 +++++ 5 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tessdata/configs/bazaar create mode 100644 tessdata/eng.user-patterns create mode 100644 tessdata/eng.user-words diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index 9dfe5a76..7d6016bc 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -94,7 +94,7 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, PROTOTYPE *Proto; int ProtoId; - if (ClassId > NormProtos->NumProtos) { + if (ClassId >= NormProtos->NumProtos) { ClassId = NO_CLASS; } diff --git a/dict/trie.h b/dict/trie.h index 211e2f02..ba60b4e2 100644 --- a/dict/trie.h +++ b/dict/trie.h @@ -68,7 +68,7 @@ class Trie : public Dawg { }; // Minimum number of concrete characters at the beginning of user patterns. - static const int kSaneNumConcreteChars = 4; + static const int kSaneNumConcreteChars = 0; // Various unicode whitespace characters are used to denote unichar patterns, // (character classifier would never produce these whitespace characters as a // valid classification). diff --git a/tessdata/configs/bazaar b/tessdata/configs/bazaar new file mode 100644 index 00000000..1b2ee831 --- /dev/null +++ b/tessdata/configs/bazaar @@ -0,0 +1,4 @@ +load_system_dawg F +load_freq_dawg F +user_words_suffix user-words +user_patterns_suffix user-patterns diff --git a/tessdata/eng.user-patterns b/tessdata/eng.user-patterns new file mode 100644 index 00000000..5daba44d --- /dev/null +++ b/tessdata/eng.user-patterns @@ -0,0 +1,2 @@ +1-\d\d\d-GOOG-411 +www.\n\\\*.com diff --git a/tessdata/eng.user-words b/tessdata/eng.user-words new file mode 100644 index 00000000..e0c5a630 --- /dev/null +++ b/tessdata/eng.user-words @@ -0,0 +1,5 @@ +the +quick +brown +fox +jumped