Deleted lots of dead code, including PBLOB

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@560 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2025-08-06 13:56:47 +08:00 · 2011-03-18 21:53:35 +00:00 · 2011-03-18 21:53:35 +00:00 · 7cd3c74419
commit 7cd3c74419
parent df738bb9a4
4 changed files with 9 additions and 50 deletions
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@@ -75,21 +75,12 @@ Dict::Dict(Image* image_ptr)
                 getImage()->getCCUtil()->params()),
      INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.",
                 getImage()->getCCUtil()->params()),
-      INT_MEMBER(ngram_permuter_debug_level, 0,
-                 "Debug level for the ngram permuter.",
-                 getImage()->getCCUtil()->params()),
-      double_MEMBER(ngram_permuter_nonmatch_score, -40.0,
-                    "Average classifier score of a non-matching unichar.",
-                    getImage()->getCCUtil()->params()),
      INT_MEMBER(max_viterbi_list_size, 10, "Maximum size of viterbi list.",
                 getImage()->getCCUtil()->params()),
      BOOL_MEMBER(use_only_first_uft8_step, false,
                  "Use only the first UTF8 step of the given string"
                  " when computing log probabilities.",
                  getImage()->getCCUtil()->params()),
-      double_MEMBER(ngram_model_scale_factor, 1.0, "Relative strength of the"
-                    " ngram model relative to the character classifier ",
-                    getImage()->getCCUtil()->params()),
      double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
                    getImage()->getCCUtil()->params()),
      double_MEMBER(stopper_nondict_certainty_base, -2.50,
--- a/dict/dict.h
+++ b/dict/dict.h
@@ -451,13 +451,6 @@ class Dict {
  /// Returns true if the word looks like an absolute garbage
  /// (e.g. image mistakenly recognized as text).
  bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset);
-  /* permngram.cpp ***********************************************************/
-  WERD_CHOICE *ngram_permute_and_select(
-      const BLOB_CHOICE_LIST_VECTOR &char_choices,
-      float rating_limit, float adjust_factor);
-  float compute_ngram_cost(float certainty, float denom,
-                         const char *str, const char *context,
-                         const UNICHARSET &unicharset);

  /* dict.cpp ****************************************************************/

@@ -532,8 +525,6 @@ class Dict {
  int def_letter_is_okay(void* void_dawg_args,
                         UNICHAR_ID unichar_id, bool word_end);

-  int new_letter_is_okay(void* void_dawg_args,
-                         UNICHAR_ID unichar_id, bool word_end);
  int (Dict::*letter_is_okay_)(void* void_dawg_args,
                               UNICHAR_ID unichar_id, bool word_end);
  /// Calls letter_is_okay_ member function.
@@ -544,7 +535,8 @@ class Dict {


  /// Probability in context function used by the ngram permuter.
-  double (Dict::*probability_in_context_)(const char* context,
+  double (Dict::*probability_in_context_)(const char* lang,
+                                          const char* context,
                                          int context_bytes,
                                          const char* character,
                                          int character_bytes);
@@ -553,13 +545,15 @@ class Dict {
                              int context_bytes,
                              const char* character,
                              int character_bytes) {
-    return (this->*probability_in_context_)(context, context_bytes,
-                                            character, character_bytes);
+    return (this->*probability_in_context_)(
+        getImage()->getCCUtil()->lang.string(),
+        context, context_bytes,
+        character, character_bytes);
  }

  /// Default (no-op) implementation of probability in context function.
  double def_probability_in_context(
-      const char* context, int context_bytes,
+      const char* lang, const char* context, int context_bytes,
      const char* character, int character_bytes) {
    (void) context;
    (void) context_bytes;
@@ -567,7 +561,8 @@ class Dict {
    (void) character_bytes;
    return 0.0;
  }
-  double ngram_probability_in_context(const char* context,
+  double ngram_probability_in_context(const char* lang,
+                                      const char* context,
                                      int context_bytes,
                                      const char* character,
                                      int character_bytes);
@@ -779,16 +774,10 @@ class Dict {
  INT_VAR_H(dawg_debug_level, 0, "Set to 1 for general debug info"
            ", to 2 for more details, to 3 to see all the debug messages");
  INT_VAR_H(hyphen_debug_level, 0, "Debug level for hyphenated words.");
-  INT_VAR_H(ngram_permuter_debug_level, 0,
-            "Debug level for the ngram permuter.");
-  double_VAR_H(ngram_permuter_nonmatch_score, -40.0,
-               "Average classifier score of a non-matching unichar.");
  INT_VAR_H(max_viterbi_list_size, 10, "Maximum size of viterbi list.");
  BOOL_VAR_H(use_only_first_uft8_step, false,
             "Use only the first UTF8 step of the given string"
             " when computing log probabilities.");
-  double_VAR_H(ngram_model_scale_factor, 1.0, "Relative strength of the"
-               " ngram model relative to the character classifier ");
  double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor");
  double_VAR_H(stopper_nondict_certainty_base, -2.50,
               "Certainty threshold for non-dict words");
--- a/dict/permute.cpp
+++ b/dict/permute.cpp
@@ -49,7 +49,6 @@
 #include "image.h"
 #include "globals.h"
 #include "ndminx.h"
-#include "permngram.h"
 #include "ratngs.h"
 #include "stopper.h"
 #include "tprintf.h"
@@ -365,15 +364,6 @@ WERD_CHOICE *Dict::permute_all(const BLOB_CHOICE_LIST_VECTOR &char_choices,
    LogNewChoice(adjust_factor, certainties, false, result2);
    result1 = get_best_delete_other(result1, result2);

-    if (ngram_permuter_activated &&
-        (best_choice->rating() == WERD_CHOICE::kBadRating ||
-         best_choice->permuter() == TOP_CHOICE_PERM) &&
-        result1->permuter() == TOP_CHOICE_PERM) {
-      result2 = ngram_permute_and_select(char_choices, best_choice->rating(),
-                                         segment_reward_ngram_best_choice);
-      result1 = get_best_delete_other(result1, result2);
-    }
-
    if (segment_segcost_rating) incorporate_segcost(result1);
  } else {
    result1 = permute_top_choice(char_choices, &top_choice_rating_limit,
@@ -402,14 +392,6 @@ WERD_CHOICE *Dict::permute_all(const BLOB_CHOICE_LIST_VECTOR &char_choices,

    result2 = permute_compound_words(char_choices, best_choice->rating());
    result1 = get_best_delete_other(result1, result2);
-
-    if (ngram_permuter_activated &&
-        best_choice->permuter() < SYSTEM_DAWG_PERM &&
-        result1->permuter() < SYSTEM_DAWG_PERM) {
-      result2 = ngram_permute_and_select(char_choices, best_choice->rating(),
-                                         segment_penalty_ngram_best_choice);
-      result1 = get_best_delete_other(result1, result2);
-    }
  }
  return result1;
 }
--- a/dict/permute.h
+++ b/dict/permute.h
@@ -65,9 +65,6 @@ extern double_VAR_H(segment_reward_ngram_best_choice, 0.99,
                    "Score multipler for ngram permuter's best choice"
                    " (only used in the Han script path).");

-extern BOOL_VAR_H(ngram_permuter_activated, false,
-                  "Activate character-level n-gram-based permuter");
-
 extern INT_VAR_H(max_permuter_attempts, 100000,
                 "Maximum number of different character choices to consider"
                 " during permutation. This limit is especially useful when"