Merge pull request #2305 from stweil/fuzz

Fix Heap-buffer-overflow in GenericVector<int>::size (issue #2298)
2024-11-27 20:59:36 +08:00 · 2019-03-10 16:36:26 +01:00 · 2019-03-10 16:36:26 +01:00 · 0e72733121
commit 0e72733121
parent b7279f6d67 71d4990c6d
2 changed files with 14 additions and 7 deletions
--- a/src/dict/dict.h
+++ b/src/dict/dict.h
@@ -107,8 +107,10 @@ class Dict {

  // Returns true if unichar_id is a word compounding character like - or /.
  inline bool compound_marker(UNICHAR_ID unichar_id) {
+    const UNICHARSET& unicharset = getUnicharset();
+    ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
    const GenericVector<UNICHAR_ID>& normed_ids =
-        getUnicharset().normed_ids(unichar_id);
+        unicharset.normed_ids(unichar_id);
    return normed_ids.size() == 1 &&
        (normed_ids[0] == hyphen_unichar_id_ ||
         normed_ids[0] == slash_unichar_id_);
@@ -116,8 +118,10 @@ class Dict {
  // Returns true if unichar_id is an apostrophe-like character that may
  // separate prefix/suffix words from a main body word.
  inline bool is_apostrophe(UNICHAR_ID unichar_id) {
+    const UNICHARSET& unicharset = getUnicharset();
+    ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
    const GenericVector<UNICHAR_ID>& normed_ids =
-        getUnicharset().normed_ids(unichar_id);
+        unicharset.normed_ids(unichar_id);
    return normed_ids.size() == 1 && normed_ids[0] == apostrophe_unichar_id_;
  }

@@ -141,17 +145,20 @@ class Dict {
    }
  }
  /// Check whether the word has a hyphen at the end.
-  inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const {
+  inline bool has_hyphen_end(const UNICHARSET* unicharset,
+                             UNICHAR_ID unichar_id, bool first_pos) const {
    if (!last_word_on_line_ || first_pos)
      return false;
+    ASSERT_HOST(unicharset->contains_unichar_id(unichar_id));
    const GenericVector<UNICHAR_ID>& normed_ids =
-        getUnicharset().normed_ids(unichar_id);
+        unicharset->normed_ids(unichar_id);
    return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_;
  }
  /// Same as above, but check the unichar at the end of the word.
  inline bool has_hyphen_end(const WERD_CHOICE &word) const {
    int word_index = word.length() - 1;
-    return has_hyphen_end(word.unichar_id(word_index), word_index == 0);
+    return has_hyphen_end(word.unicharset(), word.unichar_id(word_index),
+                          word_index == 0);
  }
  /// Unless the previous word was the last one on the line, and the current
  /// one is not (thus it is the first one on the line), erase hyphen_word_,
--- a/src/wordrec/language_model.cpp
+++ b/src/wordrec/language_model.cpp
@@ -3,7 +3,6 @@
 // Description: Functions that utilize the knowledge about the properties,
 //              structure and statistics of the language to help recognition.
 // Author:      Daria Antonova
-// Created:     Mon Nov 11 11:26:43 PST 2009
 //
 // (C) Copyright 2009, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -803,7 +802,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
  }

  // Deal with hyphenated words.
-  if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) {
+  if (word_end && dict_->has_hyphen_end(&dict_->getUnicharset(),
+                                        b.unichar_id(), curr_col == 0)) {
    if (language_model_debug_level > 0) tprintf("Hyphenated word found\n");
    return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM);
  }