Merge pull request #2305 from stweil/fuzz

Fix Heap-buffer-overflow in GenericVector<int>::size (issue #2298)
This commit is contained in:
zdenop 2019-03-10 16:36:26 +01:00 committed by GitHub
commit 0e72733121
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 7 deletions

View File

@ -107,8 +107,10 @@ class Dict {
// Returns true if unichar_id is a word compounding character like - or /.
inline bool compound_marker(UNICHAR_ID unichar_id) {
const UNICHARSET& unicharset = getUnicharset();
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
const GenericVector<UNICHAR_ID>& normed_ids =
getUnicharset().normed_ids(unichar_id);
unicharset.normed_ids(unichar_id);
return normed_ids.size() == 1 &&
(normed_ids[0] == hyphen_unichar_id_ ||
normed_ids[0] == slash_unichar_id_);
@ -116,8 +118,10 @@ class Dict {
// Returns true if unichar_id is an apostrophe-like character that may
// separate prefix/suffix words from a main body word.
//
// The result is true only when the normalized-id list looked up for
// unichar_id holds exactly one entry and that entry equals
// apostrophe_unichar_id_. Before the lookup, the id is asserted (ASSERT_HOST)
// to be contained in the unicharset returned by getUnicharset().
inline bool is_apostrophe(UNICHAR_ID unichar_id) {
const UNICHARSET& unicharset = getUnicharset();
ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
// NOTE(review): this listing looks like a rendered diff with its +/- markers
// stripped. The getUnicharset().normed_ids(...) line below appears to be the
// removed initializer and the unicharset.normed_ids(...) line after it the
// replacement -- confirm against the committed file.
const GenericVector<UNICHAR_ID>& normed_ids =
getUnicharset().normed_ids(unichar_id);
unicharset.normed_ids(unichar_id);
return normed_ids.size() == 1 && normed_ids[0] == apostrophe_unichar_id_;
}
@ -141,17 +145,20 @@ class Dict {
}
}
/// Check whether the word has a hyphen at the end.
///
/// Returns false right away when last_word_on_line_ is not set or when
/// first_pos is true. Otherwise returns true only if the normalized-id list
/// looked up for unichar_id has exactly one entry and that entry equals
/// hyphen_unichar_id_.
///
/// NOTE(review): two signatures are shown back to back. This looks like a
/// rendered diff with its +/- markers stripped: the two-parameter form appears
/// to be the removed declaration and the three-parameter form (taking an
/// explicit UNICHARSET pointer) the replacement -- confirm against the
/// committed file.
inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const {
inline bool has_hyphen_end(const UNICHARSET* unicharset,
UNICHAR_ID unichar_id, bool first_pos) const {
if (!last_word_on_line_ || first_pos)
return false;
// The id is asserted to belong to the supplied unicharset before its
// normalized ids are looked up.
ASSERT_HOST(unicharset->contains_unichar_id(unichar_id));
// NOTE(review): of the two lookups below, the getUnicharset() one appears to
// be the removed line and the unicharset-> one the added line.
const GenericVector<UNICHAR_ID>& normed_ids =
getUnicharset().normed_ids(unichar_id);
unicharset->normed_ids(unichar_id);
return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_;
}
/// Same as above, but check the unichar at the end of the word.
///
/// Examines the unichar at index word.length() - 1 and passes
/// word_index == 0 as first_pos, so a word of length one is treated as being
/// at the first position.
inline bool has_hyphen_end(const WERD_CHOICE &word) const {
// NOTE(review): if word.length() were 0 this index would be -1; whether
// callers guarantee a non-empty word is not visible here -- confirm.
int word_index = word.length() - 1;
// NOTE(review): two return statements are shown because this listing looks
// like a rendered diff with its +/- markers stripped. The first appears to be
// the removed call; the second, which also forwards word.unicharset(), the
// added call -- confirm against the committed file.
return has_hyphen_end(word.unichar_id(word_index), word_index == 0);
return has_hyphen_end(word.unicharset(), word.unichar_id(word_index),
word_index == 0);
}
/// Unless the previous word was the last one on the line, and the current
/// one is not (thus it is the first one on the line), erase hyphen_word_,

View File

@ -3,7 +3,6 @@
// Description: Functions that utilize the knowledge about the properties,
// structure and statistics of the language to help recognition.
// Author: Daria Antonova
// Created: Mon Nov 11 11:26:43 PST 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -803,7 +802,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
}
// Deal with hyphenated words.
if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) {
if (word_end && dict_->has_hyphen_end(&dict_->getUnicharset(),
b.unichar_id(), curr_col == 0)) {
if (language_model_debug_level > 0) tprintf("Hyphenated word found\n");
return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM);
}