diff --git a/src/training/validate_indic.cpp b/src/training/validate_indic.cpp index d633d570..7f709b92 100644 --- a/src/training/validate_indic.cpp +++ b/src/training/validate_indic.cpp @@ -120,7 +120,7 @@ bool ValidateIndic::ConsumeViramaIfValid(IndicPair joiner, bool post_matra) { ASSERT_HOST(!CodeOnlyToOutput()); } else { // Half-form with optional Nukta. - int len = output_.size() + 1 - output_used_; + unsigned len = output_.size() + 1 - output_used_; if (UseMultiCode(len)) return true; } if (codes_used_ < num_codes && @@ -179,7 +179,7 @@ bool ValidateIndic::ConsumeConsonantHeadIfValid() { CodeOnlyToOutput(); // Special Sinhala case of [H Z Yayana/Rayana]. int index = output_.size() - 3; - if (output_used_ <= index && + if (output_used_ + 3 <= output_.size() && (output_.back() == kYayana || output_.back() == kRayana) && IsVirama(output_[index]) && output_[index + 1] == kZeroWidthJoiner) { MultiCodePart(3); @@ -192,7 +192,7 @@ bool ValidateIndic::ConsumeConsonantHeadIfValid() { } // Test for subscript conjunct. index = output_.size() - 2 - have_nukta; - if (output_used_ <= index && IsSubscriptScript() && + if (output_used_ + 2 + have_nukta <= output_.size() && IsSubscriptScript() && IsVirama(output_[index])) { // Output previous virama, consonant + optional nukta. MultiCodePart(2 + have_nukta); diff --git a/src/training/validate_javanese.cpp b/src/training/validate_javanese.cpp index acd6e728..410cf540 100644 --- a/src/training/validate_javanese.cpp +++ b/src/training/validate_javanese.cpp @@ -2,7 +2,6 @@ * File: validate_javanese.cpp * Description: Text validator for Javanese Script - aksara jawa. * Author: Shree Devi Kumar - * Created: August 03, 2018 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -90,7 +89,7 @@ bool ValidateJavanese::ConsumeViramaIfValid(IndicPair joiner, bool post_matra) { ASSERT_HOST(!CodeOnlyToOutput()); } else { // Half-form with optional Nukta. 
- int len = output_.size() + 1 - output_used_; + unsigned len = output_.size() + 1 - output_used_; if (UseMultiCode(len)) return true; } if (codes_used_ < num_codes && @@ -149,7 +148,7 @@ bool ValidateJavanese::ConsumeConsonantHeadIfValid() { CodeOnlyToOutput(); // Special Sinhala case of [H Z Yayana/Rayana]. int index = output_.size() - 3; - if (output_used_ <= index && + if (output_used_ + 3 <= output_.size() && (output_.back() == kPengkal || output_.back() == kCakra) && IsVirama(output_[index]) && output_[index + 1] == kZeroWidthJoiner) { MultiCodePart(3); @@ -162,7 +161,7 @@ bool ValidateJavanese::ConsumeConsonantHeadIfValid() { } // Test for subscript conjunct. index = output_.size() - 2 - have_nukta; - if (output_used_ <= index && IsSubscriptScript() && + if (output_used_ + 2 + have_nukta <= output_.size() && IsSubscriptScript() && IsVirama(output_[index])) { // Output previous virama, consonant + optional nukta. MultiCodePart(2 + have_nukta); diff --git a/src/training/validate_khmer.cpp b/src/training/validate_khmer.cpp index 1cc607fc..c830d624 100644 --- a/src/training/validate_khmer.cpp +++ b/src/training/validate_khmer.cpp @@ -45,7 +45,7 @@ bool ValidateKhmer::ConsumeGraphemeIfValid() { if (UseMultiCode(1)) return true; } } - int num_matra_parts = 0; + unsigned num_matra_parts = 0; if (codes_[codes_used_].second == kZeroWidthJoiner || codes_[codes_used_].second == kZeroWidthNonJoiner) { if (CodeOnlyToOutput()) { diff --git a/src/training/validator.h b/src/training/validator.h index 6d8f36f4..81e8f06d 100644 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -3,7 +3,6 @@ * Description: Base class for various text validators. Intended mainly for * scripts that use a virama character. * Author: Ray Smith - * Created: Tue May 23 2017 * * (C) Copyright 2017, Google Inc. 
* Licensed under the Apache License, Version 2.0 (the "License"); @@ -179,7 +178,7 @@ class Validator { // output_, adds unicodes as single-element vectors to parts_ to catch // output_used_ up to output->size() - length before adding the length-element // vector. - void MultiCodePart(int length) { + void MultiCodePart(unsigned length) { while (output_used_ + length < output_.size()) { parts_.emplace_back( std::initializer_list<char32>{output_[output_used_++]}); @@ -193,7 +192,7 @@ class Validator { // Helper function appends the next element of codes_ to output_, and then // calls MultiCodePart to add the appropriate components to parts_. // Returns true at the end of codes_. - bool UseMultiCode(int length) { + bool UseMultiCode(unsigned length) { output_.push_back(codes_[codes_used_].second); MultiCodePart(length); return ++codes_used_ == codes_.size();