From 449f1cd4ba17534483e181d0f502688d991d6d42 Mon Sep 17 00:00:00 2001 From: Shree Date: Mon, 25 Feb 2019 18:47:42 +0000 Subject: [PATCH] Remove test for Word started with a combiner --- src/training/validate_grapheme.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/training/validate_grapheme.cpp b/src/training/validate_grapheme.cpp index 759e6e6cb..4a598bf3d 100644 --- a/src/training/validate_grapheme.cpp +++ b/src/training/validate_grapheme.cpp @@ -15,11 +15,12 @@ bool ValidateGrapheme::ConsumeGraphemeIfValid() { char32 ch = codes_[codes_used_].second; const bool is_combiner = cc == CharClass::kCombiner || cc == CharClass::kVirama; - // Reject easily detected badly formed sequences. - if (prev_cc == CharClass::kWhitespace && is_combiner) { - if (report_errors_) tprintf("Word started with a combiner:0x%x\n", ch); - return false; - } + // TODO: Re-enable rejecting a word that starts with a combiner. Disabled; see: + // https://github.com/tesseract-ocr/tesseract/pull/2266#issuecomment-467114751 + // if (prev_cc == CharClass::kWhitespace && is_combiner) { + // if (report_errors_) tprintf("Word started with a combiner:0x%x\n", ch); + // return false; + // } if (prev_cc == CharClass::kVirama && cc == CharClass::kVirama) { if (report_errors_) tprintf("Two grapheme links in a row:0x%x 0x%x\n", prev_ch, ch);