From 18c67f49891c264187189257c6868876bf53dbcd Mon Sep 17 00:00:00 2001
From: zhuangzhuang1988
Date: Mon, 8 Jul 2019 14:35:17 +0800
Subject: [PATCH] fix tesstrain.py error

---
 src/training/tesstrain_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/training/tesstrain_utils.py b/src/training/tesstrain_utils.py
index 0b70d9b9..1877fc92 100644
--- a/src/training/tesstrain_utils.py
+++ b/src/training/tesstrain_utils.py
@@ -380,7 +380,7 @@ def phase_I_generate_image(ctx, par_factor=None):
             # for tesseract to recognize during training. Take only the ngrams whose
             # combined weight accounts for 95% of all the bigrams in the language.
             lines = pathlib.Path(ctx.bigram_freqs_file).read_text(encoding="utf-8").split("\n")
-            records = (line.split(" ") for line in lines)
+            records = (line.split() for line in lines)
             p = 0.99
             ngram_frac = p * sum(int(rec[1]) for rec in records if len(rec) >= 2)
 