From 7f31a0634d85d12ce11989af2182b2ad4195f954 Mon Sep 17 00:00:00 2001 From: Bharat123rox Date: Tue, 21 May 2019 23:24:50 +0530 Subject: [PATCH 1/3] Some LGTM fixes and potential bugfixes --- src/ccutil/unicharmap.cpp | 2 +- src/classify/trainingsampleset.cpp | 1 + src/training/tesstrain_utils.py | 14 ++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/ccutil/unicharmap.cpp b/src/ccutil/unicharmap.cpp index 03ff4d4dc..254ce7bbf 100644 --- a/src/ccutil/unicharmap.cpp +++ b/src/ccutil/unicharmap.cpp @@ -39,7 +39,7 @@ UNICHAR_ID UNICHARMAP::unichar_to_id(const char* const unichar_repr, assert(length > 0 && length <= UNICHAR_LEN); int index = 0; - if (index >= length || unichar_repr[index] == '\0') return INVALID_UNICHAR_ID; + if (unichar_repr[index] == '\0') return INVALID_UNICHAR_ID; do { if (index + 1 >= length || unichar_repr[index + 1] == '\0') return current_nodes[static_cast<unsigned char>(unichar_repr[index])].id; diff --git a/src/classify/trainingsampleset.cpp b/src/classify/trainingsampleset.cpp index 620b52fa0..058307c08 100644 --- a/src/classify/trainingsampleset.cpp +++ b/src/classify/trainingsampleset.cpp @@ -618,6 +618,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, if (dist > max_dist) { max_dist = dist; if (dist > max_max_dist) { + max_max_dist = dist; max_s1 = s1; max_s2 = s2; } diff --git a/src/training/tesstrain_utils.py b/src/training/tesstrain_utils.py index f7ba62bbb..fbcb7a477 100644 --- a/src/training/tesstrain_utils.py +++ b/src/training/tesstrain_utils.py @@ -56,6 +56,16 @@ class TrainingArgs(argparse.Namespace): self.extract_font_properties = True self.distort_image = False + def __eq__(self, other): + return (argparse.Namespace.__eq__(self, other) and + self.uname = other.uname and self.lang_code = other.lang_code and + self.timestamp = other.timestamp and self.font_config_cache = other.font_config_cache and + self.fonts_dir = other.fonts_dir and self.max_pages = other.max_pages and + 
self.save_box_tiff = other.save_box_tiff and self.overwrite = other.overwrite and + self.linedata = other.linedata and self.run_shape_clustering = other.run_shape_clustering and + self.extract_font_properties = other.extract_font_properties and + self.distort_image = other.distort_image) + def err_exit(msg): log.critical(msg) @@ -356,7 +366,7 @@ def generate_font_image(ctx, font, exposure, char_spacing): # Phase I : Generate (I)mages from training text for each font. -def phase_I_generate_image(ctx, par_factor): +def phase_I_generate_image(ctx, par_factor=None): if not par_factor or par_factor <= 0: par_factor = 1 @@ -386,7 +396,7 @@ def phase_I_generate_image(ctx, par_factor): with tqdm( total=len(ctx.fonts) - ) as pbar, concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: + ) as pbar, concurrent.futures.ThreadPoolExecutor(max_workers=par_factor) as executor: futures = [ executor.submit(generate_font_image, ctx, font, exposure, char_spacing) for font in ctx.fonts From 945ccac85a48df2b746649cfa8c7a2529611e44c Mon Sep 17 00:00:00 2001 From: Bharat123rox Date: Wed, 22 May 2019 10:10:12 +0530 Subject: [PATCH 2/3] Fix syntax error --- src/training/tesstrain_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/training/tesstrain_utils.py b/src/training/tesstrain_utils.py index fbcb7a477..b7e638de4 100644 --- a/src/training/tesstrain_utils.py +++ b/src/training/tesstrain_utils.py @@ -58,13 +58,13 @@ class TrainingArgs(argparse.Namespace): def __eq__(self, other): return (argparse.Namespace.__eq__(self, other) and - self.uname = other.uname and self.lang_code = other.lang_code and - self.timestamp = other.timestamp and self.font_config_cache = other.font_config_cache and - self.fonts_dir = other.fonts_dir and self.max_pages = other.max_pages and - self.save_box_tiff = other.save_box_tiff and self.overwrite = other.overwrite and - self.linedata = other.linedata and self.run_shape_clustering = other.run_shape_clustering 
and - self.extract_font_properties = other.extract_font_properties and - self.distort_image = other.distort_image) + self.uname == other.uname and self.lang_code == other.lang_code and + self.timestamp == other.timestamp and self.font_config_cache == other.font_config_cache and + self.fonts_dir == other.fonts_dir and self.max_pages == other.max_pages and + self.save_box_tiff == other.save_box_tiff and self.overwrite == other.overwrite and + self.linedata == other.linedata and self.run_shape_clustering == other.run_shape_clustering and + self.extract_font_properties == other.extract_font_properties and + self.distort_image == other.distort_image) def err_exit(msg): From 0bf45e81e77abd5ad8155ab1e7c9a9d899bed834 Mon Sep 17 00:00:00 2001 From: Bharat123rox Date: Wed, 22 May 2019 11:23:27 +0530 Subject: [PATCH 3/3] Fix LGTM and revert bugfix for later PR --- src/ccutil/unicharmap.cpp | 2 +- src/classify/trainingsampleset.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ccutil/unicharmap.cpp b/src/ccutil/unicharmap.cpp index 254ce7bbf..ea5ced41c 100644 --- a/src/ccutil/unicharmap.cpp +++ b/src/ccutil/unicharmap.cpp @@ -39,7 +39,7 @@ UNICHAR_ID UNICHARMAP::unichar_to_id(const char* const unichar_repr, assert(length > 0 && length <= UNICHAR_LEN); int index = 0; - if (unichar_repr[index] == '\0') return INVALID_UNICHAR_ID; + if (length <= 0 || unichar_repr[index] == '\0') return INVALID_UNICHAR_ID; do { if (index + 1 >= length || unichar_repr[index + 1] == '\0') return current_nodes[static_cast<unsigned char>(unichar_repr[index])].id; diff --git a/src/classify/trainingsampleset.cpp b/src/classify/trainingsampleset.cpp index 058307c08..620b52fa0 100644 --- a/src/classify/trainingsampleset.cpp +++ b/src/classify/trainingsampleset.cpp @@ -618,7 +618,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, if (dist > max_dist) { max_dist = dist; if (dist > max_max_dist) { - max_max_dist = dist; max_s1 = s1; max_s2 = s2; }