/******************************************************************************
 ** Filename:    mftraining.c
 ** Purpose:     Separates training pages into files for each character.
 **              Strips from files only the features and their parameters of
 **              the feature type mf.
 ** Author:      Dan Johnson
 ** Revisment:   Christy Russon
 ** Environment: HPUX 6.5
 ** Library:     HPUX 6.5
 ** History:     Fri Aug 18 08:53:50 1989, DSJ, Created.
 **              5/25/90, DSJ, Adapted to multiple feature types.
 **              Tuesday, May 17, 1998 Changes made to make feature specific and
 **              simplify structures. First step in simplifying training process.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
******************************************************************************/ /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif #include #include #define _USE_MATH_DEFINES #include #ifdef _WIN32 #ifndef M_PI #define M_PI 3.14159265358979323846 #endif #endif #include "classify.h" #include "cluster.h" #include "clusttool.h" #include "commontraining.h" #include "danerror.h" #include "efio.h" #include "emalloc.h" #include "featdefs.h" #include "fontinfo.h" #include "genericvector.h" #include "indexmapbidi.h" #include "intproto.h" #include "mastertrainer.h" #include "mergenf.h" #include "mf.h" #include "ndminx.h" #include "ocrfeatures.h" #include "oldlist.h" #include "protos.h" #include "shapetable.h" #include "tessopt.h" #include "tprintf.h" #include "unicity_table.h" using tesseract::Classify; using tesseract::FontInfo; using tesseract::FontSpacingInfo; using tesseract::IndexMapBiDi; using tesseract::MasterTrainer; using tesseract::Shape; using tesseract::ShapeTable; #define PROGRAM_FEATURE_TYPE "mf" // Max length of a fake shape label. 
const int kMaxShapeLabelLength = 10; DECLARE_STRING_PARAM_FLAG(test_ch); /*---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------*/ int main ( int argc, char **argv); /*---------------------------------------------------------------------------- Public Code -----------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED static void DisplayProtoList(const char* ch, LIST protolist) { void* window = c_create_window("Char samples", 50, 200, 520, 520, -130.0, 130.0, -130.0, 130.0); LIST proto = protolist; iterate(proto) { PROTOTYPE* prototype = reinterpret_cast(first_node(proto)); if (prototype->Significant) c_line_color_index(window, Green); else if (prototype->NumSamples == 0) c_line_color_index(window, Blue); else if (prototype->Merged) c_line_color_index(window, Magenta); else c_line_color_index(window, Red); float x = CenterX(prototype->Mean); float y = CenterY(prototype->Mean); double angle = OrientationOf(prototype->Mean) * 2 * M_PI; float dx = static_cast(LengthOf(prototype->Mean) * cos(angle) / 2); float dy = static_cast(LengthOf(prototype->Mean) * sin(angle) / 2); c_move(window, (x - dx) * 256, (y - dy) * 256); c_draw(window, (x + dx) * 256, (y + dy) * 256); if (prototype->Significant) tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, prototype->NumSamples); else if (prototype->NumSamples > 0 && !prototype->Merged) tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, prototype->NumSamples); } c_make_current(window); } #endif // GRAPHICS_DISABLED // Helper to run clustering on a single config. // Mostly copied from the old mftraining, but with renamed variables. 
static LIST ClusterOneConfig(int shape_id, const char* class_label, LIST mf_classes, const ShapeTable& shape_table, MasterTrainer* trainer) { int num_samples; CLUSTERER *clusterer = trainer->SetupForClustering(shape_table, feature_defs, shape_id, &num_samples); Config.MagicSamples = num_samples; LIST proto_list = ClusterSamples(clusterer, &Config); CleanUpUnusedData(proto_list); // Merge protos where reasonable to make more of them significant by // representing almost all samples of the class/font. MergeInsignificantProtos(proto_list, class_label, clusterer, &Config); #ifndef GRAPHICS_DISABLED if (strcmp(FLAGS_test_ch.c_str(), class_label) == 0) DisplayProtoList(FLAGS_test_ch.c_str(), proto_list); #endif // GRAPHICS_DISABLED // Delete the protos that will not be used in the inttemp output file. proto_list = RemoveInsignificantProtos(proto_list, true, false, clusterer->SampleSize); FreeClusterer(clusterer); MERGE_CLASS merge_class = FindClass(mf_classes, class_label); if (merge_class == NULL) { merge_class = NewLabeledClass(class_label); mf_classes = push(mf_classes, merge_class); } int config_id = AddConfigToClass(merge_class->Class); merge_class->Class->font_set.push_back(shape_id); LIST proto_it = proto_list; iterate(proto_it) { PROTOTYPE* prototype = reinterpret_cast(first_node(proto_it)); // See if proto can be approximated by existing proto. int p_id = FindClosestExistingProto(merge_class->Class, merge_class->NumMerged, prototype); if (p_id == NO_PROTO) { // Need to make a new proto, as it doesn't match anything. p_id = AddProtoToClass(merge_class->Class); MakeNewFromOld(ProtoIn(merge_class->Class, p_id), prototype); merge_class->NumMerged[p_id] = 1; } else { PROTO_STRUCT dummy_proto; MakeNewFromOld(&dummy_proto, prototype); // Merge with the similar proto. 
ComputeMergedProto(ProtoIn(merge_class->Class, p_id), &dummy_proto, static_cast(merge_class->NumMerged[p_id]), 1.0, ProtoIn(merge_class->Class, p_id)); merge_class->NumMerged[p_id]++; } AddProtoToConfig(p_id, merge_class->Class->Configurations[config_id]); } FreeProtoList(&proto_list); return mf_classes; } // Helper to setup the config map. // Setup an index mapping from the shapes in the shape table to the classes // that will be trained. In keeping with the original design, each shape // with the same list of unichars becomes a different class and the configs // represent the different combinations of fonts. static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) { int num_configs = shape_table->NumShapes(); config_map->Init(num_configs, true); config_map->Setup(); for (int c1 = 0; c1 < num_configs; ++c1) { // Only process ids that are not already merged. if (config_map->SparseToCompact(c1) == c1) { Shape* shape1 = shape_table->MutableShape(c1); // Find all the subsequent shapes that are equal. for (int c2 = c1 + 1; c2 < num_configs; ++c2) { if (shape_table->MutableShape(c2)->IsEqualUnichars(shape1)) { config_map->Merge(c1, c2); } } } } config_map->CompleteMerges(); } /** * This program reads in a text file consisting of feature * samples from a training page in the following format: * @verbatim FontName UTF8-char-str xmin ymin xmax ymax page-number NumberOfFeatureTypes(N) FeatureTypeName1 NumberOfFeatures(M) Feature1 ... FeatureM FeatureTypeName2 NumberOfFeatures(M) Feature1 ... FeatureM ... FeatureTypeNameN NumberOfFeatures(M) Feature1 ... FeatureM FontName CharName ... @endverbatim * The result of this program is a binary inttemp file used by * the OCR engine. * @param argc number of command line arguments * @param argv array of command line arguments * @return none * @note Exceptions: none * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. * @note History: Mon May 18 1998, Christy Russson, Revistion started. 
*/ int main (int argc, char **argv) { ParseArguments(&argc, &argv); ShapeTable* shape_table = NULL; STRING file_prefix; // Load the training data. MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv, false, &shape_table, &file_prefix); if (trainer == NULL) return 1; // Failed. // Setup an index mapping from the shapes in the shape table to the classes // that will be trained. In keeping with the original design, each shape // with the same list of unichars becomes a different class and the configs // represent the different combinations of fonts. IndexMapBiDi config_map; SetupConfigMap(shape_table, &config_map); WriteShapeTable(file_prefix, *shape_table); // If the shape_table is flat, then either we didn't run shape clustering, or // it did nothing, so we just output the trainer's unicharset. // Otherwise shape_set will hold a fake unicharset with an entry for each // shape in the shape table, and we will output that instead. UNICHARSET shape_set; const UNICHARSET* unicharset = &trainer->unicharset(); // If we ran shapeclustering (and it worked) then at least one shape will // have multiple unichars, so we have to build a fake unicharset. if (shape_table->AnyMultipleUnichars()) { unicharset = &shape_set; // Now build a fake unicharset for the compact shape space to keep the // output modules happy that we are doing things correctly. int num_shapes = config_map.CompactSize(); for (int s = 0; s < num_shapes; ++s) { char shape_label[kMaxShapeLabelLength + 1]; snprintf(shape_label, kMaxShapeLabelLength, "sh%04d", s); shape_set.unichar_insert(shape_label); } } // Now train each config separately. int num_configs = shape_table->NumShapes(); LIST mf_classes = NIL_LIST; for (int s = 0; s < num_configs; ++s) { int unichar_id, font_id; if (unicharset == &shape_set) { // Using fake unichar_ids from the config_map/shape_set. unichar_id = config_map.SparseToCompact(s); } else { // Get the real unichar_id from the shape table/unicharset. 
shape_table->GetFirstUnicharAndFont(s, &unichar_id, &font_id); } const char* class_label = unicharset->id_to_unichar(unichar_id); mf_classes = ClusterOneConfig(s, class_label, mf_classes, *shape_table, trainer); } STRING inttemp_file = file_prefix; inttemp_file += "inttemp"; STRING pffmtable_file = file_prefix; pffmtable_file += "pffmtable"; CLASS_STRUCT* float_classes = SetUpForFloat2Int(*unicharset, mf_classes); // Now write the inttemp and pffmtable. trainer->WriteInttempAndPFFMTable(trainer->unicharset(), *unicharset, *shape_table, float_classes, inttemp_file.string(), pffmtable_file.string()); delete [] float_classes; FreeLabeledClassList(mf_classes); delete trainer; delete shape_table; printf("Done!\n"); if (!FLAGS_test_ch.empty()) { // If we are displaying debug window(s), wait for the user to look at them. printf("Hit return to exit...\n"); while (getchar() != '\n'); } return 0; } /* main */