/****************************************************************************** ** Filename: blobclass.c ** Purpose: High level blob classification and training routines. ** Author: Dan Johnson ** History: 7/21/89, DSJ, Created. ** ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "blobclass.h" #include "fxdefs.h" #include "extract.h" #include "efio.h" #include "callcpp.h" #include "chartoname.h" #include #include #include #define MAXFILENAME 80 #define MAXMATCHES 10 STRING_VAR(classify_font_name, "UnknownFont", "Default font name to be used in training"); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ /* name of current image file being processed */ extern char imagefile[]; /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ void LearnBlob (const STRING& filename, TBLOB * Blob, TEXTROW * Row, char BlobText[]) /* ** Parameters: ** Blob blob whose micro-features are to be learned ** Row row of text that blob came from ** BlobText text that corresponds to blob ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned ** classify_font_name ** name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. ** Return: none ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; STRING Filename(filename); CHAR_DESC CharDesc; LINE_STATS LineStats; EnterLearnMode; GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; } // If no fontname was set, try to extract it from the filename char CurrFontName[32] = ""; strncpy(CurrFontName, static_cast(classify_font_name).string(), 32); if (!strcmp(CurrFontName, "UnknownFont")) { // filename is expected to be of the form [lang].[fontname].exp[num] // The [lang], [fontname] and [num] fields should not have '.' characters. const char *basename = strrchr(filename.string(), '/'); const char *firstdot = strchr(basename, '.'); const char *lastdot = strrchr(filename.string(), '.'); if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { strncpy(CurrFontName, firstdot + 1, lastdot - firstdot - 1); } } // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { Filename += TRAIN_SUFFIX; FeatureFile = Efopen (Filename.string(), "w"); cprintf ("TRAINING ... Font name = %s\n", CurrFontName); } // label the features with a class name and font name fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); FreeCharDescription(CharDesc); } // LearnBlob