/******************************************************************************
 ** Filename: clustertool.c
 ** Purpose:  Misc. tools for use with the clustering routines
 ** Author:   Dan Johnson
 ** History:  6/6/89, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/

//--------------------------Include Files----------------------------------
#include "clusttool.h"
#include "const.h"
#include "danerror.h"
#include "emalloc.h"
#include "scanutils.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

using tesseract::TFile;

//---------------Global Data Definitions and Declarations--------------------
#define TOKENSIZE 80  //< max size of tokens read from an input file
#define QUOTED_TOKENSIZE "79"
#define MAXSAMPLESIZE 65535  //< max num of dimensions in feature space
//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block
// size)

/**
 * This routine reads a single integer from the specified
 * file and checks to ensure that it is between 0 and
 * MAXSAMPLESIZE.
 * @param File open text file to read sample size from
 * @return Sample size
 * @note Globals: None
 * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range
 * @note History: 6/6/89, DSJ, Created.
*/
uinT16 ReadSampleSize(TFile *fp) {
  const int kMaxLineSize = 100;
  char line[kMaxLineSize];
  int SampleSize = 0;

  // One line is consumed; it must hold a decimal integer in
  // [0, MAXSAMPLESIZE], otherwise the error is reported through DoError.
  if (fp->FGets(line, kMaxLineSize) == nullptr ||
      sscanf(line, "%d", &SampleSize) != 1 ||
      (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
    DoError(ILLEGALSAMPLESIZE, "Illegal sample size");
  return (SampleSize);
}

/**
 * This routine reads textual descriptions of sets of parameters
 * which describe the characteristics of feature dimensions.
 *
 * One line is consumed per dimension. Each line must hold two word
 * tokens followed by two floats (min and max). Only the first character
 * of each word is examined: a first word starting with 'c' marks the
 * dimension as circular, and a second word starting with 'e' marks it
 * as essential; anything else means linear / non-essential.
 *
 * Exceptions:
 * - ILLEGALCIRCULARSPEC is reported through DoError for any line that
 *   cannot be read or does not contain all four fields.
 * @param fp open text file to read N parameter descriptions from
 * @param N number of parameter descriptions to read
 * @return Pointer to an array of N parameter descriptors obtained
 *         from Emalloc.
 * @note Globals: None
 * @note History: 6/6/89, DSJ, Created.
 */
PARAM_DESC *ReadParamDesc(TFile *fp, uinT16 N) {
  char linear_token[TOKENSIZE], essential_token[TOKENSIZE];

  PARAM_DESC *ParamDesc = (PARAM_DESC *)Emalloc(N * sizeof(PARAM_DESC));
  for (int i = 0; i < N; i++) {
    const int kMaxLineSize = TOKENSIZE * 4;
    char line[kMaxLineSize];
    if (fp->FGets(line, kMaxLineSize) == nullptr ||
        sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %f %f",
               linear_token, essential_token, &ParamDesc[i].Min,
               &ParamDesc[i].Max) != 4)
      DoError(ILLEGALCIRCULARSPEC, "Illegal Parameter specification");

    if (linear_token[0] == 'c')
      ParamDesc[i].Circular = TRUE;
    else
      ParamDesc[i].Circular = FALSE;

    // The essential/non-essential flag lives in the SECOND token; testing
    // the first token here would make every dimension non-essential.
    if (essential_token[0] == 'e')
      ParamDesc[i].NonEssential = FALSE;
    else
      ParamDesc[i].NonEssential = TRUE;

    // Derived quantities, computed once from Min/Max.
    ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
    ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
    ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
  }
  return (ParamDesc);
}

/**
 * This routine reads a textual description of a prototype from
 * the specified file.
* * Exceptions: * - ILLEGALSIGNIFICANCESPEC * - ILLEGALSAMPLECOUNT * - ILLEGALMEANSPEC * - ILLEGALVARIANCESPEC * - ILLEGALDISTRIBUTION * @param File open text file to read prototype from * @param N number of dimensions used in prototype * @return List of prototypes * @note Globals: None * @note History: 6/6/89, DSJ, Created. */ PROTOTYPE *ReadPrototype(TFile *fp, uinT16 N) { char sig_token[TOKENSIZE], shape_token[TOKENSIZE]; PROTOTYPE *Proto; int SampleCount; int i; const int kMaxLineSize = TOKENSIZE * 4; char line[kMaxLineSize]; if (fp->FGets(line, kMaxLineSize) == nullptr || sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d", sig_token, shape_token, &SampleCount) != 3) { tprintf("Invalid prototype: %s\n", line); return nullptr; } Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); Proto->Cluster = NULL; if (sig_token[0] == 's') Proto->Significant = TRUE; else Proto->Significant = FALSE; Proto->Style = ReadProtoStyle(shape_token); if (SampleCount < 0) DoError(ILLEGALSAMPLECOUNT, "Illegal sample count"); Proto->NumSamples = SampleCount; Proto->Mean = ReadNFloats(fp, N, NULL); if (Proto->Mean == NULL) DoError(ILLEGALMEANSPEC, "Illegal prototype mean"); switch (Proto->Style) { case spherical: if (ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) == NULL) DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance"); Proto->Magnitude.Spherical = 1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Spherical)); Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, (float)N); Proto->LogMagnitude = log((double)Proto->TotalMagnitude); Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical; Proto->Distrib = NULL; break; case elliptical: Proto->Variance.Elliptical = ReadNFloats(fp, N, NULL); if (Proto->Variance.Elliptical == NULL) DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance"); Proto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); Proto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); Proto->TotalMagnitude = 1.0; for (i = 0; i 
< N; i++) { Proto->Magnitude.Elliptical[i] = 1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Elliptical[i])); Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i]; Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i]; } Proto->LogMagnitude = log((double)Proto->TotalMagnitude); Proto->Distrib = NULL; break; default: Efree(Proto); tprintf("Invalid prototype style\n"); return nullptr; } return Proto; } /** * This routine reads an single token from the specified * text file and interprets it as a prototype specification. * @param File open text file to read prototype style from * @return Prototype style read from text file * @note Globals: None * @note Exceptions: ILLEGALSTYLESPEC illegal prototype style specification * @note History: 6/8/89, DSJ, Created. */ PROTOSTYLE ReadProtoStyle(const char *shape) { switch (shape[0]) { case 's': return spherical; case 'e': return elliptical; case 'a': return automatic; default: break; } tprintf("Invalid prototype style specification:%s\n", shape); return elliptical; } /** * This routine reads N floats from the specified text file * and places them into Buffer. If Buffer is NULL, a buffer * is created and passed back to the caller. If EOF is * encountered before any floats can be read, NULL is * returned. * @param File open text file to read floats from * @param N number of floats to read * @param Buffer pointer to buffer to place floats into * @return Pointer to buffer holding floats or NULL if EOF * @note Globals: None * @note Exceptions: ILLEGALFLOAT * @note History: 6/6/89, DSJ, Created. 
*/ FLOAT32 *ReadNFloats(TFile *fp, uinT16 N, FLOAT32 Buffer[]) { const int kMaxLineSize = 1024; char line[kMaxLineSize]; if (fp->FGets(line, kMaxLineSize) == nullptr) { tprintf("Hit EOF in ReadNFloats!\n"); return nullptr; } bool needs_free = false; if (Buffer == NULL) { Buffer = reinterpret_cast(Emalloc(N * sizeof(FLOAT32))); needs_free = true; } char *startptr = line; for (int i = 0; i < N; i++) { char *endptr; Buffer[i] = strtof(startptr, &endptr); if (endptr == startptr) { tprintf("Read of %d floats failed!\n", N); if (needs_free) Efree(Buffer); return nullptr; } startptr = endptr; } return Buffer; } /** * This routine writes an array of dimension descriptors to * the specified text file. * @param File open text file to write param descriptors to * @param N number of param descriptors to write * @param ParamDesc array of param descriptors to write * @return None * @note Globals: None * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]) { int i; for (i = 0; i < N; i++) { if (ParamDesc[i].Circular) fprintf (File, "circular "); else fprintf (File, "linear "); if (ParamDesc[i].NonEssential) fprintf (File, "non-essential "); else fprintf (File, "essential "); fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max); } } /** * This routine writes a textual description of a prototype * to the specified text file. * @param File open text file to write prototype to * @param N number of dimensions in feature space * @param Proto prototype to write out * @return None * @note Globals: None * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. 
*/ void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) { int i; if (Proto->Significant) fprintf (File, "significant "); else fprintf (File, "insignificant "); WriteProtoStyle (File, (PROTOSTYLE) Proto->Style); fprintf (File, "%6d\n\t", Proto->NumSamples); WriteNFloats (File, N, Proto->Mean); fprintf (File, "\t"); switch (Proto->Style) { case spherical: WriteNFloats (File, 1, &(Proto->Variance.Spherical)); break; case elliptical: WriteNFloats (File, N, Proto->Variance.Elliptical); break; case mixed: for (i = 0; i < N; i++) switch (Proto->Distrib[i]) { case normal: fprintf (File, " %9s", "normal"); break; case uniform: fprintf (File, " %9s", "uniform"); break; case D_random: fprintf (File, " %9s", "random"); break; case DISTRIBUTION_COUNT: ASSERT_HOST(!"Distribution count not allowed!"); } fprintf (File, "\n\t"); WriteNFloats (File, N, Proto->Variance.Elliptical); } } /** * This routine writes a text representation of N floats from * an array to a file. All of the floats are placed on one line. * @param File open text file to write N floats to * @param N number of floats to write * @param Array array of floats to write * @return None * @note Globals: None * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) { for (int i = 0; i < N; i++) fprintf(File, " %9.6f", Array[i]); fprintf(File, "\n"); } /** * This routine writes to the specified text file a word * which represents the ProtoStyle. It does not append * a carriage return to the end. * @param File open text file to write prototype style to * @param ProtoStyle prototype style to write * @return None * @note Globals: None * @note Exceptions: None * @note History: 6/8/89, DSJ, Created. 
*/ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { switch (ProtoStyle) { case spherical: fprintf (File, "spherical"); break; case elliptical: fprintf (File, "elliptical"); break; case mixed: fprintf (File, "mixed"); break; case automatic: fprintf (File, "automatic"); break; } } /** * This routine writes a textual description of each prototype * in the prototype list to the specified file. It also * writes a file header which includes the number of dimensions * in feature space and the descriptions for each dimension. * @param File open text file to write prototypes to * @param N number of dimensions in feature space * @param ParamDesc descriptions for each dimension * @param ProtoList list of prototypes to be written * @param WriteSigProtos TRUE to write out significant prototypes * @param WriteInsigProtos TRUE to write out insignificants * @note Globals: None * @return None * @note Exceptions: None * @note History: 6/12/89, DSJ, Created. */ void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos) { PROTOTYPE *Proto; /* write file header */ fprintf(File,"%0d\n",N); WriteParamDesc(File,N,ParamDesc); /* write prototypes */ iterate(ProtoList) { Proto = (PROTOTYPE *) first_node ( ProtoList ); if ((Proto->Significant && WriteSigProtos) || (!Proto->Significant && WriteInsigProtos)) WritePrototype(File, N, Proto); } }