tesseract/classify/adaptive.cpp

551 lines
18 KiB
C++

/******************************************************************************
** Filename: adaptive.c
** Purpose: Adaptive matcher.
** Author: Dan Johnson
** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
/*----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------*/
#include "adaptive.h"
#include "emalloc.h"
#include "globals.h"
#include "classify.h"
#ifdef __UNIX__
#include <assert.h>
#endif
#include <stdio.h>
using tesseract::TFile;
/*----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
* This routine adds a new adapted class to an existing
* set of adapted templates.
*
* @param Templates set of templates to add new class to
* @param Class new class to add to templates
* @param ClassId class id to associate with new class
*
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 14 13:06:09 1991, DSJ, Created.
*/
void AddAdaptedClass(ADAPT_TEMPLATES Templates,
ADAPT_CLASS Class,
CLASS_ID ClassId) {
INT_CLASS IntClass;
assert (Templates != NULL);
assert (Class != NULL);
assert (LegalClassId (ClassId));
assert (UnusedClassIdIn (Templates->Templates, ClassId));
assert (Class->NumPermConfigs == 0);
IntClass = NewIntClass (1, 1);
AddIntClass (Templates->Templates, ClassId, IntClass);
assert (Templates->Class[ClassId] == NULL);
Templates->Class[ClassId] = Class;
} /* AddAdaptedClass */
/*---------------------------------------------------------------------------*/
/**
* This routine frees all memory consumed by a temporary
* configuration.
*
* @param Config config to be freed
*
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 14 13:34:23 1991, DSJ, Created.
*/
void FreeTempConfig(TEMP_CONFIG Config) {
assert (Config != NULL);
FreeBitVector (Config->Protos);
free(Config);
} /* FreeTempConfig */
/*---------------------------------------------------------------------------*/
void FreeTempProto(void *arg) {
PROTO proto = (PROTO) arg;
free(proto);
}
void FreePermConfig(PERM_CONFIG Config) {
assert(Config != NULL);
delete [] Config->Ambigs;
free(Config);
}
/*---------------------------------------------------------------------------*/
/**
* This operation allocates and initializes a new adapted
* class data structure and returns a ptr to it.
*
* @return Ptr to new class data structure.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 14 12:58:13 1991, DSJ, Created.
*/
ADAPT_CLASS NewAdaptedClass() {
ADAPT_CLASS Class;
int i;
Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
Class->NumPermConfigs = 0;
Class->MaxNumTimesSeen = 0;
Class->TempProtos = NIL_LIST;
Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
for (i = 0; i < MAX_NUM_CONFIGS; i++)
TempConfigFor (Class, i) = NULL;
return (Class);
} /* NewAdaptedClass */
/*-------------------------------------------------------------------------*/
void free_adapted_class(ADAPT_CLASS adapt_class) {
int i;
for (i = 0; i < MAX_NUM_CONFIGS; i++) {
if (ConfigIsPermanent (adapt_class, i)
&& PermConfigFor (adapt_class, i) != NULL)
FreePermConfig (PermConfigFor (adapt_class, i));
else if (!ConfigIsPermanent (adapt_class, i)
&& TempConfigFor (adapt_class, i) != NULL)
FreeTempConfig (TempConfigFor (adapt_class, i));
}
FreeBitVector (adapt_class->PermProtos);
FreeBitVector (adapt_class->PermConfigs);
destroy_nodes (adapt_class->TempProtos, FreeTempProto);
Efree(adapt_class);
}
/*---------------------------------------------------------------------------*/
namespace tesseract {
/**
* Allocates memory for adapted tempates.
* each char in unicharset to the newly created templates
*
* @param InitFromUnicharset if true, add an empty class for
* @return Ptr to new adapted templates.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Fri Mar 8 10:15:28 1991, DSJ, Created.
*/
ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
ADAPT_TEMPLATES Templates;
int i;
Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
Templates->Templates = NewIntTemplates ();
Templates->NumPermClasses = 0;
Templates->NumNonEmptyClasses = 0;
/* Insert an empty class for each unichar id in unicharset */
for (i = 0; i < MAX_NUM_CLASSES; i++) {
Templates->Class[i] = NULL;
if (InitFromUnicharset && i < unicharset.size()) {
AddAdaptedClass(Templates, NewAdaptedClass(), i);
}
}
return (Templates);
} /* NewAdaptedTemplates */
// Returns FontinfoId of the given config of the given adapted class.
int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) {
return (ConfigIsPermanent(Class, ConfigId) ?
PermConfigFor(Class, ConfigId)->FontinfoId :
TempConfigFor(Class, ConfigId)->FontinfoId);
}
} // namespace tesseract
/*----------------------------------------------------------------------------*/
void free_adapted_templates(ADAPT_TEMPLATES templates) {
if (templates != NULL) {
int i;
for (i = 0; i < (templates->Templates)->NumClasses; i++)
free_adapted_class (templates->Class[i]);
free_int_templates (templates->Templates);
Efree(templates);
}
}
/*---------------------------------------------------------------------------*/
/**
* This routine allocates and returns a new temporary config.
*
* @param MaxProtoId max id of any proto in new config
* @param FontinfoId font information from pre-trained templates
* @return Ptr to new temp config.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 14 13:28:21 1991, DSJ, Created.
*/
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
int NumProtos = MaxProtoId + 1;
TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT));
Config->Protos = NewBitVector (NumProtos);
Config->NumTimesSeen = 1;
Config->MaxProtoId = MaxProtoId;
Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
zero_all_bits (Config->Protos, Config->ProtoVectorSize);
Config->FontinfoId = FontinfoId;
return (Config);
} /* NewTempConfig */
/*---------------------------------------------------------------------------*/
/**
* This routine allocates and returns a new temporary proto.
*
* @return Ptr to new temporary proto.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Thu Mar 14 13:31:31 1991, DSJ, Created.
*/
TEMP_PROTO NewTempProto() {
return (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
} /* NewTempProto */
/*---------------------------------------------------------------------------*/
namespace tesseract {
/**
* This routine prints a summary of the adapted templates
* in Templates to File.
*
* @param File open text file to print Templates to
* @param Templates adapted templates to print to File
*
* @note Globals: none
* @note Exceptions: none
* @note History: Wed Mar 20 13:35:29 1991, DSJ, Created.
*/
void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
int i;
INT_CLASS IClass;
ADAPT_CLASS AClass;
fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
Templates->NumNonEmptyClasses, Templates->NumPermClasses);
fprintf (File, " Id NC NPC NP NPP\n");
fprintf (File, "------------------------\n");
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
IClass = Templates->Templates->Class[i];
AClass = Templates->Class[i];
if (!IsEmptyAdaptedClass (AClass)) {
fprintf (File, "%5d %s %3d %3d %3d %3d\n",
i, unicharset.id_to_unichar(i),
IClass->NumConfigs, AClass->NumPermConfigs,
IClass->NumProtos,
IClass->NumProtos - count (AClass->TempProtos));
}
}
fprintf (File, "\n");
} /* PrintAdaptedTemplates */
} // namespace tesseract
/*---------------------------------------------------------------------------*/
/**
* Read an adapted class description from File and return
* a ptr to the adapted class.
*
* @param File open file to read adapted class from
* @return Ptr to new adapted class.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 14:11:01 1991, DSJ, Created.
*/
ADAPT_CLASS ReadAdaptedClass(TFile *fp) {
int NumTempProtos;
int NumConfigs;
int i;
ADAPT_CLASS Class;
/* first read high level adapted class structure */
Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
/* then read in the definitions of the permanent protos and configs */
Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
fp->FRead(Class->PermProtos, sizeof(uinT32),
WordsInVectorOfSize(MAX_NUM_PROTOS));
fp->FRead(Class->PermConfigs, sizeof(uinT32),
WordsInVectorOfSize(MAX_NUM_CONFIGS));
/* then read in the list of temporary protos */
fp->FRead(&NumTempProtos, sizeof(int), 1);
Class->TempProtos = NIL_LIST;
for (i = 0; i < NumTempProtos; i++) {
TEMP_PROTO TempProto = (TEMP_PROTO)malloc(sizeof(TEMP_PROTO_STRUCT));
fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
Class->TempProtos = push_last (Class->TempProtos, TempProto);
}
/* then read in the adapted configs */
fp->FRead(&NumConfigs, sizeof(int), 1);
for (i = 0; i < NumConfigs; i++)
if (test_bit (Class->PermConfigs, i))
Class->Config[i].Perm = ReadPermConfig(fp);
else
Class->Config[i].Temp = ReadTempConfig(fp);
return (Class);
} /* ReadAdaptedClass */
/*---------------------------------------------------------------------------*/
namespace tesseract {
/**
* Read a set of adapted templates from File and return
* a ptr to the templates.
*
* @param File open text file to read adapted templates from
* @return Ptr to adapted templates read from File.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Mon Mar 18 15:18:10 1991, DSJ, Created.
*/
ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) {
int i;
ADAPT_TEMPLATES Templates;
/* first read the high level adaptive template struct */
Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
/* then read in the basic integer templates */
Templates->Templates = ReadIntTemplates(fp);
/* then read in the adaptive info for each class */
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
Templates->Class[i] = ReadAdaptedClass(fp);
}
return (Templates);
} /* ReadAdaptedTemplates */
} // namespace tesseract
/*---------------------------------------------------------------------------*/
/**
* Read a permanent configuration description from File
* and return a ptr to it.
*
* @param File open file to read permanent config from
* @return Ptr to new permanent configuration description.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 14:25:26 1991, DSJ, Created.
*/
PERM_CONFIG ReadPermConfig(TFile *fp) {
PERM_CONFIG Config = (PERM_CONFIG)malloc(sizeof(PERM_CONFIG_STRUCT));
uinT8 NumAmbigs;
fp->FRead(&NumAmbigs, sizeof(uinT8), 1);
Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
Config->Ambigs[NumAmbigs] = -1;
fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
return (Config);
} /* ReadPermConfig */
/*---------------------------------------------------------------------------*/
/**
* Read a temporary configuration description from File
* and return a ptr to it.
*
* @param File open file to read temporary config from
* @return Ptr to new temporary configuration description.
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 14:29:59 1991, DSJ, Created.
*/
TEMP_CONFIG ReadTempConfig(TFile *fp) {
TEMP_CONFIG Config = (TEMP_CONFIG)malloc(sizeof(TEMP_CONFIG_STRUCT));
fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
fp->FRead(Config->Protos, sizeof(uinT32), Config->ProtoVectorSize);
return (Config);
} /* ReadTempConfig */
/*---------------------------------------------------------------------------*/
/**
* This routine writes a binary representation of Class
* to File.
*
* @param File open file to write Class to
* @param Class adapted class to write to File
* @param NumConfigs number of configs in Class
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 13:33:51 1991, DSJ, Created.
*/
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
int NumTempProtos;
LIST TempProtos;
int i;
/* first write high level adapted class structure */
fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
/* then write out the definitions of the permanent protos and configs */
fwrite ((char *) Class->PermProtos, sizeof (uinT32),
WordsInVectorOfSize (MAX_NUM_PROTOS), File);
fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
/* then write out the list of temporary protos */
NumTempProtos = count (Class->TempProtos);
fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
TempProtos = Class->TempProtos;
iterate (TempProtos) {
void* proto = first_node(TempProtos);
fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
}
/* then write out the adapted configs */
fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
for (i = 0; i < NumConfigs; i++)
if (test_bit (Class->PermConfigs, i))
WritePermConfig (File, Class->Config[i].Perm);
else
WriteTempConfig (File, Class->Config[i].Temp);
} /* WriteAdaptedClass */
/*---------------------------------------------------------------------------*/
namespace tesseract {
/**
* This routine saves Templates to File in a binary format.
*
* @param File open text file to write Templates to
* @param Templates set of adapted templates to write to File
*
* @note Globals: none
* @note Exceptions: none
* @note History: Mon Mar 18 15:07:32 1991, DSJ, Created.
*/
void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
int i;
/* first write the high level adaptive template struct */
fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
/* then write out the basic integer templates */
WriteIntTemplates (File, Templates->Templates, unicharset);
/* then write out the adaptive info for each class */
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
WriteAdaptedClass (File, Templates->Class[i],
Templates->Templates->Class[i]->NumConfigs);
}
} /* WriteAdaptedTemplates */
} // namespace tesseract
/*---------------------------------------------------------------------------*/
/**
* This routine writes a binary representation of a
* permanent configuration to File.
*
* @param File open file to write Config to
* @param Config permanent config to write to File
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 13:55:44 1991, DSJ, Created.
*/
void WritePermConfig(FILE *File, PERM_CONFIG Config) {
uinT8 NumAmbigs = 0;
assert (Config != NULL);
while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
} /* WritePermConfig */
/*---------------------------------------------------------------------------*/
/**
* This routine writes a binary representation of a
* temporary configuration to File.
*
* @param File open file to write Config to
* @param Config temporary config to write to File
*
* @note Globals: none
* @note Exceptions: none
* @note History: Tue Mar 19 14:00:28 1991, DSJ, Created.
*/
void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
assert (Config != NULL);
fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
fwrite ((char *) Config->Protos, sizeof (uinT32),
Config->ProtoVectorSize, File);
} /* WriteTempConfig */