From 694d3f2c206e0c4256bf26256359220e78a2abd4 Mon Sep 17 00:00:00 2001 From: theraysmith Date: Sat, 11 Jul 2009 02:17:36 +0000 Subject: [PATCH] Changes to classify for 3.00 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@291 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- classify/Makefile.am | 32 +- classify/Makefile.in | 51 +- classify/adaptive.cpp | 314 ++-- classify/adaptive.h | 79 +- classify/adaptmatch.cpp | 3175 ++++++++++++++++++-------------------- classify/adaptmatch.h | 48 +- classify/baseline.cpp | 20 +- classify/baseline.h | 48 +- classify/blobclass.cpp | 65 +- classify/blobclass.h | 5 +- classify/classify.cpp | 86 ++ classify/classify.h | 230 +++ classify/classify.vcproj | 433 ++++++ classify/cluster.h | 2 +- classify/cutoffs.cpp | 39 +- classify/cutoffs.h | 4 - classify/extract.cpp | 19 - classify/extract.h | 6 +- classify/featdefs.cpp | 2 - classify/flexfx.cpp | 40 +- classify/flexfx.h | 3 +- classify/float2int.cpp | 16 +- classify/float2int.h | 31 +- classify/fpoint.cpp | 14 +- classify/fpoint.h | 9 - classify/fxdefs.cpp | 10 +- classify/hideedge.cpp | 2 - classify/intfx.cpp | 56 +- classify/intmatcher.cpp | 294 ++-- classify/intmatcher.h | 34 +- classify/intproto.cpp | 1743 +++++++++++---------- classify/intproto.h | 180 +-- classify/mf.cpp | 38 +- classify/mf.h | 4 +- classify/mfoutline.cpp | 725 ++++----- classify/mfoutline.h | 118 +- classify/mfx.cpp | 133 +- classify/mfx.h | 36 +- classify/normmatch.cpp | 115 +- classify/normmatch.h | 13 +- classify/ocrfeatures.cpp | 42 +- classify/ocrfeatures.h | 11 +- classify/outfeat.cpp | 97 +- classify/outfeat.h | 26 +- classify/picofeat.cpp | 81 +- classify/picofeat.h | 27 +- classify/protos.cpp | 35 +- classify/protos.h | 88 +- classify/sigmenu.cpp | 225 --- classify/sigmenu.h | 39 - classify/speckle.cpp | 85 +- classify/speckle.h | 36 +- classify/xform2d.cpp | 92 +- classify/xform2d.h | 14 +- 54 files changed, 4436 insertions(+), 4734 deletions(-) create mode 100644 classify/classify.cpp create 
mode 100644 classify/classify.h create mode 100755 classify/classify.vcproj delete mode 100644 classify/sigmenu.cpp delete mode 100644 classify/sigmenu.h diff --git a/classify/Makefile.am b/classify/Makefile.am index d8ee29e36..cd7517753 100644 --- a/classify/Makefile.am +++ b/classify/Makefile.am @@ -2,23 +2,29 @@ SUBDIRS = AM_CPPFLAGS = \ -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ - -I$(top_srcdir)/viewer + -I$(top_srcdir)/image -I$(top_srcdir)/viewer + +EXTRA_DIST = classify.vcproj include_HEADERS = \ adaptive.h adaptmatch.h baseline.h blobclass.h chartoname.h \ - cluster.h clusttool.h cutoffs.h extern.h extract.h featdefs.h \ - flexfx.h float2int.h fpoint.h fxdefs.h fxid.h hideedge.h \ - intfx.h intmatcher.h intproto.h kdtree.h mfdefs.h mf.h \ - mfoutline.h mfx.h normfeat.h normmatch.h ocrfeatures.h \ - outfeat.h picofeat.h protos.h sigmenu.h speckle.h xform2d.h + classify.h cluster.h clusttool.h cutoffs.h \ + extern.h extract.h \ + featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h fxid.h \ + hideedge.h intfx.h intmatcher.h intproto.h kdtree.h \ + mf.h mfdefs.h mfoutline.h mfx.h \ + normfeat.h normmatch.h \ + ocrfeatures.h outfeat.h picofeat.h protos.h \ + speckle.h xform2d.h lib_LIBRARIES = libtesseract_classify.a libtesseract_classify_a_SOURCES = \ adaptive.cpp adaptmatch.cpp baseline.cpp blobclass.cpp \ - chartoname.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ - extract.cpp featdefs.cpp flexfx.cpp float2int.cpp \ - fpoint.cpp fxdefs.cpp hideedge.cpp intfx.cpp intmatcher.cpp \ - intproto.cpp kdtree.cpp mf.cpp mfdefs.cpp mfoutline.cpp \ - mfx.cpp normfeat.cpp normmatch.cpp ocrfeatures.cpp \ - outfeat.cpp picofeat.cpp protos.cpp sigmenu.cpp speckle.cpp \ - xform2d.cpp + chartoname.cpp classify.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ + extract.cpp \ + featdefs.cpp flexfx.cpp float2int.cpp fpoint.cpp fxdefs.cpp \ + hideedge.cpp intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ + mf.cpp mfdefs.cpp 
mfoutline.cpp mfx.cpp \ + normfeat.cpp normmatch.cpp \ + ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \ + speckle.cpp xform2d.cpp diff --git a/classify/Makefile.in b/classify/Makefile.in index 36b1ba433..4b98d2e64 100644 --- a/classify/Makefile.in +++ b/classify/Makefile.in @@ -59,15 +59,15 @@ libtesseract_classify_a_AR = $(AR) $(ARFLAGS) libtesseract_classify_a_LIBADD = am_libtesseract_classify_a_OBJECTS = adaptive.$(OBJEXT) \ adaptmatch.$(OBJEXT) baseline.$(OBJEXT) blobclass.$(OBJEXT) \ - chartoname.$(OBJEXT) cluster.$(OBJEXT) clusttool.$(OBJEXT) \ - cutoffs.$(OBJEXT) extract.$(OBJEXT) featdefs.$(OBJEXT) \ - flexfx.$(OBJEXT) float2int.$(OBJEXT) fpoint.$(OBJEXT) \ - fxdefs.$(OBJEXT) hideedge.$(OBJEXT) intfx.$(OBJEXT) \ - intmatcher.$(OBJEXT) intproto.$(OBJEXT) kdtree.$(OBJEXT) \ - mf.$(OBJEXT) mfdefs.$(OBJEXT) mfoutline.$(OBJEXT) \ - mfx.$(OBJEXT) normfeat.$(OBJEXT) normmatch.$(OBJEXT) \ - ocrfeatures.$(OBJEXT) outfeat.$(OBJEXT) picofeat.$(OBJEXT) \ - protos.$(OBJEXT) sigmenu.$(OBJEXT) speckle.$(OBJEXT) \ + chartoname.$(OBJEXT) classify.$(OBJEXT) cluster.$(OBJEXT) \ + clusttool.$(OBJEXT) cutoffs.$(OBJEXT) extract.$(OBJEXT) \ + featdefs.$(OBJEXT) flexfx.$(OBJEXT) float2int.$(OBJEXT) \ + fpoint.$(OBJEXT) fxdefs.$(OBJEXT) hideedge.$(OBJEXT) \ + intfx.$(OBJEXT) intmatcher.$(OBJEXT) intproto.$(OBJEXT) \ + kdtree.$(OBJEXT) mf.$(OBJEXT) mfdefs.$(OBJEXT) \ + mfoutline.$(OBJEXT) mfx.$(OBJEXT) normfeat.$(OBJEXT) \ + normmatch.$(OBJEXT) ocrfeatures.$(OBJEXT) outfeat.$(OBJEXT) \ + picofeat.$(OBJEXT) protos.$(OBJEXT) speckle.$(OBJEXT) \ xform2d.$(OBJEXT) libtesseract_classify_a_OBJECTS = \ $(am_libtesseract_classify_a_OBJECTS) @@ -203,26 +203,31 @@ SUBDIRS = AM_CPPFLAGS = \ -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \ -I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \ - -I$(top_srcdir)/viewer + -I$(top_srcdir)/image -I$(top_srcdir)/viewer +EXTRA_DIST = classify.vcproj include_HEADERS = \ adaptive.h adaptmatch.h baseline.h blobclass.h chartoname.h \ - cluster.h 
clusttool.h cutoffs.h extern.h extract.h featdefs.h \ - flexfx.h float2int.h fpoint.h fxdefs.h fxid.h hideedge.h \ - intfx.h intmatcher.h intproto.h kdtree.h mfdefs.h mf.h \ - mfoutline.h mfx.h normfeat.h normmatch.h ocrfeatures.h \ - outfeat.h picofeat.h protos.h sigmenu.h speckle.h xform2d.h + classify.h cluster.h clusttool.h cutoffs.h \ + extern.h extract.h \ + featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h fxid.h \ + hideedge.h intfx.h intmatcher.h intproto.h kdtree.h \ + mf.h mfdefs.h mfoutline.h mfx.h \ + normfeat.h normmatch.h \ + ocrfeatures.h outfeat.h picofeat.h protos.h \ + speckle.h xform2d.h lib_LIBRARIES = libtesseract_classify.a libtesseract_classify_a_SOURCES = \ adaptive.cpp adaptmatch.cpp baseline.cpp blobclass.cpp \ - chartoname.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ - extract.cpp featdefs.cpp flexfx.cpp float2int.cpp \ - fpoint.cpp fxdefs.cpp hideedge.cpp intfx.cpp intmatcher.cpp \ - intproto.cpp kdtree.cpp mf.cpp mfdefs.cpp mfoutline.cpp \ - mfx.cpp normfeat.cpp normmatch.cpp ocrfeatures.cpp \ - outfeat.cpp picofeat.cpp protos.cpp sigmenu.cpp speckle.cpp \ - xform2d.cpp + chartoname.cpp classify.cpp cluster.cpp clusttool.cpp cutoffs.cpp \ + extract.cpp \ + featdefs.cpp flexfx.cpp float2int.cpp fpoint.cpp fxdefs.cpp \ + hideedge.cpp intfx.cpp intmatcher.cpp intproto.cpp kdtree.cpp \ + mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \ + normfeat.cpp normmatch.cpp \ + ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \ + speckle.cpp xform2d.cpp all: all-recursive @@ -302,6 +307,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/baseline.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blobclass.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chartoname.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/classify.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cluster.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clusttool.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cutoffs.Po@am__quote@ @@ -326,7 +332,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/outfeat.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/picofeat.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/protos.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sigmenu.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/speckle.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xform2d.Po@am__quote@ diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp index 5b7a0b5b3..25bffa991 100644 --- a/classify/adaptive.cpp +++ b/classify/adaptive.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: adaptive.c - ** Purpose: Adaptive matcher. - ** Author: Dan Johnson - ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. + ** Filename: adaptive.c + ** Purpose: Adaptive matcher. + ** Author: Dan Johnson + ** History: Fri Mar 8 10:00:21 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -23,6 +23,7 @@ #include "emalloc.h" #include "freelist.h" #include "globals.h" +#include "classify.h" #ifdef __UNIX__ #include @@ -33,23 +34,22 @@ Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -int AddAdaptedClass(ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, - CLASS_ID ClassId) { +void AddAdaptedClass(ADAPT_TEMPLATES Templates, + ADAPT_CLASS Class, + CLASS_ID ClassId) { /* - ** Parameters: - ** Templates set of templates to add new class to - ** Class new class to add to templates - ** ClassId class id to associate with new class - ** Globals: none - ** Operation: This routine adds a new adapted class to an existing - ** set of adapted templates. - ** Return: The class index of the new class. - ** Exceptions: none - ** History: Thu Mar 14 13:06:09 1991, DSJ, Created. + ** Parameters: + ** Templates set of templates to add new class to + ** Class new class to add to templates + ** ClassId class id to associate with new class + ** Globals: none + ** Operation: This routine adds a new adapted class to an existing + ** set of adapted templates. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 14 13:06:09 1991, DSJ, Created. 
*/ INT_CLASS IntClass; - CLASS_INDEX ClassIndex; assert (Templates != NULL); assert (Class != NULL); @@ -58,13 +58,10 @@ int AddAdaptedClass(ADAPT_TEMPLATES Templates, assert (Class->NumPermConfigs == 0); IntClass = NewIntClass (1, 1); - ClassIndex = AddIntClass (Templates->Templates, ClassId, IntClass); + AddIntClass (Templates->Templates, ClassId, IntClass); - assert (Templates->Class[ClassIndex] == NULL); - - Templates->Class[ClassIndex] = Class; - - return (ClassIndex); + assert (Templates->Class[ClassId] == NULL); + Templates->Class[ClassId] = Class; } /* AddAdaptedClass */ @@ -72,14 +69,14 @@ int AddAdaptedClass(ADAPT_TEMPLATES Templates, /*---------------------------------------------------------------------------*/ void FreeTempConfig(TEMP_CONFIG Config) { /* - ** Parameters: - ** Config config to be freed - ** Globals: none - ** Operation: This routine frees all memory consumed by a temporary - ** configuration. - ** Return: none - ** Exceptions: none - ** History: Thu Mar 14 13:34:23 1991, DSJ, Created. + ** Parameters: + ** Config config to be freed + ** Globals: none + ** Operation: This routine frees all memory consumed by a temporary + ** configuration. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 14 13:34:23 1991, DSJ, Created. */ assert (Config != NULL); @@ -101,13 +98,13 @@ void FreeTempProto(void *arg) { /*---------------------------------------------------------------------------*/ ADAPT_CLASS NewAdaptedClass() { /* - ** Parameters: none - ** Globals: none - ** Operation: This operation allocates and initializes a new adapted - ** class data structure and returns a ptr to it. - ** Return: Ptr to new class data structure. - ** Exceptions: none - ** History: Thu Mar 14 12:58:13 1991, DSJ, Created. + ** Parameters: none + ** Globals: none + ** Operation: This operation allocates and initializes a new adapted + ** class data structure and returns a ptr to it. + ** Return: Ptr to new class data structure. 
+ ** Exceptions: none + ** History: Thu Mar 14 12:58:13 1991, DSJ, Created. */ ADAPT_CLASS Class; int i; @@ -149,14 +146,18 @@ void free_adapted_class(ADAPT_CLASS adapt_class) { /*---------------------------------------------------------------------------*/ -ADAPT_TEMPLATES NewAdaptedTemplates() { +namespace tesseract { +ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { /* - ** Parameters: none - ** Globals: none - ** Operation: - ** Return: none - ** Exceptions: none - ** History: Fri Mar 8 10:15:28 1991, DSJ, Created. + ** Parameters: + ** PopulateFromUnicharset if true, add an empty class for + ** each char in unicharset to the + ** newly created templates + ** Globals: none + ** Operation: Allocates memory for adapted tempates. + ** Return: Ptr to new adapted templates. + ** Exceptions: none + ** History: Fri Mar 8 10:15:28 1991, DSJ, Created. */ ADAPT_TEMPLATES Templates; int i; @@ -165,14 +166,20 @@ ADAPT_TEMPLATES NewAdaptedTemplates() { Templates->Templates = NewIntTemplates (); Templates->NumPermClasses = 0; + Templates->NumNonEmptyClasses = 0; - for (i = 0; i < MAX_NUM_CLASSES; i++) + /* Insert an empty class for each unichar id in unicharset */ + for (i = 0; i < MAX_NUM_CLASSES; i++) { Templates->Class[i] = NULL; + if (InitFromUnicharset && i < unicharset.size()) { + AddAdaptedClass(Templates, NewAdaptedClass(), i); + } + } return (Templates); } /* NewAdaptedTemplates */ - +} // namespace tesseract /*----------------------------------------------------------------------------*/ void free_adapted_templates(ADAPT_TEMPLATES templates) { @@ -190,14 +197,14 @@ void free_adapted_templates(ADAPT_TEMPLATES templates) { /*---------------------------------------------------------------------------*/ TEMP_CONFIG NewTempConfig(int MaxProtoId) { /* - ** Parameters: - ** MaxProtoId max id of any proto in new config - ** Globals: none - ** Operation: This routine allocates and returns a new temporary - ** config. 
- ** Return: Ptr to new temp config. - ** Exceptions: none - ** History: Thu Mar 14 13:28:21 1991, DSJ, Created. + ** Parameters: + ** MaxProtoId max id of any proto in new config + ** Globals: none + ** Operation: This routine allocates and returns a new temporary + ** config. + ** Return: Ptr to new temp config. + ** Exceptions: none + ** History: Thu Mar 14 13:28:21 1991, DSJ, Created. */ TEMP_CONFIG Config; int NumProtos = MaxProtoId + 1; @@ -221,12 +228,12 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId) { /*---------------------------------------------------------------------------*/ TEMP_PROTO NewTempProto() { /* - ** Parameters: none - ** Globals: none - ** Operation: This routine allocates and returns a new temporary proto. - ** Return: Ptr to new temporary proto. - ** Exceptions: none - ** History: Thu Mar 14 13:31:31 1991, DSJ, Created. + ** Parameters: none + ** Globals: none + ** Operation: This routine allocates and returns a new temporary proto. + ** Return: Ptr to new temporary proto. + ** Exceptions: none + ** History: Thu Mar 14 13:31:31 1991, DSJ, Created. */ return ((TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT")); @@ -234,17 +241,18 @@ TEMP_PROTO NewTempProto() { /*---------------------------------------------------------------------------*/ -void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { +namespace tesseract { +void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { /* - ** Parameters: - ** File open text file to print Templates to - ** Templates adapted templates to print to File - ** Globals: none - ** Operation: This routine prints a summary of the adapted templates - ** in Templates to File. - ** Return: none - ** Exceptions: none - ** History: Wed Mar 20 13:35:29 1991, DSJ, Created. 
+ ** Parameters: + ** File open text file to print Templates to + ** Templates adapted templates to print to File + ** Globals: none + ** Operation: This routine prints a summary of the adapted templates + ** in Templates to File. + ** Return: none + ** Exceptions: none + ** History: Wed Mar 20 13:35:29 1991, DSJ, Created. */ int i; INT_CLASS IClass; @@ -253,37 +261,39 @@ void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { #ifndef SECURE_NAMES fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n"); fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n", - (Templates->Templates)->NumClasses, Templates->NumPermClasses); - fprintf (File, "Index Id NC NPC NP NPP\n"); + Templates->NumNonEmptyClasses, Templates->NumPermClasses); + fprintf (File, " Id NC NPC NP NPP\n"); fprintf (File, "------------------------\n"); for (i = 0; i < (Templates->Templates)->NumClasses; i++) { IClass = Templates->Templates->Class[i]; AClass = Templates->Class[i]; - - fprintf (File, "%5d %s %3d %3d %3d %3d\n", - i, unicharset.id_to_unichar(Templates->Templates->ClassIdFor[i]), + if (!IsEmptyAdaptedClass (AClass)) { + fprintf (File, "%5d %s %3d %3d %3d %3d\n", + i, unicharset.id_to_unichar(i), IClass->NumConfigs, AClass->NumPermConfigs, IClass->NumProtos, IClass->NumProtos - count (AClass->TempProtos)); + } } #endif fprintf (File, "\n"); } /* PrintAdaptedTemplates */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ ADAPT_CLASS ReadAdaptedClass(FILE *File) { /* - ** Parameters: - ** File open file to read adapted class from - ** Globals: none - ** Operation: Read an adapted class description from File and return - ** a ptr to the adapted class. - ** Return: Ptr to new adapted class. - ** Exceptions: none - ** History: Tue Mar 19 14:11:01 1991, DSJ, Created. 
+ ** Parameters: + ** File open file to read adapted class from + ** Globals: none + ** Operation: Read an adapted class description from File and return + ** a ptr to the adapted class. + ** Return: Ptr to new adapted class. + ** Exceptions: none + ** History: Tue Mar 19 14:11:01 1991, DSJ, Created. */ int NumTempProtos; int NumConfigs; @@ -328,16 +338,17 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) { /*---------------------------------------------------------------------------*/ -ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File) { +namespace tesseract { +ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) { /* - ** Parameters: - ** File open text file to read adapted templates from - ** Globals: none - ** Operation: Read a set of adapted templates from File and return - ** a ptr to the templates. - ** Return: Ptr to adapted templates read from File. - ** Exceptions: none - ** History: Mon Mar 18 15:18:10 1991, DSJ, Created. + ** Parameters: + ** File open text file to read adapted templates from + ** Globals: none + ** Operation: Read a set of adapted templates from File and return + ** a ptr to the templates. + ** Return: Ptr to adapted templates read from File. + ** Exceptions: none + ** History: Mon Mar 18 15:18:10 1991, DSJ, Created. 
*/ int i; ADAPT_TEMPLATES Templates; @@ -347,7 +358,7 @@ ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File) { fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File); /* then read in the basic integer templates */ - Templates->Templates = ReadIntTemplates (File, FALSE); + Templates->Templates = ReadIntTemplates (File); /* then read in the adaptive info for each class */ for (i = 0; i < (Templates->Templates)->NumClasses; i++) { @@ -356,25 +367,26 @@ ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File) { return (Templates); } /* ReadAdaptedTemplates */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ PERM_CONFIG ReadPermConfig(FILE *File) { /* - ** Parameters: - ** File open file to read permanent config from - ** Globals: none - ** Operation: Read a permanent configuration description from File - ** and return a ptr to it. - ** Return: Ptr to new permanent configuration description. - ** Exceptions: none - ** History: Tue Mar 19 14:25:26 1991, DSJ, Created. + ** Parameters: + ** File open file to read permanent config from + ** Globals: none + ** Operation: Read a permanent configuration description from File + ** and return a ptr to it. + ** Return: Ptr to new permanent configuration description. + ** Exceptions: none + ** History: Tue Mar 19 14:25:26 1991, DSJ, Created. 
*/ PERM_CONFIG Config; uinT8 NumAmbigs; fread ((char *) &NumAmbigs, sizeof (uinT8), 1, File); - Config = (PERM_CONFIG) Emalloc (sizeof (char) * (NumAmbigs + 1)); + Config = (PERM_CONFIG) Emalloc (sizeof (UNICHAR_ID) * (NumAmbigs + 1)); fread (Config, sizeof (UNICHAR_ID), NumAmbigs, File); Config[NumAmbigs] = -1; @@ -386,14 +398,14 @@ PERM_CONFIG ReadPermConfig(FILE *File) { /*---------------------------------------------------------------------------*/ TEMP_CONFIG ReadTempConfig(FILE *File) { /* - ** Parameters: - ** File open file to read temporary config from - ** Globals: none - ** Operation: Read a temporary configuration description from File - ** and return a ptr to it. - ** Return: Ptr to new temporary configuration description. - ** Exceptions: none - ** History: Tue Mar 19 14:29:59 1991, DSJ, Created. + ** Parameters: + ** File open file to read temporary config from + ** Globals: none + ** Operation: Read a temporary configuration description from File + ** and return a ptr to it. + ** Return: Ptr to new temporary configuration description. + ** Exceptions: none + ** History: Tue Mar 19 14:29:59 1991, DSJ, Created. */ TEMP_CONFIG Config; @@ -414,16 +426,16 @@ TEMP_CONFIG ReadTempConfig(FILE *File) { /*---------------------------------------------------------------------------*/ void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { /* - ** Parameters: - ** File open file to write Class to - ** Class adapted class to write to File - ** NumConfigs number of configs in Class - ** Globals: none - ** Operation: This routine writes a binary representation of Class - ** to File. - ** Return: none - ** Exceptions: none - ** History: Tue Mar 19 13:33:51 1991, DSJ, Created. + ** Parameters: + ** File open file to write Class to + ** Class adapted class to write to File + ** NumConfigs number of configs in Class + ** Globals: none + ** Operation: This routine writes a binary representation of Class + ** to File. 
+ ** Return: none + ** Exceptions: none + ** History: Tue Mar 19 13:33:51 1991, DSJ, Created. */ int NumTempProtos; LIST TempProtos; @@ -459,16 +471,17 @@ void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) { /*---------------------------------------------------------------------------*/ -void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { +namespace tesseract { +void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { /* - ** Parameters: - ** File open text file to write Templates to - ** Templates set of adapted templates to write to File - ** Globals: none - ** Operation: This routine saves Templates to File in a binary format. - ** Return: none - ** Exceptions: none - ** History: Mon Mar 18 15:07:32 1991, DSJ, Created. + ** Parameters: + ** File open text file to write Templates to + ** Templates set of adapted templates to write to File + ** Globals: none + ** Operation: This routine saves Templates to File in a binary format. + ** Return: none + ** Exceptions: none + ** History: Mon Mar 18 15:07:32 1991, DSJ, Created. */ int i; @@ -484,20 +497,21 @@ void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { Templates->Templates->Class[i]->NumConfigs); } } /* WriteAdaptedTemplates */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ void WritePermConfig(FILE *File, PERM_CONFIG Config) { /* - ** Parameters: - ** File open file to write Config to - ** Config permanent config to write to File - ** Globals: none - ** Operation: This routine writes a binary representation of a - ** permanent configuration to File. - ** Return: none - ** Exceptions: none - ** History: Tue Mar 19 13:55:44 1991, DSJ, Created. + ** Parameters: + ** File open file to write Config to + ** Config permanent config to write to File + ** Globals: none + ** Operation: This routine writes a binary representation of a + ** permanent configuration to File. 
+ ** Return: none + ** Exceptions: none + ** History: Tue Mar 19 13:55:44 1991, DSJ, Created. */ uinT8 NumAmbigs = 0; @@ -514,15 +528,15 @@ void WritePermConfig(FILE *File, PERM_CONFIG Config) { /*---------------------------------------------------------------------------*/ void WriteTempConfig(FILE *File, TEMP_CONFIG Config) { /* - ** Parameters: - ** File open file to write Config to - ** Config temporary config to write to File - ** Globals: none - ** Operation: This routine writes a binary representation of a - ** temporary configuration to File. - ** Return: none - ** Exceptions: none - ** History: Tue Mar 19 14:00:28 1991, DSJ, Created. + ** Parameters: + ** File open file to write Config to + ** Config temporary config to write to File + ** Globals: none + ** Operation: This routine writes a binary representation of a + ** temporary configuration to File. + ** Return: none + ** Exceptions: none + ** History: Tue Mar 19 14:00:28 1991, DSJ, Created. */ assert (Config != NULL); /* contexts not yet implemented */ diff --git a/classify/adaptive.h b/classify/adaptive.h index ae5376a21..ff21c65c5 100644 --- a/classify/adaptive.h +++ b/classify/adaptive.h @@ -68,6 +68,7 @@ typedef ADAPT_CLASS_STRUCT *ADAPT_CLASS; typedef struct { INT_TEMPLATES Templates; + int NumNonEmptyClasses; uinT8 NumPermClasses; uinT8 dummy[3]; ADAPT_CLASS Class[MAX_NUM_CLASSES]; @@ -77,6 +78,11 @@ typedef ADAPT_TEMPLATES_STRUCT *ADAPT_TEMPLATES; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ +#define NumNonEmptyClassesIn(Template) ((Template)->NumNonEmptyClasses) + +#define IsEmptyAdaptedClass(Class) ((Class)->NumPermConfigs == 0 && \ +(Class)->TempProtos == NIL) + #define ConfigIsPermanent(Class,ConfigId) \ (test_bit ((Class)->PermConfigs, ConfigId)) @@ -95,7 +101,7 @@ typedef ADAPT_TEMPLATES_STRUCT *ADAPT_TEMPLATES; #define 
IncreaseConfidence(TempConfig) \ ((TempConfig)->NumTimesSeen++) -int AddAdaptedClass(ADAPT_TEMPLATES Templates, +void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId); @@ -107,93 +113,22 @@ ADAPT_CLASS NewAdaptedClass(); void free_adapted_class(ADAPT_CLASS adapt_class); -ADAPT_TEMPLATES NewAdaptedTemplates(); - void free_adapted_templates(ADAPT_TEMPLATES templates); TEMP_CONFIG NewTempConfig(int MaxProtoId); TEMP_PROTO NewTempProto(); -void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); - ADAPT_CLASS ReadAdaptedClass(FILE *File); -ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File); - PERM_CONFIG ReadPermConfig(FILE *File); TEMP_CONFIG ReadTempConfig(FILE *File); void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs); -void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); - void WritePermConfig(FILE *File, PERM_CONFIG Config); void WriteTempConfig(FILE *File, TEMP_CONFIG Config); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* adaptive.c -int AddAdaptedClass - _ARGS((ADAPT_TEMPLATES Templates, - ADAPT_CLASS Class, - CLASS_ID ClassId)); - -void FreeTempConfig - _ARGS((TEMP_CONFIG Config)); - -ADAPT_CLASS NewAdaptedClass - _ARGS((void)); - -ADAPT_TEMPLATES NewAdaptedTemplates - _ARGS((void)); - -TEMP_CONFIG NewTempConfig - _ARGS((int MaxProtoId)); - -TEMP_PROTO NewTempProto - _ARGS((void)); - -void PrintAdaptedTemplates - _ARGS((FILE *File, - ADAPT_TEMPLATES Templates)); - -ADAPT_CLASS ReadAdaptedClass - _ARGS((FILE *File)); - -ADAPT_TEMPLATES ReadAdaptedTemplates - _ARGS((FILE *File)); - -PERM_CONFIG ReadPermConfig - _ARGS((FILE *File)); - -TEMP_CONFIG ReadTempConfig - _ARGS((FILE *File)); - -void WriteAdaptedClass - _ARGS((FILE *File, - ADAPT_CLASS Class, - int NumConfigs)); - -void WriteAdaptedTemplates - _ARGS((FILE *File, - ADAPT_TEMPLATES Templates)); - -void WritePermConfig - _ARGS((FILE *File, - PERM_CONFIG Config)); - 
-void WriteTempConfig - _ARGS((FILE *File, - TEMP_CONFIG Config)); - -#undef _ARGS -*/ #endif diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 3e7cfa0ca..fc8712b63 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -28,11 +28,9 @@ #include "outfeat.h" #include "emalloc.h" #include "intfx.h" -#include "permnum.h" #include "speckle.h" #include "efio.h" #include "normmatch.h" -#include "stopper.h" #include "permute.h" #include "context.h" #include "ndminx.h" @@ -42,6 +40,9 @@ #include "werd.h" #include "callcpp.h" #include "tordvars.h" +#include "varable.h" +#include "classify.h" +#include "unicharset.h" #include #include @@ -53,24 +54,23 @@ #endif #define ADAPT_TEMPLATE_SUFFIX ".a" -#define BUILT_IN_TEMPLATES_FILE "inttemp" -#define BUILT_IN_CUTOFFS_FILE "pffmtable" #define MAX_MATCHES 10 #define UNLIKELY_NUM_FEAT 200 #define NO_DEBUG 0 #define MAX_ADAPTABLE_WERD_SIZE 40 -#define ADAPTABLE_WERD (GOOD_NUMBER + 0.05) +#define ADAPTABLE_WERD (GOOD_WERD + 0.05) #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT) #define WORST_POSSIBLE_RATING (1.0) -typedef struct +struct ADAPT_RESULTS { inT32 BlobLength; int NumMatches; + bool HasNonfragment; CLASS_ID Classes[MAX_NUM_CLASSES]; FLOAT32 Ratings[MAX_CLASS_ID + 1]; uinT8 Configs[MAX_CLASS_ID + 1]; @@ -78,10 +78,23 @@ typedef struct CLASS_ID BestClass; uinT8 BestConfig; CLASS_PRUNER_RESULTS CPResults; -} + + // Initializes data members to the default values. Sets the initial + // rating of each class to be the worst possible rating (1.0). 
+ inline void Initialize() { + BlobLength = MAX_INT32; + NumMatches = 0; + HasNonfragment = false; + BestRating = WORST_POSSIBLE_RATING; + BestClass = NO_CLASS; + BestConfig = 0; + for (int i = 0; i <= MAX_CLASS_ID; ++i) { + Ratings[i] = WORST_POSSIBLE_RATING; + } + } +}; -ADAPT_RESULTS; typedef struct { @@ -97,10 +110,10 @@ PROTO_KEY; Private Macros ----------------------------------------------------------------------------**/ #define MarginalMatch(Rating) \ -((Rating) > GreatAdaptiveMatch) +((Rating) > matcher_great_threshold) #define TempConfigReliable(Config) \ -((Config)->NumTimesSeen >= ReliableConfigThreshold) +((Config)->NumTimesSeen >= matcher_min_examples_for_prototyping) #define InitIntFX() (FeaturesHaveBeenExtracted = FALSE) @@ -117,11 +130,6 @@ void AdaptToPunc(TBLOB *Blob, CLASS_ID ClassId, FLOAT32 Threshold); -void AddNewResult(ADAPT_RESULTS *Results, - CLASS_ID ClassId, - FLOAT32 Rating, - int ConfigId); - void AmbigClassifier(TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, @@ -140,14 +148,13 @@ void CharNormClassifier(TBLOB *Blob, INT_TEMPLATES Templates, ADAPT_RESULTS *Results); -void ClassifyAsNoise(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results); +void ClassifyAsNoise(ADAPT_RESULTS *Results); int CompareCurrentRatings(const void *arg1, const void *arg2); -LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results); +void ConvertMatchesToChoices(ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices); void DebugAdaptiveClassifier(TBLOB *Blob, LINE_STATS *LineStats, @@ -166,6 +173,7 @@ UNICHAR_ID *GetAmbiguities(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID CorrectClass); +namespace tesseract { int GetBaselineFeatures(TBLOB *Blob, LINE_STATS *LineStats, INT_TEMPLATES Templates, @@ -173,14 +181,6 @@ int GetBaselineFeatures(TBLOB *Blob, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength); -FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); - -int GetCharNormFeatures(TBLOB *Blob, - LINE_STATS 
*LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength); int GetIntBaselineFeatures(TBLOB *Blob, LINE_STATS *LineStats, @@ -189,21 +189,10 @@ int GetIntBaselineFeatures(TBLOB *Blob, CLASS_NORMALIZATION_ARRAY CharNormArray, inT32 *BlobLength); -int GetIntCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength); +} // namespace tesseract. void InitMatcherRatings(register FLOAT32 *Rating); -int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures); - PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], @@ -239,10 +228,6 @@ void ShowBestMatchFor(TBLOB *Blob, /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -/* name of current image file being processed */ -extern char imagefile[]; -INT_VAR(tessedit_single_match, FALSE, "Top choice only from CP"); - /* variables used to hold performance statistics */ static int AdaptiveMatcherCalls = 0; static int BaselineClassifierCalls = 0; @@ -271,215 +256,178 @@ comparison function passes to qsort can get at them */ static FLOAT32 *CurrentRatings; /* define globals to hold filenames of training data */ -static const char *BuiltInTemplatesFile = BUILT_IN_TEMPLATES_FILE; -static const char *BuiltInCutoffsFile = BUILT_IN_CUTOFFS_FILE; static CLASS_CUTOFF_ARRAY CharNormCutoffs; static CLASS_CUTOFF_ARRAY BaselineCutoffs; -/* use global variables to hold onto built-in templates and adapted -templates */ -static INT_TEMPLATES PreTrainedTemplates; -static ADAPT_TEMPLATES AdaptedTemplates; - -/* create dummy proto and config masks for use with the built-in templates */ 
-static BIT_VECTOR AllProtosOn; -static BIT_VECTOR PrunedProtos; -static BIT_VECTOR AllConfigsOn; -static BIT_VECTOR AllProtosOff; -static BIT_VECTOR AllConfigsOff; -static BIT_VECTOR TempProtoMask; - /* define control knobs for adaptive matcher */ -make_toggle_const(EnableAdaptiveMatcher, 1, MakeEnableAdaptiveMatcher); -/* PREV DEFAULT 0 */ +BOOL_VAR(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier"); -make_toggle_const(UsePreAdaptedTemplates, 0, MakeUsePreAdaptedTemplates); -make_toggle_const(SaveAdaptedTemplates, 0, MakeSaveAdaptedTemplates); +BOOL_VAR(classify_use_pre_adapted_templates, 0, + "Use pre-adapted classifier templates"); -make_toggle_var(EnableAdaptiveDebugger, 0, MakeEnableAdaptiveDebugger, -18, 1, SetEnableAdaptiveDebugger, "Enable match debugger"); +BOOL_VAR(classify_save_adapted_templates, 0, + "Save adapted templates to a file"); -make_int_var(MatcherDebugLevel, 0, MakeMatcherDebugLevel, -18, 2, SetMatcherDebugLevel, "Matcher Debug Level: "); +BOOL_VAR(classify_enable_adaptive_debugger, 0, "Enable match debugger"); -make_int_var(MatchDebugFlags, 0, MakeMatchDebugFlags, -18, 3, SetMatchDebugFlags, "Matcher Debug Flags: "); +INT_VAR(matcher_debug_level, 0, "Matcher Debug Level"); +INT_VAR(matcher_debug_flags, 0, "Matcher Debug Flags"); -make_toggle_var(EnableLearning, 1, MakeEnableLearning, -18, 4, SetEnableLearning, "Enable learning"); -/* PREV DEFAULT 0 */ - /*record it for multiple pages */ -static int old_enable_learning = 1; +INT_VAR(classify_learning_debug_level, 0, "Learning Debug Level: "); -make_int_var(LearningDebugLevel, 0, MakeLearningDebugLevel, -18, 5, SetLearningDebugLevel, "Learning Debug Level: "); +double_VAR(matcher_good_threshold, 0.125, "Good Match (0-1)"); +double_VAR(matcher_great_threshold, 0.0, "Great Match (0-1)"); -make_float_var(GoodAdaptiveMatch, 0.125, MakeGoodAdaptiveMatch, -18, 6, SetGoodAdaptiveMatch, "Good Match (0-1): "); +double_VAR(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)"); 
+double_VAR(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)"); +double_VAR(matcher_rating_margin, 0.1, "New template margin (0-1)"); +double_VAR(matcher_avg_noise_size, 12.0, "Avg. noise blob length: "); -make_float_var(GreatAdaptiveMatch, 0.0, MakeGreatAdaptiveMatch, -18, 7, SetGreatAdaptiveMatch, "Great Match (0-1): "); -/* PREV DEFAULT 0.10 */ +INT_VAR(matcher_permanent_classes_min, 1, "Min # of permanent classes"); -make_float_var(PerfectRating, 0.02, MakePerfectRating, -18, 8, SetPerfectRating, "Perfect Match (0-1): "); +INT_VAR(matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold"); -make_float_var(BadMatchPad, 0.15, MakeBadMatchPad, -18, 9, SetBadMatchPad, "Bad Match Pad (0-1): "); +double_VAR(matcher_clustering_max_angle_delta, 0.015, + "Maximum angle delta for prototype clustering"); -make_float_var(RatingMargin, 0.1, MakeRatingMargin, -18, 10, SetRatingMargin, "New template margin (0-1): "); +BOOL_VAR(classify_enable_int_fx, 1, "Enable integer fx"); -make_float_var(NoiseBlobLength, 12.0, MakeNoiseBlobLength, -18, 11, SetNoiseBlobLength, "Avg. 
noise blob length: "); +BOOL_VAR(classify_enable_new_adapt_rules, 1, "Enable new adaptation rules"); -make_int_var(MinNumPermClasses, 1, MakeMinNumPermClasses, -18, 12, SetMinNumPermClasses, "Min # of permanent classes: "); -/* PREV DEFAULT 200 */ +double_VAR(rating_scale, 1.5, "Rating scaling factor"); +extern double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); -make_int_var(ReliableConfigThreshold, 2, MakeReliableConfigThreshold, -18, 13, SetReliableConfigThreshold, -"Reliable Config Threshold: "); +INT_VAR(matcher_failed_adaptations_before_reset, 150, + "Number of failed adaptions before adapted templates reset"); -make_float_var(MaxAngleDelta, 0.015, MakeMaxAngleDelta, -18, 14, SetMaxAngleDelta, -"Maximum angle delta for proto clustering: "); - -make_toggle_var(EnableIntFX, 1, MakeEnableIntFX, -18, 15, SetEnableIntFX, "Enable integer fx"); -/* PREV DEFAULT 0 */ - -make_toggle_var(EnableNewAdaptRules, 1, MakeEnableNewAdaptRules, -18, 16, SetEnableNewAdaptRules, -"Enable new adaptation rules"); -/* PREV DEFAULT 0 */ - -make_float_var(RatingScale, 1.5, MakeRatingScale, -18, 17, SetRatingScale, "Rating scale: "); - -make_float_var(CertaintyScale, 20.0, MakeCertaintyScale, -18, 18, SetCertaintyScale, "CertaintyScale: "); - -make_int_var(FailedAdaptionsBeforeReset, 150, MakeFailedAdaptionsBeforeReset, -18, 19, SetFailedAdaptionsBeforeReset, -"Number of failed adaptions before adapted templates reset: "); double_VAR(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used"); -int tess_cn_matching = 0; -int tess_bn_matching = 0; +BOOL_VAR(tess_cn_matching, 0, "Character Normalized Matching"); +BOOL_VAR(tess_bn_matching, 0, "Baseline Normalized Matching"); /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -LIST AdaptiveClassifier(TBLOB 
*Blob, TBLOB *DotBlob, TEXTROW *Row) { +namespace tesseract { +void Classify::AdaptiveClassifier(TBLOB *Blob, + TBLOB *DotBlob, + TEXTROW *Row, + BLOB_CHOICE_LIST *Choices, + CLASS_PRUNER_RESULTS CPResults) { /* - ** Parameters: - ** Blob blob to be classified - ** DotBlob (obsolete) - ** Row row of text that word appears in - ** Globals: - ** CurrentRatings - used by compare function for qsort -** Operation: This routine calls the adaptive matcher which returns -** (in an array) the class id of each class matched. It also -** returns the number of classes matched. -** For each class matched it places the best rating -** found for that class into the Ratings array. -** Bad matches are then removed so that they don't need to be -** sorted. The remaining good matches are then sorted and -** converted to choices. -** This routine also performs some simple speckle filtering. -** Return: List of choices found by adaptive matcher. -** Exceptions: none -** History: Mon Mar 11 10:00:58 1991, DSJ, Created. -*/ - LIST Choices; - ADAPT_RESULTS* Results = new ADAPT_RESULTS; + ** Parameters: Blob blob to be classified + ** DotBlob (obsolete) + ** Row row of text that word appears in + ** Globals: CurrentRatings used by compare function for qsort + ** Operation: This routine calls the adaptive matcher + ** which returns (in an array) the class id of each + ** class matched. + ** It also returns the number of classes matched. + ** For each class matched it places the best rating + ** found for that class into the Ratings array. + ** Bad matches are then removed so that they don't + ** need to be sorted. The remaining good matches are + ** then sorted and converted to choices. + ** This routine also performs some simple speckle + ** filtering. + ** Return: Choices List of choices found by adaptive matcher. + ** CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is + ** filled on return with the choices found by the + ** class pruner and the ratings therefrom. 
Also + ** contains the detailed results of the integer matcher. + ** Exceptions: none + ** History: Mon Mar 11 10:00:58 1991, DSJ, Created. + */ + assert(Choices != NULL); + ADAPT_RESULTS *Results = new ADAPT_RESULTS(); LINE_STATS LineStats; - if (FailedAdaptionsBeforeReset >= 0 && - NumAdaptationsFailed >= FailedAdaptionsBeforeReset) { + if (matcher_failed_adaptations_before_reset >= 0 && + NumAdaptationsFailed >= matcher_failed_adaptations_before_reset) { NumAdaptationsFailed = 0; ResetAdaptiveClassifier(); } if (AdaptedTemplates == NULL) - AdaptedTemplates = NewAdaptedTemplates (); + AdaptedTemplates = NewAdaptedTemplates (true); + EnterClassifyMode; - Results->BlobLength = MAX_INT32; - Results->NumMatches = 0; - Results->BestRating = WORST_POSSIBLE_RATING; - Results->BestClass = NO_CLASS; - Results->BestConfig = 0; + Results->Initialize(); GetLineStatsFromRow(Row, &LineStats); - InitMatcherRatings (Results->Ratings); DoAdaptiveMatch(Blob, &LineStats, Results); + if (CPResults != NULL) + memcpy(CPResults, Results->CPResults, + sizeof(CPResults[0]) * Results->NumMatches); RemoveBadMatches(Results); /* save ratings in a global so that CompareCurrentRatings() can see them */ CurrentRatings = Results->Ratings; - qsort((void*) (Results->Classes), Results->NumMatches, + qsort ((void *) (Results->Classes), Results->NumMatches, sizeof (CLASS_ID), CompareCurrentRatings); - RemoveExtraPuncs(Results); - Choices = ConvertMatchesToChoices(Results); - if (MatcherDebugLevel >= 1) { + RemoveExtraPuncs(Results); + ConvertMatchesToChoices(Results, Choices); + + if (matcher_debug_level >= 1) { cprintf ("AD Matches = "); PrintAdaptiveMatchResults(stdout, Results); } if (LargeSpeckle (Blob, Row)) - Choices = AddLargeSpeckleTo (Choices); + AddLargeSpeckleTo(Choices); #ifndef GRAPHICS_DISABLED - if (EnableAdaptiveDebugger) + if (classify_enable_adaptive_debugger) DebugAdaptiveClassifier(Blob, &LineStats, Results); #endif - NumClassesOutput += count (Choices); - if (Choices == NIL) { - 
char empty_lengths[] = {0}; + NumClassesOutput += Choices->length(); + if (Choices->length() == 0) { if (!bln_numericmode) - tprintf ("Nil classification!\n"); // Should never normally happen. - return (append_choice (NIL, "", empty_lengths, 50.0f, -20.0f, -1)); + tprintf ("Empty classification!\n"); // Should never normally happen. + Choices = new BLOB_CHOICE_LIST(); + BLOB_CHOICE_IT temp_it; + temp_it.set_to_list(Choices); + temp_it.add_to_end(new BLOB_CHOICE(0, 50.0f, -20.0f, -1, NULL)); } delete Results; - return Choices; } /* AdaptiveClassifier */ /*---------------------------------------------------------------------------*/ -void AdaptToWord(TWERD *Word, - TEXTROW *Row, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - const char *rejmap) { +void Classify::AdaptToWord(TWERD *Word, + TEXTROW *Row, + const WERD_CHOICE& BestChoice, + const WERD_CHOICE& BestRawChoice, + const char *rejmap) { /* ** Parameters: ** Word - word to be adapted to -** Row - row of text that word is found in -** BestChoice - best choice for word found by system -** BestRawChoice - best choice for word found by classifier only -** Globals: -** EnableLearning - TRUE if learning is enabled -** Operation: This routine implements a preliminary version of the -** rules which are used to decide which characters to adapt to. -** A word is adapted to if it is in the dictionary or if it -** is a "good" number (no trailing units, etc.). It cannot -** contain broken or merged characters. Within that word, only -** letters and digits are adapted to (no punctuation). -** Return: none -** Exceptions: none -** History: Thu Mar 14 07:40:36 1991, DSJ, Created. 
+ ** word to be adapted to + ** Row + ** row of text that word is found in + ** BestChoice + ** best choice for word found by system + ** BestRawChoice + ** best choice for word found by classifier only + ** Globals: + ** EnableLearning + ** TRUE if learning is enabled + ** Operation: This routine implements a preliminary + ** version of the rules which are used to decide + ** which characters to adapt to. + ** A word is adapted to if it is in the dictionary or + ** if it is a "good" number (no trailing units, etc.). + ** It cannot contain broken or merged characters. + ** Within that word, only letters and digits are + ** adapted to (no punctuation). + ** Return: none + ** Exceptions: none + ** History: Thu Mar 14 07:40:36 1991, DSJ, Created. */ TBLOB *Blob; LINE_STATS LineStats; @@ -487,8 +435,8 @@ void AdaptToWord(TWERD *Word, FLOAT32 *Threshold; const char *map = rejmap; char map_char = '1'; - const char* BestChoice_string = BestChoice.string().string(); - const char* BestChoice_lengths = BestChoice.lengths().string(); + const char* BestChoice_string = BestChoice.unichar_string().string(); + const char* BestChoice_lengths = BestChoice.unichar_lengths().string(); if (strlen(BestChoice_lengths) > MAX_ADAPTABLE_WERD_SIZE) return; @@ -497,8 +445,9 @@ void AdaptToWord(TWERD *Word, NumWordsAdaptedTo++; #ifndef SECURE_NAMES - if (LearningDebugLevel >= 1) - cprintf ("\n\nAdapting to word = %s\n", BestChoice.string().string()); + if (classify_learning_debug_level >= 1) + cprintf ("\n\nAdapting to word = %s\n", + BestChoice.debug_string(unicharset).string()); #endif GetLineStatsFromRow(Row, &LineStats); @@ -545,14 +494,14 @@ void AdaptToWord(TWERD *Word, *(BestChoice_lengths - 1)))) || (!il1_adaption_test && NumOutlinesInBlob(Blob) != 2))) { - if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("Rejecting char = %s\n", unicharset.id_to_unichar( unicharset.unichar_to_id(BestChoice_string, *BestChoice_lengths))); } else { #ifndef SECURE_NAMES - 
if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("Adapting to char = %s, thr= %g\n", unicharset.id_to_unichar( unicharset.unichar_to_id(BestChoice_string, @@ -572,43 +521,43 @@ void AdaptToWord(TWERD *Word, // *Threshold); } } - if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("\n"); } } /* AdaptToWord */ /*---------------------------------------------------------------------------*/ -void EndAdaptiveClassifier() { +void Classify::EndAdaptiveClassifier() { /* ** Parameters: none ** Globals: ** AdaptedTemplates - current set of adapted templates -** SaveAdaptedTemplates - TRUE if templates should be saved -** EnableAdaptiveMatcher - TRUE if adaptive matcher is enabled -** Operation: This routine performs cleanup operations on the -** adaptive classifier. It should be called before the -** program is terminated. Its main function is to save -** the adapted templates to a file. -** Return: none -** Exceptions: none -** History: Tue Mar 19 14:37:06 1991, DSJ, Created. + ** current set of adapted templates + ** classify_save_adapted_templates + ** TRUE if templates should be saved + ** classify_enable_adaptive_matcher + ** TRUE if adaptive matcher is enabled + ** Operation: This routine performs cleanup operations + ** on the adaptive classifier. It should be called + ** before the program is terminated. Its main function + ** is to save the adapted templates to a file. + ** Return: none + ** Exceptions: none + ** History: Tue Mar 19 14:37:06 1991, DSJ, Created. 
*/ - char Filename[256]; + STRING Filename; FILE *File; #ifndef SECURE_NAMES - if (EnableAdaptiveMatcher && SaveAdaptedTemplates) { - strcpy(Filename, imagefile); - strcat(Filename, ADAPT_TEMPLATE_SUFFIX); - File = fopen (Filename, "wb"); + if (AdaptedTemplates != NULL && + classify_enable_adaptive_matcher && classify_save_adapted_templates) { + Filename = imagefile + ADAPT_TEMPLATE_SUFFIX; + File = fopen (Filename.string(), "wb"); if (File == NULL) - cprintf ("Unable to save adapted templates to %s!\n", Filename); + cprintf ("Unable to save adapted templates to %s!\n", Filename.string()); else { - cprintf ("\nSaving adapted templates to %s ...", Filename); + cprintf ("\nSaving adapted templates to %s ...", Filename.string()); fflush(stdout); WriteAdaptedTemplates(File, AdaptedTemplates); cprintf ("\n"); @@ -616,91 +565,93 @@ void EndAdaptiveClassifier() { } } #endif - if (PreTrainedTemplates == NULL) - return; // This function isn't safe to run twice. - EndDangerousAmbigs(); + + if (AdaptedTemplates != NULL) { + free_adapted_templates(AdaptedTemplates); + AdaptedTemplates = NULL; + } + + if (PreTrainedTemplates != NULL) { + free_int_templates(PreTrainedTemplates); + PreTrainedTemplates = NULL; + } + getDict().EndDangerousAmbigs(); FreeNormProtos(); - free_int_templates(PreTrainedTemplates); - PreTrainedTemplates = NULL; - FreeBitVector(AllProtosOn); - FreeBitVector(PrunedProtos); - FreeBitVector(AllConfigsOn); - FreeBitVector(AllProtosOff); - FreeBitVector(AllConfigsOff); - FreeBitVector(TempProtoMask); - AllProtosOn = NULL; - PrunedProtos = NULL; - AllConfigsOn = NULL; - AllProtosOff = NULL; - AllConfigsOff = NULL; - TempProtoMask = NULL; + if (AllProtosOn != NULL) { + FreeBitVector(AllProtosOn); + FreeBitVector(PrunedProtos); + FreeBitVector(AllConfigsOn); + FreeBitVector(AllProtosOff); + FreeBitVector(AllConfigsOff); + FreeBitVector(TempProtoMask); + AllProtosOn = NULL; + PrunedProtos = NULL; + AllConfigsOn = NULL; + AllProtosOff = NULL; + AllConfigsOff = NULL; 
+ TempProtoMask = NULL; + } } /* EndAdaptiveClassifier */ /*---------------------------------------------------------------------------*/ -void InitAdaptiveClassifier() { +void Classify::InitAdaptiveClassifier() { /* ** Parameters: none ** Globals: ** BuiltInTemplatesFile - file to get built-in temps from -** BuiltInCutoffsFile - file to get avg. feat per class from -** PreTrainedTemplates - pre-trained configs and protos -** AdaptedTemplates - templates adapted to current page -** CharNormCutoffs - avg # of features per class -** AllProtosOn - dummy proto mask with all bits 1 -** AllConfigsOn - dummy config mask with all bits 1 -** UsePreAdaptedTemplates - enables use of pre-adapted templates -** Operation: This routine reads in the training information needed -** by the adaptive classifier and saves it into global -** variables. -** Return: none -** Exceptions: none -** History: Mon Mar 11 12:49:34 1991, DSJ, Created. + ** file to get built-in temps from + ** BuiltInCutoffsFile + ** file to get avg. feat per class from + ** PreTrainedTemplates + ** pre-trained configs and protos + ** AdaptedTemplates + ** templates adapted to current page + ** CharNormCutoffs + ** avg # of features per class + ** AllProtosOn + ** dummy proto mask with all bits 1 + ** AllConfigsOn + ** dummy config mask with all bits 1 + ** classify_use_pre_adapted_templates + ** enables use of pre-adapted templates + ** Operation: This routine reads in the training + ** information needed by the adaptive classifier + ** and saves it into global variables. + ** Return: none + ** Exceptions: none + ** History: Mon Mar 11 12:49:34 1991, DSJ, Created. */ - int i; - FILE *File; - STRING Filename; - - if (!EnableAdaptiveMatcher) + if (!classify_enable_adaptive_matcher) return; - if (PreTrainedTemplates != NULL) + if (AllProtosOn != NULL) EndAdaptiveClassifier(); // Don't leak with multiple inits. 
- Filename = language_data_path_prefix; - Filename += BuiltInTemplatesFile; - #ifndef SECURE_NAMES - // cprintf( "\nReading built-in templates from %s ...", - // Filename); - fflush(stdout); - #endif + // If there is no language_data_path_prefix, the classifier will be + // adaptive only. + if (language_data_path_prefix.length() > 0) { + if (!tessdata_manager.SeekToStart(TESSDATA_INTTEMP)) { + inttemp_loaded_ = false; + } else { + PreTrainedTemplates = + ReadIntTemplates(tessdata_manager.GetDataFilePtr()); + if (global_tessdata_manager_debug_level) tprintf("Loaded inttemp\n"); - #ifdef __UNIX__ - File = Efopen (Filename.string(), "r"); - #else - File = Efopen (Filename.string(), "rb"); - #endif - PreTrainedTemplates = ReadIntTemplates (File, TRUE); - fclose(File); + ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_PFFMTABLE)); + ReadNewCutoffs(tessdata_manager.GetDataFilePtr(), + tessdata_manager.GetEndOffset(TESSDATA_PFFMTABLE), + CharNormCutoffs); + if (global_tessdata_manager_debug_level) tprintf("Loaded pffmtable\n"); - Filename = language_data_path_prefix; - Filename += BuiltInCutoffsFile; - #ifndef SECURE_NAMES - // cprintf( "\nReading built-in pico-feature cutoffs from %s ...", - // Filename); - fflush(stdout); - #endif - ReadNewCutoffs (Filename.string(), PreTrainedTemplates->IndexFor, - CharNormCutoffs); + ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_NORMPROTO)); + NormProtos = + ReadNormProtos(tessdata_manager.GetDataFilePtr(), + tessdata_manager.GetEndOffset(TESSDATA_NORMPROTO)); + if (global_tessdata_manager_debug_level) tprintf("Loaded normproto\n"); - GetNormProtos(); + inttemp_loaded_ = true; + } + } InitIntegerMatcher(); InitIntegerFX(); @@ -717,103 +668,57 @@ void InitAdaptiveClassifier() { zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS)); zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS)); - if (UsePreAdaptedTemplates) { + if (classify_use_pre_adapted_templates) { + FILE *File; + STRING Filename; + Filename = 
imagefile; Filename += ADAPT_TEMPLATE_SUFFIX; - File = fopen (Filename.string(), "rb"); - if (File == NULL) - AdaptedTemplates = NewAdaptedTemplates (); - else { + File = fopen(Filename.string(), "rb"); + if (File == NULL) { + AdaptedTemplates = NewAdaptedTemplates(true); + } else { #ifndef SECURE_NAMES - cprintf ("\nReading pre-adapted templates from %s ...", Filename.string()); + cprintf("\nReading pre-adapted templates from %s ...\n", + Filename.string()); fflush(stdout); #endif - AdaptedTemplates = ReadAdaptedTemplates (File); - cprintf ("\n"); + AdaptedTemplates = ReadAdaptedTemplates(File); + cprintf("\n"); fclose(File); PrintAdaptedTemplates(stdout, AdaptedTemplates); - for (i = 0; i < (AdaptedTemplates->Templates)->NumClasses; i++) { - BaselineCutoffs[i] = - CharNormCutoffs[PreTrainedTemplates->IndexFor[ - AdaptedTemplates->Templates->ClassIdFor[i]]]; + for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) { + BaselineCutoffs[i] = CharNormCutoffs[i]; } } } else { if (AdaptedTemplates != NULL) free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = NewAdaptedTemplates (); + AdaptedTemplates = NewAdaptedTemplates(true); } - old_enable_learning = EnableLearning; - } /* InitAdaptiveClassifier */ -void ResetAdaptiveClassifier() { +void Classify::ResetAdaptiveClassifier() { free_adapted_templates(AdaptedTemplates); AdaptedTemplates = NULL; } +} // namespace tesseract /*---------------------------------------------------------------------------*/ -void InitAdaptiveClassifierVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: This routine installs the control knobs used by the - ** adaptive matcher. - ** Return: none - ** Exceptions: none - ** History: Mon Mar 11 12:49:34 1991, DSJ, Created. 
- */ - VALUE dummy; - - string_variable (BuiltInTemplatesFile, "BuiltInTemplatesFile", - BUILT_IN_TEMPLATES_FILE); - string_variable (BuiltInCutoffsFile, "BuiltInCutoffsFile", - BUILT_IN_CUTOFFS_FILE); - - MakeEnableAdaptiveMatcher(); - MakeUsePreAdaptedTemplates(); - MakeSaveAdaptedTemplates(); - - MakeEnableLearning(); - MakeEnableAdaptiveDebugger(); - MakeBadMatchPad(); - MakeGoodAdaptiveMatch(); - MakeGreatAdaptiveMatch(); - MakeNoiseBlobLength(); - MakeMinNumPermClasses(); - MakeReliableConfigThreshold(); - MakeMaxAngleDelta(); - MakeLearningDebugLevel(); - MakeMatcherDebugLevel(); - MakeMatchDebugFlags(); - MakeRatingMargin(); - MakePerfectRating(); - MakeEnableIntFX(); - MakeEnableNewAdaptRules(); - MakeRatingScale(); - MakeCertaintyScale(); - MakeFailedAdaptionsBeforeReset(); - - InitPicoFXVars(); - InitOutlineFXVars(); //? - -} /* InitAdaptiveClassifierVars */ - - -/*---------------------------------------------------------------------------*/ -void PrintAdaptiveStatistics(FILE *File) { +namespace tesseract { +void Classify::PrintAdaptiveStatistics(FILE *File) { /* ** Parameters: ** File - open text file to print adaptive statistics to -** Globals: none -** Operation: Print to File the statistics which have been gathered -** for the adaptive matcher. -** Return: none -** Exceptions: none -** History: Thu Apr 18 14:37:37 1991, DSJ, Created. + ** open text file to print adaptive statistics to + ** Globals: none + ** Operation: Print to File the statistics which have + ** been gathered for the adaptive matcher. + ** Return: none + ** Exceptions: none + ** History: Thu Apr 18 14:37:37 1991, DSJ, Created. 
*/ #ifndef SECURE_NAMES @@ -840,84 +745,89 @@ void PrintAdaptiveStatistics(FILE *File) { fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo); fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo); - if (UsePreAdaptedTemplates) - PrintAdaptedTemplates(File, AdaptedTemplates); + PrintAdaptedTemplates(File, AdaptedTemplates); #endif } /* PrintAdaptiveStatistics */ /*---------------------------------------------------------------------------*/ -void SettupPass1() { +void Classify::SettupPass1() { /* ** Parameters: none ** Globals: ** EnableLearning - set to TRUE by this routine -** Operation: This routine prepares the adaptive matcher for the start -** of the first pass. Learning is enabled (unless it is -** disabled for the whole program). -** Return: none -** Exceptions: none -** History: Mon Apr 15 16:39:29 1991, DSJ, Created. + ** set to TRUE by this routine + ** Operation: This routine prepares the adaptive + ** matcher for the start + ** of the first pass. Learning is enabled (unless it + ** is disabled for the whole program). + ** Return: none + ** Exceptions: none + ** History: Mon Apr 15 16:39:29 1991, DSJ, Created. */ /* Note: this is somewhat redundant, it simply says that if learning is enabled then it will remain enabled on the first pass. If it is disabled, then it will remain disabled. This is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning. */ - EnableLearning = old_enable_learning; + EnableLearning = classify_enable_learning; - SettupStopperPass1(); + getDict().SettupStopperPass1(); } /* SettupPass1 */ /*---------------------------------------------------------------------------*/ -void SettupPass2() { +void Classify::SettupPass2() { /* ** Parameters: none ** Globals: ** EnableLearning - set to FALSE by this routine -** Operation: This routine prepares the adaptive matcher for the start -** of the second pass. Further learning is disabled. 
-** Return: none -** Exceptions: none -** History: Mon Apr 15 16:39:29 1991, DSJ, Created. + ** set to FALSE by this routine + ** Operation: This routine prepares the adaptive + ** matcher for the start of the second pass. Further + ** learning is disabled. + ** Return: none + ** Exceptions: none + ** History: Mon Apr 15 16:39:29 1991, DSJ, Created. */ EnableLearning = FALSE; - SettupStopperPass2(); + getDict().SettupStopperPass2(); } /* SettupPass2 */ /*---------------------------------------------------------------------------*/ -void MakeNewAdaptedClass(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - ADAPT_TEMPLATES Templates) { +void Classify::InitAdaptedClass(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + ADAPT_CLASS Class, + ADAPT_TEMPLATES Templates) { /* - ** Parameters: - ** Blob - blob to model new class after -** LineStats - statistics for text row blob is in -** ClassId - id of new class to be created -** Templates - adapted templates to add new class to -** Globals: -** AllProtosOn - dummy mask with all 1's -** BaselineCutoffs - kludge needed to get cutoffs -** PreTrainedTemplates - kludge needed to get cutoffs -** Operation: This routine creates a new adapted class and uses Blob -** as the model for the first config in that class. -** Return: none -** Exceptions: none -** History: Thu Mar 14 12:49:39 1991, DSJ, Created. + ** Parameters: + ** Blob + ** blob to model new class after + ** LineStats + ** statistics for text row blob is in + ** ClassId + ** id of the class to be initialized + ** Class + ** adapted class to be initialized + ** Templates + ** adapted templates to add new class to + ** Globals: + ** AllProtosOn + ** dummy mask with all 1's + ** BaselineCutoffs + ** kludge needed to get cutoffs + ** PreTrainedTemplates + ** kludge needed to get cutoffs + ** Operation: This routine creates a new adapted + ** class and uses Blob as the model for the first + ** config in that class. 
+ ** Return: none + ** Exceptions: none + ** History: Thu Mar 14 12:49:39 1991, DSJ, Created. */ FEATURE_SET Features; int Fid, Pid; @@ -925,28 +835,23 @@ void MakeNewAdaptedClass(TBLOB *Blob, int NumFeatures; TEMP_PROTO TempProto; PROTO Proto; - ADAPT_CLASS Class; INT_CLASS IClass; - CLASS_INDEX ClassIndex; TEMP_CONFIG Config; - NormMethod = baseline; + classify_norm_method.set_value(baseline); Features = ExtractOutlineFeatures (Blob, LineStats); NumFeatures = Features->NumFeatures; - if (NumFeatures > UNLIKELY_NUM_FEAT) { + if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) { FreeFeatureSet(Features); return; } - Class = NewAdaptedClass (); - ClassIndex = AddAdaptedClass (Templates, Class, ClassId); Config = NewTempConfig (NumFeatures - 1); TempConfigFor (Class, 0) = Config; /* this is a kludge to construct cutoffs for adapted templates */ if (Templates == AdaptedTemplates) - BaselineCutoffs[ClassIndex] = - CharNormCutoffs[PreTrainedTemplates->IndexFor[ClassId]]; + BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId]; IClass = ClassForClassId (Templates->Templates, ClassId); @@ -980,11 +885,15 @@ void MakeNewAdaptedClass(TBLOB *Blob, AddIntConfig(IClass); ConvertConfig (AllProtosOn, 0, IClass); - if (LearningDebugLevel >= 1) { - cprintf ("Added new class '%s' with index %d and %d protos.\n", - unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures); + if (classify_learning_debug_level >= 1) { + cprintf ("Added new class '%s' with class id %d and %d protos.\n", + unicharset.id_to_unichar(ClassId), ClassId, NumFeatures); } -} /* MakeNewAdaptedClass */ + + if (IsEmptyAdaptedClass(Class)) + (Templates->NumNonEmptyClasses)++; +} /* InitAdaptedClass */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ @@ -995,27 +904,30 @@ int GetAdaptiveFeatures(TBLOB *Blob, /* ** Parameters: ** Blob - blob to extract features from -** LineStats - statistics about text row blob is in -** IntFeatures - array to fill with 
integer features -** FloatFeatures - place to return actual floating-pt features -** Globals: none -** Operation: This routine sets up the feature extractor to extract -** baseline normalized pico-features. -** The extracted pico-features are converted -** to integer form and placed in IntFeatures. The original -** floating-pt. features are returned in FloatFeatures. -** Return: Number of pico-features returned (0 if an error occurred) -** Exceptions: none -** History: Tue Mar 12 17:55:18 1991, DSJ, Created. + ** blob to extract features from + ** LineStats + ** statistics about text row blob is in + ** IntFeatures + ** array to fill with integer features + ** FloatFeatures + ** place to return actual floating-pt features + ** Globals: none + ** Operation: This routine sets up the feature + ** extractor to extract baseline normalized + ** pico-features. + ** The extracted pico-features are converted + ** to integer form and placed in IntFeatures. The + ** original floating-pt. features are returned in + ** FloatFeatures. + ** Return: Number of pico-features returned (0 if + ** an error occurred) + ** Exceptions: none + ** History: Tue Mar 12 17:55:18 1991, DSJ, Created. 
*/ FEATURE_SET Features; int NumFeatures; - NormMethod = baseline; + classify_norm_method.set_value(baseline); Features = ExtractPicoFeatures (Blob, LineStats); NumFeatures = Features->NumFeatures; @@ -1036,66 +948,45 @@ int GetAdaptiveFeatures(TBLOB *Blob, Private Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -int AdaptableWord(TWERD *Word, - const char *BestChoice, - const char *BestChoice_lengths, - const char *BestRawChoice, - const char *BestRawChoice_lengths) { +namespace tesseract { +int Classify::AdaptableWord(TWERD *Word, + const WERD_CHOICE &BestChoiceWord, + const WERD_CHOICE &RawChoiceWord) { /* ** Parameters: ** Word - current word -** BestChoice - best overall choice for word with context -** BestRawChoice - best choice for word without context -** Globals: none -** Operation: Return TRUE if the specified word is acceptable for -** adaptation. -** Return: TRUE or FALSE -** Exceptions: none -** History: Thu May 30 14:25:06 1991, DSJ, Created. + ** current word + ** BestChoice + ** best overall choice for word with context + ** BestRawChoice + ** best choice for word without context + ** Globals: none + ** Operation: Return TRUE if the specified word is + ** acceptable for adaptation. + ** Return: TRUE or FALSE + ** Exceptions: none + ** History: Thu May 30 14:25:06 1991, DSJ, Created. 
*/ - int BestChoiceLength; - - return ( /* rules that apply in general - simplest to compute first */ - /* EnableLearning && */ - /* new rules */ - BestChoice != NULL && BestRawChoice != NULL && Word != NULL && - (BestChoiceLength = strlen (BestChoice_lengths)) > 0 && + int BestChoiceLength = BestChoiceWord.length(); + return ( // rules that apply in general - simplest to compute first + BestChoiceLength > 0 && BestChoiceLength == NumBlobsIn (Word) && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && ( - (EnableNewAdaptRules - && - CurrentBestChoiceAdjustFactor - () - <= - ADAPTABLE_WERD - && - AlternativeChoicesWorseThan - (ADAPTABLE_WERD) - && - CurrentBestChoiceIs - (BestChoice, BestChoice_lengths)) - || - /* old rules */ - (!EnableNewAdaptRules - && - BestChoiceLength - == - strlen - (BestRawChoice_lengths) - && - ((valid_word (BestChoice) && case_ok (BestChoice, BestChoice_lengths)) || (valid_number (BestChoice, BestChoice_lengths) && pure_number (BestChoice, BestChoice_lengths))) && punctuation_ok (BestChoice, BestChoice_lengths) != -1 && punctuation_ok (BestChoice, BestChoice_lengths) <= 1))); - -} /* AdaptableWord */ - + (classify_enable_new_adapt_rules && + getDict().CurrentBestChoiceAdjustFactor() <= ADAPTABLE_WERD && + getDict().AlternativeChoicesWorseThan(ADAPTABLE_WERD) && + getDict().CurrentBestChoiceIs(BestChoiceWord)) || + (!classify_enable_new_adapt_rules && // old rules + BestChoiceLength == RawChoiceWord.length() && + ((getDict().valid_word_or_number(BestChoiceWord) && + Context::case_ok(BestChoiceWord, getDict().getUnicharset())))))); +} /*---------------------------------------------------------------------------*/ -void AdaptToChar(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - FLOAT32 Threshold) { + void Classify::AdaptToChar(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + FLOAT32 Threshold) { /* ** Parameters: ** Blob @@ -1121,7 +1012,6 @@ void AdaptToChar(TBLOB *Blob, int NumFeatures; INT_FEATURE_ARRAY IntFeatures; 
INT_RESULT_STRUCT IntResult; - CLASS_INDEX ClassIndex; INT_CLASS IClass; ADAPT_CLASS Class; TEMP_CONFIG TempConfig; @@ -1132,13 +1022,13 @@ void AdaptToChar(TBLOB *Blob, if (!LegalClassId (ClassId)) return; - if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) { - MakeNewAdaptedClass(Blob, LineStats, ClassId, AdaptedTemplates); + Class = AdaptedTemplates->Class[ClassId]; + assert(Class != NULL); + if (IsEmptyAdaptedClass(Class)) { + InitAdaptedClass(Blob, LineStats, ClassId, Class, AdaptedTemplates); } else { IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId); - ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId]; - Class = AdaptedTemplates->Class[ClassIndex]; NumFeatures = GetAdaptiveFeatures (Blob, LineStats, IntFeatures, &FloatFeatures); @@ -1154,7 +1044,7 @@ void AdaptToChar(TBLOB *Blob, if (IntResult.Rating <= Threshold) { if (ConfigIsPermanent (Class, IntResult.Config)) { - if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("Found good match to perm config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); FreeFeatureSet(FloatFeatures); @@ -1163,7 +1053,7 @@ void AdaptToChar(TBLOB *Blob, TempConfig = TempConfigFor (Class, IntResult.Config); IncreaseConfidence(TempConfig); - if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("Increasing reliability of temp config %d to %d.\n", IntResult.Config, TempConfig->NumTimesSeen); @@ -1172,7 +1062,7 @@ void AdaptToChar(TBLOB *Blob, Blob, LineStats); } else { - if (LearningDebugLevel >= 1) + if (classify_learning_debug_level >= 1) cprintf ("Found poor match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates, @@ -1187,13 +1077,13 @@ void AdaptToChar(TBLOB *Blob, Blob, LineStats); #ifndef GRAPHICS_DISABLED - if (LearningDebugLevel >= 1) { + if (classify_learning_debug_level >= 1) { IntegerMatcher (IClass, AllProtosOn, 
AllConfigsOn, NumFeatures, NumFeatures, IntFeatures, 0, &IntResult, NO_DEBUG); cprintf ("Best match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); - if (LearningDebugLevel >= 2) { + if (classify_learning_debug_level >= 2) { uinT32 ConfigMask; ConfigMask = 1 << IntResult.Config; ShowMatchDisplay(); @@ -1204,7 +1094,7 @@ void AdaptToChar(TBLOB *Blob, GetClassToDebug ("Adapting"); } } -#endif // GRAPHICS_DISABLED +#endif } FreeFeatureSet(FloatFeatures); } @@ -1212,10 +1102,10 @@ void AdaptToChar(TBLOB *Blob, /*---------------------------------------------------------------------------*/ -void AdaptToPunc(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - FLOAT32 Threshold) { +void Classify::AdaptToPunc(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + FLOAT32 Threshold) { /* ** Parameters: ** Blob @@ -1234,45 +1124,40 @@ void AdaptToPunc(TBLOB *Blob, ** Exceptions: none ** History: Thu Mar 14 09:36:03 1991, DSJ, Created. */ - ADAPT_RESULTS Results; + ADAPT_RESULTS *Results = new ADAPT_RESULTS(); int i; - Results.BlobLength = MAX_INT32; - Results.NumMatches = 0; - Results.BestRating = WORST_POSSIBLE_RATING; - Results.BestClass = NO_CLASS; - Results.BestConfig = 0; - InitMatcherRatings (Results.Ratings); - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results); - RemoveBadMatches(&Results); + Results->Initialize(); + CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + RemoveBadMatches(Results); - if (Results.NumMatches != 1) { - if (LearningDebugLevel >= 1) { + if (Results->NumMatches != 1) { + if (classify_learning_debug_level >= 1) { cprintf ("Rejecting punc = %s (Alternatives = ", unicharset.id_to_unichar(ClassId)); - for (i = 0; i < Results.NumMatches; i++) - cprintf ("%s", unicharset.id_to_unichar(Results.Classes[i])); + for (i = 0; i < Results->NumMatches; i++) + cprintf ("%s", unicharset.id_to_unichar(Results->Classes[i])); cprintf (")\n"); } - return; + } else { + + #ifndef 
SECURE_NAMES + if (classify_learning_debug_level >= 1) + cprintf ("Adapting to punc = %s, thr= %g\n", + unicharset.id_to_unichar(ClassId), Threshold); + #endif + AdaptToChar(Blob, LineStats, ClassId, Threshold); } - - #ifndef SECURE_NAMES - if (LearningDebugLevel >= 1) - cprintf ("Adapting to punc = %s, thr= %g\n", - unicharset.id_to_unichar(ClassId), Threshold); - #endif - AdaptToChar(Blob, LineStats, ClassId, Threshold); - + delete Results; } /* AdaptToPunc */ /*---------------------------------------------------------------------------*/ -void AddNewResult(ADAPT_RESULTS *Results, - CLASS_ID ClassId, - FLOAT32 Rating, - int ConfigId) { +void Classify::AddNewResult(ADAPT_RESULTS *Results, + CLASS_ID ClassId, + FLOAT32 Rating, + int ConfigId) { /* ** Parameters: ** Results @@ -1284,7 +1169,7 @@ void AddNewResult(ADAPT_RESULTS *Results, ** ConfigId config id of new result ** Globals: -** BadMatchPad +** matcher_bad_match_pad defines limits of an acceptable match ** Operation: This routine adds the result of a classification into ** Results. If the new rating is much worse than the current @@ -1303,16 +1188,25 @@ void AddNewResult(ADAPT_RESULTS *Results, INT_CLASS_STRUCT* CharClass = NULL; OldRating = Results->Ratings[ClassId]; - if (Rating <= Results->BestRating + BadMatchPad && Rating < OldRating) { + if (Rating <= Results->BestRating + matcher_bad_match_pad && Rating < OldRating) { + if (!unicharset.get_fragment(ClassId)) { + Results->HasNonfragment = true; + } Results->Ratings[ClassId] = Rating; if (ClassId != NO_CLASS) CharClass = ClassForClassId(PreTrainedTemplates, ClassId); - if (CharClass != NULL && CharClass->NumConfigs == 32) + if (CharClass != NULL) Results->Configs[ClassId] = ConfigId; else Results->Configs[ClassId] = ~0; - if (Rating < Results->BestRating) { + if (Rating < Results->BestRating && + // Ensure that fragments do no affect best rating, class and config. 
+ // This is needed so that at least one non-fragmented character is + // always present in the Results. + // TODO(daria): verify that this helps accuracy and does not + // hurt performance. + !unicharset.get_fragment(ClassId)) { Results->BestRating = Rating; Results->BestClass = ClassId; Results->BestConfig = ConfigId; @@ -1326,11 +1220,11 @@ void AddNewResult(ADAPT_RESULTS *Results, /*---------------------------------------------------------------------------*/ -void AmbigClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - UNICHAR_ID *Ambiguities, - ADAPT_RESULTS *Results) { +void Classify::AmbigClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + UNICHAR_ID *Ambiguities, + ADAPT_RESULTS *Results) { /* ** Parameters: ** Blob @@ -1361,7 +1255,6 @@ void AmbigClassifier(TBLOB *Blob, CLASS_NORMALIZATION_ARRAY CharNormArray; INT_RESULT_STRUCT IntResult; CLASS_ID ClassId; - CLASS_INDEX ClassIndex; AmbigClassifierCalls++; @@ -1372,20 +1265,19 @@ void AmbigClassifier(TBLOB *Blob, if (NumFeatures <= 0) return; - if (MatcherDebugLevel >= 2) + if (matcher_debug_level >= 2) cprintf ("AM Matches = "); while (*Ambiguities >= 0) { ClassId = *Ambiguities; - ClassIndex = Templates->IndexFor[ClassId]; SetCharNormMatch(); IntegerMatcher (ClassForClassId (Templates, ClassId), AllProtosOn, AllConfigsOn, Results->BlobLength, NumFeatures, IntFeatures, - CharNormArray[ClassIndex], &IntResult, NO_DEBUG); + CharNormArray[ClassId], &IntResult, NO_DEBUG); - if (MatcherDebugLevel >= 2) + if (matcher_debug_level >= 2) cprintf ("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId), IntResult.Config, IntResult.Rating * 100.0); @@ -1396,7 +1288,7 @@ void AmbigClassifier(TBLOB *Blob, NumAmbigClassesTried++; } - if (MatcherDebugLevel >= 2) + if (matcher_debug_level >= 2) cprintf ("\n"); } /* AmbigClassifier */ @@ -1404,32 +1296,31 @@ void AmbigClassifier(TBLOB *Blob, /*---------------------------------------------------------------------------*/ // 
Factored-out calls to IntegerMatcher based on class pruner results. // Returns integer matcher results inside CLASS_PRUNER_RESULTS structure. -void MasterMatcher(INT_TEMPLATES templates, - inT16 num_features, - INT_FEATURE_ARRAY features, - CLASS_NORMALIZATION_ARRAY norm_factors, - ADAPT_CLASS* classes, - int debug, - int num_classes, - CLASS_PRUNER_RESULTS results, - ADAPT_RESULTS* final_results) { +void Classify::MasterMatcher(INT_TEMPLATES templates, + inT16 num_features, + INT_FEATURE_ARRAY features, + CLASS_NORMALIZATION_ARRAY norm_factors, + ADAPT_CLASS* classes, + int debug, + int num_classes, + CLASS_PRUNER_RESULTS results, + ADAPT_RESULTS* final_results) { for (int c = 0; c < num_classes; c++) { CLASS_ID class_id = results[c].Class; INT_RESULT_STRUCT& int_result = results[c].IMResult; - CLASS_INDEX class_index = templates->IndexFor[class_id]; - BIT_VECTOR protos = classes != NULL ? classes[class_index]->PermProtos + BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos : AllProtosOn; - BIT_VECTOR configs = classes != NULL ? classes[class_index]->PermConfigs + BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs : AllConfigsOn; IntegerMatcher(ClassForClassId(templates, class_id), protos, configs, final_results->BlobLength, - num_features, features, norm_factors[class_index], - &int_result, NO_DEBUG); + num_features, features, norm_factors[class_id], + &int_result, debug); // Compute class feature corrections. 
double miss_penalty = tessedit_class_miss_scale * int_result.FeatureMisses; - if (MatcherDebugLevel >= 2 || display_ratings > 1) { + if (matcher_debug_level >= 2 || tord_display_ratings > 1) { cprintf("%s-%-2d %2.1f(CP%2.1f, IM%2.1f + MP%2.1f) ", unicharset.id_to_unichar(class_id), int_result.Config, (int_result.Rating + miss_penalty) * 100.0, @@ -1442,16 +1333,42 @@ void MasterMatcher(INT_TEMPLATES templates, if (int_result.Rating > WORST_POSSIBLE_RATING) int_result.Rating = WORST_POSSIBLE_RATING; AddNewResult(final_results, class_id, int_result.Rating, int_result.Config); + // Add unichars ambiguous with class_id with the same rating as class_id. + if (use_definite_ambigs_for_classifier) { + const UnicharIdVector *definite_ambigs = + getDict().getUnicharAmbigs().OneToOneDefiniteAmbigs(class_id); + int ambigs_size = (definite_ambigs == NULL) ? 0 : definite_ambigs->size(); + for (int ambig = 0; ambig < ambigs_size; ++ambig) { + UNICHAR_ID ambig_class_id = (*definite_ambigs)[ambig]; + if (matcher_debug_level >= 3) { + tprintf("class: %d definite ambig: %d rating: old %.4f new %.4f\n", + class_id, ambig_class_id, + final_results->Ratings[ambig_class_id], int_result.Rating); + } + if (final_results->Ratings[ambig_class_id] < WORST_POSSIBLE_RATING) { + // ambig_class_id was already added to final_results, + // so just need to modify the rating. 
+ if (int_result.Rating < final_results->Ratings[ambig_class_id]) { + final_results->Ratings[ambig_class_id] = int_result.Rating; + } + } else { + AddNewResult(final_results, ambig_class_id, + int_result.Rating, int_result.Config); + } + } + } } - if (MatcherDebugLevel >= 2 || display_ratings > 1) + if (matcher_debug_level >= 2 || tord_display_ratings > 1) cprintf("\n"); } +} // namespace tesseract /*---------------------------------------------------------------------------*/ -UNICHAR_ID *BaselineClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_TEMPLATES Templates, - ADAPT_RESULTS *Results) { +namespace tesseract { +UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + ADAPT_TEMPLATES Templates, + ADAPT_RESULTS *Results) { /* ** Parameters: ** Blob @@ -1478,7 +1395,6 @@ UNICHAR_ID *BaselineClassifier(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures; CLASS_NORMALIZATION_ARRAY CharNormArray; CLASS_ID ClassId; - CLASS_INDEX ClassIndex; BaselineClassifierCalls++; @@ -1492,16 +1408,16 @@ UNICHAR_ID *BaselineClassifier(TBLOB *Blob, NumClasses = ClassPruner (Templates->Templates, NumFeatures, IntFeatures, CharNormArray, BaselineCutoffs, Results->CPResults, - MatchDebugFlags); + matcher_debug_flags); NumBaselineClassesTried += NumClasses; - if (MatcherDebugLevel >= 2 || display_ratings > 1) + if (matcher_debug_level >= 2 || tord_display_ratings > 1) cprintf ("BL Matches = "); SetBaseLineMatch(); MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray, - Templates->Class, MatchDebugFlags, NumClasses, + Templates->Class, matcher_debug_flags, NumClasses, Results->CPResults, Results); ClassId = Results->BestClass; @@ -1509,17 +1425,15 @@ UNICHAR_ID *BaselineClassifier(TBLOB *Blob, return (NULL); /* this is a bug - maybe should return "" */ - ClassIndex = Templates->Templates->IndexFor[ClassId]; - return (Templates->Class[ClassIndex]-> - Config[Results->BestConfig].Perm); + return 
(Templates->Class[ClassId]->Config[Results->BestConfig].Perm); } /* BaselineClassifier */ /*---------------------------------------------------------------------------*/ -void CharNormClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - ADAPT_RESULTS *Results) { +int Classify::CharNormClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + ADAPT_RESULTS *Results) { /* ** Parameters: ** Blob @@ -1557,41 +1471,33 @@ void CharNormClassifier(TBLOB *Blob, IntFeatures, CharNormArray, &(Results->BlobLength)); if (NumFeatures <= 0) - return; + return 0; NumClasses = ClassPruner(Templates, NumFeatures, IntFeatures, CharNormArray, CharNormCutoffs, Results->CPResults, - MatchDebugFlags); + matcher_debug_flags); if (tessedit_single_match && NumClasses > 1) NumClasses = 1; NumCharNormClassesTried += NumClasses; - if (MatcherDebugLevel >= 2 || display_ratings > 1) - cprintf("CN Matches = "); - SetCharNormMatch(); MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray, - NULL, MatchDebugFlags, NumClasses, + NULL, matcher_debug_flags, NumClasses, Results->CPResults, Results); + return NumFeatures; } /* CharNormClassifier */ /*---------------------------------------------------------------------------*/ -void ClassifyAsNoise(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results) { +void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) { /* ** Parameters: - ** Blob - blob to be classified -** LineStats - statistics for text line Blob is in ** Results results to add noise classification to ** Globals: -** NoiseBlobLength +** matcher_avg_noise_size avg. 
length of a noise blob ** Operation: This routine computes a rating which reflects the ** likelihood that the blob being classified is a noise @@ -1603,12 +1509,13 @@ void ClassifyAsNoise(TBLOB *Blob, */ register FLOAT32 Rating; - Rating = Results->BlobLength / NoiseBlobLength; + Rating = Results->BlobLength / matcher_avg_noise_size; Rating *= Rating; Rating /= 1.0 + Rating; AddNewResult (Results, NO_CLASS, Rating, 0); } /* ClassifyAsNoise */ +} // namespace tesserct /*---------------------------------------------------------------------------*/ @@ -1649,59 +1556,60 @@ int CompareCurrentRatings( //CLASS_ID *Class1, /*---------------------------------------------------------------------------*/ -LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) { -/* - ** Parameters: - ** Results - adaptive matcher results to convert to choices -** Globals: none -** Operation: This routine creates a choice for each matching class -** in Results (up to MAX_MATCHES) and returns a list of -** these choices. The match -** ratings are converted to be the ratings and certainties -** as used by the context checkers. -** Return: List of choices. -** Exceptions: none -** History: Tue Mar 12 08:55:37 1991, DSJ, Created. -*/ +// The function converts the given match ratings to the list of blob +// choices with ratings and certainties (used by the context checkers). +// If character fragments are present in the results, this function also makes +// sure that there is at least one non-fragmented classification included. +// For each classificaiton result check the unicharset for "definite" +// ambiguities and modify the resulting Choices accordingly. 
+namespace tesseract { +void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices) { + assert(Choices != NULL); int i; - LIST Choices; CLASS_ID NextMatch; FLOAT32 Rating; FLOAT32 Certainty; - const char *NextMatch_unichar; - char choice_lengths[2] = {0, 0}; - - if (Results->NumMatches > MAX_MATCHES) - Results->NumMatches = MAX_MATCHES; - - for (Choices = NIL, i = 0; i < Results->NumMatches; i++) { + BLOB_CHOICE_IT temp_it; + bool contains_nonfrag = false; + temp_it.set_to_list(Choices); + int choices_length = 0; + for (i = 0; i < Results->NumMatches; i++) { NextMatch = Results->Classes[i]; - Rating = Certainty = Results->Ratings[NextMatch]; - Rating *= RatingScale * Results->BlobLength; - Certainty *= -CertaintyScale; - if (NextMatch != NO_CLASS) - NextMatch_unichar = unicharset.id_to_unichar(NextMatch); - else - NextMatch_unichar = ""; - choice_lengths[0] = strlen(NextMatch_unichar); - Choices = append_choice (Choices, - NextMatch_unichar, - choice_lengths, - Rating, Certainty, - Results->Configs[NextMatch], - unicharset.get_script(NextMatch)); + bool current_is_frag = (unicharset.get_fragment(NextMatch) != NULL); + if (temp_it.length()+1 == MAX_MATCHES && + !contains_nonfrag && current_is_frag) { + continue; // look for a non-fragmented character to fill the + // last spot in Choices if only fragments are present + } + // BlobLength can never be legally 0, this means recognition failed. + // But we must return a classification result because some invoking + // functions (chopper/permuter) do not anticipate a null blob choice. + // So we need to assign a poor, but not infinitely bad score. 
+ if (Results->BlobLength == 0) { + Certainty = -20; + Rating = 100; // should be -certainty * real_blob_length + } else { + Rating = Certainty = Results->Ratings[NextMatch]; + Rating *= rating_scale * Results->BlobLength; + Certainty *= -certainty_scale; + } + temp_it.add_to_end(new BLOB_CHOICE(NextMatch, Rating, Certainty, + Results->Configs[NextMatch], + unicharset.get_script(NextMatch))); + contains_nonfrag |= !current_is_frag; // update contains_nonfrag + choices_length++; + if (choices_length >= MAX_MATCHES) break; } - return (Choices); - -} /* ConvertMatchesToChoices */ + Results->NumMatches = choices_length; +} // ConvertMatchesToChoices /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void DebugAdaptiveClassifier(TBLOB *Blob, - LINE_STATS *LineStats, - ADAPT_RESULTS *Results) { +void Classify::DebugAdaptiveClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + ADAPT_RESULTS *Results) { /* ** Parameters: ** Blob @@ -1768,55 +1676,53 @@ void DebugAdaptiveClassifier(TBLOB *Blob, #endif /*---------------------------------------------------------------------------*/ -void DoAdaptiveMatch(TBLOB *Blob, +void Classify::DoAdaptiveMatch(TBLOB *Blob, LINE_STATS *LineStats, ADAPT_RESULTS *Results) { -/* - ** Parameters: - ** Blob - blob to be classified -** LineStats - statistics for text line Blob is in -** Results - place to put match results -** Globals: -** PreTrainedTemplates - built-in training templates -** AdaptedTemplates - templates adapted for this page -** GreatAdaptiveMatch - rating limit for a great match -** Operation: This routine performs an adaptive classification. -** If we have not yet adapted to enough classes, a simple -** classification to the pre-trained templates is performed. -** Otherwise, we match the blob against the adapted templates. -** If the adapted templates do not match well, we try a -** match against the pre-trained templates. 
If an adapted -** template match is found, we do a match to any pre-trained -** templates which could be ambiguous. The results from all -** of these classifications are merged together into Results. -** Return: none -** Exceptions: none -** History: Tue Mar 12 08:50:11 1991, DSJ, Created. -*/ + /* + ** Parameters: + ** Blob + blob to be classified + ** LineStats + statistics for text line Blob is in + ** Results + place to put match results + ** Globals: + ** PreTrainedTemplates + built-in training templates + ** AdaptedTemplates + templates adapted for this page + ** matcher_great_threshold + rating limit for a great match + ** Operation: This routine performs an adaptive classification. + ** If we have not yet adapted to enough classes, a simple + ** classification to the pre-trained templates is performed. + ** Otherwise, we match the blob against the adapted templates. + ** If the adapted templates do not match well, we try a + ** match against the pre-trained templates. If an adapted + ** template match is found, we do a match to any pre-trained + ** templates which could be ambiguous. The results from all + ** of these classifications are merged together into Results. + ** Return: none + ** Exceptions: none + ** History: Tue Mar 12 08:50:11 1991, DSJ, Created. 
+ */ UNICHAR_ID *Ambiguities; AdaptiveMatcherCalls++; InitIntFX(); - if (AdaptedTemplates->NumPermClasses < MinNumPermClasses - || tess_cn_matching) { + if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min + || tess_cn_matching) { CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); } else { - Ambiguities = BaselineClassifier (Blob, LineStats, - AdaptedTemplates, Results); - + Ambiguities = BaselineClassifier(Blob, LineStats, + AdaptedTemplates, Results); if ((Results->NumMatches > 0 && MarginalMatch (Results->BestRating) - && !tess_bn_matching) || Results->NumMatches == 0) { + && !tess_bn_matching) || Results->NumMatches == 0) { CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); - } - else if (Ambiguities && *Ambiguities >= 0) { + } else if (Ambiguities && *Ambiguities >= 0) { AmbigClassifier(Blob, LineStats, PreTrainedTemplates, @@ -1825,67 +1731,75 @@ void DoAdaptiveMatch(TBLOB *Blob, } } + // Force the blob to be classified as noise + // if the results contain only fragments. + // TODO(daria): verify that this is better than + // just adding a NULL classificaiton. 
+ if (!Results->HasNonfragment) { + Results->NumMatches = 0; + } if (Results->NumMatches == 0) - ClassifyAsNoise(Blob, LineStats, Results); - /**/} /* DoAdaptiveMatch */ + ClassifyAsNoise(Results); +} /* DoAdaptiveMatch */ - /*---------------------------------------------------------------------------*/ - void - GetAdaptThresholds (TWERD * Word, - LINE_STATS * LineStats, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]) { +/*---------------------------------------------------------------------------*/ +void +Classify::GetAdaptThresholds (TWERD * Word, + LINE_STATS * LineStats, + const WERD_CHOICE& BestChoice, + const WERD_CHOICE& BestRawChoice, + FLOAT32 Thresholds[]) { /* ** Parameters: ** Word - current word - ** LineStats - line stats for row word is in - ** BestChoice - best choice for current word with context - ** BestRawChoice - best choice for current word without context - ** Thresholds - array of thresholds to be filled in - ** Globals: - ** EnableNewAdaptRules - ** GoodAdaptiveMatch - ** PerfectRating - ** RatingMargin - ** Operation: This routine tries to estimate how tight the adaptation - ** threshold should be set for each character in the current - ** word. In general, the routine tries to set tighter - ** thresholds for a character when the current set of templates - ** would have made an error on that character. It tries - ** to set a threshold tight enough to eliminate the error. - ** Two different sets of rules can be used to determine the - ** desired thresholds. - ** Return: none (results are returned in Thresholds) - ** Exceptions: none - ** History: Fri May 31 09:22:08 1991, DSJ, Created. 
- */ - TBLOB *Blob; - const char* BestChoice_string = BestChoice.string().string(); - const char* BestChoice_lengths = BestChoice.lengths().string(); - const char* BestRawChoice_string = BestRawChoice.string().string(); - const char* BestRawChoice_lengths = BestRawChoice.lengths().string(); + current word + ** LineStats + line stats for row word is in + ** BestChoice + best choice for current word with context + ** BestRawChoice + best choice for current word without context + ** Thresholds + array of thresholds to be filled in + ** Globals: + ** classify_enable_new_adapt_rules + ** matcher_good_threshold + ** matcher_perfect_threshold + ** matcher_rating_margin + ** Operation: This routine tries to estimate how tight the adaptation + ** threshold should be set for each character in the current + ** word. In general, the routine tries to set tighter + ** thresholds for a character when the current set of templates + ** would have made an error on that character. It tries + ** to set a threshold tight enough to eliminate the error. + ** Two different sets of rules can be used to determine the + ** desired thresholds. + ** Return: none (results are returned in Thresholds) + ** Exceptions: none + ** History: Fri May 31 09:22:08 1991, DSJ, Created. 
+ */ + TBLOB *Blob; + const char* BestChoice_string = BestChoice.unichar_string().string(); + const char* BestChoice_lengths = BestChoice.unichar_lengths().string(); + const char* BestRawChoice_string = BestRawChoice.unichar_string().string(); + const char* BestRawChoice_lengths = BestRawChoice.unichar_lengths().string(); - if (EnableNewAdaptRules && /* new rules */ - CurrentBestChoiceIs (BestChoice_string, BestChoice_lengths)) { - FindClassifierErrors(PerfectRating, - GoodAdaptiveMatch, - RatingMargin, - Thresholds); - } - else { /* old rules */ - for (Blob = Word->blobs; - Blob != NULL; - Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++), - BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++) - if (*(BestChoice_lengths) == *(BestRawChoice_lengths) && - strncmp(BestChoice_string, BestRawChoice_string, - *(BestChoice_lengths)) == 0) - *Thresholds = GoodAdaptiveMatch; + if (classify_enable_new_adapt_rules && /* new rules */ + getDict().CurrentBestChoiceIs(BestChoice)) { + getDict().FindClassifierErrors(matcher_perfect_threshold, + matcher_good_threshold, + matcher_rating_margin, + Thresholds); + } + else { /* old rules */ + for (Blob = Word->blobs; + Blob != NULL; + Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++), + BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++) + if (*(BestChoice_lengths) == *(BestRawChoice_lengths) && + strncmp(BestChoice_string, BestRawChoice_string, + *(BestChoice_lengths)) == 0) + *Thresholds = matcher_good_threshold; else { /* the blob was incorrectly classified - find the rating threshold needed to create a template which will correct the error with @@ -1895,1064 +1809,1029 @@ void DoAdaptiveMatch(TBLOB *Blob, unicharset.unichar_to_id( BestChoice_string, *BestChoice_lengths)); - *Thresholds *= (1.0 - RatingMargin); - if (*Thresholds > GoodAdaptiveMatch) - *Thresholds = GoodAdaptiveMatch; - if (*Thresholds < PerfectRating) - *Thresholds = PerfectRating; + *Thresholds *= (1.0 - 
matcher_rating_margin); + if (*Thresholds > matcher_good_threshold) + *Thresholds = matcher_good_threshold; + if (*Thresholds < matcher_perfect_threshold) + *Thresholds = matcher_perfect_threshold; } - } - } /* GetAdaptThresholds */ + } +} /* GetAdaptThresholds */ - /*---------------------------------------------------------------------------*/ - UNICHAR_ID *GetAmbiguities(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID CorrectClass) { +/*---------------------------------------------------------------------------*/ +UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID CorrectClass) { /* ** Parameters: ** Blob - blob to get classification ambiguities for - ** LineStats - statistics for text line blob is in - ** CorrectClass - correct class for Blob - ** Globals: - ** CurrentRatings - used by qsort compare routine - ** PreTrainedTemplates - built-in templates - ** Operation: This routine matches blob to the built-in templates - ** to find out if there are any classes other than the correct - ** class which are potential ambiguities. - ** Return: String containing all possible ambiguous classes. - ** Exceptions: none - ** History: Fri Mar 15 08:08:22 1991, DSJ, Created. - */ - ADAPT_RESULTS Results; - UNICHAR_ID *Ambiguities; - int i; + blob to get classification ambiguities for + ** LineStats + statistics for text line blob is in + ** CorrectClass + correct class for Blob + ** Globals: + ** CurrentRatings + used by qsort compare routine + ** PreTrainedTemplates + built-in templates + ** Operation: This routine matches blob to the built-in templates + ** to find out if there are any classes other than the correct + ** class which are potential ambiguities. + ** Return: String containing all possible ambiguous classes. + ** Exceptions: none + ** History: Fri Mar 15 08:08:22 1991, DSJ, Created. 
+ */ + ADAPT_RESULTS *Results = new ADAPT_RESULTS(); + UNICHAR_ID *Ambiguities; + int i; - EnterClassifyMode; + EnterClassifyMode; - Results.NumMatches = 0; - Results.BestRating = WORST_POSSIBLE_RATING; - Results.BestClass = NO_CLASS; - Results.BestConfig = 0; - InitMatcherRatings (Results.Ratings); + Results->Initialize(); - CharNormClassifier(Blob, LineStats, PreTrainedTemplates, &Results); - RemoveBadMatches(&Results); + CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results); + RemoveBadMatches(Results); - /* save ratings in a global so that CompareCurrentRatings() can see them */ - CurrentRatings = Results.Ratings; - qsort ((void *) (Results.Classes), Results.NumMatches, - sizeof (CLASS_ID), CompareCurrentRatings); + /* save ratings in a global so that CompareCurrentRatings() can see them */ + CurrentRatings = Results->Ratings; + qsort ((void *) (Results->Classes), Results->NumMatches, + sizeof (CLASS_ID), CompareCurrentRatings); - /* copy the class id's into an string of ambiguities - don't copy if - the correct class is the only class id matched */ - Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) * - (Results.NumMatches + 1)); - if (Results.NumMatches > 1 || - (Results.NumMatches == 1 && Results.Classes[0] != CorrectClass)) { - for (i = 0; i < Results.NumMatches; i++) - Ambiguities[i] = Results.Classes[i]; - Ambiguities[i] = -1; - } - else - Ambiguities[0] = -1; + /* copy the class id's into an string of ambiguities - don't copy if + the correct class is the only class id matched */ + Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) * + (Results->NumMatches + 1)); + if (Results->NumMatches > 1 || + (Results->NumMatches == 1 && Results->Classes[0] != CorrectClass)) { + for (i = 0; i < Results->NumMatches; i++) + Ambiguities[i] = Results->Classes[i]; + Ambiguities[i] = -1; + } + else + Ambiguities[0] = -1; - return (Ambiguities); + delete Results; + return (Ambiguities); +} /* GetAmbiguities */ - } /* GetAmbiguities */ - - 
/*---------------------------------------------------------------------------*/ - int GetBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { +/*---------------------------------------------------------------------------*/ +int GetBaselineFeatures(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { /* ** Parameters: ** Blob - blob to extract features from - ** LineStats - statistics about text row blob is in - ** Templates - used to compute char norm adjustments - ** IntFeatures - array to fill with integer features - ** CharNormArray - array to fill with dummy char norm adjustments - ** BlobLength - length of blob in baseline-normalized units - ** Globals: none - ** Operation: This routine sets up the feature extractor to extract - ** baseline normalized pico-features. - ** The extracted pico-features are converted - ** to integer form and placed in IntFeatures. CharNormArray - ** is filled with 0's to indicate to the matcher that no - ** character normalization adjustment needs to be done. - ** The total length of all blob outlines - ** in baseline normalized units is also returned. - ** Return: Number of pico-features returned (0 if an error occurred) - ** Exceptions: none - ** History: Tue Mar 12 17:55:18 1991, DSJ, Created. - */ - FEATURE_SET Features; - int NumFeatures; + blob to extract features from + ** LineStats + statistics about text row blob is in + ** Templates + used to compute char norm adjustments + ** IntFeatures + array to fill with integer features + ** CharNormArray + array to fill with dummy char norm adjustments + ** BlobLength + length of blob in baseline-normalized units + ** Globals: none + ** Operation: This routine sets up the feature extractor to extract + ** baseline normalized pico-features. 
+ ** The extracted pico-features are converted + ** to integer form and placed in IntFeatures. CharNormArray + ** is filled with 0's to indicate to the matcher that no + ** character normalization adjustment needs to be done. + ** The total length of all blob outlines + ** in baseline normalized units is also returned. + ** Return: Number of pico-features returned (0 if an error occurred) + ** Exceptions: none + ** History: Tue Mar 12 17:55:18 1991, DSJ, Created. + */ + FEATURE_SET Features; + int NumFeatures; - if (EnableIntFX) - return (GetIntBaselineFeatures (Blob, LineStats, Templates, - IntFeatures, CharNormArray, BlobLength)); + if (classify_enable_int_fx) + return (GetIntBaselineFeatures (Blob, LineStats, Templates, + IntFeatures, CharNormArray, BlobLength)); - NormMethod = baseline; - Features = ExtractPicoFeatures (Blob, LineStats); - - NumFeatures = Features->NumFeatures; - *BlobLength = NumFeatures; - if (NumFeatures > UNLIKELY_NUM_FEAT) { - FreeFeatureSet(Features); - return (0); - } - - ComputeIntFeatures(Features, IntFeatures); - ClearCharNormArray(Templates, CharNormArray); + classify_norm_method.set_value(baseline); + Features = ExtractPicoFeatures (Blob, LineStats); + NumFeatures = Features->NumFeatures; + *BlobLength = NumFeatures; + if (NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); - return (NumFeatures); + return (0); + } - } /* GetBaselineFeatures */ + ComputeIntFeatures(Features, IntFeatures); + ClearCharNormArray(Templates, CharNormArray); - /*---------------------------------------------------------------------------*/ - FLOAT32 GetBestRatingFor(TBLOB *Blob, + FreeFeatureSet(Features); + return NumFeatures; +} /* GetBaselineFeatures */ + +FLOAT32 Classify::GetBestRatingFor(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId) { + /* + ** Parameters: + ** Blob + blob to get best rating for + ** LineStats + statistics about text line blob is in + ** ClassId + class blob is to be compared to + ** Globals: + ** 
PreTrainedTemplates + built-in templates + ** AdaptedTemplates + current set of adapted templates + ** AllProtosOn + dummy mask to enable all protos + ** AllConfigsOn + dummy mask to enable all configs + ** Operation: This routine classifies Blob against both sets of + ** templates for the specified class and returns the best + ** rating found. + ** Return: Best rating for match of Blob to ClassId. + ** Exceptions: none + ** History: Tue Apr 9 09:01:24 1991, DSJ, Created. + */ + int NumCNFeatures, NumBLFeatures; + INT_FEATURE_ARRAY CNFeatures, BLFeatures; + INT_RESULT_STRUCT CNResult, BLResult; + inT32 BlobLength; + + CNResult.Rating = BLResult.Rating = 1.0; + + if (!LegalClassId(ClassId)) + return (1.0); + + uinT8 *CNAdjust = new uinT8[MAX_NUM_CLASSES]; + uinT8 *BLAdjust = new uinT8[MAX_NUM_CLASSES]; + + if (!UnusedClassIdIn(PreTrainedTemplates, ClassId)) { + NumCNFeatures = GetCharNormFeatures(Blob, LineStats, + PreTrainedTemplates, + CNFeatures, CNAdjust, &BlobLength); + if (NumCNFeatures > 0) { + SetCharNormMatch(); + IntegerMatcher(ClassForClassId(PreTrainedTemplates, ClassId), + AllProtosOn, AllConfigsOn, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, NO_DEBUG); + } + } + + if (!UnusedClassIdIn(AdaptedTemplates->Templates, ClassId)) { + NumBLFeatures = GetBaselineFeatures(Blob, LineStats, + AdaptedTemplates->Templates, + BLFeatures, BLAdjust, &BlobLength); + if (NumBLFeatures > 0) { + SetBaseLineMatch(); + IntegerMatcher(ClassForClassId(AdaptedTemplates->Templates, ClassId), + AdaptedTemplates->Class[ClassId]->PermProtos, + AdaptedTemplates->Class[ClassId]->PermConfigs, + BlobLength, NumBLFeatures, BLFeatures, + BLAdjust[ClassId], &BLResult, NO_DEBUG); + } + } + + // Clean up. 
+ delete[] CNAdjust; + delete[] BLAdjust; + + return (MIN (BLResult.Rating, CNResult.Rating)); +} /* GetBestRatingFor */ + +/*---------------------------------------------------------------------------*/ +int Classify::GetCharNormFeatures(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { + /* + ** Parameters: + ** Blob + blob to extract features from + ** LineStats + statistics about text row blob is in + ** Templates + used to compute char norm adjustments + ** IntFeatures + array to fill with integer features + ** CharNormArray + array to fill with char norm adjustments + ** BlobLength + length of blob in baseline-normalized units + ** Globals: none + ** Operation: This routine sets up the feature extractor to extract + ** character normalization features and character normalized + ** pico-features. The extracted pico-features are converted + ** to integer form and placed in IntFeatures. The character + ** normalization features are matched to each class in + ** templates and the resulting adjustment factors are returned + ** in CharNormArray. The total length of all blob outlines + ** in baseline normalized units is also returned. + ** Return: Number of pico-features returned (0 if an error occurred) + ** Exceptions: none + ** History: Tue Mar 12 17:55:18 1991, DSJ, Created. 
+ */ + return (GetIntCharNormFeatures (Blob, LineStats, Templates, + IntFeatures, CharNormArray, BlobLength)); +} /* GetCharNormFeatures */ + +/*---------------------------------------------------------------------------*/ +int GetIntBaselineFeatures(TBLOB *Blob, LINE_STATS *LineStats, - CLASS_ID ClassId) { + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { /* ** Parameters: ** Blob - blob to get best rating for - ** LineStats - statistics about text line blob is in - ** ClassId - class blob is to be compared to - ** Globals: - ** PreTrainedTemplates - built-in templates - ** AdaptedTemplates - current set of adapted templates - ** AllProtosOn - dummy mask to enable all protos - ** AllConfigsOn - dummy mask to enable all configs - ** Operation: This routine classifies Blob against both sets of - ** templates for the specified class and returns the best - ** rating found. - ** Return: Best rating for match of Blob to ClassId. - ** Exceptions: none - ** History: Tue Apr 9 09:01:24 1991, DSJ, Created. - */ - int NumCNFeatures, NumBLFeatures; - INT_FEATURE_ARRAY CNFeatures, BLFeatures; - INT_RESULT_STRUCT CNResult, BLResult; - CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust; - CLASS_INDEX ClassIndex; - inT32 BlobLength; + blob to extract features from + ** LineStats + statistics about text row blob is in + ** Templates + used to compute char norm adjustments + ** IntFeatures + array to fill with integer features + ** CharNormArray + array to fill with dummy char norm adjustments + ** BlobLength + length of blob in baseline-normalized units + ** Globals: + ** FeaturesHaveBeenExtracted + TRUE if fx has been done + ** BaselineFeatures + holds extracted baseline feat + ** CharNormFeatures + holds extracted char norm feat + ** FXInfo + holds misc. FX info + ** Operation: This routine calls the integer (Hardware) feature + ** extractor if it has not been called before for this blob. 
+ ** The results from the feature extractor are placed into + ** globals so that they can be used in other routines without + ** re-extracting the features. + ** It then copies the baseline features into the IntFeatures + ** array provided by the caller. + ** Return: Number of features extracted or 0 if an error occured. + ** Exceptions: none + ** History: Tue May 28 10:40:52 1991, DSJ, Created. + */ + register INT_FEATURE Src, Dest, End; - CNResult.Rating = BLResult.Rating = 1.0; + if (!FeaturesHaveBeenExtracted) { + FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures, + CharNormFeatures, &FXInfo); + FeaturesHaveBeenExtracted = TRUE; + } - if (!LegalClassId (ClassId)) - return (1.0); - - if (!UnusedClassIdIn (PreTrainedTemplates, ClassId)) { - NumCNFeatures = GetCharNormFeatures (Blob, LineStats, - PreTrainedTemplates, - CNFeatures, CNAdjust, &BlobLength); - if (NumCNFeatures > 0) { - ClassIndex = PreTrainedTemplates->IndexFor[ClassId]; - - SetCharNormMatch(); - IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), - AllProtosOn, AllConfigsOn, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassIndex], &CNResult, NO_DEBUG); - } - } - - if (!UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) { - NumBLFeatures = GetBaselineFeatures (Blob, LineStats, - AdaptedTemplates->Templates, - BLFeatures, BLAdjust, &BlobLength); - if (NumBLFeatures > 0) { - ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId]; - - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId - (AdaptedTemplates->Templates, ClassId), - AdaptedTemplates->Class[ClassIndex]->PermProtos, - AdaptedTemplates->Class[ClassIndex]->PermConfigs, - BlobLength, NumBLFeatures, BLFeatures, - BLAdjust[ClassIndex], &BLResult, NO_DEBUG); - } - } - - return (MIN (BLResult.Rating, CNResult.Rating)); - - } /* GetBestRatingFor */ - - /*---------------------------------------------------------------------------*/ - int GetCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES 
Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { - /* - ** Parameters: - ** Blob - blob to extract features from - ** LineStats - statistics about text row blob is in - ** Templates - used to compute char norm adjustments - ** IntFeatures - array to fill with integer features - ** CharNormArray - array to fill with char norm adjustments - ** BlobLength - length of blob in baseline-normalized units - ** Globals: none - ** Operation: This routine sets up the feature extractor to extract - ** character normalization features and character normalized - ** pico-features. The extracted pico-features are converted - ** to integer form and placed in IntFeatures. The character - ** normalization features are matched to each class in - ** templates and the resulting adjustment factors are returned - ** in CharNormArray. The total length of all blob outlines - ** in baseline normalized units is also returned. - ** Return: Number of pico-features returned (0 if an error occurred) - ** Exceptions: none - ** History: Tue Mar 12 17:55:18 1991, DSJ, Created. 
- */ - return (GetIntCharNormFeatures (Blob, LineStats, Templates, - IntFeatures, CharNormArray, BlobLength)); - } /* GetCharNormFeatures */ - - /*---------------------------------------------------------------------------*/ - int GetIntBaselineFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { - /* - ** Parameters: - ** Blob - blob to extract features from - ** LineStats - statistics about text row blob is in - ** Templates - used to compute char norm adjustments - ** IntFeatures - array to fill with integer features - ** CharNormArray - array to fill with dummy char norm adjustments - ** BlobLength - length of blob in baseline-normalized units - ** Globals: - ** FeaturesHaveBeenExtracted - TRUE if fx has been done - ** BaselineFeatures - holds extracted baseline feat - ** CharNormFeatures - holds extracted char norm feat - ** FXInfo - holds misc. FX info - ** Operation: This routine calls the integer (Hardware) feature - ** extractor if it has not been called before for this blob. - ** The results from the feature extractor are placed into - ** globals so that they can be used in other routines without - ** re-extracting the features. - ** It then copies the baseline features into the IntFeatures - ** array provided by the caller. - ** Return: Number of features extracted or 0 if an error occured. - ** Exceptions: none - ** History: Tue May 28 10:40:52 1991, DSJ, Created. 
- */ - register INT_FEATURE Src, Dest, End; - - if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures, - CharNormFeatures, &FXInfo); - FeaturesHaveBeenExtracted = TRUE; - } - - if (!FeaturesOK) { - *BlobLength = FXInfo.NumBL; - return (0); - } - - for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures; - Src < End; *Dest++ = *Src++); - - ClearCharNormArray(Templates, CharNormArray); + if (!FeaturesOK) { *BlobLength = FXInfo.NumBL; - return (FXInfo.NumBL); + return (0); + } - } /* GetIntBaselineFeatures */ + for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures; + Src < End; + *Dest++ = *Src++); - /*---------------------------------------------------------------------------*/ - int GetIntCharNormFeatures(TBLOB *Blob, - LINE_STATS *LineStats, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - CLASS_NORMALIZATION_ARRAY CharNormArray, - inT32 *BlobLength) { + ClearCharNormArray(Templates, CharNormArray); + *BlobLength = FXInfo.NumBL; + return (FXInfo.NumBL); +} /* GetIntBaselineFeatures */ + +/*---------------------------------------------------------------------------*/ +int Classify::GetIntCharNormFeatures(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength) { /* ** Parameters: ** Blob - blob to extract features from - ** LineStats - statistics about text row blob is in - ** Templates - used to compute char norm adjustments - ** IntFeatures - array to fill with integer features - ** CharNormArray - array to fill with dummy char norm adjustments - ** BlobLength - length of blob in baseline-normalized units - ** Globals: - ** FeaturesHaveBeenExtracted - TRUE if fx has been done - ** BaselineFeatures - holds extracted baseline feat - ** CharNormFeatures - holds extracted char norm feat - ** FXInfo - holds misc. 
FX info - ** Operation: This routine calls the integer (Hardware) feature - ** extractor if it has not been called before for this blob. - ** The results from the feature extractor are placed into - ** globals so that they can be used in other routines without - ** re-extracting the features. - ** It then copies the char norm features into the IntFeatures - ** array provided by the caller. - ** Return: Number of features extracted or 0 if an error occured. - ** Exceptions: none - ** History: Tue May 28 10:40:52 1991, DSJ, Created. - */ - register INT_FEATURE Src, Dest, End; - FEATURE NormFeature; - FLOAT32 Baseline, Scale; + blob to extract features from + ** LineStats + statistics about text row blob is in + ** Templates + used to compute char norm adjustments + ** IntFeatures + array to fill with integer features + ** CharNormArray + array to fill with dummy char norm adjustments + ** BlobLength + length of blob in baseline-normalized units + ** Globals: + ** FeaturesHaveBeenExtracted + TRUE if fx has been done + ** BaselineFeatures + holds extracted baseline feat + ** CharNormFeatures + holds extracted char norm feat + ** FXInfo + holds misc. FX info + ** Operation: This routine calls the integer (Hardware) feature + ** extractor if it has not been called before for this blob. + ** The results from the feature extractor are placed into + ** globals so that they can be used in other routines without + ** re-extracting the features. + ** It then copies the char norm features into the IntFeatures + ** array provided by the caller. + ** Return: Number of features extracted or 0 if an error occured. + ** Exceptions: none + ** History: Tue May 28 10:40:52 1991, DSJ, Created. 
+ */ + register INT_FEATURE Src, Dest, End; + FEATURE NormFeature; + FLOAT32 Baseline, Scale; - if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat (Blob, BaselineFeatures, - CharNormFeatures, &FXInfo); - FeaturesHaveBeenExtracted = TRUE; - } - - if (!FeaturesOK) { - *BlobLength = FXInfo.NumBL; - return (0); - } - - for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures; - Src < End; *Dest++ = *Src++); - - NormFeature = NewFeature (&CharNormDesc); - Baseline = BaselineAt (LineStats, FXInfo.Xmean); - Scale = ComputeScaleFactor (LineStats); - NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; - NormFeature->Params[CharNormLength] = - FXInfo.Length * Scale / LENGTH_COMPRESSION; - NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale; - NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale; - ComputeIntCharNormArray(NormFeature, Templates, CharNormArray); - FreeFeature(NormFeature); + if (!FeaturesHaveBeenExtracted) { + FeaturesOK = ExtractIntFeat(Blob, BaselineFeatures, + CharNormFeatures, &FXInfo); + FeaturesHaveBeenExtracted = TRUE; + } + if (!FeaturesOK) { *BlobLength = FXInfo.NumBL; - return (FXInfo.NumCN); + return (0); + } - } /* GetIntCharNormFeatures */ + for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures; + Src < End; + *Dest++ = *Src++); - /*---------------------------------------------------------------------------*/ - void InitMatcherRatings(register FLOAT32 *Rating) { - /* - ** Parameters: - ** Rating - ptr to array of ratings to be initialized - ** Globals: none - ** Operation: This routine initializes the best rating for each class - ** to be the worst possible rating (1.0). - ** Return: none - ** Exceptions: none - ** History: Tue Mar 12 13:43:28 1991, DSJ, Created. 
- */ - register FLOAT32 *LastRating; - register FLOAT32 WorstRating = WORST_POSSIBLE_RATING; + NormFeature = NewFeature(&CharNormDesc); + Baseline = BaselineAt(LineStats, FXInfo.Xmean); + Scale = ComputeScaleFactor(LineStats); + NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; + NormFeature->Params[CharNormLength] = + FXInfo.Length * Scale / LENGTH_COMPRESSION; + NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale; + NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale; + ComputeIntCharNormArray(NormFeature, Templates, CharNormArray); + FreeFeature(NormFeature); - for (LastRating = Rating + MAX_CLASS_ID; - Rating <= LastRating; *Rating++ = WorstRating); + *BlobLength = FXInfo.NumBL; + return (FXInfo.NumCN); +} /* GetIntCharNormFeatures */ - } /* InitMatcherRatings */ - - /*---------------------------------------------------------------------------*/ - int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int NumFeatures, - INT_FEATURE_ARRAY Features, - FEATURE_SET FloatFeatures) { +/*---------------------------------------------------------------------------*/ +int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures) { /* ** Parameters: ** Templates - adapted templates to add new config to - ** ClassId - class id to associate with new config - ** NumFeatures - number of features in IntFeatures - ** Features - features describing model for new config - ** FloatFeatures - floating-pt representation of features - ** Globals: - ** AllProtosOn - mask to enable all protos - ** AllConfigsOff - mask to disable all configs - ** TempProtoMask - defines old protos matched in new config - ** Operation: - ** Return: The id of the new config created, a negative integer in - ** case of error. - ** Exceptions: none - ** History: Fri Mar 15 08:49:46 1991, DSJ, Created. 
- */ - CLASS_INDEX ClassIndex; - INT_CLASS IClass; - ADAPT_CLASS Class; - PROTO_ID OldProtos[MAX_NUM_PROTOS]; - FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES]; - int NumOldProtos; - int NumBadFeatures; - int MaxProtoId, OldMaxProtoId; - int BlobLength = 0; - int MaskSize; - int ConfigId; - TEMP_CONFIG Config; - int i; - int debug_level = NO_DEBUG; + adapted templates to add new config to + ** ClassId + class id to associate with new config + ** NumFeatures + number of features in IntFeatures + ** Features + features describing model for new config + ** FloatFeatures + floating-pt representation of features + ** Globals: + ** AllProtosOn + mask to enable all protos + ** AllConfigsOff + mask to disable all configs + ** TempProtoMask + defines old protos matched in new config + ** Operation: + ** Return: The id of the new config created, a negative integer in + ** case of error. + ** Exceptions: none + ** History: Fri Mar 15 08:49:46 1991, DSJ, Created. + */ + INT_CLASS IClass; + ADAPT_CLASS Class; + PROTO_ID OldProtos[MAX_NUM_PROTOS]; + FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES]; + int NumOldProtos; + int NumBadFeatures; + int MaxProtoId, OldMaxProtoId; + int BlobLength = 0; + int MaskSize; + int ConfigId; + TEMP_CONFIG Config; + int i; + int debug_level = NO_DEBUG; - if (LearningDebugLevel >= 3) - debug_level = + if (classify_learning_debug_level >= 3) + debug_level = PRINT_MATCH_SUMMARY | PRINT_FEATURE_MATCHES | PRINT_PROTO_MATCHES; - ClassIndex = Templates->Templates->IndexFor[ClassId]; - IClass = ClassForClassId (Templates->Templates, ClassId); - Class = Templates->Class[ClassIndex]; + IClass = ClassForClassId(Templates->Templates, ClassId); + Class = Templates->Class[ClassId]; - if (IClass->NumConfigs >= MAX_NUM_CONFIGS) - { - ++NumAdaptationsFailed; - if (LearningDebugLevel >= 1) - cprintf ("Cannot make new temporary config: maximum number exceeded.\n"); - return -1; - } + if (IClass->NumConfigs >= MAX_NUM_CONFIGS) { + ++NumAdaptationsFailed; + if 
(classify_learning_debug_level >= 1) + cprintf("Cannot make new temporary config: maximum number exceeded.\n"); + return -1; + } - OldMaxProtoId = IClass->NumProtos - 1; + OldMaxProtoId = IClass->NumProtos - 1; - NumOldProtos = FindGoodProtos (IClass, AllProtosOn, AllConfigsOff, - BlobLength, NumFeatures, Features, - OldProtos, debug_level); + NumOldProtos = FindGoodProtos(IClass, AllProtosOn, AllConfigsOff, + BlobLength, NumFeatures, Features, + OldProtos, debug_level); - MaskSize = WordsInVectorOfSize (MAX_NUM_PROTOS); - zero_all_bits(TempProtoMask, MaskSize); - for (i = 0; i < NumOldProtos; i++) - SET_BIT (TempProtoMask, OldProtos[i]); + MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS); + zero_all_bits(TempProtoMask, MaskSize); + for (i = 0; i < NumOldProtos; i++) + SET_BIT(TempProtoMask, OldProtos[i]); - NumBadFeatures = FindBadFeatures (IClass, TempProtoMask, AllConfigsOn, - BlobLength, NumFeatures, Features, - BadFeatures, debug_level); + NumBadFeatures = FindBadFeatures(IClass, TempProtoMask, AllConfigsOn, + BlobLength, NumFeatures, Features, + BadFeatures, debug_level); - MaxProtoId = MakeNewTempProtos (FloatFeatures, NumBadFeatures, BadFeatures, - IClass, Class, TempProtoMask); - if (MaxProtoId == NO_PROTO) - { - ++NumAdaptationsFailed; - if (LearningDebugLevel >= 1) - cprintf ("Cannot make new temp protos: maximum number exceeded.\n"); - return -1; - } + MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures, + IClass, Class, TempProtoMask); + if (MaxProtoId == NO_PROTO) { + ++NumAdaptationsFailed; + if (classify_learning_debug_level >= 1) + cprintf("Cannot make new temp protos: maximum number exceeded.\n"); + return -1; + } - ConfigId = AddIntConfig (IClass); - ConvertConfig(TempProtoMask, ConfigId, IClass); - Config = NewTempConfig (MaxProtoId); - TempConfigFor (Class, ConfigId) = Config; - copy_all_bits (TempProtoMask, Config->Protos, Config->ProtoVectorSize); + ConfigId = AddIntConfig(IClass); + ConvertConfig(TempProtoMask, ConfigId, 
IClass); + Config = NewTempConfig(MaxProtoId); + TempConfigFor(Class, ConfigId) = Config; + copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize); - if (LearningDebugLevel >= 1) - cprintf ("Making new temp config %d using %d old and %d new protos.\n", - ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId); + if (classify_learning_debug_level >= 1) + cprintf("Making new temp config %d using %d old and %d new protos.\n", + ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId); - return ConfigId; - } /* MakeNewTemporaryConfig */ + return ConfigId; +} /* MakeNewTemporaryConfig */ +} // namespace tesseract - /*---------------------------------------------------------------------------*/ - PROTO_ID - MakeNewTempProtos (FEATURE_SET Features, - int NumBadFeat, - FEATURE_ID BadFeat[], - INT_CLASS IClass, - ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) { +/*---------------------------------------------------------------------------*/ +PROTO_ID +MakeNewTempProtos(FEATURE_SET Features, + int NumBadFeat, + FEATURE_ID BadFeat[], + INT_CLASS IClass, + ADAPT_CLASS Class, BIT_VECTOR TempProtoMask) { /* ** Parameters: ** Features - floating-pt features describing new character - ** NumBadFeat - number of bad features to turn into protos - ** BadFeat - feature id's of bad features - ** IClass - integer class templates to add new protos to - ** Class - adapted class templates to add new protos to - ** TempProtoMask - proto mask to add new protos to - ** Globals: none - ** Operation: This routine finds sets of sequential bad features - ** that all have the same angle and converts each set into - ** a new temporary proto. The temp proto is added to the - ** proto pruner for IClass, pushed onto the list of temp - ** protos in Class, and added to TempProtoMask. - ** Return: Max proto id in class after all protos have been added. - ** Exceptions: none - ** History: Fri Mar 15 11:39:38 1991, DSJ, Created. 
- */ - FEATURE_ID *ProtoStart; - FEATURE_ID *ProtoEnd; - FEATURE_ID *LastBad; - TEMP_PROTO TempProto; - PROTO Proto; - FEATURE F1, F2; - FLOAT32 X1, X2, Y1, Y2; - FLOAT32 A1, A2, AngleDelta; - FLOAT32 SegmentLength; - PROTO_ID Pid; + floating-pt features describing new character + ** NumBadFeat + number of bad features to turn into protos + ** BadFeat + feature id's of bad features + ** IClass + integer class templates to add new protos to + ** Class + adapted class templates to add new protos to + ** TempProtoMask + proto mask to add new protos to + ** Globals: none + ** Operation: This routine finds sets of sequential bad features + ** that all have the same angle and converts each set into + ** a new temporary proto. The temp proto is added to the + ** proto pruner for IClass, pushed onto the list of temp + ** protos in Class, and added to TempProtoMask. + ** Return: Max proto id in class after all protos have been added. + ** Exceptions: none + ** History: Fri Mar 15 11:39:38 1991, DSJ, Created. 
+ */ + FEATURE_ID *ProtoStart; + FEATURE_ID *ProtoEnd; + FEATURE_ID *LastBad; + TEMP_PROTO TempProto; + PROTO Proto; + FEATURE F1, F2; + FLOAT32 X1, X2, Y1, Y2; + FLOAT32 A1, A2, AngleDelta; + FLOAT32 SegmentLength; + PROTO_ID Pid; - for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; - ProtoStart < LastBad; ProtoStart = ProtoEnd) { - F1 = Features->Features[*ProtoStart]; - X1 = F1->Params[PicoFeatX]; - Y1 = F1->Params[PicoFeatY]; - A1 = F1->Params[PicoFeatDir]; + for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat; + ProtoStart < LastBad; ProtoStart = ProtoEnd) { + F1 = Features->Features[*ProtoStart]; + X1 = F1->Params[PicoFeatX]; + Y1 = F1->Params[PicoFeatY]; + A1 = F1->Params[PicoFeatDir]; - for (ProtoEnd = ProtoStart + 1, - SegmentLength = GetPicoFeatureLength (); - ProtoEnd < LastBad; - ProtoEnd++, SegmentLength += GetPicoFeatureLength ()) { - F2 = Features->Features[*ProtoEnd]; - X2 = F2->Params[PicoFeatX]; - Y2 = F2->Params[PicoFeatY]; - A2 = F2->Params[PicoFeatDir]; - - AngleDelta = fabs (A1 - A2); - if (AngleDelta > 0.5) - AngleDelta = 1.0 - AngleDelta; - - if (AngleDelta > MaxAngleDelta || - fabs (X1 - X2) > SegmentLength || - fabs (Y1 - Y2) > SegmentLength) - break; - } - - F2 = Features->Features[*(ProtoEnd - 1)]; + for (ProtoEnd = ProtoStart + 1, + SegmentLength = GetPicoFeatureLength(); + ProtoEnd < LastBad; + ProtoEnd++, SegmentLength += GetPicoFeatureLength()) { + F2 = Features->Features[*ProtoEnd]; X2 = F2->Params[PicoFeatX]; Y2 = F2->Params[PicoFeatY]; A2 = F2->Params[PicoFeatDir]; - Pid = AddIntProto (IClass); - if (Pid == NO_PROTO) - return (NO_PROTO); + AngleDelta = fabs(A1 - A2); + if (AngleDelta > 0.5) + AngleDelta = 1.0 - AngleDelta; - TempProto = NewTempProto (); - Proto = &(TempProto->Proto); - - /* compute proto params - NOTE that Y_DIM_OFFSET must be used because - ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 - instead of the -0.25 to 0.75 used in baseline normalization */ - Proto->Length = 
SegmentLength; - Proto->Angle = A1; - Proto->X = (X1 + X2) / 2.0; - Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET; - FillABC(Proto); - - TempProto->ProtoId = Pid; - SET_BIT(TempProtoMask, Pid); - - ConvertProto(Proto, Pid, IClass); - AddProtoToProtoPruner(Proto, Pid, IClass); - - Class->TempProtos = push (Class->TempProtos, TempProto); + if (AngleDelta > matcher_clustering_max_angle_delta || + fabs(X1 - X2) > SegmentLength || + fabs(Y1 - Y2) > SegmentLength) + break; } - return (IClass->NumProtos - 1); - } /* MakeNewTempProtos */ - /*---------------------------------------------------------------------------*/ - void MakePermanent(ADAPT_TEMPLATES Templates, - CLASS_ID ClassId, - int ConfigId, - TBLOB *Blob, - LINE_STATS *LineStats) { + F2 = Features->Features[*(ProtoEnd - 1)]; + X2 = F2->Params[PicoFeatX]; + Y2 = F2->Params[PicoFeatY]; + A2 = F2->Params[PicoFeatDir]; + + Pid = AddIntProto(IClass); + if (Pid == NO_PROTO) + return (NO_PROTO); + + TempProto = NewTempProto(); + Proto = &(TempProto->Proto); + + /* compute proto params - NOTE that Y_DIM_OFFSET must be used because + ConvertProto assumes that the Y dimension varies from -0.5 to 0.5 + instead of the -0.25 to 0.75 used in baseline normalization */ + Proto->Length = SegmentLength; + Proto->Angle = A1; + Proto->X = (X1 + X2) / 2.0; + Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET; + FillABC(Proto); + + TempProto->ProtoId = Pid; + SET_BIT(TempProtoMask, Pid); + + ConvertProto(Proto, Pid, IClass); + AddProtoToProtoPruner(Proto, Pid, IClass); + + Class->TempProtos = push(Class->TempProtos, TempProto); + } + return IClass->NumProtos - 1; +} /* MakeNewTempProtos */ + +/*---------------------------------------------------------------------------*/ +namespace tesseract { +void Classify::MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob, + LINE_STATS *LineStats) { /* ** Parameters: ** Templates - current set of adaptive templates - ** ClassId - class containing config to be made 
permanent - ** ConfigId - config to be made permanent - ** Blob - current blob being adapted to - ** LineStats - statistics about text line Blob is in - ** Globals: none - ** Operation: - ** Return: none - ** Exceptions: none - ** History: Thu Mar 14 15:54:08 1991, DSJ, Created. - */ - UNICHAR_ID *Ambigs; - TEMP_CONFIG Config; - CLASS_INDEX ClassIndex; - ADAPT_CLASS Class; - PROTO_KEY ProtoKey; + current set of adaptive templates + ** ClassId + class containing config to be made permanent + ** ConfigId + config to be made permanent + ** Blob + current blob being adapted to + ** LineStats + statistics about text line Blob is in + ** Globals: none + ** Operation: + ** Return: none + ** Exceptions: none + ** History: Thu Mar 14 15:54:08 1991, DSJ, Created. + */ + UNICHAR_ID *Ambigs; + TEMP_CONFIG Config; + ADAPT_CLASS Class; + PROTO_KEY ProtoKey; - ClassIndex = Templates->Templates->IndexFor[ClassId]; - Class = Templates->Class[ClassIndex]; - Config = TempConfigFor (Class, ConfigId); + Class = Templates->Class[ClassId]; + Config = TempConfigFor(Class, ConfigId); - MakeConfigPermanent(Class, ConfigId); - if (Class->NumPermConfigs == 0) - Templates->NumPermClasses++; - Class->NumPermConfigs++; + MakeConfigPermanent(Class, ConfigId); + if (Class->NumPermConfigs == 0) + Templates->NumPermClasses++; + Class->NumPermConfigs++; - ProtoKey.Templates = Templates; - ProtoKey.ClassId = ClassId; - ProtoKey.ConfigId = ConfigId; - Class->TempProtos = delete_d (Class->TempProtos, &ProtoKey, - MakeTempProtoPerm); - FreeTempConfig(Config); + ProtoKey.Templates = Templates; + ProtoKey.ClassId = ClassId; + ProtoKey.ConfigId = ConfigId; + Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, + MakeTempProtoPerm); + FreeTempConfig(Config); - Ambigs = GetAmbiguities (Blob, LineStats, ClassId); - PermConfigFor (Class, ConfigId) = Ambigs; + Ambigs = GetAmbiguities(Blob, LineStats, ClassId); + PermConfigFor(Class, ConfigId) = Ambigs; - if (LearningDebugLevel >= 1) { - cprintf ("Making 
config %d permanent with ambiguities '", - ConfigId, Ambigs); - for (UNICHAR_ID *AmbigsPointer = Ambigs; - *AmbigsPointer >= 0; ++AmbigsPointer) - cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); - cprintf("'.\n"); - } + if (classify_learning_debug_level >= 1) { + cprintf("Making config %d permanent with ambiguities '", + ConfigId, Ambigs); + for (UNICHAR_ID *AmbigsPointer = Ambigs; + *AmbigsPointer >= 0; ++AmbigsPointer) + cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); + cprintf("'.\n"); + } +} /* MakePermanent */ +} // namespace tesseract - } /* MakePermanent */ - - /*---------------------------------------------------------------------------*/ - int MakeTempProtoPerm(void *item1, //TEMP_PROTO TempProto, - void *item2) { //PROTO_KEY *ProtoKey) +/*---------------------------------------------------------------------------*/ +int MakeTempProtoPerm(void *item1, void *item2) { /* - ** Parameters: - ** TempProto - temporary proto to compare to key - ** ProtoKey - defines which protos to make permanent - ** Globals: none - ** Operation: This routine converts TempProto to be permanent if - ** its proto id is used by the configuration specified in - ** ProtoKey. - ** Return: TRUE if TempProto is converted, FALSE otherwise - ** Exceptions: none - ** History: Thu Mar 14 18:49:54 1991, DSJ, Created. - */ - CLASS_INDEX ClassIndex; - ADAPT_CLASS Class; - TEMP_CONFIG Config; - TEMP_PROTO TempProto; - PROTO_KEY *ProtoKey; + ** Parameters: + ** TempProto temporary proto to compare to key + ** ProtoKey defines which protos to make permanent + ** Globals: none + ** Operation: This routine converts TempProto to be permanent if + ** its proto id is used by the configuration specified in + ** ProtoKey. + ** Return: TRUE if TempProto is converted, FALSE otherwise + ** Exceptions: none + ** History: Thu Mar 14 18:49:54 1991, DSJ, Created. 
+ */ + ADAPT_CLASS Class; + TEMP_CONFIG Config; + TEMP_PROTO TempProto; + PROTO_KEY *ProtoKey; - TempProto = (TEMP_PROTO) item1; - ProtoKey = (PROTO_KEY *) item2; + TempProto = (TEMP_PROTO) item1; + ProtoKey = (PROTO_KEY *) item2; - ClassIndex = ProtoKey->Templates->Templates->IndexFor[ProtoKey->ClassId]; - Class = ProtoKey->Templates->Class[ClassIndex]; - Config = TempConfigFor (Class, ProtoKey->ConfigId); + Class = ProtoKey->Templates->Class[ProtoKey->ClassId]; + Config = TempConfigFor(Class, ProtoKey->ConfigId); - if (TempProto->ProtoId > Config->MaxProtoId || + if (TempProto->ProtoId > Config->MaxProtoId || !test_bit (Config->Protos, TempProto->ProtoId)) - return (FALSE); + return FALSE; - MakeProtoPermanent (Class, TempProto->ProtoId); - AddProtoToClassPruner (&(TempProto->Proto), ProtoKey->ClassId, - ProtoKey->Templates->Templates); - FreeTempProto(TempProto); + MakeProtoPermanent(Class, TempProto->ProtoId); + AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId, + ProtoKey->Templates->Templates); + FreeTempProto(TempProto); - return (TRUE); + return TRUE; +} /* MakeTempProtoPerm */ - } /* MakeTempProtoPerm */ - - /*---------------------------------------------------------------------------*/ - int NumBlobsIn(TWERD *Word) { +/*---------------------------------------------------------------------------*/ +int NumBlobsIn(TWERD *Word) { /* ** Parameters: ** Word - word to count blobs in - ** Globals: none - ** Operation: This routine returns the number of blobs in Word. - ** Return: Number of blobs in Word. - ** Exceptions: none - ** History: Thu Mar 14 08:30:27 1991, DSJ, Created. - */ - register TBLOB *Blob; - register int NumBlobs; + word to count blobs in + ** Globals: none + ** Operation: This routine returns the number of blobs in Word. + ** Return: Number of blobs in Word. + ** Exceptions: none + ** History: Thu Mar 14 08:30:27 1991, DSJ, Created. 
+ */ + register TBLOB *Blob; + register int NumBlobs; - if (Word == NULL) - return (0); + if (Word == NULL) + return (0); - for (Blob = Word->blobs, NumBlobs = 0; - Blob != NULL; Blob = Blob->next, NumBlobs++); + for (Blob = Word->blobs, NumBlobs = 0; + Blob != NULL; Blob = Blob->next, NumBlobs++); - return (NumBlobs); + return (NumBlobs); - } /* NumBlobsIn */ +} /* NumBlobsIn */ - /*---------------------------------------------------------------------------*/ - int NumOutlinesInBlob(TBLOB *Blob) { +/*---------------------------------------------------------------------------*/ +int NumOutlinesInBlob(TBLOB *Blob) { /* ** Parameters: ** Blob - blob to count outlines in - ** Globals: none - ** Operation: This routine returns the number of OUTER outlines - ** in Blob. - ** Return: Number of outer outlines in Blob. - ** Exceptions: none - ** History: Mon Jun 10 15:46:20 1991, DSJ, Created. - */ - register TESSLINE *Outline; - register int NumOutlines; + blob to count outlines in + ** Globals: none + ** Operation: This routine returns the number of OUTER outlines + ** in Blob. + ** Return: Number of outer outlines in Blob. + ** Exceptions: none + ** History: Mon Jun 10 15:46:20 1991, DSJ, Created. 
+ */ + register TESSLINE *Outline; + register int NumOutlines; - if (Blob == NULL) - return (0); + if (Blob == NULL) + return (0); - for (Outline = Blob->outlines, NumOutlines = 0; - Outline != NULL; Outline = Outline->next, NumOutlines++); + for (Outline = Blob->outlines, NumOutlines = 0; + Outline != NULL; Outline = Outline->next, NumOutlines++); - return (NumOutlines); + return (NumOutlines); - } /* NumOutlinesInBlob */ +} /* NumOutlinesInBlob */ - /*---------------------------------------------------------------------------*/ - void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) { +/*---------------------------------------------------------------------------*/ +namespace tesseract { +void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) { /* ** Parameters: ** File - open text file to write Results to - ** Results - match results to write to File - ** Globals: none - ** Operation: This routine writes the matches in Results to File. - ** Return: none - ** Exceptions: none - ** History: Mon Mar 18 09:24:53 1991, DSJ, Created. - */ - for (int i = 0; i < Results->NumMatches; ++i) { - cprintf("%s(%d) %.2f ", - unicharset.debug_str(Results->Classes[i]).string(), - Results->Classes[i], - Results->Ratings[Results->Classes[i]] * 100.0); - } - } /* PrintAdaptiveMatchResults */ + open text file to write Results to + ** Results + match results to write to File + ** Globals: none + ** Operation: This routine writes the matches in Results to File. + ** Return: none + ** Exceptions: none + ** History: Mon Mar 18 09:24:53 1991, DSJ, Created. 
+ */ + for (int i = 0; i < Results->NumMatches; ++i) { + cprintf("%s(%d) %.2f ", + unicharset.debug_str(Results->Classes[i]).string(), + Results->Classes[i], + Results->Ratings[Results->Classes[i]] * 100.0); + } + printf("\n"); +} /* PrintAdaptiveMatchResults */ - /*---------------------------------------------------------------------------*/ - void RemoveBadMatches(ADAPT_RESULTS *Results) { +/*---------------------------------------------------------------------------*/ +void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) { /* ** Parameters: ** Results - contains matches to be filtered - ** Globals: - ** BadMatchPad - defines a "bad match" - ** Operation: This routine steps thru each matching class in Results - ** and removes it from the match list if its rating - ** is worse than the BestRating plus a pad. In other words, - ** all good matches get moved to the front of the classes - ** array. - ** Return: none - ** Exceptions: none - ** History: Tue Mar 12 13:51:03 1991, DSJ, Created. - */ - int Next, NextGood; - FLOAT32 *Rating = Results->Ratings; - CLASS_ID *Match = Results->Classes; - FLOAT32 BadMatchThreshold; - static const char* romans = "i v x I V X"; - BadMatchThreshold = Results->BestRating + BadMatchPad; + contains matches to be filtered + ** Globals: + ** matcher_bad_match_pad + defines a "bad match" + ** Operation: This routine steps thru each matching class in Results + ** and removes it from the match list if its rating + ** is worse than the BestRating plus a pad. In other words, + ** all good matches get moved to the front of the classes + ** array. + ** Return: none + ** Exceptions: none + ** History: Tue Mar 12 13:51:03 1991, DSJ, Created. 
+ */ + int Next, NextGood; + FLOAT32 *Rating = Results->Ratings; + CLASS_ID *Match = Results->Classes; + FLOAT32 BadMatchThreshold; + static const char* romans = "i v x I V X"; + BadMatchThreshold = Results->BestRating + matcher_bad_match_pad; - if (bln_numericmode) { - UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? - unicharset.unichar_to_id("1") : -1; - UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? - unicharset.unichar_to_id("0") : -1; - for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { - if (Rating[Match[Next]] <= BadMatchThreshold) { - if (!unicharset.get_isalpha(Match[Next]) || - strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) { - Match[NextGood++] = Match[Next]; - } else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") && - Rating[unichar_id_one] >= BadMatchThreshold) { - Match[NextGood++] = unichar_id_one; - Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")]; - } else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") && - Rating[unichar_id_zero] >= BadMatchThreshold) { - Match[NextGood++] = unichar_id_zero; - Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")]; - } + if (bln_numericmode) { + UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ? + unicharset.unichar_to_id("1") : -1; + UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ? 
+ unicharset.unichar_to_id("0") : -1; + for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { + if (Rating[Match[Next]] <= BadMatchThreshold) { + if (!unicharset.get_isalpha(Match[Next]) || + strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) { + Match[NextGood++] = Match[Next]; + } else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") && + Rating[unichar_id_one] >= BadMatchThreshold) { + Match[NextGood++] = unichar_id_one; + Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")]; + } else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") && + Rating[unichar_id_zero] >= BadMatchThreshold) { + Match[NextGood++] = unichar_id_zero; + Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")]; } } } + } + else { + for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { + if (Rating[Match[Next]] <= BadMatchThreshold) + Match[NextGood++] = Match[Next]; + } + } + + Results->NumMatches = NextGood; + +} /* RemoveBadMatches */ + +/*----------------------------------------------------------------------------*/ +void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) { + /* + ** Parameters: + ** Results + contains matches to be filtered + ** Globals: + ** matcher_bad_match_pad + defines a "bad match" + ** Operation: This routine steps thru each matching class in Results + ** and removes it from the match list if its rating + ** is worse than the BestRating plus a pad. In other words, + ** all good matches get moved to the front of the classes + ** array. + ** Return: none + ** Exceptions: none + ** History: Tue Mar 12 13:51:03 1991, DSJ, Created. + */ + int Next, NextGood; + int punc_count; /*no of garbage characters */ + int digit_count; + CLASS_ID *Match = Results->Classes; + /*garbage characters */ + static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! 
_ ^"; + static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9"; + + punc_count = 0; + digit_count = 0; + for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { + if (strstr (punc_chars, + unicharset.id_to_unichar(Match[Next])) == NULL) { + if (strstr (digit_chars, + unicharset.id_to_unichar(Match[Next])) == NULL) { + Match[NextGood++] = Match[Next]; + } + else { + if (digit_count < 1) + Match[NextGood++] = Match[Next]; + digit_count++; + } + } else { - for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { - if (Rating[Match[Next]] <= BadMatchThreshold) - Match[NextGood++] = Match[Next]; - } + if (punc_count < 2) + Match[NextGood++] = Match[Next]; + punc_count++; /*count them */ } + } + Results->NumMatches = NextGood; +} /* RemoveExtraPuncs */ +} // namespace tesseract - Results->NumMatches = NextGood; - - } /* RemoveBadMatches */ - - /*----------------------------------------------------------------------------------*/ - void RemoveExtraPuncs(ADAPT_RESULTS *Results) { - /* - ** Parameters: - ** Results - contains matches to be filtered - ** Globals: - ** BadMatchPad - defines a "bad match" - ** Operation: This routine steps thru each matching class in Results - ** and removes it from the match list if its rating - ** is worse than the BestRating plus a pad. In other words, - ** all good matches get moved to the front of the classes - ** array. - ** Return: none - ** Exceptions: none - ** History: Tue Mar 12 13:51:03 1991, DSJ, Created. - */ - int Next, NextGood; - int punc_count; /*no of garbage characters */ - int digit_count; - CLASS_ID *Match = Results->Classes; - /*garbage characters */ - static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! 
_ ^"; - static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9"; - - punc_count = 0; - digit_count = 0; - for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { - if (strstr (punc_chars, - unicharset.id_to_unichar(Match[Next])) == NULL) { - if (strstr (digit_chars, - unicharset.id_to_unichar(Match[Next])) == NULL) { - Match[NextGood++] = Match[Next]; - } - else { - if (digit_count < 1) - Match[NextGood++] = Match[Next]; - digit_count++; - } - } - else { - if (punc_count < 2) - Match[NextGood++] = Match[Next]; - punc_count++; /*count them */ - } - } - Results->NumMatches = NextGood; - } /* RemoveExtraPuncs */ - - /*---------------------------------------------------------------------------*/ - void SetAdaptiveThreshold(FLOAT32 Threshold) { +/*---------------------------------------------------------------------------*/ +void SetAdaptiveThreshold(FLOAT32 Threshold) { /* ** Parameters: ** Threshold - threshold for creating new templates - ** Globals: - ** GoodAdaptiveMatch - default good match rating - ** Operation: This routine resets the internal thresholds inside - ** the integer matcher to correspond to the specified - ** threshold. - ** Return: none - ** Exceptions: none - ** History: Tue Apr 9 08:33:13 1991, DSJ, Created. - */ - if (Threshold == GoodAdaptiveMatch) { - /* the blob was probably classified correctly - use the default rating - threshold */ - SetProtoThresh (0.9); - SetFeatureThresh (0.9); - } - else { - /* the blob was probably incorrectly classified */ - SetProtoThresh (1.0 - Threshold); - SetFeatureThresh (1.0 - Threshold); - } - } /* SetAdaptiveThreshold */ + threshold for creating new templates + ** Globals: + ** matcher_good_threshold + default good match rating + ** Operation: This routine resets the internal thresholds inside + ** the integer matcher to correspond to the specified + ** threshold. + ** Return: none + ** Exceptions: none + ** History: Tue Apr 9 08:33:13 1991, DSJ, Created. 
+ */ + if (Threshold == matcher_good_threshold) { + /* the blob was probably classified correctly - use the default rating + threshold */ + SetProtoThresh (0.9); + SetFeatureThresh (0.9); + } + else { + /* the blob was probably incorrectly classified */ + SetProtoThresh (1.0 - Threshold); + SetFeatureThresh (1.0 - Threshold); + } +} /* SetAdaptiveThreshold */ - /*---------------------------------------------------------------------------*/ - void ShowBestMatchFor(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - BOOL8 AdaptiveOn, - BOOL8 PreTrainedOn) { +/*---------------------------------------------------------------------------*/ +namespace tesseract { +void Classify::ShowBestMatchFor(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + BOOL8 AdaptiveOn, + BOOL8 PreTrainedOn) { /* ** Parameters: ** Blob - blob to show best matching config for - ** LineStats - statistics for text line Blob is in - ** ClassId - class whose configs are to be searched - ** AdaptiveOn - TRUE if adaptive configs are enabled - ** PreTrainedOn - TRUE if pretrained configs are enabled - ** Globals: - ** PreTrainedTemplates - built-in training - ** AdaptedTemplates - adaptive templates - ** AllProtosOn - dummy proto mask - ** AllConfigsOn - dummy config mask - ** Operation: This routine compares Blob to both sets of templates - ** (adaptive and pre-trained) and then displays debug - ** information for the config which matched best. - ** Return: none - ** Exceptions: none - ** History: Fri Mar 22 08:43:52 1991, DSJ, Created. 
- */ - int NumCNFeatures = 0, NumBLFeatures = 0; - INT_FEATURE_ARRAY CNFeatures, BLFeatures; - INT_RESULT_STRUCT CNResult, BLResult; - CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust; - CLASS_INDEX ClassIndex; - inT32 BlobLength; - uinT32 ConfigMask; - static int next_config = -1; + blob to show best matching config for + ** LineStats + statistics for text line Blob is in + ** ClassId + class whose configs are to be searched + ** AdaptiveOn + TRUE if adaptive configs are enabled + ** PreTrainedOn + TRUE if pretrained configs are enabled + ** Globals: + ** PreTrainedTemplates + built-in training + ** AdaptedTemplates + adaptive templates + ** AllProtosOn + dummy proto mask + ** AllConfigsOn + dummy config mask + ** Operation: This routine compares Blob to both sets of templates + ** (adaptive and pre-trained) and then displays debug + ** information for the config which matched best. + ** Return: none + ** Exceptions: none + ** History: Fri Mar 22 08:43:52 1991, DSJ, Created. + */ + int NumCNFeatures = 0, NumBLFeatures = 0; + INT_FEATURE_ARRAY CNFeatures, BLFeatures; + INT_RESULT_STRUCT CNResult, BLResult; + inT32 BlobLength; + uinT32 ConfigMask; + static int next_config = -1; - if (PreTrainedOn) next_config = -1; + if (PreTrainedOn) next_config = -1; - CNResult.Rating = BLResult.Rating = 2.0; + CNResult.Rating = BLResult.Rating = 2.0; - if (!LegalClassId (ClassId)) { - cprintf ("%d is not a legal class id!!\n", ClassId); - return; - } + if (!LegalClassId (ClassId)) { + cprintf ("%d is not a legal class id!!\n", ClassId); + return; + } - if (PreTrainedOn) { - if (UnusedClassIdIn (PreTrainedTemplates, ClassId)) - cprintf ("No built-in templates for class %d = %s\n", - ClassId, unicharset.id_to_unichar(ClassId)); - else { - NumCNFeatures = GetCharNormFeatures (Blob, LineStats, - PreTrainedTemplates, - CNFeatures, CNAdjust, - &BlobLength); - if (NumCNFeatures <= 0) - cprintf ("Illegal blob (char norm features)!\n"); - else { - ClassIndex = 
PreTrainedTemplates->IndexFor[ClassId]; + uinT8 *CNAdjust = new uinT8[MAX_NUM_CLASSES]; + uinT8 *BLAdjust = new uinT8[MAX_NUM_CLASSES]; - SetCharNormMatch(); - IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), - AllProtosOn, AllConfigsOn, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassIndex], &CNResult, NO_DEBUG); - - cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n", - CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassIndex]); - } - } - } - - if (AdaptiveOn) { - if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) - cprintf ("No AD templates for class %d = %s\n", - ClassId, unicharset.id_to_unichar(ClassId)); - else { - NumBLFeatures = GetBaselineFeatures (Blob, LineStats, - AdaptedTemplates->Templates, - BLFeatures, BLAdjust, - &BlobLength); - if (NumBLFeatures <= 0) - cprintf ("Illegal blob (baseline features)!\n"); - else { - ClassIndex =AdaptedTemplates->Templates->IndexFor[ClassId]; - - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId - (AdaptedTemplates->Templates, ClassId), - AllProtosOn, AllConfigsOn, - // AdaptedTemplates->Class[ClassIndex]->PermProtos, - // AdaptedTemplates->Class[ClassIndex]->PermConfigs, - BlobLength, NumBLFeatures, BLFeatures, - BLAdjust[ClassIndex], &BLResult, NO_DEBUG); - - #ifndef SECURE_NAMES - int ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId]; - ADAPT_CLASS Class = AdaptedTemplates->Class[ClassIndex]; - cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n", - BLResult.Config, BLResult.Rating * 100.0, - ConfigIsPermanent(Class, BLResult.Config) ? 
"Perm" : "Temp"); - #endif - } - } - } - - cprintf ("\n"); - if (BLResult.Rating < CNResult.Rating) { - ClassIndex = AdaptedTemplates->Templates->IndexFor[ClassId]; - if (next_config < 0) { - ConfigMask = 1 << BLResult.Config; - next_config = 0; - } else { - ConfigMask = 1 << next_config; - ++next_config; - } - NormMethod = baseline; - - SetBaseLineMatch(); - IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId), - AllProtosOn, -// AdaptedTemplates->Class[ClassIndex]->PermProtos, - (BIT_VECTOR) & ConfigMask, - BlobLength, NumBLFeatures, BLFeatures, - BLAdjust[ClassIndex], &BLResult, MatchDebugFlags); - cprintf ("Adaptive template match for config %2d is %4.1f\n", - BLResult.Config, BLResult.Rating * 100.0); - } + if (PreTrainedOn) { + if (UnusedClassIdIn (PreTrainedTemplates, ClassId)) + cprintf ("No built-in templates for class %d = %s\n", + ClassId, unicharset.id_to_unichar(ClassId)); else { - ClassIndex = PreTrainedTemplates->IndexFor[ClassId]; - ConfigMask = 1 << CNResult.Config; - NormMethod = character; + NumCNFeatures = GetCharNormFeatures (Blob, LineStats, + PreTrainedTemplates, + CNFeatures, CNAdjust, + &BlobLength); + if (NumCNFeatures <= 0) + cprintf ("Illegal blob (char norm features)!\n"); + else { + SetCharNormMatch(); + IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), + AllProtosOn, AllConfigsOn, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, NO_DEBUG); - SetCharNormMatch(); - //xiaofan - IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask, - BlobLength, NumCNFeatures, CNFeatures, - CNAdjust[ClassIndex], &CNResult, MatchDebugFlags); + cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n", + CNResult.Config, CNResult.Rating * 100.0, CNAdjust[ClassId]); + } } - } /* ShowBestMatchFor */ + } + + if (AdaptiveOn) { + if (UnusedClassIdIn (AdaptedTemplates->Templates, ClassId)) + cprintf ("No AD templates for class %d = %s\n", + 
ClassId, unicharset.id_to_unichar(ClassId)); + else { + NumBLFeatures = GetBaselineFeatures (Blob, LineStats, + AdaptedTemplates->Templates, + BLFeatures, BLAdjust, + &BlobLength); + if (NumBLFeatures <= 0) + cprintf ("Illegal blob (baseline features)!\n"); + else { + SetBaseLineMatch(); + IntegerMatcher (ClassForClassId + (AdaptedTemplates->Templates, ClassId), + AllProtosOn, AllConfigsOn, + // AdaptedTemplates->Class[ClassId]->PermProtos, + // AdaptedTemplates->Class[ClassId]->PermConfigs, + BlobLength, NumBLFeatures, BLFeatures, + BLAdjust[ClassId], &BLResult, NO_DEBUG); + +#ifndef SECURE_NAMES + ADAPT_CLASS Class = AdaptedTemplates->Class[ClassId]; + cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n", + BLResult.Config, BLResult.Rating * 100.0, + ConfigIsPermanent(Class, BLResult.Config) ? "Perm" : "Temp"); +#endif + } + } + } + + cprintf ("\n"); + if (BLResult.Rating < CNResult.Rating) { + if (next_config < 0) { + ConfigMask = 1 << BLResult.Config; + next_config = 0; + } else { + ConfigMask = 1 << next_config; + ++next_config; + } + classify_norm_method.set_value(baseline); + + SetBaseLineMatch(); + IntegerMatcher (ClassForClassId (AdaptedTemplates->Templates, ClassId), + AllProtosOn, + // AdaptedTemplates->Class[ClassId]->PermProtos, + (BIT_VECTOR) & ConfigMask, + BlobLength, NumBLFeatures, BLFeatures, + BLAdjust[ClassId], &BLResult, matcher_debug_flags); + cprintf ("Adaptive template match for config %2d is %4.1f\n", + BLResult.Config, BLResult.Rating * 100.0); + } + else { + ConfigMask = 1 << CNResult.Config; + classify_norm_method.set_value(character); + + SetCharNormMatch(); + //xiaofan + IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask, + BlobLength, NumCNFeatures, CNFeatures, + CNAdjust[ClassId], &CNResult, matcher_debug_flags); + } + + // Clean up. 
+ delete[] CNAdjust; + delete[] BLAdjust; +} /* ShowBestMatchFor */ +} // namespace tesseract diff --git a/classify/adaptmatch.h b/classify/adaptmatch.h index af5b646de..59c049110 100644 --- a/classify/adaptmatch.h +++ b/classify/adaptmatch.h @@ -32,54 +32,24 @@ /*--------------------------------------------------------------------------- Variables ----------------------------------------------------------------------------*/ -extern float GoodAdaptiveMatch; -extern float GreatAdaptiveMatch; -extern int ReliableConfigThreshold; -extern int tess_cn_matching; -extern int tess_bn_matching; -extern int LearningDebugLevel; +extern double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)"); +extern double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)"); +extern INT_VAR_H(matcher_failed_adaptations_before_reset, 150, + "Number of failed adaptions before adapted templates reset"); +extern INT_VAR_H(matcher_min_examples_for_prototyping, 2, + "Reliable Config Threshold"); +extern BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching"); +extern BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching"); +extern INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row); -/**/ -void AdaptToWord(TWERD *Word, - TEXTROW *Row, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - const char *rejmap); - -void EndAdaptiveClassifier(); - -void InitAdaptiveClassifier(); - -void ResetAdaptiveClassifier(); - -void InitAdaptiveClassifierVars(); - -void PrintAdaptiveStatistics(FILE *File); - -void SettupPass1(); - -void SettupPass2(); - -void MakeNewAdaptedClass(TBLOB *Blob, - LINE_STATS *LineStats, - CLASS_ID ClassId, - ADAPT_TEMPLATES Templates); - int GetAdaptiveFeatures(TBLOB 
*Blob, LINE_STATS *LineStats, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures); -int AdaptableWord(TWERD *Word, - const char *BestChoice, - const char *BestChoice_lengths, - const char *BestRawChoice, - const char *BestRawChoice_lengths); - /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ diff --git a/classify/baseline.cpp b/classify/baseline.cpp index c78937b89..be645b236 100644 --- a/classify/baseline.cpp +++ b/classify/baseline.cpp @@ -27,9 +27,8 @@ I n c l u d e s ---------------------------------------------------------------------*/ #include "baseline.h" -//#include "blobs.h" -#include "debug.h" #include "hideedge.h" +#include "varable.h" /*---------------------------------------------------------------------- T y p e s @@ -39,20 +38,5 @@ typedef TPOINT SCALE; /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -inT8 baseline_normalized = TRUE; -make_int_var (baseline_enable, 1, make_baseline_enable, -4, 3, set_baseline_enable, "Baseline Enable"); -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -/********************************************************************** - * init_baseline - * - * Initialize the needed baseline variables. 
- **********************************************************************/ -void init_baseline() { - make_baseline_enable(); - - baseline_normalized = baseline_enable; -} +INT_VAR(classify_baseline_normalized, 1, "Baseline Enable"); diff --git a/classify/baseline.h b/classify/baseline.h index 9d13a33ba..d7353144a 100644 --- a/classify/baseline.h +++ b/classify/baseline.h @@ -30,6 +30,7 @@ ----------------------------------------------------------------------*/ #include "general.h" #include "tessclas.h" +#include "varable.h" /*---------------------------------------------------------------------- T y p e s @@ -40,52 +41,7 @@ /*---------------------------------------------------------------------- V a r i a b l e s ----------------------------------------------------------------------*/ -extern inT8 baseline_normalized; -extern int baseline_enable; -/*---------------------------------------------------------------------- - M a c r o s -----------------------------------------------------------------------*/ -/********************************************************************** - * is_baseline_normalized - * - * Check the baseline_normalized flag to see if it is set. - **********************************************************************/ -#define is_baseline_normalized() \ -(baseline_normalized) +extern INT_VAR_H(classify_baseline_normalized, 1, "Baseline Enable"); -/********************************************************************** - * reset_baseline_normalized - * - * Reset the baseline_normalized flag to show that it is not being done. - **********************************************************************/ -#define reset_baseline_normalized() \ -(baseline_normalized = FALSE) - -/********************************************************************** - * set_baseline_normalized - * - * Set the baseline_normalized flag to show that it is being done. 
- **********************************************************************/ -#define set_baseline_normalized() \ -(baseline_normalized = TRUE) - -/*---------------------------------------------------------------------- - F u n c t i o n s -----------------------------------------------------------------------*/ -void init_baseline(); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* baseline.c -void init_baseline - _ARGS((void)); - -#undef _ARGS -*/ #endif diff --git a/classify/blobclass.cpp b/classify/blobclass.cpp index 7ad4c1ae5..93831bf24 100644 --- a/classify/blobclass.cpp +++ b/classify/blobclass.cpp @@ -21,7 +21,6 @@ ----------------------------------------------------------------------------**/ #include "blobclass.h" #include "fxdefs.h" -#include "variables.h" #include "extract.h" #include "efio.h" #include "callcpp.h" @@ -34,8 +33,8 @@ #define MAXFILENAME 80 #define MAXMATCHES 10 -// define default font name to be used in training -#define FONT_NAME "UnknownFont" +STRING_VAR(classify_font_name, "UnknownFont", + "Default font name to be used in training"); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations @@ -43,34 +42,14 @@ /* name of current image file being processed */ extern char imagefile[]; -/* parameters used to control the training process */ -static const char *FontName = FONT_NAME; - /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void InitBlobClassifierVars() { -/* - ** Parameters: none - ** Globals: - ** FontName name of font being trained on - ** Operation: Install blob classifier variables into the wiseowl - ** variable system. 
- ** Return: none - ** Exceptions: none - ** History: Fri Jan 19 16:13:33 1990, DSJ, Created. - */ - VALUE dummy; - - string_variable (FontName, "FontName", FONT_NAME); - -} /* InitBlobClassifierVars */ - /*---------------------------------------------------------------------------*/ void -LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[]) +LearnBlob (const STRING& filename, + TBLOB * Blob, TEXTROW * Row, char BlobText[]) /* ** Parameters: ** Blob blob whose micro-features are to be learned @@ -79,7 +58,8 @@ LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[]) ** TextLength number of characters in blob ** Globals: ** imagefile base filename of the page being learned - ** FontName name of font currently being trained on + ** classify_font_name + ** name of font currently being trained on ** Operation: ** Extract micro-features from the specified blob and append ** them to the appropriate file. @@ -87,13 +67,10 @@ LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[]) ** Exceptions: none ** History: 7/28/89, DSJ, Created. */ -#define MAXFILENAME 80 -#define MAXCHARNAME 20 -#define MAXFONTNAME 20 #define TRAIN_SUFFIX ".tr" { static FILE *FeatureFile = NULL; - char Filename[MAXFILENAME]; + STRING Filename(filename); CHAR_DESC CharDesc; LINE_STATS LineStats; @@ -102,19 +79,35 @@ LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[]) GetLineStatsFromRow(Row, &LineStats); CharDesc = ExtractBlobFeatures (Blob, &LineStats); + if (CharDesc == NULL) { + cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); + return; + } + + // If no fontname was set, try to extract it from the filename + char CurrFontName[32] = ""; + strncpy(CurrFontName, static_cast(classify_font_name).string(), 32); + if (!strcmp(CurrFontName, "UnknownFont")) { + // filename is expected to be of the form [lang].[fontname].exp[num] + // The [lang], [fontname] and [num] fields should not have '.' characters. 
+ const char *basename = strrchr(filename.string(), '/'); + const char *firstdot = strchr(basename, '.'); + const char *lastdot = strrchr(filename.string(), '.'); + if (firstdot != lastdot && firstdot != NULL && lastdot != NULL) { + strncpy(CurrFontName, firstdot + 1, lastdot - firstdot - 1); + } + } // if a feature file is not yet open, open it // the name of the file is the name of the image plus TRAIN_SUFFIX if (FeatureFile == NULL) { - strcpy(Filename, imagefile); - strcat(Filename, TRAIN_SUFFIX); - FeatureFile = Efopen (Filename, "w"); - - cprintf ("TRAINING ... Font name = %s.\n", FontName); + Filename += TRAIN_SUFFIX; + FeatureFile = Efopen (Filename.string(), "w"); + cprintf ("TRAINING ... Font name = %s\n", CurrFontName); } // label the features with a class name and font name - fprintf (FeatureFile, "\n%s %s ", FontName, BlobText); + fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText); // write micro-features to file and clean up WriteCharDescription(FeatureFile, CharDesc); diff --git a/classify/blobclass.h b/classify/blobclass.h index 80e603c19..5fac44540 100644 --- a/classify/blobclass.h +++ b/classify/blobclass.h @@ -38,9 +38,8 @@ /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void InitBlobClassifierVars(); - -void LearnBlob (TBLOB * Blob, TEXTROW * Row, char BlobText[]); +void LearnBlob (const STRING& filename, + TBLOB * Blob, TEXTROW * Row, char BlobText[]); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/classify.cpp b/classify/classify.cpp new file mode 100644 index 000000000..aeff801ca --- /dev/null +++ b/classify/classify.cpp @@ -0,0 +1,86 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.cpp +// Description: classify class. 
+// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "classify.h" +#include "intproto.h" +#include "unicity_table.h" +#include + +namespace { + +// Compare FontInfo structures. +bool compare_fontinfo(const FontInfo& fi1, const FontInfo& fi2) { + // The font properties are required to be the same for two font with the same + // name, so there is no need to test them. + // Consequently, querying the table with only its font name as information is + // enough to retrieve its properties. + return strcmp(fi1.name, fi2.name) == 0; +} +// Compare FontSet structures. 
+bool compare_font_set(const FontSet& fs1, const FontSet& fs2) { + if (fs1.size != fs2.size) + return false; + for (int i = 0; i < fs1.size; ++i) { + if (fs1.configs[i] != fs2.configs[i]) + return false; + } + return true; +} + +void delete_callback(FontInfo f) { + delete[] f.name; +} +void delete_callback_fs(FontSet fs) { + delete[] fs.configs; +} + +} + +namespace tesseract { +Classify::Classify() + : INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP"), + BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier"), + BOOL_MEMBER(classify_recog_devanagari, false, + "Whether recognizing a language with devanagari script."), + EnableLearning(true), + dict_(&image_) { + fontinfo_table_.set_compare_callback( + NewPermanentCallback(compare_fontinfo)); + fontinfo_table_.set_clear_callback( + NewPermanentCallback(delete_callback)); + fontset_table_.set_compare_callback( + NewPermanentCallback(compare_font_set)); + fontset_table_.set_clear_callback( + NewPermanentCallback(delete_callback_fs)); + AdaptedTemplates = NULL; + PreTrainedTemplates = NULL; + inttemp_loaded_ = false; + AllProtosOn = NULL; + PrunedProtos = NULL; + AllConfigsOn = NULL; + AllProtosOff = NULL; + AllConfigsOff = NULL; + TempProtoMask = NULL; + NormProtos = NULL; +} + +Classify::~Classify() { + EndAdaptiveClassifier(); +} + +} // namespace tesseract diff --git a/classify/classify.h b/classify/classify.h new file mode 100644 index 000000000..179ca3d8d --- /dev/null +++ b/classify/classify.h @@ -0,0 +1,230 @@ +/////////////////////////////////////////////////////////////////////// +// File: classify.h +// Description: classify class. +// Author: Samuel Charron +// +// (C) Copyright 2006, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_CLASSIFY_CLASSIFY_H__ +#define TESSERACT_CLASSIFY_CLASSIFY_H__ + +#include "adaptive.h" +#include "ccstruct.h" +#include "classify.h" +#include "dict.h" +#include "fxdefs.h" +#include "intmatcher.h" +#include "ratngs.h" +#include "ocrfeatures.h" +#include "unicity_table.h" + +class WERD_CHOICE; +struct ADAPT_RESULTS; +struct NORM_PROTOS; + +namespace tesseract { +class Classify : public CCStruct { + public: + Classify(); + ~Classify(); + Dict& getDict() { + return dict_; + } + /* adaptive.cpp ************************************************************/ + ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); + int ClassPruner(INT_TEMPLATES IntTemplates, + inT16 NumFeatures, + INT_FEATURE_ARRAY Features, + CLASS_NORMALIZATION_ARRAY NormalizationFactors, + CLASS_CUTOFF_ARRAY ExpectedNumFeatures, + CLASS_PRUNER_RESULTS Results, + int Debug); + void ReadNewCutoffs(FILE *CutoffFile, inT64 end_offset, + CLASS_CUTOFF_ARRAY Cutoffs); + void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); + ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File); + /* normmatch.cpp ************************************************************/ + FLOAT32 ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, BOOL8 DebugMatch); + void FreeNormProtos(); + NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset); + /* protos.cpp ***************************************************************/ + void 
ReadClassFile(); + INT_TEMPLATES + CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& target_unicharset); + /* adaptmatch.cpp ***********************************************************/ + void AdaptToWord(TWERD *Word, + TEXTROW *Row, + const WERD_CHOICE& BestChoice, + const WERD_CHOICE& BestRawChoice, + const char *rejmap); + void InitAdaptiveClassifier(); + void InitAdaptedClass(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + ADAPT_CLASS Class, + ADAPT_TEMPLATES Templates); + void AdaptToPunc(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + FLOAT32 Threshold); + void AmbigClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + UNICHAR_ID *Ambiguities, + ADAPT_RESULTS *Results); + void MasterMatcher(INT_TEMPLATES templates, + inT16 num_features, + INT_FEATURE_ARRAY features, + CLASS_NORMALIZATION_ARRAY norm_factors, + ADAPT_CLASS* classes, + int debug, + int num_classes, + CLASS_PRUNER_RESULTS results, + ADAPT_RESULTS* final_results); + void ConvertMatchesToChoices(ADAPT_RESULTS *Results, + BLOB_CHOICE_LIST *Choices); + void AddNewResult(ADAPT_RESULTS *Results, + CLASS_ID ClassId, + FLOAT32 Rating, + int ConfigId); +#ifndef GRAPHICS_DISABLED + void DebugAdaptiveClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + ADAPT_RESULTS *Results); +#endif + void GetAdaptThresholds (TWERD * Word, + LINE_STATS * LineStats, + const WERD_CHOICE& BestChoice, + const WERD_CHOICE& BestRawChoice, + FLOAT32 Thresholds[]); + + int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int NumFeatures, + INT_FEATURE_ARRAY Features, + FEATURE_SET FloatFeatures); + void MakePermanent(ADAPT_TEMPLATES Templates, + CLASS_ID ClassId, + int ConfigId, + TBLOB *Blob, + LINE_STATS *LineStats); + void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results); + void RemoveExtraPuncs(ADAPT_RESULTS *Results); + void RemoveBadMatches(ADAPT_RESULTS *Results); + void ShowBestMatchFor(TBLOB *Blob, + LINE_STATS *LineStats, + 
CLASS_ID ClassId, + BOOL8 AdaptiveOn, + BOOL8 PreTrainedOn); + UNICHAR_ID *BaselineClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + ADAPT_TEMPLATES Templates, + ADAPT_RESULTS *Results); + int CharNormClassifier(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + ADAPT_RESULTS *Results); + UNICHAR_ID *GetAmbiguities(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID CorrectClass); + void DoAdaptiveMatch(TBLOB *Blob, + LINE_STATS *LineStats, + ADAPT_RESULTS *Results); + void AdaptToChar(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId, + FLOAT32 Threshold); + int AdaptableWord(TWERD *Word, + const WERD_CHOICE &BestChoiceWord, + const WERD_CHOICE &RawChoiceWord); + void EndAdaptiveClassifier(); + void PrintAdaptiveStatistics(FILE *File); + void SettupPass1(); + void SettupPass2(); + void AdaptiveClassifier(TBLOB *Blob, + TBLOB *DotBlob, + TEXTROW *Row, + BLOB_CHOICE_LIST *Choices, + CLASS_PRUNER_RESULTS cp_results); + void ClassifyAsNoise(ADAPT_RESULTS *Results); + void ResetAdaptiveClassifier(); + + FLOAT32 GetBestRatingFor(TBLOB *Blob, + LINE_STATS *LineStats, + CLASS_ID ClassId); + int GetCharNormFeatures(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength); + int GetIntCharNormFeatures(TBLOB *Blob, + LINE_STATS *LineStats, + INT_TEMPLATES Templates, + INT_FEATURE_ARRAY IntFeatures, + CLASS_NORMALIZATION_ARRAY CharNormArray, + inT32 *BlobLength); + + /* float2int.cpp ************************************************************/ + void ComputeIntCharNormArray(FEATURE NormFeature, + INT_TEMPLATES Templates, + CLASS_NORMALIZATION_ARRAY CharNormArray); + /* intproto.cpp *************************************************************/ + INT_TEMPLATES ReadIntTemplates(FILE *File); + void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, + const UNICHARSET& target_unicharset); + CLASS_ID GetClassToDebug(const char *Prompt); + /* font 
detection ***********************************************************/ + UnicityTable& get_fontinfo_table() { + return fontinfo_table_; + } + UnicityTable& get_fontset_table() { + return fontset_table_; + } + /* adaptmatch.cpp ***********************************************************/ + /* name of current image file being processed */ + INT_VAR_H(tessedit_single_match, FALSE, "Top choice only from CP"); + /* use class variables to hold onto built-in templates and adapted + templates */ + INT_TEMPLATES PreTrainedTemplates; + ADAPT_TEMPLATES AdaptedTemplates; + // Successful load of inttemp allows base tesseract classfier to be used. + bool inttemp_loaded_; + + /* create dummy proto and config masks for use with the built-in templates */ + BIT_VECTOR AllProtosOn; + BIT_VECTOR PrunedProtos; + BIT_VECTOR AllConfigsOn; + BIT_VECTOR AllProtosOff; + BIT_VECTOR AllConfigsOff; + BIT_VECTOR TempProtoMask; + // External control of adaption. + BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier"); + // Internal control of Adaption so it doesn't work on pass2. 
+ BOOL_VAR_H(classify_recog_devanagari, false, + "Whether recognizing a language with devanagari script."); + bool EnableLearning; + /* normmatch.cpp */ + NORM_PROTOS *NormProtos; + /* font detection ***********************************************************/ + UnicityTable fontinfo_table_; + UnicityTable fontset_table_; + private: + Dict dict_; +}; +} // namespace tesseract + +#endif // TESSERACT_CLASSIFY_CLASSIFY_H__ diff --git a/classify/classify.vcproj b/classify/classify.vcproj new file mode 100755 index 000000000..64431bfbf --- /dev/null +++ b/classify/classify.vcproj @@ -0,0 +1,433 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/classify/cluster.h b/classify/cluster.h index 985a9fbde..4843e32df 100644 --- a/classify/cluster.h +++ b/classify/cluster.h @@ -78,7 +78,7 @@ typedef union FLOATUNION; -typedef struct proto +typedef struct { unsigned Significant:1; // TRUE if prototype is significant unsigned Merged:1; // Merged after clustering so do not output diff --git a/classify/cutoffs.cpp b/classify/cutoffs.cpp index 328a58b66..baa506820 100644 --- a/classify/cutoffs.cpp +++ b/classify/cutoffs.cpp @@ -19,12 +19,16 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "cutoffs.h" + +#include + +#include "classify.h" #include "efio.h" +#include "globals.h" +#include "helpers.h" #include "scanutils.h" #include "serialis.h" #include "unichar.h" -#include "globals.h" -#include #define REALLY_QUOTE_IT(x) QUOTE_IT(x) @@ -34,40 +38,41 @@ Public Code ----------------------------------------------------------------------------**/ 
/*---------------------------------------------------------------------------*/ -void ReadNewCutoffs(const char *Filename, - CLASS_TO_INDEX ClassMapper, - CLASS_CUTOFF_ARRAY Cutoffs) { +namespace tesseract { +void Classify::ReadNewCutoffs(FILE *CutoffFile, inT64 end_offset, + CLASS_CUTOFF_ARRAY Cutoffs) { /* ** Parameters: ** Filename name of file containing cutoff definitions - ** ClassMapper array which maps class id's to class indexes ** Cutoffs array to put cutoffs into ** Globals: none ** Operation: Open Filename, read in all of the class-id/cutoff pairs ** and insert them into the Cutoffs array. Cutoffs are - ** inserted in the array so that the array is indexed by - ** class index rather than class id. Unused entries in the + ** indexed in the array by class id. Unused entries in the ** array are set to an arbitrarily high cutoff value. ** Return: none ** Exceptions: none ** History: Wed Feb 20 09:38:26 1991, DSJ, Created. */ - FILE *CutoffFile; char Class[UNICHAR_LEN + 1]; CLASS_ID ClassId; int Cutoff; int i; - CutoffFile = Efopen (Filename, "r"); - for (i = 0; i < MAX_NUM_CLASSES; i++) Cutoffs[i] = MAX_CUTOFF; - while (fscanf (CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", - Class, &Cutoff) == 2) { - ClassId = unicharset.unichar_to_id(Class); - Cutoffs[ClassMapper[ClassId]] = Cutoff; + while ((end_offset < 0 || ftell(CutoffFile) < end_offset) && + fscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", + Class, &Cutoff) == 2) { + if (strcmp(Class, "NULL") == 0) { + ClassId = unicharset.unichar_to_id(" "); + } else { + ClassId = unicharset.unichar_to_id(Class); + } + Cutoffs[ClassId] = Cutoff; + SkipNewline(CutoffFile); } - fclose(CutoffFile); - } /* ReadNewCutoffs */ + +} // namespace tesseract diff --git a/classify/cutoffs.h b/classify/cutoffs.h index 01b7fa5a8..907e94539 100644 --- a/classify/cutoffs.h +++ b/classify/cutoffs.h @@ -28,9 +28,6 @@ typedef uinT16 CLASS_CUTOFF_ARRAY[MAX_NUM_CLASSES]; 
/**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void ReadNewCutoffs(const char *Filename, - CLASS_TO_INDEX ClassMapper, - CLASS_CUTOFF_ARRAY Cutoffs); /* #if defined(__STDC__) || defined(__cplusplus) @@ -42,7 +39,6 @@ void ReadNewCutoffs(const char *Filename, /* cutoffs.c void ReadNewCutoffs _ARGS((char *Filename, - CLASS_TO_INDEX ClassMapper, CLASS_CUTOFF_ARRAY Cutoffs)); #undef _ARGS */ diff --git a/classify/extract.cpp b/classify/extract.cpp index 74b34eb13..8293e4cdd 100644 --- a/classify/extract.cpp +++ b/classify/extract.cpp @@ -19,7 +19,6 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "extract.h" -#include "variables.h" #include "flexfx.h" #include "funcdefs.h" #include "danerror.h" @@ -61,24 +60,6 @@ CHAR_DESC ExtractBlobFeatures(TBLOB *Blob, LINE_STATS *LineStats) { return (ExtractFlexFeatures (Blob, LineStats)); } /* ExtractBlobFeatures */ - -/*---------------------------------------------------------------------------*/ -void InitExtractorVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: Install global extractor variables into the variable - ** system. - ** Return: none - ** Exceptions: none - ** History: Sun Jan 21 10:19:59 1990, DSJ, Created. 
- */ - // VALUE dummy; - InitFlexFXVars(); - -} /* InitExtractorVars */ - - /**---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------**/ diff --git a/classify/extract.h b/classify/extract.h index 7a16e9179..b986303d3 100644 --- a/classify/extract.h +++ b/classify/extract.h @@ -25,12 +25,10 @@ /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -CHAR_DESC ExtractBlobFeatures(TBLOB *Blob, LINE_STATS *LineStats); - -void InitExtractorVars(); +CHAR_DESC ExtractBlobFeatures(TBLOB *Blob, LINE_STATS *LineStats); /*--------------------------------------------------------------------------- Private Function Prototypes ----------------------------------------------------------------------------*/ -void ExtractorStub(); +void ExtractorStub(); #endif diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index 1e06b975f..7d3e42ddc 100644 --- a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -22,8 +22,6 @@ #include "emalloc.h" #include "danerror.h" #include "scanutils.h" -#include "variables.h" -#include "sigmenu.h" #include #include diff --git a/classify/flexfx.cpp b/classify/flexfx.cpp index f288daeeb..b54c815aa 100644 --- a/classify/flexfx.cpp +++ b/classify/flexfx.cpp @@ -20,8 +20,6 @@ ----------------------------------------------------------------------------**/ #include "flexfx.h" #include "featdefs.h" -#include "variables.h" -#include "sigmenu.h" #include "emalloc.h" #include #include @@ -49,39 +47,15 @@ CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats) { for (Type = 0; Type < CharDesc->NumFeatureSets; Type++) if (FeatureDefs.FeatureExtractors[Type] != NULL && - FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) + FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) { CharDesc->FeatureSets[Type] 
= - (FeatureDefs.FeatureExtractors[Type])->Extractor (Blob, LineStats); + (FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, LineStats); + if (CharDesc->FeatureSets[Type] == NULL) { + FreeCharDescription(CharDesc); + return NULL; + } + } return (CharDesc); } /* ExtractFlexFeatures */ - - -/*---------------------------------------------------------------------------*/ -void -InitFlexFXVars () -/* - ** Parameters: none - ** Globals: none - ** Operation: Add any control variables used by the feature extractors - ** to the variable system. This includes the enable flag for - ** each individual extractor. This routine needs to create - ** a separate name for the enable for each feature extractor - ** and allocate a string to contain that name. This is - ** necessary since the "variables" routines do not create - ** copies of the string names passed to them. - ** Return: none - ** Exceptions: none - ** History: Wed May 23 15:59:23 1990, DSJ, Created. - */ -#define NamePrefix "Enable" -#define NameSuffix "Features" -{ - int Type; - - SetupExtractors(); - for (Type = 0; Type < FeatureDefs.NumFeatureTypes; Type++) { - (FeatureDefs.FeatureExtractors[Type])->InitExtractorVars (); - } -} /* InitFlexFXVars */ diff --git a/classify/flexfx.h b/classify/flexfx.h index d519466fb..3d9e90ded 100644 --- a/classify/flexfx.h +++ b/classify/flexfx.h @@ -28,7 +28,6 @@ /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats); +CHAR_DESC ExtractFlexFeatures(TBLOB *Blob, LINE_STATS *LineStats); -void InitFlexFXVars(); #endif diff --git a/classify/float2int.cpp b/classify/float2int.cpp index 41f6b448b..edf8c3f9e 100644 --- a/classify/float2int.cpp +++ b/classify/float2int.cpp @@ -21,6 +21,7 @@ #include "float2int.h" #include "normmatch.h" #include "mfoutline.h" +#include "classify.h" #include 
"picofeat.h" #define MAX_INT_CHAR_NORM (INT_CHAR_NORM_RANGE - 1) @@ -29,6 +30,8 @@ Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ +namespace tesseract { + void ClearCharNormArray(INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray) { /* @@ -53,9 +56,9 @@ void ClearCharNormArray(INT_TEMPLATES Templates, /*---------------------------------------------------------------------------*/ -void ComputeIntCharNormArray(FEATURE NormFeature, - INT_TEMPLATES Templates, - CLASS_NORMALIZATION_ARRAY CharNormArray) { +void Classify::ComputeIntCharNormArray( + FEATURE NormFeature, INT_TEMPLATES Templates, + CLASS_NORMALIZATION_ARRAY CharNormArray) { /* ** Parameters: ** NormFeature character normalization feature @@ -76,8 +79,7 @@ void ComputeIntCharNormArray(FEATURE NormFeature, for (i = 0; i < Templates->NumClasses; i++) { NormAdjust = (int) (INT_CHAR_NORM_RANGE * - ComputeNormMatch (Templates->ClassIdFor[i], - NormFeature, FALSE)); + ComputeNormMatch (i, NormFeature, FALSE)); if (NormAdjust < 0) NormAdjust = 0; else if (NormAdjust > MAX_INT_CHAR_NORM) @@ -85,9 +87,9 @@ void ComputeIntCharNormArray(FEATURE NormFeature, CharNormArray[i] = NormAdjust; } - } /* ComputeIntCharNormArray */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) { @@ -107,7 +109,7 @@ void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures) { FEATURE Feature; FLOAT32 YShift; - if (NormMethod == baseline) + if (classify_norm_method == baseline) YShift = BASELINE_Y_SHIFT; else YShift = Y_SHIFT; diff --git a/classify/float2int.h b/classify/float2int.h index 60d1dead3..0c6e42cce 100644 --- a/classify/float2int.h +++ b/classify/float2int.h @@ -30,36 +30,11 @@ 
/**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ +namespace tesseract { void ClearCharNormArray(INT_TEMPLATES Templates, CLASS_NORMALIZATION_ARRAY CharNormArray); +} // namespace tesseract. -void ComputeIntCharNormArray(FEATURE NormFeature, - INT_TEMPLATES Templates, - CLASS_NORMALIZATION_ARRAY CharNormArray); +void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); -void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* float2int.c -void ClearCharNormArray - _ARGS((INT_TEMPLATES Templates, - CLASS_NORMALIZATION_ARRAY CharNormArray)); - -void ComputeIntCharNormArray - _ARGS((FEATURE NormFeature, - INT_TEMPLATES Templates, - CLASS_NORMALIZATION_ARRAY CharNormArray)); - -void ComputeIntFeatures - _ARGS((FEATURE_SET Features, - INT_FEATURE_ARRAY IntFeatures)); - -#undef _ARGS -*/ #endif diff --git a/classify/fpoint.cpp b/classify/fpoint.cpp index 5e6bf7e79..d8625c82e 100644 --- a/classify/fpoint.cpp +++ b/classify/fpoint.cpp @@ -28,18 +28,10 @@ ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -void FillPoint(FPOINT &P, FLOAT32 X, FLOAT32 Y) { - P.x = X; - P.y = Y; -} -/* Commented out, because there is also a same function for TPOINT and this gets resolved to it. 
-void CopyPoint(FPOINT &A, FPOINT &B) { - B.x = A.x; - B.y = A.y; -} -*/ FLOAT32 DistanceBetween(FPOINT A, FPOINT B) { - return sqrt ((double) (XDelta(A,B) * XDelta(A,B) + YDelta(A,B) * YDelta(A,B))); + double xd = XDelta(A, B); + double yd = YDelta(A, B); + return sqrt(static_cast(xd * xd + yd * yd)); } diff --git a/classify/fpoint.h b/classify/fpoint.h index eb212b75f..74112761e 100644 --- a/classify/fpoint.h +++ b/classify/fpoint.h @@ -35,16 +35,9 @@ typedef FPOINT FVECTOR; /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ -/* macros for manipulating simple point data structures */ -//#define Xof(P) ( (P).x ) -//#define Yof(P) ( (P).y ) -//#define YofP(P) ((P)->y) -//#define XofP(P) ((P)->x) - /* macros for computing miscellaneous functions of 2 points */ #define XDelta(A,B) ( (B).x - (A).x ) #define YDelta(A,B) ( (B).y - (A).y ) -#define CopyPoint(A, B) ((B).x = (A).x, (B).y = (A).y) // FIXME, gets expanded for FPOINT and TPOINT. #define SlopeFrom(A,B) ( YDelta(A,B) / XDelta(A,B) ) #define AngleFrom(A,B) ( atan2((double) YDelta(A,B), \ (double) XDelta(A,B) ) ) @@ -55,8 +48,6 @@ typedef FPOINT FVECTOR; Public Function Prototypes ---------------------------------------------------------------------------*/ -void FillPoint(FPOINT &P, FLOAT32 X, FLOAT32 Y); - FLOAT32 DistanceBetween(FPOINT A, FPOINT B); FLOAT32 NormalizedAngleFrom(FPOINT *Point1, FPOINT *Point2, FLOAT32 FullScale); diff --git a/classify/fxdefs.cpp b/classify/fxdefs.cpp index 0a688136e..166ace080 100644 --- a/classify/fxdefs.cpp +++ b/classify/fxdefs.cpp @@ -29,10 +29,10 @@ int ExtractMode; // Definitions of extractors separated from feature definitions. 
-DefineFeatureExt (MicroFeatureExt, ExtractMicros, InitMicroFXVars) -DefineFeatureExt (PicoFeatExt, NULL, DefaultInitFXVars) -DefineFeatureExt (CharNormExt, ExtractCharNormFeatures, DefaultInitFXVars) -DefineFeatureExt (OutlineFeatExt, NULL, DefaultInitFXVars) +DefineFeatureExt (MicroFeatureExt, ExtractMicros) +DefineFeatureExt (PicoFeatExt, NULL) +DefineFeatureExt (CharNormExt, ExtractCharNormFeatures) +DefineFeatureExt (OutlineFeatExt, NULL) FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = { &MicroFeatureExt, @@ -48,7 +48,7 @@ FEATURE_EXT_STRUCT* ExtractorDefs[NUM_FEATURE_TYPES] = { /*---------------------------------------------------------------------------*/ void SetupExtractors() { for (int i = 0; i < NUM_FEATURE_TYPES; ++i) - FeatureDefs.FeatureExtractors[i] = ExtractorDefs[i]; + FeatureDefs.FeatureExtractors[i] = ExtractorDefs[i]; } void GetLineStatsFromRow(TEXTROW *Row, LINE_STATS *LineStats) { diff --git a/classify/hideedge.cpp b/classify/hideedge.cpp index 49367dd95..5bf8e0a98 100644 --- a/classify/hideedge.cpp +++ b/classify/hideedge.cpp @@ -30,6 +30,4 @@ ---------------------------------------------------------------------- */ -/*#include "stdafx.h"*/ #include "hideedge.h" -#include "debug.h" diff --git a/classify/intfx.cpp b/classify/intfx.cpp index added866a..b5d1d740d 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -22,7 +22,6 @@ #include "intmatcher.h" #include "const.h" #ifdef __UNIX__ -#include #endif /**---------------------------------------------------------------------------- @@ -33,21 +32,17 @@ uinT8 TableLookup(); uinT8 MySqrt2(); void ClipRadius(); -make_int_var (RadiusGyrMinMan, 255, MakeRadiusGyrMinMan, -16, 10, SetRadiusGyrMinMan, -"Minimum Radius of Gyration Mantissa 0-255: "); +INT_VAR(classify_radius_gyr_min_man, 255, + "Minimum Radius of Gyration Mantissa 0-255: "); -make_int_var (RadiusGyrMinExp, 0, MakeRadiusGyrMinExp, -16, 11, SetRadiusGyrMinExp, -"Minimum Radius of Gyration Exponent 0-255: "); 
+INT_VAR(classify_radius_gyr_min_exp, 0, + "Minimum Radius of Gyration Exponent 0-255: "); -make_int_var (RadiusGyrMaxMan, 158, MakeRadiusGyrMaxMan, -16, 12, SetRadiusGyrMaxMan, -"Maximum Radius of Gyration Mantissa 0-255: "); +INT_VAR(classify_radius_gyr_max_man, 158, + "Maximum Radius of Gyration Mantissa 0-255: "); -make_int_var (RadiusGyrMaxExp, 8, MakeRadiusGyrMaxExp, -16, 13, SetRadiusGyrMaxExp, -"Maximum Radius of Gyration Exponent 0-255: "); +INT_VAR(classify_radius_gyr_max_exp, 8, + "Maximum Radius of Gyration Exponent 0-255: "); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations @@ -220,6 +215,13 @@ int ExtractIntFeat(TBLOB *Blob, Results->Rx = (inT16) (51.2 / (double) RxInv * pow (2.0, (double) RxExp)); Results->Ry = (inT16) (51.2 / (double) RyInv * pow (2.0, (double) RyExp)); + if (Results->Ry == 0) { + /* + This would result in features having 'nan' values. + Since the expression is always > 0, assign a value of 1. 
+ */ + Results->Ry = 1; + } Results->NumBL = NumBLFeatures; /* extract character normalized features */ @@ -460,8 +462,8 @@ void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { register uinT8 BitN, LastCarry; int RxInvLarge, RyInvSmall; - AM = RadiusGyrMinMan; - AE = RadiusGyrMinExp; + AM = classify_radius_gyr_min_man; + AE = classify_radius_gyr_min_exp; BM = *RxInv; BE = *RxExp; LastCarry = 1; @@ -491,12 +493,12 @@ void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { BitN = BitN & 1; if (BitN == 1) { - *RxInv = RadiusGyrMinMan; - *RxExp = RadiusGyrMinExp; + *RxInv = classify_radius_gyr_min_man; + *RxExp = classify_radius_gyr_min_exp; } - AM = RadiusGyrMinMan; - AE = RadiusGyrMinExp; + AM = classify_radius_gyr_min_man; + AE = classify_radius_gyr_min_exp; BM = *RyInv; BE = *RyExp; LastCarry = 1; @@ -526,12 +528,12 @@ void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { BitN = BitN & 1; if (BitN == 1) { - *RyInv = RadiusGyrMinMan; - *RyExp = RadiusGyrMinExp; + *RyInv = classify_radius_gyr_min_man; + *RyExp = classify_radius_gyr_min_exp; } - AM = RadiusGyrMaxMan; - AE = RadiusGyrMaxExp; + AM = classify_radius_gyr_max_man; + AE = classify_radius_gyr_max_exp; BM = *RxInv; BE = *RxExp; LastCarry = 1; @@ -567,8 +569,8 @@ void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { AM = *RyInv; AE = *RyExp; - BM = RadiusGyrMaxMan; - BE = RadiusGyrMaxExp; + BM = classify_radius_gyr_max_man; + BE = classify_radius_gyr_max_exp; LastCarry = 1; while ((AM != 0) || (BM != 0)) { if (AE > BE) { @@ -601,8 +603,8 @@ void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { RyInvSmall = 0; if (RxInvLarge && RyInvSmall) { - *RyInv = RadiusGyrMaxMan; - *RyExp = RadiusGyrMaxExp; + *RyInv = classify_radius_gyr_max_man; + *RyExp = classify_radius_gyr_max_exp; } } diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index 2c35fc406..e5892bbcf 100644 --- a/classify/intmatcher.cpp +++ 
b/classify/intmatcher.cpp @@ -20,10 +20,12 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "intmatcher.h" +#include "intproto.h" #include "tordvars.h" #include "callcpp.h" #include "scrollview.h" #include "globals.h" +#include "classify.h" #include #define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \ @@ -98,50 +100,46 @@ uinT32 EvidenceMultMask; static inT16 LocalMatcherMultiplier; -make_int_var (ClassPrunerThreshold, 229, MakeClassPrunerThreshold, -16, 20, SetClassPrunerThreshold, -"Class Pruner Threshold 0-255: "); +INT_VAR(classify_class_pruner_threshold, 229, + "Class Pruner Threshold 0-255: "); -make_int_var (ClassPrunerMultiplier, 30, MakeClassPrunerMultiplier, -16, 21, SetClassPrunerMultiplier, -"Class Pruner Multiplier 0-255: "); +INT_VAR(classify_class_pruner_multiplier, 30, + "Class Pruner Multiplier 0-255: "); -make_int_var (IntegerMatcherMultiplier, 14, MakeIntegerMatcherMultiplier, -16, 22, SetIntegerMatcherMultiplier, -"Integer Matcher Multiplier 0-255: "); +INT_VAR(classify_integer_matcher_multiplier, 14, + "Integer Matcher Multiplier 0-255: "); -make_int_var (IntThetaFudge, 128, MakeIntThetaFudge, -16, 23, SetIntThetaFudge, -"Integer Matcher Theta Fudge 0-255: "); +INT_VAR(classify_int_theta_fudge, 128, + "Integer Matcher Theta Fudge 0-255: "); -make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength, -16, 24, SetCPCutoffStrength, -"Class Pruner CutoffStrength: "); +INT_VAR(classify_cp_cutoff_strength, 7, + "Class Pruner CutoffStrength: "); -make_int_var (EvidenceTableBits, 9, MakeEvidenceTableBits, -16, 25, SetEvidenceTableBits, -"Bits in Similarity to Evidence Lookup 8-9: "); +INT_VAR(classify_evidence_table_bits, 9, + "Bits in Similarity to Evidence Lookup 8-9: "); -make_int_var (IntEvidenceTruncBits, 14, MakeIntEvidenceTruncBits, -16, 26, SetIntEvidenceTruncBits, -"Integer Evidence Truncation Bits (Distance) 8-14: "); +INT_VAR(classify_int_evidence_trunc_bits, 
14, + "Integer Evidence Truncation Bits (Distance) 8-14: "); -make_float_var (SEExponentialMultiplier, 0, MakeSEExponentialMultiplier, -16, 27, SetSEExponentialMultiplier, -"Similarity to Evidence Table Exponential Multiplier: "); +double_VAR(classify_se_exponential_multiplier, 0, + "Similarity to Evidence Table Exponential Multiplier: "); -make_float_var (SimilarityCenter, 0.0075, MakeSimilarityCenter, -16, 28, SetSimilarityCenter, "Center of Similarity Curve: "); +double_VAR(classify_similarity_center, 0.0075, + "Center of Similarity Curve: "); -make_int_var (AdaptProtoThresh, 230, MakeAdaptProtoThresh, -16, 29, SetAdaptProtoThresh, -"Threshold for good protos during adaptive 0-255: "); +INT_VAR(classify_adapt_proto_thresh, 230, + "Threshold for good protos during adaptive 0-255: "); -make_int_var (AdaptFeatureThresh, 230, MakeAdaptFeatureThresh, -16, 30, SetAdaptFeatureThresh, -"Threshold for good features during adaptive 0-255: "); -//extern int display_ratings; -//extern inT32 cp_maps[4]; +INT_VAR(classify_adapt_feature_thresh, 230, + "Threshold for good features during adaptive 0-255: "); + +BOOL_VAR(disable_character_fragments, FALSE, + "Do not include character fragments in the" + " results of the classifier"); + +BOOL_VAR(matcher_debug_separate_windows, FALSE, + "Use two different windows for debugging the matching: " + "One for the protos and one for the features."); int protoword_lookups; int zero_protowords; @@ -154,13 +152,14 @@ int set_config_bits; Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -int ClassPruner(INT_TEMPLATES IntTemplates, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - CLASS_NORMALIZATION_ARRAY NormalizationFactors, - CLASS_CUTOFF_ARRAY ExpectedNumFeatures, - CLASS_PRUNER_RESULTS Results, - int Debug) { +namespace tesseract { +int Classify::ClassPruner(INT_TEMPLATES IntTemplates, + inT16 NumFeatures, + 
INT_FEATURE_ARRAY Features, + CLASS_NORMALIZATION_ARRAY NormalizationFactors, + CLASS_CUTOFF_ARRAY ExpectedNumFeatures, + CLASS_PRUNER_RESULTS Results, + int Debug) { /* ** Parameters: ** IntTemplates Class pruner tables @@ -176,8 +175,8 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ** (by CLASS_ID) ** Debug Debugger flag: 1=debugger on ** Globals: - ** ClassPrunerThreshold Cutoff threshold - ** ClassPrunerMultiplier Normalization factor multiplier + ** classify_class_pruner_threshold Cutoff threshold + ** classify_class_pruner_multiplier Normalization factor multiplier ** Operation: ** Prune the classes using a modified fast match table. ** Return a sorted list of classes along with the number @@ -201,20 +200,19 @@ int ClassPruner(INT_TEMPLATES IntTemplates, static int NormCount[MAX_NUM_CLASSES]; static int SortKey[MAX_NUM_CLASSES + 1]; static int SortIndex[MAX_NUM_CLASSES + 1]; - CLASS_INDEX Class; int out_class; int MaxNumClasses; int MaxCount; int NumClasses; FLOAT32 max_rating; //max allowed rating int *ClassCountPtr; - CLASS_ID classch; + CLASS_ID class_id; MaxNumClasses = IntTemplates->NumClasses; /* Clear Class Counts */ ClassCountPtr = &(ClassCount[0]); - for (Class = 0; Class < MaxNumClasses; Class++) { + for (class_id = 0; class_id < MaxNumClasses; class_id++) { *ClassCountPtr++ = 0; } @@ -222,18 +220,27 @@ int ClassPruner(INT_TEMPLATES IntTemplates, NumPruners = IntTemplates->NumClassPruners; for (feature_index = 0; feature_index < NumFeatures; feature_index++) { feature = &Features[feature_index]; - feature_address = (((feature->X * NUM_CP_BUCKETS >> 8) * NUM_CP_BUCKETS - + - (feature->Y * NUM_CP_BUCKETS >> 8)) * - NUM_CP_BUCKETS + - (feature->Theta * NUM_CP_BUCKETS >> 8)) << 1; + feature_address = (((feature->X * NUM_CP_BUCKETS >> 8) * NUM_CP_BUCKETS + + (feature->Y * NUM_CP_BUCKETS >> 8)) * NUM_CP_BUCKETS + + (feature->Theta * NUM_CP_BUCKETS >> 8)) << 1; ClassPruner = IntTemplates->ClassPruner; class_index = 0; + for (PrunerSet = 0; PrunerSet < 
NumPruners; PrunerSet++, ClassPruner++) { BasePrunerAddress = (uinT32 *) (*ClassPruner) + feature_address; for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) { PrunerWord = *BasePrunerAddress++; + // This inner loop is unrolled to speed up the ClassPruner. + // Currently gcc would not unroll it unless it is set to O3 + // level of optimization or -funroll-loops is specified. + /* + uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1; + for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) { + ClassCount[class_index++] += PrunerWord & class_mask; + PrunerWord >>= NUM_BITS_PER_CLASS; + } + */ ClassCount[class_index++] += cp_maps[PrunerWord & 3]; PrunerWord >>= 2; ClassCount[class_index++] += cp_maps[PrunerWord & 3]; @@ -270,57 +277,70 @@ int ClassPruner(INT_TEMPLATES IntTemplates, } /* Adjust Class Counts for Number of Expected Features */ - for (Class = 0; Class < MaxNumClasses; Class++) { - if (NumFeatures < ExpectedNumFeatures[Class]) { - int deficit = ExpectedNumFeatures[Class] - NumFeatures; - ClassCount[Class] -= ClassCount[Class] * deficit / - (NumFeatures*CPCutoffStrength + deficit); + for (class_id = 0; class_id < MaxNumClasses; class_id++) { + if (NumFeatures < ExpectedNumFeatures[class_id]) { + int deficit = ExpectedNumFeatures[class_id] - NumFeatures; + ClassCount[class_id] -= ClassCount[class_id] * deficit / + (NumFeatures*classify_cp_cutoff_strength + deficit); + } + if (!unicharset.get_enabled(class_id)) + ClassCount[class_id] = 0; // This char is disabled! + + // Do not include character fragments in the class pruner + // results if disable_character_fragments is true. + if (disable_character_fragments && unicharset.get_fragment(class_id)) { + ClassCount[class_id] = 0; } - if (!unicharset.get_enabled(IntTemplates->ClassIdFor[Class])) - ClassCount[Class] = 0; // This char is disabled! 
} /* Adjust Class Counts for Normalization Factors */ MaxCount = 0; - for (Class = 0; Class < MaxNumClasses; Class++) { - NormCount[Class] = ClassCount[Class] - - ((ClassPrunerMultiplier * NormalizationFactors[Class]) >> 8) + for (class_id = 0; class_id < MaxNumClasses; class_id++) { + NormCount[class_id] = ClassCount[class_id] + - ((classify_class_pruner_multiplier * NormalizationFactors[class_id]) >> 8) * cp_maps[3] / 3; - if (NormCount[Class] > MaxCount) - MaxCount = NormCount[Class]; + if (NormCount[class_id] > MaxCount && + // This additional check is added in order to ensure that + // the classifier will return at least one non-fragmented + // character match. + // TODO(daria): verify that this helps accuracy and does not + // hurt performance. + !unicharset.get_fragment(class_id)) { + MaxCount = NormCount[class_id]; + } } /* Prune Classes */ - MaxCount *= ClassPrunerThreshold; + MaxCount *= classify_class_pruner_threshold; MaxCount >>= 8; /* Select Classes */ if (MaxCount < 1) MaxCount = 1; NumClasses = 0; - for (Class = 0; Class < MaxNumClasses; Class++) - if (NormCount[Class] >= MaxCount) { - NumClasses++; - SortIndex[NumClasses] = Class; - SortKey[NumClasses] = NormCount[Class]; + for (class_id = 0; class_id < MaxNumClasses; class_id++) { + if (NormCount[class_id] >= MaxCount) { + NumClasses++; + SortIndex[NumClasses] = class_id; + SortKey[NumClasses] = NormCount[class_id]; + } } /* Sort Classes using Heapsort Algorithm */ if (NumClasses > 1) HeapSort(NumClasses, SortKey, SortIndex); - if (display_ratings > 1) { + if (tord_display_ratings > 1) { cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures); - for (Class = 0; Class < NumClasses; Class++) { - classch = IntTemplates->ClassIdFor[SortIndex[NumClasses - Class]]; + for (class_id = 0; class_id < NumClasses; class_id++) { cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n", - unicharset.id_to_unichar(classch), - ClassCount[SortIndex[NumClasses - Class]], - ExpectedNumFeatures[SortIndex[NumClasses - 
Class]], - SortKey[NumClasses - Class], - 1010 - 1000 * SortKey[NumClasses - Class] / + unicharset.debug_str(SortIndex[NumClasses - class_id]).string(), + ClassCount[SortIndex[NumClasses - class_id]], + ExpectedNumFeatures[SortIndex[NumClasses - class_id]], + SortKey[NumClasses - class_id], + 1010 - 1000 * SortKey[NumClasses - class_id] / (cp_maps[3] * NumFeatures)); } - if (display_ratings > 2) { + if (tord_display_ratings > 2) { NumPruners = IntTemplates->NumClassPruners; for (feature_index = 0; feature_index < NumFeatures; feature_index++) { @@ -339,10 +359,10 @@ int ClassPruner(INT_TEMPLATES IntTemplates, for (Word = 0; Word < WERDS_PER_CP_VECTOR; Word++) { PrunerWord = *BasePrunerAddress++; - for (Class = 0; Class < 16; Class++, class_index++) { + for (class_id = 0; class_id < 16; class_id++, class_index++) { if (NormCount[class_index] >= MaxCount) cprintf (" %s=%d,", - unicharset.id_to_unichar(IntTemplates->ClassIdFor[class_index]), + unicharset.id_to_unichar(class_index), PrunerWord & 3); PrunerWord >>= 2; } @@ -351,13 +371,12 @@ int ClassPruner(INT_TEMPLATES IntTemplates, cprintf ("\n"); } cprintf ("Adjustments:"); - for (Class = 0; Class < MaxNumClasses; Class++) { - if (NormCount[Class] > MaxCount) + for (class_id = 0; class_id < MaxNumClasses; class_id++) { + if (NormCount[class_id] > MaxCount) cprintf (" %s=%d,", - unicharset.id_to_unichar(IntTemplates->ClassIdFor[Class]), - -((ClassPrunerMultiplier * - NormalizationFactors[Class]) >> 8) * cp_maps[3] / - 3); + unicharset.id_to_unichar(class_id), + -((classify_class_pruner_multiplier * + NormalizationFactors[class_id]) >> 8) * cp_maps[3] / 3); } cprintf ("\n"); } @@ -365,19 +384,18 @@ int ClassPruner(INT_TEMPLATES IntTemplates, /* Set Up Results */ max_rating = 0.0f; - for (Class = 0, out_class = 0; Class < NumClasses; Class++) { - Results[out_class].Class = - IntTemplates->ClassIdFor[SortIndex[NumClasses - Class]]; + for (class_id = 0, out_class = 0; class_id < NumClasses; class_id++) { + 
Results[out_class].Class = SortIndex[NumClasses - class_id]; Results[out_class].Rating = 1.0 - SortKey[NumClasses - - Class] / ((float) cp_maps[3] * NumFeatures); + class_id] / ((float) cp_maps[3] * NumFeatures); out_class++; } NumClasses = out_class; return NumClasses; } - +} // namespace tesseract /*---------------------------------------------------------------------------*/ void IntegerMatcher(INT_CLASS ClassTemplate, @@ -402,7 +420,7 @@ void IntegerMatcher(INT_CLASS ClassTemplate, ** Debug Debugger flag: 1=debugger on ** Globals: ** LocalMatcherMultiplier Normalization factor multiplier - ** IntThetaFudge Theta fudge factor used for + ** classify_int_theta_fudge Theta fudge factor used for ** evidence calculation ** Operation: ** IntegerMatcher returns the best configuration and rating @@ -511,12 +529,12 @@ int FindGoodProtos(INT_CLASS ClassTemplate, ** Debug Debugger flag: 1=debugger on ** Globals: ** LocalMatcherMultiplier Normalization factor multiplier - ** IntThetaFudge Theta fudge factor used for + ** classify_int_theta_fudge Theta fudge factor used for ** evidence calculation - ** AdaptProtoThresh Threshold for good protos + ** classify_adapt_proto_thresh Threshold for good protos ** Operation: ** FindGoodProtos finds all protos whose normalized proto-evidence - ** exceed AdaptProtoThresh. The list is ordered by increasing + ** exceed classify_adapt_proto_thresh. The list is ordered by increasing ** proto id number. ** Return: ** Number of good protos in ProtoArray. 
@@ -571,7 +589,7 @@ int FindGoodProtos(INT_CLASS ClassTemplate, Temp /= ClassTemplate->ProtoLengths[ActualProtoNum]; /* Find Good Protos */ - if (Temp >= AdaptProtoThresh) { + if (Temp >= classify_adapt_proto_thresh) { *ProtoArray = ActualProtoNum; ProtoArray++; NumGoodProtos++; @@ -606,12 +624,12 @@ int FindBadFeatures(INT_CLASS ClassTemplate, ** Debug Debugger flag: 1=debugger on ** Globals: ** LocalMatcherMultiplier Normalization factor multiplier - ** IntThetaFudge Theta fudge factor used for + ** classify_int_theta_fudge Theta fudge factor used for ** evidence calculation - ** AdaptFeatureThresh Threshold for bad features + ** classify_adapt_feature_thresh Threshold for bad features ** Operation: ** FindBadFeatures finds all features whose maximum feature-evidence - ** was less than AdaptFeatureThresh. The list is ordered by increasing + ** was less than classify_adapt_feature_thresh. The list is ordered by increasing ** feature number. ** Return: ** Number of bad features in FeatureArray. 
@@ -650,7 +668,7 @@ int FindBadFeatures(INT_CLASS ClassTemplate, Temp = *UINT8Pointer; /* Find Bad Features */ - if (Temp < AdaptFeatureThresh) { + if (Temp < classify_adapt_feature_thresh) { *FeatureArray = Feature; FeatureArray++; NumBadFeatures++; @@ -691,15 +709,15 @@ void InitIntegerMatcher() { for (i = 0; i < SE_TABLE_SIZE; i++) { IntSimilarity = i << (27 - SE_TABLE_BITS); Similarity = ((double) IntSimilarity) / 65536.0 / 65536.0; - Evidence = Similarity / SimilarityCenter; + Evidence = Similarity / classify_similarity_center; Evidence *= Evidence; Evidence += 1.0; Evidence = 1.0 / Evidence; Evidence *= 255.0; - if (SEExponentialMultiplier > 0.0) { - ScaleFactor = 1.0 - exp (-SEExponentialMultiplier) * - exp (SEExponentialMultiplier * ((double) i / SE_TABLE_SIZE)); + if (classify_se_exponential_multiplier > 0.0) { + ScaleFactor = 1.0 - exp (-classify_se_exponential_multiplier) * + exp (classify_se_exponential_multiplier * ((double) i / SE_TABLE_SIZE)); if (ScaleFactor > 1.0) ScaleFactor = 1.0; if (ScaleFactor < 0.0) @@ -712,28 +730,13 @@ void InitIntegerMatcher() { /* Initialize evidence computation variables */ EvidenceTableMask = - ((1 << EvidenceTableBits) - 1) << (9 - EvidenceTableBits); - MultTruncShiftBits = (14 - IntEvidenceTruncBits); + ((1 << classify_evidence_table_bits) - 1) << (9 - classify_evidence_table_bits); + MultTruncShiftBits = (14 - classify_int_evidence_trunc_bits); TableTruncShiftBits = (27 - SE_TABLE_BITS - (MultTruncShiftBits << 1)); - EvidenceMultMask = ((1 << IntEvidenceTruncBits) - 1); + EvidenceMultMask = ((1 << classify_int_evidence_trunc_bits) - 1); } - -/*---------------------------------------------------------------------------*/ -void InitIntegerMatcherVars() { - MakeClassPrunerThreshold(); - MakeClassPrunerMultiplier(); - MakeIntegerMatcherMultiplier(); - MakeIntThetaFudge(); - MakeCPCutoffStrength(); - MakeEvidenceTableBits(); - MakeIntEvidenceTruncBits(); - MakeSEExponentialMultiplier(); - MakeSimilarityCenter(); -} - - 
/*-------------------------------------------------------------------------*/ void PrintIntMatcherStats(FILE *f) { fprintf (f, "protoword_lookups=%d, zero_protowords=%d, proto_shifts=%d\n", @@ -745,21 +748,21 @@ void PrintIntMatcherStats(FILE *f) { /*-------------------------------------------------------------------------*/ void SetProtoThresh(FLOAT32 Threshold) { - AdaptProtoThresh = (int) (255 * Threshold); - if (AdaptProtoThresh < 0) - AdaptProtoThresh = 0; - if (AdaptProtoThresh > 255) - AdaptProtoThresh = 255; + classify_adapt_proto_thresh.set_value(255 * Threshold); + if (classify_adapt_proto_thresh < 0) + classify_adapt_proto_thresh.set_value(0); + if (classify_adapt_proto_thresh > 255) + classify_adapt_proto_thresh.set_value(255); } /*---------------------------------------------------------------------------*/ void SetFeatureThresh(FLOAT32 Threshold) { - AdaptFeatureThresh = (int) (255 * Threshold); - if (AdaptFeatureThresh < 0) - AdaptFeatureThresh = 0; - if (AdaptFeatureThresh > 255) - AdaptFeatureThresh = 255; + classify_adapt_feature_thresh.set_value(255 * Threshold); + if (classify_adapt_feature_thresh < 0) + classify_adapt_feature_thresh.set_value(0); + if (classify_adapt_feature_thresh > 255) + classify_adapt_feature_thresh.set_value(255); } @@ -771,7 +774,7 @@ void SetBaseLineMatch() { /*--------------------------------------------------------------------------*/ void SetCharNormMatch() { - LocalMatcherMultiplier = IntegerMatcherMultiplier; + LocalMatcherMultiplier = classify_integer_matcher_multiplier; } @@ -972,7 +975,7 @@ int Debug) { - (Proto->B * (Feature->Y - 128)) + (Proto->C << 9)); M3 = (((inT8) (Feature->Theta - Proto->Angle)) * - IntThetaFudge) << 1; + classify_int_theta_fudge) << 1; if (A3 < 0) A3 = ~A3; @@ -1203,13 +1206,12 @@ int Debug) { int NumProtos; register int Temp; - extern ScrollView *IntMatchWindow; - - if (IntMatchWindow == NULL) { - IntMatchWindow = c_create_window ("IntMatchWindow", 50, 200, - 520, 520, - -130.0, 130.0, 
-130.0, 130.0); + InitIntMatchWindowIfReqd(); + if (matcher_debug_separate_windows) { + InitFeatureDisplayWindowIfReqd(); + InitProtoDisplayWindowIfReqd(); } + NumProtos = ClassTemplate->NumProtos; for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets; @@ -1233,7 +1235,7 @@ int Debug) { if (ConfigWord) { /* Update display for current proto */ if (ClipMatchEvidenceOn (Debug)) { - if (Temp < AdaptProtoThresh) + if (Temp < classify_adapt_proto_thresh) DisplayIntProto (ClassTemplate, ActualProtoNum, (Temp / 255.0)); else @@ -1268,6 +1270,12 @@ void IMDisplayFeatureDebugInfo(INT_CLASS ClassTemplate, IMClearTables(ClassTemplate, SumOfFeatureEvidence, ProtoEvidence); + InitIntMatchWindowIfReqd(); + if (matcher_debug_separate_windows) { + InitFeatureDisplayWindowIfReqd(); + InitProtoDisplayWindowIfReqd(); + } + NumConfigs = ClassTemplate->NumConfigs; for (Feature = 0; Feature < NumFeatures; Feature++) { IMUpdateTablesForFeature (ClassTemplate, ProtoMask, ConfigMask, Feature, @@ -1283,7 +1291,7 @@ void IMDisplayFeatureDebugInfo(INT_CLASS ClassTemplate, /* Update display for current feature */ if (ClipMatchEvidenceOn (Debug)) { - if (Temp < AdaptFeatureThresh) + if (Temp < classify_adapt_feature_thresh) DisplayIntFeature (&(Features[Feature]), 0.0); else DisplayIntFeature (&(Features[Feature]), 1.0); @@ -1412,7 +1420,7 @@ uinT8 NormalizationFactor, INT_RESULT Result) { Best2Match = 0; IntPointer = SumOfFeatureEvidence; for (ConfigNum = 0; ConfigNum < NumConfigs; ConfigNum++, IntPointer++) { - if (display_ratings > 1) + if (tord_display_ratings > 1) cprintf ("Config %d, rating=%d\n", ConfigNum, *IntPointer); if (*IntPointer > BestMatch) { if (BestMatch > 0) { diff --git a/classify/intmatcher.h b/classify/intmatcher.h index ef4a63412..2516b3dc8 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -18,10 +18,23 @@ #ifndef INTMATCHER_H #define INTMATCHER_H +#include "varable.h" + +// Character fragments could be present in the trained templates +// but
turned on/off on the language-by-language basis or depending +// on particular properties of the corpus (e.g. when we expect the +// images to have low exposure). +extern BOOL_VAR_H(disable_character_fragments, FALSE, + "Do not include character fragments in the" + " results of the classifier"); + +extern INT_VAR_H(classify_integer_matcher_multiplier, 14, + "Integer Matcher Multiplier 0-255: "); + + /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ -#include "debug.h" #include "intproto.h" #include "cutoffs.h" @@ -54,19 +67,16 @@ typedef uinT8 CLASS_NORMALIZATION_ARRAY[MAX_NUM_CLASSES]; /*---------------------------------------------------------------------------- Variables -----------------------------------------------------------------------------*/ -extern int AdaptProtoThresh; -extern int AdaptFeatureThresh; + +extern INT_VAR_H(classify_adapt_proto_thresh, 230, + "Threshold for good protos during adaptive 0-255: "); + +extern INT_VAR_H(classify_adapt_feature_thresh, 230, + "Threshold for good features during adaptive 0-255: "); /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -int ClassPruner(INT_TEMPLATES IntTemplates, - inT16 NumFeatures, - INT_FEATURE_ARRAY Features, - CLASS_NORMALIZATION_ARRAY NormalizationFactors, - CLASS_CUTOFF_ARRAY ExpectedNumFeatures, - CLASS_PRUNER_RESULTS Results, - int Debug); void IntegerMatcher(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, @@ -98,8 +108,6 @@ int FindBadFeatures(INT_CLASS ClassTemplate, void InitIntegerMatcher(); -void InitIntegerMatcherVars(); - void PrintIntMatcherStats(FILE *f); void SetProtoThresh(FLOAT32 Threshold); @@ -193,7 +201,5 @@ void HeapSort (int n, register int ra[], register int rb[]); 
/**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -extern int IntegerMatcherMultiplier; - extern uinT32 EvidenceMultMask; #endif diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 85956115e..7b837a25e 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: intproto.c - ** Purpose: Definition of data structures for integer protos. - ** Author: Dan Johnson - ** History: Thu Feb 7 14:38:16 1991, DSJ, Created. + ** Filename: intproto.c + ** Purpose: Definition of data structures for integer protos. + ** Author: Dan Johnson + ** History: Thu Feb 7 14:38:16 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -20,7 +20,6 @@ ----------------------------------------------------------------------------**/ #include "intproto.h" #include "picofeat.h" -#include "debug.h" #include "mfoutline.h" #include "emalloc.h" #include "const.h" @@ -28,6 +27,8 @@ #include "svmnode.h" #include "adaptmatch.h" #include "globals.h" +#include "classify.h" +#include "genericvector.h" //extern GetPicoFeatureLength(); @@ -99,11 +100,11 @@ typedef struct FILL_SPEC; -enum IntmatcherDebugAction { - IDA_ADAPTIVE, - IDA_STATIC, - IDA_BOTH -}; + +/* constants for conversion from old inttemp format */ +#define OLD_MAX_NUM_CONFIGS 32 +#define OLD_WERDS_PER_CONFIG_VEC ((OLD_MAX_NUM_CONFIGS + BITS_PER_WERD - 1) /\ + BITS_PER_WERD) /**---------------------------------------------------------------------------- Macros @@ -130,11 +131,11 @@ void DoFill(FILL_SPEC *FillSpec, BOOL8 FillerDone(TABLE_FILLER *Filler); void FillPPCircularBits (uinT32 -ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], -int Bit, FLOAT32 Center, FLOAT32 Spread); + ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], + int Bit, FLOAT32 Center, FLOAT32 Spread); void FillPPLinearBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], -int Bit, FLOAT32 Center, FLOAT32 Spread); + int Bit, FLOAT32 Center, FLOAT32 Spread); #ifndef GRAPHICS_DISABLED CLASS_ID GetClassToDebug(const char *Prompt); @@ -166,189 +167,113 @@ void RenderIntProto(void *window, int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* /users/danj/wiseowl/src/danj/microfeatures/intproto.c -FLOAT32 BucketStart - _ARGS((int Bucket, - FLOAT32 Offset, - int NumBuckets)); - -FLOAT32 BucketEnd - _ARGS((int Bucket, - FLOAT32 Offset, - int NumBuckets)); - -void DoFill - _ARGS((FILL_SPEC *FillSpec, - CLASS_PRUNER Pruner, - uinT32 ClassMask, - uinT32 ClassCount, - uinT32 WordIndex)); - -BOOL8 FillerDone - 
_ARGS((TABLE_FILLER *Filler)); - -void FillPPCircularBits - _ARGS((uinT32 ParamTable [NUM_PP_BUCKETS ][WERDS_PER_PP_VECTOR ], - int Bit, - FLOAT32 Center, - FLOAT32 Spread)); - -void FillPPLinearBits - _ARGS((uinT32 ParamTable [NUM_PP_BUCKETS ][WERDS_PER_PP_VECTOR ], - int Bit, - FLOAT32 Center, - FLOAT32 Spread)); - -void GetCPPadsForLevel - _ARGS((int Level, - FLOAT32 *EndPad, - FLOAT32 *SidePad, - FLOAT32 *AnglePad)); - -C_COL GetMatchColorFor - _ARGS((FLOAT32 Evidence)); - -void GetNextFill - _ARGS((TABLE_FILLER *Filler, - FILL_SPEC *Fill)); - -void InitTableFiller - _ARGS((FLOAT32 EndPad, - FLOAT32 SidePad, - FLOAT32 AnglePad, - PROTO Proto, - TABLE_FILLER *Filler)); - -void RenderIntFeature - _ARGS((SHAPE_LIST ShapeList, - INT_FEATURE Feature, - char *Color)); - -void RenderIntProto - _ARGS((SHAPE_LIST ShapeList, - INT_CLASS Class, - PROTO_ID ProtoId, - char *Color)); - -int TruncateParam - _ARGS((FLOAT32 Param, - int Min, - int Max, - char *Id)); - -#undef _ARGS -*/ - /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -/* control knobs */ -make_int_const (NumCPLevels, 3, MakeNumCPLevels); -make_float_const (CPAnglePadLoose, 45.0, MakeCPAnglePadLoose); -make_float_const (CPAnglePadMedium, 20.0, MakeCPAnglePadMedium); -make_float_const (CPAnglePadTight, 10.0, MakeCPAnglePadTight); -make_float_const (CPEndPadLoose, 0.5, MakeCPEndPadLoose); -make_float_const (CPEndPadMedium, 0.5, MakeCPEndPadMedium); -make_float_const (CPEndPadTight, 0.5, MakeCPEndPadTight); -make_float_const (CPSidePadLoose, 2.5, MakeCPSidePadLoose); -make_float_const (CPSidePadMedium, 1.2, MakeCPSidePadMedium); -make_float_const (CPSidePadTight, 0.6, MakeCPSidePadTight); -make_float_const (PPAnglePad, 45.0, MakePPAnglePad); -make_float_const (PPEndPad, 0.5, MakePPEndPad); -make_float_const (PPSidePad, 2.5, MakePPSidePad); /* global display lists 
used to display proto and feature match information*/ ScrollView *IntMatchWindow = NULL; -//extern int LearningDebugLevel; +ScrollView *FeatureDisplayWindow = NULL; +ScrollView *ProtoDisplayWindow = NULL; + +/**---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + +/* control knobs */ +INT_VAR(classify_num_cp_levels, 3, "Number of Class Pruner Levels"); +double_VAR(classify_cp_angle_pad_loose, 45.0, + "Class Pruner Angle Pad Loose"); +double_VAR(classify_cp_angle_pad_medium, 20.0, + "Class Pruner Angle Pad Medium"); +double_VAR(classify_cp_angle_pad_tight, 10.0, + "CLass Pruner Angle Pad Tight"); +double_VAR(classify_cp_end_pad_loose, 0.5, "Class Pruner End Pad Loose"); +double_VAR(classify_cp_end_pad_medium, 0.5, "Class Pruner End Pad Medium"); +double_VAR(classify_cp_end_pad_tight, 0.5, "Class Pruner End Pad Tight"); +double_VAR(classify_cp_side_pad_loose, 2.5, "Class Pruner Side Pad Loose"); +double_VAR(classify_cp_side_pad_medium, 1.2, "Class Pruner Side Pad Medium"); +double_VAR(classify_cp_side_pad_tight, 0.6, "Class Pruner Side Pad Tight"); +double_VAR(classify_pp_angle_pad, 45.0, "Proto Pruner Angle Pad"); +double_VAR(classify_pp_end_pad, 0.5, "Proto Prune End Pad"); +double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad"); /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -int AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { +void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class) { /* - ** Parameters: - ** Templates templates to add new class to - ** ClassId class id to associate new class with - ** Class class data structure to add to templates - ** Globals: none - ** Operation: This 
routine adds a new class structure to a set of - ** templates. - ** Return: The class index of the new class. - ** Exceptions: none - ** History: Mon Feb 11 11:52:08 1991, DSJ, Created. + ** Parameters: + ** Templates templates to add new class to + ** ClassId class id to associate new class with + ** Class class data structure to add to templates + ** Globals: none + ** Operation: This routine adds a new class structure to a set of + ** templates. Classes have to be added to Templates in + ** the order of increasing ClassIds. + ** Return: none + ** Exceptions: none + ** History: Mon Feb 11 11:52:08 1991, DSJ, Created. */ - int Index; int Pruner; uinT32 *Word; assert (LegalClassId (ClassId)); - assert (UnusedClassIdIn (Templates, ClassId)); - - Index = Templates->NumClasses; - Templates->IndexFor[ClassId] = Index; - Templates->ClassIdFor[Index] = ClassId; - + if (ClassId != Templates->NumClasses) { + fprintf(stderr, "Please make sure that classes are added to templates"); + fprintf(stderr, " in increasing order of ClassIds\n"); + exit(1); + } + ClassForClassId (Templates, ClassId) = Class; Templates->NumClasses++; - Templates->Class[Index] = Class; if (Templates->NumClasses > MaxNumClassesIn (Templates)) { - Pruner = Templates->NumClassPruners; - Templates->NumClassPruners++; + Pruner = Templates->NumClassPruners++; Templates->ClassPruner[Pruner] = (CLASS_PRUNER) Emalloc (sizeof (CLASS_PRUNER_STRUCT)); - for (Word = (uinT32 *) (Templates->ClassPruner[Pruner]); - Word < (uinT32 *) (Templates->ClassPruner[Pruner]) + WERDS_PER_CP; - *Word++ = 0); + for (Word = reinterpret_cast(Templates->ClassPruner[Pruner]); + Word < reinterpret_cast(Templates->ClassPruner[Pruner]) + + WERDS_PER_CP; + *Word++ = 0); } - - return (Index); - } /* AddIntClass */ /*---------------------------------------------------------------------------*/ int AddIntConfig(INT_CLASS Class) { /* - ** Parameters: - ** Class class to add new configuration to - ** Globals: none - ** Operation: This routine 
returns the index of the next free config - ** in Class. - ** Return: Index of next free config. - ** Exceptions: none - ** History: Mon Feb 11 14:44:40 1991, DSJ, Created. + ** Parameters: + ** Class class to add new configuration to + ** Globals: none + ** Operation: This routine returns the index of the next free config + ** in Class. + ** Return: Index of next free config. + ** Exceptions: none + ** History: Mon Feb 11 14:44:40 1991, DSJ, Created. */ int Index; - assert (Class->NumConfigs < MAX_NUM_CONFIGS); + assert(Class->NumConfigs < MAX_NUM_CONFIGS); - Index = Class->NumConfigs; - Class->NumConfigs++; + Index = Class->NumConfigs++; Class->ConfigLengths[Index] = 0; - return (Index); + return Index; } /* AddIntConfig */ /*---------------------------------------------------------------------------*/ int AddIntProto(INT_CLASS Class) { /* - ** Parameters: - ** Class class to add new proto to - ** Globals: none - ** Operation: This routine allocates the next free proto in Class and - ** returns its index. - ** Return: Proto index of new proto. - ** Exceptions: none - ** History: Mon Feb 11 13:26:41 1991, DSJ, Created. + ** Parameters: + ** Class class to add new proto to + ** Globals: none + ** Operation: This routine allocates the next free proto in Class and + ** returns its index. + ** Return: Proto index of new proto. + ** Exceptions: none + ** History: Mon Feb 11 13:26:41 1991, DSJ, Created. 
*/ int Index; int ProtoSetId; @@ -364,23 +289,23 @@ int AddIntProto(INT_CLASS Class) { if (Class->NumProtos > MaxNumIntProtosIn(Class)) { ProtoSetId = Class->NumProtoSets++; - ProtoSet = (PROTO_SET) Emalloc (sizeof (PROTO_SET_STRUCT)); + ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); Class->ProtoSets[ProtoSetId] = ProtoSet; - for (Word = (uinT32 *) (ProtoSet->ProtoPruner); - Word < (uinT32 *) (ProtoSet->ProtoPruner) + WERDS_PER_PP; - *Word++ = 0); + for (Word = reinterpret_cast<uinT32*>(ProtoSet->ProtoPruner); + Word < reinterpret_cast<uinT32*>(ProtoSet->ProtoPruner) + WERDS_PER_PP; + *Word++ = 0); /* reallocate space for the proto lengths and install in class */ - Class->ProtoLengths = (uinT8 *) Erealloc (Class->ProtoLengths, - MaxNumIntProtosIn (Class) * - sizeof (uinT8)); + Class->ProtoLengths = + (uinT8 *)Erealloc(Class->ProtoLengths, + MaxNumIntProtosIn(Class) * sizeof(uinT8)); } /* initialize proto so its length is zero and it isn't in any configs */ Class->ProtoLengths[Index] = 0; Proto = ProtoForProtoId (Class, Index); for (Word = Proto->Configs; - Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); + Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); return (Index); @@ -388,41 +313,39 @@ int AddIntProto(INT_CLASS Class) { /*---------------------------------------------------------------------------*/ -void -AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates) +void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, + INT_TEMPLATES Templates) /* - ** Parameters: - ** Proto floating-pt proto to add to class pruner - ** ClassId class id corresponding to Proto - ** Templates set of templates containing class pruner - ** Globals: - ** NumCPLevels number of levels used in the class pruner - ** Operation: This routine adds Proto to the class pruning tables - ** for the specified class in Templates. - ** Return: none - ** Exceptions: none - ** History: Wed Feb 13 08:49:54 1991, DSJ, Created.
+ ** Parameters: + ** Proto floating-pt proto to add to class pruner + ** ClassId class id corresponding to Proto + ** Templates set of templates containing class pruner + ** Globals: + ** classify_num_cp_levels number of levels used in the class pruner + ** Operation: This routine adds Proto to the class pruning tables + ** for the specified class in Templates. + ** Return: none + ** Exceptions: none + ** History: Wed Feb 13 08:49:54 1991, DSJ, Created. */ #define MAX_LEVEL 2 { CLASS_PRUNER Pruner; uinT32 ClassMask; uinT32 ClassCount; - CLASS_INDEX ClassIndex; uinT32 WordIndex; int Level; FLOAT32 EndPad, SidePad, AnglePad; TABLE_FILLER TableFiller; FILL_SPEC FillSpec; - ClassIndex = Templates->IndexFor[ClassId]; - Pruner = Templates->ClassPruner [CPrunerIdFor (ClassIndex)]; - WordIndex = CPrunerWordIndexFor (ClassIndex); - ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassIndex); + Pruner = CPrunerFor (Templates, ClassId); + WordIndex = CPrunerWordIndexFor (ClassId); + ClassMask = CPrunerMaskFor (MAX_LEVEL, ClassId); - for (Level = NumCPLevels - 1; Level >= 0; Level--) { + for (Level = classify_num_cp_levels - 1; Level >= 0; Level--) { GetCPPadsForLevel(Level, &EndPad, &SidePad, &AnglePad); - ClassCount = CPrunerMaskFor (Level, ClassIndex); + ClassCount = CPrunerMaskFor (Level, ClassId); InitTableFiller(EndPad, SidePad, AnglePad, Proto, &TableFiller); while (!FillerDone (&TableFiller)) { @@ -436,17 +359,17 @@ AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates) /*---------------------------------------------------------------------------*/ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { /* - ** Parameters: - ** Proto floating-pt proto to be added to proto pruner - ** ProtoId id of proto - ** Class integer class that contains desired proto pruner - ** Globals: none - ** Operation: This routine updates the proto pruner lookup tables - ** for Class to include a new proto identified by ProtoId - ** and described by Proto. 
- ** Return: none - ** Exceptions: none - ** History: Fri Feb 8 13:07:19 1991, DSJ, Created. + ** Parameters: + ** Proto floating-pt proto to be added to proto pruner + ** ProtoId id of proto + ** Class integer class that contains desired proto pruner + ** Globals: none + ** Operation: This routine updates the proto pruner lookup tables + ** for Class to include a new proto identified by ProtoId + ** and described by Proto. + ** Return: none + ** Exceptions: none + ** History: Fri Feb 8 13:07:19 1991, DSJ, Created. */ FLOAT32 Angle, X, Y, Length; FLOAT32 Pad; @@ -454,92 +377,97 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class) { PROTO_SET ProtoSet; if (ProtoId >= Class->NumProtos) - cprintf ("AddProtoToProtoPruner:assert failed: %d < %d", - ProtoId, Class->NumProtos); - assert (ProtoId < Class->NumProtos); + cprintf("AddProtoToProtoPruner:assert failed: %d < %d", + ProtoId, Class->NumProtos); + assert(ProtoId < Class->NumProtos); Index = IndexForProto (ProtoId); ProtoSet = Class->ProtoSets[SetForProto (ProtoId)]; Angle = Proto->Angle; +#ifndef __MSW32__ + assert(!isnan(Angle)); +#endif + FillPPCircularBits (ProtoSet->ProtoPruner[PRUNER_ANGLE], Index, - Angle + ANGLE_SHIFT, PPAnglePad / 360.0); + Angle + ANGLE_SHIFT, classify_pp_angle_pad / 360.0); Angle *= 2.0 * PI; Length = Proto->Length; X = Proto->X + X_SHIFT; - Pad = max (fabs (cos (Angle)) * (Length / 2.0 + - PPEndPad * GetPicoFeatureLength ()), - fabs (sin (Angle)) * (PPSidePad * GetPicoFeatureLength ())); + Pad = MAX (fabs (cos (Angle)) * (Length / 2.0 + + classify_pp_end_pad * + GetPicoFeatureLength ()), + fabs (sin (Angle)) * (classify_pp_side_pad * + GetPicoFeatureLength ())); FillPPLinearBits (ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad); Y = Proto->Y + Y_SHIFT; - Pad = max (fabs (sin (Angle)) * (Length / 2.0 + - PPEndPad * GetPicoFeatureLength ()), - fabs (cos (Angle)) * (PPSidePad * GetPicoFeatureLength ())); - - FillPPLinearBits (ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, 
Pad); + Pad = MAX (fabs (sin (Angle)) * (Length / 2.0 + + classify_pp_end_pad * + GetPicoFeatureLength ()), + fabs (cos (Angle)) * (classify_pp_side_pad * + GetPicoFeatureLength ())); + FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad); } /* AddProtoToProtoPruner */ /*---------------------------------------------------------------------------*/ int BucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets) { /* - ** Parameters: - ** Param parameter value to map into a bucket number - ** Offset amount to shift param before mapping it - ** NumBuckets number of buckets to map param into - ** Globals: none - ** Operation: This routine maps a parameter value into a bucket between - ** 0 and NumBuckets-1. Offset is added to the parameter - ** before mapping it. Values which map to buckets outside - ** the range are truncated to fit within the range. Mapping - ** is done by truncating rather than rounding. - ** Return: Bucket number corresponding to Param + Offset. - ** Exceptions: none - ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. + ** Parameters: + ** Param parameter value to map into a bucket number + ** Offset amount to shift param before mapping it + ** NumBuckets number of buckets to map param into + ** Globals: none + ** Operation: This routine maps a parameter value into a bucket between + ** 0 and NumBuckets-1. Offset is added to the parameter + ** before mapping it. Values which map to buckets outside + ** the range are truncated to fit within the range. Mapping + ** is done by truncating rather than rounding. + ** Return: Bucket number corresponding to Param + Offset. + ** Exceptions: none + ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. 
*/ int Bucket; - Bucket = (int) MapParam (Param, Offset, NumBuckets); + Bucket = static_cast<int>(MapParam(Param, Offset, NumBuckets)); if (Bucket < 0) Bucket = 0; else if (Bucket >= NumBuckets) Bucket = NumBuckets - 1; return (Bucket); - } /* BucketFor */ /*---------------------------------------------------------------------------*/ int CircBucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets) { /* - ** Parameters: - ** Param parameter value to map into a circular bucket - ** Offset amount to shift param before mapping it - ** NumBuckets number of buckets to map param into - ** Globals: none - ** Operation: This routine maps a parameter value into a bucket between - ** 0 and NumBuckets-1. Offset is added to the parameter - ** before mapping it. Values which map to buckets outside - ** the range are wrapped to a new value in a circular fashion. - ** Mapping is done by truncating rather than rounding. - ** Return: Bucket number corresponding to Param + Offset. - ** Exceptions: none - ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. + ** Parameters: + ** Param parameter value to map into a circular bucket + ** Offset amount to shift param before mapping it + ** NumBuckets number of buckets to map param into + ** Globals: none + ** Operation: This routine maps a parameter value into a bucket between + ** 0 and NumBuckets-1. Offset is added to the parameter + ** before mapping it. Values which map to buckets outside + ** the range are wrapped to a new value in a circular fashion. + ** Mapping is done by truncating rather than rounding. + ** Return: Bucket number corresponding to Param + Offset. + ** Exceptions: none + ** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
*/ int Bucket; - Bucket = (int) MapParam (Param, Offset, NumBuckets); + Bucket = static_cast<int>(MapParam(Param, Offset, NumBuckets)); if (Bucket < 0) Bucket += NumBuckets; else if (Bucket >= NumBuckets) Bucket -= NumBuckets; - return (Bucket); - + return Bucket; } /* CircBucketFor */ @@ -547,15 +475,15 @@ int CircBucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets) { #ifndef GRAPHICS_DISABLED void UpdateMatchDisplay() { /* - ** Parameters: none - ** Globals: - ** FeatureShapes display list for features - ** ProtoShapes display list for protos - ** Operation: This routine clears the global feature and proto - ** display lists. - ** Return: none - ** Exceptions: none - ** History: Thu Mar 21 15:40:19 1991, DSJ, Created. + ** Parameters: none + ** Globals: + ** FeatureShapes display list for features + ** ProtoShapes display list for protos + ** Operation: This routine clears the global feature and proto + ** display lists. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 21 15:40:19 1991, DSJ, Created. */ if (IntMatchWindow != NULL) c_make_current(IntMatchWindow); @@ -565,30 +493,31 @@ void UpdateMatchDisplay() { /*---------------------------------------------------------------------------*/ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { /* - ** Parameters: - ** Config config to be added to class - ** ConfigId id to be used for new config - ** Class class to add new config to - ** Globals: none - ** Operation: This operation updates the config vectors of all protos - ** in Class to indicate that the protos with 1's in Config - ** belong to a new configuration identified by ConfigId. - ** It is assumed that the length of the Config bit vector is - ** equal to the number of protos in Class. - ** Return: none - ** Exceptions: none - ** History: Mon Feb 11 14:57:31 1991, DSJ, Created.
+ ** Parameters: + ** Config config to be added to class + ** ConfigId id to be used for new config + ** Class class to add new config to + ** Globals: none + ** Operation: This operation updates the config vectors of all protos + ** in Class to indicate that the protos with 1's in Config + ** belong to a new configuration identified by ConfigId. + ** It is assumed that the length of the Config bit vector is + ** equal to the number of protos in Class. + ** Return: none + ** Exceptions: none + ** History: Mon Feb 11 14:57:31 1991, DSJ, Created. */ int ProtoId; INT_PROTO Proto; int TotalLength; for (ProtoId = 0, TotalLength = 0; - ProtoId < Class->NumProtos; ProtoId++) - if (test_bit (Config, ProtoId)) { - Proto = ProtoForProtoId (Class, ProtoId); - SET_BIT (Proto->Configs, ConfigId); - TotalLength += Class->ProtoLengths[ProtoId]; + ProtoId < Class->NumProtos; ProtoId++) { + if (test_bit(Config, ProtoId)) { + Proto = ProtoForProtoId(Class, ProtoId); + SET_BIT(Proto->Configs, ConfigId); + TotalLength += Class->ProtoLengths[ProtoId]; + } } Class->ConfigLengths[ConfigId] = TotalLength; } /* ConvertConfig */ @@ -597,32 +526,32 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class) { /*---------------------------------------------------------------------------*/ void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { /* - ** Parameters: - ** Proto floating-pt proto to be converted to integer format - ** ProtoId id of proto - ** Class integer class to add converted proto to - ** Globals: none - ** Operation: This routine converts Proto to integer format and - ** installs it as ProtoId in Class. - ** Return: none - ** Exceptions: none - ** History: Fri Feb 8 11:22:43 1991, DSJ, Created. 
+ ** Parameters: + ** Proto floating-pt proto to be converted to integer format + ** ProtoId id of proto + ** Class integer class to add converted proto to + ** Globals: none + ** Operation: This routine converts Proto to integer format and + ** installs it as ProtoId in Class. + ** Return: none + ** Exceptions: none + ** History: Fri Feb 8 11:22:43 1991, DSJ, Created. */ INT_PROTO P; FLOAT32 Param; - assert (ProtoId < Class->NumProtos); + assert(ProtoId < Class->NumProtos); - P = ProtoForProtoId (Class, ProtoId); + P = ProtoForProtoId(Class, ProtoId); Param = Proto->A * 128; - P->A = TruncateParam (Param, -128, 127, NULL); + P->A = TruncateParam(Param, -128, 127, NULL); Param = -Proto->B * 256; - P->B = TruncateParam (Param, 0, 255, NULL); + P->B = TruncateParam(Param, 0, 255, NULL); Param = Proto->C * 128; - P->C = TruncateParam (Param, -128, 127, NULL); + P->C = TruncateParam(Param, -128, 127, NULL); Param = Proto->Angle * 256; if (Param < 0 || Param >= 256) @@ -631,26 +560,28 @@ void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { P->Angle = (uinT8) Param; /* round proto length to nearest integer number of pico-features */ - Param = (Proto->Length / GetPicoFeatureLength ()) + 0.5; - Class->ProtoLengths[ProtoId] = TruncateParam (Param, 1, 255, NULL); - if (LearningDebugLevel >= 2) - cprintf ("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", - P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); + Param = (Proto->Length / GetPicoFeatureLength()) + 0.5; + Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL); + if (classify_learning_debug_level >= 2) + cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)", + P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); } /* ConvertProto */ /*---------------------------------------------------------------------------*/ -INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& target_unicharset) { +namespace tesseract { +INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, + const UNICHARSET& 
+ target_unicharset) { /* - ** Parameters: - ** FloatProtos prototypes in old floating pt format - ** Globals: none - ** Operation: This routine converts from the old floating point format - ** to the new integer format. - ** Return: New set of training templates in integer format. - ** Exceptions: none - ** History: Thu Feb 7 14:40:42 1991, DSJ, Created. + ** Parameters: + ** FloatProtos prototypes in old floating pt format + ** Globals: none + ** Operation: This routine converts from the old floating point format + ** to the new integer format. + ** Return: New set of training templates in integer format. + ** Exceptions: none + ** History: Thu Feb 7 14:40:42 1991, DSJ, Created. */ INT_TEMPLATES IntTemplates; CLASS_TYPE FClass; @@ -659,130 +590,122 @@ INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, int ProtoId; int ConfigId; - IntTemplates = NewIntTemplates (); + IntTemplates = NewIntTemplates(); for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) { FClass = &(FloatProtos[ClassId]); - if (FClass->NumProtos > 0) { - assert (UnusedClassIdIn (IntTemplates, ClassId)); - IClass = NewIntClass (FClass->NumProtos, FClass->NumConfigs); - AddIntClass(IntTemplates, ClassId, IClass); + if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 && + strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) { + cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n", + target_unicharset.id_to_unichar(ClassId)); + } + assert(UnusedClassIdIn(IntTemplates, ClassId)); + IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs); + FontSet fs; + fs.size = FClass->font_set.size(); + fs.configs = new int[fs.size]; + for (int i = 0; i < fs.size; ++i) { + fs.configs[i] = FClass->font_set.get(i); + } + if (this->fontset_table_.contains(fs)) { + IClass->font_set_id = this->fontset_table_.get_id(fs); + delete[] fs.configs; + } else { + IClass->font_set_id = this->fontset_table_.push_back(fs); + } + AddIntClass(IntTemplates, ClassId, IClass); - for (ProtoId = 
0; ProtoId < FClass->NumProtos; ProtoId++) { - AddIntProto(IClass); - ConvertProto (ProtoIn (FClass, ProtoId), ProtoId, IClass); - AddProtoToProtoPruner (ProtoIn (FClass, ProtoId), ProtoId, - IClass); - AddProtoToClassPruner (ProtoIn (FClass, ProtoId), ClassId, - IntTemplates); - } + for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) { + AddIntProto(IClass); + ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass); + AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass); + AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates); + } - for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) { - AddIntConfig(IClass); - ConvertConfig (FClass->Configurations[ConfigId], ConfigId, IClass); - } + for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) { + AddIntConfig(IClass); + ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass); } } return (IntTemplates); } /* CreateIntTemplates */ +} // namespace tesseract /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED void DisplayIntFeature(INT_FEATURE Feature, FLOAT32 Evidence) { /* - ** Parameters: - ** Feature pico-feature to be displayed - ** Evidence best evidence for this feature (0-1) - ** Globals: - ** FeatureShapes global display list for features - ** Operation: This routine renders the specified feature into a - ** global display list. - ** Return: none - ** Exceptions: none - ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. + ** Parameters: + ** Feature pico-feature to be displayed + ** Evidence best evidence for this feature (0-1) + ** Globals: + ** FeatureShapes global display list for features + ** Operation: This routine renders the specified feature into a + ** global display list. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. 
*/ C_COL Color; - Color = GetMatchColorFor (Evidence); + Color = GetMatchColorFor(Evidence); RenderIntFeature(IntMatchWindow, Feature, Color); + if (FeatureDisplayWindow) { + RenderIntFeature(FeatureDisplayWindow, Feature, Color); + } } /* DisplayIntFeature */ /*---------------------------------------------------------------------------*/ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) { /* - ** Parameters: - ** Class class to take proto from - ** ProtoId id of proto in Class to be displayed - ** Evidence total evidence for proto (0-1) - ** Globals: - ** ProtoShapes global display list for protos - ** Operation: This routine renders the specified proto into a - ** global display list. - ** Return: none - ** Exceptions: none - ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. + ** Parameters: + ** Class class to take proto from + ** ProtoId id of proto in Class to be displayed + ** Evidence total evidence for proto (0-1) + ** Globals: + ** ProtoShapes global display list for protos + ** Operation: This routine renders the specified proto into a + ** global display list. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. */ C_COL Color; - Color = GetMatchColorFor (Evidence); + Color = GetMatchColorFor(Evidence); RenderIntProto(IntMatchWindow, Class, ProtoId, Color); - + if (ProtoDisplayWindow) { + RenderIntProto(ProtoDisplayWindow, Class, ProtoId, Color); + } } /* DisplayIntProto */ #endif -/*---------------------------------------------------------------------------*/ -void InitIntProtoVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: Initialize the control variables for the integer proto - ** routines. - ** Return: none - ** Exceptions: none - ** History: Tue Feb 12 08:04:34 1991, DSJ, Created. 
- */ - MakeNumCPLevels(); - MakeCPAnglePadLoose(); - MakeCPAnglePadMedium(); - MakeCPAnglePadTight(); - MakeCPEndPadLoose(); - MakeCPEndPadMedium(); - MakeCPEndPadTight(); - MakeCPSidePadLoose(); - MakeCPSidePadMedium(); - MakeCPSidePadTight(); - MakePPAnglePad(); - MakePPEndPad(); - MakePPSidePad(); -} /* InitIntProtoVars */ - - /*---------------------------------------------------------------------------*/ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { /* - ** Parameters: - ** MaxNumProtos number of protos to allocate space for - ** MaxNumConfigs number of configs to allocate space for - ** Globals: none - ** Operation: This routine creates a new integer class data structure - ** and returns it. Sufficient space is allocated - ** to handle the specified number of protos and configs. - ** Return: New class created. - ** Exceptions: none - ** History: Fri Feb 8 10:51:23 1991, DSJ, Created. + ** Parameters: + ** MaxNumProtos number of protos to allocate space for + ** MaxNumConfigs number of configs to allocate space for + ** Globals: none + ** Operation: This routine creates a new integer class data structure + ** and returns it. Sufficient space is allocated + ** to handle the specified number of protos and configs. + ** Return: New class created. + ** Exceptions: none + ** History: Fri Feb 8 10:51:23 1991, DSJ, Created. 
*/ INT_CLASS Class; PROTO_SET ProtoSet; int i; register uinT32 *Word; - assert (MaxNumConfigs <= MAX_NUM_CONFIGS); + assert(MaxNumConfigs <= MAX_NUM_CONFIGS); - Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT)); + Class = (INT_CLASS) Emalloc(sizeof(INT_CLASS_STRUCT)); Class->NumProtoSets = ((MaxNumProtos + PROTOS_PER_PROTO_SET - 1) / - PROTOS_PER_PROTO_SET); + PROTOS_PER_PROTO_SET); assert(Class->NumProtoSets <= MAX_NUM_PROTO_SETS); @@ -791,16 +714,18 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { for (i = 0; i < Class->NumProtoSets; i++) { /* allocate space for a proto set, install in class, and initialize */ - ProtoSet = (PROTO_SET) Emalloc (sizeof (PROTO_SET_STRUCT)); + ProtoSet = (PROTO_SET) Emalloc(sizeof(PROTO_SET_STRUCT)); Class->ProtoSets[i] = ProtoSet; - for (Word = (uinT32 *) (ProtoSet->ProtoPruner); - Word < (uinT32 *) (ProtoSet->ProtoPruner) + WERDS_PER_PP; - *Word++ = 0); + for (Word = reinterpret_cast<uinT32*>(ProtoSet->ProtoPruner); + Word < reinterpret_cast<uinT32*>(ProtoSet->ProtoPruner) + WERDS_PER_PP; + *Word++ = 0); /* allocate space for the proto lengths and install in class */ } - Class->ProtoLengths = (uinT8 *) Emalloc (MaxNumIntProtosIn (Class) * - sizeof (uinT8)); + if (MaxNumIntProtosIn (Class) > 0) { + Class->ProtoLengths = + (uinT8 *)Emalloc(MaxNumIntProtosIn (Class) * sizeof (uinT8)); + } return (Class); @@ -808,14 +733,15 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs) { /*-------------------------------------------------------------------------*/ -void free_int_class( /*class to free */ - INT_CLASS int_class) { +void free_int_class(INT_CLASS int_class) { int i; for (i = 0; i < int_class->NumProtoSets; i++) { Efree (int_class->ProtoSets[i]); } - Efree (int_class->ProtoLengths); + if (int_class->ProtoLengths != NULL) { + Efree (int_class->ProtoLengths); + } Efree(int_class); } @@ -823,13 +749,13 @@ void free_int_class( /*class to free */ /*---------------------------------------------------------------------------*/
INT_TEMPLATES NewIntTemplates() { /* - ** Parameters: none - ** Globals: none - ** Operation: This routine allocates a new set of integer templates - ** initialized to hold 0 classes. - ** Return: The integer templates created. - ** Exceptions: none - ** History: Fri Feb 8 08:38:51 1991, DSJ, Created. + ** Parameters: none + ** Globals: none + ** Operation: This routine allocates a new set of integer templates + ** initialized to hold 0 classes. + ** Return: The integer templates created. + ** Exceptions: none + ** History: Fri Feb 8 08:38:51 1991, DSJ, Created. */ INT_TEMPLATES T; int i; @@ -838,14 +764,10 @@ INT_TEMPLATES NewIntTemplates() { T->NumClasses = 0; T->NumClassPruners = 0; - /* initialize mapping tables */ - for (i = 0; i <= MAX_CLASS_ID; i++) - T->IndexFor[i] = ILLEGAL_CLASS; for (i = 0; i < MAX_NUM_CLASSES; i++) - T->ClassIdFor[i] = NO_CLASS; + ClassForClassId (T, i) = NULL; return (T); - } /* NewIntTemplates */ @@ -854,27 +776,71 @@ void free_int_templates(INT_TEMPLATES templates) { int i; for (i = 0; i < templates->NumClasses; i++) - free_int_class (templates->Class[i]); + free_int_class(templates->Class[i]); for (i = 0; i < templates->NumClassPruners; i++) - Efree (templates->ClassPruner[i]); + Efree(templates->ClassPruner[i]); Efree(templates); } /*---------------------------------------------------------------------------*/ -INT_TEMPLATES ReadIntTemplates(FILE *File, BOOL8 swap) { +// Code to read/write Classify::font*table structures. 
+namespace { +void read_info(FILE* f, FontInfo* fi, bool swap) { + inT32 size; + fread(&size, sizeof(inT32), 1, f); + if (swap) + reverse32(&size); + fi->name = new char[size + 1]; + fread(fi->name, sizeof(char), size, f); + fi->name[size] = '\0'; + fread(&fi->properties, sizeof(fi->properties), 1, f); + if (swap) + reverse32(&fi->properties); +} + +void write_info(FILE* f, const FontInfo& fi) { + inT32 size = strlen(fi.name); + fwrite(&size, sizeof(inT32), 1, f); + fwrite(fi.name, sizeof(char), size, f); + fwrite(&fi.properties, sizeof(inT32), 1, f); +} + +void read_set(FILE* f, FontSet* fs, bool swap) { + fread(&fs->size, sizeof(inT32), 1, f); + if (swap) + reverse32(&fs->size); + fs->configs = new int[fs->size]; + for (int i = 0; i < fs->size; ++i) { + fread(&fs->configs[i], sizeof(inT32), 1, f); + if (swap) + reverse32(&fs->configs[i]); + } +} + +void write_set(FILE* f, const FontSet& fs) { + fwrite(&fs.size, sizeof(inT32), 1, f); + for (int i = 0; i < fs.size; ++i) { + fwrite(&fs.configs[i], sizeof(inT32), 1, f); + } +} +} + +namespace tesseract { +INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) { /* - ** Parameters: - ** File open file to read templates from - ** Globals: none - ** Operation: This routine reads a set of integer templates from - ** File. File must already be open and must be in the - ** correct binary format. - ** Return: Pointer to integer templates read from File. - ** Exceptions: none - ** History: Wed Feb 27 11:48:46 1991, DSJ, Created. + ** Parameters: + ** File open file to read templates from + ** Globals: none + ** Operation: This routine reads a set of integer templates from + ** File. File must already be open and must be in the + ** correct binary format. + ** Return: Pointer to integer templates read from File. + ** Exceptions: none + ** History: Wed Feb 27 11:48:46 1991, DSJ, Created. 
*/ - int i, j, x, y, z; + int i, j, w, x, y, z; + BOOL8 swap; int nread; int unicharset_size; int version_id = 0; @@ -884,27 +850,36 @@ INT_TEMPLATES ReadIntTemplates(FILE *File, BOOL8 swap) { uinT8 *Lengths; PROTO_SET ProtoSet; + /* variables for conversion from older inttemp formats */ + int b, bit_number, last_cp_bit_number, new_b, new_i, new_w; + CLASS_ID class_id, max_class_id; + inT16 *IndexFor = new inT16[MAX_NUM_CLASSES]; + CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES]; + CLASS_PRUNER *TempClassPruner = new CLASS_PRUNER[MAX_NUM_CLASS_PRUNERS]; + uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS + (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0 + uinT32 Mask, NewMask, ClassBits; + uinT32 *Word; + int MaxNumConfigs = MAX_NUM_CONFIGS; + int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC; + /* first read the high level template struct */ - Templates = NewIntTemplates (); + Templates = NewIntTemplates(); // Read Templates in parts for 64 bit compatibility. if (fread(&unicharset_size, sizeof(int), 1, File) != 1) - cprintf ("Bad read of inttemp!\n"); - if (fread(&Templates->NumClasses, sizeof(int), 1, File) != 1 || - fread(&Templates->NumClassPruners, sizeof(int), 1, File) != 1) - cprintf ("Bad read of inttemp!\n"); + cprintf("Bad read of inttemp!\n"); + if (fread(&Templates->NumClasses, + sizeof(Templates->NumClasses), 1, File) != 1 || + fread(&Templates->NumClassPruners, + sizeof(Templates->NumClassPruners), 1, File) != 1) + cprintf("Bad read of inttemp!\n"); // Swap status is determined automatically. 
swap = Templates->NumClassPruners < 0 || - Templates->NumClassPruners > MAX_NUM_CLASS_PRUNERS; + Templates->NumClassPruners > MAX_NUM_CLASS_PRUNERS; if (swap) { - reverse32 (&Templates->NumClassPruners); - reverse32 (&Templates->NumClasses); - reverse32 (&unicharset_size); - } - if (unicharset_size != unicharset.size()) { - cprintf("Error: %d classes in inttemp while " - "unicharset contains %d unichars.\n", - unicharset_size, unicharset.size()); - exit(1); + reverse32(&Templates->NumClassPruners); + reverse32(&Templates->NumClasses); + reverse32(&unicharset_size); } if (Templates->NumClasses < 0) { // This file has a version id! @@ -913,42 +888,109 @@ INT_TEMPLATES ReadIntTemplates(FILE *File, BOOL8 swap) { 1, File) != 1) cprintf("Bad read of inttemp!\n"); if (swap) - reverse32 (&Templates->NumClasses); + reverse32(&Templates->NumClasses); } - for (i = 0; i < unicharset_size; ++i) { - if (fread(&Templates->IndexFor[i], sizeof(CLASS_INDEX), 1, File) != 1) - cprintf("Bad read of inttemp!\n"); + + if (version_id < 3) { + MaxNumConfigs = OLD_MAX_NUM_CONFIGS; + WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC; } - for (i = 0; i < Templates->NumClasses; ++i) { - if (fread(&Templates->ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1) - cprintf("Bad read of inttemp!\n"); - } - if (swap) { - for (i = 0; i < MAX_CLASS_ID + 1; i++) - reverse16 (&Templates->IndexFor[i]); - for (i = 0; i < MAX_NUM_CLASSES; i++) - reverse32 (&Templates->ClassIdFor[i]); + + if (version_id < 2) { + for (i = 0; i < unicharset_size; ++i) { + if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1) + cprintf("Bad read of inttemp!\n"); + } + for (i = 0; i < Templates->NumClasses; ++i) { + if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1) + cprintf("Bad read of inttemp!\n"); + } + if (swap) { + for (i = 0; i < Templates->NumClasses; i++) + reverse16(IndexFor[i]); + for (i = 0; i < Templates->NumClasses; i++) + reverse32(ClassIdFor[i]); + } } /* then read in the class pruners */ for (i = 0; i < 
Templates->NumClassPruners; i++) { - Pruner = (CLASS_PRUNER) Emalloc (sizeof (CLASS_PRUNER_STRUCT)); + Pruner = (CLASS_PRUNER) Emalloc(sizeof(CLASS_PRUNER_STRUCT)); if ((nread = - fread ((char *) Pruner, 1, sizeof (CLASS_PRUNER_STRUCT), - File)) != sizeof (CLASS_PRUNER_STRUCT)) - cprintf ("Bad read of inttemp!\n"); + fread((char *) Pruner, 1, sizeof(CLASS_PRUNER_STRUCT), + File)) != sizeof(CLASS_PRUNER_STRUCT)) + cprintf("Bad read of inttemp!\n"); if (swap) { - for (j = 0; j < NUM_CP_BUCKETS; j++) { - for (x = 0; x < NUM_CP_BUCKETS; x++) { - for (y = 0; y < NUM_CP_BUCKETS; y++) { - for (z = 0; z < WERDS_PER_CP_VECTOR; z++) { - reverse32 (&Pruner[j][x][y][z]); + for (x = 0; x < NUM_CP_BUCKETS; x++) { + for (y = 0; y < NUM_CP_BUCKETS; y++) { + for (z = 0; z < NUM_CP_BUCKETS; z++) { + for (w = 0; w < WERDS_PER_CP_VECTOR; w++) { + reverse32(&Pruner[x][y][z][w]); } } } } } - Templates->ClassPruner[i] = Pruner; + if (version_id < 2) { + TempClassPruner[i] = Pruner; + } else { + Templates->ClassPruner[i] = Pruner; + } + } + + /* fix class pruners if they came from an old version of inttemp */ + if (version_id < 2) { + // Allocate enough class pruners to cover all the class ids. + max_class_id = 0; + for (i = 0; i < Templates->NumClasses; i++) + if (ClassIdFor[i] > max_class_id) + max_class_id = ClassIdFor[i]; + for (i = 0; i <= CPrunerIdFor(max_class_id); i++) { + Templates->ClassPruner[i] = + (CLASS_PRUNER) Emalloc(sizeof(CLASS_PRUNER_STRUCT)); + for (Word = (uinT32 *) (Templates->ClassPruner[i]); + Word < (uinT32 *) (Templates->ClassPruner[i]) + WERDS_PER_CP; + *Word++ = 0); + } + // Convert class pruners from the old format (indexed by class index) + // to the new format (indexed by class id). 
+ last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1; + for (i = 0; i < Templates->NumClassPruners; i++) { + for (x = 0; x < NUM_CP_BUCKETS; x++) + for (y = 0; y < NUM_CP_BUCKETS; y++) + for (z = 0; z < NUM_CP_BUCKETS; z++) + for (w = 0; w < WERDS_PER_CP_VECTOR; w++) { + if (TempClassPruner[i][x][y][z][w] == 0) + continue; + for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) { + bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b; + if (bit_number > last_cp_bit_number) + break; // the rest of the bits in this word are not used + class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS]; + // Single out NUM_BITS_PER_CLASS bits relating to class_id. + Mask = SetBitsForMask << b; + ClassBits = TempClassPruner[i][x][y][z][w] & Mask; + // Move these bits to the new position in which they should + // appear (indexed corresponding to the class_id). + new_i = CPrunerIdFor(class_id); + new_w = CPrunerWordIndexFor(class_id); + new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS; + if (new_b > b) { + ClassBits <<= (new_b - b); + } else { + ClassBits >>= (b - new_b); + } + // Copy bits relating to class_id to the correct position + // in Templates->ClassPruner. 
+ NewMask = SetBitsForMask << new_b; + Templates->ClassPruner[new_i][x][y][z][new_w] &= ~NewMask; + Templates->ClassPruner[new_i][x][y][z][new_w] |= ClassBits; + } + } + } + for (i = 0; i < Templates->NumClassPruners; i++) { + Efree (TempClassPruner[i]); + } } /* then read in each class */ @@ -967,86 +1009,161 @@ INT_TEMPLATES ReadIntTemplates(FILE *File, BOOL8 swap) { cprintf ("Bad read of inttemp!\n"); } } - for (j = 0; j < MAX_NUM_CONFIGS; ++j) { - if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1) - cprintf ("Bad read of inttemp!\n"); + if (version_id < 4) { + for (j = 0; j < MaxNumConfigs; ++j) { + if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1) + cprintf ("Bad read of inttemp!\n"); + } + if (swap) { + reverse16 (&Class->NumProtos); + for (j = 0; j < MaxNumConfigs; j++) + reverse16 (&Class->ConfigLengths[j]); + } + } else { + ASSERT_HOST(Class->NumConfigs < MaxNumConfigs); + for (j = 0; j < Class->NumConfigs; ++j) { + if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1) + cprintf ("Bad read of inttemp!\n"); + } + if (swap) { + reverse16 (&Class->NumProtos); + for (j = 0; j < MaxNumConfigs; j++) + reverse16 (&Class->ConfigLengths[j]); + } } - if (swap) { - reverse16 (&Class->NumProtos); - for (j = 0; j < MAX_NUM_CONFIGS; j++) - reverse16 (&Class->ConfigLengths[j]); + if (version_id < 2) { + ClassForClassId (Templates, ClassIdFor[i]) = Class; + } else { + ClassForClassId (Templates, i) = Class; } - Templates->Class[i] = Class; /* then read in the proto lengths */ - Lengths = (uinT8 *) Emalloc (sizeof (uinT8) * - MaxNumIntProtosIn (Class)); - if ((nread = fread ((char *) Lengths, sizeof (uinT8), - MaxNumIntProtosIn (Class), - File)) != MaxNumIntProtosIn (Class)) - cprintf ("Bad read of inttemp!\n"); + Lengths = NULL; + if (MaxNumIntProtosIn (Class) > 0) { + Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class)); + if ((nread = + fread((char *)Lengths, sizeof(uinT8), + MaxNumIntProtosIn(Class), File)) 
!= MaxNumIntProtosIn (Class)) + cprintf ("Bad read of inttemp!\n"); + } Class->ProtoLengths = Lengths; /* then read in the proto sets */ for (j = 0; j < Class->NumProtoSets; j++) { - ProtoSet = (PROTO_SET) Emalloc (sizeof (PROTO_SET_STRUCT)); - if ((nread = - fread ((char *) ProtoSet, 1, sizeof (PROTO_SET_STRUCT), - File)) != sizeof (PROTO_SET_STRUCT)) - cprintf ("Bad read of inttemp!\n"); + ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT)); + if (version_id < 3) { + if ((nread = + fread((char *) &ProtoSet->ProtoPruner, 1, + sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER)) + cprintf("Bad read of inttemp!\n"); + for (x = 0; x < PROTOS_PER_PROTO_SET; x++) { + if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1, + sizeof(inT8), File)) != sizeof(inT8) || + (nread = fread((char *) &ProtoSet->Protos[x].B, 1, + sizeof(uinT8), File)) != sizeof(uinT8) || + (nread = fread((char *) &ProtoSet->Protos[x].C, 1, + sizeof(inT8), File)) != sizeof(inT8) || + (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1, + sizeof(uinT8), File)) != sizeof(uinT8)) + cprintf("Bad read of inttemp!\n"); + for (y = 0; y < WerdsPerConfigVec; y++) + if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1, + sizeof(uinT32), File)) != sizeof(uinT32)) + cprintf("Bad read of inttemp!\n"); + } + } else { + if ((nread = + fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT), + File)) != sizeof(PROTO_SET_STRUCT)) + cprintf("Bad read of inttemp!\n"); + } if (swap) { for (x = 0; x < NUM_PP_PARAMS; x++) for (y = 0; y < NUM_PP_BUCKETS; y++) for (z = 0; z < WERDS_PER_PP_VECTOR; z++) reverse32 (&ProtoSet->ProtoPruner[x][y][z]); for (x = 0; x < PROTOS_PER_PROTO_SET; x++) - for (y = 0; y < WERDS_PER_CONFIG_VEC; y++) + for (y = 0; y < WerdsPerConfigVec; y++) reverse32 (&ProtoSet->Protos[x].Configs[y]); } Class->ProtoSets[j] = ProtoSet; } + if (version_id < 4) + Class->font_set_id = -1; + else { + fread(&Class->font_set_id, sizeof(int), 1, File); + if (swap) + reverse32(&Class->font_set_id); + } 
} + + if (version_id < 2) { + /* add an empty NULL class with class id 0 */ + assert(UnusedClassIdIn (Templates, 0)); + ClassForClassId (Templates, 0) = NewIntClass (1, 1); + ClassForClassId (Templates, 0)->font_set_id = -1; + Templates->NumClasses++; + /* make sure the classes are contiguous */ + for (i = 0; i < MAX_NUM_CLASSES; i++) { + if (i < Templates->NumClasses) { + if (ClassForClassId (Templates, i) == NULL) { + fprintf(stderr, "Non-contiguous class ids in inttemp\n"); + exit(1); + } + } else { + if (ClassForClassId (Templates, i) != NULL) { + fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n", + i, Templates->NumClasses); + exit(1); + } + } + } + } + if (version_id >= 4) { + this->fontinfo_table_.read(File, NewPermanentCallback(read_info), swap); + this->fontset_table_.read(File, NewPermanentCallback(read_set), swap); + } + + // Clean up. + delete[] IndexFor; + delete[] ClassIdFor; + delete[] TempClassPruner; + return (Templates); } /* ReadIntTemplates */ +} // namespace tesseract + /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED void ShowMatchDisplay() { /* - ** Parameters: none - ** Globals: - ** FeatureShapes display list containing feature matches - ** ProtoShapes display list containing proto matches - ** Operation: This routine sends the shapes in the global display - ** lists to the match debugger window. - ** Return: none - ** Exceptions: none - ** History: Thu Mar 21 15:47:33 1991, DSJ, Created. + ** Parameters: none + ** Globals: + ** FeatureShapes display list containing feature matches + ** ProtoShapes display list containing proto matches + ** Operation: This routine sends the shapes in the global display + ** lists to the match debugger window. + ** Return: none + ** Exceptions: none + ** History: Thu Mar 21 15:47:33 1991, DSJ, Created. 
*/ void *window; /* Size of drawable */ - if (IntMatchWindow == NULL) { - IntMatchWindow = c_create_window ("IntMatchWindow", 50, 200, - 520, 520, - -130.0, 130.0, -130.0, 130.0); - SVMenuNode* popup_menu = new SVMenuNode(); - - popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, - "x", "Class to debug"); - popup_menu->AddChild("Debug Static classes", IDA_STATIC, - "x", "Class to debug"); - popup_menu->AddChild("Debug Both", IDA_BOTH, - "x", "Class to debug"); - popup_menu->BuildMenu(IntMatchWindow, false); + InitIntMatchWindowIfReqd(); + c_clear_window(IntMatchWindow); + if (ProtoDisplayWindow) { + c_clear_window(ProtoDisplayWindow); + } + if (FeatureDisplayWindow) { + c_clear_window(FeatureDisplayWindow); } - else - c_clear_window(IntMatchWindow); window = IntMatchWindow; c_line_color_index(window, Grey); /* Default size of drawing */ - if (NormMethod == baseline) { + if (classify_norm_method == baseline) { c_move (window, -1000.0, INT_BASELINE); c_draw (window, 1000.0, INT_BASELINE); c_move (window, -1000.0, INT_DESCENDER); @@ -1080,28 +1197,43 @@ void ShowMatchDisplay() { } IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, INT_MAX_X, INT_MAX_Y); + if (ProtoDisplayWindow) { + ProtoDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, + INT_MAX_X, INT_MAX_Y); + } + if (FeatureDisplayWindow) { + FeatureDisplayWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, + INT_MAX_X, INT_MAX_Y); + } } /* ShowMatchDisplay */ #endif /*---------------------------------------------------------------------------*/ -void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, - const UNICHARSET& target_unicharset) { +namespace tesseract { +void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, + const UNICHARSET& target_unicharset) { /* - ** Parameters: - ** File open file to write templates to - ** Templates templates to save into File - ** Globals: none - ** Operation: This routine writes Templates to File. The format - ** is an efficient binary format. 
File must already be open - ** for writing. - ** Return: none - ** Exceptions: none - ** History: Wed Feb 27 11:48:46 1991, DSJ, Created. + ** Parameters: + ** File open file to write templates to + ** Templates templates to save into File + ** Globals: none + ** Operation: This routine writes Templates to File. The format + ** is an efficient binary format. File must already be open + ** for writing. + ** Return: none + ** Exceptions: none + ** History: Wed Feb 27 11:48:46 1991, DSJ, Created. */ int i, j; INT_CLASS Class; int unicharset_size = target_unicharset.size(); - int version_id = -1; // When negated by the reader -1 becomes +1 etc. + int version_id = -4; // When negated by the reader -1 becomes +1 etc. + + if (Templates->NumClasses != unicharset_size) { + cprintf("Warning: executing WriteIntTemplates() with %d classes in" + " Templates, while target_unicharset size is %d\n", + Templates->NumClasses, unicharset_size); + } /* first write the high level template struct */ fwrite(&unicharset_size, sizeof(unicharset_size), 1, File); @@ -1109,10 +1241,6 @@ void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), 1, File); fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File); - fwrite(&Templates->IndexFor[0], sizeof(Templates->IndexFor[0]), - unicharset_size, File); - fwrite(&Templates->ClassIdFor[0], sizeof(Templates->ClassIdFor[0]), - Templates->NumClasses, File); /* then write out the class pruners */ for (i = 0; i < Templates->NumClassPruners; i++) @@ -1126,21 +1254,32 @@ void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, /* first write out the high level struct for the class */ fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File); fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File); + ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size); fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File); - 
for (j = 0; j < MAX_NUM_CONFIGS; ++j) { + for (j = 0; j < Class->NumConfigs; ++j) { fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File); } /* then write out the proto lengths */ - fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8), - MaxNumIntProtosIn (Class), File); + if (MaxNumIntProtosIn (Class) > 0) { + fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8), + MaxNumIntProtosIn (Class), File); + } /* then write out the proto sets */ for (j = 0; j < Class->NumProtoSets; j++) fwrite ((char *) Class->ProtoSets[j], - sizeof (PROTO_SET_STRUCT), 1, File); + sizeof (PROTO_SET_STRUCT), 1, File); + + /* then write the fonts info */ + fwrite(&Class->font_set_id, sizeof(int), 1, File); } + + /* Write the fonts info tables */ + this->fontinfo_table_.write(File, NewPermanentCallback(write_info)); + this->fontset_table_.write(File, NewPermanentCallback(write_set)); } /* WriteIntTemplates */ +} // namespace tesseract /**---------------------------------------------------------------------------- @@ -1149,18 +1288,18 @@ void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, /*---------------------------------------------------------------------------*/ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { /* - ** Parameters: - ** Bucket bucket whose start is to be computed - ** Offset offset used to map params to buckets - ** NumBuckets total number of buckets - ** Globals: none - ** Operation: This routine returns the parameter value which - ** corresponds to the beginning of the specified bucket. - ** The bucket number should have been generated using the - ** BucketFor() function with parameters Offset and NumBuckets. - ** Return: Param value corresponding to start position of Bucket. - ** Exceptions: none - ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. 
+ ** Parameters: + ** Bucket bucket whose start is to be computed + ** Offset offset used to map params to buckets + ** NumBuckets total number of buckets + ** Globals: none + ** Operation: This routine returns the parameter value which + ** corresponds to the beginning of the specified bucket. + ** The bucket number should have been generated using the + ** BucketFor() function with parameters Offset and NumBuckets. + ** Return: Param value corresponding to start position of Bucket. + ** Exceptions: none + ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. */ return (((FLOAT32) Bucket / NumBuckets) - Offset); @@ -1170,18 +1309,18 @@ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { /*---------------------------------------------------------------------------*/ FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { /* - ** Parameters: - ** Bucket bucket whose end is to be computed - ** Offset offset used to map params to buckets - ** NumBuckets total number of buckets - ** Globals: none - ** Operation: This routine returns the parameter value which - ** corresponds to the end of the specified bucket. - ** The bucket number should have been generated using the - ** BucketFor() function with parameters Offset and NumBuckets. - ** Return: Param value corresponding to end position of Bucket. - ** Exceptions: none - ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. + ** Parameters: + ** Bucket bucket whose end is to be computed + ** Offset offset used to map params to buckets + ** NumBuckets total number of buckets + ** Globals: none + ** Operation: This routine returns the parameter value which + ** corresponds to the end of the specified bucket. + ** The bucket number should have been generated using the + ** BucketFor() function with parameters Offset and NumBuckets. + ** Return: Param value corresponding to end position of Bucket. + ** Exceptions: none + ** History: Thu Feb 14 13:24:33 1991, DSJ, Created. 
*/ return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset); } /* BucketEnd */ @@ -1194,19 +1333,19 @@ void DoFill(FILL_SPEC *FillSpec, register uinT32 ClassCount, register uinT32 WordIndex) { /* - ** Parameters: - ** FillSpec specifies which bits to fill in pruner - ** Pruner class pruner to be filled - ** ClassMask indicates which bits to change in each word - ** ClassCount indicates what to change bits to - ** WordIndex indicates which word to change - ** Globals: none - ** Operation: This routine fills in the section of a class pruner - ** corresponding to a single x value for a single proto of - ** a class. - ** Return: none - ** Exceptions: none - ** History: Tue Feb 19 11:11:29 1991, DSJ, Created. + ** Parameters: + ** FillSpec specifies which bits to fill in pruner + ** Pruner class pruner to be filled + ** ClassMask indicates which bits to change in each word + ** ClassCount indicates what to change bits to + ** WordIndex indicates which word to change + ** Globals: none + ** Operation: This routine fills in the section of a class pruner + ** corresponding to a single x value for a single proto of + ** a class. + ** Return: none + ** Exceptions: none + ** History: Tue Feb 19 11:11:29 1991, DSJ, Created. 
*/ register int X, Y, Angle; register uinT32 OldWord; @@ -1224,30 +1363,30 @@ void DoFill(FILL_SPEC *FillSpec, for (Y = FillSpec->YStart; Y <= FillSpec->YEnd; Y++) for (Angle = FillSpec->AngleStart; - TRUE; CircularIncrement (Angle, NUM_CP_BUCKETS)) { - OldWord = Pruner[X][Y][Angle][WordIndex]; - if (ClassCount > (OldWord & ClassMask)) { - OldWord &= ~ClassMask; - OldWord |= ClassCount; - Pruner[X][Y][Angle][WordIndex] = OldWord; + TRUE; CircularIncrement (Angle, NUM_CP_BUCKETS)) { + OldWord = Pruner[X][Y][Angle][WordIndex]; + if (ClassCount > (OldWord & ClassMask)) { + OldWord &= ~ClassMask; + OldWord |= ClassCount; + Pruner[X][Y][Angle][WordIndex] = OldWord; + } + if (Angle == FillSpec->AngleEnd) + break; } - if (Angle == FillSpec->AngleEnd) - break; - } } /* DoFill */ /*---------------------------------------------------------------------------*/ BOOL8 FillerDone(TABLE_FILLER *Filler) { /* - ** Parameters: - ** Filler table filler to check if done - ** Globals: none - ** Operation: Return TRUE if the specified table filler is done, i.e. - ** if it has no more lines to fill. - ** Return: TRUE if no more lines to fill, FALSE otherwise. - ** Exceptions: none - ** History: Tue Feb 19 10:08:05 1991, DSJ, Created. + ** Parameters: + ** Filler table filler to check if done + ** Globals: none + ** Operation: Return TRUE if the specified table filler is done, i.e. + ** if it has no more lines to fill. + ** Return: TRUE if no more lines to fill, FALSE otherwise. + ** Exceptions: none + ** History: Tue Feb 19 10:08:05 1991, DSJ, Created. 
*/ FILL_SWITCH *Next; @@ -1264,23 +1403,23 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) { /*---------------------------------------------------------------------------*/ void FillPPCircularBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], -int Bit, FLOAT32 Center, FLOAT32 Spread) { + int Bit, FLOAT32 Center, FLOAT32 Spread) { /* - ** Parameters: - ** ParamTable table of bit vectors, one per param bucket - ** Bit bit position in vectors to be filled - ** Center center of filled area - ** Spread spread of filled area - ** Globals: none - ** Operation: This routine sets Bit in each bit vector whose - ** bucket lies within the range Center +- Spread. The fill - ** is done for a circular dimension, i.e. bucket 0 is adjacent - ** to the last bucket. It is assumed that Center and Spread - ** are expressed in a circular coordinate system whose range - ** is 0 to 1. - ** Return: none - ** Exceptions: none - ** History: Tue Oct 16 09:26:54 1990, DSJ, Created. + ** Parameters: + ** ParamTable table of bit vectors, one per param bucket + ** Bit bit position in vectors to be filled + ** Center center of filled area + ** Spread spread of filled area + ** Globals: none + ** Operation: This routine sets Bit in each bit vector whose + ** bucket lies within the range Center +- Spread. The fill + ** is done for a circular dimension, i.e. bucket 0 is adjacent + ** to the last bucket. It is assumed that Center and Spread + ** are expressed in a circular coordinate system whose range + ** is 0 to 1. + ** Return: none + ** Exceptions: none + ** History: Tue Oct 16 09:26:54 1990, DSJ, Created. 
*/ int i, FirstBucket, LastBucket; @@ -1294,7 +1433,7 @@ int Bit, FLOAT32 Center, FLOAT32 Spread) { LastBucket = (int) floor ((Center + Spread) * NUM_PP_BUCKETS); if (LastBucket >= NUM_PP_BUCKETS) LastBucket -= NUM_PP_BUCKETS; - if (LearningDebugLevel >= 2) + if (classify_learning_debug_level >= 2) cprintf ("Circular fill from %d to %d", FirstBucket, LastBucket); for (i = FirstBucket; TRUE; CircularIncrement (i, NUM_PP_BUCKETS)) { SET_BIT (ParamTable[i], Bit); @@ -1310,24 +1449,24 @@ int Bit, FLOAT32 Center, FLOAT32 Spread) { /*---------------------------------------------------------------------------*/ void FillPPLinearBits (uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], -int Bit, FLOAT32 Center, FLOAT32 Spread) { + int Bit, FLOAT32 Center, FLOAT32 Spread) { /* - ** Parameters: - ** ParamTable table of bit vectors, one per param bucket - ** Bit bit number being filled - ** Center center of filled area - ** Spread spread of filled area - ** Globals: none - ** Operation: This routine sets Bit in each bit vector whose - ** bucket lies within the range Center +- Spread. The fill - ** is done for a linear dimension, i.e. there is no wrap-around - ** for this dimension. It is assumed that Center and Spread - ** are expressed in a linear coordinate system whose range - ** is approximately 0 to 1. Values outside this range will - ** be clipped. - ** Return: none - ** Exceptions: none - ** History: Tue Oct 16 09:26:54 1990, DSJ, Created. + ** Parameters: + ** ParamTable table of bit vectors, one per param bucket + ** Bit bit number being filled + ** Center center of filled area + ** Spread spread of filled area + ** Globals: none + ** Operation: This routine sets Bit in each bit vector whose + ** bucket lies within the range Center +- Spread. The fill + ** is done for a linear dimension, i.e. there is no wrap-around + ** for this dimension. 
It is assumed that Center and Spread + ** are expressed in a linear coordinate system whose range + ** is approximately 0 to 1. Values outside this range will + ** be clipped. + ** Return: none + ** Exceptions: none + ** History: Tue Oct 16 09:26:54 1990, DSJ, Created. */ int i, FirstBucket, LastBucket; @@ -1339,7 +1478,7 @@ int Bit, FLOAT32 Center, FLOAT32 Spread) { if (LastBucket >= NUM_PP_BUCKETS) LastBucket = NUM_PP_BUCKETS - 1; - if (LearningDebugLevel >= 2) + if (classify_learning_debug_level >= 2) cprintf ("Linear fill from %d to %d", FirstBucket, LastBucket); for (i = FirstBucket; i <= LastBucket; i++) SET_BIT (ParamTable[i], Bit); @@ -1349,16 +1488,17 @@ int Bit, FLOAT32 Center, FLOAT32 Spread) { /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -CLASS_ID GetClassToDebug(const char *Prompt) { +namespace tesseract { +CLASS_ID Classify::GetClassToDebug(const char *Prompt) { /* - ** Parameters: - ** Prompt prompt to print while waiting for input from window - ** Globals: none - ** Operation: This routine prompts the user with Prompt and waits - ** for the user to enter something in the debug window. - ** Return: Character entered in the debug window. - ** Exceptions: none - ** History: Thu Mar 21 16:55:13 1991, DSJ, Created. + ** Parameters: + ** Prompt prompt to print while waiting for input from window + ** Globals: none + ** Operation: This routine prompts the user with Prompt and waits + ** for the user to enter something in the debug window. + ** Return: Character entered in the debug window. + ** Exceptions: none + ** History: Thu Mar 21 16:55:13 1991, DSJ, Created. 
*/ tprintf("%s\n", Prompt); SVEvent* ev; @@ -1380,6 +1520,8 @@ CLASS_ID GetClassToDebug(const char *Prompt) { } while (ev_type != SVET_CLICK); return 0; } /* GetClassToDebug */ + +} // namespace tesseract #endif /*---------------------------------------------------------------------------*/ @@ -1388,44 +1530,44 @@ void GetCPPadsForLevel(int Level, FLOAT32 *SidePad, FLOAT32 *AnglePad) { /* - ** Parameters: - ** Level "tightness" level to return pads for - ** EndPad place to put end pad for Level - ** SidePad place to put side pad for Level - ** AnglePad place to put angle pad for Level - ** Globals: none - ** Operation: This routine copies the appropriate global pad variables - ** into EndPad, SidePad, and AnglePad. This is a kludge used - ** to get around the fact that global control variables cannot - ** be arrays. If the specified level is illegal, the tightest - ** possible pads are returned. - ** Return: none (results are returned in EndPad, SidePad, and AnglePad. - ** Exceptions: none - ** History: Thu Feb 14 08:26:49 1991, DSJ, Created. + ** Parameters: + ** Level "tightness" level to return pads for + ** EndPad place to put end pad for Level + ** SidePad place to put side pad for Level + ** AnglePad place to put angle pad for Level + ** Globals: none + ** Operation: This routine copies the appropriate global pad variables + ** into EndPad, SidePad, and AnglePad. This is a kludge used + ** to get around the fact that global control variables cannot + ** be arrays. If the specified level is illegal, the tightest + ** possible pads are returned. + ** Return: none (results are returned in EndPad, SidePad, and AnglePad. + ** Exceptions: none + ** History: Thu Feb 14 08:26:49 1991, DSJ, Created. 
*/ switch (Level) { case 0: - *EndPad = CPEndPadLoose * GetPicoFeatureLength (); - *SidePad = CPSidePadLoose * GetPicoFeatureLength (); - *AnglePad = CPAnglePadLoose / 360.0; + *EndPad = classify_cp_end_pad_loose * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_loose * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_loose / 360.0; break; case 1: - *EndPad = CPEndPadMedium * GetPicoFeatureLength (); - *SidePad = CPSidePadMedium * GetPicoFeatureLength (); - *AnglePad = CPAnglePadMedium / 360.0; + *EndPad = classify_cp_end_pad_medium * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_medium * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_medium / 360.0; break; case 2: - *EndPad = CPEndPadTight * GetPicoFeatureLength (); - *SidePad = CPSidePadTight * GetPicoFeatureLength (); - *AnglePad = CPAnglePadTight / 360.0; + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; break; default: - *EndPad = CPEndPadTight * GetPicoFeatureLength (); - *SidePad = CPSidePadTight * GetPicoFeatureLength (); - *AnglePad = CPAnglePadTight / 360.0; + *EndPad = classify_cp_end_pad_tight * GetPicoFeatureLength (); + *SidePad = classify_cp_side_pad_tight * GetPicoFeatureLength (); + *AnglePad = classify_cp_angle_pad_tight / 360.0; break; } if (*AnglePad > 0.5) @@ -1437,13 +1579,13 @@ void GetCPPadsForLevel(int Level, /*---------------------------------------------------------------------------*/ C_COL GetMatchColorFor(FLOAT32 Evidence) { /* - ** Parameters: - ** Evidence evidence value to return color for - ** Globals: none - ** Operation: - ** Return: Color which corresponds to specified Evidence value. - ** Exceptions: none - ** History: Thu Mar 21 15:24:52 1991, DSJ, Created. 
+ ** Parameters: + ** Evidence evidence value to return color for + ** Globals: none + ** Operation: + ** Return: Color which corresponds to specified Evidence value. + ** Exceptions: none + ** History: Thu Mar 21 15:24:52 1991, DSJ, Created. */ assert (Evidence >= 0.0); @@ -1463,17 +1605,17 @@ C_COL GetMatchColorFor(FLOAT32 Evidence) { /*---------------------------------------------------------------------------*/ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { /* - ** Parameters: - ** Filler filler to get next fill spec from - ** Fill place to put spec for next fill - ** Globals: none - ** Operation: This routine returns (in Fill) the specification of - ** the next line to be filled from Filler. FillerDone() should - ** always be called before GetNextFill() to ensure that we - ** do not run past the end of the fill table. - ** Return: none (results are returned in Fill) - ** Exceptions: none - ** History: Tue Feb 19 10:17:42 1991, DSJ, Created. + ** Parameters: + ** Filler filler to get next fill spec from + ** Fill place to put spec for next fill + ** Globals: none + ** Operation: This routine returns (in Fill) the specification of + ** the next line to be filled from Filler. FillerDone() should + ** always be called before GetNextFill() to ensure that we + ** do not run past the end of the fill table. + ** Return: none (results are returned in Fill) + ** Exceptions: none + ** History: Tue Feb 19 10:17:42 1991, DSJ, Created. 
*/ FILL_SWITCH *Next; @@ -1514,22 +1656,20 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { /*---------------------------------------------------------------------------*/ -void -InitTableFiller (FLOAT32 EndPad, -FLOAT32 SidePad, -FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) +void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, + FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) /* - ** Parameters: - ** EndPad, SidePad, AnglePad padding to add to proto - ** Proto proto to create a filler for - ** Filler place to put table filler - ** Globals: none - ** Operation: This routine computes a data structure (Filler) - ** which can be used to fill in a rectangle surrounding - ** the specified Proto. - ** Return: none (results are returned in Filler) - ** Exceptions: none - ** History: Thu Feb 14 09:27:05 1991, DSJ, Created. + ** Parameters: + ** EndPad, SidePad, AnglePad padding to add to proto + ** Proto proto to create a filler for + ** Filler place to put table filler + ** Globals: none + ** Operation: This routine computes a data structure (Filler) + ** which can be used to fill in a rectangle surrounding + ** the specified Proto. + ** Return: none (results are returned in Filler) + ** Exceptions: none + ** History: Thu Feb 14 09:27:05 1991, DSJ, Created. 
*/ #define XS X_SHIFT #define YS Y_SHIFT @@ -1549,39 +1689,37 @@ FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) Y = Proto->Y; HalfLength = Proto->Length / 2.0; - Filler->AngleStart = CircBucketFor (Angle - AnglePad, AS, NB); - Filler->AngleEnd = CircBucketFor (Angle + AnglePad, AS, NB); + Filler->AngleStart = CircBucketFor(Angle - AnglePad, AS, NB); + Filler->AngleEnd = CircBucketFor(Angle + AnglePad, AS, NB); Filler->NextSwitch = 0; if (fabs (Angle - 0.0) < HV_TOLERANCE || fabs (Angle - 0.5) < HV_TOLERANCE) { /* horizontal proto - handle as special case */ - Filler->X = BucketFor (X - HalfLength - EndPad, XS, NB); - Filler->YStart = BucketFor (Y - SidePad, YS, NB * 256); - Filler->YEnd = BucketFor (Y + SidePad, YS, NB * 256); + Filler->X = BucketFor(X - HalfLength - EndPad, XS, NB); + Filler->YStart = BucketFor(Y - SidePad, YS, NB * 256); + Filler->YEnd = BucketFor(Y + SidePad, YS, NB * 256); Filler->StartDelta = 0; Filler->EndDelta = 0; Filler->Switch[0].Type = LastSwitch; - Filler->Switch[0].X = BucketFor (X + HalfLength + EndPad, XS, NB); - } - else if (fabs (Angle - 0.25) < HV_TOLERANCE || - fabs (Angle - 0.75) < HV_TOLERANCE) { + Filler->Switch[0].X = BucketFor(X + HalfLength + EndPad, XS, NB); + } else if (fabs(Angle - 0.25) < HV_TOLERANCE || + fabs(Angle - 0.75) < HV_TOLERANCE) { /* vertical proto - handle as special case */ - Filler->X = BucketFor (X - SidePad, XS, NB); - Filler->YStart = BucketFor (Y - HalfLength - EndPad, YS, NB * 256); - Filler->YEnd = BucketFor (Y + HalfLength + EndPad, YS, NB * 256); + Filler->X = BucketFor(X - SidePad, XS, NB); + Filler->YStart = BucketFor(Y - HalfLength - EndPad, YS, NB * 256); + Filler->YEnd = BucketFor(Y + HalfLength + EndPad, YS, NB * 256); Filler->StartDelta = 0; Filler->EndDelta = 0; Filler->Switch[0].Type = LastSwitch; - Filler->Switch[0].X = BucketFor (X + SidePad, XS, NB); - } - else { + Filler->Switch[0].X = BucketFor(X + SidePad, XS, NB); + } else { /* diagonal proto */ if ((Angle > 0.0 && Angle < 
0.25) || (Angle > 0.5 && Angle < 0.75)) { /* rising diagonal proto */ Angle *= 2.0 * PI; - Cos = fabs (cos (Angle)); - Sin = fabs (sin (Angle)); + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); /* compute the positions of the corners of the acceptance region */ Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; @@ -1599,42 +1737,41 @@ FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) } /* translate into bucket positions and deltas */ - Filler->X = (inT8) MapParam (Start.x, XS, NB); + Filler->X = (inT8) MapParam(Start.x, XS, NB); Filler->StartDelta = -(inT16) ((Cos / Sin) * 256); Filler->EndDelta = (inT16) ((Sin / Cos) * 256); - XAdjust = BucketEnd (Filler->X, XS, NB) - Start.x; + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; YAdjust = XAdjust * Cos / Sin; - Filler->YStart = (inT16) MapParam (Start.y - YAdjust, YS, NB * 256); + Filler->YStart = (inT16) MapParam(Start.y - YAdjust, YS, NB * 256); YAdjust = XAdjust * Sin / Cos; - Filler->YEnd = (inT16) MapParam (Start.y + YAdjust, YS, NB * 256); + Filler->YEnd = (inT16) MapParam(Start.y + YAdjust, YS, NB * 256); Filler->Switch[S1].Type = StartSwitch; - Filler->Switch[S1].X = (inT8) MapParam (Switch1.x, XS, NB); - Filler->Switch[S1].Y = (inT8) MapParam (Switch1.y, YS, NB); - XAdjust = Switch1.x - BucketStart (Filler->Switch[S1].X, XS, NB); + Filler->Switch[S1].X = (inT8) MapParam(Switch1.x, XS, NB); + Filler->Switch[S1].Y = (inT8) MapParam(Switch1.y, YS, NB); + XAdjust = Switch1.x - BucketStart(Filler->Switch[S1].X, XS, NB); YAdjust = XAdjust * Sin / Cos; Filler->Switch[S1].YInit = - (inT16) MapParam (Switch1.y - YAdjust, YS, NB * 256); + (inT16) MapParam(Switch1.y - YAdjust, YS, NB * 256); Filler->Switch[S1].Delta = Filler->EndDelta; Filler->Switch[S2].Type = EndSwitch; - Filler->Switch[S2].X = (inT8) MapParam (Switch2.x, XS, NB); - Filler->Switch[S2].Y = (inT8) MapParam (Switch2.y, YS, NB); - XAdjust = Switch2.x - BucketStart (Filler->Switch[S2].X, XS, NB); + Filler->Switch[S2].X = (inT8) 
MapParam(Switch2.x, XS, NB); + Filler->Switch[S2].Y = (inT8) MapParam(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); YAdjust = XAdjust * Cos / Sin; Filler->Switch[S2].YInit = - (inT16) MapParam (Switch2.y + YAdjust, YS, NB * 256); + (inT16) MapParam(Switch2.y + YAdjust, YS, NB * 256); Filler->Switch[S2].Delta = Filler->StartDelta; Filler->Switch[2].Type = LastSwitch; - Filler->Switch[2].X = (inT8) MapParam (End.x, XS, NB); - } - else { + Filler->Switch[2].X = (inT8)MapParam(End.x, XS, NB); + } else { /* falling diagonal proto */ Angle *= 2.0 * PI; - Cos = fabs (cos (Angle)); - Sin = fabs (sin (Angle)); + Cos = fabs(cos(Angle)); + Sin = fabs(sin(Angle)); /* compute the positions of the corners of the acceptance region */ Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin; @@ -1652,36 +1789,36 @@ FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) } /* translate into bucket positions and deltas */ - Filler->X = (inT8) MapParam (Start.x, XS, NB); + Filler->X = (inT8) MapParam(Start.x, XS, NB); Filler->StartDelta = -(inT16) ((Sin / Cos) * 256); Filler->EndDelta = (inT16) ((Cos / Sin) * 256); - XAdjust = BucketEnd (Filler->X, XS, NB) - Start.x; + XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; YAdjust = XAdjust * Sin / Cos; - Filler->YStart = (inT16) MapParam (Start.y - YAdjust, YS, NB * 256); + Filler->YStart = (inT16) MapParam(Start.y - YAdjust, YS, NB * 256); YAdjust = XAdjust * Cos / Sin; - Filler->YEnd = (inT16) MapParam (Start.y + YAdjust, YS, NB * 256); + Filler->YEnd = (inT16) MapParam(Start.y + YAdjust, YS, NB * 256); Filler->Switch[S1].Type = EndSwitch; - Filler->Switch[S1].X = (inT8) MapParam (Switch1.x, XS, NB); - Filler->Switch[S1].Y = (inT8) MapParam (Switch1.y, YS, NB); - XAdjust = Switch1.x - BucketStart (Filler->Switch[S1].X, XS, NB); + Filler->Switch[S1].X = (inT8) MapParam(Switch1.x, XS, NB); + Filler->Switch[S1].Y = (inT8) MapParam(Switch1.y, YS, NB); + XAdjust = Switch1.x - 
BucketStart(Filler->Switch[S1].X, XS, NB); YAdjust = XAdjust * Sin / Cos; Filler->Switch[S1].YInit = - (inT16) MapParam (Switch1.y + YAdjust, YS, NB * 256); + (inT16) MapParam(Switch1.y + YAdjust, YS, NB * 256); Filler->Switch[S1].Delta = Filler->StartDelta; Filler->Switch[S2].Type = StartSwitch; - Filler->Switch[S2].X = (inT8) MapParam (Switch2.x, XS, NB); - Filler->Switch[S2].Y = (inT8) MapParam (Switch2.y, YS, NB); - XAdjust = Switch2.x - BucketStart (Filler->Switch[S2].X, XS, NB); + Filler->Switch[S2].X = (inT8) MapParam(Switch2.x, XS, NB); + Filler->Switch[S2].Y = (inT8) MapParam(Switch2.y, YS, NB); + XAdjust = Switch2.x - BucketStart(Filler->Switch[S2].X, XS, NB); YAdjust = XAdjust * Cos / Sin; Filler->Switch[S2].YInit = - (inT16) MapParam (Switch2.y - YAdjust, YS, NB * 256); + (inT16) MapParam(Switch2.y - YAdjust, YS, NB * 256); Filler->Switch[S2].Delta = Filler->EndDelta; Filler->Switch[2].Type = LastSwitch; - Filler->Switch[2].X = (inT8) MapParam (End.x, XS, NB); + Filler->Switch[2].X = (inT8) MapParam(End.x, XS, NB); } } } /* InitTableFiller */ @@ -1691,32 +1828,32 @@ FLOAT32 AnglePad, PROTO Proto, TABLE_FILLER * Filler) #ifndef GRAPHICS_DISABLED void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) { /* - ** Parameters: - ** ShapeList shape list to add feature rendering to - ** Feature feature to be rendered - ** Color color to use for feature rendering - ** Globals: none - ** Operation: This routine renders the specified feature into ShapeList. - ** Return: New shape list with rendering of Feature added. - ** Exceptions: none - ** History: Thu Mar 21 14:57:41 1991, DSJ, Created. + ** Parameters: + ** ShapeList shape list to add feature rendering to + ** Feature feature to be rendered + ** Color color to use for feature rendering + ** Globals: none + ** Operation: This routine renders the specified feature into ShapeList. + ** Return: New shape list with rendering of Feature added. 
+ ** Exceptions: none + ** History: Thu Mar 21 14:57:41 1991, DSJ, Created. */ FLOAT32 X, Y, Dx, Dy, Length; c_line_color_index(window, Color); - assert (Feature != NULL); - assert (Color != 0); + assert(Feature != NULL); + assert(Color != 0); X = Feature->X - DISPLAY_OFFSET; Y = Feature->Y - DISPLAY_OFFSET; - Length = GetPicoFeatureLength () * 0.7 * INT_CHAR_NORM_RANGE; - Dx = (Length / 2.0) * cos ((Feature->Theta / 256.0) * 2.0 * PI); - Dy = (Length / 2.0) * sin ((Feature->Theta / 256.0) * 2.0 * PI); + Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE; + Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * PI); + Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * PI); - c_move (window, X - Dx, Y - Dy); - c_draw (window, X + Dx, Y + Dy); - c_move (window, X - Dx - Dy * DOUBLE_OFFSET, Y - Dy + Dx * DOUBLE_OFFSET); - c_draw (window, X + Dx - Dy * DOUBLE_OFFSET, Y + Dy + Dx * DOUBLE_OFFSET); + c_move(window, X - Dx, Y - Dy); + c_draw(window, X + Dx, Y + Dy); + c_move(window, X - Dx - Dy * DOUBLE_OFFSET, Y - Dy + Dx * DOUBLE_OFFSET); + c_draw(window, X + Dx - Dy * DOUBLE_OFFSET, Y + Dy + Dx * DOUBLE_OFFSET); } /* RenderIntFeature */ @@ -1726,18 +1863,18 @@ void RenderIntProto(void *window, PROTO_ID ProtoId, C_COL Color) { /* - ** Parameters: - ** ShapeList shape list to append proto rendering onto - ** Class class that proto is contained in - ** ProtoId id of proto to be rendered - ** Color color to render proto in - ** Globals: none - ** Operation: This routine extracts the parameters of the specified - ** proto from the class description and adds a rendering of - ** the proto onto the ShapeList. - ** Return: New shape list with a rendering of one proto added. - ** Exceptions: none - ** History: Thu Mar 21 10:21:09 1991, DSJ, Created. 
+ ** Parameters: + ** ShapeList shape list to append proto rendering onto + ** Class class that proto is contained in + ** ProtoId id of proto to be rendered + ** Color color to render proto in + ** Globals: none + ** Operation: This routine extracts the parameters of the specified + ** proto from the class description and adds a rendering of + ** the proto onto the ShapeList. + ** Return: New shape list with a rendering of one proto added. + ** Exceptions: none + ** History: Thu Mar 21 10:21:09 1991, DSJ, Created. */ PROTO_SET ProtoSet; INT_PROTO Proto; @@ -1749,19 +1886,19 @@ void RenderIntProto(void *window, uinT32 ProtoMask; int Bucket; - assert (ProtoId >= 0); - assert (Class != NULL); - assert (ProtoId < Class->NumProtos); - assert (Color != 0); + assert(ProtoId >= 0); + assert(Class != NULL); + assert(ProtoId < Class->NumProtos); + assert(Color != 0); c_line_color_index(window, Color); - ProtoSet = Class->ProtoSets[SetForProto (ProtoId)]; - ProtoSetIndex = IndexForProto (ProtoId); + ProtoSet = Class->ProtoSets[SetForProto(ProtoId)]; + ProtoSetIndex = IndexForProto(ProtoId); Proto = &(ProtoSet->Protos[ProtoSetIndex]); Length = (Class->ProtoLengths[ProtoId] * - GetPicoFeatureLength () * INT_CHAR_NORM_RANGE); - ProtoMask = PPrunerMaskFor (ProtoId); - ProtoWordIndex = PPrunerWordIndexFor (ProtoId); + GetPicoFeatureLength() * INT_CHAR_NORM_RANGE); + ProtoMask = PPrunerMaskFor(ProtoId); + ProtoWordIndex = PPrunerWordIndexFor(ProtoId); // find the x and y extent of the proto from the proto pruning table Xmin = Ymin = NUM_PP_BUCKETS; @@ -1783,41 +1920,87 @@ void RenderIntProto(void *window, } X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET; Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET; - Dx = (Length / 2.0) * cos ((Proto->Angle / 256.0) * 2.0 * PI); - Dy = (Length / 2.0) * sin ((Proto->Angle / 256.0) * 2.0 * PI); + Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * PI); + Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 
2.0 * PI); - c_move (window, X - Dx, Y - Dy); - c_draw (window, X + Dx, Y + Dy); + c_move(window, X - Dx, Y - Dy); + c_draw(window, X + Dx, Y + Dy); } /* RenderIntProto */ #endif /*---------------------------------------------------------------------------*/ int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) { /* - ** Parameters: - ** Param parameter value to be truncated - ** Min, Max parameter limits (inclusive) - ** Id string id of parameter for error messages - ** Globals: none - ** Operation: This routine truncates Param to lie within the range - ** of Min-Max inclusive. If a truncation is performed, and - ** Id is not null, an warning message is printed. - ** Return: Truncated parameter. - ** Exceptions: none - ** History: Fri Feb 8 11:54:28 1991, DSJ, Created. + ** Parameters: + ** Param parameter value to be truncated + ** Min, Max parameter limits (inclusive) + ** Id string id of parameter for error messages + ** Globals: none + ** Operation: This routine truncates Param to lie within the range + ** of Min-Max inclusive. If a truncation is performed, and + ** Id is not null, an warning message is printed. + ** Return: Truncated parameter. + ** Exceptions: none + ** History: Fri Feb 8 11:54:28 1991, DSJ, Created. */ if (Param < Min) { if (Id) - cprintf ("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Min); + cprintf("Warning: Param %s truncated from %f to %d!\n", + Id, Param, Min); Param = Min; - } - else if (Param > Max) { + } else if (Param > Max) { if (Id) - cprintf ("Warning: Param %s truncated from %f to %d!\n", - Id, Param, Max); + cprintf("Warning: Param %s truncated from %f to %d!\n", + Id, Param, Max); Param = Max; } - return (int) floor (Param); - + return static_cast<int>(floor(Param)); } /* TruncateParam */ + + +/*---------------------------------------------------------------------------*/ + +void InitIntMatchWindowIfReqd() { +/* + ** Operation: Initializes the int matcher window if it is not already + ** initialized.
+ */ + if (IntMatchWindow == NULL) { + IntMatchWindow = c_create_window("IntMatchWindow", 50, 200, + 520, 520, + -130.0, 130.0, -130.0, 130.0); + SVMenuNode* popup_menu = new SVMenuNode(); + + popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, + "x", "Class to debug"); + popup_menu->AddChild("Debug Static classes", IDA_STATIC, + "x", "Class to debug"); + popup_menu->AddChild("Debug Both", IDA_BOTH, + "x", "Class to debug"); + popup_menu->BuildMenu(IntMatchWindow, false); + } +} + +void InitProtoDisplayWindowIfReqd() { +/* + ** Operation: Initializes the proto display window if it is not already + ** initialized. + */ + if (ProtoDisplayWindow == NULL) { + ProtoDisplayWindow = c_create_window("ProtoDisplayWindow", 50, 200, + 520, 520, + -130.0, 130.0, -130.0, 130.0); + } +} + +void InitFeatureDisplayWindowIfReqd() { +/* + ** Operation: Initializes the feature display window if it is not already + ** initialized. + */ + if (FeatureDisplayWindow == NULL) { + FeatureDisplayWindow = c_create_window("FeatureDisplayWindow", 50, 200, + 520, 520, + -130.0, 130.0, -130.0, 130.0); + } +} diff --git a/classify/intproto.h b/classify/intproto.h index cf2759725..04abd9727 100644 --- a/classify/intproto.h +++ b/classify/intproto.h @@ -38,7 +38,8 @@ #define MAX_PROTO_INDEX 24 #define BITS_PER_WERD (8 * sizeof (uinT32)) -#define MAX_NUM_CONFIGS 32 +/* Script detection: increase this number to 128 */ +#define MAX_NUM_CONFIGS 64 #define MAX_NUM_PROTOS 512 #define PROTOS_PER_PROTO_SET 64 #define MAX_NUM_PROTO_SETS (MAX_NUM_PROTOS / PROTOS_PER_PROTO_SET) @@ -62,6 +63,11 @@ #define WERDS_PER_CONFIG_VEC ((MAX_NUM_CONFIGS + BITS_PER_WERD - 1) / \ BITS_PER_WERD) +/* The first 3 dimensions of the CLASS_PRUNER_STRUCT are the + * 3 axes of the quantized feature space. + * The position of the the bits recorded for each class in the + * 4th dimension is determined by using CPrunerWordIndexFor(c), + * where c is the corresponding class id. 
*/ typedef uinT32 CLASS_PRUNER_STRUCT [NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]; @@ -80,9 +86,11 @@ typedef struct INT_PROTO_STRUCT, *INT_PROTO; +typedef uinT32 PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]; + typedef struct { - uinT32 ProtoPruner[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]; + PROTO_PRUNER ProtoPruner; INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]; } @@ -91,6 +99,35 @@ PROTO_SET_STRUCT, *PROTO_SET; typedef uinT32 CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]; +/* + * font_properties contains properties about boldness, italicness, fixed pitch, + * serif, fraktur + */ +struct FontInfo { + char* name; + uinT32 properties; + bool is_italic() { return properties & 1; } + bool is_bold() { return (properties & 2) != 0; } + bool is_fixed_pitch() { return (properties & 4) != 0; } + bool is_serif() { return (properties & 8) != 0; } + bool is_fraktur() { return (properties & 16) != 0; } +}; + +// Every class (character) owns a FontSet that represents all the fonts that can +// render this character. +// Since almost all the characters from the same script share the same set of +// fonts, the sets are shared over multiple classes (see +// Classify::fontset_table_). Thus, a class only store an id to a set. +// Because some fonts cannot render just one character of a set, there are a +// lot of FontSet that differ only by one font. Rather than storing directly +// the FontInfo in the FontSet structure, it's better to share FontInfos among +// FontSets (Classify::fontinfo_table_). 
+struct FontSet { + int size; + int* configs; // FontInfo ids +}; + + typedef struct { uinT16 NumProtos; @@ -99,6 +136,7 @@ typedef struct PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]; uinT8 *ProtoLengths; uinT16 ConfigLengths[MAX_NUM_CONFIGS]; + int font_set_id; // FontSet id, see above } @@ -108,8 +146,6 @@ typedef struct { int NumClasses; int NumClassPruners; - CLASS_TO_INDEX IndexFor; - INDEX_TO_CLASS ClassIdFor; INT_CLASS Class[MAX_NUM_CLASSES]; CLASS_PRUNER ClassPruner[MAX_NUM_CLASS_PRUNERS]; } @@ -121,20 +157,24 @@ INT_TEMPLATES_STRUCT, *INT_TEMPLATES; #define MAX_NUM_INT_FEATURES 512 #define INT_CHAR_NORM_RANGE 256 -typedef struct +struct INT_FEATURE_STRUCT { uinT8 X; uinT8 Y; uinT8 Theta; inT8 CP_misses; -} +}; - -INT_FEATURE_STRUCT; typedef INT_FEATURE_STRUCT *INT_FEATURE; typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; +enum IntmatcherDebugAction { + IDA_ADAPTIVE, + IDA_STATIC, + IDA_BOTH +}; + /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ @@ -150,13 +190,15 @@ typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; #define PPrunerMaskFor(I) (1 << PPrunerBitIndexFor (I)) #define MaxNumClassesIn(T) (T->NumClassPruners * CLASSES_PER_CP) -#define LegalClassId(C) ((C) > 0 && (C) <= MAX_CLASS_ID) -#define UnusedClassIdIn(T,C) (T->IndexFor[C] == ILLEGAL_CLASS) -#define ClassForClassId(T,C) (T->Class[(T->IndexFor[C])]) -#define CPrunerIdFor(I) ((I) / CLASSES_PER_CP) -#define CPrunerWordIndexFor(I) (((I) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD) -#define CPrunerBitIndexFor(I) (((I) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD) -#define CPrunerMaskFor(L,I) (((L)+1) << CPrunerBitIndexFor (I) * NUM_BITS_PER_CLASS) +#define LegalClassId(c) ((c) >= 0 && (c) <= MAX_CLASS_ID) +#define UnusedClassIdIn(T,c) ((T)->Class[c] == NULL) +#define ClassForClassId(T,c) ((T)->Class[c]) +#define ClassPrunersFor(T) ((T)->ClassPruner) +#define 
CPrunerIdFor(c) ((c) / CLASSES_PER_CP) +#define CPrunerFor(T,c) ((T)->ClassPruner [CPrunerIdFor (c)]) +#define CPrunerWordIndexFor(c) (((c) % CLASSES_PER_CP) / CLASSES_PER_CP_WERD) +#define CPrunerBitIndexFor(c) (((c) % CLASSES_PER_CP) % CLASSES_PER_CP_WERD) +#define CPrunerMaskFor(L,c) (((L)+1) << CPrunerBitIndexFor (c) * NUM_BITS_PER_CLASS) /* DEBUG macros*/ #define PRINT_MATCH_SUMMARY 0x001 @@ -177,7 +219,7 @@ typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -int AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class); +void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class); int AddIntConfig(INT_CLASS Class); @@ -199,122 +241,24 @@ void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class); void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); -INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, - const UNICHARSET& target_unicharset); - void DisplayIntFeature(INT_FEATURE Feature, FLOAT32 Evidence); void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence); -void InitIntProtoVars(); - INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs); -void free_int_class(INT_CLASS int_class); - INT_TEMPLATES NewIntTemplates(); void free_int_templates(INT_TEMPLATES templates); -INT_TEMPLATES ReadIntTemplates(FILE *File, BOOL8 swap); - void ShowMatchDisplay(); -CLASS_ID GetClassToDebug(const char *Prompt); +/*----------------------------------------------------------------------------*/ -void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, - const UNICHARSET& target_unicharset); +void InitIntMatchWindowIfReqd(); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ +void InitProtoDisplayWindowIfReqd(); -/* intproto.c -int AddIntClass 
- _ARGS((INT_TEMPLATES Templates, - CLASS_ID ClassId, - INT_CLASS Class)); +void InitFeatureDisplayWindowIfReqd(); -int AddIntConfig - _ARGS((INT_CLASS Class)); - -int AddIntProto - _ARGS((INT_CLASS Class)); - -void AddProtoToClassPruner - _ARGS((PROTO Proto, - CLASS_ID ClassId, - INT_TEMPLATES Templates)); - -void AddProtoToProtoPruner - _ARGS((PROTO Proto, - int ProtoId, - INT_CLASS Class)); - -int BucketFor - _ARGS((FLOAT32 Param, - FLOAT32 Offset, - int NumBuckets)); - -int CircBucketFor - _ARGS((FLOAT32 Param, - FLOAT32 Offset, - int NumBuckets)); - -void UpdateMatchDisplay - _ARGS((void)); - -void ConvertConfig - _ARGS((BIT_VECTOR Config, - int ConfigId, - INT_CLASS Class)); - -void ConvertProto - _ARGS((PROTO Proto, - int ProtoId, - INT_CLASS Class)); - -INT_TEMPLATES CreateIntTemplates - _ARGS((CLASSES FloatProtos)); - -void DisplayIntFeature - _ARGS((INT_FEATURE Feature, - FLOAT32 Evidence)); - -void DisplayIntProto - _ARGS((INT_CLASS Class, - PROTO_ID ProtoId, - FLOAT32 Evidence)); - -void InitIntProtoVars - _ARGS((void)); - -INT_CLASS NewIntClass - _ARGS((int MaxNumProtos, - int MaxNumConfigs)); - -INT_TEMPLATES NewIntTemplates - _ARGS((void)); - -INT_TEMPLATES ReadIntTemplates - _ARGS((FILE *File)); - -void ShowMatchDisplay - _ARGS((void)); - -void WriteIntTemplates - _ARGS((FILE *File, - INT_TEMPLATES Templates)); - -CLASS_ID GetClassToDebug - _ARGS((char *Prompt)); - -C_COL GetMatchColorFor - _ARGS((FLOAT32 Evidence)); - -#undef _ARGS -*/ #endif diff --git a/classify/mf.cpp b/classify/mf.cpp index 943762717..7a0483d79 100644 --- a/classify/mf.cpp +++ b/classify/mf.cpp @@ -19,7 +19,6 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "mfdefs.h" -#include "variables.h" #include "mf.h" #include "fxdefs.h" #include "mfx.h" @@ -52,6 +51,8 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats) { MICROFEATURE OldFeature; OldFeatures = (MICROFEATURES) BlobMicroFeatures (Blob, 
LineStats); + if (OldFeatures == NULL) + return NULL; NumFeatures = count (OldFeatures); FeatureSet = NewFeatureSet (NumFeatures); @@ -70,37 +71,16 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats) { // ParamOf (Feature, MFBulge2) = SecondBulgeOf (OldFeature); Feature->Params[MFBulge1] = 0.0f; Feature->Params[MFBulge2] = 0.0f; - +#ifndef __MSW32__ + // Assert that feature parameters are well defined. + int i; + for (i = 0; i < Feature->Type->NumParams; i++) { + assert(!isnan(Feature->Params[i])); + } +#endif AddFeature(FeatureSet, Feature); } FreeMicroFeatures(OldFeatures); return (FeatureSet); } /* ExtractMicros */ - - -/*---------------------------------------------------------------------------*/ -void InitMicroFXVars() { -/* - ** Parameters: none - ** Globals: - ** ExtraPenaltyMagnitude controls for adjusting extra penalty - ** ExtraPenaltyWeight - ** ExtraPenaltyOrder - ** Operation: Initialize the microfeature extractor variables that can - ** be tuned without recompiling. - ** Return: none - ** Exceptions: none - ** History: Thu May 24 10:50:46 1990, DSJ, Created. 
- */ - /* - float_variable (ExtraPenaltyMagnitude, "MFExtraPenaltyMag", - EXTRA_PENALTY_MAGNITUDE); - float_variable (ExtraPenaltyWeight, "MFExtraPenaltyWeight", - EXTRA_PENALTY_WEIGHT); - float_variable (ExtraPenaltyOrder, "MFExtraPenaltyOrder", - EXTRA_PENALTY_ORDER); - */ - InitMicroFxVars(); - -} /* InitMicroFXVars */ diff --git a/classify/mf.h b/classify/mf.h index 79f1c8ef5..a41b159e6 100644 --- a/classify/mf.h +++ b/classify/mf.h @@ -32,9 +32,7 @@ typedef enum { /*---------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats); - -void InitMicroFXVars(); +FEATURE_SET ExtractMicros(TBLOB *Blob, LINE_STATS *LineStats); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index 86539e197..9691888a0 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mfoutline.c - ** Purpose: Interface to outline struct used for extracting features - ** Author: Dan Johnson - ** History: Thu May 17 08:14:18 1990, DSJ, Created. + ** Filename: mfoutline.c + ** Purpose: Interface to outline struct used for extracting features + ** Author: Dan Johnson + ** History: Thu May 17 08:14:18 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -21,61 +21,17 @@ #include "clusttool.h" //If remove you get cought in a loop somewhere #include "emalloc.h" #include "mfoutline.h" -#include "debug.h" #include "hideedge.h" #include "blobs.h" #include "const.h" #include "mfx.h" +#include "varable.h" #include #include #define MIN_INERTIA (0.00001) -/**---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------**/ -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* /users/danj/wiseowl/src/danj/microfeatures/mfoutline.c -void ChangeDirection - _ARGS((MFOUTLINE Start, - MFOUTLINE End, - DIRECTION Direction)); - -void CharNormalizeOutline - _ARGS((MFOUTLINE Outline, - OUTLINE_STATS *OutlineStats)); - -void ComputeDirection - _ARGS((MFEDGEPT *Start, - MFEDGEPT *Finish, - FLOAT32 MinSlope, - FLOAT32 MaxSlope)); - -void FinishOutlineStats - _ARGS((OUTLINE_STATS *OutlineStats)); - -void InitOutlineStats - _ARGS((OUTLINE_STATS *OutlineStats)); - -MFOUTLINE NextDirectionChange - _ARGS((MFOUTLINE EdgePoint)); - -void UpdateOutlineStats - _ARGS((OUTLINE_STATS *OutlineStats, - FLOAT32 x1, - FLOAT32 y1, - FLOAT32 x2, - FLOAT32 y2)); - -#undef _ARGS -*/ /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ @@ -83,23 +39,21 @@ void UpdateOutlineStats expanded blobs */ static TPOINT BlobCenter; +/**---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + /* control knobs used to control normalization of outlines */ -make_int_var (NormMethod, character, MakeNormMethod, -15, 10, SetNormMethod, "Normalization Method ...") 
+INT_VAR(classify_norm_method, character, "Normalization Method ..."); /* PREV DEFAULT "baseline" */ -make_float_var (CharNormRange, 0.2, MakeCharNormRange, -15, 11, SetCharNormRange, "Character Normalization Range ...") -make_float_var (MinNormScaleX, 0.0, MakeMinNormScaleX, -15, 12, SetMinNormScaleX, "Min char x-norm scale ...") +double_VAR(classify_char_norm_range, 0.2, "Character Normalization Range ..."); +double_VAR(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ..."); /* PREV DEFAULT 0.1 */ -make_float_var (MaxNormScaleX, 0.325, MakeMaxNormScaleX, -15, 13, SetMaxNormScaleX, "Max char x-norm scale ...") +double_VAR(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); /* PREV DEFAULT 0.3 */ -make_float_var (MinNormScaleY, 0.0, MakeMinNormScaleY, -15, 14, SetMinNormScaleY, "Min char y-norm scale ...") +double_VAR(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); /* PREV DEFAULT 0.1 */ -make_float_var (MaxNormScaleY, 0.325, MakeMaxNormScaleY, -15, 15, SetMaxNormScaleY, "Max char y-norm scale ...") +double_VAR(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ..."); /* PREV DEFAULT 0.3 */ /**---------------------------------------------------------------------------- Public Code @@ -107,19 +61,19 @@ make_float_var (MaxNormScaleY, 0.325, MakeMaxNormScaleY, /*---------------------------------------------------------------------------*/ void ComputeBlobCenter(TBLOB *Blob, TPOINT *BlobCenter) { /* - ** Parameters: - ** Blob blob to compute centerpoint of - ** BlobCenter data struct to place results in - ** Globals: none - ** Operation: - ** This routine computes the center point of the specified - ** blob using the bounding box of all top level outlines in the - ** blob. The center point is computed in a coordinate system - ** which is scaled up by VECSCALE from the page coordinate - ** system. - ** Return: none - ** Exceptions: none - ** History: Fri Sep 8 10:45:39 1989, DSJ, Created. 
+ ** Parameters: + ** Blob blob to compute centerpoint of + ** BlobCenter data struct to place results in + ** Globals: none + ** Operation: + ** This routine computes the center point of the specified + ** blob using the bounding box of all top level outlines in the + ** blob. The center point is computed in a coordinate system + ** which is scaled up by VECSCALE from the page coordinate + ** system. + ** Return: none + ** Exceptions: none + ** History: Fri Sep 8 10:45:39 1989, DSJ, Created. */ TPOINT TopLeft; TPOINT BottomRight; @@ -135,13 +89,13 @@ void ComputeBlobCenter(TBLOB *Blob, TPOINT *BlobCenter) { /*---------------------------------------------------------------------------*/ LIST ConvertBlob(TBLOB *Blob) { /* - ** Parameters: - ** Blob blob to be converted - ** Globals: none - ** Operation: Convert Blob into a list of outlines. - ** Return: List of outlines representing blob. - ** Exceptions: none - ** History: Thu Dec 13 15:40:17 1990, DSJ, Created. + ** Parameters: + ** Blob blob to be converted + ** Globals: none + ** Operation: Convert Blob into a list of outlines. + ** Return: List of outlines representing blob. + ** Exceptions: none + ** History: Thu Dec 13 15:40:17 1990, DSJ, Created. */ LIST ConvertedOutlines = NIL; @@ -158,30 +112,30 @@ LIST ConvertBlob(TBLOB *Blob) { /*---------------------------------------------------------------------------*/ MFOUTLINE ConvertOutline(TESSLINE *Outline) { /* - ** Parameters: - ** Outline outline to be converted - ** Globals: - ** BlobCenter pre-computed center of current blob - ** Operation: - ** This routine converts the specified outline into a special - ** data structure which is used for extracting micro-features. - ** If the outline has been pre-normalized by the splitter, - ** then it is assumed to be in expanded form and all we must - ** do is copy the points. 
Otherwise, - ** if the outline is expanded, then the expanded form is used - ** and the coordinates of the points are returned to page - ** coordinates using the global variable BlobCenter and the - ** scaling factor REALSCALE. If the outline is not expanded, - ** then the compressed form is used. - ** Return: Outline converted into special micro-features format. - ** Exceptions: none - ** History: 8/2/89, DSJ, Created. - ** 9/8/89, DSJ, Added ability to convert expanded blobs. - ** 1/11/90, DSJ, Changed to use REALSCALE instead of VECSCALE - ** to eliminate round-off problems. - ** 2/21/91, DSJ, Added ability to work with pre-normalized - ** blobs. - ** 4/30/91, DSJ, Added concept of "hidden" segments. + ** Parameters: + ** Outline outline to be converted + ** Globals: + ** BlobCenter pre-computed center of current blob + ** Operation: + ** This routine converts the specified outline into a special + ** data structure which is used for extracting micro-features. + ** If the outline has been pre-normalized by the splitter, + ** then it is assumed to be in expanded form and all we must + ** do is copy the points. Otherwise, + ** if the outline is expanded, then the expanded form is used + ** and the coordinates of the points are returned to page + ** coordinates using the global variable BlobCenter and the + ** scaling factor REALSCALE. If the outline is not expanded, + ** then the compressed form is used. + ** Return: Outline converted into special micro-features format. + ** Exceptions: none + ** History: 8/2/89, DSJ, Created. + ** 9/8/89, DSJ, Added ability to convert expanded blobs. + ** 1/11/90, DSJ, Changed to use REALSCALE instead of VECSCALE + ** to eliminate round-off problems. + ** 2/21/91, DSJ, Added ability to work with pre-normalized + ** blobs. + ** 4/30/91, DSJ, Added concept of "hidden" segments. 
*/ register BYTEVEC *Vector; TPOINT Position; @@ -197,7 +151,7 @@ MFOUTLINE ConvertOutline(TESSLINE *Outline) { return (MFOutline); /* have outlines been prenormalized */ - if (is_baseline_normalized ()) { + if (classify_baseline_normalized) { StartPoint = Outline->loop; EdgePoint = StartPoint; do { @@ -228,7 +182,8 @@ MFOUTLINE ConvertOutline(TESSLINE *Outline) { ClearMark(NewPoint); /* all edges are visible */ NewPoint->Hidden = FALSE; - CopyPoint (Position, NewPoint->Point); + NewPoint->Point.x = Position.x; + NewPoint->Point.y = Position.y; MFOutline = push (MFOutline, NewPoint); } Position.x += Vector->dx; @@ -272,21 +227,21 @@ LIST ConvertOutlines(TESSLINE *Outline, LIST ConvertedOutlines, OUTLINETYPE OutlineType) { /* - ** Parameters: - ** Outline first outline to be converted - ** ConvertedOutlines list to add converted outlines to - ** OutlineType are the outlines outer or holes? - ** Globals: none - ** Operation: + ** Parameters: + ** Outline first outline to be converted + ** ConvertedOutlines list to add converted outlines to + ** OutlineType are the outlines outer or holes? + ** Globals: none + ** Operation: ** This routine converts all given outlines into a new format. ** of outlines. Outline points to a list of the top level - ** outlines to be converted. The children of these outlines - ** are also recursively converted. All converted outlines - ** are added to ConvertedOutlines. This is a list of outlines, - ** one for each outline that was converted. - ** Return: Updated list of converted outlines. - ** Exceptions: none - ** History: Thu Dec 13 15:57:38 1990, DSJ, Created. + ** outlines to be converted. The children of these outlines + ** are also recursively converted. All converted outlines + ** are added to ConvertedOutlines. This is a list of outlines, + ** one for each outline that was converted. + ** Return: Updated list of converted outlines. + ** Exceptions: none + ** History: Thu Dec 13 15:57:38 1990, DSJ, Created. 
*/ MFOUTLINE MFOutline; @@ -311,22 +266,22 @@ LIST ConvertOutlines(TESSLINE *Outline, /*---------------------------------------------------------------------------*/ void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats) { /* - ** Parameters: - ** Outlines list of outlines to compute stats for - ** OutlineStats place to put results - ** Globals: none - ** Operation: This routine computes several statistics about the outlines - ** in Outlines. These statistics are usually used to perform - ** anistropic normalization of all of the outlines. The - ** statistics generated are: - ** first moments about x and y axes - ** total length of all outlines - ** center of mass of all outlines - ** second moments about center of mass axes - ** radius of gyration about center of mass axes - ** Return: none (results are returned in OutlineStats) - ** Exceptions: none - ** History: Fri Dec 14 08:32:03 1990, DSJ, Created. + ** Parameters: + ** Outlines list of outlines to compute stats for + ** OutlineStats place to put results + ** Globals: none + ** Operation: This routine computes several statistics about the outlines + ** in Outlines. These statistics are usually used to perform + ** anistropic normalization of all of the outlines. The + ** statistics generated are: + ** first moments about x and y axes + ** total length of all outlines + ** center of mass of all outlines + ** second moments about center of mass axes + ** radius of gyration about center of mass axes + ** Return: none (results are returned in OutlineStats) + ** Exceptions: none + ** History: Fri Dec 14 08:32:03 1990, DSJ, Created. 
*/ MFOUTLINE Outline; MFOUTLINE EdgePoint; @@ -360,17 +315,17 @@ void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats) { /*---------------------------------------------------------------------------*/ void FilterEdgeNoise(MFOUTLINE Outline, FLOAT32 NoiseSegmentLength) { /* - ** Parameters: - ** Outline outline to be filtered - ** NoiseSegmentLength maximum length of a "noise" segment - ** Globals: none - ** Operation: Filter out noise from the specified outline. This is - ** done by changing the direction of short segments of the - ** outline to the same direction as the preceding outline - ** segment. - ** Return: none - ** Exceptions: none - ** History: Fri May 4 10:23:45 1990, DSJ, Created. + ** Parameters: + ** Outline outline to be filtered + ** NoiseSegmentLength maximum length of a "noise" segment + ** Globals: none + ** Operation: Filter out noise from the specified outline. This is + ** done by changing the direction of short segments of the + ** outline to the same direction as the preceding outline + ** segment. + ** Return: none + ** Exceptions: none + ** History: Fri May 4 10:23:45 1990, DSJ, Created. */ MFOUTLINE Current; MFOUTLINE Last; @@ -427,21 +382,21 @@ void FindDirectionChanges(MFOUTLINE Outline, FLOAT32 MinSlope, FLOAT32 MaxSlope) { /* - ** Parameters: - ** Outline micro-feature outline to analyze - ** MinSlope controls "snapping" of segments to horizontal - ** MaxSlope controls "snapping" of segments to vertical - ** Globals: none - ** Operation: - ** This routine searches thru the specified outline, computes - ** a slope for each vector in the outline, and marks each - ** vector as having one of the following directions: - ** N, S, E, W, NE, NW, SE, SW - ** This information is then stored in the outline and the - ** outline is returned. - ** Return: none - ** Exceptions: none - ** History: 7/21/89, DSJ, Created. 
+ ** Parameters: + ** Outline micro-feature outline to analyze + ** MinSlope controls "snapping" of segments to horizontal + ** MaxSlope controls "snapping" of segments to vertical + ** Globals: none + ** Operation: + ** This routine searches thru the specified outline, computes + ** a slope for each vector in the outline, and marks each + ** vector as having one of the following directions: + ** N, S, E, W, NE, NW, SE, SW + ** This information is then stored in the outline and the + ** outline is returned. + ** Return: none + ** Exceptions: none + ** History: 7/21/89, DSJ, Created. */ MFEDGEPT *Current; MFEDGEPT *Last; @@ -468,15 +423,15 @@ void FindDirectionChanges(MFOUTLINE Outline, /*---------------------------------------------------------------------------*/ void FreeMFOutline(void *arg) { //MFOUTLINE Outline) /* - ** Parameters: - ** Outline micro-feature outline to be freed - ** Globals: none - ** Operation: - ** This routine deallocates all of the memory consumed by - ** a micro-feature outline. - ** Return: none - ** Exceptions: none - ** History: 7/27/89, DSJ, Created. + ** Parameters: + ** Outline micro-feature outline to be freed + ** Globals: none + ** Operation: + ** This routine deallocates all of the memory consumed by + ** a micro-feature outline. + ** Return: none + ** Exceptions: none + ** History: 7/27/89, DSJ, Created. */ MFOUTLINE Start; MFOUTLINE Outline = (MFOUTLINE) arg; @@ -495,56 +450,36 @@ void FreeMFOutline(void *arg) { //MFOUTLINE Outline /*---------------------------------------------------------------------------*/ void FreeOutlines(LIST Outlines) { /* - ** Parameters: - ** Outlines list of mf-outlines to be freed - ** Globals: none - ** Operation: Release all memory consumed by the specified list - ** of outlines. - ** Return: none - ** Exceptions: none - ** History: Thu Dec 13 16:14:50 1990, DSJ, Created. 
+ ** Parameters: + ** Outlines list of mf-outlines to be freed + ** Globals: none + ** Operation: Release all memory consumed by the specified list + ** of outlines. + ** Return: none + ** Exceptions: none + ** History: Thu Dec 13 16:14:50 1990, DSJ, Created. */ destroy_nodes(Outlines, FreeMFOutline); } /* FreeOutlines */ -/*---------------------------------------------------------------------------*/ -void InitMFOutlineVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: This routine initializes the global control knobs for - ** all routines in this file. - ** Return: none - ** Exceptions: none - ** History: Fri Dec 14 10:50:12 1990, DSJ, Created. - */ - MakeNormMethod(); - MakeCharNormRange(); - MakeMinNormScaleX(); - MakeMaxNormScaleX(); - MakeMinNormScaleY(); - MakeMaxNormScaleY(); -} /* InitMFOutlineVars */ - - /*---------------------------------------------------------------------------*/ void MarkDirectionChanges(MFOUTLINE Outline) { /* - ** Parameters: - ** Outline micro-feature outline to analyze - ** Globals: none - ** Operation: - ** This routine searches thru the specified outline and finds - ** the points at which the outline changes direction. These - ** points are then marked as "extremities". This routine is - ** used as an alternative to FindExtremities(). It forces the - ** endpoints of the microfeatures to be at the direction - ** changes rather than at the midpoint between direction - ** changes. - ** Return: none - ** Exceptions: none - ** History: 6/29/90, DSJ, Created. + ** Parameters: + ** Outline micro-feature outline to analyze + ** Globals: none + ** Operation: + ** This routine searches thru the specified outline and finds + ** the points at which the outline changes direction. These + ** points are then marked as "extremities". This routine is + ** used as an alternative to FindExtremities(). 
It forces the + ** endpoints of the microfeatures to be at the direction + ** changes rather than at the midpoint between direction + ** changes. + ** Return: none + ** Exceptions: none + ** History: 6/29/90, DSJ, Created. */ MFOUTLINE Current; MFOUTLINE Last; @@ -568,14 +503,14 @@ void MarkDirectionChanges(MFOUTLINE Outline) { /*---------------------------------------------------------------------------*/ MFEDGEPT *NewEdgePoint() { /* - ** Parameters: none - ** Globals: none - ** Operation: - ** This routine allocates and returns a new edge point for - ** a micro-feature outline. - ** Return: New edge point. - ** Exceptions: none - ** History: 7/21/89, DSJ, Created. + ** Parameters: none + ** Globals: none + ** Operation: + ** This routine allocates and returns a new edge point for + ** a micro-feature outline. + ** Return: New edge point. + ** Exceptions: none + ** History: 7/21/89, DSJ, Created. */ return ((MFEDGEPT *) alloc_struct (sizeof (MFEDGEPT), "MFEDGEPT")); @@ -585,18 +520,18 @@ MFEDGEPT *NewEdgePoint() { /*---------------------------------------------------------------------------*/ MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { /* - ** Parameters: - ** EdgePoint start search from this point - ** Globals: none - ** Operation: - ** This routine returns the next point in the micro-feature - ** outline that is an extremity. The search starts after - ** EdgePoint. The routine assumes that the outline being - ** searched is not a degenerate outline (i.e. it must have - ** 2 or more edge points). - ** Return: Next extremity in the outline after EdgePoint. - ** Exceptions: none - ** History: 7/26/89, DSJ, Created. + ** Parameters: + ** EdgePoint start search from this point + ** Globals: none + ** Operation: + ** This routine returns the next point in the micro-feature + ** outline that is an extremity. The search starts after + ** EdgePoint. The routine assumes that the outline being + ** searched is not a degenerate outline (i.e. 
it must have + ** 2 or more edge points). + ** Return: Next extremity in the outline after EdgePoint. + ** Exceptions: none + ** History: 7/26/89, DSJ, Created. */ EdgePoint = NextPointAfter (EdgePoint); while (!PointAt (EdgePoint)->ExtremityMark) @@ -612,24 +547,24 @@ void NormalizeOutline(MFOUTLINE Outline, LINE_STATS *LineStats, FLOAT32 XOrigin) { /* - ** Parameters: - ** Outline outline to be normalized - ** LineStats statistics for text line normalization - ** XOrigin x-origin of text - ** Globals: none - ** Operation: - ** This routine normalizes the coordinates of the specified - ** outline so that the outline is deskewed down to the - ** baseline, translated so that x=0 is at XOrigin, and scaled - ** so that the height of a character cell from descender to - ** ascender is 1. Of this height, 0.25 is for the descender, - ** 0.25 for the ascender, and 0.5 for the x-height. The - ** y coordinate of the baseline is 0. - ** Return: none - ** Exceptions: none - ** History: 8/2/89, DSJ, Created. - ** 10/23/89, DSJ, Added ascender/descender stretching. - ** 11/89, DSJ, Removed ascender/descender stretching. + ** Parameters: + ** Outline outline to be normalized + ** LineStats statistics for text line normalization + ** XOrigin x-origin of text + ** Globals: none + ** Operation: + ** This routine normalizes the coordinates of the specified + ** outline so that the outline is deskewed down to the + ** baseline, translated so that x=0 is at XOrigin, and scaled + ** so that the height of a character cell from descender to + ** ascender is 1. Of this height, 0.25 is for the descender, + ** 0.25 for the ascender, and 0.5 for the x-height. The + ** y coordinate of the baseline is 0. + ** Return: none + ** Exceptions: none + ** History: 8/2/89, DSJ, Created. + ** 10/23/89, DSJ, Added ascender/descender stretching. + ** 11/89, DSJ, Removed ascender/descender stretching. 
*/ MFEDGEPT *Current; MFOUTLINE EdgePoint; @@ -674,29 +609,29 @@ void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale) { /* - ** Parameters: - ** Outlines list of outlines to be normalized - ** LineStats statistics for text line normalization - ** XScale x-direction scale factor used by routine - ** YScale y-direction scale factor used by routine - ** Globals: - ** NormMethod method being used for normalization - ** CharNormRange map radius of gyration to this value - ** Operation: This routine normalizes every outline in Outlines - ** according to the currently selected normalization method. - ** It also returns the scale factors that it used to do this - ** scaling. The scale factors returned represent the x and - ** y sizes in the normalized coordinate system that correspond - ** to 1 pixel in the original coordinate system. - ** Return: none (Outlines are changed and XScale and YScale are updated) - ** Exceptions: none - ** History: Fri Dec 14 08:14:55 1990, DSJ, Created. + ** Parameters: + ** Outlines list of outlines to be normalized + ** LineStats statistics for text line normalization + ** XScale x-direction scale factor used by routine + ** YScale y-direction scale factor used by routine + ** Globals: + ** classify_norm_method method being used for normalization + ** classify_char_norm_range map radius of gyration to this value + ** Operation: This routine normalizes every outline in Outlines + ** according to the currently selected normalization method. + ** It also returns the scale factors that it used to do this + ** scaling. The scale factors returned represent the x and + ** y sizes in the normalized coordinate system that correspond + ** to 1 pixel in the original coordinate system. + ** Return: none (Outlines are changed and XScale and YScale are updated) + ** Exceptions: none + ** History: Fri Dec 14 08:14:55 1990, DSJ, Created. 
*/ MFOUTLINE Outline; OUTLINE_STATS OutlineStats; FLOAT32 BaselineScale; - switch (NormMethod) { + switch (classify_norm_method) { case character: ComputeOutlineStats(Outlines, &OutlineStats); @@ -705,14 +640,15 @@ void NormalizeOutlines(LIST Outlines, *XScale = *YScale = BaselineScale = ComputeScaleFactor (LineStats); *XScale *= OutlineStats.Ry; *YScale *= OutlineStats.Rx; - if (*XScale < MinNormScaleX) - *XScale = MinNormScaleX; - if (*YScale < MinNormScaleY) - *YScale = MinNormScaleY; - if (*XScale > MaxNormScaleX && *YScale <= MaxNormScaleY) - *XScale = MaxNormScaleX; - *XScale = CharNormRange * BaselineScale / *XScale; - *YScale = CharNormRange * BaselineScale / *YScale; + if (*XScale < classify_min_norm_scale_x) + *XScale = classify_min_norm_scale_x; + if (*YScale < classify_min_norm_scale_y) + *YScale = classify_min_norm_scale_y; + if (*XScale > classify_max_norm_scale_x && + *YScale <= classify_max_norm_scale_y) + *XScale = classify_max_norm_scale_x; + *XScale = classify_char_norm_range * BaselineScale / *XScale; + *YScale = classify_char_norm_range * BaselineScale / *YScale; iterate(Outlines) { Outline = (MFOUTLINE) first_node (Outlines); @@ -736,41 +672,40 @@ void NormalizeOutlines(LIST Outlines, /*---------------------------------------------------------------------------*/ void SettupBlobConversion(TBLOB *Blob) { /* - ** Parameters: - ** Blob blob that is to be converted - ** Globals: - ** BlobCenter center of blob to be converted - ** Operation: Compute the center of the blob's bounding box and save - ** it in a global variable. This routine must be called before - ** any calls to ConvertOutline. It must be called once per - ** blob. - ** Return: none - ** Exceptions: none - ** History: Thu May 17 11:06:17 1990, DSJ, Created. + ** Parameters: + ** Blob blob that is to be converted + ** Globals: + ** BlobCenter center of blob to be converted + ** Operation: Compute the center of the blob's bounding box and save + ** it in a global variable. 
This routine must be called before + ** any calls to ConvertOutline. It must be called once per + ** blob. + ** Return: none + ** Exceptions: none + ** History: Thu May 17 11:06:17 1990, DSJ, Created. */ ComputeBlobCenter(Blob, &BlobCenter); - } /* SettupBlobConversion */ /*---------------------------------------------------------------------------*/ void SmearExtremities(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale) { /* - ** Parameters: - ** Outline outline whose extremities are to be smeared - ** XScale factor used to normalize outline in x dir - ** YScale factor used to normalize outline in y dir - ** Globals: none - ** Operation: - ** This routine smears the extremities of the specified outline. - ** It does this by adding a random number between - ** -0.5 and 0.5 pixels (that is why X/YScale are needed) to - ** the x and y position of the point. This is done so that - ** the discrete nature of the original scanned image does not - ** affect the statistical clustering used during training. - ** Return: none - ** Exceptions: none - ** History: 1/11/90, DSJ, Created. + ** Parameters: + ** Outline outline whose extremities are to be smeared + ** XScale factor used to normalize outline in x dir + ** YScale factor used to normalize outline in y dir + ** Globals: none + ** Operation: + ** This routine smears the extremities of the specified outline. + ** It does this by adding a random number between + ** -0.5 and 0.5 pixels (that is why X/YScale are needed) to + ** the x and y position of the point. This is done so that + ** the discrete nature of the original scanned image does not + ** affect the statistical clustering used during training. + ** Return: none + ** Exceptions: none + ** History: 1/11/90, DSJ, Created. 
*/ MFEDGEPT *Current; MFOUTLINE EdgePoint; @@ -807,18 +742,18 @@ void SmearExtremities(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale) { /*---------------------------------------------------------------------------*/ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { /* - ** Parameters: - ** Start, End defines segment of outline to be modified - ** Direction new direction to assign to segment - ** Globals: none - ** Operation: Change the direction of every vector in the specified - ** outline segment to Direction. The segment to be changed - ** starts at Start and ends at End. Note that the previous - ** direction of End must also be changed to reflect the - ** change in direction of the point before it. - ** Return: none - ** Exceptions: none - ** History: Fri May 4 10:42:04 1990, DSJ, Created. + ** Parameters: + ** Start, End defines segment of outline to be modified + ** Direction new direction to assign to segment + ** Globals: none + ** Operation: Change the direction of every vector in the specified + ** outline segment to Direction. The segment to be changed + ** starts at Start and ends at End. Note that the previous + ** direction of End must also be changed to reflect the + ** change in direction of the point before it. + ** Return: none + ** Exceptions: none + ** History: Fri May 4 10:42:04 1990, DSJ, Created. */ MFOUTLINE Current; @@ -837,17 +772,17 @@ void CharNormalizeOutline(MFOUTLINE Outline, FLOAT32 XScale, FLOAT32 YScale) { /* - ** Parameters: - ** Outline outline to be character normalized - ** XCenter, YCenter center point for normalization - ** XScale, YScale scale factors for normalization - ** Globals: none - ** Operation: This routine normalizes each point in Outline by - ** translating it to the specified center and scaling it - ** anisotropically according to the given scale factors. - ** Return: none - ** Exceptions: none - ** History: Fri Dec 14 10:27:11 1990, DSJ, Created. 
+ ** Parameters: + ** Outline outline to be character normalized + ** XCenter, YCenter center point for normalization + ** XScale, YScale scale factors for normalization + ** Globals: none + ** Operation: This routine normalizes each point in Outline by + ** translating it to the specified center and scaling it + ** anisotropically according to the given scale factors. + ** Return: none + ** Exceptions: none + ** History: Fri Dec 14 10:27:11 1990, DSJ, Created. */ MFOUTLINE First, Current; MFEDGEPT *CurrentPoint; @@ -877,25 +812,25 @@ void ComputeDirection(MFEDGEPT *Start, FLOAT32 MinSlope, FLOAT32 MaxSlope) { /* - ** Parameters: - ** Start starting point to compute direction from - ** Finish finishing point to compute direction to - ** MinSlope slope below which lines are horizontal - ** MaxSlope slope above which lines are vertical - ** Globals: none - ** Operation: - ** This routine computes the slope from Start to Finish and - ** and then computes the approximate direction of the line - ** segment from Start to Finish. The direction is quantized - ** into 8 buckets: - ** N, S, E, W, NE, NW, SE, SW - ** Both the slope and the direction are then stored into - ** the appropriate fields of the Start edge point. The - ** direction is also stored into the PreviousDirection field - ** of the Finish edge point. - ** Return: none - ** Exceptions: none - ** History: 7/25/89, DSJ, Created. + ** Parameters: + ** Start starting point to compute direction from + ** Finish finishing point to compute direction to + ** MinSlope slope below which lines are horizontal + ** MaxSlope slope above which lines are vertical + ** Globals: none + ** Operation: + ** This routine computes the slope from Start to Finish and + ** and then computes the approximate direction of the line + ** segment from Start to Finish. 
The direction is quantized + ** into 8 buckets: + ** N, S, E, W, NE, NW, SE, SW + ** Both the slope and the direction are then stored into + ** the appropriate fields of the Start edge point. The + ** direction is also stored into the PreviousDirection field + ** of the Finish edge point. + ** Return: none + ** Exceptions: none + ** History: 7/25/89, DSJ, Created. */ FVECTOR Delta; @@ -951,16 +886,16 @@ void ComputeDirection(MFEDGEPT *Start, /*---------------------------------------------------------------------------*/ void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) { /* - ** Parameters: - ** OutlineStats statistics about a set of outlines - ** Globals: none - ** Operation: Use the preliminary statistics accumulated in OutlineStats - ** to compute the final statistics. - ** (see Dan Johnson's Tesseract lab - ** notebook #2, pgs. 74-78). - ** Return: none - ** Exceptions: none - ** History: Fri Dec 14 10:13:36 1990, DSJ, Created. + ** Parameters: + ** OutlineStats statistics about a set of outlines + ** Globals: none + ** Operation: Use the preliminary statistics accumulated in OutlineStats + ** to compute the final statistics. + ** (see Dan Johnson's Tesseract lab + ** notebook #2, pgs. 74-78). + ** Return: none + ** Exceptions: none + ** History: Fri Dec 14 10:13:36 1990, DSJ, Created. */ OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L; OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L; @@ -991,14 +926,14 @@ void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) { /*---------------------------------------------------------------------------*/ void InitOutlineStats(OUTLINE_STATS *OutlineStats) { /* - ** Parameters: - ** OutlineStats stats data structure to be initialized - ** Globals: none - ** Operation: Initialize the outline statistics data structure so - ** that it is ready to start accumulating statistics. - ** Return: none - ** Exceptions: none - ** History: Fri Dec 14 08:55:22 1990, DSJ, Created. 
+ ** Parameters: + ** OutlineStats stats data structure to be initialized + ** Globals: none + ** Operation: Initialize the outline statistics data structure so + ** that it is ready to start accumulating statistics. + ** Return: none + ** Exceptions: none + ** History: Fri Dec 14 08:55:22 1990, DSJ, Created. */ OutlineStats->Mx = 0.0; OutlineStats->My = 0.0; @@ -1015,17 +950,17 @@ void InitOutlineStats(OUTLINE_STATS *OutlineStats) { /*---------------------------------------------------------------------------*/ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { /* - ** Parameters: - ** EdgePoint start search from this point - ** Globals: none - ** Operation: - ** This routine returns the next point in the micro-feature - ** outline that has a direction different than EdgePoint. The - ** routine assumes that the outline being searched is not a - ** degenerate outline (i.e. it must have 2 or more edge points). - ** Return: Point of next direction change in micro-feature outline. - ** Exceptions: none - ** History: 7/25/89, DSJ, Created. + ** Parameters: + ** EdgePoint start search from this point + ** Globals: none + ** Operation: + ** This routine returns the next point in the micro-feature + ** outline that has a direction different than EdgePoint. The + ** routine assumes that the outline being searched is not a + ** degenerate outline (i.e. it must have 2 or more edge points). + ** Return: Point of next direction change in micro-feature outline. + ** Exceptions: none + ** History: 7/25/89, DSJ, Created. */ DIRECTION InitialDirection; @@ -1046,25 +981,25 @@ void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats, register FLOAT32 y1, register FLOAT32 y2) { /* - ** Parameters: - ** OutlineStats statistics to add this segment to - ** x1, y1, x2, y2 segment to be added to statistics - ** Globals: none - ** Operation: This routine adds the statistics for the specified - ** line segment to OutlineStats. 
The statistics that are - ** kept are: - ** sum of length of all segments - ** sum of 2*Mx for all segments - ** sum of 2*My for all segments - ** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments - ** sum of 2*My*(x1+x2) - L*x1*x2 for all segments - ** These numbers, once collected can later be used to easily - ** compute the center of mass, first and second moments, - ** and radii of gyration. (see Dan Johnson's Tesseract lab - ** notebook #2, pgs. 74-78). - ** Return: none - ** Exceptions: none - ** History: Fri Dec 14 08:59:17 1990, DSJ, Created. + ** Parameters: + ** OutlineStats statistics to add this segment to + ** x1, y1, x2, y2 segment to be added to statistics + ** Globals: none + ** Operation: This routine adds the statistics for the specified + ** line segment to OutlineStats. The statistics that are + ** kept are: + ** sum of length of all segments + ** sum of 2*Mx for all segments + ** sum of 2*My for all segments + ** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments + ** sum of 2*My*(x1+x2) - L*x1*x2 for all segments + ** These numbers, once collected can later be used to easily + ** compute the center of mass, first and second moments, + ** and radii of gyration. (see Dan Johnson's Tesseract lab + ** notebook #2, pgs. 74-78). + ** Return: none + ** Exceptions: none + ** History: Fri Dec 14 08:59:17 1990, DSJ, Created. 
*/ register FLOAT64 L; register FLOAT64 Mx2; diff --git a/classify/mfoutline.h b/classify/mfoutline.h index 5e13223d7..acee5db86 100644 --- a/classify/mfoutline.h +++ b/classify/mfoutline.h @@ -26,6 +26,7 @@ #include "fpoint.h" #include "fxdefs.h" #include "baseline.h" +#include "varable.h" #define NORMAL_X_HEIGHT (0.5) #define NORMAL_BASELINE (0.0) @@ -38,13 +39,7 @@ typedef enum { DIRECTION; -/* -typedef enum -{ -False, True -} -BOOLEAN; -*/ + typedef struct { FPOINT Point; @@ -88,7 +83,24 @@ NORM_METHOD; /*---------------------------------------------------------------------------- Variables ------------------------------------------------------------------------------*/ -extern int NormMethod; +/* control knobs used to control normalization of outlines */ +extern INT_VAR_H(classify_norm_method, character, + "Normalization Method ..."); +/* PREV DEFAULT "baseline" */ +extern double_VAR_H(classify_char_norm_range, 0.2, + "Character Normalization Range ..."); +extern double_VAR_H(classify_min_norm_scale_x, 0.0, + "Min char x-norm scale ..."); +/* PREV DEFAULT 0.1 */ +extern double_VAR_H(classify_max_norm_scale_x, 0.325, + "Max char x-norm scale ..."); +/* PREV DEFAULT 0.3 */ +extern double_VAR_H(classify_min_norm_scale_y, 0.0, + "Min char y-norm scale ..."); +/* PREV DEFAULT 0.1 */ +extern double_VAR_H(classify_max_norm_scale_y, 0.325, + "Max char y-norm scale ..."); +/* PREV DEFAULT 0.3 */ /**---------------------------------------------------------------------------- Macros @@ -100,7 +112,7 @@ extern int NormMethod; /* macro for computing the scale factor to use to normalize characters */ #define ComputeScaleFactor(L) \ -(NORMAL_X_HEIGHT / ((is_baseline_normalized ())? \ +(NORMAL_X_HEIGHT / ((classify_baseline_normalized)? 
\ (BASELINE_SCALE): \ ((L)->xheight))) @@ -111,17 +123,8 @@ extern int NormMethod; #define MakeOutlineCircular(O) (set_rest (last (O), (O))) /* macros for manipulating micro-feature outline edge points */ -//#define PositionOf(P) ((P)->Point) -//#define XPositionOf(P) ((P)->Point.x) -//#define YPositionOf(P) ((P)->Point.y) -//#define DirectionOf(P) ((P)->Direction) -//#define PreviousDirectionOf(P) ((P)->PreviousDirection) #define ClearMark(P) ((P)->ExtremityMark = FALSE) #define MarkPoint(P) ((P)->ExtremityMark = TRUE) -//#define IsExtremity(P) ((P)->ExtremityMark) -//#define NotExtremity(P) (!(P->ExtremityMark)) -//#define IsVisible(E) (! (E->Hidden)) -//#define IsHidden(E) ((E)->Hidden) /**---------------------------------------------------------------------------- Public Function Prototypes @@ -148,8 +151,6 @@ void FreeMFOutline(void *agr); //MFOUTLINE Outline) void FreeOutlines(LIST Outlines); -void InitMFOutlineVars(); - void MarkDirectionChanges(MFOUTLINE Outline); MFEDGEPT *NewEdgePoint(); @@ -197,81 +198,4 @@ void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats, register FLOAT32 y1, register FLOAT32 y2); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* mfoutline.c -void ComputeBlobCenter - _ARGS((BLOB *Blob, - TPOINT *BlobCenter)); - -LIST ConvertBlob - _ARGS((BLOB *Blob)); - -MFOUTLINE ConvertOutline - _ARGS((TESSLINE *Outline)); - -LIST ConvertOutlines - _ARGS((TESSLINE *Outline, - LIST ConvertedOutlines, - OUTLINETYPE OutlineType)); - -void ComputeOutlineStats - _ARGS((LIST Outlines, - OUTLINE_STATS *OutlineStats)); - -void FilterEdgeNoise - _ARGS((MFOUTLINE Outline, - FLOAT32 NoiseSegmentLength)); - -void FindDirectionChanges - _ARGS((MFOUTLINE Outline, - FLOAT32 MinSlope, - FLOAT32 MaxSlope)); - -void FreeMFOutline - _ARGS((MFOUTLINE Outline)); - -void FreeOutlines - _ARGS((LIST Outlines)); - -void InitMFOutlineVars - _ARGS((void)); - -void MarkDirectionChanges - 
_ARGS((MFOUTLINE Outline)); - -MFEDGEPT *NewEdgePoint - _ARGS((void)); - -MFOUTLINE NextExtremity - _ARGS((MFOUTLINE EdgePoint)); - -void NormalizeOutline - _ARGS((MFOUTLINE Outline, - LINE_STATS *LineStats, - FLOAT32 XOrigin)); - -void NormalizeOutlines - _ARGS((LIST Outlines, - LINE_STATS *LineStats)); - -void SettupBlobConversion - _ARGS((BLOB *Blob)); - -void SmearExtremities - _ARGS((MFOUTLINE Outline, - FLOAT32 XScale, - FLOAT32 YScale)); - -#undef _ARGS -*/ -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern int NormMethod; /* normalized method currently selected */ #endif diff --git a/classify/mfx.cpp b/classify/mfx.cpp index e12ebb103..afa3abfe9 100644 --- a/classify/mfx.cpp +++ b/classify/mfx.cpp @@ -19,24 +19,25 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "mfdefs.h" -#include "variables.h" -#include "sigmenu.h" #include "mfoutline.h" #include "clusttool.h" //NEEDED #include "const.h" #include "intfx.h" +#include "varable.h" + #include <math.h> -/* default values for tunable knobs */ +/**---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + /* old numbers corresponded to 10.0 degrees and 80.0 degrees */ - /* PREV DEFAULT 0.176326981 approx. 10.0 degrees */ -#define MIN_SLOPE 0.414213562 - /* PREV DEFAULT 5.671281820 approx.
80.0 degrees */ -#define MAX_SLOPE 2.414213562 - /* no noise filtering */ -#define NOISE_SEGMENT_LENGTH (0.00) - /* no feature splitting */ -#define MAX_FEATURE_LENGTH (MAXFLOAT) +double_VAR(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +double_VAR(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); +double_VAR(classify_noise_segment_length, 0.00, + "Length below which outline segments are treated as noise"); /**---------------------------------------------------------------------------- Macros @@ -58,77 +59,9 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* /users/danj/wiseowl/src/danj/microfeatures/mfx.c -void ComputeBulges - _ARGS((MFOUTLINE Start, - MFOUTLINE End, - MICROFEATURE MicroFeature)); - -FLOAT32 ComputeOrientation - _ARGS((MFEDGEPT *Start, - MFEDGEPT *End)); - -MICROFEATURES ConvertToMicroFeatures - _ARGS((MFOUTLINE Outline, - MICROFEATURES MicroFeatures)); - -MICROFEATURE ExtractMicroFeature - _ARGS((MFOUTLINE Start, - MFOUTLINE End)); - -void SmearBulges - _ARGS((MICROFEATURES MicroFeatures, - FLOAT32 XScale, - FLOAT32 YScale)); - -#undef _ARGS -*/ - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -/* tuning knobs that can be adjusted without recompilation */ -static FLOAT32 MinSlope; -static FLOAT32 MaxSlope; -static FLOAT32 NoiseSegmentLength; - /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void 
InitMicroFxVars() { -/* - ** Parameters: none - ** Globals: - ** MinSlope slope below which lines are called horizontal - ** MaxSlope slope above which lines are called vertical - ** NoiseSegmentLength length below which outline segments - ** are treated as noise - ** MaxFeatureLength length above which a feature will - ** be split into 2 equal pieces - ** ExtremityMode controls how extremities are defined - ** XHeightAdjust allows xheight of line to be adjusted - ** Operation: Initialize the micro-feature extractor variables (knobs) - ** that can be tuned without recompiling. - ** Return: none - ** Exceptions: none - ** History: Mon May 14 11:24:40 1990, DSJ, Created. - */ - VALUE dummy; - - float_variable (MinSlope, "MinSlope", MIN_SLOPE); - float_variable (MaxSlope, "MaxSlope", MAX_SLOPE); - float_variable (NoiseSegmentLength, "NoiseSegmentLength", - NOISE_SEGMENT_LENGTH); -} /* InitMicroFxVars */ - /*---------------------------------------------------------------------------*/ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { @@ -136,8 +69,6 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { ** Parameters: ** Blob blob to extract micro-features from ** LineStats statistics for text line normalization - ** Globals: - ** XHeightAdjust used for manually adjusting xheight ** Operation: ** This routine extracts micro-features from the specified ** blob and returns a list of the micro-features. 
All @@ -159,7 +90,8 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { if (Blob != NULL) { Outlines = ConvertBlob (Blob); // NormalizeOutlines(Outlines, LineStats, &XScale, &YScale); - ExtractIntFeat(Blob, blfeatures, cnfeatures, &results); + if (!ExtractIntFeat(Blob, blfeatures, cnfeatures, &results)) + return NULL; XScale = 0.2f / results.Ry; YScale = 0.2f / results.Rx; @@ -174,8 +106,8 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { RemainingOutlines = Outlines; iterate(RemainingOutlines) { Outline = (MFOUTLINE) first_node (RemainingOutlines); - FindDirectionChanges(Outline, MinSlope, MaxSlope); - FilterEdgeNoise(Outline, NoiseSegmentLength); + FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); + FilterEdgeNoise(Outline, classify_noise_segment_length); MarkDirectionChanges(Outline); SmearExtremities(Outline, XScale, YScale); MicroFeatures = ConvertToMicroFeatures (Outline, MicroFeatures); @@ -197,8 +129,8 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats) { **********************************************************************/ #define angle_of(x1,y1,x2,y2) \ ((x2-x1) ? \ - (atan2 (y2-y1, x2-x1)) : \ - ((y2Point.x, -Origin->Point.y); SegmentEnd = Start; - FillPoint (CurrentPoint, 0, 0); + CurrentPoint.x = 0.0f; + CurrentPoint.y = 0.0f; BulgePosition = MicroFeature[MFLENGTH] / 3; - CopyPoint(CurrentPoint, LastPoint); + LastPoint = CurrentPoint; while (CurrentPoint.x < BulgePosition) { SegmentStart = SegmentEnd; SegmentEnd = NextPointAfter (SegmentStart); - CopyPoint(CurrentPoint, LastPoint); + LastPoint = CurrentPoint; - MapPoint (&Matrix, PointAt (SegmentEnd)->Point, CurrentPoint); + MapPoint(&Matrix, PointAt(SegmentEnd)->Point, &CurrentPoint); } MicroFeature[FIRSTBULGE] = XIntersectionOf(LastPoint, CurrentPoint, BulgePosition); @@ -276,12 +209,12 @@ void ComputeBulges(MFOUTLINE Start, MFOUTLINE End, MICROFEATURE MicroFeature) { // CurrentPoint will not change. 
(Which would cause to output nan // for the SecondBulge.) if (CurrentPoint.x < BulgePosition) - CopyPoint(CurrentPoint, LastPoint); + LastPoint = CurrentPoint; while (CurrentPoint.x < BulgePosition) { SegmentStart = SegmentEnd; SegmentEnd = NextPointAfter (SegmentStart); - CopyPoint(CurrentPoint, LastPoint); - MapPoint (&Matrix, PointAt (SegmentEnd)->Point, CurrentPoint); + LastPoint = CurrentPoint; + MapPoint(&Matrix, PointAt(SegmentEnd)->Point, &CurrentPoint); } MicroFeature[SECONDBULGE] = XIntersectionOf(LastPoint, CurrentPoint, BulgePosition); @@ -314,8 +247,7 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { */ FLOAT32 Orientation; - Orientation = NormalizeAngle (AngleFrom (Start->Point, - End->Point)); + Orientation = NormalizeAngle (AngleFrom (Start->Point, End->Point)); /* ensure that round-off errors do not put circular param out of range */ if ((Orientation < 0) || (Orientation >= 1)) @@ -391,8 +323,7 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End) { NewFeature[XPOSITION] = AverageOf (P1->Point.x, P2->Point.x); NewFeature[YPOSITION] = AverageOf (P1->Point.y, P2->Point.y); NewFeature[MFLENGTH] = DistanceBetween (P1->Point, P2->Point); - NewFeature[ORIENTATION] = - NormalizedAngleFrom (&((P1)->Point), &((P2)->Point), 1.0); + NewFeature[ORIENTATION] = NormalizedAngleFrom(&P1->Point, &P2->Point, 1.0); ComputeBulges(Start, End, NewFeature); return (NewFeature); } /* ExtractMicroFeature */ @@ -403,8 +334,8 @@ void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale) { /* ** Parameters: ** MicroFeatures features to be smeared - ** XScale # of normalized units per pixel in x dir - ** YScale # of normalized units per pixel in y dir + ** XScale # of normalized units per pixel in x dir + ** YScale # of normalized units per pixel in y dir ** Globals: none ** Operation: Add a random amount to each bulge parameter of each ** feature. 
The amount added is between -0.5 pixels and @@ -423,8 +354,8 @@ void SmearBulges(MICROFEATURES MicroFeatures, FLOAT32 XScale, FLOAT32 YScale) { iterate(MicroFeatures) { MicroFeature = NextFeatureOf (MicroFeatures); - Cos = fabs (cos (2.0 * PI * MicroFeature[ORIENTATION])); - Sin = fabs (sin (2.0 * PI * MicroFeature[ORIENTATION])); + Cos = fabs(cos(2.0 * PI * MicroFeature[ORIENTATION])); + Sin = fabs(sin(2.0 * PI * MicroFeature[ORIENTATION])); Scale = YScale * Cos + XScale * Sin; MinSmear = -0.5 * Scale / (BULGENORMALIZER * MicroFeature[MFLENGTH]); diff --git a/classify/mfx.h b/classify/mfx.h index eabfc40b2..310495b3c 100644 --- a/classify/mfx.h +++ b/classify/mfx.h @@ -22,31 +22,23 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "fxdefs.h" +#include "varable.h" +/**---------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------**/ + +/* old numbers corresponded to 10.0 degrees and 80.0 degrees */ +extern double_VAR_H(classify_min_slope, 0.414213562, + "Slope below which lines are called horizontal"); +extern double_VAR_H(classify_max_slope, 2.414213562, + "Slope above which lines are called vertical"); +extern double_VAR_H(classify_noise_segment_length, 0.00, + "Length below which outline segments" + "are treated as noise"); -extern FLOAT32 MinSlope; -extern FLOAT32 MaxSlope; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void InitMicroFxVars(); +CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats); -CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, LINE_STATS *LineStats); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* mfx.c -void InitMicroFxVars - _ARGS((void)); - 
-CHAR_FEATURES BlobMicroFeatures - _ARGS((BLOB *Blob, - LINE_STATS *LineStats)); - -#undef _ARGS -*/ #endif diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index b1c8eafa3..b573ee7d8 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -19,28 +19,29 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "normmatch.h" -#include "clusttool.h" -#include "normfeat.h" -#include "debug.h" -#include "const.h" -#include "efio.h" -#include "emalloc.h" -#include "globals.h" -#include "scanutils.h" #include #include -/* define default filenames for training data */ -#define NORM_PROTO_FILE "normproto" +#include "classify.h" +#include "clusttool.h" +#include "const.h" +#include "efio.h" +#include "emalloc.h" +#include "globals.h" +#include "helpers.h" +#include "normfeat.h" +#include "scanutils.h" +#include "unicharset.h" +#include "varable.h" -typedef struct +struct NORM_PROTOS { int NumParams; PARAM_DESC *ParamDesc; LIST* Protos; int NumProtos; -} NORM_PROTOS; +}; /**---------------------------------------------------------------------------- Private Function Prototypes @@ -55,24 +56,20 @@ void PrintNormMatch(FILE *File, NORM_PROTOS *ReadNormProtos(FILE *File); /**---------------------------------------------------------------------------- - Global Data Definitions and Declarations + Variables ----------------------------------------------------------------------------**/ -/* global data structure to hold char normalization protos */ -static NORM_PROTOS *NormProtos; - -/* name of file containing char normalization protos */ -static const char *NormProtoFile = NORM_PROTO_FILE; /* control knobs used to control the normalization adjustment process */ -make_float_var (NormAdjMidpoint, 32.0, MakeNormAdjMidpoint, -15, 16, SetNormAdjMidpoint, "Norm adjust midpoint ...") -make_float_var (NormAdjCurl, 2.0, MakeNormAdjCurl, -15, 17, SetNormAdjCurl, "Norm adjust curl ...") 
+double_VAR(classify_norm_adj_midpoint, 32.0, "Norm adjust midpoint ..."); +double_VAR(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); + /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -FLOAT32 ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, BOOL8 DebugMatch) { +namespace tesseract { +FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, + BOOL8 DebugMatch) { /* ** Parameters: ** ClassId id of class to match against @@ -139,32 +136,7 @@ FLOAT32 ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, BOOL8 DebugMatch) { return (1.0 - NormEvidenceOf (BestMatch)); } /* ComputeNormMatch */ - -/*---------------------------------------------------------------------------*/ -void GetNormProtos() { -/* - ** Parameters: none - ** Globals: - ** NormProtoFile name of file containing normalization protos - ** NormProtos global data structure to hold protos - ** Operation: This routine reads in a set of character normalization - ** protos from NormProtoFile and places them into NormProtos. - ** Return: none - ** Exceptions: none - ** History: Wed Dec 19 16:24:25 1990, DSJ, Created. 
- */ - FILE *File; - STRING name; - - name = language_data_path_prefix; - name += NormProtoFile; - File = Efopen (name.string(), "r"); - NormProtos = ReadNormProtos (File); - fclose(File); - -} /* GetNormProtos */ - -void FreeNormProtos() { +void Classify::FreeNormProtos() { if (NormProtos != NULL) { for (int i = 0; i < NormProtos->NumProtos; i++) FreeProtoList(&NormProtos->Protos[i]); @@ -174,28 +146,7 @@ void FreeNormProtos() { NormProtos = NULL; } } - -/*---------------------------------------------------------------------------*/ -void InitNormProtoVars() { -/* - ** Parameters: none - ** Globals: - ** NormProtoFile filename for normalization protos - ** Operation: Initialize the control variables for the normalization - ** matcher. - ** Return: none - ** Exceptions: none - ** History: Mon Nov 5 17:22:10 1990, DSJ, Created. - */ - VALUE dummy; - - string_variable (NormProtoFile, "NormProtoFile", NORM_PROTO_FILE); - - MakeNormAdjMidpoint(); - MakeNormAdjCurl(); - -} /* InitNormProtoVars */ - +} // namespace tesseract /**---------------------------------------------------------------------------- Private Code @@ -208,14 +159,14 @@ void InitNormProtoVars() { * 1 / (1 + (NormAdj / midpoint) ^ curl) **********************************************************************/ FLOAT32 NormEvidenceOf(register FLOAT32 NormAdj) { - NormAdj /= NormAdjMidpoint; + NormAdj /= classify_norm_adj_midpoint; - if (NormAdjCurl == 3) + if (classify_norm_adj_curl == 3) NormAdj = NormAdj * NormAdj * NormAdj; - else if (NormAdjCurl == 2) + else if (classify_norm_adj_curl == 2) NormAdj = NormAdj * NormAdj; else - NormAdj = pow (NormAdj, NormAdjCurl); + NormAdj = pow(static_cast(NormAdj), classify_norm_adj_curl); return (1.0 / (1.0 + NormAdj)); } @@ -242,8 +193,8 @@ void PrintNormMatch(FILE *File, FLOAT32 TotalMatch; for (i = 0, TotalMatch = 0.0; i < NumParams; i++) { - ParamMatch = (Feature->Params[i] - Mean (Proto, i)) / - StandardDeviation (Proto, i); + ParamMatch = (Feature->Params[i] - 
Mean(Proto, i)) / + StandardDeviation(Proto, i); fprintf (File, " %6.1f", ParamMatch); @@ -257,7 +208,8 @@ void PrintNormMatch(FILE *File, /*---------------------------------------------------------------------------*/ -NORM_PROTOS *ReadNormProtos(FILE *File) { +namespace tesseract { +NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) { /* ** Parameters: ** File open text file to read normalization protos from @@ -288,7 +240,8 @@ NORM_PROTOS *ReadNormProtos(FILE *File) { NormProtos->ParamDesc = ReadParamDesc (File, NormProtos->NumParams); /* read protos for each class into a separate list */ - while (fscanf (File, "%s %d", unichar, &NumProtos) == 2) { + while ((end_offset < 0 || ftell(File) < end_offset) && + fscanf(File, "%s %d", unichar, &NumProtos) == 2) { if (unicharset.contains_unichar(unichar)) { unichar_id = unicharset.unichar_to_id(unichar); Protos = NormProtos->Protos[unichar_id]; @@ -298,8 +251,8 @@ NORM_PROTOS *ReadNormProtos(FILE *File) { NormProtos->Protos[unichar_id] = Protos; } else cprintf("Error: unichar %s in normproto file is not in unichar set.\n"); + SkipNewline(File); } - return (NormProtos); - } /* ReadNormProtos */ +} // namespace tesseract diff --git a/classify/normmatch.h b/classify/normmatch.h index 68ec5d4e1..df928f8cf 100644 --- a/classify/normmatch.h +++ b/classify/normmatch.h @@ -22,17 +22,16 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "matchdefs.h" -//#include "cluster.h" #include "ocrfeatures.h" +#include "varable.h" /**---------------------------------------------------------------------------- - Public Function Prototypes + Variables ----------------------------------------------------------------------------**/ -FLOAT32 ComputeNormMatch(CLASS_ID ClassId, FEATURE Feature, BOOL8 DebugMatch); -void GetNormProtos(); -void FreeNormProtos(); - -void InitNormProtoVars(); +/* control knobs used to control the normalization adjustment process 
*/ +extern double_VAR_H(classify_norm_adj_midpoint, 32.0, + "Norm adjust midpoint ..."); +extern double_VAR_H(classify_norm_adj_curl, 2.0, "Norm adjust curl ..."); #endif diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp index 3459d421c..f0a69f76f 100644 --- a/classify/ocrfeatures.cpp +++ b/classify/ocrfeatures.cpp @@ -25,6 +25,9 @@ #include "freelist.h" #include "scanutils.h" +#include +#include + /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ @@ -45,31 +48,13 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature) { */ if (FeatureSet->NumFeatures >= FeatureSet->MaxNumFeatures) { FreeFeature(Feature); - return (FALSE); + return FALSE; } - FeatureSet->Features[FeatureSet->NumFeatures] = Feature; - FeatureSet->NumFeatures++; - return (TRUE); - + FeatureSet->Features[FeatureSet->NumFeatures++] = Feature; + return TRUE; } /* AddFeature */ - -/*---------------------------------------------------------------------------*/ -void DefaultInitFXVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: This routine can be used by any feature extractor which - ** does not use adjustable controls. - ** It does nothing. - ** Return: none - ** Exceptions: none - ** History: Wed May 23 16:37:45 1990, DSJ, Created. - */ -} /* DefaultInitFXVars */ - - /*---------------------------------------------------------------------------*/ void FreeFeature(FEATURE Feature) { /* @@ -82,7 +67,7 @@ void FreeFeature(FEATURE Feature) { ** History: Mon May 21 13:33:27 1990, DSJ, Created. 
*/ if (Feature) { - c_free_struct (Feature, sizeof (FEATURE_STRUCT) + free_struct (Feature, sizeof (FEATURE_STRUCT) + sizeof (FLOAT32) * (Feature->Type->NumParams - 1), "sizeof(FEATURE_STRUCT)+sizeof(FLOAT32)*(NumParamsIn(Feature)-1)"); } @@ -107,7 +92,7 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { if (FeatureSet) { for (i = 0; i < FeatureSet->NumFeatures; i++) - FreeFeature (FeatureSet->Features[i]); + FreeFeature(FeatureSet->Features[i]); memfree(FeatureSet); } } /* FreeFeatureSet */ @@ -184,6 +169,9 @@ FEATURE ReadFeature(FILE *File, FEATURE_DESC FeatureDesc) { for (i = 0; i < Feature->Type->NumParams; i++) { if (fscanf (File, "%f", &(Feature->Params[i])) != 1) DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec"); +#ifndef __MSW32__ + assert (!isnan(Feature->Params[i])); +#endif } return (Feature); @@ -241,8 +229,12 @@ void WriteFeature(FILE *File, FEATURE Feature) { */ int i; - for (i = 0; i < Feature->Type->NumParams; i++) - fprintf (File, " %12g", Feature->Params[i]); + for (i = 0; i < Feature->Type->NumParams; i++) { +#ifndef __MSW32__ + assert (!isnan(Feature->Params[i])); +#endif + fprintf (File, " %12g", Feature->Params[i]); + } fprintf (File, "\n"); } /* WriteFeature */ diff --git a/classify/ocrfeatures.h b/classify/ocrfeatures.h index b081e8bea..3b23fdf20 100644 --- a/classify/ocrfeatures.h +++ b/classify/ocrfeatures.h @@ -90,7 +90,6 @@ typedef FEATURE_DESC_STRUCT *FEATURE_DESC; typedef struct fxs { FX_FUNC Extractor; /* func to extract features */ - VOID_FUNC InitExtractorVars; /* func to init fx controls */ } FEATURE_EXT_STRUCT; /*---------------------------------------------------------------------- @@ -111,23 +110,19 @@ are as follows: DefineFeature (Name, NumLinear, NumCircular, MinFeatPerChar, MaxFeatPerChar, - LongName, ShortName, ParamName, - Extractor, Displayer, - ComputeExtraPenalty, - InitExtractor, InitExtractorVars, TweekExtractorVars) + LongName, ShortName, ParamName) 
----------------------------------------------------------------------*/ #define DefineFeature(Name, NL, NC, Min, Max, LN, SN, PN) \ FEATURE_DESC_STRUCT Name = { \ ((NL) + (NC)), NL, NC, Min, Max, LN, SN, PN}; -#define DefineFeatureExt(Name, E, IEV) FEATURE_EXT_STRUCT Name = {E, IEV}; + +#define DefineFeatureExt(Name, E) FEATURE_EXT_STRUCT Name = {E}; /*---------------------------------------------------------------------- Generic routines that work for all feature types ----------------------------------------------------------------------*/ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); -void DefaultInitFXVars(); - void FreeFeature(FEATURE Feature); void FreeFeatureSet(FEATURE_SET FeatureSet); diff --git a/classify/outfeat.cpp b/classify/outfeat.cpp index 6ecc58851..a383976c5 100644 --- a/classify/outfeat.cpp +++ b/classify/outfeat.cpp @@ -20,42 +20,11 @@ ----------------------------------------------------------------------------**/ #include "outfeat.h" #include "mfoutline.h" -#include "variables.h" -#include "sigmenu.h" #include "ocrfeatures.h" //Debug #include //Debug #include "efio.h" //Debug -//#include "christydbg.h" -/**---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------**/ -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* /users/danj/wiseowl/src/danj/microfeatures/outfeat.c -void AddOutlineFeatureToSet - _ARGS((FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet)); - -void ConvertToOutlineFeatures - _ARGS((MFOUTLINE Outline, - FEATURE_SET FeatureSet)); - -void NormalizeOutlineX - _ARGS((FEATURE_SET FeatureSet)); - -#undef _ARGS -*/ -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ 
/**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ @@ -89,60 +58,14 @@ FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats) { RemainingOutlines = Outlines; iterate(RemainingOutlines) { Outline = (MFOUTLINE) first_node (RemainingOutlines); - /*---------Debug--------------------------------------------------* - OFile = fopen ("f:/ims/debug/ofOutline.logCPP", "r"); - if (OFile == NULL) - { - OFile = Efopen ("f:/ims/debug/ofOutline.logCPP", "w"); - WriteOutline(OFile, Outline); - } - else - { - fclose (OFile); - OFile = Efopen ("f:/ims/debug/ofOutline.logCPP", "a"); - } - WriteOutline(OFile, Outline); - fclose (OFile); - *--------------------------------------------------------------------*/ ConvertToOutlineFeatures(Outline, FeatureSet); } - if (NormMethod == baseline) + if (classify_norm_method == baseline) NormalizeOutlineX(FeatureSet); - /*---------Debug--------------------------------------------------* - File = fopen ("f:/ims/debug/ofFeatSet.logCPP", "r"); - if (File == NULL) - { - File = Efopen ("f:/ims/debug/ofFeatSet.logCPP", "w"); - WriteFeatureSet(File, FeatureSet); - } - else - { - fclose (File); - File = Efopen ("f:/ims/debug/ofFeatSet.logCPP", "a"); - } - WriteFeatureSet(File, FeatureSet); - fclose (File); - *--------------------------------------------------------------------*/ FreeOutlines(Outlines); return (FeatureSet); } /* ExtractOutlineFeatures */ - -/*---------------------------------------------------------------------------*/ -void InitOutlineFXVars() { - //once contained a dummy -/* - ** Parameters: none - ** Globals: none - ** Operation: Initialize the outline-feature extractor variables that can - ** be tuned without recompiling. - ** Return: none - ** Exceptions: none - ** History: 11/13/90, DSJ, Created. 
- */ -} /* InitOutlineFXVars */ - - /**---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------**/ @@ -169,11 +92,11 @@ void AddOutlineFeatureToSet(FPOINT *Start, */ FEATURE Feature; - Feature = NewFeature (&OutlineFeatDesc); - Feature->Params[OutlineFeatDir] = NormalizedAngleFrom (Start, End, 1.0); - Feature->Params[OutlineFeatX] = AverageOf (Start->x, End->x); - Feature->Params[OutlineFeatY] = AverageOf (Start->y, End->y); - Feature->Params[OutlineFeatLength] = DistanceBetween (*Start, *End); + Feature = NewFeature(&OutlineFeatDesc); + Feature->Params[OutlineFeatDir] = NormalizedAngleFrom(Start, End, 1.0); + Feature->Params[OutlineFeatX] = AverageOf(Start->x, End->x); + Feature->Params[OutlineFeatY] = AverageOf(Start->y, End->y); + Feature->Params[OutlineFeatLength] = DistanceBetween(*Start, *End); AddFeature(FeatureSet, Feature); } /* AddOutlineFeatureToSet */ @@ -206,16 +129,16 @@ void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet) { First = Outline; Next = First; do { - CopyPoint (PointAt (Next)->Point, FeatureStart); - Next = NextPointAfter (Next); + FeatureStart = PointAt(Next)->Point; + Next = NextPointAfter(Next); /* note that an edge is hidden if the ending point of the edge is marked as hidden. This situation happens because the order of the outlines is reversed when they are converted from the old format. In the old format, a hidden edge is marked by the starting point for that edge. */ - if (! 
(PointAt (Next)->Hidden)) { - CopyPoint (PointAt (Next)->Point, FeatureEnd); + if (!PointAt(Next)->Hidden) { + FeatureEnd = PointAt(Next)->Point; AddOutlineFeatureToSet(&FeatureStart, &FeatureEnd, FeatureSet); } } diff --git a/classify/outfeat.h b/classify/outfeat.h index 818a0c3d2..5a930d7bb 100644 --- a/classify/outfeat.h +++ b/classify/outfeat.h @@ -36,9 +36,7 @@ typedef enum { /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats); - -void InitOutlineFXVars(); +FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob, LINE_STATS *LineStats); /*--------------------------------------------------------------------------- Privat Function Prototypes @@ -47,27 +45,9 @@ void AddOutlineFeatureToSet(FPOINT *Start, FPOINT *End, FEATURE_SET FeatureSet); -void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet); +void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet); -void NormalizeOutlineX(FEATURE_SET FeatureSet); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* outfeat.c * -FEATURE_SET ExtractOutlineFeatures - _ARGS((BLOB *Blob, - LINE_STATS *LineStats)); - -void InitOutlineFXVars - _ARGS((void)); - -#undef _ARGS -*/ +void NormalizeOutlineX(FEATURE_SET FeatureSet); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index ac1ed8a67..60c0a96d1 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -20,19 +20,21 @@ ----------------------------------------------------------------------------**/ #include "picofeat.h" #include "mfoutline.h" -#include "variables.h" -#include "sigmenu.h" #include "hideedge.h" #include "fpoint.h" +#include 
"varable.h" #include #include "ocrfeatures.h" //Debug #include //Debug #include "efio.h" //Debug -//#include "christydbg.h" -#define PICO_FEATURE_LENGTH 0.05 +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +double_VAR(classify_pico_feature_length, 0.05, "Pico Feature Length"); /*--------------------------------------------------------------------------- Private Function Prototypes @@ -45,33 +47,6 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet); void NormalizePicoX(FEATURE_SET FeatureSet); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* /users/danj/wiseowl/src/danj/microfeatures/picofeat.c -void ConvertSegmentToPicoFeat - _ARGS((FPOINT *Start, - FPOINT *End, - FEATURE_SET FeatureSet)); - -void ConvertToPicoFeatures2 - _ARGS((MFOUTLINE Outline, - FEATURE_SET FeatureSet)); - -void NormalizePicoX - _ARGS((FEATURE_SET FeatureSet)); - -#undef _ARGS -*/ - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ - /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ @@ -82,7 +57,7 @@ FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { ** Blob blob to extract pico-features from ** LineStats statistics on text row blob is in ** Globals: - ** NormMethod normalization method currently specified + ** classify_norm_method normalization method currently specified ** Operation: Dummy for now. ** Return: Pico-features for Blob. 
** Exceptions: none @@ -119,7 +94,7 @@ FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { *--------------------------------------------------------------------*/ ConvertToPicoFeatures2(Outline, FeatureSet); } - if (NormMethod == baseline) + if (classify_norm_method == baseline) NormalizePicoX(FeatureSet); /*---------Debug--------------------------------------------------* File = fopen ("f:/ims/debug/pfFeatSet.logCPP", "r"); @@ -141,27 +116,6 @@ FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats) { } /* ExtractPicoFeatures */ - -/*---------------------------------------------------------------------------*/ -void InitPicoFXVars() { -/* - ** Parameters: none - ** Globals: - ** PicoFeatureLength controls length of pico-features - ** Operation: Initialize the pico-feature extractor variables that can - ** be tuned without recompiling. - ** Return: none - ** Exceptions: none - ** History: 9/4/90, DSJ, Created. - */ - - VALUE dummy; - - float_variable (PicoFeatureLength, "PicoFeatureLength", - PICO_FEATURE_LENGTH); - -} /* InitPicoFXVars */ - /**---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------**/ @@ -175,7 +129,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, ** End ending point of pico-feature ** FeatureSet set to add pico-feature to ** Globals: - ** PicoFeatureLength length of a single pico-feature + ** classify_pico_feature_length length of a single pico-feature ** Operation: This routine converts an entire segment of an outline ** into a set of pico features which are added to ** FeatureSet. 
The length of the segment is rounded to the @@ -195,7 +149,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start, Angle = NormalizedAngleFrom (Start, End, 1.0); Length = DistanceBetween (*Start, *End); - NumFeatures = (int) floor (Length / PicoFeatureLength + 0.5); + NumFeatures = (int) floor (Length / classify_pico_feature_length + 0.5); if (NumFeatures < 1) NumFeatures = 1; @@ -228,7 +182,8 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { ** Outline outline to extract micro-features from ** FeatureSet set of features to add pico-features to ** Globals: - ** PicoFeatureLength length of features to be extracted + ** classify_pico_feature_length + ** length of features to be extracted ** Operation: ** This routine steps thru the specified outline and cuts it ** up into pieces of equal length. These pieces become the @@ -242,24 +197,24 @@ void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet) { MFOUTLINE First; MFOUTLINE Current; - if (DegenerateOutline (Outline)) + if (DegenerateOutline(Outline)) return; First = Outline; Current = First; - Next = NextPointAfter (Current); + Next = NextPointAfter(Current); do { /* note that an edge is hidden if the ending point of the edge is marked as hidden. This situation happens because the order of the outlines is reversed when they are converted from the old format. In the old format, a hidden edge is marked by the starting point for that edge. 
*/ - if (!(PointAt (Next)->Hidden)) - ConvertSegmentToPicoFeat (&(PointAt (Current)->Point), - &(PointAt (Next)->Point), FeatureSet); + if (!(PointAt(Next)->Hidden)) + ConvertSegmentToPicoFeat (&(PointAt(Current)->Point), + &(PointAt(Next)->Point), FeatureSet); Current = Next; - Next = NextPointAfter (Current); + Next = NextPointAfter(Current); } while (Current != First); diff --git a/classify/picofeat.h b/classify/picofeat.h index 8c08ee6b4..f06543d46 100644 --- a/classify/picofeat.h +++ b/classify/picofeat.h @@ -24,6 +24,7 @@ #include "ocrfeatures.h" #include "tessclas.h" #include "fxdefs.h" +#include "varable.h" typedef enum { PicoFeatY, PicoFeatDir, PicoFeatX } @@ -31,6 +32,13 @@ PICO_FEAT_PARAM_NAME; #define MAX_PICO_FEATURES (1000) +/*--------------------------------------------------------------------------- + Variables +----------------------------------------------------------------------------*/ + +extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); + + /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ @@ -38,25 +46,6 @@ PICO_FEAT_PARAM_NAME; FEATURE_SET ExtractPicoFeatures(TBLOB *Blob, LINE_STATS *LineStats); -void InitPicoFXVars(); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* picofeat.c -FEATURE_SET ExtractPicoFeatures - _ARGS((BLOB *Blob, - LINE_STATS *LineStats)); - -void InitPicoFXVars - _ARGS((void)); - -#undef _ARGS -*/ /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ diff --git a/classify/protos.cpp b/classify/protos.cpp index 830c4c1b0..8cfd56646 100644 --- a/classify/protos.cpp +++ b/classify/protos.cpp @@ -26,7 +26,6 @@ I n c l u d e s 
----------------------------------------------------------------------*/ #include "protos.h" -#include "debug.h" #include "const.h" #include "emalloc.h" #include "freelist.h" @@ -35,6 +34,8 @@ #include "adaptmatch.h" #include "scanutils.h" #include "globals.h" +#include "classify.h" +#include "varable.h" #include #include @@ -47,9 +48,7 @@ ----------------------------------------------------------------------*/ CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES]; -char *TrainingFile; - -//extern int LearningDebugLevel; +STRING_VAR(classify_training_file, "MicroFeatures", "Training file"); /*---------------------------------------------------------------------- F u n c t i o n s @@ -79,8 +78,7 @@ int AddConfigToClass(CLASS_TYPE Class) { Class->MaxNumConfigs = NewNumConfigs; } - NewConfig = Class->NumConfigs; - Class->NumConfigs++; + NewConfig = Class->NumConfigs++; Config = NewBitVector (MaxNumProtos); Class->Configurations[NewConfig] = Config; zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos)); @@ -206,7 +204,7 @@ void FillABC(PROTO Proto) { void FreeClass(CLASS_TYPE Class) { if (Class) { FreeClassFields(Class); - memfree(Class); + delete Class; } } @@ -230,18 +228,6 @@ void FreeClassFields(CLASS_TYPE Class) { } } - -/********************************************************************** - * InitPrototypes - * - * Initialize anything that needs to be initialized to work with the - * functions in this file. 
- **********************************************************************/ -void InitPrototypes() { - string_variable (TrainingFile, "TrainingFile", "MicroFeatures"); -} - - /********************************************************************** * NewClass * @@ -251,7 +237,7 @@ void InitPrototypes() { CLASS_TYPE NewClass(int NumProtos, int NumConfigs) { CLASS_TYPE Class; - Class = (CLASS_TYPE) Emalloc (sizeof (CLASS_STRUCT)); + Class = new CLASS_STRUCT; if (NumProtos > 0) Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT)); @@ -292,15 +278,17 @@ void PrintProtos(CLASS_TYPE Class) { * Read in the training data from a file. All of the classes are read * in. The results are stored in the global variable, 'TrainingData'. **********************************************************************/ -void ReadClassFile() { +namespace tesseract { +void Classify::ReadClassFile() { FILE *File; char TextLine[CHARS_PER_LINE]; char unichar[CHARS_PER_LINE]; - cprintf ("Reading training data from '%s' ...", TrainingFile); + cprintf ("Reading training data from '%s' ...", + static_cast<STRING>(classify_training_file).string()); fflush(stdout); - File = open_file (TrainingFile, "r"); + File = open_file(static_cast<STRING>(classify_training_file).string(), "r"); while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) { sscanf(TextLine, "%s", unichar); @@ -311,6 +299,7 @@ void ReadClassFile() { fclose(File); new_line(); } +} // namespace tesseract /********************************************************************** * ReadClassFromFile diff --git a/classify/protos.h b/classify/protos.h index d77cff999..5e92b7ff3 100644 --- a/classify/protos.h +++ b/classify/protos.h @@ -31,6 +31,8 @@ #include "bitvec.h" #include "cutil.h" #include "unichar.h" +#include "unicity_table.h" +#include "varable.h" /*---------------------------------------------------------------------- T y p e s @@ -57,6 +59,7 @@ typedef struct inT16 NumConfigs; inT16 MaxNumConfigs; CONFIGS Configurations; +
UnicityTableEqEq<int> font_set; } CLASS_STRUCT; typedef CLASS_STRUCT *CLASS_TYPE; typedef CLASS_STRUCT *CLASSES; @@ -73,6 +76,8 @@ typedef CLASS_STRUCT *CLASSES; ----------------------------------------------------------------------*/ extern CLASS_STRUCT TrainingData[]; +extern STRING_VAR_H(classify_training_file, "MicroFeatures", "Training file"); + /*---------------------------------------------------------------------- M a c r o s ----------------------------------------------------------------------*/ @@ -124,10 +129,10 @@ extern CLASS_STRUCT TrainingData[]; #define PrintProto(Proto) \ (cprintf ("X=%4.2f, Y=%4.2f, Angle=%4.2f", \ - Proto->X, \ - Proto->Y, \ - Proto->Length, \ - Proto->Angle)) \ + Proto->X, \ + Proto->Y, \ + Proto->Length, \ + Proto->Angle)) \ /********************************************************************** @@ -168,8 +173,6 @@ CLASS_TYPE NewClass(int NumProtos, int NumConfigs); void PrintProtos(CLASS_TYPE Class); -void ReadClassFile(); - void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id); void ReadConfigs(register FILE *File, CLASS_TYPE Class); @@ -182,77 +185,4 @@ void WriteOldConfigFile(FILE *File, CLASS_TYPE Class); void WriteOldProtoFile(FILE *File, CLASS_TYPE Class); -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* protos.c * -int AddConfigToClass - _ARGS((CLASS_TYPE Class)); - -int AddProtoToClass - _ARGS((CLASS_TYPE Class)); - -FLOAT32 ClassConfigLength - _ARGS((CLASS_TYPE Class, - BIT_VECTOR Config)); - -FLOAT32 ClassProtoLength - _ARGS((CLASS_TYPE Class)); - -void CopyProto - _ARGS((PROTO Src, - PROTO Dest)); - -void FillABC - _ARGS((PROTO Proto)); - -void FreeClass - _ARGS((CLASS_TYPE Class)); - -void FreeClassFields - _ARGS((CLASS_TYPE Class)); - -void InitPrototypes - _ARGS((void)); - -CLASS_TYPE NewClass - _ARGS((int NumProtos, - int NumConfigs)); - -void PrintProtos - _ARGS((CLASS_TYPE Class)); - -void ReadClassFile - _ARGS((void)); - -void
ReadClassFromFile - _ARGS((FILE *File, - int ClassChar)); - -void ReadConfigs - _ARGS((FILE *File, - CLASS_TYPE Class)); - -void ReadProtos - _ARGS((FILE *File, - CLASS_TYPE Class)); - -int SplitProto - _ARGS((CLASS_TYPE Class, - int OldPid)); - -void WriteOldConfigFile - _ARGS((FILE *File, - CLASS_TYPE Class)); - -void WriteOldProtoFile - _ARGS((FILE *File, - CLASS_TYPE Class)); - -#undef _ARGS -*/ #endif diff --git a/classify/sigmenu.cpp b/classify/sigmenu.cpp deleted file mode 100644 index 1f3f4e957..000000000 --- a/classify/sigmenu.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/****************************************************************************** - ** Filename: sigmenu.c - ** Purpose: General purpose, menu-oriented signal handling routines - ** Author: Dan Johnson - ** History: Mon Oct 2 07:25:50 1989, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "sigmenu.h" -#include "oldlist.h" -#include "emalloc.h" -#include "secname.h" - -#include -#include -#include - -#define MAX_COMMAND_LENGTH 128 - -typedef struct -{ - int ItemNum; - char *ItemLabel; - int_void ItemFunc; -} SIG_MENU_ITEM; - -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -static LIST SignalMenus[NSIG]; /* must be initialized to NIL */ - -/**---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------**/ -void MainSignalHandler(int Signal); - -SIG_MENU_ITEM *NewSignalMenuItem (int ItemNum, -const char ItemLabel[], int_void ItemFunc); - -int ItemCompare(void *arg1, //SIG_MENU_ITEM *Item1, - void *arg2); //SIG_MENU_ITEM *Item2); - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void -AddSignalMenuItem (int Signal, -int ItemNum, const char ItemLabel[], int_void ItemFunc) { -/* - ** Parameters: - ** Signal signal to be trapped for this menu - ** ItemNum menu number for this item - ** ItemLabel menu label for this item - ** ItemFunc function to be called when item is selected - ** Globals: - ** SignalMenus list of menu items for each possible signal - ** Operation: - ** Add a new menu item to the list of menu items for Signal. 
- ** Whenever Signal is encountered, the user will be given - ** a list of options to choose from. This list is the list - ** of all of the menu items that have been specified for that - ** Signal. - ** Return: none - ** Exceptions: none - ** History: Mon Oct 2 07:42:19 1989, DSJ, Created. - */ - #if 0 - #ifndef SECURE_NAMES - SIG_MENU_ITEM *NewItem; - - /* check for a valid Signal */ - if (Signal >= NSIG || Signal <= 0) { - cprintf ("Illegal signal (%d) specified for menu item!\n", Signal); - return; - } - - /* if this is the first item for this signal, indicate that the - appropriate signal handler has been enabled */ - if (SignalMenus[Signal] == NIL) - cprintf ("Signal handler enabled for signal %d.\n", Signal); - - /* add the new menu item to the appropriate list of menu items */ - NewItem = NewSignalMenuItem (ItemNum, ItemLabel, ItemFunc); - SignalMenus[Signal] = s_adjoin (SignalMenus[Signal], NewItem, ItemCompare); - - /* set up the trap for the appropriate signal */ - signal(Signal, MainSignalHandler); - #endif - #endif -} /* AddSignalMenuItem */ - - -/**---------------------------------------------------------------------------- - Private Code -----------------------------------------------------------------------------**/ -/*---------------------------------------------------------------------------*/ -void MainSignalHandler(int Signal) { -/* - ** Parameters: - ** Signal signal that caused this function to be called - ** Globals: - ** SignalMenus list of menu items for each possible signal - ** Operation: Provide the user with a menu of actions for the trapped - ** signal. Execute the appropriate function. If the function - ** returns SIG_RESUME, then terminate the signal handler and - ** resume normal processing. If the function does not return - ** SIG_RESUME, remain in the main signal handler menu. - ** Return: none - ** Exceptions: none - ** History: Mon Oct 2 08:18:52 1989, DSJ, Created. 
- */ - #ifndef SECURE_NAMES - int Command; - char CommandLine[MAX_COMMAND_LENGTH]; - char *Params; - LIST Items; - SIG_MENU_ITEM *MenuItem; - - while (TRUE) { - Command = -1; - cprintf ("\nMAIN SIGNAL HANDLER FOR SIGNAL %d\n", Signal); - cprintf ("0. Resume normal operation\n"); - - Items = SignalMenus[Signal]; - iterate(Items) { - MenuItem = (SIG_MENU_ITEM *) first_node (Items); - cprintf ("%d. %s\n", MenuItem->ItemNum, MenuItem->ItemLabel); - } - cprintf ("\nEnter Selection: "); - - while (fgets (CommandLine, MAX_COMMAND_LENGTH, stdin) == NULL - || strlen (CommandLine) <= 0); - - Command = strtol (CommandLine, &Params, 10); - if (CommandLine == Params) { - cprintf ("\nIllegal command! - Try again.\n"); - continue; - } - - if (Command == 0) - signal(Signal, MainSignalHandler); - - Items = SignalMenus[Signal]; - iterate(Items) { - MenuItem = (SIG_MENU_ITEM *) first_node (Items); - if (Command == MenuItem->ItemNum) { - if ((*MenuItem->ItemFunc) ( /*Params */ ) == SIG_RESUME) - signal(Signal, MainSignalHandler); - break; - } - } - if (Items == NIL) - cprintf ("\nIllegal command! - Try again.\n"); - } - #endif -} /* MainSignalHandler */ - - -/*---------------------------------------------------------------------------*/ -SIG_MENU_ITEM * -NewSignalMenuItem (int ItemNum, const char ItemLabel[], int_void ItemFunc) { -/* - ** Parameters: - ** ItemNum menu number for this item - ** ItemLabel menu label for this item - ** ItemFunc function to be called when item is selected - ** Globals: none - ** Operation: Allocate, initialize, and return a new signal menu item. - ** Return: Ptr to new signal menu item data structure. - ** Exceptions: none - ** History: Mon Oct 2 08:04:20 1989, DSJ, Created. 
- */ - SIG_MENU_ITEM *NewItem; - - NewItem = (SIG_MENU_ITEM *) Emalloc (sizeof (SIG_MENU_ITEM)); - NewItem->ItemNum = ItemNum; - NewItem->ItemFunc = ItemFunc; - NewItem->ItemLabel = (char *) Emalloc (strlen (ItemLabel) + 1); - strcpy (NewItem->ItemLabel, ItemLabel); - return (NewItem); - -} /* NewSignalMenuItem */ - - -/*---------------------------------------------------------------------------*/ -int ItemCompare(void *arg1, //SIG_MENU_ITEM *Item1, - void *arg2) { //SIG_MENU_ITEM *Item2) -/* - ** Parameters: - ** Item1, Item2 two menu items to be compared - ** Globals: none - ** Operation: Return -1 if the ItemNum of Item1 is less than the - ** ItemNum of Item2. Return 0 if they are equal. Return +1 - ** if the ItemNum of Item1 is greater than the ItemNum of - ** Item2. This routine is used by the list sorter to sort - ** lists of menu items according to their item number. - ** Return: -1, 0, or 1 - ** Exceptions: none - ** History: Mon Oct 2 08:11:59 1989, DSJ, Created. - */ - SIG_MENU_ITEM *Item1 = (SIG_MENU_ITEM *) arg1; - SIG_MENU_ITEM *Item2 = (SIG_MENU_ITEM *) arg2; - - if (Item1->ItemNum < Item2->ItemNum) - return (-1); - else if (Item1->ItemNum == Item2->ItemNum) - return (0); - else if (Item1->ItemNum > Item2->ItemNum) - return (1); - else - return 0; -} /* ItemCompare */ diff --git a/classify/sigmenu.h b/classify/sigmenu.h deleted file mode 100644 index 7eab09f40..000000000 --- a/classify/sigmenu.h +++ /dev/null @@ -1,39 +0,0 @@ -/****************************************************************************** - ** Filename: sigmenu.h - ** Purpose: Definition of signal handler routines - ** Author: Dan Johnson - ** History: 10/2/89, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef SIGMENU_H -#define SIGMENU_H - -/**---------------------------------------------------------------------------- - Include Files and Type Defines -----------------------------------------------------------------------------**/ -#include "cutil.h" -#include - -/* functions to be placed in the signal menu look like: */ -//typedef int (*SIG_MENU_FUNC)(...); -/* the value returned from a SIG_MENU_FUNC must be one of the following */ -#define SIG_RESUME 1 -#define SIG_MENU 0 - -/**---------------------------------------------------------------------------- - Public Function Prototypes -----------------------------------------------------------------------------**/ -void AddSignalMenuItem (int Signal, -int ItemNum, -const char ItemLabel[], int_void ItemFunc); -#endif diff --git a/classify/speckle.cpp b/classify/speckle.cpp index 471b4a39a..ddfe2f83c 100644 --- a/classify/speckle.cpp +++ b/classify/speckle.cpp @@ -19,38 +19,35 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "speckle.h" -#include "debug.h" + #include "blobs.h" +#include "ratngs.h" +#include "varable.h" /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ /* define control knobs for adjusting definition of speckle*/ -make_float_var (MaxLargeSpeckleSize, 0.30, MakeMaxLargeSpeckleSize, 
-16, 2, SetMaxLargeSpeckleSize, "Max Large Speckle Size ..."); +double_VAR(speckle_large_max_size, 0.30, "Max large speckle size"); -make_float_var (SmallSpecklePenalty, 10.0, MakeSmallSpecklePenalty, -16, 3, SetSmallSpecklePenalty, "Small Speckle Penalty ..."); +double_VAR(speckle_small_penalty, 10.0, "Small speckle penalty"); -make_float_var (LargeSpecklePenalty, 10.0, MakeLargeSpecklePenalty, -16, 4, SetLargeSpecklePenalty, "Large Speckle Penalty ..."); +double_VAR(speckle_large_penalty, 10.0, "Large speckle penalty"); -make_float_var (SmallSpeckleCertainty, -1.0, MakeSmallSpeckleCertainty, -16, 5, SetSmallSpeckleCertainty, -"Small Speckle Certainty ..."); +double_VAR(speckle_small_certainty, -1.0, "Small speckle certainty"); /**---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -LIST AddLargeSpeckleTo(LIST Choices) { +void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) { /* ** Parameters: ** Choices choices to add a speckle choice to ** Globals: - ** SmallSpecklePenalty rating for a small speckle - ** LargeSpecklePenalty rating penalty for a large speckle - ** SmallSpeckleCertainty certainty for a small speckle + ** speckle_small_penalty rating for a small speckle + ** speckle_large_penalty rating penalty for a large speckle + ** speckle_small_certainty certainty for a small speckle ** Operation: This routine adds a null choice to Choices with a ** rating equal to the worst rating in Choices plus a pad. ** The certainty of the new choice is the same as the @@ -60,43 +57,31 @@ LIST AddLargeSpeckleTo(LIST Choices) { ** Exceptions: none ** History: Mon Mar 11 11:08:11 1991, DSJ, Created. 
*/ - LIST WorstChoice; - char empty_lengths[] = {0}; + assert(Choices != NULL); + BLOB_CHOICE *blob_choice; + BLOB_CHOICE_IT temp_it; + temp_it.set_to_list(Choices); - /* if there are no other choices, use the small speckle penalty plus - the large speckle penalty */ - if (Choices == NIL) - return (append_choice (NIL, "", empty_lengths, SmallSpecklePenalty + LargeSpecklePenalty, - SmallSpeckleCertainty, -1)); - - /* if there are other choices, add a null choice that is slightly worse - than the worst choice so far */ - WorstChoice = last (Choices); - return (append_choice (Choices, "", empty_lengths, - best_probability (WorstChoice) + LargeSpecklePenalty, - best_certainty (WorstChoice), -1)); + // If there are no other choices, use the small speckle penalty plus + // the large speckle penalty. + if (Choices->length() == 0) { + blob_choice = + new BLOB_CHOICE(0, speckle_small_certainty + speckle_large_penalty, + speckle_small_certainty, -1, NULL); + temp_it.add_to_end(blob_choice); + return; + } + // If there are other choices, add a null choice that is slightly worse + // than the worst choice so far. + temp_it.move_to_last(); + blob_choice = temp_it.data(); // pick the worst choice + temp_it.add_to_end( + new BLOB_CHOICE(0, blob_choice->rating() + speckle_large_penalty, + blob_choice->certainty(), -1, NULL)); } /* AddLargeSpeckleTo */ -/*---------------------------------------------------------------------------*/ -void InitSpeckleVars() { -/* - ** Parameters: none - ** Globals: none - ** Operation: Install the control variables needed for the speckle - ** filters. - ** Return: none - ** Exceptions: none - ** History: Mon Mar 11 12:04:59 1991, DSJ, Created. 
- */ - MakeMaxLargeSpeckleSize(); - MakeSmallSpecklePenalty(); - MakeLargeSpecklePenalty(); - MakeSmallSpeckleCertainty(); -} /* InitSpeckleVars */ - - /*---------------------------------------------------------------------------*/ BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row) { /* @@ -111,15 +96,15 @@ BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row) { ** Exceptions: none ** History: Mon Mar 11 10:06:49 1991, DSJ, Created. */ - FLOAT32 SpeckleSize; + double speckle_size; TPOINT TopLeft; TPOINT BottomRight; - SpeckleSize = RowHeight (Row) * MaxLargeSpeckleSize; + speckle_size = RowHeight (Row) * speckle_large_max_size; blob_bounding_box(Blob, &TopLeft, &BottomRight); - if (TopLeft.y - BottomRight.y < SpeckleSize && - BottomRight.x - TopLeft.x < SpeckleSize) + if (TopLeft.y - BottomRight.y < speckle_size && + BottomRight.x - TopLeft.x < speckle_size) return (TRUE); else return (FALSE); diff --git a/classify/speckle.h b/classify/speckle.h index 1f6de31a9..8f896587e 100644 --- a/classify/speckle.h +++ b/classify/speckle.h @@ -21,49 +21,23 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ + #include "baseline.h" -#include "choices.h" +#include "ratngs.h" /**---------------------------------------------------------------------------- Macros ----------------------------------------------------------------------------**/ /* macro for getting the height of a row of text */ -#define RowHeight(R) ((is_baseline_normalized ())? \ +#define RowHeight(R) ((classify_baseline_normalized)? 
\ (BASELINE_SCALE): \ ((R)->lineheight)) /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -LIST AddLargeSpeckleTo(LIST Choices); +void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices); -void InitSpeckleVars(); +BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row); -BOOL8 LargeSpeckle(TBLOB *Blob, TEXTROW *Row); - -/* -#if defined(__STDC__) || defined(__cplusplus) -# define _ARGS(s) s -#else -# define _ARGS(s) () -#endif*/ - -/* speckle.c -LIST AddLargeSpeckleTo - _ARGS((LIST Choices)); - -void InitSpeckleVars - _ARGS((void)); - -BOOL8 LargeSpeckle - _ARGS((BLOB *Blob, - TEXTROW *Row)); - -#undef _ARGS -*/ -/**---------------------------------------------------------------------------- - Global Data Definitions and Declarations -----------------------------------------------------------------------------**/ -extern float SmallSpecklePenalty; -extern float SmallSpeckleCertainty; #endif diff --git a/classify/xform2d.cpp b/classify/xform2d.cpp index 31f7dc45a..05ec12632 100644 --- a/classify/xform2d.cpp +++ b/classify/xform2d.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: xform2d.c - ** Purpose: Library routines for performing 2D point transformations - ** Author: Dan Johnson - ** History: Fri Sep 22 09:54:17 1989, DSJ, Created. + ** Filename: xform2d.c + ** Purpose: Library routines for performing 2D point transformations + ** Author: Dan Johnson + ** History: Fri Sep 22 09:54:17 1989, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -26,33 +26,33 @@ ----------------------------------------------------------------------------**/ void InitMatrix(MATRIX_2D *M) { - M->a = 1; - M->b = 0; - M->c = 0; - M->d = 1; - M->tx = 0; - M->ty = 0; + M->a = 1; + M->b = 0; + M->c = 0; + M->d = 1; + M->tx = 0; + M->ty = 0; } void CopyMatrix(MATRIX_2D *A, MATRIX_2D *B) { - B->a = A->a; - B->b = A->b; - B->c = A->c; - B->d = A->d; - B->tx = A->tx; - B->ty = A->ty; + B->a = A->a; + B->b = A->b; + B->c = A->c; + B->d = A->d; + B->tx = A->tx; + B->ty = A->ty; } void TranslateMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) { - M->tx += M->a * X + M->c * Y; - M->ty += M->b * X + M->d * Y; + M->tx += M->a * X + M->c * Y; + M->ty += M->b * X + M->d * Y; } void ScaleMatrix(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) { - M->a *= X; - M->b *= X; - M->c *= Y; - M->d *= Y; + M->a *= X; + M->b *= X; + M->c *= Y; + M->d *= Y; } void MirrorMatrixInX(MATRIX_2D *M) {ScaleMatrix(M, -1, 1);} @@ -60,49 +60,49 @@ void MirrorMatrixInY(MATRIX_2D *M) {ScaleMatrix(M, 1, -1);} void MirrorMatrixInXY(MATRIX_2D *M) {ScaleMatrix(M, -1, -1);} FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) { - return M->a * (X) + (M)->c * (Y) + (M)->tx; + return M->a * (X) + (M)->c * (Y) + (M)->tx; } FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y) { - return M->b * X + M->d * Y + M->ty; + return M->b * X + M->d * Y + M->ty; } -void MapPoint(MATRIX_2D *M, FPOINT &A, FPOINT &B) { - B.x = MapX (M, A.x, A.y); - B.y = MapY (M, A.x, A.y); +void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B) { + B->x = MapX(M, A.x, A.y); + B->y = MapY(M, A.x, A.y); } FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) { - return M->a * DX + M->c * DY; + return M->a * DX + M->c * DY; } FLOAT32 MapDy(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY) { - return M->b * DX + M->d * DY; + return M->b * DX + M->d * DY; } /*---------------------------------------------------------------------------*/ void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle) { /* - ** 
Parameters: - ** Matrix transformation matrix to rotate - ** Angle angle to rotate matrix - ** Globals: none - ** Operation: - ** Rotate the coordinate system (as specified by Matrix) about - ** its origin by Angle radians. In matrix notation the - ** effect is as follows: + ** Parameters: + ** Matrix transformation matrix to rotate + ** Angle angle to rotate matrix + ** Globals: none + ** Operation: + ** Rotate the coordinate system (as specified by Matrix) about + ** its origin by Angle radians. In matrix notation the + ** effect is as follows: ** - ** Matrix = R X Matrix + ** Matrix = R X Matrix ** - ** where R is the following matrix + ** where R is the following matrix ** - ** cos Angle sin Angle 0 - ** -sin Angle cos Angle 0 - ** 0 0 1 - ** Return: none - ** Exceptions: none - ** History: 7/27/89, DSJ, Create. + ** cos Angle sin Angle 0 + ** -sin Angle cos Angle 0 + ** 0 0 1 + ** Return: none + ** Exceptions: none + ** History: 7/27/89, DSJ, Create. */ FLOAT32 Cos, Sin; FLOAT32 NewA, NewB; diff --git a/classify/xform2d.h b/classify/xform2d.h index 551cf247c..1cd3bb121 100644 --- a/classify/xform2d.h +++ b/classify/xform2d.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: xform2d.h - ** Purpose: Definitions for using 2D point transformation library - ** Author: Dan Johnson - ** History: Fri Sep 22 09:57:08 1989, DSJ, Created. + ** Filename: xform2d.h + ** Purpose: Definitions for using 2D point transformation library + ** Author: Dan Johnson + ** History: Fri Sep 22 09:57:08 1989, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -51,10 +51,10 @@ FLOAT32 MapX(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y); FLOAT32 MapY(MATRIX_2D *M, FLOAT32 X, FLOAT32 Y); -void MapPoint(MATRIX_2D *M, FPOINT &A, FPOINT &B); +void MapPoint(MATRIX_2D *M, const FPOINT &A, FPOINT* B); FLOAT32 MapDx(MATRIX_2D *M, FLOAT32 DX, FLOAT32 DY); FLOAT32 MapDy(MATRIX_2D M, FLOAT32 DX, FLOAT32 DY); - void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle); + void RotateMatrix(MATRIX_2D_PTR Matrix, FLOAT32 Angle); #endif