tesseract/training/commontraining.h
theraysmith 16c84ed6ec More Changes to training for 3.00
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@306 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2009-07-11 02:51:09 +00:00

155 lines
4.2 KiB
C

// Copyright 2008 Google Inc. All Rights Reserved.
// Author: scharron@google.com (Samuel Charron)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
#define TESSERACT_TRAINING_COMMONTRAINING_H__
#include "oldlist.h"
#include "cluster.h"
#include "intproto.h"
//////////////////////////////////////////////////////////////////////////////
// Macros ////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Size, in chars, of fixed name buffers; used in this header to dimension
// the CTFontName array.
#define MAXNAMESIZE 80
// Float constant equal to 1/64.
// NOTE(review): the name suggests a minimum standard deviation for an angle
// value -- confirm against the code that uses this macro.
#define MINSD_ANGLE (1.0f / 64.0f)
//////////////////////////////////////////////////////////////////////////////
// Globals ///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Boolean switches.
// NOTE(review): judging by the names these select which prototypes/samples
// are shown for debugging -- confirm at their definitions.
extern BOOL8 ShowSignificantProtos;
extern BOOL8 ShowInsignificantProtos;
extern BOOL8 ShowAllSamples;
// Must be defined in the file that "implements" commonTraining facilities.
// This header only declares it.
extern CLUSTERCONFIG Config;
extern FLOAT32 RoundingAccuracy;
// Fixed-size character buffer of MAXNAMESIZE chars.
// NOTE(review): presumably holds the name of the font being processed --
// confirm against the implementation.
extern char CTFontName[MAXNAMESIZE];
// Globals used for parsing command line arguments.
extern char *Directory;
extern const char* test_ch;
extern const char *InputUnicharsetFile;
extern const char *OutputUnicharsetFile;
extern const char *InputFontInfoFile;
// The unicharset used during training.
extern UNICHARSET unicharset_training;
//////////////////////////////////////////////////////////////////////////////
// Structs ///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// A node pairing a text label with a LIST and a counter.
// NOTE(review): presumably one node per character/class label, with List
// holding that label's training samples -- confirm against the users.
typedef struct {
  char *Label;      // Label text for this node (ownership not visible here).
  int SampleCount;  // Counter; presumably the number of samples -- confirm.
  LIST List;        // The list associated with Label.
} LABELEDLISTNODE;

// Pointer form of LABELEDLISTNODE, used by the functions declared below.
typedef LABELEDLISTNODE *LABELEDLIST;
// A node pairing a text label with a CLASS_TYPE and a per-proto counter
// array.
// NOTE(review): presumably used while merging prototypes of one class --
// confirm against the implementation.
typedef struct {
  char *Label;                    // Label text for this class.
  int NumMerged[MAX_NUM_PROTOS];  // One counter per possible proto slot.
  CLASS_TYPE Class;               // The class data associated with Label.
} MERGE_CLASS_NODE;

// Pointer form of MERGE_CLASS_NODE, used by the functions declared below.
typedef MERGE_CLASS_NODE *MERGE_CLASS;
//////////////////////////////////////////////////////////////////////////////
// Functions /////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// Takes an argument count and argument vector.
// NOTE(review): presumably processes the program's command line and fills in
// the command-line globals declared in this header -- confirm against the
// implementation.
void ParseArguments(int argc, char **argv);

// Takes an argument count and argument vector and returns a char pointer.
// NOTE(review): the name suggests it yields the next file name from the
// arguments; what it returns when none remain is not visible here -- confirm.
char *GetNextFilename(int Argc, char **argv);
// Takes a LIST and a label and returns a LABELEDLIST.
// NOTE(review): presumably searches List for the node whose Label matches;
// the not-found result is not visible from this header -- confirm.
LABELEDLIST FindList(LIST List, char *Label);

// Takes a label and returns a LABELEDLIST.
// NOTE(review): presumably allocates a new node for Label; whether the label
// is copied and who frees the result are not visible here -- confirm.
LABELEDLIST NewLabeledList(const char *Label);

// Takes a directory, a LIST and a feature-type string.
// Note that the Directory parameter has the same name as the global
// Directory declared in this header and hides it inside the function.
// NOTE(review): presumably writes the samples in CharList to files under
// Directory -- confirm against the implementation.
void WriteTrainingSamples(char *Directory,
                          LIST CharList,
                          const char *program_feature_type);

// NOTE(review): presumably releases everything held by CharList -- confirm.
void FreeTrainingSamples(LIST CharList);

// NOTE(review): presumably releases a single LABELEDLIST node -- confirm.
void FreeLabeledList(LABELEDLIST LabeledList);

// NOTE(review): presumably releases a list of labeled class nodes -- confirm.
void FreeLabeledClassList(LIST ClassListList);
// Takes a LABELEDLIST and a feature-type string and returns a CLUSTERER
// pointer.
// NOTE(review): presumably builds a clusterer loaded with the samples of
// CharSample; ownership of the result is not visible here -- confirm.
CLUSTERER *SetUpForClustering(LABELEDLIST CharSample,
                              const char *program_feature_type);

// Takes a prototype LIST, two keep flags and a count N, and returns a LIST.
// NOTE(review): presumably filters ProtoList by significance according to
// the two flags; the role of N is not visible here -- confirm.
LIST RemoveInsignificantProtos(LIST ProtoList,
                               BOOL8 KeepSigProtos,
                               BOOL8 KeepInsigProtos,
                               int N);

// NOTE(review): presumably frees per-proto data that is no longer needed --
// confirm against the implementation.
void CleanUpUnusedData(LIST ProtoList);

// Takes a prototype LIST, a label, a clusterer and a clustering
// configuration.
// Note that the Config parameter has the same name as the global Config
// declared in this header and hides it inside the function.
// NOTE(review): presumably folds insignificant prototypes into significant
// ones -- confirm against the implementation.
void MergeInsignificantProtos(LIST ProtoList,
                              const char *label,
                              CLUSTERER *Clusterer,
                              CLUSTERCONFIG *Config);
// Takes a LIST and a label and returns a MERGE_CLASS.
// NOTE(review): presumably searches List for the node whose Label matches;
// the not-found result is not visible from this header -- confirm.
MERGE_CLASS FindClass(LIST List, char *Label);

// Takes a label and returns a MERGE_CLASS.
// NOTE(review): presumably allocates a new node for Label; whether the label
// is copied and who frees the result are not visible here -- confirm.
MERGE_CLASS NewLabeledClass(char *Label);

// Takes a LIST of labeled classes.
// NOTE(review): presumably prepares the classes for float-to-integer
// prototype conversion -- confirm against the implementation.
void SetUpForFloat2Int(LIST LabeledClassList);
// Takes a pointer to float values.
// NOTE(review): presumably normalizes the values in place; the number of
// elements read is not visible from this header -- confirm.
void Normalize(float *Values);

// NOTE(review): presumably releases a list of normalization protos --
// confirm against the implementation.
void FreeNormProtoList(LIST CharList);

// Takes a pointer to a LIST, a prototype LIST and a character name.
// NormProtoList is passed by pointer.
// NOTE(review): presumably so that the list head can be updated when the
// protos for CharName are added -- confirm.
void AddToNormProtosList(LIST *NormProtoList,
                         LIST ProtoList,
                         char *CharName);

// Takes a prototype LIST and two count flags and returns an int.
// NOTE(review): presumably counts the protos in ProtoList whose significance
// matches the two flags -- confirm.
int NumberOfProtos(LIST ProtoList,
                   BOOL8 CountSigProtos,
                   BOOL8 CountInsigProtos);

// Takes no arguments and returns nothing.
// NOTE(review): presumably allocates the storage for normalization protos --
// confirm against the implementation.
void allocNormProtos();
#endif // TESSERACT_TRAINING_COMMONTRAINING_H__