tesseract/classify/blobclass.cpp
theraysmith 694d3f2c20 Changes to classify for 3.00
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@291 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2009-07-11 02:17:36 +00:00

117 lines
4.4 KiB
C++

/******************************************************************************
** Filename: blobclass.cpp
** Purpose: High level blob classification and training routines.
** Author: Dan Johnson
** History: 7/21/89, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
/**----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "blobclass.h"
#include "fxdefs.h"
#include "extract.h"
#include "efio.h"
#include "callcpp.h"
#include "chartoname.h"
#include <math.h>
#include <stdio.h>
#include <signal.h>
/* NOTE(review): MAXFILENAME and MAXMATCHES are not referenced by the code
   visible in this file; presumably legacy limits -- confirm before removing. */
#define MAXFILENAME 80
#define MAXMATCHES 10
/* Font name written in front of every training sample by LearnBlob.
   LearnBlob compares the value against the default "UnknownFont"; when it
   still has that default, LearnBlob tries to take the font name from the
   training file name instead. */
STRING_VAR(classify_font_name, "UnknownFont",
"Default font name to be used in training");
/**----------------------------------------------------------------------------
Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
/* name of current image file being processed; only declared here, the
   definition is not visible in this file.
   NOTE(review): no code visible in this file reads it -- confirm that the
   declaration is still needed. */
extern char imagefile[];
/**----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void
LearnBlob (const STRING& filename,
           TBLOB * Blob, TEXTROW * Row, char BlobText[])
/*
 **	Parameters:
 **		filename	base name of the training file; TRAIN_SUFFIX is
 **				appended to form the output file name and, when
 **				no font name is configured, the font name is
 **				taken from it
 **		Blob		blob whose micro-features are to be learned
 **		Row		row of text that blob came from
 **		BlobText	text that corresponds to blob
 **	Globals:
 **		classify_font_name
 **				name of font currently being trained on
 **	Operation:
 **		Extract micro-features from the specified blob and append
 **		them to the appropriate file.  The feature file is opened on
 **		the first call and kept open in a function-local static, so
 **		the file name is only taken from the first call's filename.
 **	Return: none
 **	Exceptions: none
 **	History: 7/28/89, DSJ, Created.
 */
#define TRAIN_SUFFIX  ".tr"
{
  static FILE *FeatureFile = NULL;
  STRING Filename(filename);
  CHAR_DESC CharDesc;
  LINE_STATS LineStats;

  EnterLearnMode;

  GetLineStatsFromRow(Row, &LineStats);

  CharDesc = ExtractBlobFeatures (Blob, &LineStats);
  if (CharDesc == NULL) {
    cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
    return;
  }

  // Start from the configured font name.  strncpy does not terminate the
  // destination when the source fills it, so copy one byte less than the
  // buffer holds and terminate explicitly.
  char CurrFontName[32] = "";
  strncpy(CurrFontName, static_cast<STRING>(classify_font_name).string(),
          sizeof(CurrFontName) - 1);
  CurrFontName[sizeof(CurrFontName) - 1] = '\0';

  // If no fontname was set, try to extract it from the filename
  if (!strcmp(CurrFontName, "UnknownFont")) {
    // filename is expected to be of the form [lang].[fontname].exp[num]
    // The [lang], [fontname] and [num] fields should not have '.' characters.
    const char *path = filename.string();
    // A filename without any directory part has no '/'; search the whole
    // name in that case instead of handing NULL to strchr.
    const char *basename = strrchr(path, '/');
    if (basename == NULL) {
      basename = path;
    }
    const char *firstdot = strchr(basename, '.');
    const char *lastdot = strrchr(path, '.');
    // Require two distinct dots with at least one character between them;
    // otherwise keep the default name.
    if (firstdot != NULL && lastdot != NULL && lastdot > firstdot + 1) {
      size_t FontLen = lastdot - firstdot - 1;
      // Clamp to the buffer and terminate: without the terminator a short
      // font name would leave the tail of "UnknownFont" behind it.
      if (FontLen > sizeof(CurrFontName) - 1) {
        FontLen = sizeof(CurrFontName) - 1;
      }
      strncpy(CurrFontName, firstdot + 1, FontLen);
      CurrFontName[FontLen] = '\0';
    }
  }

  // if a feature file is not yet open, open it
  // the name of the file is the name of the image plus TRAIN_SUFFIX
  if (FeatureFile == NULL) {
    Filename += TRAIN_SUFFIX;
    FeatureFile = Efopen (Filename.string(), "w");
    // Efopen presumably reports open failures itself (TODO confirm); guard
    // anyway so a NULL stream is never passed to fprintf.
    if (FeatureFile == NULL) {
      cprintf ("LearnBlob: could not open %s for writing.\n",
               Filename.string());
      FreeCharDescription(CharDesc);
      return;
    }

    cprintf ("TRAINING ... Font name = %s\n", CurrFontName);
  }

  // label the features with a class name and font name
  fprintf (FeatureFile, "\n%s %s ", CurrFontName, BlobText);

  // write micro-features to file and clean up
  WriteCharDescription(FeatureFile, CharDesc);
  FreeCharDescription(CharDesc);

}                                // LearnBlob