tesseract/ccmain/charsample.h

215 lines
4.6 KiB
C
Raw Normal View History

/**********************************************************************
* File: charsample.h (Formerly charsample.h)
* Description: Class to contain character samples and match scores
* to be used for adaption
* Author: Chris Newton
* Created: Thu Oct 7 13:40:37 BST 1993
*
* (C) Copyright 1993, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef CHARSAMPLE_H
#define CHARSAMPLE_H
#include "elst.h"
#include "pageres.h"
#include "memry.h"
#include "notdll.h"
// Score constant defined as MAX_INT32 (declared outside this header).
// NOTE(review): presumably a sentinel for a failed/unusable match - confirm
// against the code that compares scores with it.
#define BAD_SCORE MAX_INT32
// '!' and '~' are the first and last printable, non-space ASCII characters.
// NOTE(review): presumably these bound the range of character codes that the
// sample classes below are indexed by - confirm against the users.
#define FIRST_CHAR '!'
#define LAST_CHAR '~'
namespace tesseract {
// Only a forward declaration is needed: this header refers to Tesseract
// solely through tesseract::Tesseract* parameters, never by value.
class Tesseract; // Fwd decl.
}
// Tag stored in CHAR_SAMPLES::type.
// NOTE(review): the meanings below are inferred from the enumerator names
// only - confirm against the code that sets and reads the tag.
enum ClusterType {
  UNKNOWN = 0,        // presumably: cluster kind not yet determined
  BLOB_CLUSTER = 1,   // presumably: cluster built from blob samples
  IMAGE_CLUSTER = 2   // presumably: cluster built from image samples
};
// The classes are forward-declared so that the ELISTIZEH macro can be invoked
// on them before their full definitions appear further down.
// NOTE(review): ELISTIZEH presumably comes from elst.h and declares the
// matching <CLASS>_LIST type (CHAR_SAMPLE_LIST is used as a member of
// CHAR_SAMPLES below) - confirm in elst.h.
class CHAR_SAMPLE; //forward decl
ELISTIZEH (CHAR_SAMPLE)
class CHAR_SAMPLES; //forward decl
ELISTIZEH (CHAR_SAMPLES)
// CHAR_PROTO is only pointed to by CHAR_SAMPLES before it is defined, so a
// forward declaration is sufficient here.
class CHAR_PROTO; //forward decl
// One character sample: an image and/or a blob (with its DENORM), tagged
// with the character it represents, together with counters that the
// match-statistics methods (n_matches, mean_score, variance) report on.
//
// Ownership: the destructor deletes sample_image. It does not touch
// sample_blob or sample_denorm.
// NOTE(review): no copy constructor or assignment operator is declared here,
// so a member-wise copy would leave two objects deleting the same image -
// confirm that instances are never copied, or that copies are handled
// elsewhere.
class CHAR_SAMPLE : public ELIST_LINK
{
 public:
  // ---- construction / destruction (constructors are defined out of line) ----

  // Empty constructor.
  CHAR_SAMPLE();

  // Builds a sample from a blob, its denormalisation data and the character.
  CHAR_SAMPLE(PBLOB *blob, DENORM *denorm, char c);

  // Builds a sample from an image and the character.
  CHAR_SAMPLE(IMAGE *image, char c);

  // Releases the image, which this object owns.
  ~CHAR_SAMPLE() {
    if (sample_image == NULL)
      return;  // nothing to release
    delete sample_image;
  }

  // ---- trivial accessors ----

  // Character this sample is tagged with.
  char character() {
    return ch;
  }

  // Image held by this sample (may be NULL; still owned by this object).
  IMAGE *image() {
    return sample_image;
  }

  // Blob held by this sample.
  PBLOB *blob() {
    return sample_blob;
  }

  // Denormalisation data held by this sample.
  DENORM *denorm() {
    return sample_denorm;
  }

  // Current value of the n_samples_matched counter.
  inT32 n_matches() {
    return n_samples_matched;
  }

  // ---- operations defined out of line ----

  // Matches test_sample against this sample and returns the score.
  // NOTE(review): `updating` presumably controls whether the match
  // statistics are accumulated - confirm in the implementation.
  float match_sample(CHAR_SAMPLE *test_sample, BOOL8 updating,
                     tesseract::Tesseract* tess);

  double mean_score();
  double variance();
  void reset_match_statistics();

  // Writes a description of the sample to f.
  void print(FILE *f);

  NEWDELETE2(CHAR_SAMPLE)

 private:
  IMAGE *sample_image;        // owned; deleted in the destructor
  PBLOB *sample_blob;         // not deleted by this class
  DENORM *sample_denorm;      // not deleted by this class
  inT32 n_samples_matched;    // reported by n_matches()
  double total_match_scores;  // presumably the running sum of match scores
  double sumsq_match_scores;  // presumably the running sum of squared scores
  char ch;                    // reported by character()
};
// A collection of CHAR_SAMPLE objects (held in a CHAR_SAMPLE_LIST) together
// with a cluster-type tag, a character code, a CHAR_PROTO pointer and a
// pointer to a "best" sample.
//
// NOTE(review): the destructor body is empty, so neither proto nor
// best_sample is released here - confirm who owns them.
class CHAR_SAMPLES : public ELIST_LINK
{
 public:
  // ---- construction / destruction (constructors are defined out of line) ----

  // Empty constructor.
  CHAR_SAMPLES();

  // Constructs from a single sample.
  CHAR_SAMPLES(CHAR_SAMPLE *sample);

  // Destructor: performs no explicit clean-up.
  ~CHAR_SAMPLES() {}

  // ---- trivial accessors ----

  // Number of entries currently in the sample list.
  inT32 n_samples() {
    return samples.length();
  }

  // Character this collection is tagged with.
  char character() {
    return ch;
  }

  // Prototype pointer held by this collection.
  CHAR_PROTO *prototype() {
    return proto;
  }

  // ---- sample management (defined out of line) ----

  void add_sample(CHAR_SAMPLE *sample, tesseract::Tesseract*);
  void find_best_sample();
  void assign_to_char();

  // ---- prototype management (defined out of line) ----

  void build_prototype();
  void rebuild_prototype(inT32 new_xsize, inT32 new_ysize);
  void add_sample_to_prototype(CHAR_SAMPLE *sample);

  // ---- matching (defined out of line) ----

  float match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);
  float nn_match_score(CHAR_SAMPLE *sample, tesseract::Tesseract* tess);

  // Writes a description of the collection to f.
  void print(FILE *f);

  NEWDELETE2(CHAR_SAMPLES)

 private:
  ClusterType type;          // cluster-type tag
  char ch;                   // reported by character()
  CHAR_PROTO *proto;         // reported by prototype()
  CHAR_SAMPLE *best_sample;  // presumably set by find_best_sample()
  CHAR_SAMPLE_LIST samples;  // the samples themselves
};
// A character prototype: a grid of float values addressed through a
// float** table, with recorded x/y dimensions, a sample count and the
// character it represents.
//
// NOTE(review): proto_data is presumably the contiguous backing store that
// the row pointers in proto index into - confirm in the implementation.
class CHAR_PROTO
{
 public:
  // ---- construction / destruction (all defined out of line) ----

  // Empty constructor.
  CHAR_PROTO();

  // Constructs a prototype of the given dimensions, sample count, initial
  // cell value and character.
  CHAR_PROTO(inT32 x_size,
             inT32 y_size,
             inT32 n_samples,
             float initial_value,
             char c);

  // Constructs a prototype from a single sample.
  CHAR_PROTO(CHAR_SAMPLE *sample);

  ~CHAR_PROTO();

  // ---- trivial accessors ----

  // Recorded width of the prototype.
  inT32 x_size() {
    return xsize;
  }

  // Recorded height of the prototype.
  inT32 y_size() {
    return ysize;
  }

  // Recorded number of samples.
  inT32 n_samples() {
    return nsamples;
  }

  // Character this prototype is tagged with.
  char character() {
    return ch;
  }

  // Direct access to the float** table.
  float **data() {
    return proto;
  }

  // ---- matching (defined out of line) ----

  float match_sample(CHAR_SAMPLE *test_sample);
  float match(CHAR_PROTO *test_proto);

  // ---- modification and output (defined out of line) ----

  void enlarge_prototype(inT32 new_xsize, inT32 new_ysize);
  void add_sample(CHAR_SAMPLE *sample);
  IMAGE *make_image();

  // Writes a description of the prototype to f.
  void print(FILE *f);

  NEWDELETE2(CHAR_PROTO)

 private:
  inT32 xsize;        // reported by x_size()
  inT32 ysize;        // reported by y_size()
  float *proto_data;  // see the class-level note
  float **proto;      // reported by data()
  inT32 nsamples;     // reported by n_samples()
  char ch;            // reported by character()
};
#endif