Fixed issue 243, ungraded helpers, genericvector

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@340 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith 2010-05-19 22:35:35 +00:00
parent 451d7ac523
commit 8d654e7476
5 changed files with 204 additions and 107 deletions

View File

@ -566,27 +566,6 @@ float compute_reject_threshold( //compute threshold //detailed results
}
/**********************************************************************
 * sort_floats
 *
 * Comparison callback for qsort over an array of floats. Returns 1 if
 * the float at arg1 exceeds the float at arg2, -1 if it is smaller,
 * and 0 otherwise.
 **********************************************************************/
int sort_floats(const void *arg1, const void *arg2) {
  const float lhs = *static_cast<const float *>(arg1);
  const float rhs = *static_cast<const float *>(arg2);
  const float delta = lhs - rhs;  // sign of the difference decides the order
  if (delta > 0) return 1;
  if (delta < 0) return -1;
  return 0;
}
/*************************************************************************
* reject_edge_blobs()
*

View File

@ -24,6 +24,7 @@
#include "callback.h"
#include "errcode.h"
#include "helpers.h"
template <typename T>
class GenericVector {
@ -46,9 +47,6 @@ class GenericVector {
// Double the size of the internal array.
void double_the_size();
// Init the object, allocating size memory.
void init(int size);
// Return the size used.
int size() const {
return size_used_;
@ -117,8 +115,11 @@ class GenericVector {
// Read/Write the array to a file. This does _NOT_ read/write the callbacks.
// The callback given must be permanent since it will be called more than
// once. The given callback will be deleted at the end.
void write(FILE* f, Callback2<FILE*, T const &>* cb);
void read(FILE* f, Callback3<FILE*, T*, bool>* cb, bool swap);
// If the callbacks are NULL, then the data is simply read/written using
// fread (and swapping)/fwrite.
// Returns false on error or if the callback returns false.
bool write(FILE* f, ResultCallback2<bool, FILE*, T const &>* cb) const;
bool read(FILE* f, ResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
// Allocates a new array of double the current_size, copies over the
// information from data to the new location, deletes data and returns
@ -133,12 +134,16 @@ class GenericVector {
}
protected:
// Init the object, allocating size memory.
void init(int size);
// We are assuming that the objects generally placed in this
// vector are small enough that for efficiency it makes sense
// to start with a larger initial size.
static const int kDefaultVectorSize = 4;
int size_used_;
int size_reserved_;
inT32 size_used_;
inT32 size_reserved_;
T* data_;
Callback1<T>* clear_cb_;
// Mutable because Run method is not const
@ -330,6 +335,7 @@ void GenericVector<T>::clear() {
for (int i = 0; i < size_used_; ++i)
clear_cb_->Run(data_[i]);
delete[] data_;
data_ = NULL;
size_used_ = 0;
size_reserved_ = 0;
}
@ -353,29 +359,50 @@ void GenericVector<T>::delete_data_pointers() {
template <typename T>
void GenericVector<T>::write(FILE* f, Callback2<FILE*, T const &>* cb) {
fwrite(&size_reserved_, sizeof(int), 1, f);
fwrite(&size_used_, sizeof(int), 1, f);
for (int i = 0; i < size_used_; ++i) {
cb->Run(f, data_[i]);
bool GenericVector<T>::write(
FILE* f, ResultCallback2<bool, FILE*, T const &>* cb) const {
if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
if (cb != NULL) {
for (int i = 0; i < size_used_; ++i) {
if (!cb->Run(f, data_[i])) {
delete cb;
return false;
}
}
delete cb;
} else {
if (fwrite(data_, sizeof(T), size_used_, f) != size_used_) return false;
}
delete cb;
return true;
}
template <typename T>
void GenericVector<T>::read(FILE* f, Callback3<FILE*, T*, bool>* cb, bool swap) {
bool GenericVector<T>::read(FILE* f,
ResultCallback3<bool, FILE*, T*, bool>* cb,
bool swap) {
uinT32 reserved;
fread(&reserved, sizeof(int), 1, f);
if (swap)
reserved = reverse32(reserved);
if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
if (swap) Reverse32(&reserved);
reserve(reserved);
fread(&size_used_, sizeof(int), 1, f);
if (swap)
size_used_ = reverse32(size_used_);
for (int i = 0; i < size_used_; ++i) {
cb->Run(f, data_ + i, swap);
if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
if (swap) Reverse32(&size_used_);
if (cb != NULL) {
for (int i = 0; i < size_used_; ++i) {
if (!cb->Run(f, data_ + i, swap)) {
delete cb;
return false;
}
}
delete cb;
} else {
if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
if (swap) {
for (int i = 0; i < size_used_; ++i)
ReverseN(&data_[i], sizeof(T));
}
}
delete cb;
return true;
}
// This method clear the current object, then, does a shallow copy of

View File

@ -5,7 +5,7 @@
* Description: General utility functions
* Author: Daria Antonova
* Created: Wed Apr 8 14:37:00 2009
* Language: C
* Language: C++
* Package: N/A
* Status: Reusable Software Component
*
@ -25,11 +25,14 @@
#ifndef TESSERACT_CCUTIL_HELPERS_H_
#define TESSERACT_CCUTIL_HELPERS_H_
#include <stdio.h>
#include <string.h>
// Remove newline (if any) at the end of the string.
inline void chomp_string(char *string) {
int last_index = strlen(string) - 1;
if (string[last_index] == '\n') {
string[last_index] = '\0';
// Removes a single trailing newline (if any) from str, in place.
// str must point to a NUL-terminated, writable buffer. An empty string is
// left untouched: the length is checked before indexing so that the byte
// preceding the buffer is never read or overwritten.
inline void chomp_string(char *str) {
  const size_t length = strlen(str);
  if (length > 0 && str[length - 1] == '\n') {
    str[length - 1] = '\0';
  }
}
@ -38,4 +41,94 @@ inline void SkipNewline(FILE *file) {
if (fgetc(file) != '\n') fseek(file, -1, SEEK_CUR);
}
#endif
// Comparator for qsort() on arrays of float. Yields 1 when the first value
// is larger than the second, -1 when it is smaller, and 0 otherwise.
inline int sort_floats(const void *arg1, const void *arg2) {
  const float *first = static_cast<const float *>(arg1);
  const float *second = static_cast<const float *>(arg2);
  const float diff = *first - *second;
  return diff > 0 ? 1 : (diff < 0 ? -1 : 0);
}
// Rounds n up to a whole number of blocks: for non-negative n and positive
// block_size, returns the smallest multiple of block_size that is >= n.
inline int RoundUp(int n, int block_size) {
  const int num_blocks = (n + block_size - 1) / block_size;
  return num_blocks * block_size;
}
// Clamps x to the closed interval [lower_bound, upper_bound]: values below
// the interval map to lower_bound, values above it map to upper_bound, and
// anything else is returned unchanged. The lower bound is tested first.
template<typename T>
inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) {
  return x < lower_bound ? lower_bound
                         : (x > upper_bound ? upper_bound : x);
}
// Grows the range [*lower_bound, *upper_bound] just enough to contain x.
// Both ends are tested independently, so a single call may move both.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) {
  T2& lo = *lower_bound;
  T2& hi = *upper_bound;
  if (x < lo) lo = x;
  if (x > hi) hi = x;
}
// Grows the range [*lower_bound, *upper_bound] to cover [x_lo, x_hi]:
// *lower_bound is lowered to x_lo if x_lo is smaller, and *upper_bound is
// raised to x_hi if x_hi is larger.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x_lo, const T1& x_hi,
                        T2* lower_bound, T2* upper_bound) {
  T2& lo = *lower_bound;
  T2& hi = *upper_bound;
  if (x_lo < lo) lo = x_lo;
  if (x_hi > hi) hi = x_hi;
}
// Mathematical modulo that also works for negative a. For any integer a and
// positive b, returns r with 0 <= r < b such that a = n*b + r for some
// integer n.
inline int Modulo(int a, int b) {
  const int remainder = a % b;  // may be negative when a is negative
  return (remainder + b) % b;   // shift into [0, b)
}
// Integer division of a by b, rounded to the nearest integer, that treats
// negative and positive a symmetrically instead of bunching results at 0.
// Adding b/2 before dividing regardless of sign would give
// 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = 0, -3/3 = 0 and -4/3 = -1.
// This function instead gives
// 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
// Results with b negative are not defined.
inline int DivRounded(int a, int b) {
  const int half = b / 2;
  if (a >= 0) {
    return (a + half) / b;
  }
  return (a - half) / b;
}
// Reverses, in place, the order of the num_bytes bytes starting at ptr.
// Used for switching between big- and little-endian representations.
inline void ReverseN(void* ptr, int num_bytes) {
  char *bytes = static_cast<char *>(ptr);
  // Walk inwards from both ends, exchanging one pair of bytes per step.
  for (int lo = 0, hi = num_bytes - 1; lo < hi; ++lo, --hi) {
    const char saved = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = saved;
  }
}
// Byte-swaps, in place, the 16 bit (2 byte) quantity at ptr for a
// big/little-endian switch.
inline void Reverse16(void *ptr) {
  const int kNumBytes = 2;
  ReverseN(ptr, kNumBytes);
}
// Byte-swaps, in place, the 32 bit (4 byte) quantity at ptr for a
// big/little-endian switch.
inline void Reverse32(void *ptr) {
  const int kNumBytes = 4;
  ReverseN(ptr, kNumBytes);
}
// Byte-swaps, in place, the 64 bit (8 byte) quantity at ptr for a
// big/little-endian switch.
inline void Reverse64(void* ptr) {
  const int kNumBytes = 8;
  ReverseN(ptr, kNumBytes);
}
#endif // TESSERACT_CCUTIL_HELPERS_H_

View File

@ -81,9 +81,10 @@ class UnicityTable {
// Read/Write the table to a file. This does _NOT_ read/write the callbacks.
// The callback given must be permanent since it will be called more than
// once. The given callback will be deleted at the end.
void write(FILE* f, Callback2<FILE*, T const &>* cb);
// Returns false on read/write error.
bool write(FILE* f, ResultCallback2<bool, FILE*, T const &>* cb);
// swap is used to switch the endianness.
void read(FILE* f, Callback3<FILE*, T*, bool>* cb, bool swap);
bool read(FILE* f, ResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
private:
GenericVector<T> table_;
@ -179,13 +180,15 @@ void UnicityTable<T>::clear() {
}
template <typename T>
void UnicityTable<T>::write(FILE* f, Callback2<FILE*, T const &>* cb) {
table_.write(f, cb);
bool UnicityTable<T>::write(FILE* f,
ResultCallback2<bool, FILE*, T const &>* cb) {
return table_.write(f, cb);
}
template <typename T>
void UnicityTable<T>::read(FILE* f, Callback3<FILE*, T*, bool>* cb, bool swap) {
table_.read(f, cb, swap);
bool UnicityTable<T>::read(
FILE* f, ResultCallback3<bool, FILE*, T*, bool>* cb, bool swap) {
return table_.read(f, cb, swap);
}
// This method clear the current object, then, does a shallow copy of

View File

@ -18,6 +18,7 @@
/**----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "helpers.h"
#include "intproto.h"
#include "picofeat.h"
#include "mfoutline.h"
@ -432,14 +433,8 @@ int BucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets) {
** Exceptions: none
** History: Thu Feb 14 13:24:33 1991, DSJ, Created.
*/
int Bucket;
Bucket = static_cast<int>(MapParam(Param, Offset, NumBuckets));
if (Bucket < 0)
Bucket = 0;
else if (Bucket >= NumBuckets)
Bucket = NumBuckets - 1;
return (Bucket);
return ClipToRange(static_cast<int>(MapParam(Param, Offset, NumBuckets)),
0, NumBuckets - 1);
} /* BucketFor */
@ -786,45 +781,51 @@ void free_int_templates(INT_TEMPLATES templates) {
/*---------------------------------------------------------------------------*/
// Code to read/write Classify::font*table structures.
namespace {
void read_info(FILE* f, FontInfo* fi, bool swap) {
bool read_info(FILE* f, FontInfo* fi, bool swap) {
inT32 size;
fread(&size, sizeof(inT32), 1, f);
if (fread(&size, sizeof(size), 1, f) != 1) return false;
if (swap)
reverse32(&size);
fi->name = new char[size + 1];
fread(fi->name, sizeof(char), size, f);
fi->name[size] = '\0';
fread(&fi->properties, sizeof(fi->properties), 1, f);
Reverse32(&size);
char* font_name = new char[size + 1];
fi->name = font_name;
if (fread(font_name, sizeof(*font_name), size, f) != size) return false;
font_name[size] = '\0';
if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
if (swap)
reverse32(&fi->properties);
Reverse32(&fi->properties);
return true;
}
void write_info(FILE* f, const FontInfo& fi) {
bool write_info(FILE* f, const FontInfo& fi) {
inT32 size = strlen(fi.name);
fwrite(&size, sizeof(inT32), 1, f);
fwrite(fi.name, sizeof(char), size, f);
fwrite(&fi.properties, sizeof(inT32), 1, f);
if (fwrite(&size, sizeof(size), 1, f) != 1) return false;
if (fwrite(fi.name, sizeof(*fi.name), size, f) != size) return false;
if (fwrite(&fi.properties, sizeof(fi.properties), 1, f) != 1) return false;
return true;
}
void read_set(FILE* f, FontSet* fs, bool swap) {
fread(&fs->size, sizeof(inT32), 1, f);
bool read_set(FILE* f, FontSet* fs, bool swap) {
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
if (swap)
reverse32(&fs->size);
Reverse32(&fs->size);
fs->configs = new int[fs->size];
for (int i = 0; i < fs->size; ++i) {
fread(&fs->configs[i], sizeof(inT32), 1, f);
if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
if (swap)
reverse32(&fs->configs[i]);
Reverse32(&fs->configs[i]);
}
return true;
}
void write_set(FILE* f, const FontSet& fs) {
fwrite(&fs.size, sizeof(inT32), 1, f);
bool write_set(FILE* f, const FontSet& fs) {
if (fwrite(&fs.size, sizeof(fs.size), 1, f) != 1) return false;
for (int i = 0; i < fs.size; ++i) {
fwrite(&fs.configs[i], sizeof(inT32), 1, f);
if (fwrite(&fs.configs[i], sizeof(fs.configs[i]), 1, f) != 1) return false;
}
return true;
}
}
} // namespace.
namespace tesseract {
INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
@ -877,9 +878,9 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
swap = Templates->NumClassPruners < 0 ||
Templates->NumClassPruners > MAX_NUM_CLASS_PRUNERS;
if (swap) {
reverse32(&Templates->NumClassPruners);
reverse32(&Templates->NumClasses);
reverse32(&unicharset_size);
Reverse32(&Templates->NumClassPruners);
Reverse32(&Templates->NumClasses);
Reverse32(&unicharset_size);
}
if (Templates->NumClasses < 0) {
// This file has a version id!
@ -888,7 +889,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
1, File) != 1)
cprintf("Bad read of inttemp!\n");
if (swap)
reverse32(&Templates->NumClasses);
Reverse32(&Templates->NumClasses);
}
if (version_id < 3) {
@ -907,9 +908,9 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
}
if (swap) {
for (i = 0; i < Templates->NumClasses; i++)
reverse16(IndexFor[i]);
Reverse16(&IndexFor[i]);
for (i = 0; i < Templates->NumClasses; i++)
reverse32(ClassIdFor[i]);
Reverse32(&ClassIdFor[i]);
}
}
@ -925,7 +926,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
for (y = 0; y < NUM_CP_BUCKETS; y++) {
for (z = 0; z < NUM_CP_BUCKETS; z++) {
for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
reverse32(&Pruner[x][y][z][w]);
Reverse32(&Pruner[x][y][z][w]);
}
}
}
@ -1015,9 +1016,9 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
cprintf ("Bad read of inttemp!\n");
}
if (swap) {
reverse16 (&Class->NumProtos);
Reverse16(&Class->NumProtos);
for (j = 0; j < MaxNumConfigs; j++)
reverse16 (&Class->ConfigLengths[j]);
Reverse16(&Class->ConfigLengths[j]);
}
} else {
ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
@ -1026,9 +1027,9 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
cprintf ("Bad read of inttemp!\n");
}
if (swap) {
reverse16 (&Class->NumProtos);
Reverse16(&Class->NumProtos);
for (j = 0; j < MaxNumConfigs; j++)
reverse16 (&Class->ConfigLengths[j]);
Reverse16(&Class->ConfigLengths[j]);
}
}
if (version_id < 2) {
@ -1081,10 +1082,10 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
for (x = 0; x < NUM_PP_PARAMS; x++)
for (y = 0; y < NUM_PP_BUCKETS; y++)
for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
reverse32 (&ProtoSet->ProtoPruner[x][y][z]);
Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
for (y = 0; y < WerdsPerConfigVec; y++)
reverse32 (&ProtoSet->Protos[x].Configs[y]);
Reverse32(&ProtoSet->Protos[x].Configs[y]);
}
Class->ProtoSets[j] = ProtoSet;
}
@ -1093,7 +1094,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
else {
fread(&Class->font_set_id, sizeof(int), 1, File);
if (swap)
reverse32(&Class->font_set_id);
Reverse32(&Class->font_set_id);
}
}
@ -1905,17 +1906,11 @@ void RenderIntProto(void *window,
Xmax = Ymax = 0;
for (Bucket = 0; Bucket < NUM_PP_BUCKETS; Bucket++) {
if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_X][Bucket][ProtoWordIndex]) {
if (Bucket < Xmin)
Xmin = Bucket;
else if (Bucket > Xmax)
Xmax = Bucket;
UpdateRange(Bucket, &Xmin, &Xmax);
}
if (ProtoMask & ProtoSet->ProtoPruner[PRUNER_Y][Bucket][ProtoWordIndex]) {
if (Bucket < Ymin)
Ymin = Bucket;
else if (Bucket > Ymax)
Ymax = Bucket;
UpdateRange(Bucket, &Ymin, &Ymax);
}
}
X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET;