mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-08 02:12:40 +08:00
Fixed issue 669
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@743 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
d71045fa3a
commit
c7cef53ee3
@ -29,20 +29,17 @@
|
||||
namespace tesseract {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool UnicharAndFonts::Serialize(FILE* fp) {
|
||||
inT32 uni_id = unichar_id;
|
||||
if (fwrite(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
|
||||
bool UnicharAndFonts::Serialize(FILE* fp) const {
|
||||
if (fwrite(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
|
||||
if (!font_ids.Serialize(fp)) return false;
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) {
|
||||
inT32 uni_id;
|
||||
if (fread(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
|
||||
if (fread(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&uni_id, sizeof(uni_id));
|
||||
unichar_id = uni_id;
|
||||
ReverseN(&unichar_id, sizeof(unichar_id));
|
||||
if (!font_ids.DeSerialize(swap, fp)) return false;
|
||||
return true;
|
||||
}
|
||||
@ -55,8 +52,9 @@ int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) {
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Shape::Serialize(FILE* fp) {
|
||||
if (fwrite(&unichars_sorted_, 1, 1, fp) != 1)
|
||||
bool Shape::Serialize(FILE* fp) const {
|
||||
uinT8 sorted = unichars_sorted_;
|
||||
if (fwrite(&sorted, sizeof(sorted), 1, fp) != 1)
|
||||
return false;
|
||||
if (!unichars_.SerializeClasses(fp)) return false;
|
||||
return true;
|
||||
@ -64,8 +62,10 @@ bool Shape::Serialize(FILE* fp) {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Shape::DeSerialize(bool swap, FILE* fp) {
|
||||
if (fread(&unichars_sorted_, 1, 1, fp) != 1)
|
||||
uinT8 sorted;
|
||||
if (fread(&sorted, sizeof(sorted), 1, fp) != 1)
|
||||
return false;
|
||||
unichars_sorted_ = sorted != 0;
|
||||
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
|
||||
return true;
|
||||
}
|
||||
@ -197,16 +197,28 @@ STRING ShapeTable::DebugStr(int shape_id) const {
|
||||
const Shape& shape = GetShape(shape_id);
|
||||
STRING result;
|
||||
result.add_str_int("Shape", shape_id);
|
||||
if (shape.size() > 100) {
|
||||
result.add_str_int(" Num unichars=", shape.size());
|
||||
return result;
|
||||
}
|
||||
for (int c = 0; c < shape.size(); ++c) {
|
||||
result.add_str_int(" c_id=", shape[c].unichar_id);
|
||||
result += "=";
|
||||
result += unicharset_->id_to_unichar(shape[c].unichar_id);
|
||||
if (shape.size() < 10) {
|
||||
result.add_str_int(", ", shape[c].font_ids.size());
|
||||
result += " fonts =";
|
||||
for (int f = 0; f < shape[c].font_ids.size(); ++f) {
|
||||
int num_fonts = shape[c].font_ids.size();
|
||||
if (num_fonts > 10) {
|
||||
result.add_str_int(" ", shape[c].font_ids[0]);
|
||||
result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
|
||||
} else {
|
||||
for (int f = 0; f < num_fonts; ++f) {
|
||||
result.add_str_int(" ", shape[c].font_ids[f]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -327,12 +339,12 @@ int ShapeTable::BuildFromShape(const Shape& shape,
|
||||
}
|
||||
|
||||
// Returns true if the shapes are already merged.
|
||||
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) {
|
||||
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const {
|
||||
return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
|
||||
}
|
||||
|
||||
// Returns true if any shape contains multiple unichars.
|
||||
bool ShapeTable::AnyMultipleUnichars() {
|
||||
bool ShapeTable::AnyMultipleUnichars() const {
|
||||
int num_shapes = NumShapes();
|
||||
for (int s1 = 0; s1 < num_shapes; ++s1) {
|
||||
if (MasterDestinationIndex(s1) != s1) continue;
|
||||
@ -408,10 +420,6 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
|
||||
shape_table_[master_id2]->set_destination_index(master_id1);
|
||||
// Add all the shapes of master_id2 to master_id1.
|
||||
shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
|
||||
tprintf("Merged shape %d->%d, %d->%d, now with %d unichars: %s\n",
|
||||
shape_id1, master_id1, shape_id2, master_id2,
|
||||
shape_table_[master_id1]->size(),
|
||||
DebugStr(master_id1).string());
|
||||
}
|
||||
|
||||
// Returns the destination of this shape, (if merged), taking into account
|
||||
|
@ -41,7 +41,7 @@ struct UnicharAndFonts {
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(FILE* fp);
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
@ -49,8 +49,8 @@ struct UnicharAndFonts {
|
||||
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
|
||||
static int SortByUnicharId(const void* v1, const void* v2);
|
||||
|
||||
GenericVector<int> font_ids;
|
||||
int unichar_id;
|
||||
GenericVector<inT32> font_ids;
|
||||
inT32 unichar_id;
|
||||
};
|
||||
|
||||
// A Shape is a collection of unichar-ids and a list of fonts associated with
|
||||
@ -64,7 +64,7 @@ class Shape {
|
||||
Shape() : destination_index_(-1) {}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(FILE* fp);
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
@ -188,9 +188,9 @@ class ShapeTable {
|
||||
int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes);
|
||||
|
||||
// Returns true if the shapes are already merged.
|
||||
bool AlreadyMerged(int shape_id1, int shape_id2);
|
||||
bool AlreadyMerged(int shape_id1, int shape_id2) const;
|
||||
// Returns true if any shape contains multiple unichars.
|
||||
bool AnyMultipleUnichars();
|
||||
bool AnyMultipleUnichars() const;
|
||||
// Returns the maximum number of unichars over all shapes.
|
||||
int MaxNumUnichars() const;
|
||||
// Merges shapes with a common unichar over the [start, end) interval.
|
||||
|
Loading…
Reference in New Issue
Block a user