mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-08 02:12:40 +08:00
Fixed issue 669
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@743 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
d71045fa3a
commit
c7cef53ee3
@ -29,20 +29,17 @@
|
|||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
// Writes to the given file. Returns false in case of error.
|
// Writes to the given file. Returns false in case of error.
|
||||||
bool UnicharAndFonts::Serialize(FILE* fp) {
|
bool UnicharAndFonts::Serialize(FILE* fp) const {
|
||||||
inT32 uni_id = unichar_id;
|
if (fwrite(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
|
||||||
if (fwrite(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
|
|
||||||
if (!font_ids.Serialize(fp)) return false;
|
if (!font_ids.Serialize(fp)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
// Reads from the given file. Returns false in case of error.
|
// Reads from the given file. Returns false in case of error.
|
||||||
// If swap is true, assumes a big/little-endian swap is needed.
|
// If swap is true, assumes a big/little-endian swap is needed.
|
||||||
bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) {
|
bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) {
|
||||||
inT32 uni_id;
|
if (fread(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
|
||||||
if (fread(&uni_id, sizeof(uni_id), 1, fp) != 1) return false;
|
|
||||||
if (swap)
|
if (swap)
|
||||||
ReverseN(&uni_id, sizeof(uni_id));
|
ReverseN(&unichar_id, sizeof(unichar_id));
|
||||||
unichar_id = uni_id;
|
|
||||||
if (!font_ids.DeSerialize(swap, fp)) return false;
|
if (!font_ids.DeSerialize(swap, fp)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -55,8 +52,9 @@ int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Writes to the given file. Returns false in case of error.
|
// Writes to the given file. Returns false in case of error.
|
||||||
bool Shape::Serialize(FILE* fp) {
|
bool Shape::Serialize(FILE* fp) const {
|
||||||
if (fwrite(&unichars_sorted_, 1, 1, fp) != 1)
|
uinT8 sorted = unichars_sorted_;
|
||||||
|
if (fwrite(&sorted, sizeof(sorted), 1, fp) != 1)
|
||||||
return false;
|
return false;
|
||||||
if (!unichars_.SerializeClasses(fp)) return false;
|
if (!unichars_.SerializeClasses(fp)) return false;
|
||||||
return true;
|
return true;
|
||||||
@ -64,8 +62,10 @@ bool Shape::Serialize(FILE* fp) {
|
|||||||
// Reads from the given file. Returns false in case of error.
|
// Reads from the given file. Returns false in case of error.
|
||||||
// If swap is true, assumes a big/little-endian swap is needed.
|
// If swap is true, assumes a big/little-endian swap is needed.
|
||||||
bool Shape::DeSerialize(bool swap, FILE* fp) {
|
bool Shape::DeSerialize(bool swap, FILE* fp) {
|
||||||
if (fread(&unichars_sorted_, 1, 1, fp) != 1)
|
uinT8 sorted;
|
||||||
|
if (fread(&sorted, sizeof(sorted), 1, fp) != 1)
|
||||||
return false;
|
return false;
|
||||||
|
unichars_sorted_ = sorted != 0;
|
||||||
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
|
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -197,14 +197,26 @@ STRING ShapeTable::DebugStr(int shape_id) const {
|
|||||||
const Shape& shape = GetShape(shape_id);
|
const Shape& shape = GetShape(shape_id);
|
||||||
STRING result;
|
STRING result;
|
||||||
result.add_str_int("Shape", shape_id);
|
result.add_str_int("Shape", shape_id);
|
||||||
|
if (shape.size() > 100) {
|
||||||
|
result.add_str_int(" Num unichars=", shape.size());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
for (int c = 0; c < shape.size(); ++c) {
|
for (int c = 0; c < shape.size(); ++c) {
|
||||||
result.add_str_int(" c_id=", shape[c].unichar_id);
|
result.add_str_int(" c_id=", shape[c].unichar_id);
|
||||||
result += "=";
|
result += "=";
|
||||||
result += unicharset_->id_to_unichar(shape[c].unichar_id);
|
result += unicharset_->id_to_unichar(shape[c].unichar_id);
|
||||||
result.add_str_int(", ", shape[c].font_ids.size());
|
if (shape.size() < 10) {
|
||||||
result += " fonts =";
|
result.add_str_int(", ", shape[c].font_ids.size());
|
||||||
for (int f = 0; f < shape[c].font_ids.size(); ++f) {
|
result += " fonts =";
|
||||||
result.add_str_int(" ", shape[c].font_ids[f]);
|
int num_fonts = shape[c].font_ids.size();
|
||||||
|
if (num_fonts > 10) {
|
||||||
|
result.add_str_int(" ", shape[c].font_ids[0]);
|
||||||
|
result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
|
||||||
|
} else {
|
||||||
|
for (int f = 0; f < num_fonts; ++f) {
|
||||||
|
result.add_str_int(" ", shape[c].font_ids[f]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -327,12 +339,12 @@ int ShapeTable::BuildFromShape(const Shape& shape,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if the shapes are already merged.
|
// Returns true if the shapes are already merged.
|
||||||
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) {
|
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const {
|
||||||
return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
|
return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if any shape contains multiple unichars.
|
// Returns true if any shape contains multiple unichars.
|
||||||
bool ShapeTable::AnyMultipleUnichars() {
|
bool ShapeTable::AnyMultipleUnichars() const {
|
||||||
int num_shapes = NumShapes();
|
int num_shapes = NumShapes();
|
||||||
for (int s1 = 0; s1 < num_shapes; ++s1) {
|
for (int s1 = 0; s1 < num_shapes; ++s1) {
|
||||||
if (MasterDestinationIndex(s1) != s1) continue;
|
if (MasterDestinationIndex(s1) != s1) continue;
|
||||||
@ -408,10 +420,6 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
|
|||||||
shape_table_[master_id2]->set_destination_index(master_id1);
|
shape_table_[master_id2]->set_destination_index(master_id1);
|
||||||
// Add all the shapes of master_id2 to master_id1.
|
// Add all the shapes of master_id2 to master_id1.
|
||||||
shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
|
shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
|
||||||
tprintf("Merged shape %d->%d, %d->%d, now with %d unichars: %s\n",
|
|
||||||
shape_id1, master_id1, shape_id2, master_id2,
|
|
||||||
shape_table_[master_id1]->size(),
|
|
||||||
DebugStr(master_id1).string());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the destination of this shape, (if merged), taking into account
|
// Returns the destination of this shape, (if merged), taking into account
|
||||||
|
@ -41,7 +41,7 @@ struct UnicharAndFonts {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Writes to the given file. Returns false in case of error.
|
// Writes to the given file. Returns false in case of error.
|
||||||
bool Serialize(FILE* fp);
|
bool Serialize(FILE* fp) const;
|
||||||
// Reads from the given file. Returns false in case of error.
|
// Reads from the given file. Returns false in case of error.
|
||||||
// If swap is true, assumes a big/little-endian swap is needed.
|
// If swap is true, assumes a big/little-endian swap is needed.
|
||||||
bool DeSerialize(bool swap, FILE* fp);
|
bool DeSerialize(bool swap, FILE* fp);
|
||||||
@ -49,8 +49,8 @@ struct UnicharAndFonts {
|
|||||||
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
|
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
|
||||||
static int SortByUnicharId(const void* v1, const void* v2);
|
static int SortByUnicharId(const void* v1, const void* v2);
|
||||||
|
|
||||||
GenericVector<int> font_ids;
|
GenericVector<inT32> font_ids;
|
||||||
int unichar_id;
|
inT32 unichar_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
// A Shape is a collection of unichar-ids and a list of fonts associated with
|
// A Shape is a collection of unichar-ids and a list of fonts associated with
|
||||||
@ -64,7 +64,7 @@ class Shape {
|
|||||||
Shape() : destination_index_(-1) {}
|
Shape() : destination_index_(-1) {}
|
||||||
|
|
||||||
// Writes to the given file. Returns false in case of error.
|
// Writes to the given file. Returns false in case of error.
|
||||||
bool Serialize(FILE* fp);
|
bool Serialize(FILE* fp) const;
|
||||||
// Reads from the given file. Returns false in case of error.
|
// Reads from the given file. Returns false in case of error.
|
||||||
// If swap is true, assumes a big/little-endian swap is needed.
|
// If swap is true, assumes a big/little-endian swap is needed.
|
||||||
bool DeSerialize(bool swap, FILE* fp);
|
bool DeSerialize(bool swap, FILE* fp);
|
||||||
@ -188,9 +188,9 @@ class ShapeTable {
|
|||||||
int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes);
|
int BuildFromShape(const Shape& shape, const ShapeTable& master_shapes);
|
||||||
|
|
||||||
// Returns true if the shapes are already merged.
|
// Returns true if the shapes are already merged.
|
||||||
bool AlreadyMerged(int shape_id1, int shape_id2);
|
bool AlreadyMerged(int shape_id1, int shape_id2) const;
|
||||||
// Returns true if any shape contains multiple unichars.
|
// Returns true if any shape contains multiple unichars.
|
||||||
bool AnyMultipleUnichars();
|
bool AnyMultipleUnichars() const;
|
||||||
// Returns the maximum number of unichars over all shapes.
|
// Returns the maximum number of unichars over all shapes.
|
||||||
int MaxNumUnichars() const;
|
int MaxNumUnichars() const;
|
||||||
// Merges shapes with a common unichar over the [start, end) interval.
|
// Merges shapes with a common unichar over the [start, end) interval.
|
||||||
|
Loading…
Reference in New Issue
Block a user