mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 18:27:48 +08:00
2b7df59187
They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de>
153 lines
4.8 KiB
C++
153 lines
4.8 KiB
C++
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "tesseract/ccutil/serialis.h"
|
|
#include "tesseract/ccutil/unicharset.h"
|
|
#include "tesseract/classify/shapetable.h"
|
|
|
|
namespace {
|
|
|
|
using tesseract::Shape;
|
|
using tesseract::ShapeTable;
|
|
using tesseract::TFile;
|
|
using tesseract::UnicharAndFonts;
|
|
|
|
static string TmpNameToPath(const string& name) {
|
|
return file::JoinPath(FLAGS_test_tmpdir, name);
|
|
}
|
|
|
|
// Sets up a simple shape with some unichars.
|
|
static void Setup352(int font_id, Shape* shape) {
|
|
shape->AddToShape(3, font_id);
|
|
shape->AddToShape(5, font_id);
|
|
shape->AddToShape(2, font_id);
|
|
}
|
|
|
|
// Verifies some properties of the 352 shape.
|
|
static void Expect352(int font_id, const Shape& shape) {
|
|
EXPECT_EQ(3, shape.size());
|
|
EXPECT_TRUE(shape.ContainsUnichar(2));
|
|
EXPECT_TRUE(shape.ContainsUnichar(3));
|
|
EXPECT_TRUE(shape.ContainsUnichar(5));
|
|
EXPECT_FALSE(shape.ContainsUnichar(1));
|
|
EXPECT_TRUE(shape.ContainsUnicharAndFont(2, font_id));
|
|
EXPECT_FALSE(shape.ContainsUnicharAndFont(2, font_id - 1));
|
|
EXPECT_FALSE(shape.ContainsUnicharAndFont(font_id, 2));
|
|
// It should be a subset of itself.
|
|
EXPECT_TRUE(shape.IsSubsetOf(shape));
|
|
}
|
|
|
|
// The fixture for testing Shape.
|
|
class ShapeTest : public testing::Test {
|
|
};
|
|
|
|
// Tests that a Shape works as expected for all the basic functions.
|
|
TEST_F(ShapeTest, BasicTest) {
|
|
Shape shape1;
|
|
EXPECT_EQ(0, shape1.size());
|
|
Setup352(101, &shape1);
|
|
Expect352(101, shape1);
|
|
// It should still work after file I/O.
|
|
string filename = TmpNameToPath("shapefile");
|
|
FILE* fp = fopen(filename.c_str(), "wb");
|
|
EXPECT_TRUE(fp != NULL);
|
|
EXPECT_TRUE(shape1.Serialize(fp));
|
|
fclose(fp);
|
|
TFile tfp;
|
|
EXPECT_TRUE(tfp.Open(filename.c_str(), nullptr));
|
|
Shape shape2;
|
|
EXPECT_TRUE(shape2.DeSerialize(&tfp));
|
|
Expect352(101, shape2);
|
|
// They should be subsets of each other.
|
|
EXPECT_TRUE(shape1.IsSubsetOf(shape2));
|
|
EXPECT_TRUE(shape2.IsSubsetOf(shape1));
|
|
// They should be equal unichars.
|
|
EXPECT_TRUE(shape1.IsEqualUnichars(&shape2));
|
|
// and still pass afterwards.
|
|
Expect352(101, shape1);
|
|
Expect352(101, shape2);
|
|
}
|
|
|
|
// Tests AddShape separately, as it takes quite a bit of work.
|
|
TEST_F(ShapeTest, AddShapeTest) {
|
|
Shape shape1;
|
|
Setup352(101, &shape1);
|
|
Expect352(101, shape1);
|
|
// Now setup a different shape with different content.
|
|
Shape shape2;
|
|
shape2.AddToShape(3, 101); // Duplicates shape1.
|
|
shape2.AddToShape(5, 110); // Different font to shape1.
|
|
shape2.AddToShape(7, 101); // Different unichar to shape1.
|
|
// They should NOT be subsets of each other.
|
|
EXPECT_FALSE(shape1.IsSubsetOf(shape2));
|
|
EXPECT_FALSE(shape2.IsSubsetOf(shape1));
|
|
// Now add shape2 to shape1.
|
|
shape1.AddShape(shape2);
|
|
// Test subsets again.
|
|
EXPECT_FALSE(shape1.IsSubsetOf(shape2));
|
|
EXPECT_TRUE(shape2.IsSubsetOf(shape1));
|
|
EXPECT_EQ(4, shape1.size());
|
|
EXPECT_FALSE(shape1.ContainsUnichar(1));
|
|
EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 101));
|
|
EXPECT_TRUE(shape1.ContainsUnicharAndFont(5, 110));
|
|
EXPECT_FALSE(shape1.ContainsUnicharAndFont(3, 110));
|
|
EXPECT_FALSE(shape1.ContainsUnicharAndFont(7, 110));
|
|
EXPECT_FALSE(shape1.IsEqualUnichars(&shape2));
|
|
}
|
|
|
|
// The fixture for testing Shape.
|
|
class ShapeTableTest : public testing::Test {
|
|
};
|
|
|
|
// Tests that a Shape works as expected for all the basic functions.
|
|
TEST_F(ShapeTableTest, FullTest) {
|
|
Shape shape1;
|
|
Setup352(101, &shape1);
|
|
// Build a shape table with the same data, but in separate shapes.
|
|
UNICHARSET unicharset;
|
|
unicharset.unichar_insert(" ");
|
|
for (int i = 1; i <= 10; ++i) {
|
|
string class_str = StringPrintf("class%d", i);
|
|
unicharset.unichar_insert(class_str.c_str());
|
|
}
|
|
ShapeTable st(unicharset);
|
|
EXPECT_EQ(0, st.AddShape(3, 101));
|
|
EXPECT_EQ(1, st.AddShape(5, 101));
|
|
EXPECT_EQ(2, st.AddShape(2, 101));
|
|
EXPECT_EQ(3, st.NumShapes());
|
|
Expect352(101, shape1);
|
|
EXPECT_EQ(3, st.AddShape(shape1));
|
|
for (int i = 0; i < 3; ++i) {
|
|
EXPECT_FALSE(st.MutableShape(i)->IsEqualUnichars(&shape1));
|
|
}
|
|
EXPECT_TRUE(st.MutableShape(3)->IsEqualUnichars(&shape1));
|
|
EXPECT_TRUE(st.AnyMultipleUnichars());
|
|
st.DeleteShape(3);
|
|
EXPECT_FALSE(st.AnyMultipleUnichars());
|
|
|
|
// Now merge to make a single shape like shape1.
|
|
EXPECT_EQ(1, st.MasterUnicharCount(0));
|
|
st.MergeShapes(0, 1);
|
|
EXPECT_EQ(3, st.MergedUnicharCount(1, 2));
|
|
st.MergeShapes(1, 2);
|
|
for (int i = 0; i < 3; ++i) {
|
|
EXPECT_EQ(3, st.MasterUnicharCount(i));
|
|
// Master font count is the sum of all the font counts in the shape, not
|
|
// the actual number of different fonts in the shape.
|
|
EXPECT_EQ(3, st.MasterFontCount(i));
|
|
}
|
|
EXPECT_EQ(0, st.MasterDestinationIndex(1));
|
|
EXPECT_EQ(0, st.MasterDestinationIndex(2));
|
|
ShapeTable st2;
|
|
st2.AppendMasterShapes(st, NULL);
|
|
EXPECT_EQ(1, st.NumMasterShapes());
|
|
EXPECT_EQ(1, st2.NumShapes());
|
|
EXPECT_TRUE(st2.MutableShape(0)->IsEqualUnichars(&shape1));
|
|
EXPECT_TRUE(st2.AnyMultipleUnichars());
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|