mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Fix and enable dawg_test
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
aec992ebf8
commit
05cdbc7c9c
@ -1,6 +1,3 @@
|
||||
# Absolute path of directory 'src'.
|
||||
TESS_SRC_DIR=$(shell cd $(top_srcdir) && pwd)/src
|
||||
|
||||
# Absolute path of directory 'langdata_lstm' (sibling of the top source directory).
|
||||
LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm
|
||||
|
||||
@ -15,7 +12,7 @@ TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
|
||||
# (using submodule test).
|
||||
TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
|
||||
|
||||
AM_CPPFLAGS += -DTESS_SRC_DIR="\"$(TESS_SRC_DIR)\""
|
||||
AM_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\""
|
||||
AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
|
||||
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
|
||||
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
|
||||
@ -107,6 +104,7 @@ check_PROGRAMS = \
|
||||
bitvector_test \
|
||||
cleanapi_test \
|
||||
colpartition_test \
|
||||
dawg_test \
|
||||
denorm_test \
|
||||
fileio_test \
|
||||
heap_test \
|
||||
|
@ -1,4 +1,6 @@
|
||||
|
||||
#include <cstdlib> // for system
|
||||
#include <fstream> // for ifstream
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -8,55 +10,42 @@
|
||||
#include "trie.h"
|
||||
|
||||
#include "include_gunit.h"
|
||||
#include "base/filelinereader.h"
|
||||
#include "util/process/subprocess.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Strips every trailing '\n' and '\r' from the NUL-terminated string `line`,
// in place. Characters before the trailing run are left untouched; an empty
// string stays empty.
void RemoveTrailingLineTerminators(char* line) {
  // Work with a length instead of a pointer to the last character: for an
  // empty string `line + strlen(line) - 1` would point in front of the buffer,
  // which is undefined behavior.
  size_t length = strlen(line);
  while (length > 0 && (line[length - 1] == '\n' || line[length - 1] == '\r')) {
    line[--length] = '\0';
  }
}
|
||||
|
||||
// Line callback: removes the trailing line terminators from `line` (in place)
// and stores the remaining text as one entry of `*words`.
void AddLineToSet(std::set<std::string>* words, char* line) {
  RemoveTrailingLineTerminators(line);
  const std::string word(line);
  words->insert(word);
}
|
||||
|
||||
// Test some basic functionality dealing with Dawgs (compressed dictionaries,
|
||||
// aka Directed Acyclic Word Graphs).
|
||||
class DawgTest : public testing::Test {
|
||||
protected:
|
||||
// Reads the text file `filename` line by line and inserts every line, with
// trailing '\n' / '\r' characters removed, into `*words`.
// If the file cannot be opened, `*words` is left unchanged.
void LoadWordlist(const std::string& filename, std::set<std::string>* words) const {
  // One pass with the standard library is enough; a failed open simply makes
  // the first getline fail, so no separate is_open() check is needed.
  std::ifstream file(filename);
  std::string line;
  while (std::getline(file, line)) {
    // getline already drops the '\n'; the loop also copes with CRLF input.
    while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) {
      line.pop_back();
    }
    words->insert(line);
  }
  // `file` is closed by its destructor.
}
|
||||
std::string TestDataNameToPath(const std::string& name) const {
|
||||
return file::JoinPath(TESTDATA_DIR, "/" + name);
|
||||
return file::JoinPath(TESTDATA_DIR, name);
|
||||
}
|
||||
std::string TessBinaryPath(const std::string& binary_name) const {
|
||||
return file::JoinPath(TESS_SRC_DIR,
|
||||
std::string TessBinaryPath(const std::string& name) const {
|
||||
return file::JoinPath(TESSBIN_DIR, "src/training/" + name);
|
||||
}
|
||||
std::string OutputNameToPath(const std::string& name) const {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
}
|
||||
int RunCommand(const std::string& program, const std::string& arg1, const std::string& arg2,
|
||||
const std::string& arg3) const {
|
||||
SubProcess p;
|
||||
std::vector<std::string> argv;
|
||||
argv.push_back(program);
|
||||
argv.push_back(arg1);
|
||||
argv.push_back(arg2);
|
||||
argv.push_back(arg3);
|
||||
p.SetProgram(TessBinaryPath(program), argv);
|
||||
p.Start();
|
||||
p.Wait();
|
||||
return p.exit_code();
|
||||
int RunCommand(const std::string& program, const std::string& arg1,
|
||||
const std::string& arg2, const std::string& arg3) const {
|
||||
std::string cmdline =
|
||||
TessBinaryPath(program) + " " + arg1 + " " + arg2 + " " + arg3;
|
||||
return system(cmdline.c_str());
|
||||
}
|
||||
// Test that we are able to convert a wordlist file (one "word" per line) to
|
||||
// a dawg (a compressed format) and then extract the original wordlist back
|
||||
|
Loading…
Reference in New Issue
Block a user