From 05cdbc7c9c49e067de400609e9f776d52118bc22 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 21 Jan 2019 11:47:10 +0100 Subject: [PATCH] Fix and enable dawg_test Signed-off-by: Stefan Weil --- unittest/Makefile.am | 6 ++--- unittest/dawg_test.cc | 57 +++++++++++++++++-------------------------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 127684d6..c467832b 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -1,6 +1,3 @@ -# Absolute path of directory 'src'. -TESS_SRC_DIR=$(shell cd $(top_srcdir) && pwd)/src - # Absolute path of directory 'langdata'. LANGDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/langdata_lstm @@ -15,7 +12,7 @@ TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing # (using submodule test). TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata -AM_CPPFLAGS += -DTESS_SRC_DIR="\"$(TESS_SRC_DIR)\"" +AM_CPPFLAGS += -DTESSBIN_DIR="\"$(abs_top_builddir)\"" AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\"" AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\"" AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\"" @@ -107,6 +104,7 @@ check_PROGRAMS = \ bitvector_test \ cleanapi_test \ colpartition_test \ + dawg_test \ denorm_test \ fileio_test \ heap_test \ diff --git a/unittest/dawg_test.cc b/unittest/dawg_test.cc index d21de65f..b8ee8fde 100644 --- a/unittest/dawg_test.cc +++ b/unittest/dawg_test.cc @@ -1,4 +1,6 @@ +#include <cstdlib> // for system +#include <fstream> // for ifstream #include <set> #include <string> #include <vector> @@ -8,55 +10,42 @@ #include "trie.h" #include "include_gunit.h" -#include "base/filelinereader.h" -#include "util/process/subprocess.h" namespace { -void RemoveTrailingLineTerminators(char* line) { - char* end = line + strlen(line) - 1; - while (end >= line && ('\n' == *end || '\r' == *end)) { - *end-- = 0; - } -} - -void AddLineToSet(std::set<std::string>* words, char* line) { - RemoveTrailingLineTerminators(line); - words->insert(line); -} - // Test some basic functionality dealing 
with Dawgs (compressed dictionaries, // aka Directed Acyclic Word Graphs). class DawgTest : public testing::Test { protected: void LoadWordlist(const std::string& filename, std::set<std::string>* words) const { - FileLineReader::Options options; - options.set_comment_char(0); - FileLineReader flr(filename.c_str(), options); - flr.set_line_callback(NewPermanentCallback(AddLineToSet, words)); - flr.Reload(); + std::ifstream file(filename); + if (file.is_open()) { + std::string line; + while (getline(file, line)) { + // Remove trailing line terminators from line. + while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) { + line.resize(line.size() - 1); + } + // Add line to set. + words->insert(line.c_str()); + } + file.close(); + } } std::string TestDataNameToPath(const std::string& name) const { - return file::JoinPath(TESTDATA_DIR, "/" + name); + return file::JoinPath(TESTDATA_DIR, name); } - std::string TessBinaryPath(const std::string& binary_name) const { - return file::JoinPath(TESS_SRC_DIR, + std::string TessBinaryPath(const std::string& name) const { + return file::JoinPath(TESSBIN_DIR, "src/training/" + name); } std::string OutputNameToPath(const std::string& name) const { return file::JoinPath(FLAGS_test_tmpdir, name); } - int RunCommand(const std::string& program, const std::string& arg1, const std::string& arg2, - const std::string& arg3) const { - SubProcess p; - std::vector<std::string> argv; - argv.push_back(program); - argv.push_back(arg1); - argv.push_back(arg2); - argv.push_back(arg3); - p.SetProgram(TessBinaryPath(program), argv); - p.Start(); - p.Wait(); - return p.exit_code(); + int RunCommand(const std::string& program, const std::string& arg1, + const std::string& arg2, const std::string& arg3) const { + std::string cmdline = + TessBinaryPath(program) + " " + arg1 + " " + arg2 + " " + arg3; + return system(cmdline.c_str()); } // Test that we are able to convert a wordlist file (one "word" per line) to // a dawg (a compressed format) and then extract the 
original wordlist back