Merge pull request #2668 from stweil/api

Remove STRING from the public Tesseract API
This commit is contained in:
Egor Pugin 2019-09-23 01:02:26 +03:00 committed by GitHub
commit 1fa7324cf7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 42 additions and 38 deletions

View File

@ -24,6 +24,7 @@
#include <string> // for std::string #include <string> // for std::string
#include "genericvector.h" #include "genericvector.h"
#include "platform.h" #include "platform.h"
#include "strngs.h" // for STRING
struct Pix; struct Pix;

View File

@ -56,7 +56,7 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
GenericVector<STRING>* box_texts, GenericVector<STRING>* box_texts,
GenericVector<int>* pages) { GenericVector<int>* pages) {
GenericVector<char> box_data; GenericVector<char> box_data;
if (!tesseract::LoadDataFromFile(BoxFileName(filename), &box_data)) if (!tesseract::LoadDataFromFile(BoxFileName(filename).c_str(), &box_data))
return false; return false;
// Convert the array of bytes to a string, so it can be used by the parser. // Convert the array of bytes to a string, so it can be used by the parser.
box_data.push_back('\0'); box_data.push_back('\0');

View File

@ -16,7 +16,7 @@ endif
pkginclude_HEADERS = \ pkginclude_HEADERS = \
genericvector.h helpers.h \ genericvector.h helpers.h \
ocrclass.h platform.h serialis.h strngs.h \ ocrclass.h platform.h serialis.h \
unichar.h unichar.h
noinst_HEADERS = \ noinst_HEADERS = \
@ -25,7 +25,8 @@ noinst_HEADERS = \
genericheap.h globaloc.h host.h \ genericheap.h globaloc.h host.h \
kdpair.h lsterr.h \ kdpair.h lsterr.h \
object_cache.h params.h qrsequence.h sorthelper.h \ object_cache.h params.h qrsequence.h sorthelper.h \
scanutils.h tessdatamanager.h tprintf.h \ scanutils.h strngs.h \
tessdatamanager.h tprintf.h \
unicharcompress.h unicharmap.h unicharset.h unicity_table.h unicodes.h unicharcompress.h unicharmap.h unicharset.h unicity_table.h unicodes.h
if !DISABLED_LEGACY_ENGINE if !DISABLED_LEGACY_ENGINE

View File

@ -21,13 +21,14 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <climits> // for LONG_MAX
#include <cstdint> // for uint32_t
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <functional> // for std::function #include <functional> // for std::function
#include "helpers.h" #include "helpers.h"
#include "serialis.h" #include "serialis.h"
#include "strngs.h"
// Use PointerVector<T> below in preference to GenericVector<T*>, as that // Use PointerVector<T> below in preference to GenericVector<T*>, as that
// provides automatic deletion of pointers, [De]Serialize that works, and // provides automatic deletion of pointers, [De]Serialize that works, and
@ -361,16 +362,11 @@ inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
return result; return result;
} }
inline bool LoadDataFromFile(const STRING& filename,
GenericVector<char>* data) {
return LoadDataFromFile(filename.string(), data);
}
// The default FileWriter writes the vector of char to the filename file, // The default FileWriter writes the vector of char to the filename file,
// returning false on error. // returning false on error.
inline bool SaveDataToFile(const GenericVector<char>& data, inline bool SaveDataToFile(const GenericVector<char>& data,
const STRING& filename) { const char* filename) {
FILE* fp = fopen(filename.string(), "wb"); FILE* fp = fopen(filename, "wb");
if (fp == nullptr) { if (fp == nullptr) {
return false; return false;
} }
@ -379,17 +375,6 @@ inline bool SaveDataToFile(const GenericVector<char>& data,
fclose(fp); fclose(fp);
return result; return result;
} }
// Reads a file as a vector of STRING.
inline bool LoadFileLinesToStrings(const STRING& filename,
GenericVector<STRING>* lines) {
GenericVector<char> data;
if (!LoadDataFromFile(filename.string(), &data)) {
return false;
}
STRING lines_str(&data[0], data.size());
lines_str.split('\n', lines);
return true;
}
template <typename T> template <typename T>
bool cmp_eq(T const& t1, T const& t2) { bool cmp_eq(T const& t1, T const& t2) {

View File

@ -2,7 +2,6 @@
* File: serialis.cpp (Formerly serialmac.h) * File: serialis.cpp (Formerly serialmac.h)
* Description: Inline routines and macros for serialisation functions * Description: Inline routines and macros for serialisation functions
* Author: Phil Cheatle * Author: Phil Cheatle
* Created: Tue Oct 08 08:33:12 BST 1991
* *
* (C) Copyright 1990, Hewlett-Packard Ltd. * (C) Copyright 1990, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@ -21,6 +20,7 @@
#include <cstdio> #include <cstdio>
#include "errcode.h" #include "errcode.h"
#include "genericvector.h" #include "genericvector.h"
#include "strngs.h" // for STRING
namespace tesseract { namespace tesseract {
@ -202,9 +202,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
is_writing_ = false; is_writing_ = false;
swap_ = false; swap_ = false;
if (reader == nullptr) if (reader == nullptr)
return LoadDataFromFile(filename, data_); return LoadDataFromFile(filename.c_str(), data_);
else else
return (*reader)(filename, data_); return (*reader)(filename.c_str(), data_);
} }
bool TFile::Open(const char* data, int size) { bool TFile::Open(const char* data, int size) {
@ -310,9 +310,9 @@ void TFile::OpenWrite(GenericVector<char>* data) {
bool TFile::CloseWrite(const STRING& filename, FileWriter writer) { bool TFile::CloseWrite(const STRING& filename, FileWriter writer) {
ASSERT_HOST(is_writing_); ASSERT_HOST(is_writing_);
if (writer == nullptr) if (writer == nullptr)
return SaveDataToFile(*data_, filename); return SaveDataToFile(*data_, filename.c_str());
else else
return (*writer)(*data_, filename); return (*writer)(*data_, filename.c_str());
} }
int TFile::FWrite(const void* buffer, size_t size, int count) { int TFile::FWrite(const void* buffer, size_t size, int count) {

View File

@ -46,10 +46,10 @@ constexpr size_t countof(T const (&)[N]) noexcept {
// Function to read a GenericVector<char> from a whole file. // Function to read a GenericVector<char> from a whole file.
// Returns false on failure. // Returns false on failure.
using FileReader = bool (*)(const STRING&, GenericVector<char>*); using FileReader = bool (*)(const char* filename, GenericVector<char>* data);
// Function to write a GenericVector<char> to a whole file. // Function to write a GenericVector<char> to a whole file.
// Returns false on failure. // Returns false on failure.
using FileWriter = bool (*)(const GenericVector<char>&, const STRING&); using FileWriter = bool (*)(const GenericVector<char>& data, const char* filename);
// Deserialize data from file. // Deserialize data from file.
bool DeSerialize(FILE* fp, char* data, size_t n = 1); bool DeSerialize(FILE* fp, char* data, size_t n = 1);

View File

@ -157,9 +157,9 @@ bool TessdataManager::SaveFile(const STRING &filename,
GenericVector<char> data; GenericVector<char> data;
Serialize(&data); Serialize(&data);
if (writer == nullptr) if (writer == nullptr)
return SaveDataToFile(data, filename); return SaveDataToFile(data, filename.c_str());
else else
return (*writer)(data, filename); return (*writer)(data, filename.c_str());
} }
// Serializes to the given vector. // Serializes to the given vector.
@ -253,7 +253,7 @@ bool TessdataManager::CombineDataFiles(
FILE *fp = fopen(filename.string(), "rb"); FILE *fp = fopen(filename.string(), "rb");
if (fp != nullptr) { if (fp != nullptr) {
fclose(fp); fclose(fp);
if (!LoadDataFromFile(filename, &entries_[type])) { if (!LoadDataFromFile(filename.c_str(), &entries_[type])) {
tprintf("Load of file %s failed!\n", filename.string()); tprintf("Load of file %s failed!\n", filename.string());
return false; return false;
} }

View File

@ -2,7 +2,6 @@
// File: tessdatamanager.h // File: tessdatamanager.h
// Description: Functions to handle loading/combining tesseract data files. // Description: Functions to handle loading/combining tesseract data files.
// Author: Daria Antonova // Author: Daria Antonova
// Created: Wed Jun 03 11:26:43 PST 2009
// //
// (C) Copyright 2009, Google Inc. // (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
@ -21,6 +20,7 @@
#define TESSERACT_CCUTIL_TESSDATAMANAGER_H_ #define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
#include "genericvector.h" #include "genericvector.h"
#include "strngs.h" // for STRING
static const char kTrainedDataSuffix[] = "traineddata"; static const char kTrainedDataSuffix[] = "traineddata";

View File

@ -27,6 +27,7 @@
#include "networkio.h" #include "networkio.h"
#include "serialis.h" #include "serialis.h"
#include "static_shape.h" #include "static_shape.h"
#include "strngs.h" // for STRING
#include "tprintf.h" #include "tprintf.h"
struct Pix; struct Pix;

View File

@ -2,7 +2,6 @@
* File: fileio.h * File: fileio.h
* Description: File I/O utilities. * Description: File I/O utilities.
* Author: Samuel Charron * Author: Samuel Charron
* Created: Tuesday, July 9, 2013
* *
* (C) Copyright 2013, Google Inc. * (C) Copyright 2013, Google Inc.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not * Licensed under the Apache License, Version 2.0 (the "License"); you may not
@ -21,10 +20,25 @@
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include "genericvector.h" // for GenericVector
#include "platform.h" #include "platform.h"
#include "strngs.h" // for STRING
namespace tesseract { namespace tesseract {
// Reads a file as a vector of STRING, one element per '\n'-separated piece.
//
// filename: path of the file to read, as a C string.
// lines:    output vector that receives the result of splitting the file
//           contents on '\n'.
// Returns false if LoadDataFromFile fails for the given filename, otherwise
// true.
//
// TODO: Use std::vector and std::string for LoadFileLinesToStrings.
inline bool LoadFileLinesToStrings(const char* filename,
GenericVector<STRING>* lines) {
// Read the entire file into a byte buffer first.
GenericVector<char> data;
if (!LoadDataFromFile(filename, &data)) {
return false;
}
// Build the STRING from an explicit pointer + length; no terminating '\0'
// is appended to the buffer here.
// NOTE(review): &data[0] assumes the buffer is non-empty whenever
// LoadDataFromFile reports success -- confirm that it fails on empty files.
STRING lines_str(&data[0], data.size());
lines_str.split('\n', lines);
return true;
}
// A class to manipulate FILE*s. // A class to manipulate FILE*s.
class File { class File {
public: public:

View File

@ -16,6 +16,7 @@
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
#include <thread> // for std::thread #include <thread> // for std::thread
#include "fileio.h" // for LoadFileLinesToStrings
#include "lstmtester.h" #include "lstmtester.h"
#include "genericvector.h" #include "genericvector.h"
@ -29,7 +30,7 @@ LSTMTester::LSTMTester(int64_t max_memory)
// loaded. The arg is a filename of a file that lists the filenames. // loaded. The arg is a filename of a file that lists the filenames.
bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) { bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) {
GenericVector<STRING> filenames; GenericVector<STRING> filenames;
if (!LoadFileLinesToStrings(filenames_file, &filenames)) { if (!LoadFileLinesToStrings(filenames_file.c_str(), &filenames)) {
tprintf("Failed to load list of eval filenames from %s\n", tprintf("Failed to load list of eval filenames from %s\n",
filenames_file.string()); filenames_file.string());
return false; return false;

View File

@ -320,7 +320,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
SaveTrainingDump(NO_BEST_TRAINER, this, &best_trainer_); SaveTrainingDump(NO_BEST_TRAINER, this, &best_trainer_);
if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) { if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
STRING best_model_name = DumpFilename(); STRING best_model_name = DumpFilename();
if (!SaveDataToFile(best_trainer_, best_model_name)) { if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
*log_msg += " failed to write best model:"; *log_msg += " failed to write best model:";
} else { } else {
*log_msg += " wrote best model:"; *log_msg += " wrote best model:";
@ -358,7 +358,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
// Write a current checkpoint. // Write a current checkpoint.
GenericVector<char> checkpoint; GenericVector<char> checkpoint;
if (!SaveTrainingDump(FULL, this, &checkpoint) || if (!SaveTrainingDump(FULL, this, &checkpoint) ||
!SaveDataToFile(checkpoint, checkpoint_name_)) { !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
*log_msg += " failed to write checkpoint."; *log_msg += " failed to write checkpoint.";
} else { } else {
*log_msg += " wrote checkpoint."; *log_msg += " wrote checkpoint.";

View File

@ -20,6 +20,7 @@
#endif #endif
#include <cerrno> #include <cerrno>
#include "commontraining.h" #include "commontraining.h"
#include "fileio.h" // for LoadFileLinesToStrings
#include "lstmtester.h" #include "lstmtester.h"
#include "lstmtrainer.h" #include "lstmtrainer.h"
#include "params.h" #include "params.h"