mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Use const char* for filename parameters
This replaces the proprietary STRING data type in filename parameters (801 instead of 838 lines using STRING remaining). It also removes STRING from osdetect.h and serialis.h.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
d01b2e43b8
commit
8137cf35a6
@ -25,7 +25,6 @@
|
||||
class BLOBNBOX;
|
||||
class BLOBNBOX_CLIST;
|
||||
class BLOB_CHOICE_LIST;
|
||||
class STRING;
|
||||
class TO_BLOCK_LIST;
|
||||
class UNICHARSET;
|
||||
template <typename T>
|
||||
@ -117,7 +116,7 @@ class ScriptDetector {
|
||||
const GenericVector<int>* allowed_scripts_;
|
||||
};
|
||||
|
||||
int orientation_and_script_detection(STRING& filename, OSResults*,
|
||||
int orientation_and_script_detection(const char* filename, OSResults*,
|
||||
tesseract::Tesseract*);
|
||||
|
||||
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
|
||||
|
@ -26,7 +26,6 @@
|
||||
|
||||
template <typename T>
|
||||
class GenericVector;
|
||||
class STRING;
|
||||
|
||||
/***********************************************************************
|
||||
QUOTE_IT MACRO DEFINITION
|
||||
@ -82,7 +81,7 @@ class TFile {
|
||||
// All the Open methods load the whole file into memory for reading.
|
||||
// Opens a file with a supplied reader, or nullptr to use the default.
|
||||
// Note that mixed read/write is not supported.
|
||||
bool Open(const STRING& filename, FileReader reader);
|
||||
bool Open(const char* filename, FileReader reader);
|
||||
// From an existing memory buffer.
|
||||
bool Open(const char* data, int size);
|
||||
// From an open file and an end offset.
|
||||
@ -139,7 +138,7 @@ class TFile {
|
||||
// calling FWrite, (no close required), or supply a nullptr data to OpenWrite
|
||||
// and call CloseWrite to write to a file after the FWrites.
|
||||
void OpenWrite(GenericVector<char>* data);
|
||||
bool CloseWrite(const STRING& filename, FileWriter writer);
|
||||
bool CloseWrite(const char* filename, FileWriter writer);
|
||||
|
||||
// Replicates fwrite, returning the number of items written.
|
||||
// To use fprintf, use snprintf and FWrite.
|
||||
|
@ -843,9 +843,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
recognition_done_ = true;
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
if (tesseract_->tessedit_resegment_from_line_boxes) {
|
||||
page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
|
||||
page_res_ = tesseract_->ApplyBoxes(input_file_->c_str(), true, block_list_);
|
||||
} else if (tesseract_->tessedit_resegment_from_boxes) {
|
||||
page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
|
||||
page_res_ = tesseract_->ApplyBoxes(input_file_->c_str(), false, block_list_);
|
||||
} else
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
{
|
||||
@ -858,7 +858,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
}
|
||||
|
||||
if (tesseract_->tessedit_train_line_recognizer) {
|
||||
if (!tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_)) {
|
||||
if (!tesseract_->TrainLineRecognizer(input_file_->c_str(), *output_file_, block_list_)) {
|
||||
return -1;
|
||||
}
|
||||
tesseract_->CorrectClassifyWords(page_res_);
|
||||
@ -895,13 +895,14 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
} else if (tesseract_->tessedit_train_from_boxes) {
|
||||
STRING fontname;
|
||||
ExtractFontName(*output_file_, &fontname);
|
||||
ExtractFontName(output_file_->c_str(), &fontname);
|
||||
tesseract_->ApplyBoxTraining(fontname, page_res_);
|
||||
} else if (tesseract_->tessedit_ambigs_training) {
|
||||
FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
|
||||
FILE* training_output_file =
|
||||
tesseract_->init_recog_training(input_file_->c_str());
|
||||
// OCR the page segmented into words by tesseract.
|
||||
tesseract_->recog_training_segmented(
|
||||
*input_file_, page_res_, monitor, training_output_file);
|
||||
input_file_->c_str(), page_res_, monitor, training_output_file);
|
||||
fclose(training_output_file);
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
} else {
|
||||
@ -1081,7 +1082,7 @@ bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
if (result) {
|
||||
if (tesseract_->tessedit_train_from_boxes &&
|
||||
!tesseract_->WriteTRFile(*output_file_)) {
|
||||
!tesseract_->WriteTRFile(output_file_->c_str())) {
|
||||
tprintf("Write of TR file failed: %s\n", output_file_->c_str());
|
||||
return false;
|
||||
}
|
||||
@ -2215,7 +2216,7 @@ bool TessBaseAPI::DetectOS(OSResults* osr) {
|
||||
|
||||
if (input_file_ == nullptr)
|
||||
input_file_ = new STRING(kInputFile);
|
||||
return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
|
||||
return orientation_and_script_detection(input_file_->c_str(), osr, tesseract_) > 0;
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
|
@ -90,7 +90,7 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the box file based on the image name fname, and resegments
|
||||
// Applies the box file based on the image name filename, and resegments
|
||||
// the words in the block_list (page), with:
|
||||
// blob-mode: one blob per line in the box file, words as input.
|
||||
// word/line-mode: one blob per space-delimited unit after the #, and one word
|
||||
@ -110,12 +110,12 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
|
||||
// Instead, the correct_text member of WERD_RES is set, and this may be later
|
||||
// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
|
||||
// is not required before calling ApplyBoxTraining.
|
||||
PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
|
||||
PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
|
||||
bool find_segmentation,
|
||||
BLOCK_LIST *block_list) {
|
||||
GenericVector<TBOX> boxes;
|
||||
GenericVector<STRING> texts, full_texts;
|
||||
if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
|
||||
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
|
||||
nullptr)) {
|
||||
return nullptr; // Can't do it.
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ const float kWorstDictCertainty = -25.0f;
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
// Return true if successful, false if an error occurred.
|
||||
bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
bool Tesseract::TrainLineRecognizer(const char* input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST *block_list) {
|
||||
STRING lstmf_name = output_basename + ".lstmf";
|
||||
@ -58,12 +58,12 @@ bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
||||
nullptr) ||
|
||||
boxes.empty()) {
|
||||
tprintf("Failed to read boxes from %s\n", input_imagename.c_str());
|
||||
tprintf("Failed to read boxes from %s\n", input_imagename);
|
||||
return false;
|
||||
}
|
||||
TrainFromBoxes(boxes, texts, block_list, &images);
|
||||
if (images.PagesSize() == 0) {
|
||||
tprintf("Failed to read pages from %s\n", input_imagename.c_str());
|
||||
tprintf("Failed to read pages from %s\n", input_imagename);
|
||||
return false;
|
||||
}
|
||||
images.Shuffle();
|
||||
|
@ -187,7 +187,7 @@ static void remove_nontext_regions(tesseract::Tesseract *tess,
|
||||
// Find connected components in the page and process a subset until finished or
|
||||
// a stopping criterion is met.
|
||||
// Returns the number of blobs used in making the estimate. 0 implies failure.
|
||||
int orientation_and_script_detection(STRING& filename,
|
||||
int orientation_and_script_detection(const char* filename,
|
||||
OSResults* osr,
|
||||
tesseract::Tesseract* tess) {
|
||||
STRING name = filename; //truncated name
|
||||
|
@ -33,7 +33,7 @@ const int16_t kMaxBoxEdgeDiff = 2;
|
||||
|
||||
// Sets flags necessary for recognition in the training mode.
|
||||
// Opens and returns the pointer to the output file.
|
||||
FILE* Tesseract::init_recog_training(const STRING& fname) {
|
||||
FILE* Tesseract::init_recog_training(const char* filename) {
|
||||
if (tessedit_ambigs_training) {
|
||||
tessedit_tess_adaption_mode.set_value(0); // turn off adaption
|
||||
tessedit_enable_doc_dict.set_value(0); // turn off document dictionary
|
||||
@ -41,7 +41,7 @@ FILE* Tesseract::init_recog_training(const STRING& fname) {
|
||||
getDict().stopper_no_acceptable_choices.set_value(1);
|
||||
}
|
||||
|
||||
STRING output_fname = fname;
|
||||
STRING output_fname = filename;
|
||||
const char* lastdot = strrchr(output_fname.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
output_fname[lastdot - output_fname.c_str()] = '\0';
|
||||
@ -81,11 +81,11 @@ static bool read_t(PAGE_RES_IT* page_res_it, TBOX* tbox) {
|
||||
// match to those specified by the input box file. For each word (ngram in a
|
||||
// single bounding box from the input box file) it outputs the ocred result,
|
||||
// the correct label, rating and certainty.
|
||||
void Tesseract::recog_training_segmented(const STRING& fname,
|
||||
void Tesseract::recog_training_segmented(const char* filename,
|
||||
PAGE_RES* page_res,
|
||||
volatile ETEXT_DESC* monitor,
|
||||
FILE* output_file) {
|
||||
STRING box_fname = fname;
|
||||
std::string box_fname = filename;
|
||||
const char* lastdot = strrchr(box_fname.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
box_fname[lastdot - box_fname.c_str()] = '\0';
|
||||
|
@ -340,7 +340,7 @@ class Tesseract : public Wordrec {
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
// Return true if successful, false if an error occurred.
|
||||
bool TrainLineRecognizer(const STRING& input_imagename,
|
||||
bool TrainLineRecognizer(const char* input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST* block_list);
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
@ -678,7 +678,7 @@ class Tesseract : public Wordrec {
|
||||
bool tess_acceptable_word(WERD_RES* word);
|
||||
|
||||
//// applybox.cpp //////////////////////////////////////////////////////
|
||||
// Applies the box file based on the image name fname, and resegments
|
||||
// Applies the box file based on the image name filename, and resegments
|
||||
// the words in the block_list (page), with:
|
||||
// blob-mode: one blob per line in the box file, words as input.
|
||||
// word/line-mode: one blob per space-delimited unit after the #, and one word
|
||||
@ -698,7 +698,7 @@ class Tesseract : public Wordrec {
|
||||
// Instead, the correct_text member of WERD_RES is set, and this may be later
|
||||
// converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
|
||||
// is not required before calling ApplyBoxTraining.
|
||||
PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation,
|
||||
PAGE_RES* ApplyBoxes(const char* filename, bool find_segmentation,
|
||||
BLOCK_LIST* block_list);
|
||||
|
||||
// Any row xheight that is significantly different from the median is set
|
||||
@ -1102,8 +1102,8 @@ class Tesseract : public Wordrec {
|
||||
"Detect music staff and remove intersecting components");
|
||||
|
||||
//// ambigsrecog.cpp /////////////////////////////////////////////////////////
|
||||
FILE* init_recog_training(const STRING& fname);
|
||||
void recog_training_segmented(const STRING& fname, PAGE_RES* page_res,
|
||||
FILE* init_recog_training(const char* filename);
|
||||
void recog_training_segmented(const char* filename, PAGE_RES* page_res,
|
||||
volatile ETEXT_DESC* monitor,
|
||||
FILE* output_file);
|
||||
void ambigs_classify_and_output(const char* label, PAGE_RES_IT* pr_it,
|
||||
|
@ -51,8 +51,8 @@ static std::string BoxFileName(const char* image_filename) {
|
||||
}
|
||||
|
||||
// Open the boxfile based on the given image filename.
|
||||
FILE* OpenBoxFile(const STRING& fname) {
|
||||
std::string filename = BoxFileName(fname.c_str());
|
||||
FILE* OpenBoxFile(const char* fname) {
|
||||
std::string filename = BoxFileName(fname);
|
||||
FILE* box_file = nullptr;
|
||||
if (!(box_file = fopen(filename.c_str(), "rb"))) {
|
||||
CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
|
||||
@ -68,13 +68,13 @@ FILE* OpenBoxFile(const STRING& fname) {
|
||||
// a string is put in box_texts, with the corresponding page number in pages.
|
||||
// Each of the output vectors is optional (may be nullptr).
|
||||
// Returns false if no boxes are found.
|
||||
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
|
||||
bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename,
|
||||
GenericVector<TBOX>* boxes,
|
||||
GenericVector<STRING>* texts,
|
||||
GenericVector<STRING>* box_texts,
|
||||
GenericVector<int>* pages) {
|
||||
GenericVector<char> box_data;
|
||||
if (!tesseract::LoadDataFromFile(BoxFileName(filename.c_str()).c_str(), &box_data))
|
||||
if (!tesseract::LoadDataFromFile(BoxFileName(filename).c_str(), &box_data))
|
||||
return false;
|
||||
// Convert the array of bytes to a string, so it can be used by the parser.
|
||||
box_data.push_back('\0');
|
||||
|
@ -32,7 +32,7 @@ const int kBoxReadBufSize = 1024;
|
||||
|
||||
// Open the boxfile based on the given image filename.
|
||||
// Returns nullptr if the box file cannot be opened.
|
||||
FILE* OpenBoxFile(const STRING& fname);
|
||||
FILE* OpenBoxFile(const char* filename);
|
||||
|
||||
// Reads all boxes from the given filename.
|
||||
// Reads a specific target_page number if >= 0, or all pages otherwise.
|
||||
@ -41,7 +41,7 @@ FILE* OpenBoxFile(const STRING& fname);
|
||||
// a string is put in box_texts, with the corresponding page number in pages.
|
||||
// Each of the output vectors is optional (may be nullptr).
|
||||
// Returns false if no boxes are found.
|
||||
bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
|
||||
bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename,
|
||||
GenericVector<TBOX>* boxes,
|
||||
GenericVector<STRING>* texts,
|
||||
GenericVector<STRING>* box_texts,
|
||||
|
@ -539,7 +539,7 @@ bool DocumentData::ReCachePages() {
|
||||
int loaded_pages = 0;
|
||||
pages_.truncate(0);
|
||||
TFile fp;
|
||||
if (!fp.Open(document_name_, reader_) ||
|
||||
if (!fp.Open(document_name_.c_str(), reader_) ||
|
||||
!PointerVector<ImageData>::DeSerializeSize(&fp, &loaded_pages) ||
|
||||
loaded_pages <= 0) {
|
||||
tprintf("Deserialize header failed: %s\n", document_name_.c_str());
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include <mutex> // for std::mutex
|
||||
#include <functional> // for std::function
|
||||
#include <string>
|
||||
#include "ccutil.h"
|
||||
#include "errcode.h"
|
||||
#include <tesseract/genericvector.h>
|
||||
@ -56,7 +57,7 @@ class ObjectCache {
|
||||
// and return nullptr -- further attempts to load will fail (even
|
||||
// with a different loader) until DeleteUnusedObjects() is called.
|
||||
// We delete the given loader.
|
||||
T* Get(STRING id, std::function<T*()> loader) {
|
||||
T* Get(const std::string& id, std::function<T*()> loader) {
|
||||
T *retval = nullptr;
|
||||
std::lock_guard<std::mutex> guard(mu_);
|
||||
for (int i = 0; i < cache_.size(); i++) {
|
||||
@ -102,7 +103,7 @@ class ObjectCache {
|
||||
|
||||
private:
|
||||
struct ReferenceCount {
|
||||
STRING id; // A unique ID to identify the object (think path on disk)
|
||||
std::string id; // A unique ID to identify the object (think path on disk)
|
||||
T *object; // A copy of the object in memory. Can be delete'd.
|
||||
int count; // A count of the number of active users of this object.
|
||||
};
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include <cstdio>
|
||||
#include "errcode.h"
|
||||
#include <tesseract/genericvector.h>
|
||||
#include <tesseract/strngs.h> // for STRING
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -89,8 +88,8 @@ bool Serialize(FILE* fp, const uint32_t* data, size_t n) {
|
||||
}
|
||||
|
||||
TFile::TFile()
|
||||
: offset_(0),
|
||||
data_(nullptr),
|
||||
: data_(nullptr),
|
||||
offset_(0),
|
||||
data_is_owned_(false),
|
||||
is_writing_(false),
|
||||
swap_(false) {}
|
||||
@ -193,7 +192,7 @@ bool TFile::Skip(size_t count) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TFile::Open(const STRING& filename, FileReader reader) {
|
||||
bool TFile::Open(const char* filename, FileReader reader) {
|
||||
if (!data_is_owned_) {
|
||||
data_ = new GenericVector<char>;
|
||||
data_is_owned_ = true;
|
||||
@ -202,9 +201,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
|
||||
is_writing_ = false;
|
||||
swap_ = false;
|
||||
if (reader == nullptr)
|
||||
return LoadDataFromFile(filename.c_str(), data_);
|
||||
return LoadDataFromFile(filename, data_);
|
||||
else
|
||||
return (*reader)(filename.c_str(), data_);
|
||||
return (*reader)(filename, data_);
|
||||
}
|
||||
|
||||
bool TFile::Open(const char* data, int size) {
|
||||
@ -307,12 +306,12 @@ void TFile::OpenWrite(GenericVector<char>* data) {
|
||||
data_->truncate(0);
|
||||
}
|
||||
|
||||
bool TFile::CloseWrite(const STRING& filename, FileWriter writer) {
|
||||
bool TFile::CloseWrite(const char* filename, FileWriter writer) {
|
||||
ASSERT_HOST(is_writing_);
|
||||
if (writer == nullptr)
|
||||
return SaveDataToFile(*data_, filename.c_str());
|
||||
return SaveDataToFile(*data_, filename);
|
||||
else
|
||||
return (*writer)(*data_, filename.c_str());
|
||||
return (*writer)(*data_, filename);
|
||||
}
|
||||
|
||||
int TFile::FWrite(const void* buffer, size_t size, int count) {
|
||||
|
@ -151,16 +151,16 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data,
|
||||
}
|
||||
|
||||
// Saves to the given filename.
|
||||
bool TessdataManager::SaveFile(const STRING &filename,
|
||||
bool TessdataManager::SaveFile(const char* filename,
|
||||
FileWriter writer) const {
|
||||
// TODO: This method supports only the proprietary file format.
|
||||
ASSERT_HOST(is_loaded_);
|
||||
GenericVector<char> data;
|
||||
Serialize(&data);
|
||||
if (writer == nullptr)
|
||||
return SaveDataToFile(data, filename.c_str());
|
||||
return SaveDataToFile(data, filename);
|
||||
else
|
||||
return (*writer)(data, filename.c_str());
|
||||
return (*writer)(data, filename);
|
||||
}
|
||||
|
||||
// Serializes to the given vector.
|
||||
@ -249,7 +249,7 @@ bool TessdataManager::CombineDataFiles(
|
||||
for (auto filesuffix : kTessdataFileSuffixes) {
|
||||
TessdataType type;
|
||||
ASSERT_HOST(TessdataTypeFromFileSuffix(filesuffix, &type));
|
||||
STRING filename = language_data_path_prefix;
|
||||
std::string filename = language_data_path_prefix;
|
||||
filename += filesuffix;
|
||||
FILE *fp = fopen(filename.c_str(), "rb");
|
||||
if (fp != nullptr) {
|
||||
|
@ -19,6 +19,7 @@
|
||||
#ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
|
||||
#define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
|
||||
|
||||
#include <string>
|
||||
#include <tesseract/genericvector.h>
|
||||
#include <tesseract/strngs.h> // for STRING
|
||||
|
||||
@ -148,7 +149,7 @@ class TessdataManager {
|
||||
void OverwriteEntry(TessdataType type, const char *data, int size);
|
||||
|
||||
// Saves to the given filename.
|
||||
bool SaveFile(const STRING &filename, FileWriter writer) const;
|
||||
bool SaveFile(const char* filename, FileWriter writer) const;
|
||||
// Serializes to the given vector.
|
||||
void Serialize(GenericVector<char> *data) const;
|
||||
// Resets to the initial state, keeping the reader.
|
||||
@ -183,7 +184,7 @@ class TessdataManager {
|
||||
bool IsLSTMAvailable() const { return !entries_[TESSDATA_LSTM].empty(); }
|
||||
|
||||
// Return the name of the underlying data file.
|
||||
const STRING &GetDataFileName() const { return data_file_name_; }
|
||||
const std::string& GetDataFileName() const { return data_file_name_; }
|
||||
|
||||
/**
|
||||
* Reads all the standard tesseract config and data files for a language
|
||||
@ -236,7 +237,7 @@ class TessdataManager {
|
||||
TessdataType *type);
|
||||
|
||||
// Name of file it came from.
|
||||
STRING data_file_name_;
|
||||
std::string data_file_name_;
|
||||
// Function to load the file when we need it.
|
||||
FileReader reader_;
|
||||
// True if the file has been loaded.
|
||||
|
@ -42,14 +42,14 @@ namespace tesseract {
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const STRING& filename, STRING* fontname) {
|
||||
void ExtractFontName(const char* filename, STRING* fontname) {
|
||||
*fontname = classify_font_name;
|
||||
if (*fontname == kUnknownFontName) {
|
||||
// filename is expected to be of the form [lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
const char *basename = strrchr(filename.c_str(), '/');
|
||||
const char *firstdot = strchr(basename ? basename : filename.c_str(), '.');
|
||||
const char *lastdot = strrchr(filename.c_str(), '.');
|
||||
const char *basename = strrchr(filename, '/');
|
||||
const char *firstdot = strchr(basename ? basename : filename, '.');
|
||||
const char *lastdot = strrchr(filename, '.');
|
||||
if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
|
||||
++firstdot;
|
||||
*fontname = firstdot;
|
||||
@ -95,9 +95,10 @@ void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,
|
||||
|
||||
// Writes stored training data to a .tr file based on the given filename.
|
||||
// Returns false on error.
|
||||
bool Classify::WriteTRFile(const STRING& filename) {
|
||||
bool Classify::WriteTRFile(const char* filename) {
|
||||
bool result = false;
|
||||
STRING tr_filename = filename + ".tr";
|
||||
std::string tr_filename = filename;
|
||||
tr_filename += ".tr";
|
||||
FILE* fp = fopen(tr_filename.c_str(), "wb");
|
||||
if (fp) {
|
||||
result =
|
||||
|
@ -32,7 +32,7 @@ namespace tesseract {
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const STRING& filename, STRING* fontname);
|
||||
void ExtractFontName(const char* filename, STRING* fontname);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -412,7 +412,7 @@ class Classify : public CCStruct {
|
||||
const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
|
||||
// Writes stored training data to a .tr file based on the given filename.
|
||||
// Returns false on error.
|
||||
bool WriteTRFile(const STRING& filename);
|
||||
bool WriteTRFile(const char* filename);
|
||||
|
||||
// Member variables.
|
||||
|
||||
|
@ -44,7 +44,7 @@ struct DawgLoader {
|
||||
Dawg *DawgCache::GetSquishedDawg(const STRING &lang,
|
||||
TessdataType tessdata_dawg_type,
|
||||
int debug_level, TessdataManager *data_file) {
|
||||
STRING data_id = data_file->GetDataFileName();
|
||||
std::string data_id = data_file->GetDataFileName();
|
||||
data_id += kTessdataFileSuffixes[tessdata_dawg_type];
|
||||
DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
|
||||
return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "matrix.h"
|
||||
#include "statistc.h"
|
||||
|
||||
class STRING;
|
||||
struct Pix;
|
||||
template <typename T> class UnicityTable;
|
||||
|
||||
|
@ -28,11 +28,11 @@ LSTMTester::LSTMTester(int64_t max_memory)
|
||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||
// tesseract into memory ready for testing. Returns false if nothing was
|
||||
// loaded. The arg is a filename of a file that lists the filenames.
|
||||
bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) {
|
||||
bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
|
||||
GenericVector<STRING> filenames;
|
||||
if (!LoadFileLinesToStrings(filenames_file.c_str(), &filenames)) {
|
||||
if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
|
||||
tprintf("Failed to load list of eval filenames from %s\n",
|
||||
filenames_file.c_str());
|
||||
filenames_file);
|
||||
return false;
|
||||
}
|
||||
return LoadAllEvalData(filenames);
|
||||
|
@ -34,7 +34,7 @@ class LSTMTester {
|
||||
// loaded. The arg is a filename of a file that lists the filenames, with one
|
||||
// name per line. Conveniently, tesstrain.sh generates such a file, along
|
||||
// with the files themselves.
|
||||
bool LoadAllEvalData(const STRING& filenames_file);
|
||||
bool LoadAllEvalData(const char* filenames_file);
|
||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||
// tesseract into memory ready for testing. Returns false if nothing was
|
||||
// loaded.
|
||||
|
@ -890,7 +890,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr,
|
||||
}
|
||||
|
||||
// Writes the full recognition traineddata to the given filename.
|
||||
bool LSTMTrainer::SaveTraineddata(const STRING& filename) {
|
||||
bool LSTMTrainer::SaveTraineddata(const char* filename) {
|
||||
GenericVector<char> recognizer_data;
|
||||
SaveRecognitionDump(&recognizer_data);
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
|
||||
|
@ -291,7 +291,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
||||
void SetupCheckpointInfo();
|
||||
|
||||
// Writes the full recognition traineddata to the given filename.
|
||||
bool SaveTraineddata(const STRING& filename);
|
||||
bool SaveTraineddata(const char* filename);
|
||||
|
||||
// Writes the recognizer to memory, so that it can be used for testing later.
|
||||
void SaveRecognitionDump(GenericVector<char>* data) const;
|
||||
|
@ -73,7 +73,7 @@ class ApplyBoxTest : public testing::Test {
|
||||
// Test the boxes by reading the target box file in parallel with the
|
||||
// bounding boxes in the ocr output.
|
||||
std::string box_filename = TestDataNameToPath(target_box_file);
|
||||
FILE* box_file = OpenBoxFile(STRING(box_filename.c_str()));
|
||||
FILE* box_file = OpenBoxFile(box_filename.c_str());
|
||||
ASSERT_TRUE(box_file != nullptr);
|
||||
int height = pixGetHeight(src_pix_);
|
||||
ResultIterator* it = api_.GetIterator();
|
||||
|
Loading…
Reference in New Issue
Block a user