mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
training: Replace NULL by nullptr
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
9d6e4f6c50
commit
cb6e9e0071
@ -56,10 +56,10 @@ int main(int argc, char** argv) {
|
|||||||
vars_vec.push_back("output_ambig_words_file");
|
vars_vec.push_back("output_ambig_words_file");
|
||||||
vars_values.push_back(output_file_str);
|
vars_values.push_back(output_file_str);
|
||||||
api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY,
|
api.Init(tessdata_dir, lang.string(), tesseract::OEM_TESSERACT_ONLY,
|
||||||
NULL, 0, &vars_vec, &vars_values, false);
|
nullptr, 0, &vars_vec, &vars_values, false);
|
||||||
tesseract::Dict &dict = api.tesseract()->getDict();
|
tesseract::Dict &dict = api.tesseract()->getDict();
|
||||||
FILE *input_file = fopen(input_file_str, "rb");
|
FILE *input_file = fopen(input_file_str, "rb");
|
||||||
if (input_file == NULL) {
|
if (input_file == nullptr) {
|
||||||
tprintf("Failed to open input wordlist file %s\n", input_file_str);
|
tprintf("Failed to open input wordlist file %s\n", input_file_str);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@ -67,10 +67,10 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
// Read word list and call Dict::NoDangerousAmbig() for each word
|
// Read word list and call Dict::NoDangerousAmbig() for each word
|
||||||
// to record ambiguities in the output file.
|
// to record ambiguities in the output file.
|
||||||
while (fgets(str, CHARS_PER_LINE, input_file) != NULL) {
|
while (fgets(str, CHARS_PER_LINE, input_file) != nullptr) {
|
||||||
chomp_string(str); // remove newline
|
chomp_string(str); // remove newline
|
||||||
WERD_CHOICE word(str, dict.getUnicharset());
|
WERD_CHOICE word(str, dict.getUnicharset());
|
||||||
dict.NoDangerousAmbig(&word, NULL, false, NULL);
|
dict.NoDangerousAmbig(&word, nullptr, false, nullptr);
|
||||||
}
|
}
|
||||||
// Clean up.
|
// Clean up.
|
||||||
fclose(input_file);
|
fclose(input_file);
|
||||||
|
@ -39,7 +39,7 @@ const int kMinNewlineRatio = 5;
|
|||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
BoxChar::BoxChar(const char* utf8_str, int len) : ch_(utf8_str, len) {
|
BoxChar::BoxChar(const char* utf8_str, int len) : ch_(utf8_str, len) {
|
||||||
box_ = NULL;
|
box_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
BoxChar::~BoxChar() { boxDestroy(&box_); }
|
BoxChar::~BoxChar() { boxDestroy(&box_); }
|
||||||
@ -53,7 +53,7 @@ void BoxChar::TranslateBoxes(int xshift, int yshift,
|
|||||||
std::vector<BoxChar*>* boxes) {
|
std::vector<BoxChar*>* boxes) {
|
||||||
for (int i = 0; i < boxes->size(); ++i) {
|
for (int i = 0; i < boxes->size(); ++i) {
|
||||||
BOX* box = (*boxes)[i]->box_;
|
BOX* box = (*boxes)[i]->box_;
|
||||||
if (box != NULL) {
|
if (box != nullptr) {
|
||||||
box->x += xshift;
|
box->x += xshift;
|
||||||
box->y += yshift;
|
box->y += yshift;
|
||||||
}
|
}
|
||||||
@ -69,7 +69,7 @@ void BoxChar::PrepareToWrite(std::vector<BoxChar*>* boxes) {
|
|||||||
InsertNewlines(rtl_rules, vertical_rules, boxes);
|
InsertNewlines(rtl_rules, vertical_rules, boxes);
|
||||||
InsertSpaces(rtl_rules, vertical_rules, boxes);
|
InsertSpaces(rtl_rules, vertical_rules, boxes);
|
||||||
for (int i = 0; i < boxes->size(); ++i) {
|
for (int i = 0; i < boxes->size(); ++i) {
|
||||||
if ((*boxes)[i]->box_ == NULL) tprintf("Null box at index %d\n", i);
|
if ((*boxes)[i]->box_ == nullptr) tprintf("Null box at index %d\n", i);
|
||||||
}
|
}
|
||||||
if (rtl_rules) {
|
if (rtl_rules) {
|
||||||
ReorderRTLText(boxes);
|
ReorderRTLText(boxes);
|
||||||
@ -84,14 +84,14 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
|
|||||||
int max_shift = 0;
|
int max_shift = 0;
|
||||||
for (int i = 0; i < boxes->size(); ++i) {
|
for (int i = 0; i < boxes->size(); ++i) {
|
||||||
Box* box = (*boxes)[i]->box_;
|
Box* box = (*boxes)[i]->box_;
|
||||||
if (box == NULL) {
|
if (box == nullptr) {
|
||||||
if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
|
if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
|
||||||
// Erase null boxes at the start of a line and after another null box.
|
// Erase null boxes at the start of a line and after another null box.
|
||||||
do {
|
do {
|
||||||
delete (*boxes)[i];
|
delete (*boxes)[i];
|
||||||
boxes->erase(boxes->begin() + i);
|
boxes->erase(boxes->begin() + i);
|
||||||
--i;
|
--i;
|
||||||
} while (i >= 0 && i + 1 == boxes->size() && (*boxes)[i]->box_ == NULL);
|
} while (i >= 0 && i + 1 == boxes->size() && (*boxes)[i]->box_ == nullptr);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -139,7 +139,7 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Converts NULL boxes to space characters, with appropriate bounding boxes.
|
// Converts nullptr boxes to space characters, with appropriate bounding boxes.
|
||||||
/* static */
|
/* static */
|
||||||
void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
||||||
std::vector<BoxChar*>* boxes) {
|
std::vector<BoxChar*>* boxes) {
|
||||||
@ -147,10 +147,10 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
|||||||
// singletons, so add a box to each remaining null box.
|
// singletons, so add a box to each remaining null box.
|
||||||
for (int i = 1; i + 1 < boxes->size(); ++i) {
|
for (int i = 1; i + 1 < boxes->size(); ++i) {
|
||||||
Box* box = (*boxes)[i]->box_;
|
Box* box = (*boxes)[i]->box_;
|
||||||
if (box == NULL) {
|
if (box == nullptr) {
|
||||||
Box* prev = (*boxes)[i - 1]->box_;
|
Box* prev = (*boxes)[i - 1]->box_;
|
||||||
Box* next = (*boxes)[i + 1]->box_;
|
Box* next = (*boxes)[i + 1]->box_;
|
||||||
ASSERT_HOST(prev != NULL && next != NULL);
|
ASSERT_HOST(prev != nullptr && next != nullptr);
|
||||||
int top = MIN(prev->y, next->y);
|
int top = MIN(prev->y, next->y);
|
||||||
int bottom = MAX(prev->y + prev->h, next->y + next->h);
|
int bottom = MAX(prev->y + prev->h, next->y + next->h);
|
||||||
int left = prev->x + prev->w;
|
int left = prev->x + prev->w;
|
||||||
@ -170,14 +170,14 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
|||||||
j >= 0 && (*boxes)[j]->ch_ != " " && (*boxes)[j]->ch_ != "\t";
|
j >= 0 && (*boxes)[j]->ch_ != " " && (*boxes)[j]->ch_ != "\t";
|
||||||
--j) {
|
--j) {
|
||||||
prev = (*boxes)[j]->box_;
|
prev = (*boxes)[j]->box_;
|
||||||
ASSERT_HOST(prev != NULL);
|
ASSERT_HOST(prev != nullptr);
|
||||||
if (prev->x < right) {
|
if (prev->x < right) {
|
||||||
right = prev->x;
|
right = prev->x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Left becomes the max right of all next boxes forward to the first
|
// Left becomes the max right of all next boxes forward to the first
|
||||||
// space or newline.
|
// space or newline.
|
||||||
for (int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != NULL &&
|
for (int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != nullptr &&
|
||||||
(*boxes)[j]->ch_ != "\t";
|
(*boxes)[j]->ch_ != "\t";
|
||||||
++j) {
|
++j) {
|
||||||
next = (*boxes)[j]->box_;
|
next = (*boxes)[j]->box_;
|
||||||
@ -244,7 +244,7 @@ bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
|
|||||||
bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
|
bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
|
||||||
inT64 total_dx = 0, total_dy = 0;
|
inT64 total_dx = 0, total_dy = 0;
|
||||||
for (int i = 1; i < boxes.size(); ++i) {
|
for (int i = 1; i < boxes.size(); ++i) {
|
||||||
if (boxes[i - 1]->box_ != NULL && boxes[i]->box_ != NULL &&
|
if (boxes[i - 1]->box_ != nullptr && boxes[i]->box_ != nullptr &&
|
||||||
boxes[i - 1]->page_ == boxes[i]->page_) {
|
boxes[i - 1]->page_ == boxes[i]->page_) {
|
||||||
int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
|
int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
|
||||||
int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
|
int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
|
||||||
@ -303,7 +303,7 @@ string BoxChar::GetTesseractBoxStr(int height,
|
|||||||
char buffer[kMaxLineLength];
|
char buffer[kMaxLineLength];
|
||||||
for (int i = 0; i < boxes.size(); ++i) {
|
for (int i = 0; i < boxes.size(); ++i) {
|
||||||
const Box* box = boxes[i]->box_;
|
const Box* box = boxes[i]->box_;
|
||||||
if (box == NULL) {
|
if (box == nullptr) {
|
||||||
tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
|
tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@ -60,8 +60,8 @@ class BoxChar {
|
|||||||
// Sort function for sorting by left edge of box. Note that this will not
|
// Sort function for sorting by left edge of box. Note that this will not
|
||||||
// work properly until after InsertNewlines and InsertSpaces.
|
// work properly until after InsertNewlines and InsertSpaces.
|
||||||
bool operator<(const BoxChar& other) const {
|
bool operator<(const BoxChar& other) const {
|
||||||
if (box_ == NULL) return true;
|
if (box_ == nullptr) return true;
|
||||||
if (other.box_ == NULL) return false;
|
if (other.box_ == nullptr) return false;
|
||||||
return box_->x < other.box_->x;
|
return box_->x < other.box_->x;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,7 +74,7 @@ class BoxChar {
|
|||||||
// Inserts newline (tab) characters into the vector at newline positions.
|
// Inserts newline (tab) characters into the vector at newline positions.
|
||||||
static void InsertNewlines(bool rtl_rules, bool vertical_rules,
|
static void InsertNewlines(bool rtl_rules, bool vertical_rules,
|
||||||
vector<BoxChar*>* boxes);
|
vector<BoxChar*>* boxes);
|
||||||
// Converts NULL boxes to space characters, with appropriate bounding boxes.
|
// Converts nullptr boxes to space characters, with appropriate bounding boxes.
|
||||||
static void InsertSpaces(bool rtl_rules, bool vertical_rules,
|
static void InsertSpaces(bool rtl_rules, bool vertical_rules,
|
||||||
vector<BoxChar*>* boxes);
|
vector<BoxChar*>* boxes);
|
||||||
// Reorders text in a right-to-left script in left-to-right order.
|
// Reorders text in a right-to-left script in left-to-right order.
|
||||||
|
@ -40,7 +40,7 @@ enum ClassifierName {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const char* names[] = {"pruner", "full",
|
const char* names[] = {"pruner", "full",
|
||||||
NULL};
|
nullptr};
|
||||||
|
|
||||||
static tesseract::ShapeClassifier* InitializeClassifier(
|
static tesseract::ShapeClassifier* InitializeClassifier(
|
||||||
const char* classifer_name, const UNICHARSET& unicharset,
|
const char* classifer_name, const UNICHARSET& unicharset,
|
||||||
@ -56,33 +56,33 @@ static tesseract::ShapeClassifier* InitializeClassifier(
|
|||||||
}
|
}
|
||||||
if (classifier == CN_COUNT) {
|
if (classifier == CN_COUNT) {
|
||||||
fprintf(stderr, "Invalid classifier name:%s\n", FLAGS_classifier.c_str());
|
fprintf(stderr, "Invalid classifier name:%s\n", FLAGS_classifier.c_str());
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to initialize tesseract to test.
|
// We need to initialize tesseract to test.
|
||||||
*api = new tesseract::TessBaseAPI;
|
*api = new tesseract::TessBaseAPI;
|
||||||
tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY;
|
tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY;
|
||||||
tesseract::Tesseract* tesseract = NULL;
|
tesseract::Tesseract* tesseract = nullptr;
|
||||||
tesseract::Classify* classify = NULL;
|
tesseract::Classify* classify = nullptr;
|
||||||
if (
|
if (
|
||||||
classifier == CN_PRUNER || classifier == CN_FULL) {
|
classifier == CN_PRUNER || classifier == CN_FULL) {
|
||||||
if ((*api)->Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(),
|
if ((*api)->Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(),
|
||||||
engine_mode) < 0) {
|
engine_mode) < 0) {
|
||||||
fprintf(stderr, "Tesseract initialization failed!\n");
|
fprintf(stderr, "Tesseract initialization failed!\n");
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
tesseract = const_cast<tesseract::Tesseract*>((*api)->tesseract());
|
tesseract = const_cast<tesseract::Tesseract*>((*api)->tesseract());
|
||||||
classify = reinterpret_cast<tesseract::Classify*>(tesseract);
|
classify = reinterpret_cast<tesseract::Classify*>(tesseract);
|
||||||
if (classify->shape_table() == NULL) {
|
if (classify->shape_table() == nullptr) {
|
||||||
fprintf(stderr, "Tesseract must contain a ShapeTable!\n");
|
fprintf(stderr, "Tesseract must contain a ShapeTable!\n");
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tesseract::ShapeClassifier* shape_classifier = NULL;
|
tesseract::ShapeClassifier* shape_classifier = nullptr;
|
||||||
|
|
||||||
if (!FLAGS_T.empty()) {
|
if (!FLAGS_T.empty()) {
|
||||||
const char* config_name;
|
const char* config_name;
|
||||||
while ((config_name = GetNextFilename(argc, argv)) != NULL) {
|
while ((config_name = GetNextFilename(argc, argv)) != nullptr) {
|
||||||
tprintf("Reading config file %s ...\n", config_name);
|
tprintf("Reading config file %s ...\n", config_name);
|
||||||
(*api)->ReadConfigFile(config_name);
|
(*api)->ReadConfigFile(config_name);
|
||||||
}
|
}
|
||||||
@ -93,7 +93,7 @@ static tesseract::ShapeClassifier* InitializeClassifier(
|
|||||||
shape_classifier = new tesseract::TessClassifier(false, classify);
|
shape_classifier = new tesseract::TessClassifier(false, classify);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "%s tester not yet implemented\n", classifer_name);
|
fprintf(stderr, "%s tester not yet implemented\n", classifer_name);
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
tprintf("Testing classifier %s:\n", classifer_name);
|
tprintf("Testing classifier %s:\n", classifer_name);
|
||||||
return shape_classifier;
|
return shape_classifier;
|
||||||
@ -122,12 +122,12 @@ int main(int argc, char **argv) {
|
|||||||
ParseArguments(&argc, &argv);
|
ParseArguments(&argc, &argv);
|
||||||
STRING file_prefix;
|
STRING file_prefix;
|
||||||
tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
|
tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
|
||||||
argc, argv, false, NULL, &file_prefix);
|
argc, argv, false, nullptr, &file_prefix);
|
||||||
tesseract::TessBaseAPI* api;
|
tesseract::TessBaseAPI* api;
|
||||||
// Decode the classifier string.
|
// Decode the classifier string.
|
||||||
tesseract::ShapeClassifier* shape_classifier = InitializeClassifier(
|
tesseract::ShapeClassifier* shape_classifier = InitializeClassifier(
|
||||||
FLAGS_classifier.c_str(), trainer->unicharset(), argc, argv, &api);
|
FLAGS_classifier.c_str(), trainer->unicharset(), argc, argv, &api);
|
||||||
if (shape_classifier == NULL) {
|
if (shape_classifier == nullptr) {
|
||||||
fprintf(stderr, "Classifier init failed!:%s\n", FLAGS_classifier.c_str());
|
fprintf(stderr, "Classifier init failed!:%s\n", FLAGS_classifier.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -139,7 +139,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
trainer->TestClassifierOnSamples(tesseract:: CT_UNICHAR_TOP1_ERR,
|
trainer->TestClassifierOnSamples(tesseract:: CT_UNICHAR_TOP1_ERR,
|
||||||
MAX(3, FLAGS_debug_level), false,
|
MAX(3, FLAGS_debug_level), false,
|
||||||
shape_classifier, NULL);
|
shape_classifier, nullptr);
|
||||||
delete shape_classifier;
|
delete shape_classifier;
|
||||||
delete api;
|
delete api;
|
||||||
delete trainer;
|
delete trainer;
|
||||||
|
@ -137,7 +137,7 @@ int main(int argc, char *argv[]) {
|
|||||||
const char *PageName;
|
const char *PageName;
|
||||||
FILE *TrainingPage;
|
FILE *TrainingPage;
|
||||||
LIST CharList = NIL_LIST;
|
LIST CharList = NIL_LIST;
|
||||||
CLUSTERER *Clusterer = NULL;
|
CLUSTERER *Clusterer = nullptr;
|
||||||
LIST ProtoList = NIL_LIST;
|
LIST ProtoList = NIL_LIST;
|
||||||
LIST NormProtoList = NIL_LIST;
|
LIST NormProtoList = NIL_LIST;
|
||||||
LIST pCharList;
|
LIST pCharList;
|
||||||
@ -147,11 +147,11 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
ParseArguments(&argc, &argv);
|
ParseArguments(&argc, &argv);
|
||||||
int num_fonts = 0;
|
int num_fonts = 0;
|
||||||
while ((PageName = GetNextFilename(argc, argv)) != NULL) {
|
while ((PageName = GetNextFilename(argc, argv)) != nullptr) {
|
||||||
printf("Reading %s ...\n", PageName);
|
printf("Reading %s ...\n", PageName);
|
||||||
TrainingPage = Efopen(PageName, "rb");
|
TrainingPage = Efopen(PageName, "rb");
|
||||||
ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE,
|
ReadTrainingSamples(FeatureDefs, PROGRAM_FEATURE_TYPE,
|
||||||
100, NULL, TrainingPage, &CharList);
|
100, nullptr, TrainingPage, &CharList);
|
||||||
fclose(TrainingPage);
|
fclose(TrainingPage);
|
||||||
++num_fonts;
|
++num_fonts;
|
||||||
}
|
}
|
||||||
@ -168,7 +168,7 @@ int main(int argc, char *argv[]) {
|
|||||||
CharSample = (LABELEDLIST)first_node(pCharList);
|
CharSample = (LABELEDLIST)first_node(pCharList);
|
||||||
Clusterer =
|
Clusterer =
|
||||||
SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
|
SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE);
|
||||||
if (Clusterer == NULL) { // To avoid a SIGSEGV
|
if (Clusterer == nullptr) { // To avoid a SIGSEGV
|
||||||
fprintf(stderr, "Error: NULL clusterer!\n");
|
fprintf(stderr, "Error: NULL clusterer!\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -229,7 +229,7 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList,
|
|||||||
int N;
|
int N;
|
||||||
|
|
||||||
Filename = "";
|
Filename = "";
|
||||||
if (Directory != NULL && Directory[0] != '\0')
|
if (Directory != nullptr && Directory[0] != '\0')
|
||||||
{
|
{
|
||||||
Filename += Directory;
|
Filename += Directory;
|
||||||
Filename += "/";
|
Filename += "/";
|
||||||
|
@ -18,7 +18,7 @@ bool IntFlagExists(const char* flag_name, inT32* value) {
|
|||||||
GenericVector<IntParam*> empty;
|
GenericVector<IntParam*> empty;
|
||||||
IntParam *p = ParamUtils::FindParam<IntParam>(
|
IntParam *p = ParamUtils::FindParam<IntParam>(
|
||||||
full_flag_name.string(), GlobalParams()->int_params, empty);
|
full_flag_name.string(), GlobalParams()->int_params, empty);
|
||||||
if (p == NULL) return false;
|
if (p == nullptr) return false;
|
||||||
*value = (inT32)(*p);
|
*value = (inT32)(*p);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -29,7 +29,7 @@ bool DoubleFlagExists(const char* flag_name, double* value) {
|
|||||||
GenericVector<DoubleParam*> empty;
|
GenericVector<DoubleParam*> empty;
|
||||||
DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
|
DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
|
||||||
full_flag_name.string(), GlobalParams()->double_params, empty);
|
full_flag_name.string(), GlobalParams()->double_params, empty);
|
||||||
if (p == NULL) return false;
|
if (p == nullptr) return false;
|
||||||
*value = static_cast<double>(*p);
|
*value = static_cast<double>(*p);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -40,7 +40,7 @@ bool BoolFlagExists(const char* flag_name, bool* value) {
|
|||||||
GenericVector<BoolParam*> empty;
|
GenericVector<BoolParam*> empty;
|
||||||
BoolParam *p = ParamUtils::FindParam<BoolParam>(
|
BoolParam *p = ParamUtils::FindParam<BoolParam>(
|
||||||
full_flag_name.string(), GlobalParams()->bool_params, empty);
|
full_flag_name.string(), GlobalParams()->bool_params, empty);
|
||||||
if (p == NULL) return false;
|
if (p == nullptr) return false;
|
||||||
*value = (BOOL8)(*p);
|
*value = (BOOL8)(*p);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -51,8 +51,8 @@ bool StringFlagExists(const char* flag_name, const char** value) {
|
|||||||
GenericVector<StringParam*> empty;
|
GenericVector<StringParam*> empty;
|
||||||
StringParam *p = ParamUtils::FindParam<StringParam>(
|
StringParam *p = ParamUtils::FindParam<StringParam>(
|
||||||
full_flag_name.string(), GlobalParams()->string_params, empty);
|
full_flag_name.string(), GlobalParams()->string_params, empty);
|
||||||
*value = (p != NULL) ? p->string() : NULL;
|
*value = (p != nullptr) ? p->string() : nullptr;
|
||||||
return p != NULL;
|
return p != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ void SetIntFlagValue(const char* flag_name, const inT32 new_val) {
|
|||||||
GenericVector<IntParam*> empty;
|
GenericVector<IntParam*> empty;
|
||||||
IntParam *p = ParamUtils::FindParam<IntParam>(
|
IntParam *p = ParamUtils::FindParam<IntParam>(
|
||||||
full_flag_name.string(), GlobalParams()->int_params, empty);
|
full_flag_name.string(), GlobalParams()->int_params, empty);
|
||||||
ASSERT_HOST(p != NULL);
|
ASSERT_HOST(p != nullptr);
|
||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,7 +72,7 @@ void SetDoubleFlagValue(const char* flag_name, const double new_val) {
|
|||||||
GenericVector<DoubleParam*> empty;
|
GenericVector<DoubleParam*> empty;
|
||||||
DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
|
DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
|
||||||
full_flag_name.string(), GlobalParams()->double_params, empty);
|
full_flag_name.string(), GlobalParams()->double_params, empty);
|
||||||
ASSERT_HOST(p != NULL);
|
ASSERT_HOST(p != nullptr);
|
||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ void SetBoolFlagValue(const char* flag_name, const bool new_val) {
|
|||||||
GenericVector<BoolParam*> empty;
|
GenericVector<BoolParam*> empty;
|
||||||
BoolParam *p = ParamUtils::FindParam<BoolParam>(
|
BoolParam *p = ParamUtils::FindParam<BoolParam>(
|
||||||
full_flag_name.string(), GlobalParams()->bool_params, empty);
|
full_flag_name.string(), GlobalParams()->bool_params, empty);
|
||||||
ASSERT_HOST(p != NULL);
|
ASSERT_HOST(p != nullptr);
|
||||||
p->set_value(new_val);
|
p->set_value(new_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,20 +92,20 @@ void SetStringFlagValue(const char* flag_name, const char* new_val) {
|
|||||||
GenericVector<StringParam*> empty;
|
GenericVector<StringParam*> empty;
|
||||||
StringParam *p = ParamUtils::FindParam<StringParam>(
|
StringParam *p = ParamUtils::FindParam<StringParam>(
|
||||||
full_flag_name.string(), GlobalParams()->string_params, empty);
|
full_flag_name.string(), GlobalParams()->string_params, empty);
|
||||||
ASSERT_HOST(p != NULL);
|
ASSERT_HOST(p != nullptr);
|
||||||
p->set_value(STRING(new_val));
|
p->set_value(STRING(new_val));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SafeAtoi(const char* str, int* val) {
|
bool SafeAtoi(const char* str, int* val) {
|
||||||
char *endptr = NULL;
|
char *endptr = nullptr;
|
||||||
*val = strtol(str, &endptr, 10);
|
*val = strtol(str, &endptr, 10);
|
||||||
return endptr != NULL && *endptr == '\0';
|
return endptr != nullptr && *endptr == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SafeAtod(const char* str, double* val) {
|
bool SafeAtod(const char* str, double* val) {
|
||||||
char *endptr = NULL;
|
char *endptr = nullptr;
|
||||||
*val = strtod(str, &endptr);
|
*val = strtod(str, &endptr);
|
||||||
return endptr != NULL && *endptr == '\0';
|
return endptr != nullptr && *endptr == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrintCommandLineFlags() {
|
void PrintCommandLineFlags() {
|
||||||
@ -181,13 +181,13 @@ void ParseCommandLineFlags(const char* usage,
|
|||||||
// Find the starting position of the value if it was specified in this
|
// Find the starting position of the value if it was specified in this
|
||||||
// string.
|
// string.
|
||||||
const char* equals_position = strchr(current_arg, '=');
|
const char* equals_position = strchr(current_arg, '=');
|
||||||
const char* rhs = NULL;
|
const char* rhs = nullptr;
|
||||||
if (equals_position != NULL) {
|
if (equals_position != nullptr) {
|
||||||
rhs = equals_position + 1;
|
rhs = equals_position + 1;
|
||||||
}
|
}
|
||||||
// Extract the flag name.
|
// Extract the flag name.
|
||||||
STRING lhs;
|
STRING lhs;
|
||||||
if (equals_position == NULL) {
|
if (equals_position == nullptr) {
|
||||||
lhs = current_arg;
|
lhs = current_arg;
|
||||||
} else {
|
} else {
|
||||||
lhs.assign(current_arg, equals_position - current_arg);
|
lhs.assign(current_arg, equals_position - current_arg);
|
||||||
@ -201,7 +201,7 @@ void ParseCommandLineFlags(const char* usage,
|
|||||||
// inT32 flag
|
// inT32 flag
|
||||||
inT32 int_val;
|
inT32 int_val;
|
||||||
if (IntFlagExists(lhs.string(), &int_val)) {
|
if (IntFlagExists(lhs.string(), &int_val)) {
|
||||||
if (rhs != NULL) {
|
if (rhs != nullptr) {
|
||||||
if (!strlen(rhs)) {
|
if (!strlen(rhs)) {
|
||||||
// Bad input of the format --int_flag=
|
// Bad input of the format --int_flag=
|
||||||
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
|
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
|
||||||
@ -233,7 +233,7 @@ void ParseCommandLineFlags(const char* usage,
|
|||||||
// double flag
|
// double flag
|
||||||
double double_val;
|
double double_val;
|
||||||
if (DoubleFlagExists(lhs.string(), &double_val)) {
|
if (DoubleFlagExists(lhs.string(), &double_val)) {
|
||||||
if (rhs != NULL) {
|
if (rhs != nullptr) {
|
||||||
if (!strlen(rhs)) {
|
if (!strlen(rhs)) {
|
||||||
// Bad input of the format --double_flag=
|
// Bad input of the format --double_flag=
|
||||||
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
|
tprintf("ERROR: Bad argument: %s\n", (*argv)[i]);
|
||||||
@ -266,7 +266,7 @@ void ParseCommandLineFlags(const char* usage,
|
|||||||
// --flag=false, --flag=true, --flag=0 and --flag=1
|
// --flag=false, --flag=true, --flag=0 and --flag=1
|
||||||
bool bool_val;
|
bool bool_val;
|
||||||
if (BoolFlagExists(lhs.string(), &bool_val)) {
|
if (BoolFlagExists(lhs.string(), &bool_val)) {
|
||||||
if (rhs == NULL) {
|
if (rhs == nullptr) {
|
||||||
// --flag form
|
// --flag form
|
||||||
bool_val = true;
|
bool_val = true;
|
||||||
} else {
|
} else {
|
||||||
@ -291,7 +291,7 @@ void ParseCommandLineFlags(const char* usage,
|
|||||||
// string flag
|
// string flag
|
||||||
const char* string_val;
|
const char* string_val;
|
||||||
if (StringFlagExists(lhs.string(), &string_val)) {
|
if (StringFlagExists(lhs.string(), &string_val)) {
|
||||||
if (rhs != NULL) {
|
if (rhs != nullptr) {
|
||||||
string_val = rhs;
|
string_val = rhs;
|
||||||
} else {
|
} else {
|
||||||
// Pick the next argument
|
// Pick the next argument
|
||||||
|
@ -115,15 +115,15 @@ void ParseArguments(int* argc, char ***argv) {
|
|||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
// Helper loads shape table from the given file.
|
// Helper loads shape table from the given file.
|
||||||
ShapeTable* LoadShapeTable(const STRING& file_prefix) {
|
ShapeTable* LoadShapeTable(const STRING& file_prefix) {
|
||||||
ShapeTable* shape_table = NULL;
|
ShapeTable* shape_table = nullptr;
|
||||||
STRING shape_table_file = file_prefix;
|
STRING shape_table_file = file_prefix;
|
||||||
shape_table_file += kShapeTableFileSuffix;
|
shape_table_file += kShapeTableFileSuffix;
|
||||||
FILE* shape_fp = fopen(shape_table_file.string(), "rb");
|
FILE* shape_fp = fopen(shape_table_file.string(), "rb");
|
||||||
if (shape_fp != NULL) {
|
if (shape_fp != nullptr) {
|
||||||
shape_table = new ShapeTable;
|
shape_table = new ShapeTable;
|
||||||
if (!shape_table->DeSerialize(false, shape_fp)) {
|
if (!shape_table->DeSerialize(false, shape_fp)) {
|
||||||
delete shape_table;
|
delete shape_table;
|
||||||
shape_table = NULL;
|
shape_table = nullptr;
|
||||||
tprintf("Error: Failed to read shape table %s\n",
|
tprintf("Error: Failed to read shape table %s\n",
|
||||||
shape_table_file.string());
|
shape_table_file.string());
|
||||||
} else {
|
} else {
|
||||||
@ -144,7 +144,7 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
|
|||||||
STRING shape_table_file = file_prefix;
|
STRING shape_table_file = file_prefix;
|
||||||
shape_table_file += kShapeTableFileSuffix;
|
shape_table_file += kShapeTableFileSuffix;
|
||||||
FILE* fp = fopen(shape_table_file.string(), "wb");
|
FILE* fp = fopen(shape_table_file.string(), "wb");
|
||||||
if (fp != NULL) {
|
if (fp != nullptr) {
|
||||||
if (!shape_table.Serialize(fp)) {
|
if (!shape_table.Serialize(fp)) {
|
||||||
fprintf(stderr, "Error writing shape table: %s\n",
|
fprintf(stderr, "Error writing shape table: %s\n",
|
||||||
shape_table_file.string());
|
shape_table_file.string());
|
||||||
@ -159,7 +159,7 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
|
|||||||
/**
|
/**
|
||||||
* Creates a MasterTraininer and loads the training data into it:
|
* Creates a MasterTraininer and loads the training data into it:
|
||||||
* Initializes feature_defs and IntegerFX.
|
* Initializes feature_defs and IntegerFX.
|
||||||
* Loads the shape_table if shape_table != NULL.
|
* Loads the shape_table if shape_table != nullptr.
|
||||||
* Loads initial unicharset from -U command-line option.
|
* Loads initial unicharset from -U command-line option.
|
||||||
* If FLAGS_T is set, loads the majority of data from there, else:
|
* If FLAGS_T is set, loads the majority of data from there, else:
|
||||||
* - Loads font info from -F option.
|
* - Loads font info from -F option.
|
||||||
@ -168,7 +168,7 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
|
|||||||
* - Deletes outliers and computes canonical samples.
|
* - Deletes outliers and computes canonical samples.
|
||||||
* - If FLAGS_output_trainer is set, saves the trainer for future use.
|
* - If FLAGS_output_trainer is set, saves the trainer for future use.
|
||||||
* Computes canonical and cloud features.
|
* Computes canonical and cloud features.
|
||||||
* If shape_table is not NULL, but failed to load, make a fake flat one,
|
* If shape_table is not nullptr, but failed to load, make a fake flat one,
|
||||||
* as shape clustering was not run.
|
* as shape clustering was not run.
|
||||||
*/
|
*/
|
||||||
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
||||||
@ -182,14 +182,14 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
*file_prefix += FLAGS_D.c_str();
|
*file_prefix += FLAGS_D.c_str();
|
||||||
*file_prefix += "/";
|
*file_prefix += "/";
|
||||||
}
|
}
|
||||||
// If we are shape clustering (NULL shape_table) or we successfully load
|
// If we are shape clustering (nullptr shape_table) or we successfully load
|
||||||
// a shape_table written by a previous shape clustering, then
|
// a shape_table written by a previous shape clustering, then
|
||||||
// shape_analysis will be true, meaning that the MasterTrainer will replace
|
// shape_analysis will be true, meaning that the MasterTrainer will replace
|
||||||
// some members of the unicharset with their fragments.
|
// some members of the unicharset with their fragments.
|
||||||
bool shape_analysis = false;
|
bool shape_analysis = false;
|
||||||
if (shape_table != NULL) {
|
if (shape_table != nullptr) {
|
||||||
*shape_table = LoadShapeTable(*file_prefix);
|
*shape_table = LoadShapeTable(*file_prefix);
|
||||||
if (*shape_table != NULL)
|
if (*shape_table != nullptr)
|
||||||
shape_analysis = true;
|
shape_analysis = true;
|
||||||
} else {
|
} else {
|
||||||
shape_analysis = true;
|
shape_analysis = true;
|
||||||
@ -206,19 +206,19 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
if (!FLAGS_F.empty()) {
|
if (!FLAGS_F.empty()) {
|
||||||
if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
|
if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
|
||||||
delete trainer;
|
delete trainer;
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!FLAGS_X.empty()) {
|
if (!FLAGS_X.empty()) {
|
||||||
if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
|
if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
|
||||||
delete trainer;
|
delete trainer;
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
trainer->SetFeatureSpace(fs);
|
trainer->SetFeatureSpace(fs);
|
||||||
const char* page_name;
|
const char* page_name;
|
||||||
// Load training data from .tr files on the command line.
|
// Load training data from .tr files on the command line.
|
||||||
while ((page_name = GetNextFilename(argc, argv)) != NULL) {
|
while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
|
||||||
tprintf("Reading %s ...\n", page_name);
|
tprintf("Reading %s ...\n", page_name);
|
||||||
trainer->ReadTrainingSamples(page_name, feature_defs, false);
|
trainer->ReadTrainingSamples(page_name, feature_defs, false);
|
||||||
|
|
||||||
@ -244,7 +244,7 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
// Write the master trainer if required.
|
// Write the master trainer if required.
|
||||||
if (!FLAGS_output_trainer.empty()) {
|
if (!FLAGS_output_trainer.empty()) {
|
||||||
FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
|
FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
|
||||||
if (fp == NULL) {
|
if (fp == nullptr) {
|
||||||
tprintf("Can't create saved trainer data!\n");
|
tprintf("Can't create saved trainer data!\n");
|
||||||
} else {
|
} else {
|
||||||
trainer->Serialize(fp);
|
trainer->Serialize(fp);
|
||||||
@ -256,7 +256,7 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
tprintf("Loading master trainer from file:%s\n",
|
tprintf("Loading master trainer from file:%s\n",
|
||||||
FLAGS_T.c_str());
|
FLAGS_T.c_str());
|
||||||
FILE* fp = fopen(FLAGS_T.c_str(), "rb");
|
FILE* fp = fopen(FLAGS_T.c_str(), "rb");
|
||||||
if (fp == NULL) {
|
if (fp == nullptr) {
|
||||||
tprintf("Can't read file %s to initialize master trainer\n",
|
tprintf("Can't read file %s to initialize master trainer\n",
|
||||||
FLAGS_T.c_str());
|
FLAGS_T.c_str());
|
||||||
} else {
|
} else {
|
||||||
@ -266,7 +266,7 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
if (!success) {
|
if (!success) {
|
||||||
tprintf("Deserialize of master trainer failed!\n");
|
tprintf("Deserialize of master trainer failed!\n");
|
||||||
delete trainer;
|
delete trainer;
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
trainer->SetFeatureSpace(fs);
|
trainer->SetFeatureSpace(fs);
|
||||||
}
|
}
|
||||||
@ -275,12 +275,12 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
!trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
|
!trainer->unicharset().save_to_file(FLAGS_O.c_str())) {
|
||||||
fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
|
fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
|
||||||
delete trainer;
|
delete trainer;
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (shape_table != NULL) {
|
if (shape_table != nullptr) {
|
||||||
// If we previously failed to load a shapetable, then shape clustering
|
// If we previously failed to load a shapetable, then shape clustering
|
||||||
// wasn't run so make a flat one now.
|
// wasn't run so make a flat one now.
|
||||||
if (*shape_table == NULL) {
|
if (*shape_table == nullptr) {
|
||||||
*shape_table = new ShapeTable;
|
*shape_table = new ShapeTable;
|
||||||
trainer->SetupFlatShapeTable(*shape_table);
|
trainer->SetupFlatShapeTable(*shape_table);
|
||||||
tprintf("Flat shape table summary: %s\n",
|
tprintf("Flat shape table summary: %s\n",
|
||||||
@ -297,12 +297,12 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
/**
|
/**
|
||||||
* This routine returns the next command line argument. If
|
* This routine returns the next command line argument. If
|
||||||
* there are no remaining command line arguments, it returns
|
* there are no remaining command line arguments, it returns
|
||||||
* NULL. This routine should only be called after all option
|
* nullptr. This routine should only be called after all option
|
||||||
* arguments have been parsed and removed with ParseArguments.
|
* arguments have been parsed and removed with ParseArguments.
|
||||||
*
|
*
|
||||||
* Globals:
|
* Globals:
|
||||||
* - tessoptind defined by tessopt sys call
|
* - tessoptind defined by tessopt sys call
|
||||||
* @return Next command line argument or NULL.
|
* @return Next command line argument or nullptr.
|
||||||
* @note Exceptions: none
|
* @note Exceptions: none
|
||||||
* @note History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
* @note History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
||||||
*/
|
*/
|
||||||
@ -310,17 +310,17 @@ const char *GetNextFilename(int argc, const char* const * argv) {
|
|||||||
if (tessoptind < argc)
|
if (tessoptind < argc)
|
||||||
return argv[tessoptind++];
|
return argv[tessoptind++];
|
||||||
else
|
else
|
||||||
return NULL;
|
return nullptr;
|
||||||
} /* GetNextFilename */
|
} /* GetNextFilename */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
/**
|
/**
|
||||||
* This routine searches through a list of labeled lists to find
|
* This routine searches through a list of labeled lists to find
|
||||||
* a list with the specified label. If a matching labeled list
|
* a list with the specified label. If a matching labeled list
|
||||||
* cannot be found, NULL is returned.
|
* cannot be found, nullptr is returned.
|
||||||
* @param List list to search
|
* @param List list to search
|
||||||
* @param Label label to search for
|
* @param Label label to search for
|
||||||
* @return Labeled list with the specified Label or NULL.
|
* @return Labeled list with the specified label or nullptr.
|
||||||
* @note Globals: none
|
* @note Globals: none
|
||||||
* @note Exceptions: none
|
* @note Exceptions: none
|
||||||
* @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
* @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
||||||
@ -334,7 +334,7 @@ LABELEDLIST FindList(LIST List, char* Label) {
|
|||||||
if (strcmp (LabeledList->Label, Label) == 0)
|
if (strcmp (LabeledList->Label, Label) == 0)
|
||||||
return (LabeledList);
|
return (LabeledList);
|
||||||
}
|
}
|
||||||
return (NULL);
|
return (nullptr);
|
||||||
|
|
||||||
} /* FindList */
|
} /* FindList */
|
||||||
|
|
||||||
@ -401,12 +401,12 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
|||||||
char_sample->font_sample_count = 0;
|
char_sample->font_sample_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (fgets(buffer, 2048, file) != NULL) {
|
while (fgets(buffer, 2048, file) != nullptr) {
|
||||||
if (buffer[0] == '\n')
|
if (buffer[0] == '\n')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
sscanf(buffer, "%*s %s", unichar);
|
sscanf(buffer, "%*s %s", unichar);
|
||||||
if (unicharset != NULL && !unicharset->contains_unichar(unichar)) {
|
if (unicharset != nullptr && !unicharset->contains_unichar(unichar)) {
|
||||||
unicharset->unichar_insert(unichar);
|
unicharset->unichar_insert(unichar);
|
||||||
if (unicharset->size() > MAX_NUM_CLASSES) {
|
if (unicharset->size() > MAX_NUM_CLASSES) {
|
||||||
tprintf("Error: Size of unicharset in training is "
|
tprintf("Error: Size of unicharset in training is "
|
||||||
@ -415,7 +415,7 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
char_sample = FindList(*training_samples, unichar);
|
char_sample = FindList(*training_samples, unichar);
|
||||||
if (char_sample == NULL) {
|
if (char_sample == nullptr) {
|
||||||
char_sample = NewLabeledList(unichar);
|
char_sample = NewLabeledList(unichar);
|
||||||
*training_samples = push(*training_samples, char_sample);
|
*training_samples = push(*training_samples, char_sample);
|
||||||
}
|
}
|
||||||
@ -501,11 +501,11 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|||||||
const char* program_feature_type) {
|
const char* program_feature_type) {
|
||||||
uinT16 N;
|
uinT16 N;
|
||||||
int i, j;
|
int i, j;
|
||||||
FLOAT32 *Sample = NULL;
|
FLOAT32 *Sample = nullptr;
|
||||||
CLUSTERER *Clusterer;
|
CLUSTERER *Clusterer;
|
||||||
inT32 CharID;
|
inT32 CharID;
|
||||||
LIST FeatureList = NULL;
|
LIST FeatureList = nullptr;
|
||||||
FEATURE_SET FeatureSet = NULL;
|
FEATURE_SET FeatureSet = nullptr;
|
||||||
|
|
||||||
int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
|
int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
|
||||||
N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
|
N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
|
||||||
@ -516,7 +516,7 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
|||||||
iterate(FeatureList) {
|
iterate(FeatureList) {
|
||||||
FeatureSet = (FEATURE_SET) first_node(FeatureList);
|
FeatureSet = (FEATURE_SET) first_node(FeatureList);
|
||||||
for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
|
for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
|
||||||
if (Sample == NULL)
|
if (Sample == nullptr)
|
||||||
Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
||||||
for (j = 0; j < N; j++)
|
for (j = 0; j < N; j++)
|
||||||
Sample[j] = FeatureSet->Features[i]->Params[j];
|
Sample[j] = FeatureSet->Features[i]->Params[j];
|
||||||
@ -541,7 +541,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label,
|
|||||||
if (Prototype->Significant || Prototype->Merged)
|
if (Prototype->Significant || Prototype->Merged)
|
||||||
continue;
|
continue;
|
||||||
FLOAT32 best_dist = 0.125;
|
FLOAT32 best_dist = 0.125;
|
||||||
PROTOTYPE* best_match = NULL;
|
PROTOTYPE* best_match = nullptr;
|
||||||
// Find the nearest alive prototype.
|
// Find the nearest alive prototype.
|
||||||
LIST list_it = ProtoList;
|
LIST list_it = ProtoList;
|
||||||
iterate(list_it) {
|
iterate(list_it) {
|
||||||
@ -556,7 +556,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (best_match != NULL && !best_match->Significant) {
|
if (best_match != nullptr && !best_match->Significant) {
|
||||||
if (debug)
|
if (debug)
|
||||||
tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
|
tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
|
||||||
best_match->NumSamples, Prototype->NumSamples,
|
best_match->NumSamples, Prototype->NumSamples,
|
||||||
@ -570,7 +570,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label,
|
|||||||
best_match->Mean, Prototype->Mean);
|
best_match->Mean, Prototype->Mean);
|
||||||
Prototype->NumSamples = 0;
|
Prototype->NumSamples = 0;
|
||||||
Prototype->Merged = 1;
|
Prototype->Merged = 1;
|
||||||
} else if (best_match != NULL) {
|
} else if (best_match != nullptr) {
|
||||||
if (debug)
|
if (debug)
|
||||||
tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
|
tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
|
||||||
Prototype->Mean[0], Prototype->Mean[1],
|
Prototype->Mean[0], Prototype->Mean[1],
|
||||||
@ -603,20 +603,20 @@ void CleanUpUnusedData(
|
|||||||
iterate(ProtoList)
|
iterate(ProtoList)
|
||||||
{
|
{
|
||||||
Prototype = (PROTOTYPE *) first_node (ProtoList);
|
Prototype = (PROTOTYPE *) first_node (ProtoList);
|
||||||
if(Prototype->Variance.Elliptical != NULL)
|
if(Prototype->Variance.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
memfree(Prototype->Variance.Elliptical);
|
memfree(Prototype->Variance.Elliptical);
|
||||||
Prototype->Variance.Elliptical = NULL;
|
Prototype->Variance.Elliptical = nullptr;
|
||||||
}
|
}
|
||||||
if(Prototype->Magnitude.Elliptical != NULL)
|
if(Prototype->Magnitude.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
memfree(Prototype->Magnitude.Elliptical);
|
memfree(Prototype->Magnitude.Elliptical);
|
||||||
Prototype->Magnitude.Elliptical = NULL;
|
Prototype->Magnitude.Elliptical = nullptr;
|
||||||
}
|
}
|
||||||
if(Prototype->Weight.Elliptical != NULL)
|
if(Prototype->Weight.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
memfree(Prototype->Weight.Elliptical);
|
memfree(Prototype->Weight.Elliptical);
|
||||||
Prototype->Weight.Elliptical = NULL;
|
Prototype->Weight.Elliptical = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -648,37 +648,37 @@ LIST RemoveInsignificantProtos(
|
|||||||
NewProto->Significant = Proto->Significant;
|
NewProto->Significant = Proto->Significant;
|
||||||
NewProto->Style = Proto->Style;
|
NewProto->Style = Proto->Style;
|
||||||
NewProto->NumSamples = Proto->NumSamples;
|
NewProto->NumSamples = Proto->NumSamples;
|
||||||
NewProto->Cluster = NULL;
|
NewProto->Cluster = nullptr;
|
||||||
NewProto->Distrib = NULL;
|
NewProto->Distrib = nullptr;
|
||||||
|
|
||||||
for (i=0; i < N; i++)
|
for (i=0; i < N; i++)
|
||||||
NewProto->Mean[i] = Proto->Mean[i];
|
NewProto->Mean[i] = Proto->Mean[i];
|
||||||
if (Proto->Variance.Elliptical != NULL)
|
if (Proto->Variance.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
||||||
for (i=0; i < N; i++)
|
for (i=0; i < N; i++)
|
||||||
NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
|
NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
NewProto->Variance.Elliptical = NULL;
|
NewProto->Variance.Elliptical = nullptr;
|
||||||
//---------------------------------------------
|
//---------------------------------------------
|
||||||
if (Proto->Magnitude.Elliptical != NULL)
|
if (Proto->Magnitude.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
||||||
for (i=0; i < N; i++)
|
for (i=0; i < N; i++)
|
||||||
NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
|
NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
NewProto->Magnitude.Elliptical = NULL;
|
NewProto->Magnitude.Elliptical = nullptr;
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
if (Proto->Weight.Elliptical != NULL)
|
if (Proto->Weight.Elliptical != nullptr)
|
||||||
{
|
{
|
||||||
NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
||||||
for (i=0; i < N; i++)
|
for (i=0; i < N; i++)
|
||||||
NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
|
NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
NewProto->Weight.Elliptical = NULL;
|
NewProto->Weight.Elliptical = nullptr;
|
||||||
|
|
||||||
NewProto->TotalMagnitude = Proto->TotalMagnitude;
|
NewProto->TotalMagnitude = Proto->TotalMagnitude;
|
||||||
NewProto->LogMagnitude = Proto->LogMagnitude;
|
NewProto->LogMagnitude = Proto->LogMagnitude;
|
||||||
@ -699,7 +699,7 @@ MERGE_CLASS FindClass(LIST List, const char* Label) {
|
|||||||
if (strcmp (MergeClass->Label, Label) == 0)
|
if (strcmp (MergeClass->Label, Label) == 0)
|
||||||
return (MergeClass);
|
return (MergeClass);
|
||||||
}
|
}
|
||||||
return (NULL);
|
return (nullptr);
|
||||||
|
|
||||||
} /* FindClass */
|
} /* FindClass */
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
|
|||||||
|
|
||||||
// Creates a MasterTrainer and loads the training data into it:
|
// Creates a MasterTrainer and loads the training data into it:
|
||||||
// Initializes feature_defs and IntegerFX.
|
// Initializes feature_defs and IntegerFX.
|
||||||
// Loads the shape_table if shape_table != NULL.
|
// Loads the shape_table if shape_table != nullptr.
|
||||||
// Loads initial unicharset from -U command-line option.
|
// Loads initial unicharset from -U command-line option.
|
||||||
// If FLAGS_input_trainer is set, loads the majority of data from there, else:
|
// If FLAGS_input_trainer is set, loads the majority of data from there, else:
|
||||||
// Loads font info from -F option.
|
// Loads font info from -F option.
|
||||||
@ -79,7 +79,7 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
|
|||||||
// Deletes outliers and computes canonical samples.
|
// Deletes outliers and computes canonical samples.
|
||||||
// If FLAGS_output_trainer is set, saves the trainer for future use.
|
// If FLAGS_output_trainer is set, saves the trainer for future use.
|
||||||
// Computes canonical and cloud features.
|
// Computes canonical and cloud features.
|
||||||
// If shape_table is not NULL, but failed to load, make a fake flat one,
|
// If shape_table is not nullptr, but failed to load, make a fake flat one,
|
||||||
// as shape clustering was not run.
|
// as shape clustering was not run.
|
||||||
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
||||||
bool replication,
|
bool replication,
|
||||||
|
@ -29,9 +29,9 @@ tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset,
|
|||||||
const char *filename) {
|
const char *filename) {
|
||||||
const int kDictDebugLevel = 1;
|
const int kDictDebugLevel = 1;
|
||||||
FILE *dawg_file = fopen(filename, "rb");
|
FILE *dawg_file = fopen(filename, "rb");
|
||||||
if (dawg_file == NULL) {
|
if (dawg_file == nullptr) {
|
||||||
tprintf("Could not open %s for reading.\n", filename);
|
tprintf("Could not open %s for reading.\n", filename);
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
tprintf("Loading word list from %s\n", filename);
|
tprintf("Loading word list from %s\n", filename);
|
||||||
tesseract::Dawg *retval = new tesseract::SquishedDawg(
|
tesseract::Dawg *retval = new tesseract::SquishedDawg(
|
||||||
@ -55,7 +55,7 @@ int WriteDawgAsWordlist(const UNICHARSET &unicharset,
|
|||||||
const tesseract::Dawg *dawg,
|
const tesseract::Dawg *dawg,
|
||||||
const char *outfile_name) {
|
const char *outfile_name) {
|
||||||
FILE *out = fopen(outfile_name, "wb");
|
FILE *out = fopen(outfile_name, "wb");
|
||||||
if (out == NULL) {
|
if (out == nullptr) {
|
||||||
tprintf("Could not open %s for writing.\n", outfile_name);
|
tprintf("Could not open %s for writing.\n", outfile_name);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -83,7 +83,7 @@ int main(int argc, char *argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
tesseract::Dawg *dict = LoadSquishedDawg(unicharset, dawg_file);
|
tesseract::Dawg *dict = LoadSquishedDawg(unicharset, dawg_file);
|
||||||
if (dict == NULL) {
|
if (dict == nullptr) {
|
||||||
tprintf("Error loading dictionary from %s.\n", dawg_file);
|
tprintf("Error loading dictionary from %s.\n", dawg_file);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -64,7 +64,7 @@ const int kMinRampSize = 1000;
|
|||||||
// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
|
// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
|
||||||
// corresponding to darkening on the copier and <0 lighter and 0 not copied.
|
// corresponding to darkening on the copier and <0 lighter and 0 not copied.
|
||||||
// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
|
// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
|
||||||
// If rotation is NULL, rotation is skipped. If *rotation is non-zero, the pix
|
// If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the pix
|
||||||
// is rotated by *rotation else it is randomly rotated and *rotation is
|
// is rotated by *rotation else it is randomly rotated and *rotation is
|
||||||
// modified.
|
// modified.
|
||||||
//
|
//
|
||||||
@ -108,11 +108,11 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
|
|||||||
pix = pixBlockconv(input, 1, 1);
|
pix = pixBlockconv(input, 1, 1);
|
||||||
pixDestroy(&input);
|
pixDestroy(&input);
|
||||||
// A small random rotation helps to make the edges jaggy in a realistic way.
|
// A small random rotation helps to make the edges jaggy in a realistic way.
|
||||||
if (rotation != NULL) {
|
if (rotation != nullptr) {
|
||||||
float radians_clockwise = 0.0f;
|
float radians_clockwise = 0.0f;
|
||||||
if (*rotation) {
|
if (*rotation) {
|
||||||
radians_clockwise = *rotation;
|
radians_clockwise = *rotation;
|
||||||
} else if (randomizer != NULL) {
|
} else if (randomizer != nullptr) {
|
||||||
radians_clockwise = randomizer->SignedRand(kRotationRange);
|
radians_clockwise = randomizer->SignedRand(kRotationRange);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,7 +154,7 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
|
|||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
for (int x = 0; x < width; ++x) {
|
for (int x = 0; x < width; ++x) {
|
||||||
int pixel = GET_DATA_BYTE(data, x);
|
int pixel = GET_DATA_BYTE(data, x);
|
||||||
if (randomizer != NULL)
|
if (randomizer != nullptr)
|
||||||
pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
|
pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
|
||||||
if (height + width > kMinRampSize)
|
if (height + width > kMinRampSize)
|
||||||
pixel -= (2*x + y) * 32 / (height + width);
|
pixel -= (2*x + y) * 32 / (height + width);
|
||||||
@ -171,15 +171,15 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Creates and returns a Pix distorted by various means according to the bool
|
// Creates and returns a Pix distorted by various means according to the bool
|
||||||
// flags. If boxes is not NULL, the boxes are resized/positioned according to
|
// flags. If boxes is not nullptr, the boxes are resized/positioned according to
|
||||||
// any spatial distortion and also by the integer reduction factor box_reduction
|
// any spatial distortion and also by the integer reduction factor box_reduction
|
||||||
// so they will match what the network will output.
|
// so they will match what the network will output.
|
||||||
// Returns NULL on error. The returned Pix must be pixDestroyed.
|
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||||
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
||||||
bool white_noise, bool smooth_noise, bool blur,
|
bool white_noise, bool smooth_noise, bool blur,
|
||||||
int box_reduction, TRand* randomizer,
|
int box_reduction, TRand* randomizer,
|
||||||
GenericVector<TBOX>* boxes) {
|
GenericVector<TBOX>* boxes) {
|
||||||
Pix* distorted = pixCopy(NULL, const_cast<Pix*>(pix));
|
Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
|
||||||
// Things to do to synthetic training data.
|
// Things to do to synthetic training data.
|
||||||
if (invert && randomizer->SignedRand(1.0) < 0)
|
if (invert && randomizer->SignedRand(1.0) < 0)
|
||||||
pixInvert(distorted, distorted);
|
pixInvert(distorted, distorted);
|
||||||
@ -203,7 +203,7 @@ Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
|||||||
}
|
}
|
||||||
if (perspective)
|
if (perspective)
|
||||||
GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
|
GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
|
||||||
if (boxes != NULL) {
|
if (boxes != nullptr) {
|
||||||
for (int b = 0; b < boxes->size(); ++b) {
|
for (int b = 0; b < boxes->size(); ++b) {
|
||||||
(*boxes)[b].scale(1.0f / box_reduction);
|
(*boxes)[b].scale(1.0f / box_reduction);
|
||||||
if ((*boxes)[b].width() <= 0)
|
if ((*boxes)[b].width() <= 0)
|
||||||
@ -218,25 +218,25 @@ Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
|||||||
// is no pix. If there is a pix, then they will be taken from there.
|
// is no pix. If there is a pix, then they will be taken from there.
|
||||||
void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
|
void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
|
||||||
Pix** pix, GenericVector<TBOX>* boxes) {
|
Pix** pix, GenericVector<TBOX>* boxes) {
|
||||||
if (pix != NULL && *pix != NULL) {
|
if (pix != nullptr && *pix != nullptr) {
|
||||||
width = pixGetWidth(*pix);
|
width = pixGetWidth(*pix);
|
||||||
height = pixGetHeight(*pix);
|
height = pixGetHeight(*pix);
|
||||||
}
|
}
|
||||||
float* im_coeffs = NULL;
|
float* im_coeffs = nullptr;
|
||||||
float* box_coeffs = NULL;
|
float* box_coeffs = nullptr;
|
||||||
l_int32 incolor =
|
l_int32 incolor =
|
||||||
ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
|
ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
|
||||||
if (pix != NULL && *pix != NULL) {
|
if (pix != nullptr && *pix != nullptr) {
|
||||||
// Transform the image.
|
// Transform the image.
|
||||||
Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
|
Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
|
||||||
if (transformed == NULL) {
|
if (transformed == nullptr) {
|
||||||
tprintf("Projective transformation failed!!\n");
|
tprintf("Projective transformation failed!!\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
pixDestroy(pix);
|
pixDestroy(pix);
|
||||||
*pix = transformed;
|
*pix = transformed;
|
||||||
}
|
}
|
||||||
if (boxes != NULL) {
|
if (boxes != nullptr) {
|
||||||
// Transform the boxes.
|
// Transform the boxes.
|
||||||
for (int b = 0; b < boxes->size(); ++b) {
|
for (int b = 0; b < boxes->size(); ++b) {
|
||||||
int x1, y1, x2, y2;
|
int x1, y1, x2, y2;
|
||||||
|
@ -29,17 +29,17 @@ namespace tesseract {
|
|||||||
|
|
||||||
// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
|
// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
|
||||||
// corresponding to darkening on the copier and <0 lighter and 0 not copied.
|
// corresponding to darkening on the copier and <0 lighter and 0 not copied.
|
||||||
// If rotation is not NULL, the clockwise rotation in radians is saved there.
|
// If rotation is not nullptr, the clockwise rotation in radians is saved there.
|
||||||
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.)
|
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.)
|
||||||
// The input image is destroyed and a different image returned.
|
// The input image is destroyed and a different image returned.
|
||||||
struct Pix* DegradeImage(struct Pix* input, int exposure, TRand* randomizer,
|
struct Pix* DegradeImage(struct Pix* input, int exposure, TRand* randomizer,
|
||||||
float* rotation);
|
float* rotation);
|
||||||
|
|
||||||
// Creates and returns a Pix distorted by various means according to the bool
|
// Creates and returns a Pix distorted by various means according to the bool
|
||||||
// flags. If boxes is not NULL, the boxes are resized/positioned according to
|
// flags. If boxes is not nullptr, the boxes are resized/positioned according to
|
||||||
// any spatial distortion and also by the integer reduction factor box_reduction
|
// any spatial distortion and also by the integer reduction factor box_reduction
|
||||||
// so they will match what the network will output.
|
// so they will match what the network will output.
|
||||||
// Returns NULL on error. The returned Pix must be pixDestroyed.
|
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||||
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
|
||||||
bool white_noise, bool smooth_noise, bool blur,
|
bool white_noise, bool smooth_noise, bool blur,
|
||||||
int box_reduction, TRand* randomizer,
|
int box_reduction, TRand* randomizer,
|
||||||
|
@ -42,7 +42,7 @@ FILE* File::Open(const string& filename, const string& mode) {
|
|||||||
FILE* File::OpenOrDie(const string& filename,
|
FILE* File::OpenOrDie(const string& filename,
|
||||||
const string& mode) {
|
const string& mode) {
|
||||||
FILE* stream = fopen(filename.c_str(), mode.c_str());
|
FILE* stream = fopen(filename.c_str(), mode.c_str());
|
||||||
if (stream == NULL) {
|
if (stream == nullptr) {
|
||||||
tprintf("Unable to open '%s' in mode '%s'\n", filename.c_str(),
|
tprintf("Unable to open '%s' in mode '%s'\n", filename.c_str(),
|
||||||
mode.c_str());
|
mode.c_str());
|
||||||
}
|
}
|
||||||
@ -52,7 +52,7 @@ FILE* File::OpenOrDie(const string& filename,
|
|||||||
void File::WriteStringToFileOrDie(const string& str,
|
void File::WriteStringToFileOrDie(const string& str,
|
||||||
const string& filename) {
|
const string& filename) {
|
||||||
FILE* stream = fopen(filename.c_str(), "wb");
|
FILE* stream = fopen(filename.c_str(), "wb");
|
||||||
if (stream == NULL) {
|
if (stream == nullptr) {
|
||||||
tprintf("Unable to open '%s' for writing\n", filename.c_str());
|
tprintf("Unable to open '%s' for writing\n", filename.c_str());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -62,7 +62,7 @@ void File::WriteStringToFileOrDie(const string& str,
|
|||||||
|
|
||||||
bool File::Readable(const string& filename) {
|
bool File::Readable(const string& filename) {
|
||||||
FILE* stream = fopen(filename.c_str(), "rb");
|
FILE* stream = fopen(filename.c_str(), "rb");
|
||||||
if (stream == NULL) {
|
if (stream == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
fclose(stream);
|
fclose(stream);
|
||||||
@ -71,7 +71,7 @@ bool File::Readable(const string& filename) {
|
|||||||
|
|
||||||
bool File::ReadFileToString(const string& filename, string* out) {
|
bool File::ReadFileToString(const string& filename, string* out) {
|
||||||
FILE* stream = File::Open(filename.c_str(), "rb");
|
FILE* stream = File::Open(filename.c_str(), "rb");
|
||||||
if (stream == NULL)
|
if (stream == nullptr)
|
||||||
return false;
|
return false;
|
||||||
InputBuffer in(stream);
|
InputBuffer in(stream);
|
||||||
*out = "";
|
*out = "";
|
||||||
@ -113,8 +113,8 @@ bool File::DeleteMatchingFiles(const char* pattern) {
|
|||||||
glob_t pglob;
|
glob_t pglob;
|
||||||
char **paths;
|
char **paths;
|
||||||
bool all_deleted = true;
|
bool all_deleted = true;
|
||||||
if (glob(pattern, 0, NULL, &pglob) == 0) {
|
if (glob(pattern, 0, nullptr, &pglob) == 0) {
|
||||||
for (paths = pglob.gl_pathv; *paths != NULL; paths++) {
|
for (paths = pglob.gl_pathv; *paths != nullptr; paths++) {
|
||||||
all_deleted &= File::Delete(*paths);
|
all_deleted &= File::Delete(*paths);
|
||||||
}
|
}
|
||||||
globfree(&pglob);
|
globfree(&pglob);
|
||||||
@ -141,7 +141,7 @@ InputBuffer::InputBuffer(FILE* stream, size_t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
InputBuffer::~InputBuffer() {
|
InputBuffer::~InputBuffer() {
|
||||||
if (stream_ != NULL) {
|
if (stream_ != nullptr) {
|
||||||
fclose(stream_);
|
fclose(stream_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,7 +162,7 @@ bool InputBuffer::Read(string* out) {
|
|||||||
|
|
||||||
bool InputBuffer::CloseFile() {
|
bool InputBuffer::CloseFile() {
|
||||||
int ret = fclose(stream_);
|
int ret = fclose(stream_);
|
||||||
stream_ = NULL;
|
stream_ = nullptr;
|
||||||
return ret == 0;
|
return ret == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,7 +179,7 @@ OutputBuffer::OutputBuffer(FILE* stream, size_t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
OutputBuffer::~OutputBuffer() {
|
OutputBuffer::~OutputBuffer() {
|
||||||
if (stream_ != NULL) {
|
if (stream_ != nullptr) {
|
||||||
fclose(stream_);
|
fclose(stream_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -190,7 +190,7 @@ void OutputBuffer::WriteString(const string& str) {
|
|||||||
|
|
||||||
bool OutputBuffer::CloseFile() {
|
bool OutputBuffer::CloseFile() {
|
||||||
int ret = fclose(stream_);
|
int ret = fclose(stream_);
|
||||||
stream_ = NULL;
|
stream_ = nullptr;
|
||||||
return ret == 0;
|
return ret == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ SmartPtr<LigatureTable> LigatureTable::instance_;
|
|||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
LigatureTable* LigatureTable::Get() {
|
LigatureTable* LigatureTable::Get() {
|
||||||
if (instance_ == NULL) {
|
if (instance_ == nullptr) {
|
||||||
instance_.reset(new LigatureTable());
|
instance_.reset(new LigatureTable());
|
||||||
instance_->Init();
|
instance_->Init();
|
||||||
}
|
}
|
||||||
@ -93,7 +93,7 @@ void LigatureTable::Init() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add custom extra ligatures.
|
// Add custom extra ligatures.
|
||||||
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL; ++i) {
|
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
|
||||||
norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] =
|
norm_to_lig_table_[UNICHARSET::kCustomLigatures[i][0]] =
|
||||||
UNICHARSET::kCustomLigatures[i][1];
|
UNICHARSET::kCustomLigatures[i][1];
|
||||||
int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
|
int norm_length = strlen(UNICHARSET::kCustomLigatures[i][0]);
|
||||||
@ -138,7 +138,7 @@ string LigatureTable::RemoveCustomLigatures(const string& str) const {
|
|||||||
len = it.get_utf8(tmp);
|
len = it.get_utf8(tmp);
|
||||||
tmp[len] = '\0';
|
tmp[len] = '\0';
|
||||||
norm_ind = -1;
|
norm_ind = -1;
|
||||||
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL && norm_ind < 0;
|
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr && norm_ind < 0;
|
||||||
++i) {
|
++i) {
|
||||||
if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) {
|
if (!strcmp(tmp, UNICHARSET::kCustomLigatures[i][1])) {
|
||||||
norm_ind = i;
|
norm_ind = i;
|
||||||
|
@ -82,7 +82,7 @@ int main(int argc, char **argv) {
|
|||||||
checkpoint_file += "_checkpoint";
|
checkpoint_file += "_checkpoint";
|
||||||
STRING checkpoint_bak = checkpoint_file + ".bak";
|
STRING checkpoint_bak = checkpoint_file + ".bak";
|
||||||
tesseract::LSTMTrainer trainer(
|
tesseract::LSTMTrainer trainer(
|
||||||
NULL, NULL, NULL, NULL, FLAGS_model_output.c_str(),
|
nullptr, nullptr, nullptr, nullptr, FLAGS_model_output.c_str(),
|
||||||
checkpoint_file.c_str(), FLAGS_debug_interval,
|
checkpoint_file.c_str(), FLAGS_debug_interval,
|
||||||
static_cast<inT64>(FLAGS_max_image_MB) * 1048576);
|
static_cast<inT64>(FLAGS_max_image_MB) * 1048576);
|
||||||
|
|
||||||
@ -174,7 +174,7 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool best_dumped = true;
|
bool best_dumped = true;
|
||||||
char* best_model_dump = NULL;
|
char* best_model_dump = nullptr;
|
||||||
size_t best_model_size = 0;
|
size_t best_model_size = 0;
|
||||||
STRING best_model_name;
|
STRING best_model_name;
|
||||||
tesseract::LSTMTester tester(static_cast<inT64>(FLAGS_max_image_MB) *
|
tesseract::LSTMTester tester(static_cast<inT64>(FLAGS_max_image_MB) *
|
||||||
|
@ -148,7 +148,7 @@ static LIST ClusterOneConfig(int shape_id, const char* class_label,
|
|||||||
clusterer->SampleSize);
|
clusterer->SampleSize);
|
||||||
FreeClusterer(clusterer);
|
FreeClusterer(clusterer);
|
||||||
MERGE_CLASS merge_class = FindClass(mf_classes, class_label);
|
MERGE_CLASS merge_class = FindClass(mf_classes, class_label);
|
||||||
if (merge_class == NULL) {
|
if (merge_class == nullptr) {
|
||||||
merge_class = NewLabeledClass(class_label);
|
merge_class = NewLabeledClass(class_label);
|
||||||
mf_classes = push(mf_classes, merge_class);
|
mf_classes = push(mf_classes, merge_class);
|
||||||
}
|
}
|
||||||
@ -238,14 +238,14 @@ static void SetupConfigMap(ShapeTable* shape_table, IndexMapBiDi* config_map) {
|
|||||||
int main (int argc, char **argv) {
|
int main (int argc, char **argv) {
|
||||||
ParseArguments(&argc, &argv);
|
ParseArguments(&argc, &argv);
|
||||||
|
|
||||||
ShapeTable* shape_table = NULL;
|
ShapeTable* shape_table = nullptr;
|
||||||
STRING file_prefix;
|
STRING file_prefix;
|
||||||
// Load the training data.
|
// Load the training data.
|
||||||
MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv,
|
MasterTrainer* trainer = tesseract::LoadTrainingData(argc, argv,
|
||||||
false,
|
false,
|
||||||
&shape_table,
|
&shape_table,
|
||||||
&file_prefix);
|
&file_prefix);
|
||||||
if (trainer == NULL)
|
if (trainer == nullptr)
|
||||||
return 1; // Failed.
|
return 1; // Failed.
|
||||||
|
|
||||||
// Setup an index mapping from the shapes in the shape table to the classes
|
// Setup an index mapping from the shapes in the shape table to the classes
|
||||||
|
@ -48,7 +48,7 @@ void UTF32ToUTF8(const GenericVector<char32>& str32, STRING* utf8_str) {
|
|||||||
for (int i = 0; i < str32.length(); ++i) {
|
for (int i = 0; i < str32.length(); ++i) {
|
||||||
UNICHAR uni_ch(str32[i]);
|
UNICHAR uni_ch(str32[i]);
|
||||||
char *utf8 = uni_ch.utf8_str();
|
char *utf8 = uni_ch.utf8_str();
|
||||||
if (utf8 != NULL) {
|
if (utf8 != nullptr) {
|
||||||
(*utf8_str) += utf8;
|
(*utf8_str) += utf8;
|
||||||
delete[] utf8;
|
delete[] utf8;
|
||||||
}
|
}
|
||||||
@ -131,7 +131,7 @@ STRING NormalizeUTF8String(bool decompose, const char* str8) {
|
|||||||
void NormalizeChar32(char32 ch, bool decompose, GenericVector<char32>* str) {
|
void NormalizeChar32(char32 ch, bool decompose, GenericVector<char32>* str) {
|
||||||
IcuErrorCode error_code;
|
IcuErrorCode error_code;
|
||||||
const icu::Normalizer2* nfkc = icu::Normalizer2::getInstance(
|
const icu::Normalizer2* nfkc = icu::Normalizer2::getInstance(
|
||||||
NULL, "nfkc", decompose ? UNORM2_DECOMPOSE : UNORM2_COMPOSE, error_code);
|
nullptr, "nfkc", decompose ? UNORM2_DECOMPOSE : UNORM2_COMPOSE, error_code);
|
||||||
error_code.assertSuccess();
|
error_code.assertSuccess();
|
||||||
error_code.reset();
|
error_code.reset();
|
||||||
|
|
||||||
|
@ -85,12 +85,12 @@ const int kDefaultResolution = 300;
|
|||||||
string PangoFontInfo::fonts_dir_;
|
string PangoFontInfo::fonts_dir_;
|
||||||
string PangoFontInfo::cache_dir_;
|
string PangoFontInfo::cache_dir_;
|
||||||
|
|
||||||
PangoFontInfo::PangoFontInfo() : desc_(NULL), resolution_(kDefaultResolution) {
|
PangoFontInfo::PangoFontInfo() : desc_(nullptr), resolution_(kDefaultResolution) {
|
||||||
Clear();
|
Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
PangoFontInfo::PangoFontInfo(const string& desc)
|
PangoFontInfo::PangoFontInfo(const string& desc)
|
||||||
: desc_(NULL), resolution_(kDefaultResolution) {
|
: desc_(nullptr), resolution_(kDefaultResolution) {
|
||||||
if (!ParseFontDescriptionName(desc)) {
|
if (!ParseFontDescriptionName(desc)) {
|
||||||
tprintf("ERROR: Could not parse %s\n", desc.c_str());
|
tprintf("ERROR: Could not parse %s\n", desc.c_str());
|
||||||
Clear();
|
Clear();
|
||||||
@ -107,7 +107,7 @@ void PangoFontInfo::Clear() {
|
|||||||
font_type_ = UNKNOWN;
|
font_type_ = UNKNOWN;
|
||||||
if (desc_) {
|
if (desc_) {
|
||||||
pango_font_description_free(desc_);
|
pango_font_description_free(desc_);
|
||||||
desc_ = NULL;
|
desc_ = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,7 +172,7 @@ void PangoFontInfo::HardInitFontConfig(const string& fonts_dir,
|
|||||||
}
|
}
|
||||||
FontUtils::ReInit();
|
FontUtils::ReInit();
|
||||||
// Clear Pango's font cache too.
|
// Clear Pango's font cache too.
|
||||||
pango_cairo_font_map_set_default(NULL);
|
pango_cairo_font_map_set_default(nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ListFontFamilies(PangoFontFamily*** families,
|
static void ListFontFamilies(PangoFontFamily*** families,
|
||||||
@ -236,7 +236,7 @@ bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
|
|||||||
// We don't have a way to detect whether a font is of type Fraktur. The fonts
|
// We don't have a way to detect whether a font is of type Fraktur. The fonts
|
||||||
// we currently use all have "Fraktur" in their family name, so we do a
|
// we currently use all have "Fraktur" in their family name, so we do a
|
||||||
// fragile but functional check for that here.
|
// fragile but functional check for that here.
|
||||||
is_fraktur_ = (strcasestr(family, "Fraktur") != NULL);
|
is_fraktur_ = (strcasestr(family, "Fraktur") != nullptr);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,7 +256,7 @@ PangoFont* PangoFontInfo::ToPangoFont() const {
|
|||||||
PangoContext* context = pango_context_new();
|
PangoContext* context = pango_context_new();
|
||||||
pango_cairo_context_set_resolution(context, resolution_);
|
pango_cairo_context_set_resolution(context, resolution_);
|
||||||
pango_context_set_font_map(context, font_map);
|
pango_context_set_font_map(context, font_map);
|
||||||
PangoFont* font = NULL;
|
PangoFont* font = nullptr;
|
||||||
{
|
{
|
||||||
DISABLE_HEAP_LEAK_CHECK;
|
DISABLE_HEAP_LEAK_CHECK;
|
||||||
font = pango_font_map_load_font(font_map, context, desc_);
|
font = pango_font_map_load_font(font_map, context, desc_);
|
||||||
@ -267,7 +267,7 @@ PangoFont* PangoFontInfo::ToPangoFont() const {
|
|||||||
|
|
||||||
bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const {
|
bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const {
|
||||||
PangoFont* font = ToPangoFont();
|
PangoFont* font = ToPangoFont();
|
||||||
PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
|
PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
|
||||||
for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
|
for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
|
||||||
it != UNICHAR::end(utf8_text, byte_length);
|
it != UNICHAR::end(utf8_text, byte_length);
|
||||||
++it) {
|
++it) {
|
||||||
@ -308,7 +308,7 @@ static char* my_strnmove(char* dest, const char* src, size_t n) {
|
|||||||
|
|
||||||
int PangoFontInfo::DropUncoveredChars(string* utf8_text) const {
|
int PangoFontInfo::DropUncoveredChars(string* utf8_text) const {
|
||||||
PangoFont* font = ToPangoFont();
|
PangoFont* font = ToPangoFont();
|
||||||
PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
|
PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
|
||||||
int num_dropped_chars = 0;
|
int num_dropped_chars = 0;
|
||||||
// Maintain two iterators that point into the string. For space efficiency, we
|
// Maintain two iterators that point into the string. For space efficiency, we
|
||||||
// will repeatedly copy one covered UTF8 character from one to the other, and
|
// will repeatedly copy one covered UTF8 character from one to the other, and
|
||||||
@ -371,8 +371,8 @@ bool PangoFontInfo::GetSpacingProperties(const string& utf8_char,
|
|||||||
// Find the ink glyph extents for the glyph
|
// Find the ink glyph extents for the glyph
|
||||||
PangoRectangle ink_rect, logical_rect;
|
PangoRectangle ink_rect, logical_rect;
|
||||||
pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
|
pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
|
||||||
pango_extents_to_pixels(&ink_rect, NULL);
|
pango_extents_to_pixels(&ink_rect, nullptr);
|
||||||
pango_extents_to_pixels(&logical_rect, NULL);
|
pango_extents_to_pixels(&logical_rect, nullptr);
|
||||||
|
|
||||||
int bearing = total_advance + PANGO_LBEARING(ink_rect);
|
int bearing = total_advance + PANGO_LBEARING(ink_rect);
|
||||||
if (it == it_begin || bearing < min_bearing) {
|
if (it == it_begin || bearing < min_bearing) {
|
||||||
@ -421,7 +421,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
|
|||||||
pango_font_description_free(desc);
|
pango_font_description_free(desc);
|
||||||
}
|
}
|
||||||
pango_layout_set_text(layout, utf8_word, len);
|
pango_layout_set_text(layout, utf8_word, len);
|
||||||
PangoLayoutIter* run_iter = NULL;
|
PangoLayoutIter* run_iter = nullptr;
|
||||||
{ // Fontconfig caches some information here that is not freed before exit.
|
{ // Fontconfig caches some information here that is not freed before exit.
|
||||||
DISABLE_HEAP_LEAK_CHECK;
|
DISABLE_HEAP_LEAK_CHECK;
|
||||||
run_iter = pango_layout_get_iter(layout);
|
run_iter = pango_layout_get_iter(layout);
|
||||||
@ -429,7 +429,7 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
|
|||||||
do {
|
do {
|
||||||
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
|
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
|
||||||
if (!run) {
|
if (!run) {
|
||||||
tlog(2, "Found end of line NULL run marker\n");
|
tlog(2, "Found end of line nullptr run marker\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
PangoGlyph dotted_circle_glyph;
|
PangoGlyph dotted_circle_glyph;
|
||||||
@ -534,7 +534,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
|
|||||||
#endif
|
#endif
|
||||||
PangoFontDescription *desc = pango_font_description_from_string(
|
PangoFontDescription *desc = pango_font_description_from_string(
|
||||||
query_desc.c_str());
|
query_desc.c_str());
|
||||||
PangoFont* selected_font = NULL;
|
PangoFont* selected_font = nullptr;
|
||||||
{
|
{
|
||||||
PangoFontInfo::SoftInitFontConfig();
|
PangoFontInfo::SoftInitFontConfig();
|
||||||
PangoFontMap* font_map = pango_cairo_font_map_get_default();
|
PangoFontMap* font_map = pango_cairo_font_map_get_default();
|
||||||
@ -546,7 +546,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
|
|||||||
}
|
}
|
||||||
g_object_unref(context);
|
g_object_unref(context);
|
||||||
}
|
}
|
||||||
if (selected_font == NULL) {
|
if (selected_font == nullptr) {
|
||||||
pango_font_description_free(desc);
|
pango_font_description_free(desc);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -560,7 +560,7 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
|
|||||||
char* selected_desc_str = pango_font_description_to_string(selected_desc);
|
char* selected_desc_str = pango_font_description_to_string(selected_desc);
|
||||||
tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
|
tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
|
||||||
selected_desc_str);
|
selected_desc_str);
|
||||||
if (!equal && best_match != NULL) {
|
if (!equal && best_match != nullptr) {
|
||||||
*best_match = selected_desc_str;
|
*best_match = selected_desc_str;
|
||||||
// Clip the ending ' 0' if there is one. It seems that, if there is no
|
// Clip the ending ' 0' if there is one. It seems that, if there is no
|
||||||
// point size on the end of the fontname, then Pango always appends ' 0'.
|
// point size on the end of the fontname, then Pango always appends ' 0'.
|
||||||
@ -579,9 +579,9 @@ bool FontUtils::IsAvailableFont(const char* input_query_desc,
|
|||||||
|
|
||||||
static bool ShouldIgnoreFontFamilyName(const char* query) {
|
static bool ShouldIgnoreFontFamilyName(const char* query) {
|
||||||
static const char* kIgnoredFamilyNames[]
|
static const char* kIgnoredFamilyNames[]
|
||||||
= { "Sans", "Serif", "Monospace", NULL };
|
= { "Sans", "Serif", "Monospace", nullptr };
|
||||||
const char** list = kIgnoredFamilyNames;
|
const char** list = kIgnoredFamilyNames;
|
||||||
for (; *list != NULL; ++list) {
|
for (; *list != nullptr; ++list) {
|
||||||
if (!strcmp(*list, query))
|
if (!strcmp(*list, query))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -600,7 +600,7 @@ const vector<string>& FontUtils::ListAvailableFonts() {
|
|||||||
tprintf("Using list of legacy fonts only\n");
|
tprintf("Using list of legacy fonts only\n");
|
||||||
const int kNumFontLists = 4;
|
const int kNumFontLists = 4;
|
||||||
for (int i = 0; i < kNumFontLists; ++i) {
|
for (int i = 0; i < kNumFontLists; ++i) {
|
||||||
for (int j = 0; kFontlists[i][j] != NULL; ++j) {
|
for (int j = 0; kFontlists[i][j] != nullptr; ++j) {
|
||||||
available_fonts_.push_back(kFontlists[i][j]);
|
available_fonts_.push_back(kFontlists[i][j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -619,7 +619,7 @@ const vector<string>& FontUtils::ListAvailableFonts() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int n_faces;
|
int n_faces;
|
||||||
PangoFontFace** faces = NULL;
|
PangoFontFace** faces = nullptr;
|
||||||
pango_font_family_list_faces(families[i], &faces, &n_faces);
|
pango_font_family_list_faces(families[i], &faces, &n_faces);
|
||||||
for (int j = 0; j < n_faces; ++j) {
|
for (int j = 0; j < n_faces; ++j) {
|
||||||
PangoFontDescription* desc = pango_font_face_describe(faces[j]);
|
PangoFontDescription* desc = pango_font_face_describe(faces[j]);
|
||||||
@ -663,7 +663,7 @@ void FontUtils::GetAllRenderableCharacters(const string& font_name,
|
|||||||
vector<bool>* unichar_bitmap) {
|
vector<bool>* unichar_bitmap) {
|
||||||
PangoFontInfo font_info(font_name);
|
PangoFontInfo font_info(font_name);
|
||||||
PangoCoverage* coverage = pango_font_get_coverage(
|
PangoCoverage* coverage = pango_font_get_coverage(
|
||||||
font_info.ToPangoFont(), NULL);
|
font_info.ToPangoFont(), nullptr);
|
||||||
CharCoverageMapToBitmap(coverage, unichar_bitmap);
|
CharCoverageMapToBitmap(coverage, unichar_bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -676,7 +676,7 @@ void FontUtils::GetAllRenderableCharacters(const vector<string>& fonts,
|
|||||||
for (int i = 0; i < fonts.size(); ++i) {
|
for (int i = 0; i < fonts.size(); ++i) {
|
||||||
PangoFontInfo font_info(fonts[i]);
|
PangoFontInfo font_info(fonts[i]);
|
||||||
PangoCoverage* coverage = pango_font_get_coverage(
|
PangoCoverage* coverage = pango_font_get_coverage(
|
||||||
font_info.ToPangoFont(), NULL);
|
font_info.ToPangoFont(), nullptr);
|
||||||
// Mark off characters that any font can render.
|
// Mark off characters that any font can render.
|
||||||
pango_coverage_max(all_coverage, coverage);
|
pango_coverage_max(all_coverage, coverage);
|
||||||
}
|
}
|
||||||
@ -696,7 +696,7 @@ int FontUtils::FontScore(const TessHashMap<char32, inT64>& ch_map,
|
|||||||
tprintf("ERROR: Could not parse %s\n", fontname.c_str());
|
tprintf("ERROR: Could not parse %s\n", fontname.c_str());
|
||||||
}
|
}
|
||||||
PangoFont* font = font_info.ToPangoFont();
|
PangoFont* font = font_info.ToPangoFont();
|
||||||
PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
|
PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
|
||||||
|
|
||||||
if (ch_flags) {
|
if (ch_flags) {
|
||||||
ch_flags->clear();
|
ch_flags->clear();
|
||||||
|
@ -162,11 +162,11 @@ class FontUtils {
|
|||||||
// Returns true if the font of the given description name is available in the
|
// Returns true if the font of the given description name is available in the
|
||||||
// target directory specified by --fonts_dir
|
// target directory specified by --fonts_dir
|
||||||
static bool IsAvailableFont(const char* font_desc) {
|
static bool IsAvailableFont(const char* font_desc) {
|
||||||
return IsAvailableFont(font_desc, NULL);
|
return IsAvailableFont(font_desc, nullptr);
|
||||||
}
|
}
|
||||||
// Returns true if the font of the given description name is available in the
|
// Returns true if the font of the given description name is available in the
|
||||||
// target directory specified by --fonts_dir. If false is returned, and
|
// target directory specified by --fonts_dir. If false is returned, and
|
||||||
// best_match is not NULL, the closest matching font is returned there.
|
// best_match is not nullptr, the closest matching font is returned there.
|
||||||
static bool IsAvailableFont(const char* font_desc, string* best_match);
|
static bool IsAvailableFont(const char* font_desc, string* best_match);
|
||||||
// Outputs description names of available fonts.
|
// Outputs description names of available fonts.
|
||||||
static const std::vector<string>& ListAvailableFonts();
|
static const std::vector<string>& ListAvailableFonts();
|
||||||
|
@ -49,7 +49,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
STRING file_prefix;
|
STRING file_prefix;
|
||||||
tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
|
tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
|
||||||
argc, argv, false, NULL, &file_prefix);
|
argc, argv, false, nullptr, &file_prefix);
|
||||||
|
|
||||||
if (!trainer)
|
if (!trainer)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -79,7 +79,7 @@ Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) {
|
|||||||
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
||||||
printf("Unexpected surface format %d\n",
|
printf("Unexpected surface format %d\n",
|
||||||
cairo_image_surface_get_format(surface));
|
cairo_image_surface_get_format(surface));
|
||||||
return NULL;
|
return nullptr;
|
||||||
}
|
}
|
||||||
const int width = cairo_image_surface_get_width(surface);
|
const int width = cairo_image_surface_get_width(surface);
|
||||||
const int height = cairo_image_surface_get_height(surface);
|
const int height = cairo_image_surface_get_height(surface);
|
||||||
@ -108,14 +108,14 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
|
|||||||
underline_start_prob_(0),
|
underline_start_prob_(0),
|
||||||
underline_continuation_prob_(0),
|
underline_continuation_prob_(0),
|
||||||
underline_style_(PANGO_UNDERLINE_SINGLE),
|
underline_style_(PANGO_UNDERLINE_SINGLE),
|
||||||
features_(NULL),
|
features_(nullptr),
|
||||||
drop_uncovered_chars_(true),
|
drop_uncovered_chars_(true),
|
||||||
strip_unrenderable_words_(false),
|
strip_unrenderable_words_(false),
|
||||||
add_ligatures_(false),
|
add_ligatures_(false),
|
||||||
output_word_boxes_(false),
|
output_word_boxes_(false),
|
||||||
surface_(NULL),
|
surface_(nullptr),
|
||||||
cr_(NULL),
|
cr_(nullptr),
|
||||||
layout_(NULL),
|
layout_(nullptr),
|
||||||
start_box_(0),
|
start_box_(0),
|
||||||
page_(0),
|
page_(0),
|
||||||
box_padding_(0),
|
box_padding_(0),
|
||||||
@ -127,7 +127,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width,
|
|||||||
pen_color_[2] = 0.0;
|
pen_color_[2] = 0.0;
|
||||||
set_font(font_desc);
|
set_font(font_desc);
|
||||||
set_resolution(kDefaultOutputResolution);
|
set_resolution(kDefaultOutputResolution);
|
||||||
page_boxes_ = NULL;
|
page_boxes_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StringRenderer::set_font(const string& desc) {
|
bool StringRenderer::set_font(const string& desc) {
|
||||||
@ -224,15 +224,15 @@ void StringRenderer::SetLayoutProperties() {
|
|||||||
void StringRenderer::FreePangoCairo() {
|
void StringRenderer::FreePangoCairo() {
|
||||||
if (layout_) {
|
if (layout_) {
|
||||||
g_object_unref(layout_);
|
g_object_unref(layout_);
|
||||||
layout_ = NULL;
|
layout_ = nullptr;
|
||||||
}
|
}
|
||||||
if (cr_) {
|
if (cr_) {
|
||||||
cairo_destroy(cr_);
|
cairo_destroy(cr_);
|
||||||
cr_ = NULL;
|
cr_ = nullptr;
|
||||||
}
|
}
|
||||||
if (surface_) {
|
if (surface_) {
|
||||||
cairo_surface_destroy(surface_);
|
cairo_surface_destroy(surface_);
|
||||||
surface_ = NULL;
|
surface_ = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -297,7 +297,7 @@ int StringRenderer::FindFirstPageBreakOffset(const char* text,
|
|||||||
tlog(1, "len = %d buf_len = %d\n", text_length, buf_length);
|
tlog(1, "len = %d buf_len = %d\n", text_length, buf_length);
|
||||||
pango_layout_set_text(layout_, text, buf_length);
|
pango_layout_set_text(layout_, text, buf_length);
|
||||||
|
|
||||||
PangoLayoutIter* line_iter = NULL;
|
PangoLayoutIter* line_iter = nullptr;
|
||||||
{ // Fontconfig caches some info here that is not freed before exit.
|
{ // Fontconfig caches some info here that is not freed before exit.
|
||||||
DISABLE_HEAP_LEAK_CHECK;
|
DISABLE_HEAP_LEAK_CHECK;
|
||||||
line_iter = pango_layout_get_iter(layout_);
|
line_iter = pango_layout_get_iter(layout_);
|
||||||
@ -308,8 +308,8 @@ int StringRenderer::FindFirstPageBreakOffset(const char* text,
|
|||||||
do {
|
do {
|
||||||
// Get bounding box of the current line
|
// Get bounding box of the current line
|
||||||
PangoRectangle line_ink_rect;
|
PangoRectangle line_ink_rect;
|
||||||
pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, NULL);
|
pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, nullptr);
|
||||||
pango_extents_to_pixels(&line_ink_rect, NULL);
|
pango_extents_to_pixels(&line_ink_rect, nullptr);
|
||||||
PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter);
|
PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter);
|
||||||
if (first_page) {
|
if (first_page) {
|
||||||
page_top = line_ink_rect.y;
|
page_top = line_ink_rect.y;
|
||||||
@ -365,7 +365,7 @@ bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
|
|||||||
do {
|
do {
|
||||||
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
|
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
|
||||||
if (!run) {
|
if (!run) {
|
||||||
// End of line NULL run marker
|
// End of line nullptr run marker
|
||||||
tlog(2, "Found end of line marker\n");
|
tlog(2, "Found end of line marker\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -388,7 +388,7 @@ bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) {
|
|||||||
if (add_ligatures_) {
|
if (add_ligatures_) {
|
||||||
// Make sure the output box files have ligatured text in case the font
|
// Make sure the output box files have ligatured text in case the font
|
||||||
// decided to use an unmapped glyph.
|
// decided to use an unmapped glyph.
|
||||||
text = LigatureTable::Get()->AddLigatures(text, NULL);
|
text = LigatureTable::Get()->AddLigatures(text, nullptr);
|
||||||
}
|
}
|
||||||
start_byte_to_text[start_byte_index] = text;
|
start_byte_to_text[start_byte_index] = text;
|
||||||
}
|
}
|
||||||
@ -418,9 +418,9 @@ static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) {
|
|||||||
bool started_word = false;
|
bool started_word = false;
|
||||||
for (int i = 0; i < boxchars->size(); ++i) {
|
for (int i = 0; i < boxchars->size(); ++i) {
|
||||||
if (boxchars->at(i)->ch() == " " ||
|
if (boxchars->at(i)->ch() == " " ||
|
||||||
boxchars->at(i)->box() == NULL) {
|
boxchars->at(i)->box() == nullptr) {
|
||||||
result.push_back(boxchars->at(i));
|
result.push_back(boxchars->at(i));
|
||||||
boxchars->at(i) = NULL;
|
boxchars->at(i) = nullptr;
|
||||||
started_word = false;
|
started_word = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -429,7 +429,7 @@ static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) {
|
|||||||
// Begin new word
|
// Begin new word
|
||||||
started_word = true;
|
started_word = true;
|
||||||
result.push_back(boxchars->at(i));
|
result.push_back(boxchars->at(i));
|
||||||
boxchars->at(i) = NULL;
|
boxchars->at(i) = nullptr;
|
||||||
} else {
|
} else {
|
||||||
BoxChar* last_boxchar = result.back();
|
BoxChar* last_boxchar = result.back();
|
||||||
// Compute bounding box union
|
// Compute bounding box union
|
||||||
@ -448,7 +448,7 @@ static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) {
|
|||||||
// boxchar.
|
// boxchar.
|
||||||
result.push_back(new BoxChar(" ", 1));
|
result.push_back(new BoxChar(" ", 1));
|
||||||
result.push_back(boxchars->at(i));
|
result.push_back(boxchars->at(i));
|
||||||
boxchars->at(i) = NULL;
|
boxchars->at(i) = nullptr;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Append to last word
|
// Append to last word
|
||||||
@ -458,7 +458,7 @@ static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) {
|
|||||||
last_box->y = top;
|
last_box->y = top;
|
||||||
last_box->h = bottom - top;
|
last_box->h = bottom - top;
|
||||||
delete boxchars->at(i);
|
delete boxchars->at(i);
|
||||||
boxchars->at(i) = NULL;
|
boxchars->at(i) = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
boxchars->swap(result);
|
boxchars->swap(result);
|
||||||
@ -494,8 +494,8 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
do {
|
do {
|
||||||
PangoRectangle cluster_rect;
|
PangoRectangle cluster_rect;
|
||||||
pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect,
|
pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect,
|
||||||
NULL);
|
nullptr);
|
||||||
pango_extents_to_pixels(&cluster_rect, NULL);
|
pango_extents_to_pixels(&cluster_rect, nullptr);
|
||||||
const int start_byte_index = pango_layout_iter_get_index(cluster_iter);
|
const int start_byte_index = pango_layout_iter_get_index(cluster_iter);
|
||||||
const int end_byte_index = cluster_start_to_end_index[start_byte_index];
|
const int end_byte_index = cluster_start_to_end_index[start_byte_index];
|
||||||
string cluster_text = string(text + start_byte_index,
|
string cluster_text = string(text + start_byte_index,
|
||||||
@ -534,7 +534,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
if (add_ligatures_) {
|
if (add_ligatures_) {
|
||||||
// Make sure the output box files have ligatured text in case the font
|
// Make sure the output box files have ligatured text in case the font
|
||||||
// decided to use an unmapped glyph.
|
// decided to use an unmapped glyph.
|
||||||
cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, NULL);
|
cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, nullptr);
|
||||||
}
|
}
|
||||||
BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size());
|
BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size());
|
||||||
boxchar->set_page(page_);
|
boxchar->set_page(page_);
|
||||||
@ -592,18 +592,18 @@ void StringRenderer::ComputeClusterBoxes() {
|
|||||||
boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end());
|
boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end());
|
||||||
|
|
||||||
// Compute the page bounding box
|
// Compute the page bounding box
|
||||||
Box* page_box = NULL;
|
Box* page_box = nullptr;
|
||||||
Boxa* all_boxes = NULL;
|
Boxa* all_boxes = nullptr;
|
||||||
for (int i = 0; i < page_boxchars.size(); ++i) {
|
for (int i = 0; i < page_boxchars.size(); ++i) {
|
||||||
if (page_boxchars[i]->box() == NULL) continue;
|
if (page_boxchars[i]->box() == nullptr) continue;
|
||||||
if (all_boxes == NULL)
|
if (all_boxes == nullptr)
|
||||||
all_boxes = boxaCreate(0);
|
all_boxes = boxaCreate(0);
|
||||||
boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
|
boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
|
||||||
}
|
}
|
||||||
if (all_boxes != NULL) {
|
if (all_boxes != nullptr) {
|
||||||
boxaGetExtent(all_boxes, NULL, NULL, &page_box);
|
boxaGetExtent(all_boxes, nullptr, nullptr, &page_box);
|
||||||
boxaDestroy(&all_boxes);
|
boxaDestroy(&all_boxes);
|
||||||
if (page_boxes_ == NULL) page_boxes_ = boxaCreate(0);
|
if (page_boxes_ == nullptr) page_boxes_ = boxaCreate(0);
|
||||||
boxaAddBox(page_boxes_, page_box, L_INSERT);
|
boxaAddBox(page_boxes_, page_box, L_INSERT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -650,7 +650,7 @@ int StringRenderer::StripUnrenderableWords(string* utf8_text) const {
|
|||||||
|
|
||||||
int StringRenderer::RenderToGrayscaleImage(const char* text, int text_length,
|
int StringRenderer::RenderToGrayscaleImage(const char* text, int text_length,
|
||||||
Pix** pix) {
|
Pix** pix) {
|
||||||
Pix *orig_pix = NULL;
|
Pix *orig_pix = nullptr;
|
||||||
int offset = RenderToImage(text, text_length, &orig_pix);
|
int offset = RenderToImage(text, text_length, &orig_pix);
|
||||||
if (orig_pix) {
|
if (orig_pix) {
|
||||||
*pix = pixConvertTo8(orig_pix, false);
|
*pix = pixConvertTo8(orig_pix, false);
|
||||||
@ -661,7 +661,7 @@ int StringRenderer::RenderToGrayscaleImage(const char* text, int text_length,
|
|||||||
|
|
||||||
int StringRenderer::RenderToBinaryImage(const char* text, int text_length,
|
int StringRenderer::RenderToBinaryImage(const char* text, int text_length,
|
||||||
int threshold, Pix** pix) {
|
int threshold, Pix** pix) {
|
||||||
Pix *orig_pix = NULL;
|
Pix *orig_pix = nullptr;
|
||||||
int offset = RenderToImage(text, text_length, &orig_pix);
|
int offset = RenderToImage(text, text_length, &orig_pix);
|
||||||
if (orig_pix) {
|
if (orig_pix) {
|
||||||
Pix* gray_pix = pixConvertTo8(orig_pix, false);
|
Pix* gray_pix = pixConvertTo8(orig_pix, false);
|
||||||
@ -835,19 +835,19 @@ int StringRenderer::RenderToImage(const char* text, int text_length,
|
|||||||
// do {
|
// do {
|
||||||
// Pix *pix;
|
// Pix *pix;
|
||||||
// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset,
|
// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset,
|
||||||
// strlen(txt + offset), NULL, &pix);
|
// strlen(txt + offset), nullptr, &pix);
|
||||||
// ...
|
// ...
|
||||||
// } while (offset < strlen(text));
|
// } while (offset < strlen(text));
|
||||||
//
|
//
|
||||||
int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
||||||
const char* text, int text_length,
|
const char* text, int text_length,
|
||||||
string* font_used, Pix** image) {
|
string* font_used, Pix** image) {
|
||||||
*image = NULL;
|
*image = nullptr;
|
||||||
// Select a suitable font to render the title with.
|
// Select a suitable font to render the title with.
|
||||||
const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
|
const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
|
||||||
string title_font;
|
string title_font;
|
||||||
if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate),
|
if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate),
|
||||||
&title_font, NULL)) {
|
&title_font, nullptr)) {
|
||||||
tprintf("WARNING: Could not find a font to render image title with!\n");
|
tprintf("WARNING: Could not find a font to render image title with!\n");
|
||||||
title_font = "Arial";
|
title_font = "Arial";
|
||||||
}
|
}
|
||||||
@ -871,7 +871,7 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
|||||||
++font_index_;
|
++font_index_;
|
||||||
int raw_score = 0;
|
int raw_score = 0;
|
||||||
int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score,
|
int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score,
|
||||||
NULL);
|
nullptr);
|
||||||
if (ok_chars > 0 && ok_chars >= total_chars_ * min_coverage) {
|
if (ok_chars > 0 && ok_chars >= total_chars_ * min_coverage) {
|
||||||
set_font(all_fonts[i]);
|
set_font(all_fonts[i]);
|
||||||
int offset = RenderToBinaryImage(text, text_length, 128, image);
|
int offset = RenderToBinaryImage(text, text_length, 128, image);
|
||||||
@ -892,7 +892,7 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
|||||||
// Add the font to the image.
|
// Add the font to the image.
|
||||||
set_font(title_font);
|
set_font(title_font);
|
||||||
v_margin_ /= 8;
|
v_margin_ /= 8;
|
||||||
Pix* title_image = NULL;
|
Pix* title_image = nullptr;
|
||||||
RenderToBinaryImage(title, strlen(title), 128, &title_image);
|
RenderToBinaryImage(title, strlen(title), 128, &title_image);
|
||||||
pixOr(*image, *image, title_image);
|
pixOr(*image, *image, title_image);
|
||||||
pixDestroy(&title_image);
|
pixDestroy(&title_image);
|
||||||
|
@ -183,7 +183,7 @@ struct SpacingProperties {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static bool IsWhitespaceBox(const BoxChar* boxchar) {
|
static bool IsWhitespaceBox(const BoxChar* boxchar) {
|
||||||
return (boxchar->box() == NULL ||
|
return (boxchar->box() == nullptr ||
|
||||||
SpanUTF8Whitespace(boxchar->ch().c_str()));
|
SpanUTF8Whitespace(boxchar->ch().c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,7 +223,7 @@ void ExtractFontProperties(const string &utf8_text,
|
|||||||
int offset = 0;
|
int offset = 0;
|
||||||
const char* text = utf8_text.c_str();
|
const char* text = utf8_text.c_str();
|
||||||
while (offset < len) {
|
while (offset < len) {
|
||||||
offset += render->RenderToImage(text + offset, strlen(text + offset), NULL);
|
offset += render->RenderToImage(text + offset, strlen(text + offset), nullptr);
|
||||||
const vector<BoxChar*> &boxes = render->GetBoxes();
|
const vector<BoxChar*> &boxes = render->GetBoxes();
|
||||||
|
|
||||||
// If the page break split a bigram, correct the offset so we try the bigram
|
// If the page break split a bigram, correct the offset so we try the bigram
|
||||||
@ -316,7 +316,7 @@ bool MakeIndividualGlyphs(Pix* pix,
|
|||||||
const int input_tiff_page) {
|
const int input_tiff_page) {
|
||||||
// If checks fail, return false without exiting text2image
|
// If checks fail, return false without exiting text2image
|
||||||
if (!pix) {
|
if (!pix) {
|
||||||
tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is NULL\n");
|
tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is nullptr\n");
|
||||||
return false;
|
return false;
|
||||||
} else if (FLAGS_glyph_resized_size <= 0) {
|
} else if (FLAGS_glyph_resized_size <= 0) {
|
||||||
tprintf("ERROR: --glyph_resized_size must be positive\n");
|
tprintf("ERROR: --glyph_resized_size must be positive\n");
|
||||||
@ -359,7 +359,7 @@ bool MakeIndividualGlyphs(Pix* pix,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Crop the boxed character
|
// Crop the boxed character
|
||||||
Pix* pix_glyph = pixClipRectangle(pix, b, NULL);
|
Pix* pix_glyph = pixClipRectangle(pix, b, nullptr);
|
||||||
if (!pix_glyph) {
|
if (!pix_glyph) {
|
||||||
tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
|
tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
|
||||||
continue;
|
continue;
|
||||||
@ -551,7 +551,7 @@ int main(int argc, char** argv) {
|
|||||||
int ngram_len = offsets[i].second;
|
int ngram_len = offsets[i].second;
|
||||||
// Skip words that contain characters not in found in unicharset.
|
// Skip words that contain characters not in found in unicharset.
|
||||||
if (!FLAGS_unicharset_file.empty() &&
|
if (!FLAGS_unicharset_file.empty() &&
|
||||||
!unicharset.encodable_string(curr_pos, NULL)) {
|
!unicharset.encodable_string(curr_pos, nullptr)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
rand_utf8.append(curr_pos, ngram_len);
|
rand_utf8.append(curr_pos, ngram_len);
|
||||||
@ -589,7 +589,7 @@ int main(int argc, char** argv) {
|
|||||||
string font_used;
|
string font_used;
|
||||||
for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
|
for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
|
||||||
tlog(1, "Starting page %d\n", im);
|
tlog(1, "Starting page %d\n", im);
|
||||||
Pix* pix = NULL;
|
Pix* pix = nullptr;
|
||||||
if (FLAGS_find_fonts) {
|
if (FLAGS_find_fonts) {
|
||||||
offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
|
offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
|
||||||
to_render_utf8 + offset,
|
to_render_utf8 + offset,
|
||||||
@ -599,7 +599,7 @@ int main(int argc, char** argv) {
|
|||||||
offset += render.RenderToImage(to_render_utf8 + offset,
|
offset += render.RenderToImage(to_render_utf8 + offset,
|
||||||
strlen(to_render_utf8 + offset), &pix);
|
strlen(to_render_utf8 + offset), &pix);
|
||||||
}
|
}
|
||||||
if (pix != NULL) {
|
if (pix != nullptr) {
|
||||||
float rotation = 0;
|
float rotation = 0;
|
||||||
if (pass == 1) {
|
if (pass == 1) {
|
||||||
// Pass 2, do mirror rotation.
|
// Pass 2, do mirror rotation.
|
||||||
@ -607,7 +607,7 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
if (FLAGS_degrade_image) {
|
if (FLAGS_degrade_image) {
|
||||||
pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
|
pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
|
||||||
FLAGS_rotate_image ? &rotation : NULL);
|
FLAGS_rotate_image ? &rotation : nullptr);
|
||||||
}
|
}
|
||||||
render.RotatePageBoxes(rotation);
|
render.RotatePageBoxes(rotation);
|
||||||
|
|
||||||
@ -660,7 +660,7 @@ int main(int argc, char** argv) {
|
|||||||
string filename = FLAGS_outputbase.c_str();
|
string filename = FLAGS_outputbase.c_str();
|
||||||
filename += ".fontlist.txt";
|
filename += ".fontlist.txt";
|
||||||
FILE* fp = fopen(filename.c_str(), "wb");
|
FILE* fp = fopen(filename.c_str(), "wb");
|
||||||
if (fp == NULL) {
|
if (fp == nullptr) {
|
||||||
tprintf("Failed to create output font list %s\n", filename.c_str());
|
tprintf("Failed to create output font list %s\n", filename.c_str());
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < font_names.size(); ++i) {
|
for (int i = 0; i < font_names.size(); ++i) {
|
||||||
|
@ -134,7 +134,7 @@ int main(int argc, char** argv) {
|
|||||||
printf("Extracting unicharset from %s\n", argv[tessoptind]);
|
printf("Extracting unicharset from %s\n", argv[tessoptind]);
|
||||||
|
|
||||||
FILE* box_file = fopen(argv[tessoptind], "rb");
|
FILE* box_file = fopen(argv[tessoptind], "rb");
|
||||||
if (box_file == NULL) {
|
if (box_file == nullptr) {
|
||||||
printf("Cannot open box file %s\n", argv[tessoptind]);
|
printf("Cannot open box file %s\n", argv[tessoptind]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@ void SetupBasicProperties(bool report_errors, bool decompose,
|
|||||||
for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
|
for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {
|
||||||
// Convert any custom ligatures.
|
// Convert any custom ligatures.
|
||||||
const char* unichar_str = unicharset->id_to_unichar(unichar_id);
|
const char* unichar_str = unicharset->id_to_unichar(unichar_id);
|
||||||
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != NULL; ++i) {
|
for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {
|
||||||
if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
|
if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {
|
||||||
unichar_str = UNICHARSET::kCustomLigatures[i][0];
|
unichar_str = UNICHARSET::kCustomLigatures[i][0];
|
||||||
break;
|
break;
|
||||||
|
@ -76,7 +76,7 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
tprintf("Reducing Trie to SquishedDawg\n");
|
tprintf("Reducing Trie to SquishedDawg\n");
|
||||||
tesseract::SquishedDawg *dawg = trie.trie_to_dawg();
|
tesseract::SquishedDawg *dawg = trie.trie_to_dawg();
|
||||||
if (dawg != NULL && dawg->NumEdges() > 0) {
|
if (dawg != nullptr && dawg->NumEdges() > 0) {
|
||||||
tprintf("Writing squished DAWG to '%s'\n", dawg_filename);
|
tprintf("Writing squished DAWG to '%s'\n", dawg_filename);
|
||||||
dawg->write_squished_dawg(dawg_filename);
|
dawg->write_squished_dawg(dawg_filename);
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user