mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 15:03:45 +08:00
Fail if no valid lstmf file was written (fix issue #2741)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
# Conflicts:
#   src/ccmain/linerec.cpp
This commit is contained in:
parent
185d237c2e
commit
975c626dc3
@ -858,7 +858,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
}
|
||||
|
||||
if (tesseract_->tessedit_train_line_recognizer) {
|
||||
tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
|
||||
if (!tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_)) {
|
||||
return -1;
|
||||
}
|
||||
tesseract_->CorrectClassifyWords(page_res_);
|
||||
return 0;
|
||||
}
|
||||
|
@ -40,16 +40,17 @@ const float kWorstDictCertainty = -25.0f;
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
// Return true if successful, false if an error occurred.
|
||||
bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST *block_list) {
|
||||
STRING lstmf_name = output_basename + ".lstmf";
|
||||
DocumentData images(lstmf_name);
|
||||
if (applybox_page > 0) {
|
||||
// Load existing document for the previous pages.
|
||||
if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) {
|
||||
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
|
||||
return;
|
||||
if (!images.LoadDocument(lstmf_name.c_str(), 0, 0, nullptr)) {
|
||||
tprintf("Failed to read training data from %s!\n", lstmf_name.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
GenericVector<TBOX> boxes;
|
||||
@ -58,18 +59,20 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
||||
nullptr) ||
|
||||
boxes.empty()) {
|
||||
tprintf("Failed to read boxes from %s\n", input_imagename.string());
|
||||
return;
|
||||
tprintf("Failed to read boxes from %s\n", input_imagename.c_str());
|
||||
return false;
|
||||
}
|
||||
TrainFromBoxes(boxes, texts, block_list, &images);
|
||||
if (images.NumPages() <= 0) {
|
||||
tprintf("Failed to read pages from %s\n", input_imagename.c_str());
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
images.Shuffle();
|
||||
if (!images.SaveDocument(lstmf_name.string(), nullptr)) {
|
||||
tprintf("Failed to write training data to %s!\n", lstmf_name.string());
|
||||
if (!images.SaveDocument(lstmf_name.c_str(), nullptr)) {
|
||||
tprintf("Failed to write training data to %s!\n", lstmf_name.c_str());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
|
@ -336,7 +336,8 @@ class Tesseract : public Wordrec {
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||
// serialized DocumentData based on output_basename.
|
||||
void TrainLineRecognizer(const STRING& input_imagename,
|
||||
// Return true if successful, false if an error occurred.
|
||||
bool TrainLineRecognizer(const STRING& input_imagename,
|
||||
const STRING& output_basename,
|
||||
BLOCK_LIST* block_list);
|
||||
// Generates training data for training a line recognizer, eg LSTM.
|
||||
|
Loading…
Reference in New Issue
Block a user