Mirror of https://github.com/tesseract-ocr/tesseract.git (synced 2025-01-19 15:03:45 +08:00)
Fail if no valid lstmf file was written (fix issue #2741)
Signed-off-by: Stefan Weil <sw@weilnetz.de>

# Conflicts:
#	src/ccmain/linerec.cpp
This commit is contained in:
Parent: 185d237c2e
Commit: 975c626dc3
@ -858,7 +858,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (tesseract_->tessedit_train_line_recognizer) {
|
if (tesseract_->tessedit_train_line_recognizer) {
|
||||||
tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_);
|
if (!tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
tesseract_->CorrectClassifyWords(page_res_);
|
tesseract_->CorrectClassifyWords(page_res_);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -40,16 +40,17 @@ const float kWorstDictCertainty = -25.0f;
|
|||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||||
// serialized DocumentData based on output_basename.
|
// serialized DocumentData based on output_basename.
|
||||||
void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
// Return true if successful, false if an error occurred.
|
||||||
|
bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||||
const STRING& output_basename,
|
const STRING& output_basename,
|
||||||
BLOCK_LIST *block_list) {
|
BLOCK_LIST *block_list) {
|
||||||
STRING lstmf_name = output_basename + ".lstmf";
|
STRING lstmf_name = output_basename + ".lstmf";
|
||||||
DocumentData images(lstmf_name);
|
DocumentData images(lstmf_name);
|
||||||
if (applybox_page > 0) {
|
if (applybox_page > 0) {
|
||||||
// Load existing document for the previous pages.
|
// Load existing document for the previous pages.
|
||||||
if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) {
|
if (!images.LoadDocument(lstmf_name.c_str(), 0, 0, nullptr)) {
|
||||||
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
|
tprintf("Failed to read training data from %s!\n", lstmf_name.c_str());
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GenericVector<TBOX> boxes;
|
GenericVector<TBOX> boxes;
|
||||||
@ -58,18 +59,20 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
|||||||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
||||||
nullptr) ||
|
nullptr) ||
|
||||||
boxes.empty()) {
|
boxes.empty()) {
|
||||||
tprintf("Failed to read boxes from %s\n", input_imagename.string());
|
tprintf("Failed to read boxes from %s\n", input_imagename.c_str());
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
TrainFromBoxes(boxes, texts, block_list, &images);
|
TrainFromBoxes(boxes, texts, block_list, &images);
|
||||||
if (images.NumPages() <= 0) {
|
if (images.NumPages() <= 0) {
|
||||||
tprintf("Failed to read pages from %s\n", input_imagename.c_str());
|
tprintf("Failed to read pages from %s\n", input_imagename.c_str());
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
images.Shuffle();
|
images.Shuffle();
|
||||||
if (!images.SaveDocument(lstmf_name.string(), nullptr)) {
|
if (!images.SaveDocument(lstmf_name.c_str(), nullptr)) {
|
||||||
tprintf("Failed to write training data to %s!\n", lstmf_name.string());
|
tprintf("Failed to write training data to %s!\n", lstmf_name.c_str());
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
|
@ -336,7 +336,8 @@ class Tesseract : public Wordrec {
|
|||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
// Breaks the page into lines, according to the boxes, and writes them to a
|
// Breaks the page into lines, according to the boxes, and writes them to a
|
||||||
// serialized DocumentData based on output_basename.
|
// serialized DocumentData based on output_basename.
|
||||||
void TrainLineRecognizer(const STRING& input_imagename,
|
// Return true if successful, false if an error occurred.
|
||||||
|
bool TrainLineRecognizer(const STRING& input_imagename,
|
||||||
const STRING& output_basename,
|
const STRING& output_basename,
|
||||||
BLOCK_LIST* block_list);
|
BLOCK_LIST* block_list);
|
||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
|
Loading…
Reference in New Issue
Block a user