Added an OSD renderer for psm 0 (PSM_OSD_ONLY).

Works for both single-page and multi-page input.
This commit is contained in:
amitdo 2015-10-30 20:09:00 +02:00
parent b882590491
commit 6bbcb50dd9
5 changed files with 126 additions and 47 deletions

View File

@ -1198,35 +1198,39 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
SetInputName(filename);
SetImage(pix);
bool failed = false;
if (timeout_millisec > 0) {
if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
// Disabled character recognition
PageIterator* it = AnalyseLayout();
if (it == NULL) {
failed = true;
} else {
delete it;
}
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
failed = FindLines() != 0;
} else if (timeout_millisec > 0) {
// Running with a timeout.
ETEXT_DESC monitor;
monitor.cancel = NULL;
monitor.cancel_this = NULL;
monitor.set_deadline_msecs(timeout_millisec);
// Now run the main recognition.
failed = Recognize(&monitor) < 0;
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY ||
tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
// Disabled character recognition.
PageIterator* it = AnalyseLayout();
if (it == NULL) {
failed = true;
} else {
delete it;
PERF_COUNT_END
return true;
}
} else {
// Normal layout and character recognition with no timeout.
failed = Recognize(NULL) < 0;
}
if (tesseract_->tessedit_write_images) {
#ifndef ANDROID_BUILD
Pix* page_pix = GetThresholdedImage();
pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
#endif // ANDROID_BUILD
}
if (failed && retry_config != NULL && retry_config[0] != '\0') {
// Save current config variables before switching modes.
FILE* fp = fopen(kOldVarsFile, "wb");
@ -1243,6 +1247,7 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
if (renderer && !failed) {
failed = !renderer->AddImage(this);
}
PERF_COUNT_END
return !failed;
}
@ -1734,6 +1739,47 @@ char* TessBaseAPI::GetUNLVText() {
return result;
}
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* TessBaseAPI::GetOsdText(int page_number) {
OSResults osr;
bool osd = DetectOS(&osr);
if (!osd) {
return NULL;
}
int orient_id = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient_id);
float orient_conf = osr.best_result.oconfidence;
float script_conf = osr.best_result.sconfidence;
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);
// clockwise orientation of the input image, in degrees
int orient_deg = orient_id * 90;
// clockwise rotation needed to make the page upright
int rotate = OrientationIdToValue(orient_id);
char* osd_buf = new char[255];
snprintf(osd_buf, 255,
"Page number: %d\n"
"Orientation in degrees: %d\n"
"Rotate: %d\n"
"Orientation confidence: %.2f\n"
"Script: %s\n"
"Script confidence: %.2f\n",
page_number,
orient_deg, rotate, orient_conf,
script_name, script_conf);
return osd_buf;
}
/** Returns the average word confidence for Tesseract page result. */
int TessBaseAPI::MeanTextConf() {
int* conf = AllWordConfidences();

View File

@ -600,12 +600,21 @@ class TESS_API TessBaseAPI {
* page_number is a 0-based page index that will appear in the box file.
*/
char* GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes
* and must be freed with the delete [] operator.
*/
char* GetUNLVText();
/**
* Returns the orientation and script detection (OSD) result as a text
* report in a char* which is coded as UTF8 and must be freed with the
* delete [] operator. Returns NULL if detection fails.
* The report lists the page number, the orientation in degrees, the
* rotation needed to make the page upright, the detected script and the
* orientation and script confidences.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**

View File

@ -213,4 +213,21 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
return true;
}
/**********************************************************************
* Osd Text Renderer interface implementation
**********************************************************************/
/**
 * Constructs the renderer by forwarding outputbase to TessResultRenderer
 * together with "osd" (presumably the output file extension; confirm
 * against the base class).
 */
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
    : TessResultRenderer(outputbase, "osd") {}
/**
 * Asks api for the OSD text of the current image number (imagenum()) and
 * appends it to the renderer output, then frees the text buffer.
 * Returns false when api returns no text (NULL), true otherwise.
 */
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
  char* osd_text = api->GetOsdText(imagenum());
  const bool have_text = (osd_text != NULL);
  if (have_text) {
    AppendString(osd_text);
    // GetOsdText allocates with new[], so release with delete[].
    delete[] osd_text;
  }
  return have_text;
}
} // namespace tesseract

View File

@ -221,6 +221,17 @@ class TESS_API TessBoxTextRenderer : public TessResultRenderer {
virtual bool AddImageHandler(TessBaseAPI* api);
};
/**
* Renders tesseract orientation and script detection (OSD) output
* as an osd text string, one report per image.
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
// outputbase is forwarded to TessResultRenderer together with "osd".
explicit TessOsdRenderer(const char* outputbase);
protected:
// Appends the OSD text obtained from api for the current image;
// returns false if api provides no OSD text.
virtual bool AddImageHandler(TessBaseAPI* api);
};
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H__

View File

@ -242,8 +242,7 @@ int main(int argc, char **argv) {
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api.SetPageSegMode(pagesegmode);
if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
pagesegmode == tesseract::PSM_OSD_ONLY) {
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
int ret_val = 0;
Pix* pixs = pixRead(image);
@ -251,47 +250,44 @@ int main(int argc, char **argv) {
fprintf(stderr, "Cannot open input file: %s\n", image);
exit(2);
}
api.SetImage(pixs);
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
OSResults osr;
if (api.DetectOS(&osr)) {
int orient = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient);
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);
float orient_oco = osr.best_result.oconfidence;
float orient_sco = osr.best_result.sconfidence;
tprintf("Orientation: %d\n"
"Orientation in degrees: %d\n"
"Orientation confidence: %.2f\n"
"Script: %s\n"
"Script confidence: %.2f\n",
orient, OrientationIdToValue(orient), orient_oco,
script_name, orient_sco);
} else {
ret_val = 1;
}
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
tesseract::PageIterator* it = api.AnalyseLayout();
if (it) {
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else {
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
tesseract::PageIterator* it = api.AnalyseLayout();
if (it) {
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else {
ret_val = 1;
}
delete it;
ret_val = 1;
}
delete it;
pixDestroy(&pixs);
exit(ret_val);
}
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
tesseract::TessResultRenderer* renderer =
new tesseract::TessOsdRenderer(outputbase);
bool succeed = api.ProcessPages(image, NULL, 0, renderer);
if (succeed) {
PERF_COUNT_END
return 0;
} else {
fprintf(stderr, "Error during processing.\n");
exit(1);
}
}
bool b;
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
api.GetBoolVariable("tessedit_create_hocr", &b);