Cleaned up externally used namespace by removing includes from baseapi.h

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@657 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2012-02-02 03:14:16 +00:00
parent 6e273b71bd
commit 23dfabcab1
4 changed files with 446 additions and 239 deletions

View File

@ -6,7 +6,7 @@ AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"\
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil
include_HEADERS = \ include_HEADERS = \
apitypes.h baseapi.h pageiterator.h resultiterator.h tesseractmain.h apitypes.h baseapi.h tesseractmain.h
lib_LTLIBRARIES = lib_LTLIBRARIES =
if !USING_MULTIPLELIBS if !USING_MULTIPLELIBS
@ -28,7 +28,7 @@ libtesseract_api_la_LIBADD = \
$(top_srcdir)/viewer/libtesseract_viewer.la \ $(top_srcdir)/viewer/libtesseract_viewer.la \
$(top_srcdir)/ccutil/libtesseract_ccutil.la $(top_srcdir)/ccutil/libtesseract_ccutil.la
endif endif
libtesseract_api_la_SOURCES = baseapi.cpp pageiterator.cpp resultiterator.cpp libtesseract_api_la_SOURCES = baseapi.cpp
lib_LTLIBRARIES += libtesseract.la lib_LTLIBRARIES += libtesseract.la
libtesseract_la_LDFLAGS = libtesseract_la_LDFLAGS =

View File

@ -22,8 +22,10 @@
#include "publictypes.h" #include "publictypes.h"
// The types used by the API and Page/ResultIterator can be found in // The types used by the API and Page/ResultIterator can be found in:
// ccstruct/publictypes.h. // ccstruct/publictypes.h
// ccmain/resultiterator.h
// ccmain/pageiterator.h
// API interfaces and API users should be sure to include this file, rather // API interfaces and API users should be sure to include this file, rather
// than the lower-level one, and lower-level code should be sure to include // than the lower-level one, and lower-level code should be sure to include
// only the lower-level file. // only the lower-level file.

View File

@ -35,9 +35,11 @@
#include "baseapi.h" #include "baseapi.h"
#include "resultiterator.h" #include "resultiterator.h"
#include "mutableiterator.h"
#include "thresholder.h" #include "thresholder.h"
#include "tesseractclass.h" #include "tesseractclass.h"
#include "pageres.h" #include "pageres.h"
#include "paragraphs.h"
#include "tessvars.h" #include "tessvars.h"
#include "control.h" #include "control.h"
#include "pgedit.h" #include "pgedit.h"
@ -45,6 +47,7 @@
#include "output.h" #include "output.h"
#include "globals.h" #include "globals.h"
#include "edgblob.h" #include "edgblob.h"
#include "equationdetect.h"
#include "tessbox.h" #include "tessbox.h"
#include "imgs.h" #include "imgs.h"
#include "imgtiff.h" #include "imgtiff.h"
@ -52,6 +55,7 @@
#include "permute.h" #include "permute.h"
#include "otsuthr.h" #include "otsuthr.h"
#include "osdetect.h" #include "osdetect.h"
#include "params.h"
#ifdef __MSW32__ #ifdef __MSW32__
#include "version.h" #include "version.h"
@ -74,14 +78,21 @@ const char* kInputFile = "noname.tif";
const char* kOldVarsFile = "failed_vars.txt"; const char* kOldVarsFile = "failed_vars.txt";
// Max string length of an int. // Max string length of an int.
const int kMaxIntSize = 22; const int kMaxIntSize = 22;
// Minimum believable resolution. Used as a default if there is no other
// information, as it is safer to under-estimate than over-estimate.
// NOTE(review): presumably expressed in pixels per inch, like the ppi
// argument of SetSourceResolution -- confirm against the consuming code.
const int kMinCredibleResolution = 70;
// Maximum believable resolution.
// NOTE(review): presumably the same units as kMinCredibleResolution -- confirm.
const int kMaxCredibleResolution = 2400;
TessBaseAPI::TessBaseAPI() TessBaseAPI::TessBaseAPI()
: tesseract_(NULL), : tesseract_(NULL),
osd_tesseract_(NULL), osd_tesseract_(NULL),
equ_detect_(NULL),
// Thresholder is initialized to NULL here, but will be set before use by: // Thresholder is initialized to NULL here, but will be set before use by:
// A constructor of a derived API, SetThresholder(), or // A constructor of a derived API, SetThresholder(), or
// created implicitly when used in InternalSetImage. // created implicitly when used in InternalSetImage.
thresholder_(NULL), thresholder_(NULL),
paragraph_models_(NULL),
block_list_(NULL), block_list_(NULL),
page_res_(NULL), page_res_(NULL),
input_file_(NULL), input_file_(NULL),
@ -125,7 +136,14 @@ void TessBaseAPI::SetOutputName(const char* name) {
bool TessBaseAPI::SetVariable(const char* name, const char* value) { bool TessBaseAPI::SetVariable(const char* name, const char* value) {
if (tesseract_ == NULL) tesseract_ = new Tesseract; if (tesseract_ == NULL) tesseract_ = new Tesseract;
return ParamUtils::SetParam(name, value, false, tesseract_->params()); return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
tesseract_->params());
}
bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
if (tesseract_ == NULL) tesseract_ = new Tesseract;
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
tesseract_->params());
} }
bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
@ -178,7 +196,9 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
OcrEngineMode oem, char **configs, int configs_size, OcrEngineMode oem, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values,
bool set_only_init_params) { bool set_only_non_debug_params) {
// Default language is "eng".
if (language == NULL) language = "eng";
// If the datapath, OcrEngineMode or the language have changed - start again. // If the datapath, OcrEngineMode or the language have changed - start again.
// Note that the language_ field stores the last requested language that was // Note that the language_ field stores the last requested language that was
// initialized successfully, while tesseract_->lang stores the language // initialized successfully, while tesseract_->lang stores the language
@ -188,7 +208,6 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
(datapath_ == NULL || language_ == NULL || (datapath_ == NULL || language_ == NULL ||
*datapath_ != datapath || last_oem_requested_ != oem || *datapath_ != datapath || last_oem_requested_ != oem ||
(*language_ != language && tesseract_->lang != language))) { (*language_ != language && tesseract_->lang != language))) {
tesseract_->end_tesseract();
delete tesseract_; delete tesseract_;
tesseract_ = NULL; tesseract_ = NULL;
} }
@ -200,7 +219,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
if (tesseract_->init_tesseract( if (tesseract_->init_tesseract(
datapath, output_file_ != NULL ? output_file_->string() : NULL, datapath, output_file_ != NULL ? output_file_->string() : NULL,
language, oem, configs, configs_size, vars_vec, vars_values, language, oem, configs, configs_size, vars_vec, vars_values,
set_only_init_params) != 0) { set_only_non_debug_params) != 0) {
return -1; return -1;
} }
} }
@ -221,6 +240,31 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
return 0; return 0;
} }
// Reports the languages string that was requested by the last valid
// initialization, e.g. "deu+hin". A language that was loaded automatically
// on behalf of another one (such as eng for hin) is not part of this
// string; call GetLoadedLanguagesAsVector to see what was actually loaded.
// Yields an empty string when no language string is available.
// The returned string should NOT be deleted.
const char* TessBaseAPI::GetInitLanguagesAsString() const {
  if (language_ != NULL) {
    const char* init_langs = language_->string();
    if (init_langs != NULL)
      return init_langs;
  }
  return "";
}
// Fills langs with every language loaded by the last Init: the primary
// language first, followed by each sub-language (including those loaded as
// dependencies of other loaded languages).
// langs is always cleared first, so it is left empty when no engine exists.
void TessBaseAPI::GetLoadedLanguagesAsVector(
    GenericVector<STRING>* langs) const {
  langs->clear();
  if (tesseract_ == NULL)
    return;
  // Primary language goes first.
  langs->push_back(tesseract_->lang);
  // Then one entry per sub-language, in index order.
  const int sub_lang_count = tesseract_->num_sub_langs();
  for (int sub = 0; sub < sub_lang_count; ++sub) {
    langs->push_back(tesseract_->get_sub_lang(sub)->lang);
  }
}
// Init only the lang model component of Tesseract. The only functions // Init only the lang model component of Tesseract. The only functions
// that work after this init are SetVariable and IsValidWord. // that work after this init are SetVariable and IsValidWord.
// WARNING: temporary! This function will be removed from here and placed // WARNING: temporary! This function will be removed from here and placed
@ -243,8 +287,12 @@ void TessBaseAPI::InitForAnalysePage() {
// Read a "config" file containing a set of parameter name, value pairs. // Read a "config" file containing a set of parameter name, value pairs.
// Searches the standard places: tessdata/configs, tessdata/tessconfigs // Searches the standard places: tessdata/configs, tessdata/tessconfigs
// and also accepts a relative or absolute path name. // and also accepts a relative or absolute path name.
void TessBaseAPI::ReadConfigFile(const char* filename, bool init_only) { void TessBaseAPI::ReadConfigFile(const char* filename) {
tesseract_->read_config_file(filename, init_only); tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
}
// Same as above, but only set debug params from the given config file.
void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
} }
// Set the current page segmentation mode. Defaults to PSM_AUTO. // Set the current page segmentation mode. Defaults to PSM_AUTO.
@ -299,7 +347,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
if (tesseract_ == NULL) if (tesseract_ == NULL)
return; return;
tesseract_->ResetAdaptiveClassifier(); tesseract_->ResetAdaptiveClassifier();
tesseract_->getDict().ResetDocumentDictionary(); tesseract_->ResetDocumentDictionary();
} }
// Provide an image for Tesseract to recognize. Format is as // Provide an image for Tesseract to recognize. Format is as
@ -317,6 +365,13 @@ void TessBaseAPI::SetImage(const unsigned char* imagedata,
bytes_per_pixel, bytes_per_line); bytes_per_pixel, bytes_per_line);
} }
void TessBaseAPI::SetSourceResolution(int ppi) {
if (thresholder_)
thresholder_->SetSourceYResolution(ppi);
else
tprintf("Please call SetImage before SetSourceResolution.\n");
}
// Provide an image for Tesseract to recognize. As with SetImage above, // Provide an image for Tesseract to recognize. As with SetImage above,
// Tesseract doesn't take a copy or ownership or pixDestroy the image, so // Tesseract doesn't take a copy or ownership or pixDestroy the image, so
// it must persist until after Recognize. // it must persist until after Recognize.
@ -354,7 +409,7 @@ Pix* TessBaseAPI::GetThresholdedImage() {
// Boxa, Pixa pair, in reading order. // Boxa, Pixa pair, in reading order.
// Can be called before or after Recognize. // Can be called before or after Recognize.
Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
return GetComponentImages(RIL_BLOCK, pixa, NULL); return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
} }
// Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. // Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order.
@ -362,7 +417,24 @@ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
// If blockids is not NULL, the block-id of each line is also returned as an // If blockids is not NULL, the block-id of each line is also returned as an
// array of one element per line. delete [] after use. // array of one element per line. delete [] after use.
Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) { Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) {
return GetComponentImages(RIL_TEXTLINE, pixa, blockids); return GetComponentImages(RIL_TEXTLINE, true, pixa, blockids);
}
// Returns textlines together with strips of image regions as a
// leptonica-style Boxa, Pixa pair, in reading order, which lets downstream
// code deal with non-rectangular regions.
// May be called either before or after Recognize.
// When blockids is not NULL, it receives an array holding the block-id of
// each returned line (one element per line); delete [] it after use.
Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
  // Unlike GetTextlines, do not restrict the result to text components.
  const bool text_only = false;
  return GetComponentImages(RIL_TEXTLINE, text_only, pixa, blockids);
}
// Get the words as a leptonica-style
// Boxa, Pixa pair, in reading order.
// Can be called before or after Recognize.
Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
return GetComponentImages(RIL_WORD, true, pixa, NULL);
} }
// Gets the individual connected (text) components (created // Gets the individual connected (text) components (created
@ -370,14 +442,7 @@ Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) {
// as a leptonica-style Boxa, Pixa pair, in reading order. // as a leptonica-style Boxa, Pixa pair, in reading order.
// Can be called before or after Recognize. // Can be called before or after Recognize.
Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
return GetComponentImages(RIL_SYMBOL, pixa, NULL); return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
}
// Get the words as a leptonica-style
// Boxa, Pixa pair, in reading order.
// Can be called before or after Recognize.
Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
return GetComponentImages(RIL_WORD, pixa, NULL);
} }
// Get the given level kind of components (block, textline, word etc.) as a // Get the given level kind of components (block, textline, word etc.) as a
@ -385,7 +450,9 @@ Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
// Can be called before or after Recognize. // Can be called before or after Recognize.
// If blockids is not NULL, the block-id of each component is also returned // If blockids is not NULL, the block-id of each component is also returned
// as an array of one element per component. delete [] after use. // as an array of one element per component. delete [] after use.
// If text_only is true, then only text components are returned.
Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
bool text_only,
Pixa** pixa, int** blockids) { Pixa** pixa, int** blockids) {
PageIterator* page_it = GetIterator(); PageIterator* page_it = GetIterator();
if (page_it == NULL) if (page_it == NULL)
@ -397,7 +464,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
int component_count = 0; int component_count = 0;
int left, top, right, bottom; int left, top, right, bottom;
do { do {
if (page_it->BoundingBox(level, &left, &top, &right, &bottom)) if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
(!text_only || PTIsTextType(page_it->BlockType())))
++component_count; ++component_count;
} while (page_it->Next(level)); } while (page_it->Next(level));
@ -411,7 +479,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
int component_index = 0; int component_index = 0;
page_it->Begin(); page_it->Begin();
do { do {
if (page_it->BoundingBox(level, &left, &top, &right, &bottom)) { if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
(!text_only || PTIsTextType(page_it->BlockType()))) {
Box* lbox = boxCreate(left, top, right - left, bottom - top); Box* lbox = boxCreate(left, top, right - left, bottom - top);
boxaAddBox(boxa, lbox, L_INSERT); boxaAddBox(boxa, lbox, L_INSERT);
if (pixa != NULL) { if (pixa != NULL) {
@ -431,6 +500,13 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
return boxa; return boxa;
} }
// Returns the scale factor reported by the current thresholder, or 0 when
// no thresholder has been set up.
int TessBaseAPI::GetThresholdedImageScaleFactor() const {
  return thresholder_ != NULL ? thresholder_->GetScaleFactor() : 0;
}
// Dump the internal binary image to a PGM file. // Dump the internal binary image to a PGM file.
void TessBaseAPI::DumpPGM(const char* filename) { void TessBaseAPI::DumpPGM(const char* filename) {
if (tesseract_ == NULL) if (tesseract_ == NULL)
@ -537,9 +613,15 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
tesseract_->CorrectClassifyWords(page_res_); tesseract_->CorrectClassifyWords(page_res_);
return 0; return 0;
} }
if (truth_cb_ != NULL) truth_cb_->Run(image_height_, page_res_);
if (tesseract_->interactive_mode) { if (truth_cb_ != NULL) {
tesseract_->wordrec_run_blamer.set_value(true);
truth_cb_->Run(tesseract_->getDict().getUnicharset(),
image_height_, page_res_);
}
int result = 0;
if (tesseract_->interactive_display_mode) {
tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
// The page_res is invalid after an interactive session, so cleanup // The page_res is invalid after an interactive session, so cleanup
// in a way that lets us continue to the next page without crashing. // in a way that lets us continue to the next page without crashing.
@ -556,9 +638,15 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
fclose(training_output_file); fclose(training_output_file);
} else { } else {
// Now run the main recognition. // Now run the main recognition.
tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0); if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
int paragraph_debug_level = 0;
GetIntVariable("paragraph_debug_level", &paragraph_debug_level);
DetectParagraphs(paragraph_debug_level);
} else {
result = -1;
}
} }
return 0; return result;
} }
// Tests the chopper by exhaustively running chop_one_blob. // Tests the chopper by exhaustively running chop_one_blob.
@ -574,7 +662,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
if (FindLines() != 0) if (FindLines() != 0)
return -1; return -1;
// Additional conditions under which chopper test cannot be run // Additional conditions under which chopper test cannot be run
if (tesseract_->interactive_mode) return -1; if (tesseract_->interactive_display_mode) return -1;
recognition_done_ = true; recognition_done_ = true;
@ -584,9 +672,9 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
while (page_res_it.word() != NULL) { while (page_res_it.word() != NULL) {
WERD_RES *word_res = page_res_it.word(); WERD_RES *word_res = page_res_it.word();
tesseract_->MaximallyChopWord(page_res_it.block()->block, GenericVector<TBOX> boxes;
page_res_it.row()->row, tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
word_res); page_res_it.row()->row, word_res);
page_res_it.forward(); page_res_it.forward();
} }
return 0; return 0;
@ -741,11 +829,11 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
PrintVariables(fp); PrintVariables(fp);
fclose(fp); fclose(fp);
// Switch to alternate mode for retry. // Switch to alternate mode for retry.
ReadConfigFile(retry_config, false); ReadConfigFile(retry_config);
SetImage(pix); SetImage(pix);
Recognize(NULL); Recognize(NULL);
// Restore saved config variables. // Restore saved config variables.
ReadConfigFile(kOldVarsFile, false); ReadConfigFile(kOldVarsFile);
} }
// Get text only if successful. // Get text only if successful.
if (!failed) { if (!failed) {
@ -767,8 +855,19 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
return false; return false;
} }
// Get an iterator to the results of LayoutAnalysis and/or Recognize. // Get a left-to-right iterator to the results of LayoutAnalysis and/or
// The returned iterator must be deleted after use. // Recognize. The returned iterator must be deleted after use.
// Returns NULL when there is nothing to iterate over; otherwise the caller
// owns the returned iterator and must delete it after use.
LTRResultIterator* TessBaseAPI::GetLTRIterator() {
  // An engine and a page result are required. The thresholder is checked as
  // well because it is dereferenced below to obtain the scale factor and
  // the scaled resolution; other methods of this class already treat it as
  // possibly NULL.
  if (tesseract_ == NULL || page_res_ == NULL || thresholder_ == NULL)
    return NULL;
  return new LTRResultIterator(
      page_res_, tesseract_,
      thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
      rect_left_, rect_top_, rect_width_, rect_height_);
}
// Get a reading-order iterator to the results of LayoutAnalysis and/or
// Recognize. The returned iterator must be deleted after use.
// WARNING! This class points to data held within the TessBaseAPI class, and // WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and // therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End // has not been subjected to a call of Init, SetImage, Recognize, Clear, End
@ -776,10 +875,25 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
ResultIterator* TessBaseAPI::GetIterator() { ResultIterator* TessBaseAPI::GetIterator() {
if (tesseract_ == NULL || page_res_ == NULL) if (tesseract_ == NULL || page_res_ == NULL)
return NULL; return NULL;
return new ResultIterator(page_res_, tesseract_, return ResultIterator::StartOfParagraph(LTRResultIterator(
thresholder_->GetScaleFactor(), page_res_, tesseract_,
thresholder_->GetScaledYResolution(), thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
rect_left_, rect_top_, rect_width_, rect_height_); rect_left_, rect_top_, rect_width_, rect_height_));
}
// Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
// The returned iterator must be deleted after use.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
MutableIterator* TessBaseAPI::GetMutableIterator() {
if (tesseract_ == NULL || page_res_ == NULL)
return NULL;
return new MutableIterator(page_res_, tesseract_,
thresholder_->GetScaleFactor(),
thresholder_->GetScaledYResolution(),
rect_left_, rect_top_, rect_width_, rect_height_);
} }
// Make a text string from the internal data structures. // Make a text string from the internal data structures.
@ -787,67 +901,29 @@ char* TessBaseAPI::GetUTF8Text() {
if (tesseract_ == NULL || if (tesseract_ == NULL ||
(!recognition_done_ && Recognize(NULL) < 0)) (!recognition_done_ && Recognize(NULL) < 0))
return NULL; return NULL;
int total_length = TextLength(NULL); STRING text("");
PAGE_RES_IT page_res_it(page_res_); ResultIterator *it = GetIterator();
char* result = new char[total_length]; do {
char* ptr = result; if (it->Empty(RIL_PARA)) continue;
for (page_res_it.restart_page(); page_res_it.word () != NULL; char *para_text = it->GetUTF8Text(RIL_PARA);
page_res_it.forward()) { text += para_text;
WERD_RES *word = page_res_it.word(); delete []para_text;
WERD_CHOICE* choice = word->best_choice; } while (it->Next(RIL_PARA));
if (choice != NULL) { char* result = new char[text.length() + 1];
strcpy(ptr, choice->unichar_string().string()); strncpy(result, text.string(), text.length() + 1);
ptr += choice->unichar_string().length(); delete it;
if (word->word->flag(W_EOL))
*ptr++ = '\n';
else
*ptr++ = ' ';
}
}
*ptr++ = '\n';
*ptr = '\0';
return result; return result;
} }
// Helper returns true if there is a paragraph break between bbox_cur, static void AddBoxTohOCR(const PageIterator *it,
// and bbox_prev. PageIteratorLevel level,
// TODO(rays) improve and incorporate deeper into tesseract, so other STRING* hocr_str) {
// output methods get the benefit. int left, top, right, bottom;
static bool IsParagraphBreak(TBOX bbox_cur, TBOX bbox_prev, it->BoundingBox(level, &left, &top, &right, &bottom);
int right, int line_height) { hocr_str->add_str_int("' title=\"bbox ", left);
// Check if the distance between lines is larger than the normal leading, hocr_str->add_str_int(" ", top);
if (fabs((float)(bbox_cur.bottom() - bbox_prev.bottom())) > line_height * 2) hocr_str->add_str_int(" ", right);
return true; hocr_str->add_str_int(" ", bottom);
// Check if the distance between left bounds of the two lines is nearly the
// same as between their right bounds (if so, then both lines probably belong
// to the same paragraph, maybe a centered one).
if (fabs((float)((bbox_cur.left() - bbox_prev.left()) -
(bbox_prev.right() - bbox_cur.right()))) < line_height)
return false;
// Check if there is a paragraph indent at this line (either -ve or +ve).
if (fabs((float)(bbox_cur.left() - bbox_prev.left())) > line_height)
return true;
// Check if both current and previous line don't reach the right bound of the
// block, but the distance is different. This will cause all lines in a verse
// to be treated as separate paragraphs, but most probably will not split
// block-quotes to separate lines (at least if the text is justified).
if (fabs((float)(bbox_cur.right() - bbox_prev.right())) > line_height &&
right - bbox_cur.right() > line_height &&
right - bbox_prev.right() > line_height)
return true;
return false;
}
// Helper to add the hOCR for a box to the given hocr_str.
static void AddBoxTohOCR(const TBOX& box, int image_height, STRING* hocr_str) {
hocr_str->add_str_int("' title=\"bbox ", box.left());
hocr_str->add_str_int(" ", image_height - box.top());
hocr_str->add_str_int(" ", box.right());
hocr_str->add_str_int(" ", image_height - box.bottom());
*hocr_str += "\">"; *hocr_str += "\">";
} }
@ -860,15 +936,10 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
(page_res_ == NULL && Recognize(NULL) < 0)) (page_res_ == NULL && Recognize(NULL) < 0))
return NULL; return NULL;
PAGE_RES_IT page_res_it(page_res_); int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
ROW_RES *row = NULL; // current row
ROW *real_row = NULL, *prev_row = NULL;
BLOCK_RES *block = NULL; // current row
BLOCK *real_block = NULL;
int lcnt = 1, bcnt = 1, wcnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers. int page_id = page_number + 1; // hOCR uses 1-based page numbers.
STRING hocr_str; STRING hocr_str("");
hocr_str.add_str_int("<div class='ocr_page' id='page_", page_id); hocr_str.add_str_int("<div class='ocr_page' id='page_", page_id);
hocr_str += "' title='image \""; hocr_str += "' title='image \"";
@ -879,82 +950,87 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
hocr_str.add_str_int(" ", rect_height_); hocr_str.add_str_int(" ", rect_height_);
hocr_str += "'>\n"; hocr_str += "'>\n";
for (page_res_it.restart_page(); page_res_it.word () != NULL; ResultIterator *res_it = GetIterator();
page_res_it.forward()) { for (; !res_it->Empty(RIL_BLOCK); wcnt++) {
if (block != page_res_it.block()) { if (res_it->Empty(RIL_WORD)) {
if (block != NULL) { res_it->Next(RIL_WORD);
hocr_str += "</span>\n</p>\n</div>\n"; continue;
}
block = page_res_it.block(); // current row
real_block = block->block;
real_row = NULL;
row = NULL;
hocr_str.add_str_int("<div class='ocr_carea' id='block_", page_id);
hocr_str.add_str_int("_", bcnt++);
AddBoxTohOCR(real_block->bounding_box(), image_height_, &hocr_str);
hocr_str += "\n<p class='ocr_par'>\n";
}
if (row != page_res_it.row()) {
if (row != NULL) {
hocr_str += "</span>\n";
}
prev_row = real_row;
row = page_res_it.row(); // current row
real_row = row->row;
if (prev_row != NULL &&
IsParagraphBreak(real_row->bounding_box(), prev_row->bounding_box(),
real_block->bounding_box().right(),
real_row->x_height() + real_row->ascenders()))
hocr_str += "</p>\n<p class='ocr_par'>\n";
hocr_str.add_str_int("<span class='ocr_line' id='line_", page_id);
hocr_str.add_str_int("_", lcnt++);
AddBoxTohOCR(real_row->bounding_box(), image_height_, &hocr_str);
} }
WERD_RES *word = page_res_it.word(); // Open any new block/paragraph/textline.
WERD_CHOICE* choice = word->best_choice; if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
if (choice != NULL) { hocr_str.add_str_int("<div class='ocr_carea' id='block_", bcnt);
hocr_str.add_str_int("<span class='ocr_word' id='word_", page_id); hocr_str.add_str_int("_", bcnt);
hocr_str.add_str_int("_", wcnt); AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
AddBoxTohOCR(word->word->bounding_box(), image_height_, &hocr_str); }
hocr_str.add_str_int("<span class='ocrx_word' id='xword_", page_id); if (res_it->IsAtBeginningOf(RIL_PARA)) {
hocr_str.add_str_int("_", wcnt++); if (res_it->ParagraphIsLtr()) {
hocr_str.add_str_int("' title=\"x_wconf ", choice->certainty()); hocr_str.add_str_int("\n<p class='ocr_par' dir='ltr' id='par_", pcnt);
hocr_str += "\">"; } else {
if (word->bold > 0) hocr_str.add_str_int("\n<p class='ocr_par' dir='rtl' id='par_", pcnt);
hocr_str += "<strong>";
if (word->italic > 0)
hocr_str += "<em>";
int i;
// escape special characters
for (i = 0; choice->unichar_string()[i] != '\0'; i++) {
if (choice->unichar_string()[i] == '<') hocr_str += "&lt;";
else if (choice->unichar_string()[i] == '>') hocr_str += "&gt;";
else if (choice->unichar_string()[i] == '&') hocr_str += "&amp;";
else if (choice->unichar_string()[i] == '"') hocr_str += "&quot;";
else if (choice->unichar_string()[i] == '\'') hocr_str += "&#39;";
else hocr_str += choice->unichar_string()[i];
} }
if (word->italic > 0) AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
hocr_str += "</em>"; }
if (word->bold > 0) if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
hocr_str += "</strong>"; hocr_str.add_str_int("<span class='ocr_line' id='line_", lcnt);
hocr_str += "</span></span>"; AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
if (!word->word->flag(W_EOL)) }
hocr_str += " ";
// Now, process the word...
hocr_str.add_str_int("<span class='ocr_word' id='word_", wcnt);
AddBoxTohOCR(res_it, RIL_WORD, &hocr_str);
const char *font_name;
bool bold, italic, underlined, monospace, serif, smallcaps;
int pointsize, font_id;
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
&monospace, &serif, &smallcaps,
&pointsize, &font_id);
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
if (bold) hocr_str += "<strong>";
if (italic) hocr_str += "<em>";
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
if (grapheme && grapheme[0] != 0) {
if (grapheme[1] == 0) {
switch (grapheme[0]) {
case '<': hocr_str += "&lt;"; break;
case '>': hocr_str += "&gt;"; break;
case '&': hocr_str += "&amp;"; break;
case '"': hocr_str += "&quot;"; break;
case '\'': hocr_str += "&#39;"; break;
default: hocr_str += grapheme;
}
} else {
hocr_str += grapheme;
}
}
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str += "</em>";
if (bold) hocr_str += "</strong>";
hocr_str += "</span> ";
wcnt++;
// Close any ending block/paragraph/textline.
if (last_word_in_line) {
hocr_str += "</span>\n";
lcnt++;
}
if (last_word_in_para) {
hocr_str += "</p>\n";
pcnt++;
}
if (last_word_in_block) {
hocr_str += "</div>\n";
bcnt++;
} }
} }
if (block != NULL)
hocr_str += "</span>\n</p>\n</div>\n";
hocr_str += "</div>\n"; hocr_str += "</div>\n";
char *ret = new char[hocr_str.length() + 1]; char *ret = new char[hocr_str.length() + 1];
strcpy(ret, hocr_str.string()); strcpy(ret, hocr_str.string());
delete res_it;
return ret; return ret;
} }
@ -990,7 +1066,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
kMaxBytesPerLine; kMaxBytesPerLine;
char* result = new char[total_length]; char* result = new char[total_length];
int output_length = 0; int output_length = 0;
ResultIterator* it = GetIterator(); LTRResultIterator* it = GetLTRIterator();
do { do {
int left, top, right, bottom; int left, top, right, bottom;
if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
@ -1179,11 +1255,16 @@ int* TessBaseAPI::AllWordConfidences() {
* Returns false if adaption was not possible for some reason. * Returns false if adaption was not possible for some reason.
*/ */
bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
int debug = 0;
GetIntVariable("applybox_debug", &debug);
bool success = true; bool success = true;
PageSegMode current_psm = GetPageSegMode(); PageSegMode current_psm = GetPageSegMode();
SetPageSegMode(mode); SetPageSegMode(mode);
SetVariable("classify_enable_learning", "0"); SetVariable("classify_enable_learning", "0");
char* text = GetUTF8Text(); char* text = GetUTF8Text();
if (debug) {
tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
}
if (text != NULL) { if (text != NULL) {
PAGE_RES_IT it(page_res_); PAGE_RES_IT it(page_res_);
WERD_RES* word_res = it.word(); WERD_RES* word_res = it.word();
@ -1207,7 +1288,8 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
if (text[t] != '\0' || wordstr[w] != '\0') { if (text[t] != '\0' || wordstr[w] != '\0') {
// No match. // No match.
delete page_res_; delete page_res_;
page_res_ = tesseract_->SetupApplyBoxes(block_list_); GenericVector<TBOX> boxes;
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
tesseract_->ReSegmentByClassification(page_res_); tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_); tesseract_->TidyUp(page_res_);
PAGE_RES_IT pr_it(page_res_); PAGE_RES_IT pr_it(page_res_);
@ -1216,7 +1298,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
else else
word_res = pr_it.word(); word_res = pr_it.word();
} else { } else {
word_res->BestChoiceToCorrectText(tesseract_->unicharset); word_res->BestChoiceToCorrectText();
} }
if (success) { if (success) {
tesseract_->EnableLearning = true; tesseract_->EnableLearning = true;
@ -1257,18 +1339,25 @@ void TessBaseAPI::End() {
delete block_list_; delete block_list_;
block_list_ = NULL; block_list_ = NULL;
} }
if (paragraph_models_ != NULL) {
paragraph_models_->delete_data_pointers();
delete paragraph_models_;
paragraph_models_ = NULL;
}
if (tesseract_ != NULL) { if (tesseract_ != NULL) {
tesseract_->end_tesseract();
delete tesseract_; delete tesseract_;
if (osd_tesseract_ == tesseract_) if (osd_tesseract_ == tesseract_)
osd_tesseract_ = NULL; osd_tesseract_ = NULL;
tesseract_ = NULL; tesseract_ = NULL;
} }
if (osd_tesseract_ != NULL) { if (osd_tesseract_ != NULL) {
osd_tesseract_->end_tesseract();
delete osd_tesseract_; delete osd_tesseract_;
osd_tesseract_ = NULL; osd_tesseract_ = NULL;
} }
if (equ_detect_ != NULL) {
delete equ_detect_;
equ_detect_ = NULL;
}
if (input_file_ != NULL) { if (input_file_ != NULL) {
delete input_file_; delete input_file_;
input_file_ = NULL; input_file_ = NULL;
@ -1332,9 +1421,19 @@ void TessBaseAPI::SetDictFunc(DictFunc f) {
// Installs f as the probability_in_context_ callback on the dictionary of the
// main Tesseract instance and on the dictionary of each of its sub-languages.
// Does nothing when no Tesseract instance exists.
void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
  if (tesseract_ == NULL)
    return;
  tesseract_->getDict().probability_in_context_ = f;
  // Propagate the same callback to every sub-language.
  const int sub_lang_count = tesseract_->num_sub_langs();
  for (int lang = 0; lang < sub_lang_count; ++lang) {
    tesseract_->get_sub_lang(lang)->getDict().probability_in_context_ = f;
  }
}
// Points Wordrec::fill_lattice_ of the main Tesseract instance at f.
// Has no effect when no Tesseract instance exists.
void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
  if (tesseract_ == NULL)
    return;
  tesseract_->fill_lattice_ = f;
}
// Common code for setting the image. // Common code for setting the image.
bool TessBaseAPI::InternalSetImage() { bool TessBaseAPI::InternalSetImage() {
if (tesseract_ == NULL) { if (tesseract_ == NULL) {
@ -1358,10 +1457,29 @@ void TessBaseAPI::Threshold(Pix** pix) {
} }
if (*pix != NULL) if (*pix != NULL)
pixDestroy(pix); pixDestroy(pix);
// Zero resolution messes up the algorithms, so make sure it is credible.
int y_res = thresholder_->GetScaledYResolution();
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
// Use the minimum default resolution, as it is safer to under-estimate
// than over-estimate resolution.
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
thresholder_->ThresholdToPix(pix); thresholder_->ThresholdToPix(pix);
thresholder_->GetImageSizes(&rect_left_, &rect_top_, thresholder_->GetImageSizes(&rect_left_, &rect_top_,
&rect_width_, &rect_height_, &rect_width_, &rect_height_,
&image_width_, &image_height_); &image_width_, &image_height_);
// Set the internal resolution that is used for layout parameters from the
// estimated resolution, rather than the image resolution, which may be
// fabricated, but we will use the image resolution, if there is one, to
// report output point sizes.
int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
kMinCredibleResolution,
kMaxCredibleResolution);
if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
tprintf("Estimated resolution %d out of range! Corrected to %d\n",
thresholder_->GetScaledEstimatedResolution(), estimated_res);
}
tesseract_->set_source_resolution(estimated_res);
} }
// Find lines from the image making the BLOCK_LIST. // Find lines from the image making the BLOCK_LIST.
@ -1390,6 +1508,13 @@ int TessBaseAPI::FindLines() {
tesseract_->PrepareForPageseg(); tesseract_->PrepareForPageseg();
if (tesseract_->textord_equation_detect) {
if (equ_detect_ == NULL && datapath_ != NULL) {
equ_detect_ = new EquationDetect(datapath_->string(), NULL);
}
tesseract_->SetEquationDetect(equ_detect_);
}
Tesseract* osd_tess = osd_tesseract_; Tesseract* osd_tess = osd_tesseract_;
OSResults osr; OSResults osr;
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) { if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
@ -1401,6 +1526,8 @@ int TessBaseAPI::FindLines() {
datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY, datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
NULL, 0, NULL, NULL, false) == 0) { NULL, 0, NULL, NULL, false) == 0) {
osd_tess = osd_tesseract_; osd_tess = osd_tesseract_;
osd_tesseract_->set_source_resolution(
thresholder_->GetSourceYResolution());
} else { } else {
tprintf("Warning: Auto orientation and script detection requested," tprintf("Warning: Auto orientation and script detection requested,"
" but osd language failed to load\n"); " but osd language failed to load\n");
@ -1412,16 +1539,9 @@ int TessBaseAPI::FindLines() {
if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
return -1; return -1;
// If OCR is to be run using Tesseract, OCR-able blobs are required for // If Devanagari is being recognized, we use different images for page seg
// training, or interactive mode is needed, prepare data and images for ocr. // and for OCR.
if (tesseract_->interactive_mode || tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
tesseract_->tessedit_train_from_boxes ||
tesseract_->tessedit_ambigs_training ||
tesseract_->tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
tesseract_->tessedit_ocr_engine_mode ==
OEM_TESSERACT_CUBE_COMBINED) {
tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
}
return 0; return 0;
} }
@ -1429,7 +1549,6 @@ int TessBaseAPI::FindLines() {
void TessBaseAPI::ClearResults() { void TessBaseAPI::ClearResults() {
if (tesseract_ != NULL) { if (tesseract_ != NULL) {
tesseract_->Clear(); tesseract_->Clear();
tesseract_->ResetFeaturesHaveBeenExtracted();
} }
if (page_res_ != NULL) { if (page_res_ != NULL) {
delete page_res_; delete page_res_;
@ -1440,11 +1559,17 @@ void TessBaseAPI::ClearResults() {
block_list_ = new BLOCK_LIST; block_list_ = new BLOCK_LIST;
else else
block_list_->clear(); block_list_->clear();
if (paragraph_models_ != NULL) {
paragraph_models_->delete_data_pointers();
delete paragraph_models_;
paragraph_models_ = NULL;
}
} }
// Return the length of the output text string, as UTF8, assuming // Return the length of the output text string, as UTF8, assuming
// one newline per line and one per block, with a terminator, // liberally two spacing marks after each word (as paragraphs end with two
// and assuming a single character reject marker for each rejected character. // newlines), and assuming a single character reject marker for each rejected
// character.
// Also return the number of recognized blobs in blob_count. // Also return the number of recognized blobs in blob_count.
int TessBaseAPI::TextLength(int* blob_count) { int TessBaseAPI::TextLength(int* blob_count) {
if (tesseract_ == NULL || page_res_ == NULL) if (tesseract_ == NULL || page_res_ == NULL)
@ -1459,8 +1584,8 @@ int TessBaseAPI::TextLength(int* blob_count) {
WERD_RES *word = page_res_it.word(); WERD_RES *word = page_res_it.word();
WERD_CHOICE* choice = word->best_choice; WERD_CHOICE* choice = word->best_choice;
if (choice != NULL) { if (choice != NULL) {
total_blobs += choice->length() + 1; total_blobs += choice->length() + 2;
total_length += choice->unichar_string().length() + 1; total_length += choice->unichar_string().length() + 2;
for (int i = 0; i < word->reject_map.length(); ++i) { for (int i = 0; i < word->reject_map.length(); ++i) {
if (word->reject_map[i].rejected()) if (word->reject_map[i].rejected())
++total_length; ++total_length;
@ -1661,8 +1786,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
// Classify to get a raw choice. // Classify to get a raw choice.
BLOB_CHOICE_LIST choices; BLOB_CHOICE_LIST choices;
DENORM denorm; DENORM denorm;
tesseract_->set_denorm(&denorm); tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);
tesseract_->AdaptiveClassifier(blob, &choices, NULL);
BLOB_CHOICE_IT choice_it; BLOB_CHOICE_IT choice_it;
choice_it.set_to_list(&choices); choice_it.set_to_list(&choices);
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
@ -1673,29 +1797,10 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
} }
} }
if (id == best_class) { threshold = tesseract_->matcher_good_threshold;
threshold = tesseract_->matcher_good_threshold;
} else {
/* the blob was incorrectly classified - find the rating threshold
needed to create a template which will correct the error with
some margin. However, don't waste time trying to make
templates which are too tight. */
threshold = tesseract_->GetBestRatingFor(blob, id);
threshold *= .9;
const float max_threshold = .125;
const float min_threshold = .02;
if (threshold > max_threshold)
threshold = max_threshold;
// I have cuddled the following line to set it out of the strike
// of the coverage testing tool. I have no idea how to trigger
// this situation nor I have any necessity to do it. --mezhirov
if (threshold < min_threshold) threshold = min_threshold;
}
if (blob->outlines) if (blob->outlines)
tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold); tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
delete blob; delete blob;
} }
@ -1716,6 +1821,18 @@ PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
return pass1_result; return pass1_result;
} }
void TessBaseAPI::DetectParagraphs(int debug_level) {
if (paragraph_models_ == NULL)
paragraph_models_ = new GenericVector<ParagraphModel*>;
MutableIterator *result_it = GetMutableIterator();
do { // Detect paragraphs for this block
GenericVector<ParagraphModel *> models;
::tesseract::DetectParagraphs(debug_level, result_it, &models);
*paragraph_models_ += models;
} while (result_it->Next(RIL_BLOCK));
delete result_it;
}
struct TESS_CHAR : ELIST_LINK { struct TESS_CHAR : ELIST_LINK {
char *unicode_repr; char *unicode_repr;
int length; // of unicode_repr int length; // of unicode_repr
@ -1838,12 +1955,12 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
if (tesseract_) { if (tesseract_) {
tesseract_->ResetFeaturesHaveBeenExtracted(); tesseract_->ResetFeaturesHaveBeenExtracted();
} }
tesseract_->set_denorm(&denorm); uinT8* norm_array = new uinT8[MAX_NUM_CLASSES];
CLASS_NORMALIZATION_ARRAY norm_array;
inT32 len; inT32 len;
*num_features = tesseract_->GetIntCharNormFeatures( *num_features = tesseract_->GetCharNormFeatures(
blob, tesseract_->PreTrainedTemplates, blob, denorm, tesseract_->PreTrainedTemplates,
int_features, norm_array, &len, FeatureOutlineIndex); int_features, norm_array, norm_array, &len, FeatureOutlineIndex);
delete [] norm_array;
} }
// This method returns the row to which a box of specified dimensions would // This method returns the row to which a box of specified dimensions would
@ -1879,8 +1996,7 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
float* ratings, float* ratings,
int* num_matches_returned) { int* num_matches_returned) {
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
tesseract_->set_denorm(&denorm); tesseract_->AdaptiveClassifier(blob, denorm, choices, NULL);
tesseract_->AdaptiveClassifier(blob, choices, NULL);
BLOB_CHOICE_IT choices_it(choices); BLOB_CHOICE_IT choices_it(choices);
int& index = *num_matches_returned; int& index = *num_matches_returned;
index = 0; index = 0;
@ -1912,12 +2028,6 @@ int TessBaseAPI::NumDawgs() const {
return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs(); return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
} }
// Return the language used in the last valid initialization.
const char* TessBaseAPI::GetLastInitLanguage() const {
return (tesseract_ == NULL || tesseract_->lang.string() == NULL) ?
"" : tesseract_->lang.string();
}
// Returns the CubeRecoContext reported by the underlying Tesseract object,
// or NULL when there is no Tesseract object.
CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const {
  if (tesseract_ == NULL)
    return NULL;
  return tesseract_->GetCubeRecoContext();
}

View File

@ -20,20 +20,25 @@
#ifndef TESSERACT_API_BASEAPI_H__ #ifndef TESSERACT_API_BASEAPI_H__
#define TESSERACT_API_BASEAPI_H__ #define TESSERACT_API_BASEAPI_H__
#include <stdio.h>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM // To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible // complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp. // and hide includes of complex types in baseapi.cpp.
#include "apitypes.h" #include "apitypes.h"
#include "genericvector.h"
#include "thresholder.h" #include "thresholder.h"
#include "unichar.h" #include "unichar.h"
#include "tesscallback.h" #include "tesscallback.h"
#include "publictypes.h"
template <typename T> class GenericVector;
class PAGE_RES; class PAGE_RES;
class PAGE_RES_IT; class PAGE_RES_IT;
class ParagraphModel;
class BlamerBundle;
class BLOCK_LIST; class BLOCK_LIST;
class DENORM; class DENORM;
class IMAGE; class IMAGE;
class MATRIX;
class PBLOB; class PBLOB;
class ROW; class ROW;
class STRING; class STRING;
@ -45,6 +50,12 @@ struct Boxa;
class ETEXT_DESC; class ETEXT_DESC;
struct OSResults; struct OSResults;
class TBOX; class TBOX;
class UNICHARSET;
// From oldlist.h
// TODO(antonova): remove when oldlist is deprecated.
struct list_rec;
typedef list_rec *LIST;
#define MAX_NUM_INT_FEATURES 512 #define MAX_NUM_INT_FEATURES 512
struct INT_FEATURE_STRUCT; struct INT_FEATURE_STRUCT;
@ -66,19 +77,27 @@ namespace tesseract {
class CubeRecoContext; class CubeRecoContext;
class Dawg; class Dawg;
class Dict; class Dict;
class EquationDetect;
class PageIterator; class PageIterator;
class LTRResultIterator;
class ResultIterator; class ResultIterator;
class MutableIterator;
class Tesseract; class Tesseract;
class Trie; class Trie;
class Wordrec;
typedef int (Dict::*DictFunc)(void* void_dawg_args, typedef int (Dict::*DictFunc)(void* void_dawg_args,
UNICHAR_ID unichar_id, bool word_end); UNICHAR_ID unichar_id, bool word_end) const;
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
const char* context, const char* context,
int context_bytes, int context_bytes,
const char* character, const char* character,
int character_bytes); int character_bytes);
typedef TessCallback2<int, PAGE_RES *> TruthCallback; typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
const LIST &best_choices,
const UNICHARSET &unicharset,
BlamerBundle *blamer_bundle);
typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback;
/** /**
* Base class for all tesseract APIs. * Base class for all tesseract APIs.
@ -123,6 +142,7 @@ class TESSDLL_API TessBaseAPI {
* (init variables should be passed to Init()). * (init variables should be passed to Init()).
*/ */
bool SetVariable(const char* name, const char* value); bool SetVariable(const char* name, const char* value);
bool SetDebugVariable(const char* name, const char* value);
// Returns true if the parameter was found among Tesseract parameters. // Returns true if the parameter was found among Tesseract parameters.
// Fills in value with the value of the parameter. // Fills in value with the value of the parameter.
@ -155,6 +175,16 @@ class TESSDLL_API TessBaseAPI {
* It is entirely safe (and eventually will be efficient too) to call * It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just * Init multiple times on the same instance to change language, or just
* to reset the classifier. * to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset * WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.) * back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls * If you have a rare need to set a Variable that controls
@ -162,12 +192,15 @@ class TESSDLL_API TessBaseAPI {
* call End() and then use SetVariable before Init. This is only a very * call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters * rare use case, since there are very few uses that require any parameters
* to be set before Init. * to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/ */
int Init(const char* datapath, const char* language, OcrEngineMode mode, int Init(const char* datapath, const char* language, OcrEngineMode mode,
char **configs, int configs_size, char **configs, int configs_size,
const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values, const GenericVector<STRING> *vars_values,
bool set_only_init_params); bool set_only_non_debug_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) { int Init(const char* datapath, const char* language, OcrEngineMode oem) {
return Init(datapath, language, oem, NULL, 0, NULL, NULL, false); return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
} }
@ -175,6 +208,19 @@ class TESSDLL_API TessBaseAPI {
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
} }
// Returns the languages string used in the last valid initialization.
// If the last initialization specified "deu+hin" then that will be
// returned. If hin loaded eng automatically as well, then that will
// not be included in this list. To find the languages actually
// loaded use GetLoadedLanguagesAsVector.
// The returned string should NOT be deleted.
const char* GetInitLanguagesAsString() const;
// Returns the loaded languages in the vector of STRINGs.
// Includes all languages loaded by the last Init, including those loaded
// as dependencies of other loaded languages.
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
/** /**
* Init only the lang model component of Tesseract. The only functions * Init only the lang model component of Tesseract. The only functions
* that work after this init are SetVariable and IsValidWord. * that work after this init are SetVariable and IsValidWord.
@ -188,15 +234,14 @@ class TESSDLL_API TessBaseAPI {
void InitForAnalysePage(); void InitForAnalysePage();
/** /**
* Read a "config" file containing a set of variable, value pairs. * Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs * Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name. * and also accepts a relative or absolute path name.
* If init_only is true, only sets the parameters marked with a special * Note: only non-init params will be set (init params are set by Init()).
* INIT flag, which are typically of functional/algorithmic effect
* rather than debug effect. Used to separate debug settings from
* working settings.
*/ */
void ReadConfigFile(const char* filename, bool init_only); void ReadConfigFile(const char* filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char* filename);
/** /**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
@ -267,6 +312,12 @@ class TESSDLL_API TessBaseAPI {
*/ */
void SetImage(const Pix* pix); void SetImage(const Pix* pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/** /**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage. * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles * Each SetRectangle clears the recognition results so multiple rectangles
@ -311,6 +362,16 @@ class TESSDLL_API TessBaseAPI {
*/ */
Boxa* GetTextlines(Pixa** pixa, int** blockids); Boxa* GetTextlines(Pixa** pixa, int** blockids);
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not NULL, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
*/
Boxa* GetStrips(Pixa** pixa, int** blockids);
/** /**
* Get the words as a leptonica-style * Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order. * Boxa, Pixa pair, in reading order.
@ -331,9 +392,17 @@ class TESSDLL_API TessBaseAPI {
// Can be called before or after Recognize. // Can be called before or after Recognize.
// If blockids is not NULL, the block-id of each component is also returned // If blockids is not NULL, the block-id of each component is also returned
// as an array of one element per component. delete [] after use. // as an array of one element per component. delete [] after use.
// If text_only is true, then only text components are returned.
Boxa* GetComponentImages(PageIteratorLevel level, Boxa* GetComponentImages(PageIteratorLevel level,
bool text_only,
Pixa** pixa, int** blockids); Pixa** pixa, int** blockids);
// Returns the scale factor of the thresholded image that would be returned by
// GetThresholdedImage() and the various GetX() methods that call
// GetComponentImages().
// Returns 0 if no thresholder has been set.
int GetThresholdedImageScaleFactor() const;
/** /**
* Dump the internal binary image to a PGM file. * Dump the internal binary image to a PGM file.
* @deprecated Use GetThresholdedImage and write the image using pixWrite * @deprecated Use GetThresholdedImage and write the image using pixWrite
@ -403,19 +472,28 @@ class TESSDLL_API TessBaseAPI {
const char* retry_config, int timeout_millisec, const char* retry_config, int timeout_millisec,
STRING* text_out); STRING* text_out);
// Get an iterator to the results of LayoutAnalysis and/or Recognize. // Get a reading-order iterator to the results of LayoutAnalysis and/or
// The returned iterator must be deleted after use. // Recognize. The returned iterator must be deleted after use.
// WARNING! This class points to data held within the TessBaseAPI class, and // WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and // therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End // has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES. // DetectOS, or anything else that changes the internal PAGE_RES.
ResultIterator* GetIterator(); ResultIterator* GetIterator();
// Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
// The returned iterator must be deleted after use.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
MutableIterator* GetMutableIterator();
/** /**
* The recognized text is returned as a char* which is coded * The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator. * as UTF8 and must be freed with the delete [] operator.
*/ */
char* GetUTF8Text(); char* GetUTF8Text();
/** /**
* Make a HTML-formatted string with hOCR markup from the internal * Make a HTML-formatted string with hOCR markup from the internal
* data structures. * data structures.
@ -492,6 +570,9 @@ class TESSDLL_API TessBaseAPI {
*/ */
void SetProbabilityInContextFunc(ProbabilityInContextFunc f); void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
/** /**
* Estimates the Orientation And Script of the image. * Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully. * @return true if the image was processed successfully.
@ -544,6 +625,9 @@ class TESSDLL_API TessBaseAPI {
// Accessor for tesseract_, the underlying Tesseract object (may be NULL).
Tesseract* const tesseract() const { Tesseract* const tesseract() const {
return tesseract_; return tesseract_;
} }
// Accessor for last_oem_requested_ (presumably the engine mode passed to the
// most recent Init -- confirm where the member is assigned).
OcrEngineMode const oem() const {
return last_oem_requested_;
}
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
@ -588,6 +672,11 @@ class TESSDLL_API TessBaseAPI {
/** Delete the pageres and block list ready for a new page. */ /** Delete the pageres and block list ready for a new page. */
void ClearResults(); void ClearResults();
// Return an LTR Result Iterator -- used only for training, as we really want
// to ignore all BiDi smarts at that point.
// delete once you're done with it.
LTRResultIterator* GetLTRIterator();
/** /**
* Return the length of the output text string, as UTF8, assuming * Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator, * one newline per line and one per block, with a terminator,
@ -614,6 +703,10 @@ class TESSDLL_API TessBaseAPI {
PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result); PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
//// paragraphs.cpp ////////////////////////////////////////////////////
/** After text is recognized, break each block into paragraphs. */
void DetectParagraphs(int debug_level);
/** /**
* Extract the OCR results, costs (penalty points for uncertainty), * Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters. * and the bounding boxes of the characters.
@ -634,7 +727,9 @@ class TESSDLL_API TessBaseAPI {
protected: protected:
Tesseract* tesseract_; ///< The underlying data object. Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection. Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///<The equation detector.
ImageThresholder* thresholder_; ///< Image thresholding module. ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel *>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout. BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data. PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code. STRING* input_file_; ///< Name used by training code.