mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 09:52:40 +08:00
Cleaned up externally used namespace by removing includes from baseapi.h
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@657 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
6e273b71bd
commit
23dfabcab1
@ -6,7 +6,7 @@ AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"\
|
||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil
|
||||
|
||||
include_HEADERS = \
|
||||
apitypes.h baseapi.h pageiterator.h resultiterator.h tesseractmain.h
|
||||
apitypes.h baseapi.h tesseractmain.h
|
||||
lib_LTLIBRARIES =
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
@ -28,7 +28,7 @@ libtesseract_api_la_LIBADD = \
|
||||
$(top_srcdir)/viewer/libtesseract_viewer.la \
|
||||
$(top_srcdir)/ccutil/libtesseract_ccutil.la
|
||||
endif
|
||||
libtesseract_api_la_SOURCES = baseapi.cpp pageiterator.cpp resultiterator.cpp
|
||||
libtesseract_api_la_SOURCES = baseapi.cpp
|
||||
|
||||
lib_LTLIBRARIES += libtesseract.la
|
||||
libtesseract_la_LDFLAGS =
|
||||
|
@ -22,8 +22,10 @@
|
||||
|
||||
#include "publictypes.h"
|
||||
|
||||
// The types used by the API and Page/ResultIterator can be found in
|
||||
// ccstruct/publictypes.h.
|
||||
// The types used by the API and Page/ResultIterator can be found in:
|
||||
// ccstruct/publictypes.h
|
||||
// ccmain/resultiterator.h
|
||||
// ccmain/pageiterator.h
|
||||
// API interfaces and API users should be sure to include this file, rather
|
||||
// than the lower-level one, and lower-level code should be sure to include
|
||||
// only the lower-level file.
|
||||
|
556
api/baseapi.cpp
556
api/baseapi.cpp
@ -35,9 +35,11 @@
|
||||
#include "baseapi.h"
|
||||
|
||||
#include "resultiterator.h"
|
||||
#include "mutableiterator.h"
|
||||
#include "thresholder.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "pageres.h"
|
||||
#include "paragraphs.h"
|
||||
#include "tessvars.h"
|
||||
#include "control.h"
|
||||
#include "pgedit.h"
|
||||
@ -45,6 +47,7 @@
|
||||
#include "output.h"
|
||||
#include "globals.h"
|
||||
#include "edgblob.h"
|
||||
#include "equationdetect.h"
|
||||
#include "tessbox.h"
|
||||
#include "imgs.h"
|
||||
#include "imgtiff.h"
|
||||
@ -52,6 +55,7 @@
|
||||
#include "permute.h"
|
||||
#include "otsuthr.h"
|
||||
#include "osdetect.h"
|
||||
#include "params.h"
|
||||
|
||||
#ifdef __MSW32__
|
||||
#include "version.h"
|
||||
@ -74,14 +78,21 @@ const char* kInputFile = "noname.tif";
|
||||
const char* kOldVarsFile = "failed_vars.txt";
|
||||
// Max string length of an int.
|
||||
const int kMaxIntSize = 22;
|
||||
// Minimum believable resolution. Used as a default if there is no other
|
||||
// information, as it is safer to under-estimate than over-estimate.
|
||||
const int kMinCredibleResolution = 70;
|
||||
// Maximum believable resolution.
|
||||
const int kMaxCredibleResolution = 2400;
|
||||
|
||||
TessBaseAPI::TessBaseAPI()
|
||||
: tesseract_(NULL),
|
||||
osd_tesseract_(NULL),
|
||||
equ_detect_(NULL),
|
||||
// Thresholder is initialized to NULL here, but will be set before use by:
|
||||
// A constructor of a derived API, SetThresholder(), or
|
||||
// created implicitly when used in InternalSetImage.
|
||||
thresholder_(NULL),
|
||||
paragraph_models_(NULL),
|
||||
block_list_(NULL),
|
||||
page_res_(NULL),
|
||||
input_file_(NULL),
|
||||
@ -125,7 +136,14 @@ void TessBaseAPI::SetOutputName(const char* name) {
|
||||
|
||||
bool TessBaseAPI::SetVariable(const char* name, const char* value) {
|
||||
if (tesseract_ == NULL) tesseract_ = new Tesseract;
|
||||
return ParamUtils::SetParam(name, value, false, tesseract_->params());
|
||||
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
|
||||
tesseract_->params());
|
||||
}
|
||||
|
||||
bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
|
||||
if (tesseract_ == NULL) tesseract_ = new Tesseract;
|
||||
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY,
|
||||
tesseract_->params());
|
||||
}
|
||||
|
||||
bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
|
||||
@ -178,7 +196,9 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
OcrEngineMode oem, char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_init_params) {
|
||||
bool set_only_non_debug_params) {
|
||||
// Default language is "eng".
|
||||
if (language == NULL) language = "eng";
|
||||
// If the datapath, OcrEngineMode or the language have changed - start again.
|
||||
// Note that the language_ field stores the last requested language that was
|
||||
// initialized successfully, while tesseract_->lang stores the language
|
||||
@ -188,7 +208,6 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
(datapath_ == NULL || language_ == NULL ||
|
||||
*datapath_ != datapath || last_oem_requested_ != oem ||
|
||||
(*language_ != language && tesseract_->lang != language))) {
|
||||
tesseract_->end_tesseract();
|
||||
delete tesseract_;
|
||||
tesseract_ = NULL;
|
||||
}
|
||||
@ -200,7 +219,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
if (tesseract_->init_tesseract(
|
||||
datapath, output_file_ != NULL ? output_file_->string() : NULL,
|
||||
language, oem, configs, configs_size, vars_vec, vars_values,
|
||||
set_only_init_params) != 0) {
|
||||
set_only_non_debug_params) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@ -221,6 +240,31 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the languages string used in the last valid initialization.
|
||||
// If the last initialization specified "deu+hin" then that will be
|
||||
// returned. If hin loaded eng automatically as well, then that will
|
||||
// not be included in this list. To find the languages actually
|
||||
// loaded use GetLoadedLanguagesAsVector.
|
||||
// The returned string should NOT be deleted.
|
||||
const char* TessBaseAPI::GetInitLanguagesAsString() const {
|
||||
return (language_ == NULL || language_->string() == NULL) ?
|
||||
"" : language_->string();
|
||||
}
|
||||
|
||||
// Returns the loaded languages in the vector of STRINGs.
|
||||
// Includes all languages loaded by the last Init, including those loaded
|
||||
// as dependencies of other loaded languages.
|
||||
void TessBaseAPI::GetLoadedLanguagesAsVector(
|
||||
GenericVector<STRING>* langs) const {
|
||||
langs->clear();
|
||||
if (tesseract_ != NULL) {
|
||||
langs->push_back(tesseract_->lang);
|
||||
int num_subs = tesseract_->num_sub_langs();
|
||||
for (int i = 0; i < num_subs; ++i)
|
||||
langs->push_back(tesseract_->get_sub_lang(i)->lang);
|
||||
}
|
||||
}
|
||||
|
||||
// Init only the lang model component of Tesseract. The only functions
|
||||
// that work after this init are SetVariable and IsValidWord.
|
||||
// WARNING: temporary! This function will be removed from here and placed
|
||||
@ -243,8 +287,12 @@ void TessBaseAPI::InitForAnalysePage() {
|
||||
// Read a "config" file containing a set of parameter name, value pairs.
|
||||
// Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
// and also accepts a relative or absolute path name.
|
||||
void TessBaseAPI::ReadConfigFile(const char* filename, bool init_only) {
|
||||
tesseract_->read_config_file(filename, init_only);
|
||||
void TessBaseAPI::ReadConfigFile(const char* filename) {
|
||||
tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY);
|
||||
}
|
||||
// Same as above, but only set debug params from the given config file.
|
||||
void TessBaseAPI::ReadDebugConfigFile(const char* filename) {
|
||||
tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY);
|
||||
}
|
||||
|
||||
// Set the current page segmentation mode. Defaults to PSM_AUTO.
|
||||
@ -299,7 +347,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() {
|
||||
if (tesseract_ == NULL)
|
||||
return;
|
||||
tesseract_->ResetAdaptiveClassifier();
|
||||
tesseract_->getDict().ResetDocumentDictionary();
|
||||
tesseract_->ResetDocumentDictionary();
|
||||
}
|
||||
|
||||
// Provide an image for Tesseract to recognize. Format is as
|
||||
@ -317,6 +365,13 @@ void TessBaseAPI::SetImage(const unsigned char* imagedata,
|
||||
bytes_per_pixel, bytes_per_line);
|
||||
}
|
||||
|
||||
void TessBaseAPI::SetSourceResolution(int ppi) {
|
||||
if (thresholder_)
|
||||
thresholder_->SetSourceYResolution(ppi);
|
||||
else
|
||||
tprintf("Please call SetImage before SetSourceResolution.\n");
|
||||
}
|
||||
|
||||
// Provide an image for Tesseract to recognize. As with SetImage above,
|
||||
// Tesseract doesn't take a copy or ownership or pixDestroy the image, so
|
||||
// it must persist until after Recognize.
|
||||
@ -354,7 +409,7 @@ Pix* TessBaseAPI::GetThresholdedImage() {
|
||||
// Boxa, Pixa pair, in reading order.
|
||||
// Can be called before or after Recognize.
|
||||
Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
|
||||
return GetComponentImages(RIL_BLOCK, pixa, NULL);
|
||||
return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
|
||||
}
|
||||
|
||||
// Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
@ -362,7 +417,24 @@ Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
|
||||
// If blockids is not NULL, the block-id of each line is also returned as an
|
||||
// array of one element per line. delete [] after use.
|
||||
Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) {
|
||||
return GetComponentImages(RIL_TEXTLINE, pixa, blockids);
|
||||
return GetComponentImages(RIL_TEXTLINE, true, pixa, blockids);
|
||||
}
|
||||
|
||||
// Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
// pair, in reading order. Enables downstream handling of non-rectangular
|
||||
// regions.
|
||||
// Can be called before or after Recognize.
|
||||
// If blockids is not NULL, the block-id of each line is also returned as an
|
||||
// array of one element per line. delete [] after use.
|
||||
Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
|
||||
return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
|
||||
}
|
||||
|
||||
// Get the words as a leptonica-style
|
||||
// Boxa, Pixa pair, in reading order.
|
||||
// Can be called before or after Recognize.
|
||||
Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
|
||||
return GetComponentImages(RIL_WORD, true, pixa, NULL);
|
||||
}
|
||||
|
||||
// Gets the individual connected (text) components (created
|
||||
@ -370,14 +442,7 @@ Boxa* TessBaseAPI::GetTextlines(Pixa** pixa, int** blockids) {
|
||||
// as a leptonica-style Boxa, Pixa pair, in reading order.
|
||||
// Can be called before or after Recognize.
|
||||
Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
|
||||
return GetComponentImages(RIL_SYMBOL, pixa, NULL);
|
||||
}
|
||||
|
||||
// Get the words as a leptonica-style
|
||||
// Boxa, Pixa pair, in reading order.
|
||||
// Can be called before or after Recognize.
|
||||
Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
|
||||
return GetComponentImages(RIL_WORD, pixa, NULL);
|
||||
return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
|
||||
}
|
||||
|
||||
// Get the given level kind of components (block, textline, word etc.) as a
|
||||
@ -385,7 +450,9 @@ Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
|
||||
// Can be called before or after Recognize.
|
||||
// If blockids is not NULL, the block-id of each component is also returned
|
||||
// as an array of one element per component. delete [] after use.
|
||||
// If text_only is true, then only text components are returned.
|
||||
Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
|
||||
bool text_only,
|
||||
Pixa** pixa, int** blockids) {
|
||||
PageIterator* page_it = GetIterator();
|
||||
if (page_it == NULL)
|
||||
@ -397,7 +464,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
|
||||
int component_count = 0;
|
||||
int left, top, right, bottom;
|
||||
do {
|
||||
if (page_it->BoundingBox(level, &left, &top, &right, &bottom))
|
||||
if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
|
||||
(!text_only || PTIsTextType(page_it->BlockType())))
|
||||
++component_count;
|
||||
} while (page_it->Next(level));
|
||||
|
||||
@ -411,7 +479,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
|
||||
int component_index = 0;
|
||||
page_it->Begin();
|
||||
do {
|
||||
if (page_it->BoundingBox(level, &left, &top, &right, &bottom)) {
|
||||
if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
|
||||
(!text_only || PTIsTextType(page_it->BlockType()))) {
|
||||
Box* lbox = boxCreate(left, top, right - left, bottom - top);
|
||||
boxaAddBox(boxa, lbox, L_INSERT);
|
||||
if (pixa != NULL) {
|
||||
@ -431,6 +500,13 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level,
|
||||
return boxa;
|
||||
}
|
||||
|
||||
int TessBaseAPI::GetThresholdedImageScaleFactor() const {
|
||||
if (thresholder_ == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return thresholder_->GetScaleFactor();
|
||||
}
|
||||
|
||||
// Dump the internal binary image to a PGM file.
|
||||
void TessBaseAPI::DumpPGM(const char* filename) {
|
||||
if (tesseract_ == NULL)
|
||||
@ -537,9 +613,15 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
tesseract_->CorrectClassifyWords(page_res_);
|
||||
return 0;
|
||||
}
|
||||
if (truth_cb_ != NULL) truth_cb_->Run(image_height_, page_res_);
|
||||
|
||||
if (tesseract_->interactive_mode) {
|
||||
if (truth_cb_ != NULL) {
|
||||
tesseract_->wordrec_run_blamer.set_value(true);
|
||||
truth_cb_->Run(tesseract_->getDict().getUnicharset(),
|
||||
image_height_, page_res_);
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
if (tesseract_->interactive_display_mode) {
|
||||
tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_);
|
||||
// The page_res is invalid after an interactive session, so cleanup
|
||||
// in a way that lets us continue to the next page without crashing.
|
||||
@ -556,9 +638,15 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
|
||||
fclose(training_output_file);
|
||||
} else {
|
||||
// Now run the main recognition.
|
||||
tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0);
|
||||
if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
|
||||
int paragraph_debug_level = 0;
|
||||
GetIntVariable("paragraph_debug_level", ¶graph_debug_level);
|
||||
DetectParagraphs(paragraph_debug_level);
|
||||
} else {
|
||||
result = -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Tests the chopper by exhaustively running chop_one_blob.
|
||||
@ -574,7 +662,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
|
||||
if (FindLines() != 0)
|
||||
return -1;
|
||||
// Additional conditions under which chopper test cannot be run
|
||||
if (tesseract_->interactive_mode) return -1;
|
||||
if (tesseract_->interactive_display_mode) return -1;
|
||||
|
||||
recognition_done_ = true;
|
||||
|
||||
@ -584,9 +672,9 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
|
||||
|
||||
while (page_res_it.word() != NULL) {
|
||||
WERD_RES *word_res = page_res_it.word();
|
||||
tesseract_->MaximallyChopWord(page_res_it.block()->block,
|
||||
page_res_it.row()->row,
|
||||
word_res);
|
||||
GenericVector<TBOX> boxes;
|
||||
tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
|
||||
page_res_it.row()->row, word_res);
|
||||
page_res_it.forward();
|
||||
}
|
||||
return 0;
|
||||
@ -741,11 +829,11 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
|
||||
PrintVariables(fp);
|
||||
fclose(fp);
|
||||
// Switch to alternate mode for retry.
|
||||
ReadConfigFile(retry_config, false);
|
||||
ReadConfigFile(retry_config);
|
||||
SetImage(pix);
|
||||
Recognize(NULL);
|
||||
// Restore saved config variables.
|
||||
ReadConfigFile(kOldVarsFile, false);
|
||||
ReadConfigFile(kOldVarsFile);
|
||||
}
|
||||
// Get text only if successful.
|
||||
if (!failed) {
|
||||
@ -767,8 +855,19 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get an iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
// The returned iterator must be deleted after use.
|
||||
// Get a left-to-right iterator to the results of LayoutAnalysis and/or
|
||||
// Recognize. The returned iterator must be deleted after use.
|
||||
LTRResultIterator* TessBaseAPI::GetLTRIterator() {
|
||||
if (tesseract_ == NULL || page_res_ == NULL)
|
||||
return NULL;
|
||||
return new LTRResultIterator(
|
||||
page_res_, tesseract_,
|
||||
thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
|
||||
rect_left_, rect_top_, rect_width_, rect_height_);
|
||||
}
|
||||
|
||||
// Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
// Recognize. The returned iterator must be deleted after use.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
@ -776,10 +875,25 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
|
||||
ResultIterator* TessBaseAPI::GetIterator() {
|
||||
if (tesseract_ == NULL || page_res_ == NULL)
|
||||
return NULL;
|
||||
return new ResultIterator(page_res_, tesseract_,
|
||||
thresholder_->GetScaleFactor(),
|
||||
thresholder_->GetScaledYResolution(),
|
||||
rect_left_, rect_top_, rect_width_, rect_height_);
|
||||
return ResultIterator::StartOfParagraph(LTRResultIterator(
|
||||
page_res_, tesseract_,
|
||||
thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
|
||||
rect_left_, rect_top_, rect_width_, rect_height_));
|
||||
}
|
||||
|
||||
// Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
// The returned iterator must be deleted after use.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
MutableIterator* TessBaseAPI::GetMutableIterator() {
|
||||
if (tesseract_ == NULL || page_res_ == NULL)
|
||||
return NULL;
|
||||
return new MutableIterator(page_res_, tesseract_,
|
||||
thresholder_->GetScaleFactor(),
|
||||
thresholder_->GetScaledYResolution(),
|
||||
rect_left_, rect_top_, rect_width_, rect_height_);
|
||||
}
|
||||
|
||||
// Make a text string from the internal data structures.
|
||||
@ -787,67 +901,29 @@ char* TessBaseAPI::GetUTF8Text() {
|
||||
if (tesseract_ == NULL ||
|
||||
(!recognition_done_ && Recognize(NULL) < 0))
|
||||
return NULL;
|
||||
int total_length = TextLength(NULL);
|
||||
PAGE_RES_IT page_res_it(page_res_);
|
||||
char* result = new char[total_length];
|
||||
char* ptr = result;
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward()) {
|
||||
WERD_RES *word = page_res_it.word();
|
||||
WERD_CHOICE* choice = word->best_choice;
|
||||
if (choice != NULL) {
|
||||
strcpy(ptr, choice->unichar_string().string());
|
||||
ptr += choice->unichar_string().length();
|
||||
if (word->word->flag(W_EOL))
|
||||
*ptr++ = '\n';
|
||||
else
|
||||
*ptr++ = ' ';
|
||||
}
|
||||
}
|
||||
*ptr++ = '\n';
|
||||
*ptr = '\0';
|
||||
STRING text("");
|
||||
ResultIterator *it = GetIterator();
|
||||
do {
|
||||
if (it->Empty(RIL_PARA)) continue;
|
||||
char *para_text = it->GetUTF8Text(RIL_PARA);
|
||||
text += para_text;
|
||||
delete []para_text;
|
||||
} while (it->Next(RIL_PARA));
|
||||
char* result = new char[text.length() + 1];
|
||||
strncpy(result, text.string(), text.length() + 1);
|
||||
delete it;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper returns true if there is a paragraph break between bbox_cur,
|
||||
// and bbox_prev.
|
||||
// TODO(rays) improve and incorporate deeper into tesseract, so other
|
||||
// output methods get the benefit.
|
||||
static bool IsParagraphBreak(TBOX bbox_cur, TBOX bbox_prev,
|
||||
int right, int line_height) {
|
||||
// Check if the distance between lines is larger than the normal leading,
|
||||
if (fabs((float)(bbox_cur.bottom() - bbox_prev.bottom())) > line_height * 2)
|
||||
return true;
|
||||
|
||||
// Check if the distance between left bounds of the two lines is nearly the
|
||||
// same as between their right bounds (if so, then both lines probably belong
|
||||
// to the same paragraph, maybe a centered one).
|
||||
if (fabs((float)((bbox_cur.left() - bbox_prev.left()) -
|
||||
(bbox_prev.right() - bbox_cur.right()))) < line_height)
|
||||
return false;
|
||||
|
||||
// Check if there is a paragraph indent at this line (either -ve or +ve).
|
||||
if (fabs((float)(bbox_cur.left() - bbox_prev.left())) > line_height)
|
||||
return true;
|
||||
|
||||
// Check if both current and previous line don't reach the right bound of the
|
||||
// block, but the distance is different. This will cause all lines in a verse
|
||||
// to be treated as separate paragraphs, but most probably will not split
|
||||
// block-quotes to separate lines (at least if the text is justified).
|
||||
if (fabs((float)(bbox_cur.right() - bbox_prev.right())) > line_height &&
|
||||
right - bbox_cur.right() > line_height &&
|
||||
right - bbox_prev.right() > line_height)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Helper to add the hOCR for a box to the given hocr_str.
|
||||
static void AddBoxTohOCR(const TBOX& box, int image_height, STRING* hocr_str) {
|
||||
hocr_str->add_str_int("' title=\"bbox ", box.left());
|
||||
hocr_str->add_str_int(" ", image_height - box.top());
|
||||
hocr_str->add_str_int(" ", box.right());
|
||||
hocr_str->add_str_int(" ", image_height - box.bottom());
|
||||
static void AddBoxTohOCR(const PageIterator *it,
|
||||
PageIteratorLevel level,
|
||||
STRING* hocr_str) {
|
||||
int left, top, right, bottom;
|
||||
it->BoundingBox(level, &left, &top, &right, &bottom);
|
||||
hocr_str->add_str_int("' title=\"bbox ", left);
|
||||
hocr_str->add_str_int(" ", top);
|
||||
hocr_str->add_str_int(" ", right);
|
||||
hocr_str->add_str_int(" ", bottom);
|
||||
*hocr_str += "\">";
|
||||
}
|
||||
|
||||
@ -860,15 +936,10 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
(page_res_ == NULL && Recognize(NULL) < 0))
|
||||
return NULL;
|
||||
|
||||
PAGE_RES_IT page_res_it(page_res_);
|
||||
ROW_RES *row = NULL; // current row
|
||||
ROW *real_row = NULL, *prev_row = NULL;
|
||||
BLOCK_RES *block = NULL; // current row
|
||||
BLOCK *real_block = NULL;
|
||||
int lcnt = 1, bcnt = 1, wcnt = 1;
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
|
||||
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
|
||||
|
||||
STRING hocr_str;
|
||||
STRING hocr_str("");
|
||||
|
||||
hocr_str.add_str_int("<div class='ocr_page' id='page_", page_id);
|
||||
hocr_str += "' title='image \"";
|
||||
@ -879,82 +950,87 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
hocr_str.add_str_int(" ", rect_height_);
|
||||
hocr_str += "'>\n";
|
||||
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward()) {
|
||||
if (block != page_res_it.block()) {
|
||||
if (block != NULL) {
|
||||
hocr_str += "</span>\n</p>\n</div>\n";
|
||||
}
|
||||
|
||||
block = page_res_it.block(); // current row
|
||||
real_block = block->block;
|
||||
real_row = NULL;
|
||||
row = NULL;
|
||||
|
||||
hocr_str.add_str_int("<div class='ocr_carea' id='block_", page_id);
|
||||
hocr_str.add_str_int("_", bcnt++);
|
||||
AddBoxTohOCR(real_block->bounding_box(), image_height_, &hocr_str);
|
||||
hocr_str += "\n<p class='ocr_par'>\n";
|
||||
}
|
||||
if (row != page_res_it.row()) {
|
||||
if (row != NULL) {
|
||||
hocr_str += "</span>\n";
|
||||
}
|
||||
prev_row = real_row;
|
||||
|
||||
row = page_res_it.row(); // current row
|
||||
real_row = row->row;
|
||||
|
||||
if (prev_row != NULL &&
|
||||
IsParagraphBreak(real_row->bounding_box(), prev_row->bounding_box(),
|
||||
real_block->bounding_box().right(),
|
||||
real_row->x_height() + real_row->ascenders()))
|
||||
hocr_str += "</p>\n<p class='ocr_par'>\n";
|
||||
|
||||
hocr_str.add_str_int("<span class='ocr_line' id='line_", page_id);
|
||||
hocr_str.add_str_int("_", lcnt++);
|
||||
AddBoxTohOCR(real_row->bounding_box(), image_height_, &hocr_str);
|
||||
ResultIterator *res_it = GetIterator();
|
||||
for (; !res_it->Empty(RIL_BLOCK); wcnt++) {
|
||||
if (res_it->Empty(RIL_WORD)) {
|
||||
res_it->Next(RIL_WORD);
|
||||
continue;
|
||||
}
|
||||
|
||||
WERD_RES *word = page_res_it.word();
|
||||
WERD_CHOICE* choice = word->best_choice;
|
||||
if (choice != NULL) {
|
||||
hocr_str.add_str_int("<span class='ocr_word' id='word_", page_id);
|
||||
hocr_str.add_str_int("_", wcnt);
|
||||
AddBoxTohOCR(word->word->bounding_box(), image_height_, &hocr_str);
|
||||
hocr_str.add_str_int("<span class='ocrx_word' id='xword_", page_id);
|
||||
hocr_str.add_str_int("_", wcnt++);
|
||||
hocr_str.add_str_int("' title=\"x_wconf ", choice->certainty());
|
||||
hocr_str += "\">";
|
||||
if (word->bold > 0)
|
||||
hocr_str += "<strong>";
|
||||
if (word->italic > 0)
|
||||
hocr_str += "<em>";
|
||||
int i;
|
||||
// escape special characters
|
||||
for (i = 0; choice->unichar_string()[i] != '\0'; i++) {
|
||||
if (choice->unichar_string()[i] == '<') hocr_str += "<";
|
||||
else if (choice->unichar_string()[i] == '>') hocr_str += ">";
|
||||
else if (choice->unichar_string()[i] == '&') hocr_str += "&";
|
||||
else if (choice->unichar_string()[i] == '"') hocr_str += """;
|
||||
else if (choice->unichar_string()[i] == '\'') hocr_str += "'";
|
||||
else hocr_str += choice->unichar_string()[i];
|
||||
// Open any new block/paragraph/textline.
|
||||
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
|
||||
hocr_str.add_str_int("<div class='ocr_carea' id='block_", bcnt);
|
||||
hocr_str.add_str_int("_", bcnt);
|
||||
AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
|
||||
}
|
||||
if (res_it->IsAtBeginningOf(RIL_PARA)) {
|
||||
if (res_it->ParagraphIsLtr()) {
|
||||
hocr_str.add_str_int("\n<p class='ocr_par' dir='ltr' id='par_", pcnt);
|
||||
} else {
|
||||
hocr_str.add_str_int("\n<p class='ocr_par' dir='rtl' id='par_", pcnt);
|
||||
}
|
||||
if (word->italic > 0)
|
||||
hocr_str += "</em>";
|
||||
if (word->bold > 0)
|
||||
hocr_str += "</strong>";
|
||||
hocr_str += "</span></span>";
|
||||
if (!word->word->flag(W_EOL))
|
||||
hocr_str += " ";
|
||||
AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
|
||||
}
|
||||
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
|
||||
hocr_str.add_str_int("<span class='ocr_line' id='line_", lcnt);
|
||||
AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
|
||||
}
|
||||
|
||||
// Now, process the word...
|
||||
hocr_str.add_str_int("<span class='ocr_word' id='word_", wcnt);
|
||||
AddBoxTohOCR(res_it, RIL_WORD, &hocr_str);
|
||||
const char *font_name;
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
&monospace, &serif, &smallcaps,
|
||||
&pointsize, &font_id);
|
||||
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
||||
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
|
||||
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
|
||||
if (bold) hocr_str += "<strong>";
|
||||
if (italic) hocr_str += "<em>";
|
||||
do {
|
||||
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
|
||||
if (grapheme && grapheme[0] != 0) {
|
||||
if (grapheme[1] == 0) {
|
||||
switch (grapheme[0]) {
|
||||
case '<': hocr_str += "<"; break;
|
||||
case '>': hocr_str += ">"; break;
|
||||
case '&': hocr_str += "&"; break;
|
||||
case '"': hocr_str += """; break;
|
||||
case '\'': hocr_str += "'"; break;
|
||||
default: hocr_str += grapheme;
|
||||
}
|
||||
} else {
|
||||
hocr_str += grapheme;
|
||||
}
|
||||
}
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
if (italic) hocr_str += "</em>";
|
||||
if (bold) hocr_str += "</strong>";
|
||||
hocr_str += "</span> ";
|
||||
wcnt++;
|
||||
// Close any ending block/paragraph/textline.
|
||||
if (last_word_in_line) {
|
||||
hocr_str += "</span>\n";
|
||||
lcnt++;
|
||||
}
|
||||
if (last_word_in_para) {
|
||||
hocr_str += "</p>\n";
|
||||
pcnt++;
|
||||
}
|
||||
if (last_word_in_block) {
|
||||
hocr_str += "</div>\n";
|
||||
bcnt++;
|
||||
}
|
||||
}
|
||||
if (block != NULL)
|
||||
hocr_str += "</span>\n</p>\n</div>\n";
|
||||
hocr_str += "</div>\n";
|
||||
|
||||
char *ret = new char[hocr_str.length() + 1];
|
||||
strcpy(ret, hocr_str.string());
|
||||
delete res_it;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -990,7 +1066,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
|
||||
kMaxBytesPerLine;
|
||||
char* result = new char[total_length];
|
||||
int output_length = 0;
|
||||
ResultIterator* it = GetIterator();
|
||||
LTRResultIterator* it = GetLTRIterator();
|
||||
do {
|
||||
int left, top, right, bottom;
|
||||
if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
|
||||
@ -1179,11 +1255,16 @@ int* TessBaseAPI::AllWordConfidences() {
|
||||
* Returns false if adaption was not possible for some reason.
|
||||
*/
|
||||
bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
|
||||
int debug = 0;
|
||||
GetIntVariable("applybox_debug", &debug);
|
||||
bool success = true;
|
||||
PageSegMode current_psm = GetPageSegMode();
|
||||
SetPageSegMode(mode);
|
||||
SetVariable("classify_enable_learning", "0");
|
||||
char* text = GetUTF8Text();
|
||||
if (debug) {
|
||||
tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
|
||||
}
|
||||
if (text != NULL) {
|
||||
PAGE_RES_IT it(page_res_);
|
||||
WERD_RES* word_res = it.word();
|
||||
@ -1207,7 +1288,8 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
|
||||
if (text[t] != '\0' || wordstr[w] != '\0') {
|
||||
// No match.
|
||||
delete page_res_;
|
||||
page_res_ = tesseract_->SetupApplyBoxes(block_list_);
|
||||
GenericVector<TBOX> boxes;
|
||||
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
|
||||
tesseract_->ReSegmentByClassification(page_res_);
|
||||
tesseract_->TidyUp(page_res_);
|
||||
PAGE_RES_IT pr_it(page_res_);
|
||||
@ -1216,7 +1298,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
|
||||
else
|
||||
word_res = pr_it.word();
|
||||
} else {
|
||||
word_res->BestChoiceToCorrectText(tesseract_->unicharset);
|
||||
word_res->BestChoiceToCorrectText();
|
||||
}
|
||||
if (success) {
|
||||
tesseract_->EnableLearning = true;
|
||||
@ -1257,18 +1339,25 @@ void TessBaseAPI::End() {
|
||||
delete block_list_;
|
||||
block_list_ = NULL;
|
||||
}
|
||||
if (paragraph_models_ != NULL) {
|
||||
paragraph_models_->delete_data_pointers();
|
||||
delete paragraph_models_;
|
||||
paragraph_models_ = NULL;
|
||||
}
|
||||
if (tesseract_ != NULL) {
|
||||
tesseract_->end_tesseract();
|
||||
delete tesseract_;
|
||||
if (osd_tesseract_ == tesseract_)
|
||||
osd_tesseract_ = NULL;
|
||||
tesseract_ = NULL;
|
||||
}
|
||||
if (osd_tesseract_ != NULL) {
|
||||
osd_tesseract_->end_tesseract();
|
||||
delete osd_tesseract_;
|
||||
osd_tesseract_ = NULL;
|
||||
}
|
||||
if (equ_detect_ != NULL) {
|
||||
delete equ_detect_;
|
||||
equ_detect_ = NULL;
|
||||
}
|
||||
if (input_file_ != NULL) {
|
||||
delete input_file_;
|
||||
input_file_ = NULL;
|
||||
@ -1332,9 +1421,19 @@ void TessBaseAPI::SetDictFunc(DictFunc f) {
|
||||
void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) {
|
||||
if (tesseract_ != NULL) {
|
||||
tesseract_->getDict().probability_in_context_ = f;
|
||||
// Set it for the sublangs too.
|
||||
int num_subs = tesseract_->num_sub_langs();
|
||||
for (int i = 0; i < num_subs; ++i) {
|
||||
tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sets Wordrec::fill_lattice_ function to point to the given function.
|
||||
void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) {
|
||||
if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
|
||||
}
|
||||
|
||||
// Common code for setting the image.
|
||||
bool TessBaseAPI::InternalSetImage() {
|
||||
if (tesseract_ == NULL) {
|
||||
@ -1358,10 +1457,29 @@ void TessBaseAPI::Threshold(Pix** pix) {
|
||||
}
|
||||
if (*pix != NULL)
|
||||
pixDestroy(pix);
|
||||
// Zero resolution messes up the algorithms, so make sure it is credible.
|
||||
int y_res = thresholder_->GetScaledYResolution();
|
||||
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
|
||||
// Use the minimum default resolution, as it is safer to under-estimate
|
||||
// than over-estimate resolution.
|
||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||
}
|
||||
thresholder_->ThresholdToPix(pix);
|
||||
thresholder_->GetImageSizes(&rect_left_, &rect_top_,
|
||||
&rect_width_, &rect_height_,
|
||||
&image_width_, &image_height_);
|
||||
// Set the internal resolution that is used for layout parameters from the
|
||||
// estimated resolution, rather than the image resolution, which may be
|
||||
// fabricated, but we will use the image resolution, if there is one, to
|
||||
// report output point sizes.
|
||||
int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
|
||||
kMinCredibleResolution,
|
||||
kMaxCredibleResolution);
|
||||
if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
|
||||
tprintf("Estimated resolution %d out of range! Corrected to %d\n",
|
||||
thresholder_->GetScaledEstimatedResolution(), estimated_res);
|
||||
}
|
||||
tesseract_->set_source_resolution(estimated_res);
|
||||
}
|
||||
|
||||
// Find lines from the image making the BLOCK_LIST.
|
||||
@ -1390,6 +1508,13 @@ int TessBaseAPI::FindLines() {
|
||||
|
||||
tesseract_->PrepareForPageseg();
|
||||
|
||||
if (tesseract_->textord_equation_detect) {
|
||||
if (equ_detect_ == NULL && datapath_ != NULL) {
|
||||
equ_detect_ = new EquationDetect(datapath_->string(), NULL);
|
||||
}
|
||||
tesseract_->SetEquationDetect(equ_detect_);
|
||||
}
|
||||
|
||||
Tesseract* osd_tess = osd_tesseract_;
|
||||
OSResults osr;
|
||||
if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
|
||||
@ -1401,6 +1526,8 @@ int TessBaseAPI::FindLines() {
|
||||
datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
|
||||
NULL, 0, NULL, NULL, false) == 0) {
|
||||
osd_tess = osd_tesseract_;
|
||||
osd_tesseract_->set_source_resolution(
|
||||
thresholder_->GetSourceYResolution());
|
||||
} else {
|
||||
tprintf("Warning: Auto orientation and script detection requested,"
|
||||
" but osd language failed to load\n");
|
||||
@ -1412,16 +1539,9 @@ int TessBaseAPI::FindLines() {
|
||||
|
||||
if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
|
||||
return -1;
|
||||
// If OCR is to be run using Tesseract, OCR-able blobs are required for
|
||||
// training, or interactive mode is needed, prepare data and images for ocr.
|
||||
if (tesseract_->interactive_mode ||
|
||||
tesseract_->tessedit_train_from_boxes ||
|
||||
tesseract_->tessedit_ambigs_training ||
|
||||
tesseract_->tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
|
||||
tesseract_->tessedit_ocr_engine_mode ==
|
||||
OEM_TESSERACT_CUBE_COMBINED) {
|
||||
tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
|
||||
}
|
||||
// If Devanagari is being recognized, we use different images for page seg
|
||||
// and for OCR.
|
||||
tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1429,7 +1549,6 @@ int TessBaseAPI::FindLines() {
|
||||
void TessBaseAPI::ClearResults() {
|
||||
if (tesseract_ != NULL) {
|
||||
tesseract_->Clear();
|
||||
tesseract_->ResetFeaturesHaveBeenExtracted();
|
||||
}
|
||||
if (page_res_ != NULL) {
|
||||
delete page_res_;
|
||||
@ -1440,11 +1559,17 @@ void TessBaseAPI::ClearResults() {
|
||||
block_list_ = new BLOCK_LIST;
|
||||
else
|
||||
block_list_->clear();
|
||||
if (paragraph_models_ != NULL) {
|
||||
paragraph_models_->delete_data_pointers();
|
||||
delete paragraph_models_;
|
||||
paragraph_models_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Return the length of the output text string, as UTF8, assuming
|
||||
// one newline per line and one per block, with a terminator,
|
||||
// and assuming a single character reject marker for each rejected character.
|
||||
// liberally two spacing marks after each word (as paragraphs end with two
|
||||
// newlines), and assuming a single character reject marker for each rejected
|
||||
// character.
|
||||
// Also return the number of recognized blobs in blob_count.
|
||||
int TessBaseAPI::TextLength(int* blob_count) {
|
||||
if (tesseract_ == NULL || page_res_ == NULL)
|
||||
@ -1459,8 +1584,8 @@ int TessBaseAPI::TextLength(int* blob_count) {
|
||||
WERD_RES *word = page_res_it.word();
|
||||
WERD_CHOICE* choice = word->best_choice;
|
||||
if (choice != NULL) {
|
||||
total_blobs += choice->length() + 1;
|
||||
total_length += choice->unichar_string().length() + 1;
|
||||
total_blobs += choice->length() + 2;
|
||||
total_length += choice->unichar_string().length() + 2;
|
||||
for (int i = 0; i < word->reject_map.length(); ++i) {
|
||||
if (word->reject_map[i].rejected())
|
||||
++total_length;
|
||||
@ -1661,8 +1786,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
|
||||
// Classify to get a raw choice.
|
||||
BLOB_CHOICE_LIST choices;
|
||||
DENORM denorm;
|
||||
tesseract_->set_denorm(&denorm);
|
||||
tesseract_->AdaptiveClassifier(blob, &choices, NULL);
|
||||
tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);
|
||||
BLOB_CHOICE_IT choice_it;
|
||||
choice_it.set_to_list(&choices);
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
|
||||
@ -1673,29 +1797,10 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
|
||||
}
|
||||
}
|
||||
|
||||
if (id == best_class) {
|
||||
threshold = tesseract_->matcher_good_threshold;
|
||||
} else {
|
||||
/* the blob was incorrectly classified - find the rating threshold
|
||||
needed to create a template which will correct the error with
|
||||
some margin. However, don't waste time trying to make
|
||||
templates which are too tight. */
|
||||
threshold = tesseract_->GetBestRatingFor(blob, id);
|
||||
threshold *= .9;
|
||||
const float max_threshold = .125;
|
||||
const float min_threshold = .02;
|
||||
|
||||
if (threshold > max_threshold)
|
||||
threshold = max_threshold;
|
||||
|
||||
// I have cuddled the following line to set it out of the strike
|
||||
// of the coverage testing tool. I have no idea how to trigger
|
||||
// this situation nor I have any necessity to do it. --mezhirov
|
||||
if (threshold < min_threshold) threshold = min_threshold;
|
||||
}
|
||||
threshold = tesseract_->matcher_good_threshold;
|
||||
|
||||
if (blob->outlines)
|
||||
tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold);
|
||||
tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
|
||||
delete blob;
|
||||
}
|
||||
|
||||
@ -1716,6 +1821,18 @@ PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
|
||||
return pass1_result;
|
||||
}
|
||||
|
||||
void TessBaseAPI::DetectParagraphs(int debug_level) {
|
||||
if (paragraph_models_ == NULL)
|
||||
paragraph_models_ = new GenericVector<ParagraphModel*>;
|
||||
MutableIterator *result_it = GetMutableIterator();
|
||||
do { // Detect paragraphs for this block
|
||||
GenericVector<ParagraphModel *> models;
|
||||
::tesseract::DetectParagraphs(debug_level, result_it, &models);
|
||||
*paragraph_models_ += models;
|
||||
} while (result_it->Next(RIL_BLOCK));
|
||||
delete result_it;
|
||||
}
|
||||
|
||||
struct TESS_CHAR : ELIST_LINK {
|
||||
char *unicode_repr;
|
||||
int length; // of unicode_repr
|
||||
@ -1838,12 +1955,12 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
|
||||
if (tesseract_) {
|
||||
tesseract_->ResetFeaturesHaveBeenExtracted();
|
||||
}
|
||||
tesseract_->set_denorm(&denorm);
|
||||
CLASS_NORMALIZATION_ARRAY norm_array;
|
||||
uinT8* norm_array = new uinT8[MAX_NUM_CLASSES];
|
||||
inT32 len;
|
||||
*num_features = tesseract_->GetIntCharNormFeatures(
|
||||
blob, tesseract_->PreTrainedTemplates,
|
||||
int_features, norm_array, &len, FeatureOutlineIndex);
|
||||
*num_features = tesseract_->GetCharNormFeatures(
|
||||
blob, denorm, tesseract_->PreTrainedTemplates,
|
||||
int_features, norm_array, norm_array, &len, FeatureOutlineIndex);
|
||||
delete [] norm_array;
|
||||
}
|
||||
|
||||
// This method returns the row to which a box of specified dimensions would
|
||||
@ -1879,8 +1996,7 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
|
||||
float* ratings,
|
||||
int* num_matches_returned) {
|
||||
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
|
||||
tesseract_->set_denorm(&denorm);
|
||||
tesseract_->AdaptiveClassifier(blob, choices, NULL);
|
||||
tesseract_->AdaptiveClassifier(blob, denorm, choices, NULL);
|
||||
BLOB_CHOICE_IT choices_it(choices);
|
||||
int& index = *num_matches_returned;
|
||||
index = 0;
|
||||
@ -1912,12 +2028,6 @@ int TessBaseAPI::NumDawgs() const {
|
||||
return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
|
||||
}
|
||||
|
||||
// Return the language used in the last valid initialization.
|
||||
const char* TessBaseAPI::GetLastInitLanguage() const {
|
||||
return (tesseract_ == NULL || tesseract_->lang.string() == NULL) ?
|
||||
"" : tesseract_->lang.string();
|
||||
}
|
||||
|
||||
// Return a pointer to underlying CubeRecoContext object if present.
|
||||
CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const {
|
||||
return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
|
||||
|
119
api/baseapi.h
119
api/baseapi.h
@ -20,20 +20,25 @@
|
||||
#ifndef TESSERACT_API_BASEAPI_H__
|
||||
#define TESSERACT_API_BASEAPI_H__
|
||||
|
||||
#include <stdio.h>
|
||||
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
|
||||
// complexity of includes here. Use forward declarations wherever possible
|
||||
// and hide includes of complex types in baseapi.cpp.
|
||||
#include "apitypes.h"
|
||||
#include "genericvector.h"
|
||||
#include "thresholder.h"
|
||||
#include "unichar.h"
|
||||
#include "tesscallback.h"
|
||||
#include "publictypes.h"
|
||||
|
||||
template <typename T> class GenericVector;
|
||||
class PAGE_RES;
|
||||
class PAGE_RES_IT;
|
||||
class ParagraphModel;
|
||||
class BlamerBundle;
|
||||
class BLOCK_LIST;
|
||||
class DENORM;
|
||||
class IMAGE;
|
||||
class MATRIX;
|
||||
class PBLOB;
|
||||
class ROW;
|
||||
class STRING;
|
||||
@ -45,6 +50,12 @@ struct Boxa;
|
||||
class ETEXT_DESC;
|
||||
struct OSResults;
|
||||
class TBOX;
|
||||
class UNICHARSET;
|
||||
|
||||
// From oldlist.h
|
||||
// TODO(antonova): remove when oldlist is deprecated.
|
||||
struct list_rec;
|
||||
typedef list_rec *LIST;
|
||||
|
||||
#define MAX_NUM_INT_FEATURES 512
|
||||
struct INT_FEATURE_STRUCT;
|
||||
@ -66,19 +77,27 @@ namespace tesseract {
|
||||
class CubeRecoContext;
|
||||
class Dawg;
|
||||
class Dict;
|
||||
class EquationDetect;
|
||||
class PageIterator;
|
||||
class LTRResultIterator;
|
||||
class ResultIterator;
|
||||
class MutableIterator;
|
||||
class Tesseract;
|
||||
class Trie;
|
||||
class Wordrec;
|
||||
|
||||
typedef int (Dict::*DictFunc)(void* void_dawg_args,
|
||||
UNICHAR_ID unichar_id, bool word_end);
|
||||
UNICHAR_ID unichar_id, bool word_end) const;
|
||||
typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
|
||||
const char* context,
|
||||
int context_bytes,
|
||||
const char* character,
|
||||
int character_bytes);
|
||||
typedef TessCallback2<int, PAGE_RES *> TruthCallback;
|
||||
typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
|
||||
const LIST &best_choices,
|
||||
const UNICHARSET &unicharset,
|
||||
BlamerBundle *blamer_bundle);
|
||||
typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback;
|
||||
|
||||
/**
|
||||
* Base class for all tesseract APIs.
|
||||
@ -123,6 +142,7 @@ class TESSDLL_API TessBaseAPI {
|
||||
* (init variables should be passed to Init()).
|
||||
*/
|
||||
bool SetVariable(const char* name, const char* value);
|
||||
bool SetDebugVariable(const char* name, const char* value);
|
||||
|
||||
// Returns true if the parameter was found among Tesseract parameters.
|
||||
// Fills in value with the value of the parameter.
|
||||
@ -155,6 +175,16 @@ class TESSDLL_API TessBaseAPI {
|
||||
* It is entirely safe (and eventually will be efficient too) to call
|
||||
* Init multiple times on the same instance to change language, or just
|
||||
* to reset the classifier.
|
||||
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
|
||||
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
|
||||
* English. Languages may specify internally that they want to be loaded
|
||||
* with one or more other languages, so the ~ sign is available to override
|
||||
* that. Eg if hin were set to load eng by default, then hin+~eng would force
|
||||
* loading only hin. The number of loaded languages is limited only by
|
||||
* memory, with the caveat that loading additional languages will impact
|
||||
* both speed and accuracy, as there is more work to do to decide on the
|
||||
* applicable language, and there is more chance of hallucinating incorrect
|
||||
* words.
|
||||
* WARNING: On changing languages, all Tesseract parameters are reset
|
||||
* back to their default values. (Which may vary between languages.)
|
||||
* If you have a rare need to set a Variable that controls
|
||||
@ -162,12 +192,15 @@ class TESSDLL_API TessBaseAPI {
|
||||
* call End() and then use SetVariable before Init. This is only a very
|
||||
* rare use case, since there are very few uses that require any parameters
|
||||
* to be set before Init.
|
||||
*
|
||||
* If set_only_non_debug_params is true, only params that do not contain
|
||||
* "debug" in the name will be set.
|
||||
*/
|
||||
int Init(const char* datapath, const char* language, OcrEngineMode mode,
|
||||
char **configs, int configs_size,
|
||||
const GenericVector<STRING> *vars_vec,
|
||||
const GenericVector<STRING> *vars_values,
|
||||
bool set_only_init_params);
|
||||
bool set_only_non_debug_params);
|
||||
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
|
||||
return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
|
||||
}
|
||||
@ -175,6 +208,19 @@ class TESSDLL_API TessBaseAPI {
|
||||
return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
|
||||
}
|
||||
|
||||
// Returns the languages string used in the last valid initialization.
|
||||
// If the last initialization specified "deu+hin" then that will be
|
||||
// returned. If hin loaded eng automatically as well, then that will
|
||||
// not be included in this list. To find the languages actually
|
||||
// loaded use GetLoadedLanguagesAsVector.
|
||||
// The returned string should NOT be deleted.
|
||||
const char* GetInitLanguagesAsString() const;
|
||||
|
||||
// Returns the loaded languages in the vector of STRINGs.
|
||||
// Includes all languages loaded by the last Init, including those loaded
|
||||
// as dependencies of other loaded languages.
|
||||
void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
|
||||
|
||||
/**
|
||||
* Init only the lang model component of Tesseract. The only functions
|
||||
* that work after this init are SetVariable and IsValidWord.
|
||||
@ -188,15 +234,14 @@ class TESSDLL_API TessBaseAPI {
|
||||
void InitForAnalysePage();
|
||||
|
||||
/**
|
||||
* Read a "config" file containing a set of variable, value pairs.
|
||||
* Read a "config" file containing a set of param, value pairs.
|
||||
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
|
||||
* and also accepts a relative or absolute path name.
|
||||
* If init_only is true, only sets the parameters marked with a special
|
||||
* INIT flag, which are typically of functional/algorithmic effect
|
||||
* rather than debug effect. Used to separate debug settings from
|
||||
* working settings.
|
||||
* Note: only non-init params will be set (init params are set by Init()).
|
||||
*/
|
||||
void ReadConfigFile(const char* filename, bool init_only);
|
||||
void ReadConfigFile(const char* filename);
|
||||
/** Same as above, but only set debug params from the given config file. */
|
||||
void ReadDebugConfigFile(const char* filename);
|
||||
|
||||
/**
|
||||
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
|
||||
@ -267,6 +312,12 @@ class TESSDLL_API TessBaseAPI {
|
||||
*/
|
||||
void SetImage(const Pix* pix);
|
||||
|
||||
/**
|
||||
* Set the resolution of the source image in pixels per inch so font size
|
||||
* information can be calculated in results. Call this after SetImage().
|
||||
*/
|
||||
void SetSourceResolution(int ppi);
|
||||
|
||||
/**
|
||||
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
|
||||
* Each SetRectangle clears the recogntion results so multiple rectangles
|
||||
@ -311,6 +362,16 @@ class TESSDLL_API TessBaseAPI {
|
||||
*/
|
||||
Boxa* GetTextlines(Pixa** pixa, int** blockids);
|
||||
|
||||
/**
|
||||
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
|
||||
* pair, in reading order. Enables downstream handling of non-rectangular
|
||||
* regions.
|
||||
* Can be called before or after Recognize.
|
||||
* If blockids is not NULL, the block-id of each line is also returned as an
|
||||
* array of one element per line. delete [] after use.
|
||||
*/
|
||||
Boxa* GetStrips(Pixa** pixa, int** blockids);
|
||||
|
||||
/**
|
||||
* Get the words as a leptonica-style
|
||||
* Boxa, Pixa pair, in reading order.
|
||||
@ -331,9 +392,17 @@ class TESSDLL_API TessBaseAPI {
|
||||
// Can be called before or after Recognize.
|
||||
// If blockids is not NULL, the block-id of each component is also returned
|
||||
// as an array of one element per component. delete [] after use.
|
||||
// If text_only is true, then only text components are returned.
|
||||
Boxa* GetComponentImages(PageIteratorLevel level,
|
||||
bool text_only,
|
||||
Pixa** pixa, int** blockids);
|
||||
|
||||
// Returns the scale factor of the thresholded image that would be returned by
|
||||
// GetThresholdedImage() and the various GetX() methods that call
|
||||
// GetComponentImages().
|
||||
// Returns 0 if no thresholder has been set.
|
||||
int GetThresholdedImageScaleFactor() const;
|
||||
|
||||
/**
|
||||
* Dump the internal binary image to a PGM file.
|
||||
* @deprecated Use GetThresholdedImage and write the image using pixWrite
|
||||
@ -403,19 +472,28 @@ class TESSDLL_API TessBaseAPI {
|
||||
const char* retry_config, int timeout_millisec,
|
||||
STRING* text_out);
|
||||
|
||||
// Get an iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
// The returned iterator must be deleted after use.
|
||||
// Get a reading-order iterator to the results of LayoutAnalysis and/or
|
||||
// Recognize. The returned iterator must be deleted after use.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
ResultIterator* GetIterator();
|
||||
|
||||
// Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
|
||||
// The returned iterator must be deleted after use.
|
||||
// WARNING! This class points to data held within the TessBaseAPI class, and
|
||||
// therefore can only be used while the TessBaseAPI class still exists and
|
||||
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
|
||||
// DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
MutableIterator* GetMutableIterator();
|
||||
|
||||
/**
|
||||
* The recognized text is returned as a char* which is coded
|
||||
* as UTF8 and must be freed with the delete [] operator.
|
||||
*/
|
||||
char* GetUTF8Text();
|
||||
|
||||
/**
|
||||
* Make a HTML-formatted string with hOCR markup from the internal
|
||||
* data structures.
|
||||
@ -492,6 +570,9 @@ class TESSDLL_API TessBaseAPI {
|
||||
*/
|
||||
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
|
||||
|
||||
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
|
||||
void SetFillLatticeFunc(FillLatticeFunc f);
|
||||
|
||||
/**
|
||||
* Estimates the Orientation And Script of the image.
|
||||
* @return true if the image was processed successfully.
|
||||
@ -544,6 +625,9 @@ class TESSDLL_API TessBaseAPI {
|
||||
Tesseract* const tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
OcrEngineMode const oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
|
||||
|
||||
@ -588,6 +672,11 @@ class TESSDLL_API TessBaseAPI {
|
||||
/** Delete the pageres and block list ready for a new page. */
|
||||
void ClearResults();
|
||||
|
||||
// Return an LTR Result Iterator -- used only for training, as we really want
|
||||
// to ignore all BiDi smarts at that point.
|
||||
// delete once you're done with it.
|
||||
LTRResultIterator* GetLTRIterator();
|
||||
|
||||
/**
|
||||
* Return the length of the output text string, as UTF8, assuming
|
||||
* one newline per line and one per block, with a terminator,
|
||||
@ -614,6 +703,10 @@ class TESSDLL_API TessBaseAPI {
|
||||
PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
|
||||
PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
|
||||
|
||||
//// paragraphs.cpp ////////////////////////////////////////////////////
|
||||
/** After text is recognized, break each paragraph into blocks. */
|
||||
void DetectParagraphs(int debug_level);
|
||||
|
||||
/**
|
||||
* Extract the OCR results, costs (penalty points for uncertainty),
|
||||
* and the bounding boxes of the characters.
|
||||
@ -634,7 +727,9 @@ class TESSDLL_API TessBaseAPI {
|
||||
protected:
|
||||
Tesseract* tesseract_; ///< The underlying data object.
|
||||
Tesseract* osd_tesseract_; ///< For orientation & script detection.
|
||||
EquationDetect* equ_detect_; ///<The equation detector.
|
||||
ImageThresholder* thresholder_; ///< Image thresholding module.
|
||||
GenericVector<ParagraphModel *>* paragraph_models_;
|
||||
BLOCK_LIST* block_list_; ///< The page layout.
|
||||
PAGE_RES* page_res_; ///< The page-level data.
|
||||
STRING* input_file_; ///< Name used by training code.
|
||||
|
Loading…
Reference in New Issue
Block a user