mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
added leptonica support and additional interfaces
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@135 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
e8787f3e0c
commit
d543e8c2bc
@ -19,6 +19,22 @@
|
||||
|
||||
#include "baseapi.h"
|
||||
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_LIBLEPT
|
||||
// The jpeg library still has INT32 as long, which is no good for 64 bit.
|
||||
#define INT32 WRONGINT32
|
||||
#define BOX LEPT_BOX
|
||||
// Include leptonica library only if autoconf (or makefile etc) tell us to.
|
||||
#include "allheaders.h"
|
||||
#undef BOX
|
||||
#undef INT32
|
||||
#endif
|
||||
|
||||
#include "tessedit.h"
|
||||
#include "ocrclass.h"
|
||||
#include "pageres.h"
|
||||
@ -27,20 +43,19 @@
|
||||
#include "applybox.h"
|
||||
#include "pgedit.h"
|
||||
#include "varabled.h"
|
||||
#include "variables.h"
|
||||
#include "output.h"
|
||||
#include "globals.h"
|
||||
#include "adaptmatch.h"
|
||||
#include "edgblob.h"
|
||||
#include "tessbox.h"
|
||||
#include "tordvars.h"
|
||||
#include "tessvars.h"
|
||||
#include "imgs.h"
|
||||
#include "makerow.h"
|
||||
#include "output.h"
|
||||
#include "tstruct.h"
|
||||
#include "tessout.h"
|
||||
#include "tface.h"
|
||||
#include "adaptmatch.h"
|
||||
#include "permute.h"
|
||||
|
||||
BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
|
||||
"Take segmentation and labeling from box file");
|
||||
@ -52,6 +67,22 @@ const int kMinRectSize = 10;
|
||||
|
||||
static STRING input_file = "noname.tif";
|
||||
|
||||
// Set the value of an internal "variable" (of either old or new types).
|
||||
// Supply the name of the variable and the value as a string, just as
|
||||
// you would in a config file.
|
||||
// Returns false if the name lookup failed.
|
||||
bool TessBaseAPI::SetVariable(const char* variable, const char* value) {
|
||||
if (set_new_style_variable(variable, value))
|
||||
return true;
|
||||
return set_old_style_variable(variable, value);
|
||||
}
|
||||
|
||||
// Convenience start-up that needs only a data path, a language code and the
// numeric-mode flag. Forwards to InitWithLanguage with no output base, no
// config file and an empty argument list. The value returned by
// InitWithLanguage is not passed back to the caller.
void TessBaseAPI::SimpleInit(const char* datapath,
                             const char* language,
                             bool numeric_mode) {
  const char* const no_outputbase = NULL;
  const char* const no_configfile = NULL;
  const int no_argc = 0;
  char** const no_argv = NULL;
  InitWithLanguage(datapath, no_outputbase, language, no_configfile,
                   numeric_mode, no_argc, no_argv);
}
|
||||
|
||||
// Start tesseract.
|
||||
// The datapath must be the name of the data directory or some other file
|
||||
// in which the data directory resides (for instance argv[0].)
|
||||
@ -75,7 +106,7 @@ int TessBaseAPI::Init(const char* datapath, const char* outputbase,
|
||||
// Start tesseract.
|
||||
// Similar to Init() except that it is possible to specify the language.
|
||||
// Language is the code of the language for which the data will be loaded.
|
||||
// (Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded.
|
||||
// (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded.
|
||||
int TessBaseAPI::InitWithLanguage(const char* datapath, const char* outputbase,
|
||||
const char* language, const char* configfile,
|
||||
bool numeric_mode, int argc, char* argv[]) {
|
||||
@ -192,6 +223,14 @@ void TessBaseAPI::DumpPGM(const char* filename) {
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBLEPT
|
||||
// ONLY available if you have Leptonica installed.
|
||||
// Get a copy of the thresholded global image from Tesseract.
|
||||
Pix* TessBaseAPI::GetTesseractImage() {
|
||||
return page_image.ToPix();
|
||||
}
|
||||
#endif // HAVE_LIBLEPT
|
||||
|
||||
// Copy the given image rectangle to Tesseract, with adaptive thresholding
|
||||
// if the image is not already binary.
|
||||
void TessBaseAPI::CopyImageToTesseract(const unsigned char* imagedata,
|
||||
@ -250,6 +289,10 @@ void TessBaseAPI::OtsuThreshold(const unsigned char* imagedata,
|
||||
int H;
|
||||
int best_omega_0;
|
||||
int best_t = OtsuStats(histogram, &H, &best_omega_0);
|
||||
if (best_omega_0 == 0 || best_omega_0 == H) {
|
||||
// This channel is empty.
|
||||
continue;
|
||||
}
|
||||
// To be a convincing foreground we must have a small fraction of H
|
||||
// or to be a convincing background we must have a large fraction of H.
|
||||
// In between we assume this channel contains no thresholding information.
|
||||
@ -293,14 +336,14 @@ void TessBaseAPI::HistogramRect(const unsigned char* imagedata,
|
||||
int* histogram) {
|
||||
int width = right - left;
|
||||
memset(histogram, 0, sizeof(*histogram) * 256);
|
||||
const unsigned char* pix = imagedata +
|
||||
top*bytes_per_line +
|
||||
left*bytes_per_pixel;
|
||||
const unsigned char* pixels = imagedata +
|
||||
top*bytes_per_line +
|
||||
left*bytes_per_pixel;
|
||||
for (int y = top; y < bottom; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
++histogram[pix[x * bytes_per_pixel]];
|
||||
++histogram[pixels[x * bytes_per_pixel]];
|
||||
}
|
||||
pix += bytes_per_line;
|
||||
pixels += bytes_per_line;
|
||||
}
|
||||
}
|
||||
|
||||
@ -420,8 +463,6 @@ void TessBaseAPI::FindLines(BLOCK_LIST* block_list) {
|
||||
PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) {
|
||||
if (tessedit_resegment_from_boxes)
|
||||
apply_boxes(block_list);
|
||||
if (edit_variables)
|
||||
start_variables_editor();
|
||||
|
||||
PAGE_RES* page_res = new PAGE_RES(block_list);
|
||||
if (interactive_mode) {
|
||||
@ -455,6 +496,41 @@ int TessBaseAPI::TextLength(PAGE_RES* page_res) {
|
||||
return total_length;
|
||||
}
|
||||
|
||||
// Returns an array of all word confidences, terminated by -1.
|
||||
int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) {
|
||||
if (!page_res) return NULL;
|
||||
int n_word = 0;
|
||||
PAGE_RES_IT res_it(page_res);
|
||||
for (res_it.restart_page(); res_it.word () != NULL; res_it.forward())
|
||||
n_word++;
|
||||
|
||||
int* conf = new int[n_word+1];
|
||||
n_word = 0;
|
||||
for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) {
|
||||
WERD_RES *word = res_it.word();
|
||||
WERD_CHOICE* choice = word->best_choice;
|
||||
int w_conf = static_cast<int>(100 + 5 * choice->certainty());
|
||||
// This is the eq for converting Tesseract confidence to 1..100
|
||||
if (w_conf < 0) w_conf = 0;
|
||||
if (w_conf > 100) w_conf = 100;
|
||||
conf[n_word++] = w_conf;
|
||||
}
|
||||
conf[n_word] = -1;
|
||||
return conf;
|
||||
}
|
||||
|
||||
// Returns the average word confidence for Tesseract page result.
|
||||
int TessBaseAPI::TextConf(PAGE_RES* page_res) {
|
||||
int* conf = AllTextConfidences(page_res);
|
||||
if (!conf) return 0;
|
||||
int sum = 0;
|
||||
int *pt = conf;
|
||||
while (*pt >= 0) sum += *pt++;
|
||||
if (pt != conf) sum /= pt - conf;
|
||||
delete [] conf;
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Make a text string from the internal data structures.
|
||||
// The input page_res is deleted.
|
||||
char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) {
|
||||
@ -686,7 +762,6 @@ char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) {
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// ____________________________________________________________________________
|
||||
// Ocropus add-ons.
|
||||
|
||||
@ -1009,3 +1084,9 @@ int TessBaseAPI::TesseractExtractResult(char** string,
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
// Check whether a word is valid according to Tesseract's language model
|
||||
// returns 0 if the string is invalid, non-zero if valid
|
||||
int TessBaseAPI::IsValidWord(const char *string) {
|
||||
return valid_word(string);
|
||||
}
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
class PAGE_RES;
|
||||
class BLOCK_LIST;
|
||||
class IMAGE;
|
||||
struct Pix;
|
||||
|
||||
// Base class for all tesseract APIs.
|
||||
// Specific classes can add ability to work on different inputs or produce
|
||||
@ -29,7 +31,22 @@ class BLOCK_LIST;
|
||||
|
||||
class TessBaseAPI {
|
||||
public:
|
||||
// Set the value of an internal "variable" (of either old or new types).
|
||||
// Supply the name of the variable and the value as a string, just as
|
||||
// you would in a config file.
|
||||
// Returns false if the name lookup failed.
|
||||
// For most variables, it is wise to set them before calling Init.
|
||||
// Eg TessBaseAPI::SetVariable("tessedit_char_blacklist", "xyz");
|
||||
static bool SetVariable(const char* variable, const char* value);
|
||||
|
||||
// Start tesseract.
|
||||
// TODO(???): Make tesseract thread-safe, and then the init functions will
|
||||
// return an instance of tesseract, and most of the other methods will become
|
||||
// regular methods.
|
||||
static void SimpleInit(const char* datapath, // Path to tessdata-no ending /.
|
||||
const char* language, // ISO 639-3 string or NULL.
|
||||
bool numeric_mode);
|
||||
|
||||
// The datapath must be the name of the data directory or some other file
|
||||
// in which the data directory resides (for instance argv[0].)
|
||||
// The configfile is the name of a file in the tessconfigs directory
|
||||
@ -49,7 +66,7 @@ class TessBaseAPI {
|
||||
// Start tesseract.
|
||||
// Similar to Init() except that it is possible to specify the language.
|
||||
// Language is the code of the language for which the data will be loaded.
|
||||
// (Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded.
|
||||
// (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded.
|
||||
static int InitWithLanguage(const char* datapath, const char* outputbase,
|
||||
const char* language, const char* configfile,
|
||||
bool numeric_mode, int argc, char* argv[]);
|
||||
@ -97,6 +114,22 @@ class TessBaseAPI {
|
||||
// Dump the internal binary image to a PGM file.
|
||||
static void DumpPGM(const char* filename);
|
||||
|
||||
// Get a copy of the thresholded global image from Tesseract.
|
||||
// Caller takes ownership of the Pix and must pixDestroy it.
|
||||
// May be called before or after RecognizeText, or after TesseractRect.
|
||||
static Pix* GetTesseractImage();
|
||||
|
||||
// Compute the Otsu threshold(s) for the given histogram.
|
||||
// Also returns H = total count in histogram, and
|
||||
// omega0 = count of histogram below threshold.
|
||||
static int OtsuStats(const int* histogram,
|
||||
int* H_out,
|
||||
int* omega0_out);
|
||||
|
||||
// Check whether a word is valid according to Tesseract's language model
|
||||
// returns 0 if the string is invalid, non-zero if valid
|
||||
static int IsValidWord(const char *string);
|
||||
|
||||
protected:
|
||||
// Copy the given image rectangle to Tesseract, with adaptive thresholding
|
||||
// if the image is not already binary.
|
||||
@ -132,13 +165,6 @@ class TessBaseAPI {
|
||||
int left, int top, int right, int bottom,
|
||||
int* histogram);
|
||||
|
||||
// Compute the Otsu threshold(s) for the given histogram.
|
||||
// Also returns H = total count in histogram, and
|
||||
// omega0 = count of histogram below threshold.
|
||||
static int OtsuStats(const int* histogram,
|
||||
int* H_out,
|
||||
int* omega0_out);
|
||||
|
||||
// Threshold the given grey or color image into the tesseract global
|
||||
// image ready for recognition. Requires thresholds and hi_value
|
||||
// produced by OtsuThreshold above.
|
||||
@ -170,6 +196,12 @@ class TessBaseAPI {
|
||||
|
||||
// Return the maximum length that the output text string might occupy.
|
||||
static int TextLength(PAGE_RES* page_res);
|
||||
// Returns the (average) confidence value between 0 and 100.
|
||||
// The input page_res is NOT deleted.
|
||||
static int TextConf(PAGE_RES* page_res);
|
||||
// Returns all word confidences (between 0 and 100) in an array, terminated
|
||||
// by -1. The calling function must delete [] after use.
|
||||
static int* AllTextConfidences(PAGE_RES* page_res);
|
||||
// Convert (and free) the internal data structures into a text string.
|
||||
static char* TesseractToText(PAGE_RES* page_res);
|
||||
// Make a text string from the internal data structures.
|
||||
@ -226,7 +258,7 @@ class TessBaseAPI {
|
||||
// The segdata values are RGB triples, with distinct R values denoting distinct
|
||||
// "blocks" or "columns" and distinct GB pairs denoting distinct lines.
|
||||
// Lexicographic ordering of the RGB triples corresponds to text output order, with
|
||||
// linebreaks inserted between distinct GB values and double blank lines between
|
||||
// linebreaks inserted between distinct GB values and double blank lines between
|
||||
// distinct R values.
|
||||
static int SetPageSegmentation(const unsigned char *segdata,
|
||||
int bytes_per_pixel, /* must be 3 */
|
||||
|
Loading…
Reference in New Issue
Block a user