Format API header files

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2019-11-01 23:09:39 +01:00
parent 29dcfb081a
commit 90db9b5224
13 changed files with 455 additions and 500 deletions

View File

@ -20,11 +20,13 @@
#define TESSERACT_API_BASEAPI_H_
#include <cstdio>
#include <functional> // for std::function
#include <functional> // for std::function
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <tesseract/version.h>
#include "apitypes.h"
#include "pageiterator.h"
#include "platform.h"
@ -33,9 +35,9 @@
#include "serialis.h"
#include "thresholder.h"
#include "unichar.h"
#include <tesseract/version.h>
template <typename T> class GenericVector;
template <typename T>
class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
@ -57,7 +59,7 @@ class UNICHARSET;
class WERD_CHOICE_LIST;
struct INT_FEATURE_STRUCT;
using INT_FEATURE = INT_FEATURE_STRUCT *;
using INT_FEATURE = INT_FEATURE_STRUCT*;
struct TBLOB;
namespace tesseract {
@ -74,11 +76,16 @@ class Tesseract;
class Trie;
class Wordrec;
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int);
using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *);
using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *);
using TruthCallback = std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>;
using DictFunc = int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID,
bool) const;
using ProbabilityInContextFunc = double (Dict::*)(const char*, const char*, int,
const char*, int);
using ParamsModelClassifyFunc = float (Dict::*)(const char*, void*);
using FillLatticeFunc = void (Wordrec::*)(const MATRIX&,
const WERD_CHOICE_LIST&,
const UNICHARSET&, BlamerBundle*);
using TruthCallback =
std::function<void(const UNICHARSET&, int, PageIterator*, Pix*)>;
/**
* Base class for all tesseract APIs.
@ -108,7 +115,7 @@ class TESS_API TessBaseAPI {
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
static size_t getOpenCLDevice(void** device);
/**
* Set the name of the input file. Needed for training and
@ -124,7 +131,7 @@ class TESS_API TessBaseAPI {
*/
const char* GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
void SetInputImage(Pix* pix);
Pix* GetInputImage();
int GetSourceYResolution();
const char* GetDatapath();
@ -152,25 +159,25 @@ class TESS_API TessBaseAPI {
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
bool GetIntVariable(const char* name, int* value) const;
bool GetBoolVariable(const char* name, bool* value) const;
bool GetDoubleVariable(const char* name, double* value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
const char* GetStringVariable(const char* name) const;
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
void PrintVariables(FILE* fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, STRING *val);
bool GetVariableAsString(const char* name, STRING* val);
/**
* Instances are now mostly thread-safe and totally independent,
@ -184,8 +191,8 @@ class TESS_API TessBaseAPI {
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to eng.
* It is entirely safe (and eventually will be efficient too) to call
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
@ -210,15 +217,16 @@ class TESS_API TessBaseAPI {
* "debug" in the name will be set.
*/
int Init(const char* datapath, const char* language, OcrEngineMode mode,
char **configs, int configs_size,
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
char** configs, int configs_size,
const GenericVector<STRING>* vars_vec,
const GenericVector<STRING>* vars_values,
bool set_only_non_debug_params);
int Init(const char* datapath, const char* language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char* datapath, const char* language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
@ -301,9 +309,9 @@ class TESS_API TessBaseAPI {
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char* TesseractRect(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
char* TesseractRect(const unsigned char* imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
@ -317,7 +325,7 @@ class TESS_API TessBaseAPI {
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
@ -385,13 +393,13 @@ class TESS_API TessBaseAPI {
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each line within its block is
* also returned as an array of one element per line. delete [] after use.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa* GetTextlines(bool raw_image, int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
Boxa* GetTextlines(bool raw_image, int raw_padding, Pixa** pixa,
int** blockids, int** paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
@ -404,8 +412,8 @@ class TESS_API TessBaseAPI {
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as an
* array of one element per line. delete [] after use.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa* GetStrips(Pixa** pixa, int** blockids);
@ -432,22 +440,20 @@ class TESS_API TessBaseAPI {
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component within its block
* is also returned as an array of one element per component. delete [] after
* use.
* If raw_image is true, then portions of the original image are extracted
* instead of the thresholded image and padded with raw_padding.
* If text_only is true, then only text components are returned.
* If paraids is not nullptr, the paragraph-id of each component within its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa* GetComponentImages(PageIteratorLevel level,
bool text_only, bool raw_image,
int raw_padding,
Pixa** pixa, int** blockids, int** paraids);
Boxa* GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa** pixa,
int** blockids, int** paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa* GetComponentImages(const PageIteratorLevel level,
const bool text_only,
Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only,
Pixa** pixa, int** blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
nullptr);
}
/**
@ -489,10 +495,10 @@ class TESS_API TessBaseAPI {
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
#ifndef DISABLED_LEGACY_ENGINE
#ifndef DISABLED_LEGACY_ENGINE
/** Variant on Recognize used for testing chopper. */
int RecognizeForChopTest(ETEXT_DESC* monitor);
#endif
#endif
/**
* Turns images into symbolic text.
@ -586,7 +592,6 @@ class TESS_API TessBaseAPI {
*/
char* GetAltoText(ETEXT_DESC* monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
@ -706,10 +711,9 @@ class TESS_API TessBaseAPI {
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word);
int IsValidWord(const char* word);
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character);
bool IsValidCharacter(const char* utf8_character);
bool GetTextDirection(int* out_offset, float* out_slope);
@ -734,8 +738,7 @@ class TESS_API TessBaseAPI {
void GetBlockTextOrientations(int** block_orientation,
bool** vertical_writing);
#ifndef DISABLED_LEGACY_ENGINE
#ifndef DISABLED_LEGACY_ENGINE
/** Sets Wordrec::fill_lattice_ function to point to the given function. */
void SetFillLatticeFunc(FillLatticeFunc f);
@ -751,18 +754,18 @@ class TESS_API TessBaseAPI {
static void DeleteBlockList(BLOCK_LIST* block_list);
/** Returns a ROW object created from the input row specification. */
static ROW *MakeTessOCRRow(float baseline, float xheight,
float descender, float ascender);
static ROW* MakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
/** Returns a TBLOB corresponding to the entire input image. */
static TBLOB *MakeTBLOB(Pix *pix);
static TBLOB* MakeTBLOB(Pix* pix);
/**
* This method baseline normalizes a TBLOB in-place. The input row is used
* for normalization. The denorm is an optional parameter in which the
* normalization-antidote is returned.
*/
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode);
/** This method returns the features associated with the input image. */
void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
@ -772,41 +775,44 @@ class TESS_API TessBaseAPI {
* This method returns the row to which a box of specified dimensions would
* belong. If no good match is found, it returns nullptr.
*/
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right,
int bottom);
/**
* Method to run adaptive classifier on a blob.
* It returns at most num_max_matches results.
*/
void RunAdaptiveClassifier(TBLOB* blob,
int num_max_matches,
int* unichar_ids,
float* ratings,
int* num_matches_returned);
void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids,
float* ratings, int* num_matches_returned);
#endif // ndef DISABLED_LEGACY_ENGINE
/** This method returns the string form of the specified unichar. */
const char* GetUnichar(int unichar_id);
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
const Dawg* GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
Tesseract* tesseract() const { return tesseract_; }
Tesseract* tesseract() const {
return tesseract_;
}
OcrEngineMode oem() const { return last_oem_requested_; }
OcrEngineMode oem() const {
return last_oem_requested_;
}
void InitTruthCallback(TruthCallback cb) { truth_cb_ = cb; }
void InitTruthCallback(TruthCallback cb) {
truth_cb_ = cb;
}
void set_min_orientation_margin(double margin);
/* @} */
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called. */
/** Common code for setting the image. Returns true if Init has been called.
*/
TESS_LOCAL bool InternalSetImage();
/**
@ -842,7 +848,7 @@ class TESS_API TessBaseAPI {
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
#ifndef DISABLED_LEGACY_ENGINE
#ifndef DISABLED_LEGACY_ENGINE
/** @defgroup ocropusAddOns ocropus add-ons */
/* @{ */
@ -851,12 +857,9 @@ class TESS_API TessBaseAPI {
* Adapt to recognize the current image as the given character.
* The image must be preloaded and be just an image of a single character.
*/
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
int length,
float baseline,
float xheight,
float descender,
float ascender);
TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length,
float baseline, float xheight,
float descender, float ascender);
/** Recognize text doing one pass only, using settings for a given pass. */
TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
@ -868,35 +871,33 @@ class TESS_API TessBaseAPI {
* Extract the OCR results, costs (penalty points for uncertainty),
* and the bounding boxes of the characters.
*/
TESS_LOCAL static int TesseractExtractResult(char** text,
int** lengths,
float** costs,
int** x0,
int** y0,
int** x1,
int** y1,
PAGE_RES* page_res);
TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths,
float** costs, int** x0,
int** y0, int** x1, int** y1,
PAGE_RES* page_res);
TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
TESS_LOCAL const PAGE_RES* GetPageRes() const {
return page_res_;
}
/* @} */
#endif // ndef DISABLED_LEGACY_ENGINE
protected:
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel *>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
Tesseract* tesseract_; ///< The underlying data object.
Tesseract* osd_tesseract_; ///< For orientation & script detection.
EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module.
GenericVector<ParagraphModel*>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
STRING* input_file_; ///< Name used by training code.
STRING* output_file_; ///< Name used by debug code.
STRING* datapath_; ///< Current location of tessdata.
STRING* language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback truth_cb_; ///< fxn for setting truth_* in WERD_RES
bool recognition_done_; ///< page_res_ contains recognition data.
TruthCallback truth_cb_; ///< fxn for setting truth_* in WERD_RES
/**
* @defgroup ThresholderParams Thresholder Parameters
@ -913,16 +914,12 @@ class TESS_API TessBaseAPI {
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp,
STRING *buf,
const char* retry_config, int timeout_millisec,
TessResultRenderer* renderer,
bool ProcessPagesFileList(FILE* fp, STRING* buf, const char* retry_config,
int timeout_millisec, TessResultRenderer* renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data,
size_t size,
const char* filename,
const char* retry_config,
bool ProcessPagesMultipageTiff(const unsigned char* data, size_t size,
const char* filename, const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer,
int tessedit_page_number);

View File

@ -31,6 +31,7 @@
#else
# include <stdbool.h>
# include <stdio.h>
# include "platform.h"
#endif
@ -176,43 +177,34 @@ TESS_API void TessDeleteTextArray(char** arr);
TESS_API void TessDeleteIntArray(const int* arr);
/* Renderer API */
TESS_API TessResultRenderer*
TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessHOcrRendererCreate2(const char* outputbase, BOOL font_info);
TESS_API TessResultRenderer*
TessAltoRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessTsvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessPDFRendererCreate(
const char* outputbase, const char* datadir, BOOL textonly);
TESS_API TessResultRenderer*
TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessBoxTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessLSTMBoxRendererCreate(const char* outputbase);
TESS_API TessResultRenderer*
TessWordStrBoxRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessHOcrRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessHOcrRendererCreate2(const char* outputbase,
BOOL font_info);
TESS_API TessResultRenderer* TessAltoRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessTsvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessPDFRendererCreate(const char* outputbase,
const char* datadir,
BOOL textonly);
TESS_API TessResultRenderer* TessUnlvRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessBoxTextRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessLSTMBoxRendererCreate(const char* outputbase);
TESS_API TessResultRenderer* TessWordStrBoxRendererCreate(
const char* outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer* renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer* renderer,
TessResultRenderer* next);
TESS_API TessResultRenderer*
TessResultRendererNext(TessResultRenderer* renderer);
TESS_API BOOL TessResultRendererBeginDocument(
TessResultRenderer* renderer, const char* title);
TessResultRenderer* next);
TESS_API TessResultRenderer* TessResultRendererNext(
TessResultRenderer* renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer* renderer,
const char* title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer* renderer,
TessBaseAPI* api);
TESS_API BOOL
TessResultRendererEndDocument(TessResultRenderer* renderer);
TessBaseAPI* api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer* renderer);
TESS_API const char*
TessResultRendererExtention(TessResultRenderer* renderer);
TESS_API const char*
TessResultRendererTitle(TessResultRenderer* renderer);
TESS_API const char* TessResultRendererExtention(TessResultRenderer* renderer);
TESS_API const char* TessResultRendererTitle(TessResultRenderer* renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer* renderer);
/* Base API */
@ -220,198 +212,170 @@ TESS_API int TessResultRendererImageNum(TessResultRenderer* renderer);
TESS_API TessBaseAPI* TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI* handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle,
void** device);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void** device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI* handle,
const char* name);
TESS_API void TessBaseAPISetInputName(TessBaseAPI* handle, const char* name);
TESS_API const char* TessBaseAPIGetInputName(TessBaseAPI* handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI* handle,
struct Pix* pix);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix);
TESS_API struct Pix* TessBaseAPIGetInputImage(TessBaseAPI* handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI* handle);
TESS_API const char* TessBaseAPIGetDatapath(TessBaseAPI* handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI* handle,
const char* name);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI* handle,
const char* name,
const char* value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle,
const char* name,
const char* value);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name,
const char* value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name,
const char* value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI* handle,
const char* name, int* value);
const char* name, int* value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle,
const char* name,
BOOL* value);
const char* name, BOOL* value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle,
const char* name,
double* value);
TESS_API const char*
TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name);
const char* name, double* value);
TESS_API const char* TessBaseAPIGetStringVariable(const TessBaseAPI* handle,
const char* name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI* handle,
FILE* fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(
const TessBaseAPI* handle, const char* filename);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle,
const char* filename);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API BOOL TessBaseAPIGetVariableAsString(TessBaseAPI* handle,
const char* name,
STRING* val);
const char* name, STRING* val);
TESS_API int TessBaseAPIInit(
TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size,
const STRING* vars_vec, size_t vars_vec_size, const STRING* vars_values,
size_t vars_values_size, BOOL set_only_init_params);
TESS_API int TessBaseAPIInit(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode mode,
char** configs, int configs_size,
const STRING* vars_vec, size_t vars_vec_size,
const STRING* vars_values, size_t vars_values_size,
BOOL set_only_init_params);
#endif // def TESS_CAPI_INCLUDE_BASEAPI
TESS_API int TessBaseAPIInit1(TessBaseAPI* handle,
const char* datapath,
const char* language,
TessOcrEngineMode oem, char** configs,
int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI* handle,
const char* datapath,
const char* language,
TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI* handle,
const char* datapath,
const char* language);
TESS_API int TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode oem,
char** configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath,
const char* language);
TESS_API int TessBaseAPIInit4(
TessBaseAPI* handle, const char* datapath, const char* language,
TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec,
char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath,
const char* language, TessOcrEngineMode mode,
char** configs, int configs_size, char** vars_vec,
char** vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char*
TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle);
TESS_API char**
TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle);
TESS_API char**
TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle);
TESS_API const char* TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI* handle);
TESS_API char** TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI* handle);
TESS_API char** TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI* handle);
TESS_API int TessBaseAPIInitLangMod(TessBaseAPI* handle,
const char* datapath,
const char* language);
TESS_API int TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath,
const char* language);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI* handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI* handle,
const char* filename);
const char* filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle,
const char* filename);
const char* filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI* handle,
TessPageSegMode mode);
TESS_API TessPageSegMode
TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI* handle);
TESS_API char* TessBaseAPIRect(TessBaseAPI* handle,
const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line, int left, int top,
int width, int height);
const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI* handle,
const unsigned char* imagedata,
int width, int height,
int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI* handle,
struct Pix* pix);
const unsigned char* imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI* handle,
int ppi);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI* handle, int left,
int top, int width, int height);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top,
int width, int height);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetThresholder(
TessBaseAPI* handle, TessImageThresholder* thresholder);
TESS_API void TessBaseAPISetThresholder(TessBaseAPI* handle,
TessImageThresholder* thresholder);
#endif
TESS_API struct Pix*
TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
TESS_API struct Pix* TessBaseAPIGetThresholdedImage(TessBaseAPI* handle);
TESS_API struct Boxa* TessBaseAPIGetRegions(TessBaseAPI* handle,
struct Pixa** pixa);
struct Pixa** pixa);
TESS_API struct Boxa* TessBaseAPIGetTextlines(TessBaseAPI* handle,
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa*
TessBaseAPIGetTextlines1(TessBaseAPI* handle, BOOL raw_image, int raw_padding,
struct Pixa** pixa, int** blockids, int** paraids);
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa* TessBaseAPIGetTextlines1(TessBaseAPI* handle,
BOOL raw_image, int raw_padding,
struct Pixa** pixa,
int** blockids, int** paraids);
TESS_API struct Boxa* TessBaseAPIGetStrips(TessBaseAPI* handle,
struct Pixa** pixa,
int** blockids);
struct Pixa** pixa, int** blockids);
TESS_API struct Boxa* TessBaseAPIGetWords(TessBaseAPI* handle,
struct Pixa** pixa);
TESS_API struct Boxa*
TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc);
TESS_API struct Boxa* TessBaseAPIGetComponentImages(
TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
struct Pixa** pixa, int** blockids);
struct Pixa** pixa);
TESS_API struct Boxa* TessBaseAPIGetConnectedComponents(TessBaseAPI* handle,
struct Pixa** cc);
TESS_API struct Boxa* TessBaseAPIGetComponentImages(TessBaseAPI* handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa** pixa,
int** blockids);
TESS_API struct Boxa* TessBaseAPIGetComponentImages1(
TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa** pixa, int** blockids,
int** paraids);
TESS_API int
TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI* handle);
TESS_API TessPageIterator*
TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
TESS_API TessPageIterator* TessBaseAPIAnalyseLayout(TessBaseAPI* handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI* handle,
ETEXT_DESC* monitor);
TESS_API int TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API int TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle,
ETEXT_DESC* monitor);
ETEXT_DESC* monitor);
#endif
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI* handle,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI* handle,
struct Pix* pix, int page_index,
const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix,
int page_index, const char* filename,
const char* retry_config,
int timeout_millisec,
TessResultRenderer* renderer);
TESS_API TessResultIterator*
TessBaseAPIGetIterator(TessBaseAPI* handle);
TESS_API TessMutableIterator*
TessBaseAPIGetMutableIterator(TessBaseAPI* handle);
TESS_API TessResultIterator* TessBaseAPIGetIterator(TessBaseAPI* handle);
TESS_API TessMutableIterator* TessBaseAPIGetMutableIterator(
TessBaseAPI* handle);
TESS_API char* TessBaseAPIGetUTF8Text(TessBaseAPI* handle);
TESS_API char* TessBaseAPIGetHOCRText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetAltoText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetTsvText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetTsvText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle,
int page_number);
TESS_API char* TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, int page_number);
TESS_API char* TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle,
int page_number);
int page_number);
TESS_API char* TessBaseAPIGetUNLVText(TessBaseAPI* handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI* handle);
@ -420,23 +384,20 @@ TESS_API int* TessBaseAPIAllWordConfidences(TessBaseAPI* handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle,
TessPageSegMode mode,
const char* wordstr);
TessPageSegMode mode,
const char* wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI* handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI* handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI* handle,
const char* word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI* handle,
int* out_offset,
float* out_slope);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset,
float* out_slope);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetDictFunc(TessBaseAPI* handle,
TessDictFunc f);
TESS_API void TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
@ -445,32 +406,33 @@ TESS_API void TessBaseAPISetProbabilityInContextFunc(
// Call TessDeleteText(*best_script_name) to free memory allocated by this
// function
TESS_API BOOL TessBaseAPIDetectOrientationScript(
TessBaseAPI* handle, int* orient_deg, float* orient_conf,
const char** script_name, float* script_conf);
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle,
int* orient_deg,
float* orient_conf,
const char** script_name,
float* script_conf);
#endif // def TESS_CAPI_INCLUDE_BASEAPI
TESS_API const char* TessBaseAPIGetUnichar(TessBaseAPI* handle,
int unichar_id);
TESS_API const char* TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id);
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle,
double margin);
double margin);
#ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API const TessDawg* TessBaseAPIGetDawg(const TessBaseAPI* handle,
int i);
TESS_API const TessDawg* TessBaseAPIGetDawg(const TessBaseAPI* handle, int i);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI* handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle);
TESS_API void TessBaseAPIInitTruthCallback(TessBaseAPI* handle,
TessTruthCallback cb);
TessTruthCallback cb);
TESS_API void TessBaseGetBlockTextOrientations(
TessBaseAPI* handle, int** block_orientation, bool** vertical_writing);
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI* handle,
int** block_orientation,
bool** vertical_writing);
#endif
@ -478,24 +440,24 @@ TESS_API void TessBaseGetBlockTextOrientations(
TESS_API void TessPageIteratorDelete(TessPageIterator* handle);
TESS_API TessPageIterator*
TessPageIteratorCopy(const TessPageIterator* handle);
TESS_API TessPageIterator* TessPageIteratorCopy(const TessPageIterator* handle);
TESS_API void TessPageIteratorBegin(TessPageIterator* handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator* handle,
TessPageIteratorLevel level);
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(
const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(
const TessPageIterator* handle, TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(
const TessPageIterator* handle, TessPageIteratorLevel level, int* left,
int* top, int* right, int* bottom);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator* handle,
TessPageIteratorLevel level,
int* left, int* top, int* right,
int* bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator* handle);
@ -503,14 +465,15 @@ TessPageIteratorBlockType(const TessPageIterator* handle);
TESS_API struct Pix* TessPageIteratorGetBinaryImage(
const TessPageIterator* handle, TessPageIteratorLevel level);
TESS_API struct Pix* TessPageIteratorGetImage(
const TessPageIterator* handle, TessPageIteratorLevel level, int padding,
struct Pix* original_image, int* left, int* top);
TESS_API struct Pix* TessPageIteratorGetImage(const TessPageIterator* handle,
TessPageIteratorLevel level,
int padding,
struct Pix* original_image,
int* left, int* top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator* handle,
TessPageIteratorLevel level,
int* x1, int* y1, int* x2,
int* y2);
TessPageIteratorLevel level, int* x1,
int* y1, int* x2, int* y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator* handle, TessOrientation* orientation,
@ -524,23 +487,23 @@ TESS_API void TessPageIteratorParagraphInfo(
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator* handle);
TESS_API TessResultIterator*
TessResultIteratorCopy(const TessResultIterator* handle);
TESS_API TessPageIterator*
TessResultIteratorGetPageIterator(TessResultIterator* handle);
TESS_API const TessPageIterator*
TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle);
TESS_API TessChoiceIterator*
TessResultIteratorGetChoiceIterator(const TessResultIterator* handle);
TESS_API TessResultIterator* TessResultIteratorCopy(
const TessResultIterator* handle);
TESS_API TessPageIterator* TessResultIteratorGetPageIterator(
TessResultIterator* handle);
TESS_API const TessPageIterator* TessResultIteratorGetPageIteratorConst(
const TessResultIterator* handle);
TESS_API TessChoiceIterator* TessResultIteratorGetChoiceIterator(
const TessResultIterator* handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator* handle,
TessPageIteratorLevel level);
TESS_API char* TessResultIteratorGetUTF8Text(
const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(
const TessResultIterator* handle, TessPageIteratorLevel level);
TESS_API const char*
TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle);
TessPageIteratorLevel level);
TESS_API char* TessResultIteratorGetUTF8Text(const TessResultIterator* handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator* handle,
TessPageIteratorLevel level);
TESS_API const char* TessResultIteratorWordRecognitionLanguage(
const TessResultIterator* handle);
TESS_API const char* TessResultIteratorWordFontAttributes(
const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic,
BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps,
@ -548,8 +511,7 @@ TESS_API const char* TessResultIteratorWordFontAttributes(
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle);
TESS_API BOOL
TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator* handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle);
TESS_API BOOL
@ -559,53 +521,50 @@ TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator* handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator* handle);
TESS_API const char*
TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle);
TESS_API float
TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
TESS_API const char* TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator* handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator* handle);
/* Progress monitor */
TESS_API ETEXT_DESC* TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC* monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC* monitor,
void* cancelThis);
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC* monitor, void* cancelThis);
TESS_API void* TessMonitorGetCancelThis(ETEXT_DESC* monitor);
TESS_API void
TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor,
int deadline);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline);
#ifndef DISABLED_LEGACY_ENGINE
# ifdef TESS_CAPI_INCLUDE_BASEAPI
TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle,
TessFillLatticeFunc f);
TessFillLatticeFunc f);
TESS_API void TessBaseAPIGetFeaturesForBlob(
TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex);
TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob,
INT_FEATURE_STRUCT* int_features,
int* num_features,
int* FeatureOutlineIndex);
TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top,
int right, int bottom);
int right, int bottom);
TESS_API void TessBaseAPIRunAdaptiveClassifier(
TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids,
float* ratings, int* num_matches_returned);
TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob,
int num_max_matches,
int* unichar_ids, float* ratings,
int* num_matches_returned);
TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight,
float descender, float ascender);
TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender,
float ascender);
TESS_API TBLOB* TessMakeTBLOB(Pix* pix);
TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row,
BOOL numeric_mode);
TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode);
TESS_API BLOCK_LIST*
TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle);
TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list);

View File

@ -21,11 +21,11 @@
#include <algorithm>
#include <cassert>
#include <climits> // for LONG_MAX
#include <cstdint> // for uint32_t
#include <climits> // for LONG_MAX
#include <cstdint> // for uint32_t
#include <cstdio>
#include <cstdlib>
#include <functional> // for std::function
#include <functional> // for std::function
#include "helpers.h"
#include "serialis.h"
@ -173,8 +173,7 @@ class GenericVector {
// Returns false on error or if the callback returns false.
// DEPRECATED. Use [De]Serialize[Classes] instead.
bool write(FILE* f, std::function<bool(FILE*, const T&)> cb) const;
bool read(tesseract::TFile* f,
std::function<bool(tesseract::TFile*, T*)> cb);
bool read(tesseract::TFile* f, std::function<bool(tesseract::TFile*, T*)> cb);
// Writes a vector of simple types to the given file. Assumes that bitwise
// read/write of T will work. Returns false in case of error.
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
@ -647,12 +646,12 @@ class GenericVectorEqEq : public GenericVector<T> {
GenericVectorEqEq() {
using namespace std::placeholders; // for _1
GenericVector<T>::set_compare_callback(
std::bind(tesseract::cmp_eq<T>, _1, _2));
std::bind(tesseract::cmp_eq<T>, _1, _2));
}
explicit GenericVectorEqEq(int size) : GenericVector<T>(size) {
using namespace std::placeholders; // for _1
GenericVector<T>::set_compare_callback(
std::bind(tesseract::cmp_eq<T>, _1, _2));
std::bind(tesseract::cmp_eq<T>, _1, _2));
}
};
@ -881,8 +880,8 @@ void GenericVector<T>::delete_data_pointers() {
}
template <typename T>
bool GenericVector<T>::write(
FILE* f, std::function<bool(FILE*, const T&)> cb) const {
bool GenericVector<T>::write(FILE* f,
std::function<bool(FILE*, const T&)> cb) const {
if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
return false;
}
@ -904,8 +903,8 @@ bool GenericVector<T>::write(
}
template <typename T>
bool GenericVector<T>::read(
tesseract::TFile* f, std::function<bool(tesseract::TFile*, T*)> cb) {
bool GenericVector<T>::read(tesseract::TFile* f,
std::function<bool(tesseract::TFile*, T*)> cb) {
int32_t reserved;
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
return false;

View File

@ -46,6 +46,7 @@ class Tesseract;
class TESS_API LTRResultIterator : public PageIterator {
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
@ -59,9 +60,8 @@ class TESS_API LTRResultIterator : public PageIterator {
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
~LTRResultIterator() override;
@ -84,10 +84,10 @@ class TESS_API LTRResultIterator : public PageIterator {
char* GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
void SetLineSeparator(const char* new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
void SetParagraphSeparator(const char* new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
@ -107,14 +107,10 @@ class TESS_API LTRResultIterator : public PageIterator {
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printer's points (1/72 inch).
const char* WordFontAttributes(bool* is_bold,
bool* is_italic,
bool* is_underlined,
bool* is_monospace,
bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id) const;
const char* WordFontAttributes(bool* is_bold, bool* is_italic,
bool* is_underlined, bool* is_monospace,
bool* is_serif, bool* is_smallcaps,
int* pointsize, int* font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
@ -137,22 +133,22 @@ class TESS_API LTRResultIterator : public PageIterator {
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
const void* GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
const char* GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
const char* GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
bool EquivalentToTruth(const char* str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
@ -164,7 +160,7 @@ class TESS_API LTRResultIterator : public PageIterator {
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *WordLattice(int *lattice_size) const;
const char* WordLattice(int* lattice_size) const;
// ============= Functions that refer to symbols only ============.
@ -182,8 +178,8 @@ class TESS_API LTRResultIterator : public PageIterator {
bool SymbolIsDropcap() const;
protected:
const char *line_separator_;
const char *paragraph_separator_;
const char* line_separator_;
const char* paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
@ -222,7 +218,7 @@ class ChoiceIterator {
std::vector<std::vector<std::pair<const char*, float>>>* Timesteps() const;
private:
//clears the remaining spaces out of the results and adapt the probabilities
// clears the remaining spaces out of the results and adapt the probabilities
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES* word_res_;

View File

@ -28,7 +28,8 @@ class BLOB_CHOICE_LIST;
class STRING;
class TO_BLOCK_LIST;
class UNICHARSET;
template <typename T> class GenericVector;
template <typename T>
class GenericVector;
namespace tesseract {
class Tesseract;
@ -38,8 +39,8 @@ class Tesseract;
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
struct OSBestResult {
OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0),
oconfidence(0.0) {}
OSBestResult()
: orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
int orientation_id;
int script_id;
float sconfidence;
@ -49,8 +50,7 @@ struct OSBestResult {
struct OSResults {
OSResults() : unicharset(nullptr) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j)
scripts_na[i][j] = 0;
for (int j = 0; j < kMaxNumberOfScripts; ++j) scripts_na[i][j] = 0;
orientations[i] = 0;
}
}
@ -87,6 +87,7 @@ class OrientationDetector {
OSResults* results);
bool detect_blob(BLOB_CHOICE_LIST* scores);
int get_orientation();
private:
OSResults* osr_;
const GenericVector<int>* allowed_scripts_;
@ -94,10 +95,11 @@ class OrientationDetector {
class ScriptDetector {
public:
ScriptDetector(const GenericVector<int>* allowed_scripts,
OSResults* osr, tesseract::Tesseract* tess);
ScriptDetector(const GenericVector<int>* allowed_scripts, OSResults* osr,
tesseract::Tesseract* tess);
void detect_blob(BLOB_CHOICE_LIST* scores);
bool must_stop(int orientation);
private:
OSResults* osr_;
static const char* korean_script_;
@ -115,22 +117,18 @@ class ScriptDetector {
const GenericVector<int>* allowed_scripts_;
};
int orientation_and_script_detection(STRING& filename,
OSResults*,
int orientation_and_script_detection(STRING& filename, OSResults*,
tesseract::Tesseract*);
int os_detect(TO_BLOCK_LIST* port_blocks,
OSResults* osr,
int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
tesseract::Tesseract* tess);
int os_detect_blobs(const GenericVector<int>* allowed_scripts,
BLOBNBOX_CLIST* blob_list,
OSResults* osr,
BLOBNBOX_CLIST* blob_list, OSResults* osr,
tesseract::Tesseract* tess);
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
ScriptDetector* s, OSResults*,
tesseract::Tesseract* tess);
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s,
OSResults*, tesseract::Tesseract* tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be

View File

@ -65,10 +65,9 @@ class TESS_API PageIterator {
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
int scale, int scaled_yres,
int rect_left, int rect_top,
int rect_width, int rect_height);
PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
@ -164,7 +163,7 @@ class TESS_API PageIterator {
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
int Cmp(const PageIterator& other) const;
// ============= Accessing data ==============.
// Coordinate system:
@ -203,17 +202,17 @@ class TESS_API PageIterator {
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level,
int* left, int* top, int* right, int* bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding,
int* left, int* top, int* right, int* bottom) const;
bool BoundingBox(PageIteratorLevel level, int* left, int* top, int* right,
int* bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int* left, int* top,
int* right, int* bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level,
int* left, int* top, int* right, int* bottom) const;
bool BoundingBoxInternal(PageIteratorLevel level, int* left, int* top,
int* right, int* bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
@ -261,8 +260,8 @@ class TESS_API PageIterator {
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level,
int* x1, int* y1, int* x2, int* y2) const;
bool Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2,
int* y2) const;
/**
* Returns orientation for the block the iterator points to.
@ -272,10 +271,10 @@ class TESS_API PageIterator {
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
void Orientation(tesseract::Orientation* orientation,
tesseract::WritingDirection* writing_direction,
tesseract::TextlineOrder* textline_order,
float* deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
@ -305,16 +304,15 @@ class TESS_API PageIterator {
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item,
bool *is_crown,
int *first_line_indent) const;
void ParagraphInfo(tesseract::ParagraphJustification* justification,
bool* is_list_item, bool* is_crown,
int* first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
bool SetWordBlamerBundle(BlamerBundle* blamer_bundle);
protected:
/**

View File

@ -48,23 +48,23 @@ constexpr int kResolutionEstimationFactor = 10;
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
* below, as well as kPolyBlockNames in layout_test.cc.
* Used extensively by ColPartition, and POLY_BLOCK.
*/
*/
enum PolyBlockType {
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_INLINE_EQUATION, // Partition has inline equation.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_HORZ_LINE, // Horizontal Line.
PT_VERT_LINE, // Vertical Line.
PT_NOISE, // Lies outside of any column.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_HORZ_LINE, // Horizontal Line.
PT_VERT_LINE, // Vertical Line.
PT_NOISE, // Lies outside of any column.
PT_COUNT
};
@ -127,7 +127,7 @@ enum Orientation {
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
*/
enum WritingDirection {
WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
@ -144,7 +144,7 @@ enum WritingDirection {
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
*/
enum TextlineOrder {
TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
@ -155,27 +155,28 @@ enum TextlineOrder {
* Possible modes for page layout analysis. These *must* be kept in order
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
* so that the inequality test macros below work.
*/
*/
enum PageSegMode {
PSM_OSD_ONLY = 0, ///< Orientation and script detection only.
PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and
///< script detection. (OSD)
///< script detection. (OSD)
PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of vertically
///< aligned text.
PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
PSM_SPARSE_TEXT = 11, ///< Find as much text as possible in no particular order.
PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
///< vertically aligned text.
PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
PSM_SPARSE_TEXT =
11, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
PSM_COUNT ///< Number of enum entries.
};
/**
@ -183,7 +184,7 @@ enum PageSegMode {
* layout analysis are enabled.
* *Depend critically on the order of elements of PageSegMode.*
* NOTE that arg is an int for compatibility with INT_PARAM.
*/
*/
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  // The sparse-text OSD mode is the one OSD mode that sits outside the
  // contiguous range at the start of the enum, so test it explicitly.
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  // Otherwise OSD is enabled for every mode up to and including PSM_AUTO_OSD.
  return pageseg_mode <= PSM_AUTO_OSD;
}
@ -204,14 +205,14 @@ inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
}
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
*/
enum PageIteratorLevel {
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
@ -260,7 +261,7 @@ enum ParagraphJustification {
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
*/
enum OcrEngineMode {
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.

View File

@ -22,9 +22,10 @@
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <string> // for std::string
#include "genericvector.h"
#include "platform.h"
#include "strngs.h" // for STRING
#include "strngs.h" // for STRING
struct Pix;

View File

@ -22,15 +22,18 @@
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include <set> // for std::pair
#include <vector> // for std::vector
#include <set> // for std::pair
#include <vector> // for std::vector
#include "ltrresultiterator.h" // for LTRResultIterator
#include "platform.h" // for TESS_API, TESS_LOCAL
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
template <typename T> class GenericVector;
template <typename T> class GenericVectorEqEq;
template <typename T>
class GenericVector;
template <typename T>
class GenericVectorEqEq;
class STRING;
@ -40,7 +43,7 @@ class Tesseract;
class TESS_API ResultIterator : public LTRResultIterator {
public:
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
static ResultIterator* StartOfParagraph(const LTRResultIterator& resit);
/**
* ResultIterator is copy constructible!
@ -81,9 +84,9 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
@ -94,21 +97,21 @@ class TESS_API ResultIterator : public LTRResultIterator {
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
*/
virtual char* GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
*/
*/
virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
GetRawLSTMTimesteps() const;
virtual std::vector<std::vector<std::pair<const char*, float>>>*
GetBestLSTMSymbolChoices() const;
GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
@ -137,8 +140,8 @@ class TESS_API ResultIterator : public LTRResultIterator {
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const GenericVector<StrongScriptDirection> &word_dirs,
GenericVectorEqEq<int> *reading_order);
const GenericVector<StrongScriptDirection>& word_dirs,
GenericVectorEqEq<int>* reading_order);
static const int kMinorRunStart;
static const int kMinorRunEnd;
@ -151,7 +154,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit);
private:
/**
@ -172,13 +175,13 @@ class TESS_API ResultIterator : public LTRResultIterator {
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVectorEqEq<int> *indices) const;
const LTRResultIterator& resit,
GenericVectorEqEq<int>* indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
GenericVector<StrongScriptDirection> *ssd,
GenericVectorEqEq<int> *indices) const;
const LTRResultIterator& resit,
GenericVector<StrongScriptDirection>* ssd,
GenericVectorEqEq<int>* indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
@ -190,7 +193,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
void CalculateBlobOrder(GenericVector<int>* blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
@ -211,10 +214,10 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(STRING *text) const;
void AppendSuffixMarks(STRING* text) const;
/** Appends the current word in reading order to the given buffer. */
void AppendUTF8WordText(STRING *text) const;
void AppendUTF8WordText(STRING* text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
@ -223,7 +226,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(STRING *text);
void IterateAndAppendUTF8TextlineText(STRING* text);
/**
* Appends the text of the current paragraph in reading order
@ -231,7 +234,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(STRING *text) const;
void AppendUTF8ParagraphText(STRING* text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;

View File

@ -49,7 +49,8 @@ constexpr size_t countof(T const (&)[N]) noexcept {
using FileReader = bool (*)(const char* filename, GenericVector<char>* data);
// Function to write a GenericVector<char> to a whole file.
// Returns false on failure.
using FileWriter = bool (*)(const GenericVector<char>& data, const char* filename);
using FileWriter = bool (*)(const GenericVector<char>& data,
const char* filename);
// Deserialize data from file.
bool DeSerialize(FILE* fp, char* data, size_t n = 1);

View File

@ -19,10 +19,11 @@
#ifndef STRNGS_H
#define STRNGS_H
#include <cassert> // for assert
#include <cstdint> // for uint32_t
#include <cstdio> // for FILE
#include <cstring> // for strncpy
#include <cassert> // for assert
#include <cstdint> // for uint32_t
#include <cstdio> // for FILE
#include <cstring> // for strncpy
#include "platform.h" // for TESS_API
namespace tesseract {

View File

@ -150,8 +150,8 @@ class TESS_API ImageThresholder {
/// Return true if we are processing the full image.
bool IsFullImage() const {
return rect_left_ == 0 && rect_top_ == 0 &&
rect_width_ == image_width_ && rect_height_ == image_height_;
return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ &&
rect_height_ == image_height_;
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
@ -161,27 +161,26 @@ class TESS_API ImageThresholder {
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
void ThresholdRectToPix(Pix* src_pix, int num_channels,
const int* thresholds, const int* hi_values,
Pix** pix) const;
void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds,
const int* hi_values, Pix** pix) const;
protected:
/// Clone or other copy of the source Pix.
/// The pix will always be PixDestroy()ed on destruction of the class.
Pix* pix_;
Pix* pix_;
int image_width_; ///< Width of source pix_.
int image_height_; ///< Height of source pix_.
int pix_channels_; ///< Number of 8-bit channels in pix_.
int pix_wpl_; ///< Words per line of pix_.
int image_width_; ///< Width of source pix_.
int image_height_; ///< Height of source pix_.
int pix_channels_; ///< Number of 8-bit channels in pix_.
int pix_wpl_; ///< Words per line of pix_.
// Limits of image rectangle to be processed.
int scale_; ///< Scale factor from original image.
int yres_; ///< y pixels/inch in source image.
int estimated_res_; ///< Resolution estimate from text size.
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int scale_; ///< Scale factor from original image.
int yres_; ///< y pixels/inch in source image.
int estimated_res_; ///< Resolution estimate from text size.
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.

View File

@ -20,9 +20,11 @@
#define TESSERACT_CCUTIL_UNICHAR_H_
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
#include "platform.h"
// Maximum number of characters that can be stored in a UNICHAR. Must be