diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 27986b490..1bb34fcdf 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -20,11 +20,13 @@ #define TESSERACT_API_BASEAPI_H_ #include -#include // for std::function +#include // for std::function // To avoid collision with other typenames include the ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. +#include + #include "apitypes.h" #include "pageiterator.h" #include "platform.h" @@ -33,9 +35,9 @@ #include "serialis.h" #include "thresholder.h" #include "unichar.h" -#include -template class GenericVector; +template +class GenericVector; class PAGE_RES; class PAGE_RES_IT; class ParagraphModel; @@ -57,7 +59,7 @@ class UNICHARSET; class WERD_CHOICE_LIST; struct INT_FEATURE_STRUCT; -using INT_FEATURE = INT_FEATURE_STRUCT *; +using INT_FEATURE = INT_FEATURE_STRUCT*; struct TBLOB; namespace tesseract { @@ -74,11 +76,16 @@ class Tesseract; class Trie; class Wordrec; -using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const; -using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int); -using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *); -using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *); -using TruthCallback = std::function; +using DictFunc = int (Dict::*)(void*, const UNICHARSET&, UNICHAR_ID, + bool) const; +using ProbabilityInContextFunc = double (Dict::*)(const char*, const char*, int, + const char*, int); +using ParamsModelClassifyFunc = float (Dict::*)(const char*, void*); +using FillLatticeFunc = void (Wordrec::*)(const MATRIX&, + const WERD_CHOICE_LIST&, + const UNICHARSET&, BlamerBundle*); +using TruthCallback = + std::function; /** * Base class for all tesseract APIs. @@ -108,7 +115,7 @@ class TESS_API TessBaseAPI { * and returns sizeof(cl_device_id) * otherwise *device=nullptr and returns 0. */ - static size_t getOpenCLDevice(void **device); + static size_t getOpenCLDevice(void** device); /** * Set the name of the input file. Needed for training and @@ -124,7 +131,7 @@ class TESS_API TessBaseAPI { */ const char* GetInputName(); // Takes ownership of the input pix. - void SetInputImage(Pix *pix); + void SetInputImage(Pix* pix); Pix* GetInputImage(); int GetSourceYResolution(); const char* GetDatapath(); @@ -152,25 +159,25 @@ class TESS_API TessBaseAPI { * Returns true if the parameter was found among Tesseract parameters. * Fills in value with the value of the parameter. */ - bool GetIntVariable(const char *name, int *value) const; - bool GetBoolVariable(const char *name, bool *value) const; - bool GetDoubleVariable(const char *name, double *value) const; + bool GetIntVariable(const char* name, int* value) const; + bool GetBoolVariable(const char* name, bool* value) const; + bool GetDoubleVariable(const char* name, double* value) const; /** * Returns the pointer to the string that represents the value of the * parameter if it was found among Tesseract parameters. */ - const char *GetStringVariable(const char *name) const; + const char* GetStringVariable(const char* name) const; /** * Print Tesseract parameters to the given file. */ - void PrintVariables(FILE *fp) const; + void PrintVariables(FILE* fp) const; /** * Get value of named variable as a string, if it exists. */ - bool GetVariableAsString(const char *name, STRING *val); + bool GetVariableAsString(const char* name, STRING* val); /** * Instances are now mostly thread-safe and totally independent, @@ -184,8 +191,8 @@ class TESS_API TessBaseAPI { * listed above here in the class definition. * * The datapath must be the name of the tessdata directory. - * The language is (usually) an ISO 639-3 string or nullptr will default to eng. - * It is entirely safe (and eventually will be efficient too) to call + * The language is (usually) an ISO 639-3 string or nullptr will default to + * eng. It is entirely safe (and eventually will be efficient too) to call * Init multiple times on the same instance to change language, or just * to reset the classifier. * The language may be a string of the form [~][+[~]]* indicating @@ -210,15 +217,16 @@ class TESS_API TessBaseAPI { * "debug" in the name will be set. */ int Init(const char* datapath, const char* language, OcrEngineMode mode, - char **configs, int configs_size, - const GenericVector *vars_vec, - const GenericVector *vars_values, + char** configs, int configs_size, + const GenericVector* vars_vec, + const GenericVector* vars_values, bool set_only_non_debug_params); int Init(const char* datapath, const char* language, OcrEngineMode oem) { return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false); } int Init(const char* datapath, const char* language) { - return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false); + return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, + false); } // In-memory version reads the traineddata file directly from the given // data[data_size] array, and/or reads data via a FileReader. @@ -301,9 +309,9 @@ class TESS_API TessBaseAPI { * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize, * and one or more of the Get*Text functions below. */ - char* TesseractRect(const unsigned char* imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height); + char* TesseractRect(const unsigned char* imagedata, int bytes_per_pixel, + int bytes_per_line, int left, int top, int width, + int height); /** * Call between pages or documents etc to free up memory and forget @@ -317,7 +325,7 @@ class TESS_API TessBaseAPI { * get hold of the thresholded image, get the text in different formats, * get bounding boxes, confidences etc. */ - /* @{ */ + /* @{ */ /** * Provide an image for Tesseract to recognize. Format is as @@ -385,13 +393,13 @@ class TESS_API TessBaseAPI { * Can be called before or after Recognize. * If raw_image is true, then extract from the original image instead of the * thresholded image and pad by raw_padding pixels. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. - * If paraids is not nullptr, the paragraph-id of each line within its block is - * also returned as an array of one element per line. delete [] after use. + * If blockids is not nullptr, the block-id of each line is also returned as + * an array of one element per line. delete [] after use. If paraids is not + * nullptr, the paragraph-id of each line within its block is also returned as + * an array of one element per line. delete [] after use. */ - Boxa* GetTextlines(bool raw_image, int raw_padding, - Pixa** pixa, int** blockids, int** paraids); + Boxa* GetTextlines(bool raw_image, int raw_padding, Pixa** pixa, + int** blockids, int** paraids); /* Helper method to extract from the thresholded image. (most common usage) */ @@ -404,8 +412,8 @@ class TESS_API TessBaseAPI { * pair, in reading order. Enables downstream handling of non-rectangular * regions. * Can be called before or after Recognize. - * If blockids is not nullptr, the block-id of each line is also returned as an - * array of one element per line. delete [] after use. + * If blockids is not nullptr, the block-id of each line is also returned as + * an array of one element per line. delete [] after use. */ Boxa* GetStrips(Pixa** pixa, int** blockids); @@ -432,22 +440,20 @@ class TESS_API TessBaseAPI { * Can be called before or after Recognize. * If blockids is not nullptr, the block-id of each component is also returned * as an array of one element per component. delete [] after use. - * If blockids is not nullptr, the paragraph-id of each component with its block - * is also returned as an array of one element per component. delete [] after - * use. - * If raw_image is true, then portions of the original image are extracted - * instead of the thresholded image and padded with raw_padding. - * If text_only is true, then only text components are returned. + * If blockids is not nullptr, the paragraph-id of each component with its + * block is also returned as an array of one element per component. delete [] + * after use. If raw_image is true, then portions of the original image are + * extracted instead of the thresholded image and padded with raw_padding. If + * text_only is true, then only text components are returned. */ - Boxa* GetComponentImages(PageIteratorLevel level, - bool text_only, bool raw_image, - int raw_padding, - Pixa** pixa, int** blockids, int** paraids); + Boxa* GetComponentImages(PageIteratorLevel level, bool text_only, + bool raw_image, int raw_padding, Pixa** pixa, + int** blockids, int** paraids); // Helper function to get binary images with no padding (most common usage). - Boxa* GetComponentImages(const PageIteratorLevel level, - const bool text_only, + Boxa* GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa** pixa, int** blockids) { - return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr); + return GetComponentImages(level, text_only, false, 0, pixa, blockids, + nullptr); } /** @@ -489,10 +495,10 @@ class TESS_API TessBaseAPI { * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) */ - #ifndef DISABLED_LEGACY_ENGINE +#ifndef DISABLED_LEGACY_ENGINE /** Variant on Recognize used for testing chopper. */ int RecognizeForChopTest(ETEXT_DESC* monitor); - #endif +#endif /** * Turns images into symbolic text. @@ -586,7 +592,6 @@ class TESS_API TessBaseAPI { */ char* GetAltoText(ETEXT_DESC* monitor, int page_number); - /** * Make an XML-formatted string with Alto markup from the internal * data structures. @@ -706,10 +711,9 @@ class TESS_API TessBaseAPI { * @warning temporary! This function will be removed from here and placed * in a separate API at some future time. */ - int IsValidWord(const char *word); + int IsValidWord(const char* word); // Returns true if utf8_character is defined in the UniCharset. - bool IsValidCharacter(const char *utf8_character); - + bool IsValidCharacter(const char* utf8_character); bool GetTextDirection(int* out_offset, float* out_slope); @@ -734,8 +738,7 @@ class TESS_API TessBaseAPI { void GetBlockTextOrientations(int** block_orientation, bool** vertical_writing); - - #ifndef DISABLED_LEGACY_ENGINE +#ifndef DISABLED_LEGACY_ENGINE /** Sets Wordrec::fill_lattice_ function to point to the given function. */ void SetFillLatticeFunc(FillLatticeFunc f); @@ -751,18 +754,18 @@ class TESS_API TessBaseAPI { static void DeleteBlockList(BLOCK_LIST* block_list); /** Returns a ROW object created from the input row specification. */ - static ROW *MakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); + static ROW* MakeTessOCRRow(float baseline, float xheight, float descender, + float ascender); /** Returns a TBLOB corresponding to the entire input image. */ - static TBLOB *MakeTBLOB(Pix *pix); + static TBLOB* MakeTBLOB(Pix* pix); /** * This method baseline normalizes a TBLOB in-place. The input row is used * for normalization. The denorm is an optional parameter in which the * normalization-antidote is returned. */ - static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); + static void NormalizeTBLOB(TBLOB* tblob, ROW* row, bool numeric_mode); /** This method returns the features associated with the input image. */ void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features, @@ -772,41 +775,44 @@ class TESS_API TessBaseAPI { * This method returns the row to which a box of specified dimensions would * belong. If no good match is found, it returns nullptr. */ - static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); + static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, + int bottom); /** * Method to run adaptive classifier on a blob. * It returns at max num_max_matches results. */ - void RunAdaptiveClassifier(TBLOB* blob, - int num_max_matches, - int* unichar_ids, - float* ratings, - int* num_matches_returned); + void RunAdaptiveClassifier(TBLOB* blob, int num_max_matches, int* unichar_ids, + float* ratings, int* num_matches_returned); #endif // ndef DISABLED_LEGACY_ENGINE /** This method returns the string form of the specified unichar. */ const char* GetUnichar(int unichar_id); /** Return the pointer to the i-th dawg loaded into tesseract_ object. */ - const Dawg *GetDawg(int i) const; + const Dawg* GetDawg(int i) const; /** Return the number of dawgs loaded into tesseract_ object. */ int NumDawgs() const; - Tesseract* tesseract() const { return tesseract_; } + Tesseract* tesseract() const { + return tesseract_; + } - OcrEngineMode oem() const { return last_oem_requested_; } + OcrEngineMode oem() const { + return last_oem_requested_; + } - void InitTruthCallback(TruthCallback cb) { truth_cb_ = cb; } + void InitTruthCallback(TruthCallback cb) { + truth_cb_ = cb; + } void set_min_orientation_margin(double margin); - /* @} */ + /* @} */ protected: - - /** Common code for setting the image. Returns true if Init has been called. */ + /** Common code for setting the image. Returns true if Init has been called. + */ TESS_LOCAL bool InternalSetImage(); /** @@ -842,7 +848,7 @@ class TESS_API TessBaseAPI { //// paragraphs.cpp //////////////////////////////////////////////////// TESS_LOCAL void DetectParagraphs(bool after_text_recognition); - #ifndef DISABLED_LEGACY_ENGINE +#ifndef DISABLED_LEGACY_ENGINE /** @defgroup ocropusAddOns ocropus add-ons */ /* @{ */ @@ -851,12 +857,9 @@ class TESS_API TessBaseAPI { * Adapt to recognize the current image as the given character. * The image must be preloaded and be just an image of a single character. */ - TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, - int length, - float baseline, - float xheight, - float descender, - float ascender); + TESS_LOCAL void AdaptToCharacter(const char* unichar_repr, int length, + float baseline, float xheight, + float descender, float ascender); /** Recognize text doing one pass only, using settings for a given pass. */ TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); @@ -868,35 +871,33 @@ class TESS_API TessBaseAPI { * Extract the OCR results, costs (penalty points for uncertainty), * and the bounding boxes of the characters. */ - TESS_LOCAL static int TesseractExtractResult(char** text, - int** lengths, - float** costs, - int** x0, - int** y0, - int** x1, - int** y1, - PAGE_RES* page_res); + TESS_LOCAL static int TesseractExtractResult(char** text, int** lengths, + float** costs, int** x0, + int** y0, int** x1, int** y1, + PAGE_RES* page_res); - TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } + TESS_LOCAL const PAGE_RES* GetPageRes() const { + return page_res_; + } /* @} */ #endif // ndef DISABLED_LEGACY_ENGINE protected: - Tesseract* tesseract_; ///< The underlying data object. - Tesseract* osd_tesseract_; ///< For orientation & script detection. - EquationDetect* equ_detect_; ///< The equation detector. - FileReader reader_; ///< Reads files from any filesystem. - ImageThresholder* thresholder_; ///< Image thresholding module. - GenericVector* paragraph_models_; - BLOCK_LIST* block_list_; ///< The page layout. - PAGE_RES* page_res_; ///< The page-level data. - STRING* input_file_; ///< Name used by training code. - STRING* output_file_; ///< Name used by debug code. - STRING* datapath_; ///< Current location of tessdata. - STRING* language_; ///< Last initialized language. + Tesseract* tesseract_; ///< The underlying data object. + Tesseract* osd_tesseract_; ///< For orientation & script detection. + EquationDetect* equ_detect_; ///< The equation detector. + FileReader reader_; ///< Reads files from any filesystem. + ImageThresholder* thresholder_; ///< Image thresholding module. + GenericVector* paragraph_models_; + BLOCK_LIST* block_list_; ///< The page layout. + PAGE_RES* page_res_; ///< The page-level data. + STRING* input_file_; ///< Name used by training code. + STRING* output_file_; ///< Name used by debug code. + STRING* datapath_; ///< Current location of tessdata. + STRING* language_; ///< Last initialized language. OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested. - bool recognition_done_; ///< page_res_ contains recognition data. - TruthCallback truth_cb_; ///< fxn for setting truth_* in WERD_RES + bool recognition_done_; ///< page_res_ contains recognition data. + TruthCallback truth_cb_; ///< fxn for setting truth_* in WERD_RES /** * @defgroup ThresholderParams Thresholder Parameters @@ -913,16 +914,12 @@ class TESS_API TessBaseAPI { private: // A list of image filenames gets special consideration - bool ProcessPagesFileList(FILE *fp, - STRING *buf, - const char* retry_config, int timeout_millisec, - TessResultRenderer* renderer, + bool ProcessPagesFileList(FILE* fp, STRING* buf, const char* retry_config, + int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); // TIFF supports multipage so gets special consideration. - bool ProcessPagesMultipageTiff(const unsigned char *data, - size_t size, - const char* filename, - const char* retry_config, + bool ProcessPagesMultipageTiff(const unsigned char* data, size_t size, + const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); diff --git a/include/tesseract/capi.h b/include/tesseract/capi.h index 5c922642c..c40f68ba7 100644 --- a/include/tesseract/capi.h +++ b/include/tesseract/capi.h @@ -31,6 +31,7 @@ #else # include # include + # include "platform.h" #endif @@ -176,43 +177,34 @@ TESS_API void TessDeleteTextArray(char** arr); TESS_API void TessDeleteIntArray(const int* arr); /* Renderer API */ -TESS_API TessResultRenderer* -TessTextRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessHOcrRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessHOcrRendererCreate2(const char* outputbase, BOOL font_info); -TESS_API TessResultRenderer* -TessAltoRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessTsvRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* TessPDFRendererCreate( - const char* outputbase, const char* datadir, BOOL textonly); -TESS_API TessResultRenderer* -TessUnlvRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessBoxTextRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessLSTMBoxRendererCreate(const char* outputbase); -TESS_API TessResultRenderer* -TessWordStrBoxRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessHOcrRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessHOcrRendererCreate2(const char* outputbase, + BOOL font_info); +TESS_API TessResultRenderer* TessAltoRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessTsvRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessPDFRendererCreate(const char* outputbase, + const char* datadir, + BOOL textonly); +TESS_API TessResultRenderer* TessUnlvRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessBoxTextRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessLSTMBoxRendererCreate(const char* outputbase); +TESS_API TessResultRenderer* TessWordStrBoxRendererCreate( + const char* outputbase); TESS_API void TessDeleteResultRenderer(TessResultRenderer* renderer); TESS_API void TessResultRendererInsert(TessResultRenderer* renderer, - TessResultRenderer* next); -TESS_API TessResultRenderer* -TessResultRendererNext(TessResultRenderer* renderer); -TESS_API BOOL TessResultRendererBeginDocument( - TessResultRenderer* renderer, const char* title); + TessResultRenderer* next); +TESS_API TessResultRenderer* TessResultRendererNext( + TessResultRenderer* renderer); +TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer* renderer, + const char* title); TESS_API BOOL TessResultRendererAddImage(TessResultRenderer* renderer, - TessBaseAPI* api); -TESS_API BOOL -TessResultRendererEndDocument(TessResultRenderer* renderer); + TessBaseAPI* api); +TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer* renderer); -TESS_API const char* -TessResultRendererExtention(TessResultRenderer* renderer); -TESS_API const char* -TessResultRendererTitle(TessResultRenderer* renderer); +TESS_API const char* TessResultRendererExtention(TessResultRenderer* renderer); +TESS_API const char* TessResultRendererTitle(TessResultRenderer* renderer); TESS_API int TessResultRendererImageNum(TessResultRenderer* renderer); /* Base API */ @@ -220,198 +212,170 @@ TESS_API int TessResultRendererImageNum(TessResultRenderer* renderer); TESS_API TessBaseAPI* TessBaseAPICreate(); TESS_API void TessBaseAPIDelete(TessBaseAPI* handle); -TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, - void** device); +TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI* handle, void** device); -TESS_API void TessBaseAPISetInputName(TessBaseAPI* handle, - const char* name); +TESS_API void TessBaseAPISetInputName(TessBaseAPI* handle, const char* name); TESS_API const char* TessBaseAPIGetInputName(TessBaseAPI* handle); -TESS_API void TessBaseAPISetInputImage(TessBaseAPI* handle, - struct Pix* pix); +TESS_API void TessBaseAPISetInputImage(TessBaseAPI* handle, struct Pix* pix); TESS_API struct Pix* TessBaseAPIGetInputImage(TessBaseAPI* handle); TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI* handle); TESS_API const char* TessBaseAPIGetDatapath(TessBaseAPI* handle); -TESS_API void TessBaseAPISetOutputName(TessBaseAPI* handle, - const char* name); +TESS_API void TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name); -TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI* handle, - const char* name, - const char* value); -TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle, - const char* name, - const char* value); +TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, + const char* value); +TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, + const char* value); TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, - const char* name, int* value); + const char* name, int* value); TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, - const char* name, - BOOL* value); + const char* name, BOOL* value); TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, - const char* name, - double* value); -TESS_API const char* -TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); + const char* name, double* value); +TESS_API const char* TessBaseAPIGetStringVariable(const TessBaseAPI* handle, + const char* name); -TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI* handle, - FILE* fp); -TESS_API BOOL TessBaseAPIPrintVariablesToFile( - const TessBaseAPI* handle, const char* filename); +TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp); +TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, + const char* filename); #ifdef TESS_CAPI_INCLUDE_BASEAPI TESS_API BOOL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, - const char* name, - STRING* val); + const char* name, STRING* val); -TESS_API int TessBaseAPIInit( - TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, - const STRING* vars_vec, size_t vars_vec_size, const STRING* vars_values, - size_t vars_values_size, BOOL set_only_init_params); +TESS_API int TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, + const char* language, TessOcrEngineMode mode, + char** configs, int configs_size, + const STRING* vars_vec, size_t vars_vec_size, + const STRING* vars_values, size_t vars_values_size, + BOOL set_only_init_params); #endif // def TESS_CAPI_INCLUDE_BASEAPI -TESS_API int TessBaseAPIInit1(TessBaseAPI* handle, - const char* datapath, - const char* language, - TessOcrEngineMode oem, char** configs, - int configs_size); -TESS_API int TessBaseAPIInit2(TessBaseAPI* handle, - const char* datapath, - const char* language, - TessOcrEngineMode oem); -TESS_API int TessBaseAPIInit3(TessBaseAPI* handle, - const char* datapath, - const char* language); +TESS_API int TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, + const char* language, TessOcrEngineMode oem, + char** configs, int configs_size); +TESS_API int TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, + const char* language, TessOcrEngineMode oem); +TESS_API int TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, + const char* language); -TESS_API int TessBaseAPIInit4( - TessBaseAPI* handle, const char* datapath, const char* language, - TessOcrEngineMode mode, char** configs, int configs_size, char** vars_vec, - char** vars_values, size_t vars_vec_size, BOOL set_only_non_debug_params); +TESS_API int TessBaseAPIInit4(TessBaseAPI* handle, const char* datapath, + const char* language, TessOcrEngineMode mode, + char** configs, int configs_size, char** vars_vec, + char** vars_values, size_t vars_vec_size, + BOOL set_only_non_debug_params); -TESS_API const char* -TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); -TESS_API char** -TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); -TESS_API char** -TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI* handle); +TESS_API const char* TessBaseAPIGetInitLanguagesAsString( + const TessBaseAPI* handle); +TESS_API char** TessBaseAPIGetLoadedLanguagesAsVector( + const TessBaseAPI* handle); +TESS_API char** TessBaseAPIGetAvailableLanguagesAsVector( + const TessBaseAPI* handle); -TESS_API int TessBaseAPIInitLangMod(TessBaseAPI* handle, - const char* datapath, - const char* language); +TESS_API int TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, + const char* language); TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI* handle, - const char* filename); + const char* filename); TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, - const char* filename); + const char* filename); TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI* handle, - TessPageSegMode mode); -TESS_API TessPageSegMode -TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); + TessPageSegMode mode); +TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); TESS_API char* TessBaseAPIRect(TessBaseAPI* handle, - const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, int left, int top, - int width, int height); + const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height); TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); TESS_API void TessBaseAPISetImage(TessBaseAPI* handle, - const unsigned char* imagedata, - int width, int height, - int bytes_per_pixel, - int bytes_per_line); -TESS_API void TessBaseAPISetImage2(TessBaseAPI* handle, - struct Pix* pix); + const unsigned char* imagedata, int width, + int height, int bytes_per_pixel, + int bytes_per_line); +TESS_API void TessBaseAPISetImage2(TessBaseAPI* handle, struct Pix* pix); -TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI* handle, - int ppi); +TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi); -TESS_API void TessBaseAPISetRectangle(TessBaseAPI* handle, int left, - int top, int width, int height); +TESS_API void TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, + int width, int height); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TessBaseAPISetThresholder( - TessBaseAPI* handle, TessImageThresholder* thresholder); +TESS_API void TessBaseAPISetThresholder(TessBaseAPI* handle, + TessImageThresholder* thresholder); #endif -TESS_API struct Pix* -TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); +TESS_API struct Pix* TessBaseAPIGetThresholdedImage(TessBaseAPI* handle); TESS_API struct Boxa* TessBaseAPIGetRegions(TessBaseAPI* handle, - struct Pixa** pixa); + struct Pixa** pixa); TESS_API struct Boxa* TessBaseAPIGetTextlines(TessBaseAPI* handle, - struct Pixa** pixa, - int** blockids); -TESS_API struct Boxa* -TessBaseAPIGetTextlines1(TessBaseAPI* handle, BOOL raw_image, int raw_padding, - struct Pixa** pixa, int** blockids, int** paraids); + struct Pixa** pixa, + int** blockids); +TESS_API struct Boxa* TessBaseAPIGetTextlines1(TessBaseAPI* handle, + BOOL raw_image, int raw_padding, + struct Pixa** pixa, + int** blockids, int** paraids); TESS_API struct Boxa* TessBaseAPIGetStrips(TessBaseAPI* handle, - struct Pixa** pixa, - int** blockids); + struct Pixa** pixa, int** blockids); TESS_API struct Boxa* TessBaseAPIGetWords(TessBaseAPI* handle, - struct Pixa** pixa); -TESS_API struct Boxa* -TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, struct Pixa** cc); -TESS_API struct Boxa* TessBaseAPIGetComponentImages( - TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, - struct Pixa** pixa, int** blockids); + struct Pixa** pixa); +TESS_API struct Boxa* TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, + struct Pixa** cc); +TESS_API struct Boxa* TessBaseAPIGetComponentImages(TessBaseAPI* handle, + TessPageIteratorLevel level, + BOOL text_only, + struct Pixa** pixa, + int** blockids); TESS_API struct Boxa* TessBaseAPIGetComponentImages1( TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, BOOL raw_image, int raw_padding, struct Pixa** pixa, int** blockids, int** paraids); -TESS_API int -TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); +TESS_API int TessBaseAPIGetThresholdedImageScaleFactor( + const TessBaseAPI* handle); -TESS_API TessPageIterator* -TessBaseAPIAnalyseLayout(TessBaseAPI* handle); +TESS_API TessPageIterator* TessBaseAPIAnalyseLayout(TessBaseAPI* handle); -TESS_API int TessBaseAPIRecognize(TessBaseAPI* handle, - ETEXT_DESC* monitor); +TESS_API int TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); #ifndef DISABLED_LEGACY_ENGINE TESS_API int TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, - ETEXT_DESC* monitor); + ETEXT_DESC* monitor); #endif -TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI* handle, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer); -TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI* handle, - struct Pix* pix, int page_index, - const char* filename, - const char* retry_config, - int timeout_millisec, - TessResultRenderer* renderer); +TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); +TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI* handle, struct Pix* pix, + int page_index, const char* filename, + const char* retry_config, + int timeout_millisec, + TessResultRenderer* renderer); -TESS_API TessResultIterator* -TessBaseAPIGetIterator(TessBaseAPI* handle); -TESS_API TessMutableIterator* -TessBaseAPIGetMutableIterator(TessBaseAPI* handle); +TESS_API TessResultIterator* TessBaseAPIGetIterator(TessBaseAPI* handle); +TESS_API TessMutableIterator* TessBaseAPIGetMutableIterator( + TessBaseAPI* handle); TESS_API char* TessBaseAPIGetUTF8Text(TessBaseAPI* handle); -TESS_API char* TessBaseAPIGetHOCRText(TessBaseAPI* handle, - int page_number); +TESS_API char* TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); -TESS_API char* TessBaseAPIGetAltoText(TessBaseAPI* handle, - int page_number); -TESS_API char* TessBaseAPIGetTsvText(TessBaseAPI* handle, - int page_number); +TESS_API char* TessBaseAPIGetAltoText(TessBaseAPI* handle, int page_number); +TESS_API char* TessBaseAPIGetTsvText(TessBaseAPI* handle, int page_number); -TESS_API char* TessBaseAPIGetBoxText(TessBaseAPI* handle, - int page_number); -TESS_API char* TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, - int page_number); +TESS_API char* TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); +TESS_API char* TessBaseAPIGetLSTMBoxText(TessBaseAPI* handle, int page_number); TESS_API char* TessBaseAPIGetWordStrBoxText(TessBaseAPI* handle, - int page_number); + int page_number); TESS_API char* TessBaseAPIGetUNLVText(TessBaseAPI* handle); TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI* handle); @@ -420,23 +384,20 @@ TESS_API int* TessBaseAPIAllWordConfidences(TessBaseAPI* handle); #ifndef DISABLED_LEGACY_ENGINE TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, - TessPageSegMode mode, - const char* wordstr); + TessPageSegMode mode, + const char* wordstr); #endif // ndef DISABLED_LEGACY_ENGINE TESS_API void TessBaseAPIClear(TessBaseAPI* handle); TESS_API void TessBaseAPIEnd(TessBaseAPI* handle); -TESS_API int TessBaseAPIIsValidWord(TessBaseAPI* handle, - const char* word); -TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI* handle, - int* out_offset, - float* out_slope); +TESS_API int TessBaseAPIIsValidWord(TessBaseAPI* handle, const char* word); +TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, + float* out_slope); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API void TessBaseAPISetDictFunc(TessBaseAPI* handle, - TessDictFunc f); +TESS_API void TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f); TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI* handle); @@ -445,32 +406,33 @@ TESS_API void TessBaseAPISetProbabilityInContextFunc( // Call TessDeleteText(*best_script_name) to free memory allocated by this // function -TESS_API BOOL TessBaseAPIDetectOrientationScript( - TessBaseAPI* handle, int* orient_deg, float* orient_conf, - const char** script_name, float* script_conf); +TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, + int* orient_deg, + float* orient_conf, + const char** script_name, + float* script_conf); #endif // def TESS_CAPI_INCLUDE_BASEAPI -TESS_API const char* TessBaseAPIGetUnichar(TessBaseAPI* handle, - int unichar_id); +TESS_API const char* TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, - double margin); + double margin); #ifdef TESS_CAPI_INCLUDE_BASEAPI -TESS_API const TessDawg* TessBaseAPIGetDawg(const TessBaseAPI* handle, - int i); +TESS_API const TessDawg* TessBaseAPIGetDawg(const TessBaseAPI* handle, int i); TESS_API int TessBaseAPINumDawgs(const TessBaseAPI* handle); TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI* handle); TESS_API void TessBaseAPIInitTruthCallback(TessBaseAPI* handle, - TessTruthCallback cb); + TessTruthCallback cb); -TESS_API void TessBaseGetBlockTextOrientations( - TessBaseAPI* handle, int** block_orientation, bool** vertical_writing); +TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI* handle, + int** block_orientation, + bool** vertical_writing); #endif @@ -478,24 +440,24 @@ TESS_API void TessBaseGetBlockTextOrientations( TESS_API void TessPageIteratorDelete(TessPageIterator* handle); -TESS_API TessPageIterator* -TessPageIteratorCopy(const TessPageIterator* handle); +TESS_API TessPageIterator* TessPageIteratorCopy(const TessPageIterator* handle); TESS_API void TessPageIteratorBegin(TessPageIterator* handle); TESS_API BOOL TessPageIteratorNext(TessPageIterator* handle, - TessPageIteratorLevel level); + TessPageIteratorLevel level); -TESS_API BOOL TessPageIteratorIsAtBeginningOf( - const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, + TessPageIteratorLevel level); -TESS_API BOOL TessPageIteratorIsAtFinalElement( - const TessPageIterator* handle, TessPageIteratorLevel level, - TessPageIteratorLevel element); +TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, + TessPageIteratorLevel level, + TessPageIteratorLevel element); -TESS_API BOOL TessPageIteratorBoundingBox( - const TessPageIterator* handle, TessPageIteratorLevel level, int* left, - int* top, int* right, int* bottom); +TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator* handle, + TessPageIteratorLevel level, + int* left, int* top, int* right, + int* bottom); TESS_API TessPolyBlockType TessPageIteratorBlockType(const TessPageIterator* handle); @@ -503,14 +465,15 @@ TessPageIteratorBlockType(const TessPageIterator* handle); TESS_API struct Pix* TessPageIteratorGetBinaryImage( const TessPageIterator* handle, TessPageIteratorLevel level); -TESS_API struct Pix* TessPageIteratorGetImage( - const TessPageIterator* handle, TessPageIteratorLevel level, int padding, - struct Pix* original_image, int* left, int* top); +TESS_API struct Pix* TessPageIteratorGetImage(const TessPageIterator* handle, + TessPageIteratorLevel level, + int padding, + struct Pix* original_image, + int* left, int* top); TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator* handle, - TessPageIteratorLevel level, - int* x1, int* y1, int* x2, - int* y2); + TessPageIteratorLevel level, int* x1, + int* y1, int* x2, int* y2); TESS_API void TessPageIteratorOrientation( TessPageIterator* handle, TessOrientation* orientation, @@ -524,23 +487,23 @@ TESS_API void TessPageIteratorParagraphInfo( /* Result iterator */ TESS_API void TessResultIteratorDelete(TessResultIterator* handle); -TESS_API TessResultIterator* -TessResultIteratorCopy(const TessResultIterator* handle); -TESS_API TessPageIterator* -TessResultIteratorGetPageIterator(TessResultIterator* handle); -TESS_API const TessPageIterator* -TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); -TESS_API TessChoiceIterator* -TessResultIteratorGetChoiceIterator(const TessResultIterator* handle); +TESS_API TessResultIterator* TessResultIteratorCopy( + const TessResultIterator* handle); +TESS_API TessPageIterator* TessResultIteratorGetPageIterator( + TessResultIterator* handle); +TESS_API const TessPageIterator* TessResultIteratorGetPageIteratorConst( + const TessResultIterator* handle); +TESS_API TessChoiceIterator* TessResultIteratorGetChoiceIterator( + const TessResultIterator* handle); TESS_API BOOL TessResultIteratorNext(TessResultIterator* handle, - TessPageIteratorLevel level); -TESS_API char* TessResultIteratorGetUTF8Text( - const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API float TessResultIteratorConfidence( - const TessResultIterator* handle, TessPageIteratorLevel level); -TESS_API const char* -TessResultIteratorWordRecognitionLanguage(const TessResultIterator* handle); + TessPageIteratorLevel level); +TESS_API char* TessResultIteratorGetUTF8Text(const TessResultIterator* handle, + TessPageIteratorLevel level); +TESS_API float TessResultIteratorConfidence(const TessResultIterator* handle, + TessPageIteratorLevel level); +TESS_API const char* TessResultIteratorWordRecognitionLanguage( + const TessResultIterator* handle); TESS_API const char* TessResultIteratorWordFontAttributes( const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, BOOL* is_smallcaps, @@ -548,8 +511,7 @@ TESS_API const char* TessResultIteratorWordFontAttributes( TESS_API BOOL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); -TESS_API BOOL -TessResultIteratorWordIsNumeric(const TessResultIterator* handle); +TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); TESS_API BOOL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); TESS_API BOOL @@ -559,53 +521,50 @@ TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle); TESS_API void TessChoiceIteratorDelete(TessChoiceIterator* handle); TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator* handle); -TESS_API const char* -TessChoiceIteratorGetUTF8Text(const TessChoiceIterator* handle); -TESS_API float -TessChoiceIteratorConfidence(const TessChoiceIterator* handle); +TESS_API const char* TessChoiceIteratorGetUTF8Text( + const TessChoiceIterator* handle); +TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator* handle); /* Progress monitor */ TESS_API ETEXT_DESC* TessMonitorCreate(); TESS_API void TessMonitorDelete(ETEXT_DESC* monitor); TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC* monitor, - TessCancelFunc cancelFunc); -TESS_API void TessMonitorSetCancelThis(ETEXT_DESC* monitor, - void* cancelThis); + TessCancelFunc cancelFunc); +TESS_API void TessMonitorSetCancelThis(ETEXT_DESC* monitor, void* cancelThis); TESS_API void* TessMonitorGetCancelThis(ETEXT_DESC* monitor); -TESS_API void -TessMonitorSetProgressFunc(ETEXT_DESC* monitor, TessProgressFunc progressFunc); +TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC* monitor, + TessProgressFunc progressFunc); TESS_API int TessMonitorGetProgress(ETEXT_DESC* monitor); -TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, - int deadline); +TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC* monitor, int deadline); #ifndef DISABLED_LEGACY_ENGINE # ifdef TESS_CAPI_INCLUDE_BASEAPI TESS_API void TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, - TessFillLatticeFunc f); + TessFillLatticeFunc f); -TESS_API void TessBaseAPIGetFeaturesForBlob( - TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, - int* num_features, int* FeatureOutlineIndex); +TESS_API void TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, + INT_FEATURE_STRUCT* int_features, + int* num_features, + int* FeatureOutlineIndex); TESS_API ROW* TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, - int right, int bottom); + int right, int bottom); -TESS_API void TessBaseAPIRunAdaptiveClassifier( - TessBaseAPI* handle, TBLOB* blob, int num_max_matches, int* unichar_ids, - float* ratings, int* num_matches_returned); +TESS_API void TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, + int num_max_matches, + int* unichar_ids, float* ratings, + int* num_matches_returned); -TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, - float descender, float ascender); +TESS_API ROW* TessMakeTessOCRRow(float baseline, float xheight, float descender, + float ascender); TESS_API TBLOB* TessMakeTBLOB(Pix* pix); -TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, - BOOL numeric_mode); +TESS_API void TessNormalizeTBLOB(TBLOB* tblob, ROW* row, BOOL numeric_mode); -TESS_API BLOCK_LIST* -TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); +TESS_API BLOCK_LIST* TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); TESS_API void TessDeleteBlockList(BLOCK_LIST* block_list); diff --git a/include/tesseract/genericvector.h b/include/tesseract/genericvector.h index 5f1d44c6c..61b0a22a6 100644 --- a/include/tesseract/genericvector.h +++ b/include/tesseract/genericvector.h @@ -21,11 +21,11 @@ #include #include -#include // for LONG_MAX -#include // for uint32_t +#include // for LONG_MAX +#include // for uint32_t #include #include -#include // for std::function +#include // for std::function #include "helpers.h" #include "serialis.h" @@ -173,8 +173,7 @@ class GenericVector { // Returns false on error or if the callback returns false. // DEPRECATED. Use [De]Serialize[Classes] instead. bool write(FILE* f, std::function cb) const; - bool read(tesseract::TFile* f, - std::function cb); + bool read(tesseract::TFile* f, std::function cb); // Writes a vector of simple types to the given file. Assumes that bitwise // read/write of T will work. Returns false in case of error. // TODO(rays) Change all callers to use TFile and remove deprecated methods. @@ -647,12 +646,12 @@ class GenericVectorEqEq : public GenericVector { GenericVectorEqEq() { using namespace std::placeholders; // for _1 GenericVector::set_compare_callback( - std::bind(tesseract::cmp_eq, _1, _2)); + std::bind(tesseract::cmp_eq, _1, _2)); } explicit GenericVectorEqEq(int size) : GenericVector(size) { using namespace std::placeholders; // for _1 GenericVector::set_compare_callback( - std::bind(tesseract::cmp_eq, _1, _2)); + std::bind(tesseract::cmp_eq, _1, _2)); } }; @@ -881,8 +880,8 @@ void GenericVector::delete_data_pointers() { } template -bool GenericVector::write( - FILE* f, std::function cb) const { +bool GenericVector::write(FILE* f, + std::function cb) const { if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) { return false; } @@ -904,8 +903,8 @@ bool GenericVector::write( } template -bool GenericVector::read( - tesseract::TFile* f, std::function cb) { +bool GenericVector::read(tesseract::TFile* f, + std::function cb) { int32_t reserved; if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) { return false; diff --git a/include/tesseract/ltrresultiterator.h b/include/tesseract/ltrresultiterator.h index 2a3c02c08..f9820ec37 100644 --- a/include/tesseract/ltrresultiterator.h +++ b/include/tesseract/ltrresultiterator.h @@ -46,6 +46,7 @@ class Tesseract; class TESS_API LTRResultIterator : public PageIterator { friend class ChoiceIterator; + public: // page_res and tesseract come directly from the BaseAPI. // The rectangle parameters are copied indirectly from the Thresholder, @@ -59,9 +60,8 @@ class TESS_API LTRResultIterator : public PageIterator { // The scaled_yres indicates the effective resolution of the binary image // that tesseract has been given by the Thresholder. // After the constructor, Begin has already been called. - LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, + LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, + int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height); ~LTRResultIterator() override; @@ -84,10 +84,10 @@ class TESS_API LTRResultIterator : public PageIterator { char* GetUTF8Text(PageIteratorLevel level) const; // Set the string inserted at the end of each text line. "\n" by default. - void SetLineSeparator(const char *new_line); + void SetLineSeparator(const char* new_line); // Set the string inserted at the end of each paragraph. "\n" by default. - void SetParagraphSeparator(const char *new_para); + void SetParagraphSeparator(const char* new_para); // Returns the mean confidence of the current object at the given level. // The number should be interpreted as a percent probability. (0.0f-100.0f) @@ -107,14 +107,10 @@ class TESS_API LTRResultIterator : public PageIterator { // the iterator itself, ie rendered invalid by various members of // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. // Pointsize is returned in printers points (1/72 inch.) - const char* WordFontAttributes(bool* is_bold, - bool* is_italic, - bool* is_underlined, - bool* is_monospace, - bool* is_serif, - bool* is_smallcaps, - int* pointsize, - int* font_id) const; + const char* WordFontAttributes(bool* is_bold, bool* is_italic, + bool* is_underlined, bool* is_monospace, + bool* is_serif, bool* is_smallcaps, + int* pointsize, int* font_id) const; // Return the name of the language used to recognize this word. // On error, nullptr. Do not delete this pointer. @@ -137,22 +133,22 @@ class TESS_API LTRResultIterator : public PageIterator { // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle // of the current word. - const void *GetParamsTrainingBundle() const; + const void* GetParamsTrainingBundle() const; // Returns a pointer to the string with blamer information for this word. // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerDebug() const; + const char* GetBlamerDebug() const; // Returns a pointer to the string with misadaption information for this word. // Assumes that the word's blamer_bundle is not nullptr. - const char *GetBlamerMisadaptionDebug() const; + const char* GetBlamerMisadaptionDebug() const; // Returns true if a truth string was recorded for the current word. bool HasTruthString() const; // Returns true if the given string is equivalent to the truth string for // the current word. - bool EquivalentToTruth(const char *str) const; + bool EquivalentToTruth(const char* str) const; // Returns a null terminated UTF-8 encoded truth string for the current word. // Use delete [] to free after use. @@ -164,7 +160,7 @@ class TESS_API LTRResultIterator : public PageIterator { // Returns a pointer to serialized choice lattice. // Fills lattice_size with the number of bytes in lattice data. - const char *WordLattice(int *lattice_size) const; + const char* WordLattice(int* lattice_size) const; // ============= Functions that refer to symbols only ============. @@ -182,8 +178,8 @@ class TESS_API LTRResultIterator : public PageIterator { bool SymbolIsDropcap() const; protected: - const char *line_separator_; - const char *paragraph_separator_; + const char* line_separator_; + const char* paragraph_separator_; }; // Class to iterate over the classifier choices for a single RIL_SYMBOL. @@ -222,7 +218,7 @@ class ChoiceIterator { std::vector>>* Timesteps() const; private: - //clears the remaining spaces out of the results and adapt the probabilities + // clears the remaining spaces out of the results and adapt the probabilities void filterSpaces(); // Pointer to the WERD_RES object owned by the API. WERD_RES* word_res_; diff --git a/include/tesseract/osdetect.h b/include/tesseract/osdetect.h index c0864c404..a6333079f 100644 --- a/include/tesseract/osdetect.h +++ b/include/tesseract/osdetect.h @@ -28,7 +28,8 @@ class BLOB_CHOICE_LIST; class STRING; class TO_BLOCK_LIST; class UNICHARSET; -template class GenericVector; +template +class GenericVector; namespace tesseract { class Tesseract; @@ -38,8 +39,8 @@ class Tesseract; const int kMaxNumberOfScripts = 116 + 1 + 2 + 1; struct OSBestResult { - OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), - oconfidence(0.0) {} + OSBestResult() + : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {} int orientation_id; int script_id; float sconfidence; @@ -49,8 +50,7 @@ struct OSBestResult { struct OSResults { OSResults() : unicharset(nullptr) { for (int i = 0; i < 4; ++i) { - for (int j = 0; j < kMaxNumberOfScripts; ++j) - scripts_na[i][j] = 0; + for (int j = 0; j < kMaxNumberOfScripts; ++j) scripts_na[i][j] = 0; orientations[i] = 0; } } @@ -87,6 +87,7 @@ class OrientationDetector { OSResults* results); bool detect_blob(BLOB_CHOICE_LIST* scores); int get_orientation(); + private: OSResults* osr_; const GenericVector* allowed_scripts_; @@ -94,10 +95,11 @@ class OrientationDetector { class ScriptDetector { public: - ScriptDetector(const GenericVector* allowed_scripts, - OSResults* osr, tesseract::Tesseract* tess); + ScriptDetector(const GenericVector* allowed_scripts, OSResults* osr, + tesseract::Tesseract* tess); void detect_blob(BLOB_CHOICE_LIST* scores); bool must_stop(int orientation); + private: OSResults* osr_; static const char* korean_script_; @@ -115,22 +117,18 @@ class ScriptDetector { const GenericVector* allowed_scripts_; }; -int orientation_and_script_detection(STRING& filename, - OSResults*, +int orientation_and_script_detection(STRING& filename, OSResults*, tesseract::Tesseract*); -int os_detect(TO_BLOCK_LIST* port_blocks, - OSResults* osr, +int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr, tesseract::Tesseract* tess); int os_detect_blobs(const GenericVector* allowed_scripts, - BLOBNBOX_CLIST* blob_list, - OSResults* osr, + BLOBNBOX_CLIST* blob_list, OSResults* osr, tesseract::Tesseract* tess); -bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, - ScriptDetector* s, OSResults*, - tesseract::Tesseract* tess); +bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o, ScriptDetector* s, + OSResults*, tesseract::Tesseract* tess); // Helper method to convert an orientation index to its value in degrees. // The value represents the amount of clockwise rotation in degrees that must be diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h index 53581428e..d830cd04f 100644 --- a/include/tesseract/pageiterator.h +++ b/include/tesseract/pageiterator.h @@ -65,10 +65,9 @@ class TESS_API PageIterator { * that tesseract has been given by the Thresholder. * After the constructor, Begin has already been called. */ - PageIterator(PAGE_RES* page_res, Tesseract* tesseract, - int scale, int scaled_yres, - int rect_left, int rect_top, - int rect_width, int rect_height); + PageIterator(PAGE_RES* page_res, Tesseract* tesseract, int scale, + int scaled_yres, int rect_left, int rect_top, int rect_width, + int rect_height); virtual ~PageIterator(); /** @@ -164,7 +163,7 @@ class TESS_API PageIterator { * equal to other: 0 * after other: 1 */ - int Cmp(const PageIterator &other) const; + int Cmp(const PageIterator& other) const; // ============= Accessing data ==============. // Coordinate system: @@ -203,17 +202,17 @@ class TESS_API PageIterator { * from a grey image. The padding argument to GetImage can be used to expand * the image to include more foreground pixels. See GetImage below. */ - bool BoundingBox(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; - bool BoundingBox(PageIteratorLevel level, int padding, - int* left, int* top, int* right, int* bottom) const; + bool BoundingBox(PageIteratorLevel level, int* left, int* top, int* right, + int* bottom) const; + bool BoundingBox(PageIteratorLevel level, int padding, int* left, int* top, + int* right, int* bottom) const; /** * Returns the bounding rectangle of the object in a coordinate system of the * working image rectangle having its origin at (rect_left_, rect_top_) with * respect to the original image and is scaled by a factor scale_. */ - bool BoundingBoxInternal(PageIteratorLevel level, - int* left, int* top, int* right, int* bottom) const; + bool BoundingBoxInternal(PageIteratorLevel level, int* left, int* top, + int* right, int* bottom) const; /** Returns whether there is no object of a given level. */ bool Empty(PageIteratorLevel level) const; @@ -261,8 +260,8 @@ class TESS_API PageIterator { * WARNING: with vertical text, baselines may be vertical! * Returns false if there is no baseline at the current position. */ - bool Baseline(PageIteratorLevel level, - int* x1, int* y1, int* x2, int* y2) const; + bool Baseline(PageIteratorLevel level, int* x1, int* y1, int* x2, + int* y2) const; /** * Returns orientation for the block the iterator points to. @@ -272,10 +271,10 @@ class TESS_API PageIterator { * block anti-clockwise for it to be level? * -Pi/4 <= deskew_angle <= Pi/4 */ - void Orientation(tesseract::Orientation *orientation, - tesseract::WritingDirection *writing_direction, - tesseract::TextlineOrder *textline_order, - float *deskew_angle) const; + void Orientation(tesseract::Orientation* orientation, + tesseract::WritingDirection* writing_direction, + tesseract::TextlineOrder* textline_order, + float* deskew_angle) const; /** * Returns information about the current paragraph, if available. @@ -305,16 +304,15 @@ class TESS_API PageIterator { * first_line_indent for subsequent paragraphs in this block * of text. */ - void ParagraphInfo(tesseract::ParagraphJustification *justification, - bool *is_list_item, - bool *is_crown, - int *first_line_indent) const; + void ParagraphInfo(tesseract::ParagraphJustification* justification, + bool* is_list_item, bool* is_crown, + int* first_line_indent) const; // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle // of the current word to the given pointer (takes ownership of the pointer) // and returns true. // Can only be used when iterating on the word level. - bool SetWordBlamerBundle(BlamerBundle *blamer_bundle); + bool SetWordBlamerBundle(BlamerBundle* blamer_bundle); protected: /** diff --git a/include/tesseract/publictypes.h b/include/tesseract/publictypes.h index 0901d8d66..2a879658c 100644 --- a/include/tesseract/publictypes.h +++ b/include/tesseract/publictypes.h @@ -48,23 +48,23 @@ constexpr int kResolutionEstimationFactor = 10; * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions * below, as well as kPolyBlockNames in layout_test.cc. * Used extensively by ColPartition, and POLY_BLOCK. -*/ + */ enum PolyBlockType { - PT_UNKNOWN, // Type is not yet known. Keep as the first element. - PT_FLOWING_TEXT, // Text that lives inside a column. - PT_HEADING_TEXT, // Text that spans more than one column. - PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. - PT_EQUATION, // Partition belonging to an equation region. + PT_UNKNOWN, // Type is not yet known. Keep as the first element. + PT_FLOWING_TEXT, // Text that lives inside a column. + PT_HEADING_TEXT, // Text that spans more than one column. + PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region. + PT_EQUATION, // Partition belonging to an equation region. PT_INLINE_EQUATION, // Partition has inline equation. - PT_TABLE, // Partition belonging to a table region. - PT_VERTICAL_TEXT, // Text-line runs vertically. - PT_CAPTION_TEXT, // Text that belongs to an image. - PT_FLOWING_IMAGE, // Image that lives inside a column. - PT_HEADING_IMAGE, // Image that spans more than one column. - PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. - PT_HORZ_LINE, // Horizontal Line. - PT_VERT_LINE, // Vertical Line. - PT_NOISE, // Lies outside of any column. + PT_TABLE, // Partition belonging to a table region. + PT_VERTICAL_TEXT, // Text-line runs vertically. + PT_CAPTION_TEXT, // Text that belongs to an image. + PT_FLOWING_IMAGE, // Image that lives inside a column. + PT_HEADING_IMAGE, // Image that spans more than one column. + PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region. + PT_HORZ_LINE, // Horizontal Line. + PT_VERT_LINE, // Vertical Line. + PT_NOISE, // Lies outside of any column. PT_COUNT }; @@ -127,7 +127,7 @@ enum Orientation { * * For English text, the writing direction is left-to-right. For the * Chinese text in the above example, the writing direction is top-to-bottom. -*/ + */ enum WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, @@ -144,7 +144,7 @@ enum WritingDirection { * * Note that only some combinations make sense. For example, * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM -*/ + */ enum TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, @@ -155,27 +155,28 @@ enum TextlineOrder { * Possible modes for page layout analysis. These *must* be kept in order * of decreasing amount of layout analysis to be done, except for OSD_ONLY, * so that the inequality test macros below work. -*/ + */ enum PageSegMode { PSM_OSD_ONLY = 0, ///< Orientation and script detection only. PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and - ///< script detection. (OSD) + ///< script detection. (OSD) PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR. PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD. PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes. - PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of vertically - ///< aligned text. - PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.) - PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line. - PSM_SINGLE_WORD = 8, ///< Treat the image as a single word. - PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle. - PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character. - PSM_SPARSE_TEXT = 11, ///< Find as much text as possible in no particular order. + PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of + ///< vertically aligned text. + PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.) + PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line. + PSM_SINGLE_WORD = 8, ///< Treat the image as a single word. + PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle. + PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character. + PSM_SPARSE_TEXT = + 11, ///< Find as much text as possible in no particular order. PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det. - PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing + PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing ///< hacks that are Tesseract-specific. - PSM_COUNT ///< Number of enum entries. + PSM_COUNT ///< Number of enum entries. }; /** @@ -183,7 +184,7 @@ enum PageSegMode { * layout analysis are enabled. * *Depend critically on the order of elements of PageSegMode.* * NOTE that arg is an int for compatibility with INT_PARAM. -*/ + */ inline bool PSM_OSD_ENABLED(int pageseg_mode) { return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD; } @@ -204,14 +205,14 @@ inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) { } inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) { return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) || - pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; + pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD; } /** * enum of the elements of the page hierarchy, used in ResultIterator * to provide functions that operate on each level without having to * have 5x as many functions. -*/ + */ enum PageIteratorLevel { RIL_BLOCK, // Block of text/image/separator line. RIL_PARA, // Paragraph within a block. @@ -260,7 +261,7 @@ enum ParagraphJustification { * appropriate changes to all the enums mirroring it (e.g. OCREngine in * cityblock/workflow/detection/detection_storage.proto). Such enums will * mention the connection to OcrEngineMode in the comments. -*/ + */ enum OcrEngineMode { OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated OEM_LSTM_ONLY, // Run just the LSTM line recognizer. diff --git a/include/tesseract/renderer.h b/include/tesseract/renderer.h index 177fd5ff9..d240d9197 100644 --- a/include/tesseract/renderer.h +++ b/include/tesseract/renderer.h @@ -22,9 +22,10 @@ // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. #include // for std::string + #include "genericvector.h" #include "platform.h" -#include "strngs.h" // for STRING +#include "strngs.h" // for STRING struct Pix; diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index 445305b72..440855385 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -22,15 +22,18 @@ #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_ #define TESSERACT_CCMAIN_RESULT_ITERATOR_H_ -#include // for std::pair -#include // for std::vector +#include // for std::pair +#include // for std::vector + #include "ltrresultiterator.h" // for LTRResultIterator #include "platform.h" // for TESS_API, TESS_LOCAL #include "publictypes.h" // for PageIteratorLevel #include "unichar.h" // for StrongScriptDirection -template class GenericVector; -template class GenericVectorEqEq; +template +class GenericVector; +template +class GenericVectorEqEq; class STRING; @@ -40,7 +43,7 @@ class Tesseract; class TESS_API ResultIterator : public LTRResultIterator { public: - static ResultIterator *StartOfParagraph(const LTRResultIterator &resit); + static ResultIterator* StartOfParagraph(const LTRResultIterator& resit); /** * ResultIterator is copy constructible! @@ -81,9 +84,9 @@ class TESS_API ResultIterator : public LTRResultIterator { * Implement PageIterator's IsAtFinalElement correctly in a BiDi context. * For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we * point at the last word in a paragraph. See PageIterator for full comment. - */ + */ bool IsAtFinalElement(PageIteratorLevel level, - PageIteratorLevel element) const override; + PageIteratorLevel element) const override; // ============= Functions that refer to words only ============. // Returns the number of blanks before the current word. @@ -94,21 +97,21 @@ class TESS_API ResultIterator : public LTRResultIterator { /** * Returns the null terminated UTF-8 encoded text string for the current * object at the given level. Use delete [] to free after use. - */ + */ virtual char* GetUTF8Text(PageIteratorLevel level) const; /** * Returns the LSTM choices for every LSTM timestep for the current word. - */ + */ virtual std::vector>>>* GetRawLSTMTimesteps() const; virtual std::vector>>* - GetBestLSTMSymbolChoices() const; + GetBestLSTMSymbolChoices() const; /** * Return whether the current paragraph's dominant reading direction * is left-to-right (as opposed to right-to-left). - */ + */ bool ParagraphIsLtr() const; // ============= Exposed only for testing =============. @@ -137,8 +140,8 @@ class TESS_API ResultIterator : public LTRResultIterator { */ static void CalculateTextlineOrder( bool paragraph_is_ltr, - const GenericVector &word_dirs, - GenericVectorEqEq *reading_order); + const GenericVector& word_dirs, + GenericVectorEqEq* reading_order); static const int kMinorRunStart; static const int kMinorRunEnd; @@ -151,7 +154,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * it resets to the beginning of the paragraph instead of staying wherever * resit might have pointed. */ - TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit); + TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit); private: /** @@ -172,13 +175,13 @@ class TESS_API ResultIterator : public LTRResultIterator { * right-to-left characters and was treated as neutral. */ void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVectorEqEq *indices) const; + const LTRResultIterator& resit, + GenericVectorEqEq* indices) const; /** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */ void CalculateTextlineOrder(bool paragraph_is_ltr, - const LTRResultIterator &resit, - GenericVector *ssd, - GenericVectorEqEq *indices) const; + const LTRResultIterator& resit, + GenericVector* ssd, + GenericVectorEqEq* indices) const; /** * What is the index of the current word in a strict left-to-right reading @@ -190,7 +193,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Given an iterator pointing at a word, returns the logical reading order * of blob indices for the word. */ - void CalculateBlobOrder(GenericVector *blob_indices) const; + void CalculateBlobOrder(GenericVector* blob_indices) const; /** Precondition: current_paragraph_is_ltr_ is set. */ void MoveToLogicalStartOfTextline(); @@ -211,10 +214,10 @@ class TESS_API ResultIterator : public LTRResultIterator { * Append any extra marks that should be appended to this word when printed. * Mostly, these are Unicode BiDi control characters. */ - void AppendSuffixMarks(STRING *text) const; + void AppendSuffixMarks(STRING* text) const; /** Appends the current word in reading order to the given buffer.*/ - void AppendUTF8WordText(STRING *text) const; + void AppendUTF8WordText(STRING* text) const; /** * Appends the text of the current text line, *assuming this iterator is @@ -223,7 +226,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Each textline is terminated in a single newline character. * If the textline ends a paragraph, it gets a second terminal newline. */ - void IterateAndAppendUTF8TextlineText(STRING *text); + void IterateAndAppendUTF8TextlineText(STRING* text); /** * Appends the text of the current paragraph in reading order @@ -231,7 +234,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * Each textline is terminated in a single newline character, and the * paragraph gets an extra newline at the end. */ - void AppendUTF8ParagraphText(STRING *text) const; + void AppendUTF8ParagraphText(STRING* text) const; /** Returns whether the bidi_debug flag is set to at least min_level. */ bool BidiDebug(int min_level) const; diff --git a/include/tesseract/serialis.h b/include/tesseract/serialis.h index 076d28e76..25cb981d3 100644 --- a/include/tesseract/serialis.h +++ b/include/tesseract/serialis.h @@ -49,7 +49,8 @@ constexpr size_t countof(T const (&)[N]) noexcept { using FileReader = bool (*)(const char* filename, GenericVector* data); // Function to write a GenericVector to a whole file. // Returns false on failure. -using FileWriter = bool (*)(const GenericVector& data, const char* filename); +using FileWriter = bool (*)(const GenericVector& data, + const char* filename); // Deserialize data from file. bool DeSerialize(FILE* fp, char* data, size_t n = 1); diff --git a/include/tesseract/strngs.h b/include/tesseract/strngs.h index f0af9cd68..5d93ca78c 100644 --- a/include/tesseract/strngs.h +++ b/include/tesseract/strngs.h @@ -19,10 +19,11 @@ #ifndef STRNGS_H #define STRNGS_H -#include // for assert -#include // for uint32_t -#include // for FILE -#include // for strncpy +#include // for assert +#include // for uint32_t +#include // for FILE +#include // for strncpy + #include "platform.h" // for TESS_API namespace tesseract { diff --git a/include/tesseract/thresholder.h b/include/tesseract/thresholder.h index b63f51a2a..c7c5c41e0 100644 --- a/include/tesseract/thresholder.h +++ b/include/tesseract/thresholder.h @@ -150,8 +150,8 @@ class TESS_API ImageThresholder { /// Return true if we are processing the full image. bool IsFullImage() const { - return rect_left_ == 0 && rect_top_ == 0 && - rect_width_ == image_width_ && rect_height_ == image_height_; + return rect_left_ == 0 && rect_top_ == 0 && rect_width_ == image_width_ && + rect_height_ == image_height_; } // Otsu thresholds the rectangle, taking the rectangle from *this. @@ -161,27 +161,26 @@ class TESS_API ImageThresholder { /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values // arrays and also the bytes per pixel in src_pix. - void ThresholdRectToPix(Pix* src_pix, int num_channels, - const int* thresholds, const int* hi_values, - Pix** pix) const; + void ThresholdRectToPix(Pix* src_pix, int num_channels, const int* thresholds, + const int* hi_values, Pix** pix) const; protected: /// Clone or other copy of the source Pix. /// The pix will always be PixDestroy()ed on destruction of the class. - Pix* pix_; + Pix* pix_; - int image_width_; ///< Width of source pix_. - int image_height_; ///< Height of source pix_. - int pix_channels_; ///< Number of 8-bit channels in pix_. - int pix_wpl_; ///< Words per line of pix_. + int image_width_; ///< Width of source pix_. + int image_height_; ///< Height of source pix_. + int pix_channels_; ///< Number of 8-bit channels in pix_. + int pix_wpl_; ///< Words per line of pix_. // Limits of image rectangle to be processed. - int scale_; ///< Scale factor from original image. - int yres_; ///< y pixels/inch in source image. - int estimated_res_; ///< Resolution estimate from text size. - int rect_left_; - int rect_top_; - int rect_width_; - int rect_height_; + int scale_; ///< Scale factor from original image. + int yres_; ///< y pixels/inch in source image. + int estimated_res_; ///< Resolution estimate from text size. + int rect_left_; + int rect_top_; + int rect_width_; + int rect_height_; }; } // namespace tesseract. diff --git a/include/tesseract/unichar.h b/include/tesseract/unichar.h index 5daca9506..aa1d5dc8c 100644 --- a/include/tesseract/unichar.h +++ b/include/tesseract/unichar.h @@ -20,9 +20,11 @@ #define TESSERACT_CCUTIL_UNICHAR_H_ #include + #include #include #include + #include "platform.h" // Maximum number of characters that can be stored in a UNICHAR. Must be