From c86325e2f7affaff287ed143d42a19e204f19a44 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Thu, 31 Dec 2020 16:31:10 +0300 Subject: [PATCH] Use TESS_API for every public symbol. A public symbol is one that is exported from the library. This also applies to unit test and training symbols. Users will be limited to the public API, but the set of exported symbols will still be wider. Remove TESS_LOCAL. Fix several symbol issues that were made visible by these changes. All build systems must set -fvisibility=hidden for *nix systems. --- include/tesseract/baseapi.h | 12 +++--- include/tesseract/ltrresultiterator.h | 2 +- include/tesseract/pageiterator.h | 2 +- include/tesseract/platform.h | 24 ++++------- include/tesseract/resultiterator.h | 2 +- include/tesseract/unichar.h | 4 +- src/arch/intsimdmatrix.h | 10 ++--- src/ccmain/equationdetect.h | 2 +- src/ccmain/mutableiterator.h | 2 +- src/ccmain/paragraphs.h | 2 + src/ccmain/paragraphs_internal.h | 3 ++ src/ccmain/tesseractclass.h | 2 +- src/ccstruct/boxread.h | 6 +++ src/ccstruct/ccstruct.h | 2 +- src/ccstruct/fontinfo.h | 8 ++++ src/ccstruct/imagedata.h | 14 +++++- src/ccstruct/linlsq.h | 2 +- src/ccstruct/normalis.h | 2 +- src/ccstruct/ocrblock.h | 5 ++- src/ccstruct/ocrpara.h | 2 +- src/ccstruct/pageres.h | 4 +- src/ccstruct/points.h | 3 +- src/ccstruct/polyblk.h | 2 +- src/ccstruct/ratngs.h | 2 +- src/ccstruct/rect.h | 2 +- src/ccstruct/statistc.h | 2 +- src/ccstruct/stepblob.h | 2 +- src/ccstruct/werd.h | 2 +- src/ccutil/bitvector.h | 2 +- src/ccutil/ccutil.h | 2 +- src/ccutil/clst.h | 4 +- src/ccutil/elst.h | 6 +-- src/ccutil/elst2.h | 6 +-- src/ccutil/genericvector.h | 4 +- src/ccutil/indexmapbidi.h | 4 +- src/ccutil/params.h | 3 +- src/ccutil/scanutils.h | 1 + src/ccutil/serialis.h | 4 +- src/ccutil/tessdatamanager.h | 3 +- src/ccutil/unicharcompress.h | 2 +- src/ccutil/unicharset.h | 12 +++--- src/classify/classify.h | 2 +- src/classify/cluster.h | 6 +++ src/classify/clusttool.h | 2 + src/classify/featdefs.h | 4 ++ 
src/classify/intfeaturespace.h | 2 +- src/classify/intfx.h | 18 ++++---- src/classify/intproto.h | 4 ++ src/classify/kdtree.h | 1 + src/classify/ocrfeatures.h | 3 ++ src/classify/protos.h | 6 +++ src/classify/shapeclassifier.h | 2 +- src/classify/shapetable.h | 4 +- src/classify/tessclassifier.h | 2 +- src/classify/trainingsample.cpp | 19 +++----- src/classify/trainingsample.h | 10 +++-- src/cutil/emalloc.h | 3 ++ src/cutil/oldlist.h | 3 ++ src/dict/dawg.h | 4 +- src/dict/dict.h | 4 +- src/dict/trie.h | 2 +- src/lstm/convolve.h | 1 + src/lstm/fullyconnected.h | 1 + src/lstm/input.h | 2 + src/lstm/lstm.h | 1 + src/lstm/lstmrecognizer.h | 2 +- src/lstm/maxpool.h | 1 + src/lstm/network.h | 1 + src/lstm/networkio.h | 2 +- src/lstm/parallel.h | 1 + src/lstm/recodebeam.h | 2 +- src/lstm/reconfig.h | 1 + src/lstm/reversed.h | 2 + src/lstm/series.h | 3 ++ src/textord/alignedblob.h | 2 +- src/textord/bbgrid.h | 2 +- src/textord/blobgrid.h | 2 +- src/textord/colfind.h | 2 +- src/textord/colpartition.h | 2 +- src/textord/colpartitiongrid.h | 2 +- src/textord/equationdetectbase.h | 2 +- src/textord/tabfind.h | 2 +- src/textord/tablefind.h | 2 +- src/textord/tablerecog.h | 4 +- src/textord/textlineprojection.h | 2 +- src/training/commandlineflags.h | 11 +++++ src/training/commontraining.cpp | 7 ++- src/training/commontraining.h | 55 +++++++++++------------- src/training/ctc.h | 2 +- src/training/fileio.cpp | 1 - src/training/fileio.h | 6 +-- src/training/intfeaturemap.h | 2 +- src/training/lang_model_helpers.cpp | 16 ++++--- src/training/lang_model_helpers.h | 3 ++ src/training/ligature_table.h | 2 +- src/training/lstmtester.h | 2 +- src/training/lstmtrainer.h | 2 +- src/training/mastertrainer.h | 2 +- src/training/networkbuilder.h | 2 +- src/training/normstrngs.h | 12 ++++++ src/training/pango_font_info.h | 4 +- src/training/sampleiterator.cpp | 16 ++++++- src/training/stringrenderer.h | 2 +- src/training/tessopt.cpp | 11 ++--- src/training/tessopt.h | 11 +++-- 
src/training/tlog.h | 1 + src/training/trainingsampleset.h | 1 - src/training/unicharset_training_utils.h | 3 ++ src/training/validator.h | 2 +- src/viewer/scrollview.h | 3 +- src/wordrec/params_model.h | 2 +- src/wordrec/wordrec.h | 4 +- sw.cpp | 27 ++++++------ unittest/ligature_table_test.cc | 3 -- unittest/pango_font_info_test.cc | 4 -- unittest/stringrenderer_test.cc | 5 --- 116 files changed, 325 insertions(+), 224 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index e86ba571b..b3755142e 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -736,7 +736,7 @@ class TESS_API TessBaseAPI { protected: /** Common code for setting the image. Returns true if Init has been called. */ - TESS_LOCAL bool InternalSetImage(); + bool InternalSetImage(); /** * Run the thresholder to make the thresholded image. If pix is not nullptr, @@ -748,7 +748,7 @@ class TESS_API TessBaseAPI { * Find lines from the image making the BLOCK_LIST. * @return 0 on success. */ - TESS_LOCAL int FindLines(); + int FindLines(); /** Delete the pageres and block list ready for a new page. */ void ClearResults(); @@ -758,7 +758,7 @@ class TESS_API TessBaseAPI { * to ignore all BiDi smarts at that point. * delete once you're done with it. */ - TESS_LOCAL LTRResultIterator* GetLTRIterator(); + LTRResultIterator* GetLTRIterator(); /** * Return the length of the output text string, as UTF8, assuming @@ -766,12 +766,12 @@ class TESS_API TessBaseAPI { * and assuming a single character reject marker for each rejected character. * Also return the number of recognized blobs in blob_count. 
*/ - TESS_LOCAL int TextLength(int* blob_count); + int TextLength(int* blob_count); //// paragraphs.cpp //////////////////////////////////////////////////// - TESS_LOCAL void DetectParagraphs(bool after_text_recognition); + void DetectParagraphs(bool after_text_recognition); - TESS_LOCAL const PAGE_RES* GetPageRes() const { + const PAGE_RES* GetPageRes() const { return page_res_; } diff --git a/include/tesseract/ltrresultiterator.h b/include/tesseract/ltrresultiterator.h index 64c82dc10..56a30a4f4 100644 --- a/include/tesseract/ltrresultiterator.h +++ b/include/tesseract/ltrresultiterator.h @@ -183,7 +183,7 @@ class TESS_API LTRResultIterator : public PageIterator { }; // Class to iterate over the classifier choices for a single RIL_SYMBOL. -class ChoiceIterator { +class TESS_API ChoiceIterator { public: // Construction is from a LTRResultIterator that points to the symbol of // interest. The ChoiceIterator allows a one-shot iteration over the diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h index 436be63cd..92076cc88 100644 --- a/include/tesseract/pageiterator.h +++ b/include/tesseract/pageiterator.h @@ -319,7 +319,7 @@ class TESS_API PageIterator { * Sets up the internal data for iterating the blobs of a new word, then * moves the iterator to the given offset. */ - TESS_LOCAL void BeginWord(int offset); + void BeginWord(int offset); /** Pointer to the page_res owned by the API. 
*/ PAGE_RES* page_res_; diff --git a/include/tesseract/platform.h b/include/tesseract/platform.h index 51ae2963f..0f54fef8a 100644 --- a/include/tesseract/platform.h +++ b/include/tesseract/platform.h @@ -15,10 +15,11 @@ // /////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_CCUTIL_PLATFORM_H_ -#define TESSERACT_CCUTIL_PLATFORM_H_ +#ifndef TESSERACT_PLATFORM_H_ +#define TESSERACT_PLATFORM_H_ -#if defined(_WIN32) || defined(__CYGWIN__) +#ifndef TESS_API +# if defined(_WIN32) || defined(__CYGWIN__) # if defined(TESS_EXPORTS) # define TESS_API __declspec(dllexport) # elif defined(TESS_IMPORTS) @@ -26,20 +27,11 @@ # else # define TESS_API # endif -# define TESS_LOCAL -#else -# if __GNUC__ >= 4 -# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) +# else +# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) # define TESS_API __attribute__((visibility("default"))) -# define TESS_LOCAL __attribute__((visibility("hidden"))) -# else -# define TESS_API -# define TESS_LOCAL -# endif -# else -# define TESS_API -# define TESS_LOCAL # endif +# endif #endif -#endif // TESSERACT_CCUTIL_PLATFORM_H_ +#endif // TESSERACT_PLATFORM_H_ diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index 0f0321d7b..fc3207432 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -144,7 +144,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * it resets to the beginning of the paragraph instead of staying wherever * resit might have pointed. 
*/ - TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit); + explicit ResultIterator(const LTRResultIterator& resit); private: /** diff --git a/include/tesseract/unichar.h b/include/tesseract/unichar.h index 5202173bc..9e97e3d4f 100644 --- a/include/tesseract/unichar.h +++ b/include/tesseract/unichar.h @@ -55,7 +55,7 @@ using char32 = signed int; // a single Unicode character (stored as between 1 and 4 utf8 bytes) or // multiple Unicode characters representing the NFKC expansion of a ligature // such as fi, ffl etc. These are also stored as utf8. -class UNICHAR { +class TESS_API UNICHAR { public: UNICHAR() { memset(chars, 0, UNICHAR_LEN); @@ -105,7 +105,7 @@ class UNICHAR { // int char_len = it.get_utf8(buf); buf[char_len] = '\0'; // tprintf("Char = %s\n", buf); // } - class const_iterator { + class TESS_API const_iterator { using CI = const_iterator; public: diff --git a/src/arch/intsimdmatrix.h b/src/arch/intsimdmatrix.h index f8f13ea8c..b130f5f1d 100644 --- a/src/arch/intsimdmatrix.h +++ b/src/arch/intsimdmatrix.h @@ -61,7 +61,7 @@ class GenericVector; // NOTE that, although the subclasses execute on different SIMD hardware, no // virtual methods are needed, as the constructor sets up everything that // is required to allow the base class implementation to do all the work. -struct IntSimdMatrix { +struct TESS_API IntSimdMatrix { // Computes a reshaped copy of the weight matrix w. void Init(const GENERIC_2D_ARRAY& w, std::vector& shaped_w, @@ -115,12 +115,12 @@ struct IntSimdMatrix { // Number of groups of inputs to be broadcast. // num_input_groups_ = num_inputs_per_register_ / num_inputs_per_group_ - static TESS_API const IntSimdMatrix* intSimdMatrix; + static const IntSimdMatrix* intSimdMatrix; // Only available with NEON. - static TESS_API const IntSimdMatrix intSimdMatrixNEON; + static const IntSimdMatrix intSimdMatrixNEON; // Only available with AVX2 / SSE. 
- static TESS_API const IntSimdMatrix intSimdMatrixAVX2; - static TESS_API const IntSimdMatrix intSimdMatrixSSE; + static const IntSimdMatrix intSimdMatrixAVX2; + static const IntSimdMatrix intSimdMatrixSSE; }; } // namespace tesseract diff --git a/src/ccmain/equationdetect.h b/src/ccmain/equationdetect.h index 425e46d62..ffa418fee 100644 --- a/src/ccmain/equationdetect.h +++ b/src/ccmain/equationdetect.h @@ -35,7 +35,7 @@ class ColPartition; class ColPartitionGrid; class ColPartitionSet; -class EquationDetect : public EquationDetectBase { +class TESS_API EquationDetect : public EquationDetectBase { public: EquationDetect(const char* equ_datapath, const char* equ_language); diff --git a/src/ccmain/mutableiterator.h b/src/ccmain/mutableiterator.h index 2e6f51bc1..de3a36120 100644 --- a/src/ccmain/mutableiterator.h +++ b/src/ccmain/mutableiterator.h @@ -40,7 +40,7 @@ class Tesseract; // ResultIterator adds text-specific methods for access to OCR output. // MutableIterator adds access to internal data structures. -class MutableIterator : public ResultIterator { +class TESS_API MutableIterator : public ResultIterator { public: // See argument descriptions in ResultIterator() MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, diff --git a/src/ccmain/paragraphs.h b/src/ccmain/paragraphs.h index 1e6e3d78d..edf9b8ccf 100644 --- a/src/ccmain/paragraphs.h +++ b/src/ccmain/paragraphs.h @@ -87,6 +87,7 @@ class RowInfo { // paragraphs - this is the actual list of PARA objects. // models - the list of paragraph models referenced by the PARA objects. // caller is responsible for deleting the models. +TESS_API void DetectParagraphs(int debug_level, std::vector *row_infos, GenericVector *row_owners, @@ -98,6 +99,7 @@ void DetectParagraphs(int debug_level, // saving the ParagraphModels in models. Caller owns the models. // We use unicharset during the function to answer questions such as "is the // first letter of this word upper case?" 
+TESS_API void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, diff --git a/src/ccmain/paragraphs_internal.h b/src/ccmain/paragraphs_internal.h index 173c6ea17..0a780a90a 100644 --- a/src/ccmain/paragraphs_internal.h +++ b/src/ccmain/paragraphs_internal.h @@ -31,6 +31,7 @@ class UNICHARSET; class WERD_CHOICE; // Return whether the given word is likely to be a list item start word. +TESS_API bool AsciiLikelyListItem(const STRING &word); // Return the first Unicode Codepoint from werd[pos]. @@ -38,11 +39,13 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos); // Set right word attributes given either a unicharset and werd or a utf8 // string. +TESS_API void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea); // Set left word attributes given either a unicharset and werd or a utf8 string. +TESS_API void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea); diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 21cf64222..159b0ea7a 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -175,7 +175,7 @@ struct WordData { using WordRecognizer = void (Tesseract::*)(const WordData&, WERD_RES**, PointerVector*); -class Tesseract : public Wordrec { +class TESS_API Tesseract : public Wordrec { public: Tesseract(); ~Tesseract() override; diff --git a/src/ccstruct/boxread.h b/src/ccstruct/boxread.h index 3843fb688..6a27d7aaf 100644 --- a/src/ccstruct/boxread.h +++ b/src/ccstruct/boxread.h @@ -32,6 +32,7 @@ const int kBoxReadBufSize = 1024; // Open the boxfile based on the given image filename. // Returns nullptr if the box file cannot be opened. +TESS_API FILE* OpenBoxFile(const char* filename); // Reads all boxes from the given filename. 
@@ -51,6 +52,7 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename, // continue_on_failure allows reading to continue even if an invalid box is // encountered and will return true if it succeeds in reading some boxes. // It otherwise gives up and returns false on encountering an invalid box. +TESS_API bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, bool continue_on_failure, std::vector* boxes, @@ -66,20 +68,24 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, // for valid utf-8 and allows space or tab between fields. // utf8_str is set with the unichar string, and bounding box with the box. // If there are page numbers in the file, it reads them all. +TESS_API bool ReadNextBox(int *line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box); // As ReadNextBox above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. +TESS_API bool ReadNextBox(int target_page, int *line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box); // Parses the given box file string into a page_number, utf8_str, and // bounding_box. Returns true on a successful parse. +TESS_API bool ParseBoxFileStr(const char* boxfile_str, int* page_number, STRING* utf8_str, TBOX* bounding_box); // Creates a box file string from a unichar string, TBOX and page number. 
+TESS_API void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str); diff --git a/src/ccstruct/ccstruct.h b/src/ccstruct/ccstruct.h index 5d095864d..7af2b8081 100644 --- a/src/ccstruct/ccstruct.h +++ b/src/ccstruct/ccstruct.h @@ -22,7 +22,7 @@ #include "ccutil.h" // for CCUtil namespace tesseract { -class CCStruct : public CCUtil { +class TESS_API CCStruct : public CCUtil { public: CCStruct() = default; ~CCStruct() override; diff --git a/src/ccstruct/fontinfo.h b/src/ccstruct/fontinfo.h index 5daab581f..63d5fdfae 100644 --- a/src/ccstruct/fontinfo.h +++ b/src/ccstruct/fontinfo.h @@ -146,26 +146,34 @@ struct FontSet { // are replaced. class FontInfoTable : public GenericVector { public: + TESS_API // when you remove inheritance from GenericVector, move this on class level FontInfoTable(); + TESS_API ~FontInfoTable(); // Writes to the given file. Returns false in case of error. + TESS_API bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. + TESS_API bool DeSerialize(TFile* fp); // Returns true if the given set of fonts includes one with the same // properties as font_id. + TESS_API bool SetContainsFontProperties( int font_id, const GenericVector& font_set) const; // Returns true if the given set of fonts includes multiple properties. + TESS_API bool SetContainsMultipleFontProperties( const GenericVector& font_set) const; // Moves any non-empty FontSpacingInfo entries from other to this. + TESS_API void MoveSpacingInfoFrom(FontInfoTable* other); // Moves this to the target unicity table. + TESS_API void MoveTo(UnicityTable* target); }; diff --git a/src/ccstruct/imagedata.h b/src/ccstruct/imagedata.h index 4c253cbd2..f5901166a 100644 --- a/src/ccstruct/imagedata.h +++ b/src/ccstruct/imagedata.h @@ -104,7 +104,7 @@ struct FloatWordFeature { // The text transcription is the ground truth UTF-8 text for the image. 
// Character boxes are optional and indicate the desired segmentation of // the text into recognition units. -class ImageData { +class TESS_API ImageData { public: ImageData(); // Takes ownership of the pix. @@ -213,19 +213,24 @@ class ImageData { // A collection of ImageData that knows roughly how much memory it is using. class DocumentData { public: + TESS_API explicit DocumentData(const STRING& name); + TESS_API ~DocumentData(); // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. + TESS_API bool LoadDocument(const char* filename, int start_page, int64_t max_memory, FileReader reader); // Sets up the document, without actually loading it. void SetDocument(const char* filename, int64_t max_memory, FileReader reader); // Writes all the pages to the given filename. Returns false on error. + TESS_API bool SaveDocument(const char* filename, FileWriter writer); // Adds the given page data to this document, counting up memory. + TESS_API void AddPageToDocument(ImageData* page); const STRING& document_name() const { @@ -257,6 +262,7 @@ class DocumentData { void LoadPageInBackground(int index); // Returns a pointer to the page with the given index, modulo the total // number of pages. Blocks until the background load is completed. + TESS_API const ImageData* GetPage(int index); // Returns true if the requested page is available, and provides a pointer, // which may be nullptr if the document is empty. May block, even though it @@ -325,7 +331,9 @@ class DocumentData { // content. class DocumentCache { public: + TESS_API explicit DocumentCache(int64_t max_memory); + TESS_API ~DocumentCache(); // Deletes all existing documents from the cache. @@ -335,6 +343,7 @@ class DocumentCache { } // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. 
+ TESS_API bool LoadDocuments(const std::vector& filenames, CachingStrategy cache_strategy, FileReader reader); @@ -358,16 +367,19 @@ class DocumentCache { } // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache // strategy, could take a long time. + TESS_API int TotalPages(); private: // Returns a page by serial number, selecting them in a round-robin fashion // from all the documents. Highly disk-intensive, but doesn't need samples // to be shuffled between files to begin with. + TESS_API const ImageData* GetPageRoundRobin(int serial); // Returns a page by serial number, selecting them in sequence from each file. // Requires the samples to be shuffled between the files to give a random or // uniform distribution of data. Less disk-intensive than GetPageRoundRobin. + TESS_API const ImageData* GetPageSequential(int serial); // Helper counts the number of adjacent cached neighbour documents_ of index diff --git a/src/ccstruct/linlsq.h b/src/ccstruct/linlsq.h index eb0272a70..c8654a77d 100644 --- a/src/ccstruct/linlsq.h +++ b/src/ccstruct/linlsq.h @@ -28,7 +28,7 @@ namespace tesseract { template class GenericVector; -class LLSQ { +class TESS_API LLSQ { public: LLSQ() { // constructor clear(); // set to zeros diff --git a/src/ccstruct/normalis.h b/src/ccstruct/normalis.h index 4b5c6b18a..1dc9e867f 100644 --- a/src/ccstruct/normalis.h +++ b/src/ccstruct/normalis.h @@ -46,7 +46,7 @@ enum NormalizationMode { NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. 
}; -class DENORM { +class TESS_API DENORM { public: DENORM(); diff --git a/src/ccstruct/ocrblock.h b/src/ccstruct/ocrblock.h index fb04967c6..e4062a12b 100644 --- a/src/ccstruct/ocrblock.h +++ b/src/ccstruct/ocrblock.h @@ -27,8 +27,9 @@ namespace tesseract { class BLOCK; //forward decl -ELISTIZEH (BLOCK) -class BLOCK : public ELIST_LINK +ELISTIZEH(BLOCK) + +class TESS_API BLOCK : public ELIST_LINK //page block { friend class BLOCK_RECT_IT; //block iterator diff --git a/src/ccstruct/ocrpara.h b/src/ccstruct/ocrpara.h index 754a3fe27..dec83c2ec 100644 --- a/src/ccstruct/ocrpara.h +++ b/src/ccstruct/ocrpara.h @@ -114,7 +114,7 @@ ELISTIZEH(PARA) // |you can try to identify source | // |code. Ouch! | // +--------------------------------+ -class ParagraphModel { +class TESS_API ParagraphModel { public: ParagraphModel(tesseract::ParagraphJustification justification, int margin, diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 627467d15..814848f69 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -164,7 +164,7 @@ enum CRUNCH_MODE // WERD_RES is a collection of publicly accessible members that gathers // information about a word result. -class WERD_RES : public ELIST_LINK { +class TESS_API WERD_RES : public ELIST_LINK { public: // Which word is which? 
// There are 3 coordinate spaces in use here: a possibly rotated pixel space, @@ -673,7 +673,7 @@ class WERD_RES : public ELIST_LINK { * PAGE_RES_IT - Page results iterator *************************************************************************/ -class PAGE_RES_IT { +class TESS_API PAGE_RES_IT { public: PAGE_RES * page_res; // page being iterated diff --git a/src/ccstruct/points.h b/src/ccstruct/points.h index 7372a05a6..92e472004 100644 --- a/src/ccstruct/points.h +++ b/src/ccstruct/points.h @@ -189,7 +189,8 @@ class ICOORDELT : public ELIST_LINK, public ICOORD }; ELISTIZEH (ICOORDELT) -class FCOORD + +class TESS_API FCOORD { public: ///empty constructor diff --git a/src/ccstruct/polyblk.h b/src/ccstruct/polyblk.h index 860c2390f..3bbd37e41 100644 --- a/src/ccstruct/polyblk.h +++ b/src/ccstruct/polyblk.h @@ -27,7 +27,7 @@ namespace tesseract { -class POLY_BLOCK { +class TESS_API POLY_BLOCK { public: POLY_BLOCK() = default; // Initialize from box coordinates. diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h index b7206a163..d1467f178 100644 --- a/src/ccstruct/ratngs.h +++ b/src/ccstruct/ratngs.h @@ -263,7 +263,7 @@ enum ScriptPos { const char *ScriptPosToString(ScriptPos script_pos); -class WERD_CHOICE : public ELIST_LINK { +class TESS_API WERD_CHOICE : public ELIST_LINK { public: static const float kBadRating; static const char *permuter_name(uint8_t permuter); diff --git a/src/ccstruct/rect.h b/src/ccstruct/rect.h index 72fff2973..3900f9f22 100644 --- a/src/ccstruct/rect.h +++ b/src/ccstruct/rect.h @@ -35,7 +35,7 @@ namespace tesseract { class STRING; -class TBOX { // bounding box +class TESS_API TBOX { // bounding box public: TBOX (): // empty constructor making a null box bot_left (INT16_MAX, INT16_MAX), top_right (-INT16_MAX, -INT16_MAX) { diff --git a/src/ccstruct/statistc.h b/src/ccstruct/statistc.h index 2534c5092..cc21c60fc 100644 --- a/src/ccstruct/statistc.h +++ b/src/ccstruct/statistc.h @@ -29,7 +29,7 @@ template class GenericVector; // 
Simple histogram-based statistics for integer values in a known // range, such that the range is small compared to the number of samples. -class STATS { +class TESS_API STATS { public: // The histogram buckets are in the range // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e. diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h index 0203f7514..b1dfcfcfa 100644 --- a/src/ccstruct/stepblob.h +++ b/src/ccstruct/stepblob.h @@ -37,7 +37,7 @@ class DENORM; ELISTIZEH(C_BLOB) -class C_BLOB:public ELIST_LINK +class TESS_API C_BLOB : public ELIST_LINK { public: C_BLOB() = default; diff --git a/src/ccstruct/werd.h b/src/ccstruct/werd.h index 4c5cf616b..fa4f07b93 100644 --- a/src/ccstruct/werd.h +++ b/src/ccstruct/werd.h @@ -56,7 +56,7 @@ enum DISPLAY_FLAGS { class ROW; // forward decl -class WERD : public ELIST2_LINK { +class TESS_API WERD : public ELIST2_LINK { public: WERD() = default; // WERD constructed with: diff --git a/src/ccutil/bitvector.h b/src/ccutil/bitvector.h index 972bbf24a..216ba8452 100644 --- a/src/ccutil/bitvector.h +++ b/src/ccutil/bitvector.h @@ -27,7 +27,7 @@ namespace tesseract { // Trivial class to encapsulate a fixed-length array of bits, with // Serialize/DeSerialize. Replaces the old macros. -class BitVector { +class TESS_API BitVector { public: // Fast lookup table to get the first least significant set bit in a byte. 
// For zero, the table has 255, but since it is a special case, most code diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h index 5613bf9ce..5cc39f2b8 100644 --- a/src/ccutil/ccutil.h +++ b/src/ccutil/ccutil.h @@ -41,7 +41,7 @@ namespace tesseract { -class CCUtil { +class TESS_API CCUtil { public: CCUtil(); virtual ~CCUtil(); diff --git a/src/ccutil/clst.h b/src/ccutil/clst.h index 667a85d4b..a826d68b3 100644 --- a/src/ccutil/clst.h +++ b/src/ccutil/clst.h @@ -69,7 +69,7 @@ class CLIST_LINK * Generic list class for singly linked CONS cell lists **********************************************************************/ -class CLIST +class TESS_API CLIST { friend class CLIST_ITERATOR; @@ -144,7 +144,7 @@ class CLIST *links **********************************************************************/ -class CLIST_ITERATOR +class TESS_API CLIST_ITERATOR { friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *); diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index ed5c6bdb2..c22b9817b 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -107,7 +107,7 @@ class ELIST_LINK * Generic list class for singly linked lists with embedded links **********************************************************************/ -class ELIST +class TESS_API ELIST { friend class ELIST_ITERATOR; @@ -181,7 +181,7 @@ class ELIST * Generic iterator class for singly linked lists with embedded links **********************************************************************/ -class ELIST_ITERATOR +class TESS_API ELIST_ITERATOR { friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); @@ -849,7 +849,7 @@ ELISTIZEH_C. 
#define ELISTIZEH_A(CLASSNAME) \ \ -extern void CLASSNAME##_zapper(ELIST_LINK* link); +TESS_API extern void CLASSNAME##_zapper(ELIST_LINK* link); #define ELISTIZEH_B(CLASSNAME) \ \ diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index 8b2befb56..b21a4c0ca 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -88,7 +88,7 @@ class ELIST2_LINK * Generic list class for doubly linked lists with embedded links **********************************************************************/ -class ELIST2 +class TESS_API ELIST2 { friend class ELIST2_ITERATOR; @@ -151,7 +151,7 @@ class ELIST2 *links **********************************************************************/ -class ELIST2_ITERATOR +class TESS_API ELIST2_ITERATOR { friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *); @@ -858,7 +858,7 @@ ELIST2IZEH_C. #define ELIST2IZEH_A(CLASSNAME) \ \ - extern void CLASSNAME##_zapper( /*delete a link*/ \ + TESS_API extern void CLASSNAME##_zapper( /*delete a link*/ \ ELIST2_LINK *link); /*link to delete*/ #define ELIST2IZEH_B(CLASSNAME) \ diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index e358d9853..939577bf5 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -310,14 +310,14 @@ class GenericVector { } // Returns true if all elements of *this are within the given range. // Only uses operator< - bool WithinBounds(const T& rangemin, const T& rangemax) const { + /*bool WithinBounds(const T& rangemin, const T& rangemax) const { for (int i = 0; i < size_used_; ++i) { if (data_[i] < rangemin || rangemax < data_[i]) { return false; } } return true; - } + }*/ protected: // Internal recursive version of choose_nth_item. diff --git a/src/ccutil/indexmapbidi.h b/src/ccutil/indexmapbidi.h index 15bb840af..d42bead5e 100644 --- a/src/ccutil/indexmapbidi.h +++ b/src/ccutil/indexmapbidi.h @@ -39,7 +39,7 @@ class IndexMapBiDi; // IndexMapBiDi below. // NOTE: there are currently no methods to setup an IndexMap on its own! 
// It must be initialized by copying from an IndexMapBiDi or by DeSerialize. -class IndexMap { +class TESS_API IndexMap { public: virtual ~IndexMap(); @@ -99,7 +99,7 @@ class IndexMap { // for ... Merge(index1, index2); // CompleteMerges(); // Allows a many-to-one mapping by merging compact space indices. -class IndexMapBiDi : public IndexMap { +class TESS_API IndexMapBiDi : public IndexMap { public: ~IndexMapBiDi() override; diff --git a/src/ccutil/params.h b/src/ccutil/params.h index 27e255e56..988c40be6 100644 --- a/src/ccutil/params.h +++ b/src/ccutil/params.h @@ -47,7 +47,7 @@ struct ParamsVectors { }; // Utility functions for working with Tesseract parameters. -class ParamUtils { +class TESS_API ParamUtils { public: // Reads a file of parameter definitions and set/modify the values therein. // If the filename begins with a + or -, the BoolVariables will be @@ -279,6 +279,7 @@ class DoubleParam : public Param { // // TODO(daria): remove GlobalParams() when all global Tesseract // parameters are converted to members. +TESS_API ParamsVectors* GlobalParams(); /************************************************************************* diff --git a/src/ccutil/scanutils.h b/src/ccutil/scanutils.h index 93381c5df..c89d37ff0 100644 --- a/src/ccutil/scanutils.h +++ b/src/ccutil/scanutils.h @@ -27,6 +27,7 @@ * @note Note that scientific floating-point notation is not supported. * */ +TESS_API int tfscanf(FILE* stream, const char *format, ...); #endif // TESSERACT_CCUTIL_SCANUTILS_H_ diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h index f520b8131..951552fa8 100644 --- a/src/ccutil/serialis.h +++ b/src/ccutil/serialis.h @@ -47,7 +47,9 @@ constexpr size_t countof(T const (&)[N]) noexcept { using FileWriter = bool (*)(const std::vector& data, const char* filename); +TESS_API bool LoadDataFromFile(const char* filename, std::vector* data); +TESS_API bool SaveDataToFile(const std::vector& data, const char* filename); // Deserialize data from file. 
@@ -64,7 +66,7 @@ bool Serialize(FILE *fp, const T *data, size_t n = 1) { // Simple file class. // Allows for portable file input from memory and from foreign file systems. -class TFile { +class TESS_API TFile { public: TFile(); ~TFile(); diff --git a/src/ccutil/tessdatamanager.h b/src/ccutil/tessdatamanager.h index 8eb935264..f8fff9d8c 100644 --- a/src/ccutil/tessdatamanager.h +++ b/src/ccutil/tessdatamanager.h @@ -123,8 +123,7 @@ static const char *const kTessdataFileSuffixes[] = { */ static const int kMaxNumTessdataEntries = 1000; - -class TessdataManager { +class TESS_API TessdataManager { public: TessdataManager(); explicit TessdataManager(FileReader reader); diff --git a/src/ccutil/unicharcompress.h b/src/ccutil/unicharcompress.h index 9c6ac009f..2c1ccbf57 100644 --- a/src/ccutil/unicharcompress.h +++ b/src/ccutil/unicharcompress.h @@ -125,7 +125,7 @@ class RecodedCharID { // position). For non-CJK, the same code value CAN be used in multiple // positions, eg the ff ligature is converted to , where // is the same code as is used for the single f. -class UnicharCompress { +class TESS_API UnicharCompress { public: UnicharCompress(); UnicharCompress(const UnicharCompress& src); diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h index 8c6e81da5..4026fcda3 100644 --- a/src/ccutil/unicharset.h +++ b/src/ccutil/unicharset.h @@ -49,7 +49,7 @@ enum class OldUncleanUnichars { kTrue, }; -class CHAR_FRAGMENT { +class TESS_API CHAR_FRAGMENT { public: // Minimum number of characters used for fragment representation. static const int kMinLen = 6; @@ -146,15 +146,15 @@ class CHAR_FRAGMENT { // The UNICHARSET class is an utility class for Tesseract that holds the // set of characters that are used by the engine. Each character is identified // by a unique number, from 0 to (size - 1). 
-class UNICHARSET { +class TESS_API UNICHARSET { public: // Custom list of characters and their ligature forms (UTF8) // These map to unicode values in the private use area (PUC) and are supported // by only few font families (eg. Wyld, Adobe Caslon Pro). - static TESS_API const char* kCustomLigatures[][2]; + static const char* kCustomLigatures[][2]; // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. - static TESS_API const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; + static const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; // ICU 2.0 UCharDirection enum (from icu/include/unicode/uchar.h) enum Direction { @@ -893,7 +893,7 @@ class UNICHARSET { private: - struct UNICHAR_PROPERTIES { + struct TESS_API UNICHAR_PROPERTIES { UNICHAR_PROPERTIES(); // Initializes all properties to sensible default values. void Init(); @@ -996,7 +996,7 @@ class UNICHARSET { // The substitutions clean up text that should exists for rendering of // synthetic data, but not in the recognition set. static const char* kCleanupMaps[][2]; - static TESS_API const char* null_script; + static const char* null_script; std::vector unichars; UNICHARMAP ids; diff --git a/src/classify/classify.h b/src/classify/classify.h index af0901473..0ba63a5d3 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -99,7 +99,7 @@ enum CharSegmentationType { CST_NGRAM // Multiple characters. 
}; -class Classify : public CCStruct { +class TESS_API Classify : public CCStruct { public: Classify(); ~Classify() override; diff --git a/src/classify/cluster.h b/src/classify/cluster.h index f4b2e7912..8a6a270a6 100644 --- a/src/classify/cluster.h +++ b/src/classify/cluster.h @@ -106,14 +106,19 @@ typedef struct { /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ +TESS_API CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]); +TESS_API SAMPLE* MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID); +TESS_API LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config); +TESS_API void FreeClusterer(CLUSTERER* Clusterer); +TESS_API void FreeProtoList(LIST* ProtoList); void FreePrototype(void* arg); // PROTOTYPE *Prototype); @@ -124,6 +129,7 @@ float Mean(PROTOTYPE* Proto, uint16_t Dimension); float StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension); +TESS_API int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[]); diff --git a/src/classify/clusttool.h b/src/classify/clusttool.h index c2ceb597e..ead65618e 100644 --- a/src/classify/clusttool.h +++ b/src/classify/clusttool.h @@ -32,8 +32,10 @@ PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); +TESS_API void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); +TESS_API void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); } // namespace tesseract diff --git a/src/classify/featdefs.h b/src/classify/featdefs.h index f11985774..eb8c66fed 100644 --- a/src/classify/featdefs.h +++ b/src/classify/featdefs.h @@ -50,8 +50,10 @@ using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; /*---------------------------------------------------------------------- Generic functions for manipulating character 
descriptions ----------------------------------------------------------------------*/ +TESS_API void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); +TESS_API void FreeCharDescription(CHAR_DESC CharDesc); CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); @@ -62,9 +64,11 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc, STRING* str); +TESS_API CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File); +TESS_API uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName); diff --git a/src/classify/intfeaturespace.h b/src/classify/intfeaturespace.h index af23af9af..3f21e4d3c 100644 --- a/src/classify/intfeaturespace.h +++ b/src/classify/intfeaturespace.h @@ -35,7 +35,7 @@ class IndexMap; // Down-sampling quantization of the INT_FEATURE_STRUCT feature space and // conversion to a single scalar index value, used as a binary feature space. -class IntFeatureSpace { +class TESS_API IntFeatureSpace { public: IntFeatureSpace(); // Default copy constructors and assignment OK! diff --git a/src/classify/intfx.h b/src/classify/intfx.h index 09f34be4f..5a3b893e5 100644 --- a/src/classify/intfx.h +++ b/src/classify/intfx.h @@ -46,20 +46,22 @@ const double kStandardFeatureLength = 64.0 / 5; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ +TESS_API void InitIntegerFX(); // Returns a vector representing the direction of a feature with the given // theta direction in an INT_FEATURE_STRUCT. +TESS_API FCOORD FeatureDirection(uint8_t theta); - // Generates a TrainingSample from a TBLOB. Extracts features and sets - // the bounding box, so classifiers that operate on the image can work. 
- // TODO(rays) BlobToTrainingSample must remain a global function until - // the FlexFx and FeatureDescription code can be removed and LearnBlob - // made a member of Classify. - TrainingSample* BlobToTrainingSample( - const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, - GenericVector* bl_features); +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) BlobToTrainingSample must remain a global function until +// the FlexFx and FeatureDescription code can be removed and LearnBlob +// made a member of Classify. +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + GenericVector* bl_features); } // namespace tesseract diff --git a/src/classify/intproto.h b/src/classify/intproto.h index 44b42fc91..1ece69988 100644 --- a/src/classify/intproto.h +++ b/src/classify/intproto.h @@ -232,16 +232,19 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs); INT_TEMPLATES NewIntTemplates(); +TESS_API void free_int_templates(INT_TEMPLATES templates); void ShowMatchDisplay(); // Clears the given window and draws the featurespace guides for the // appropriate normalization method. +TESS_API void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window); /*----------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED +TESS_API void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, ScrollView::Color color); @@ -253,6 +256,7 @@ void InitFeatureDisplayWindowIfReqd(); // Creates a window of the appropriate size for displaying elements // in feature space. 
+TESS_API ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos); #endif // !GRAPHICS_DISABLED diff --git a/src/classify/kdtree.h b/src/classify/kdtree.h index 832b12cd2..b85121917 100644 --- a/src/classify/kdtree.h +++ b/src/classify/kdtree.h @@ -81,6 +81,7 @@ void FreeKDNode(KDNODE* Node); float DistanceSquared(int k, PARAM_DESC* dim, float p1[], float p2[]); +TESS_API float ComputeDistance(int k, PARAM_DESC* dim, float p1[], float p2[]); int QueryInSearch(KDTREE* tree); diff --git a/src/classify/ocrfeatures.h b/src/classify/ocrfeatures.h index 4af49b994..edf63496f 100644 --- a/src/classify/ocrfeatures.h +++ b/src/classify/ocrfeatures.h @@ -102,10 +102,13 @@ DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) ----------------------------------------------------------------------*/ bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); +TESS_API void FreeFeature(FEATURE Feature); +TESS_API void FreeFeatureSet(FEATURE_SET FeatureSet); +TESS_API FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); FEATURE_SET NewFeatureSet(int NumFeatures); diff --git a/src/classify/protos.h b/src/classify/protos.h index 419327314..93bea842c 100644 --- a/src/classify/protos.h +++ b/src/classify/protos.h @@ -82,18 +82,24 @@ using CLASSES = CLASS_STRUCT*; /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ +TESS_API int AddConfigToClass(CLASS_TYPE Class); +TESS_API int AddProtoToClass(CLASS_TYPE Class); +TESS_API void FillABC(PROTO Proto); +TESS_API void FreeClass(CLASS_TYPE Class); +TESS_API void FreeClassFields(CLASS_TYPE Class); void InitPrototypes(); +TESS_API CLASS_TYPE NewClass(int NumProtos, int NumConfigs); } // namespace tesseract diff --git a/src/classify/shapeclassifier.h b/src/classify/shapeclassifier.h index c3860be72..b323554f8 100644 --- a/src/classify/shapeclassifier.h +++ b/src/classify/shapeclassifier.h @@ -38,7 
+38,7 @@ class TrainingSampleSet; struct UnicharRating; // Interface base class for classifiers that produce ShapeRating results. -class ShapeClassifier { +class TESS_API ShapeClassifier { public: virtual ~ShapeClassifier() = default; diff --git a/src/classify/shapetable.h b/src/classify/shapetable.h index d6f088a25..4dde3ecde 100644 --- a/src/classify/shapetable.h +++ b/src/classify/shapetable.h @@ -181,7 +181,7 @@ struct UnicharAndFonts { // characters that have a similar or identical shape. Shapes/ShapeTables may // be organized hierarchically from identical shapes at the leaves to vaguely // similar shapes near the root. -class Shape { +class TESS_API Shape { public: Shape() : destination_index_(-1) {} @@ -258,7 +258,7 @@ class Shape { // that the shape represents. // Each UnicharAndFonts also lists the fonts of the unichar_id that were // mapped to the shape during training. -class ShapeTable { +class TESS_API ShapeTable { public: ShapeTable(); // The UNICHARSET reference supplied here, or in set_unicharset below must diff --git a/src/classify/tessclassifier.h b/src/classify/tessclassifier.h index 5c420ba52..07dcfa4df 100644 --- a/src/classify/tessclassifier.h +++ b/src/classify/tessclassifier.h @@ -33,7 +33,7 @@ class TrainingSample; // Due to limitations in the content of TrainingSample, this currently // only works for the static classifier and only works if the ShapeTable // in classify is not nullptr. 
-class TessClassifier : public ShapeClassifier { +class TESS_API TessClassifier : public ShapeClassifier { public: TessClassifier(bool pruner_only, tesseract::Classify* classify) : pruner_only_(pruner_only), classify_(classify) {} diff --git a/src/classify/trainingsample.cpp b/src/classify/trainingsample.cpp index 575a89dd1..003fb97b8 100644 --- a/src/classify/trainingsample.cpp +++ b/src/classify/trainingsample.cpp @@ -21,13 +21,15 @@ #include "trainingsample.h" -#include // for M_PI -#include "allheaders.h" +#include "intfeaturespace.h" #include "helpers.h" -#include "intfeaturemap.h" #include "normfeat.h" #include "shapetable.h" +#include "allheaders.h" + +#include // for M_PI + namespace tesseract { ELISTIZE(TrainingSample) @@ -281,17 +283,6 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { features_are_mapped_ = false; } -// Sets the mapped_features_ from the features using the provided -// feature_map. -void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) { - GenericVector indexed_features; - feature_map.feature_space().IndexAndSortFeatures(features_, num_features_, - &indexed_features); - feature_map.MapIndexedFeatures(indexed_features, &mapped_features_); - features_are_indexed_ = false; - features_are_mapped_ = true; -} - // Returns a pix representing the sample. (Int features only.) Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h index 0964e2bee..0ac2cc4fc 100644 --- a/src/classify/trainingsample.h +++ b/src/classify/trainingsample.h @@ -50,7 +50,7 @@ static const int kSampleScaleSize = 3; static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; // ASSERT_IS_PRIME(kSampleRandomSize) !! 
-class TrainingSample : public ELIST_LINK { +class TESS_API TrainingSample : public ELIST_LINK { public: TrainingSample() : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), @@ -97,9 +97,6 @@ class TrainingSample : public ELIST_LINK { // Sets the mapped_features_ from the features_ using the provided // feature_space to the indexed versions of the features. void IndexFeatures(const IntFeatureSpace& feature_space); - // Sets the mapped_features_ from the features_ using the provided - // feature_map. - void MapFeatures(const IntFeatureMap& feature_map); // Returns a pix representing the sample. (Int features only.) Pix* RenderToPix(const UNICHARSET* unicharset) const; @@ -231,10 +228,15 @@ class TrainingSample : public ELIST_LINK { double max_dist_; // Global index of this sample. int sample_index_; +public: + // both are used in training tools + // hide after refactoring + // Indexed/mapped features, as indicated by the bools below. GenericVector mapped_features_; bool features_are_indexed_; bool features_are_mapped_; +private: // True if the last classification was an error by the current definition. 
bool is_error_; diff --git a/src/cutil/emalloc.h b/src/cutil/emalloc.h index 9aa1a5d3d..ea0291f86 100644 --- a/src/cutil/emalloc.h +++ b/src/cutil/emalloc.h @@ -20,8 +20,11 @@ namespace tesseract { +TESS_API void *Emalloc(int Size); +TESS_API void *Erealloc(void *ptr, int size); +TESS_API void Efree(void *ptr); } // namespace tesseract diff --git a/src/cutil/oldlist.h b/src/cutil/oldlist.h index 5fa839a2e..61fa035b1 100644 --- a/src/cutil/oldlist.h +++ b/src/cutil/oldlist.h @@ -112,6 +112,7 @@ int count(LIST var_list); LIST delete_d(LIST list, void* key, int_compare is_equal); +TESS_API LIST destroy(LIST list); void destroy_nodes(LIST list, void_dest destructor); @@ -120,8 +121,10 @@ LIST last(LIST var_list); LIST pop(LIST list); +TESS_API LIST push(LIST list, void* element); +TESS_API LIST push_last(LIST list, void* item); LIST search(LIST list, void* key, int_compare is_equal); diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 119643c0a..26d76d1e7 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -108,7 +108,7 @@ static const char kWildcard[] = "*"; /// (since they use only the public methods of SquishedDawg and Trie /// classes that are inherited from the Dawg base class). // -class Dawg { +class TESS_API Dawg { public: /// Magic number to determine endianness when reading the Dawg from file. static const int16_t kDawgMagicNumber = 42; @@ -397,7 +397,7 @@ class DawgPositionVector : public GenericVector { /// is stored as a contiguous EDGE_ARRAY (read from file or given as an /// argument to the constructor). 
// -class SquishedDawg : public Dawg { +class TESS_API SquishedDawg : public Dawg { public: SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level) diff --git a/src/dict/dict.h b/src/dict/dict.h index c3548230c..2bbd7fecb 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -91,7 +91,7 @@ struct DawgArgs { bool valid_end; }; -class Dict { +class TESS_API Dict { public: Dict(CCUtil* image_ptr); ~Dict(); @@ -313,7 +313,7 @@ class Dict { /// Initialize Dict class - load dawgs from [lang].traineddata and /// user-specified wordlist and parttern list. - static TESS_API DawgCache *GlobalDawgCache(); + static DawgCache *GlobalDawgCache(); // Sets up ready for a Load or LoadLSTM. void SetupForLoad(DawgCache *dawg_cache); // Loads the dawgs needed by Tesseract. Call FinishLoad() after. diff --git a/src/dict/trie.h b/src/dict/trie.h index 182dce5c4..f516b3c5d 100644 --- a/src/dict/trie.h +++ b/src/dict/trie.h @@ -53,7 +53,7 @@ using TRIE_NODES = GenericVector ; * This class stores a vector of pointers to TRIE_NODE_RECORDs, each of * which has a vector of forward and backward edges. */ -class Trie : public Dawg { +class TESS_API Trie : public Dawg { public: enum RTLReversePolicy { RRP_DO_NO_REVERSE, diff --git a/src/lstm/convolve.h b/src/lstm/convolve.h index a3c051e0e..033061f23 100644 --- a/src/lstm/convolve.h +++ b/src/lstm/convolve.h @@ -32,6 +32,7 @@ class Convolve : public Network { public: // The area of convolution is 2*half_x + 1 by 2*half_y + 1, forcing it to // always be odd, so the center is the current pixel. + TESS_API Convolve(const std::string& name, int ni, int half_x, int half_y); ~Convolve() override = default; diff --git a/src/lstm/fullyconnected.h b/src/lstm/fullyconnected.h index c67984efd..eaa437a74 100644 --- a/src/lstm/fullyconnected.h +++ b/src/lstm/fullyconnected.h @@ -26,6 +26,7 @@ namespace tesseract { // C++ Implementation of the Softmax (output) class from lstm.py. 
class FullyConnected : public Network { public: + TESS_API FullyConnected(const std::string& name, int ni, int no, NetworkType type); ~FullyConnected() override = default; diff --git a/src/lstm/input.h b/src/lstm/input.h index 67cc89366..b9366364c 100644 --- a/src/lstm/input.h +++ b/src/lstm/input.h @@ -26,7 +26,9 @@ class ScrollView; class Input : public Network { public: + TESS_API Input(const std::string& name, int ni, int no); + TESS_API Input(const std::string& name, const StaticShape& shape); ~Input() override = default; diff --git a/src/lstm/lstm.h b/src/lstm/lstm.h index beffafb12..6fb3ce6f8 100644 --- a/src/lstm/lstm.h +++ b/src/lstm/lstm.h @@ -46,6 +46,7 @@ class LSTM : public Network { // 2-d and bidi softmax LSTMs are not rejected, but are impossible to build // in the conventional way because the output feedback both forwards and // backwards in time does become impossible. + TESS_API LSTM(const std::string& name, int num_inputs, int num_states, int num_outputs, bool two_dimensional, NetworkType type); ~LSTM() override; diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h index a7962fce1..ba6854d52 100644 --- a/src/lstm/lstmrecognizer.h +++ b/src/lstm/lstmrecognizer.h @@ -50,7 +50,7 @@ enum TrainingFlags { // Top-level line recognizer class for LSTM-based networks. // Note that a sub-class, LSTMTrainer is used for training. -class LSTMRecognizer { +class TESS_API LSTMRecognizer { public: LSTMRecognizer(); LSTMRecognizer(const STRING language_data_path_prefix); diff --git a/src/lstm/maxpool.h b/src/lstm/maxpool.h index f39035e6c..bae603397 100644 --- a/src/lstm/maxpool.h +++ b/src/lstm/maxpool.h @@ -28,6 +28,7 @@ namespace tesseract { // Backprop propagates only to the position that was the max. 
class Maxpool : public Reconfig { public: + TESS_API Maxpool(const char* name, int ni, int x_scale, int y_scale); ~Maxpool() override = default; diff --git a/src/lstm/network.h b/src/lstm/network.h index bf200609b..50c416ba0 100644 --- a/src/lstm/network.h +++ b/src/lstm/network.h @@ -277,6 +277,7 @@ class Network { void DisplayBackward(const NetworkIO& matrix); // Creates the window if needed, otherwise clears it. + TESS_API static void ClearWindow(bool tess_coords, const char* window_name, int width, int height, ScrollView** window); diff --git a/src/lstm/networkio.h b/src/lstm/networkio.h index 764e320e6..b8ee3900d 100644 --- a/src/lstm/networkio.h +++ b/src/lstm/networkio.h @@ -36,7 +36,7 @@ namespace tesseract { // Class to contain all the input/output of a network, allowing for fixed or // variable-strided 2d to 1d mapping, and float or int8_t values. Provides // enough calculating functions to hide the detail of the implementation. -class NetworkIO { +class TESS_API NetworkIO { public: NetworkIO() : int_mode_(false) {} // Resizes the array (and stride), avoiding realloc if possible, to the given diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h index 8386a24bf..5311aba93 100644 --- a/src/lstm/parallel.h +++ b/src/lstm/parallel.h @@ -27,6 +27,7 @@ namespace tesseract { class Parallel : public Plumbing { public: // ni_ and no_ will be set by AddToStack. + TESS_API Parallel(const char* name, NetworkType type); ~Parallel() override = default; diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h index 88e8e87b6..d19bec423 100644 --- a/src/lstm/recodebeam.h +++ b/src/lstm/recodebeam.h @@ -177,7 +177,7 @@ using RecodePair = KDPairInc; using RecodeHeap = GenericHeap; // Class that holds the entire beam search for recognition of a text line. -class RecodeBeamSearch { +class TESS_API RecodeBeamSearch { public: // Borrows the pointer, which is expected to survive until *this is deleted. 
RecodeBeamSearch(const UnicharCompress& recoder, int null_char, diff --git a/src/lstm/reconfig.h b/src/lstm/reconfig.h index 458542273..834632c96 100644 --- a/src/lstm/reconfig.h +++ b/src/lstm/reconfig.h @@ -30,6 +30,7 @@ namespace tesseract { // input stride is a multiple of the y_scale factor! class Reconfig : public Network { public: + TESS_API Reconfig(const char* name, int ni, int x_scale, int y_scale); ~Reconfig() override = default; diff --git a/src/lstm/reversed.h b/src/lstm/reversed.h index 8fd2d8aa2..bcf18546e 100644 --- a/src/lstm/reversed.h +++ b/src/lstm/reversed.h @@ -27,6 +27,7 @@ namespace tesseract { // C++ Implementation of the Reversed class from lstm.py. class Reversed : public Plumbing { public: + TESS_API explicit Reversed(const std::string& name, NetworkType type); ~Reversed() override = default; @@ -65,6 +66,7 @@ class Reversed : public Plumbing { } // Takes ownership of the given network to make it the reversed one. + TESS_API void SetNetwork(Network* network); // Runs forward propagation of activations on the input line. diff --git a/src/lstm/series.h b/src/lstm/series.h index 892ffff28..6b2a68e8b 100644 --- a/src/lstm/series.h +++ b/src/lstm/series.h @@ -27,6 +27,7 @@ namespace tesseract { class Series : public Plumbing { public: // ni_ and no_ will be set by AddToStack. + TESS_API explicit Series(const char* name); ~Series() override = default; @@ -81,10 +82,12 @@ class Series : public Plumbing { // Splits the series after the given index, returning the two parts and // deletes itself. The first part, up to network with index last_start, goes // into start, and the rest goes into end. + TESS_API void SplitAt(int last_start, Series** start, Series** end); // Appends the elements of the src series to this, removing from src and // deleting it. 
+ TESS_API void AppendSeries(Network* src); }; diff --git a/src/textord/alignedblob.h b/src/textord/alignedblob.h index 69142c6ce..cca3b05dc 100644 --- a/src/textord/alignedblob.h +++ b/src/textord/alignedblob.h @@ -79,7 +79,7 @@ struct AlignedBlobParams { // The AlignedBlob class contains code to find vertically aligned blobs. // This is factored out into a separate class, so it can be used by both // vertical line finding (LineFind) and tabstop finding (TabFind). -class AlignedBlob : public BlobGrid { +class TESS_API AlignedBlob : public BlobGrid { public: AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright); ~AlignedBlob() override; diff --git a/src/textord/bbgrid.h b/src/textord/bbgrid.h index 3cf08b85d..5d75aa38d 100644 --- a/src/textord/bbgrid.h +++ b/src/textord/bbgrid.h @@ -49,7 +49,7 @@ template class GridSearch; // The GridBase class is the base class for BBGrid and IntGrid. // It holds the geometry and scale of the grid. -class GridBase { +class TESS_API GridBase { public: GridBase() = default; GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright); diff --git a/src/textord/blobgrid.h b/src/textord/blobgrid.h index 36bfadad7..54b19aebd 100644 --- a/src/textord/blobgrid.h +++ b/src/textord/blobgrid.h @@ -30,7 +30,7 @@ CLISTIZEH(BLOBNBOX) using BlobGridSearch = GridSearch; -class BlobGrid : public BBGrid { +class TESS_API BlobGrid : public BBGrid { public: BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); ~BlobGrid() override; diff --git a/src/textord/colfind.h b/src/textord/colfind.h index acfbb4042..b7d5b672b 100644 --- a/src/textord/colfind.h +++ b/src/textord/colfind.h @@ -47,7 +47,7 @@ class TempColumn_LIST; class EquationDetectBase; // The ColumnFinder class finds columns in the grid. -class ColumnFinder : public TabFind { +class TESS_API ColumnFinder : public TabFind { public: // Gridsize is an estimate of the text size in the image. 
A suitable value // is in TO_BLOCK::line_size after find_components has been used to make diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index 2009b9fa4..5c299b3e8 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -64,7 +64,7 @@ CLISTIZEH(ColPartition) * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions * emerges, which represents the columns over a wide y-coordinate range. */ -class ColPartition : public ELIST2_LINK { +class TESS_API ColPartition : public ELIST2_LINK { public: // This empty constructor is here only so that the class can be ELISTIZED. // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier diff --git a/src/textord/colpartitiongrid.h b/src/textord/colpartitiongrid.h index 32bb46e30..85ab7f3d4 100644 --- a/src/textord/colpartitiongrid.h +++ b/src/textord/colpartitiongrid.h @@ -29,7 +29,7 @@ class TabFind; // ColPartitionGrid is a BBGrid of ColPartition. // It collects functions that work on the grid. -class ColPartitionGrid : public BBGrid { public: diff --git a/src/textord/equationdetectbase.h b/src/textord/equationdetectbase.h index d0e8c12e4..7f84bd091 100644 --- a/src/textord/equationdetectbase.h +++ b/src/textord/equationdetectbase.h @@ -29,7 +29,7 @@ namespace tesseract { class ColPartitionGrid; class ColPartitionSet; -class EquationDetectBase { +class TESS_API EquationDetectBase { public: EquationDetectBase() = default; virtual ~EquationDetectBase(); diff --git a/src/textord/tabfind.h b/src/textord/tabfind.h index aaccb7633..d16a533cb 100644 --- a/src/textord/tabfind.h +++ b/src/textord/tabfind.h @@ -49,7 +49,7 @@ const int kColumnWidthFactor = 20; * rule/separator lines, and tabstop boundaries, (when available), so * as the holder of the list of TabVectors this class provides the functions. 
*/ -class TabFind : public AlignedBlob { +class TESS_API TabFind : public AlignedBlob { public: TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, TabVector_LIST* vlines, int vertical_x, int vertical_y, diff --git a/src/textord/tablefind.h b/src/textord/tablefind.h index 2f3f44512..dc6ff932d 100644 --- a/src/textord/tablefind.h +++ b/src/textord/tablefind.h @@ -127,7 +127,7 @@ using ColSegmentGridSearch = GridSearch -#include // for M_PI #ifdef DISABLED_LEGACY_ENGINE @@ -32,6 +31,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); STRING_PARAM_FLAG(O, "", "File to write unicharset to"); STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); +STRING_PARAM_FLAG(fonts_dir, "", ""); +STRING_PARAM_FLAG(fontconfig_tmpdir, "", ""); /** * This routine parses the command line arguments that were @@ -96,6 +97,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); STRING_PARAM_FLAG(O, "", "File to write unicharset to"); STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); +STRING_PARAM_FLAG(fonts_dir, "", ""); +STRING_PARAM_FLAG(fontconfig_tmpdir, "", ""); static DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, "Min number of samples per proto as % of total"); static DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, diff --git a/src/training/commontraining.h b/src/training/commontraining.h index f12ab258a..8f9b2ed63 100644 --- a/src/training/commontraining.h +++ b/src/training/commontraining.h @@ -18,17 +18,14 @@ #include "config_auto.h" #endif +#include "commandlineflags.h" +#include "tprintf.h" + #include -#ifdef DISABLED_LEGACY_ENGINE - -#include "tprintf.h" -#include "commandlineflags.h" - - +TESS_COMMON_TRAINING_API void ParseArguments(int* argc, char*** argv); - namespace tesseract { // Check whether the shared tesseract library is 
the right one. @@ -48,11 +45,9 @@ static inline void CheckSharedLibraryVersion() } // namespace tesseract - -#else +#ifndef DISABLED_LEGACY_ENGINE #include "cluster.h" -#include "commandlineflags.h" #include "featdefs.h" #include "intproto.h" #include "oldlist.h" @@ -67,9 +62,11 @@ class ShapeTable; // Globals /////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// +TESS_COMMON_TRAINING_API extern tesseract::FEATURE_DEFS_STRUCT feature_defs; // Must be defined in the file that "implements" commonTraining facilities. +TESS_COMMON_TRAINING_API extern tesseract::CLUSTERCONFIG Config; ////////////////////////////////////////////////////////////////////////////// @@ -96,28 +93,13 @@ using MERGE_CLASS = MERGE_CLASS_NODE*; ////////////////////////////////////////////////////////////////////////////// // Functions ///////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void ParseArguments(int* argc, char*** argv); namespace tesseract { -// Check whether the shared tesseract library is the right one. -// This function must be inline because otherwise it would be part of -// the shared library, so it could not compare the versions. -static inline void CheckSharedLibraryVersion() -{ -#ifdef HAVE_CONFIG_H - if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) { - tprintf("ERROR: shared library version mismatch (was %s, expected %s\n" - "Did you use a wrong shared tesseract library?\n", - TessBaseAPI::Version(), TESSERACT_VERSION_STR); - exit(1); - } -#endif -} - // Helper loads shape table from the given file. ShapeTable* LoadShapeTable(const STRING& file_prefix); // Helper to write the shape_table. 
+TESS_COMMON_TRAINING_API void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); // Creates a MasterTraininer and loads the training data into it: @@ -133,21 +115,26 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); // Computes canonical and cloud features. // If shape_table is not nullptr, but failed to load, make a fake flat one, // as shape clustering was not run. +TESS_COMMON_TRAINING_API MasterTrainer* LoadTrainingData(int argc, const char* const * argv, bool replication, ShapeTable** shape_table, STRING* file_prefix); + } // namespace tesseract. +TESS_COMMON_TRAINING_API const char *GetNextFilename(int argc, const char* const * argv); LABELEDLIST FindList( tesseract::LIST List, char *Label); +TESS_COMMON_TRAINING_API LABELEDLIST NewLabeledList( const char *Label); +TESS_COMMON_TRAINING_API void ReadTrainingSamples(const tesseract::FEATURE_DEFS_STRUCT& feature_defs, const char *feature_name, int max_samples, tesseract::UNICHARSET* unicharset, @@ -159,59 +146,69 @@ void WriteTrainingSamples( tesseract::LIST CharList, const char *program_feature_type); +TESS_COMMON_TRAINING_API void FreeTrainingSamples( tesseract::LIST CharList); +TESS_COMMON_TRAINING_API void FreeLabeledList( LABELEDLIST LabeledList); +TESS_COMMON_TRAINING_API void FreeLabeledClassList( tesseract::LIST ClassListList); +TESS_COMMON_TRAINING_API tesseract::CLUSTERER *SetUpForClustering( const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type); +TESS_COMMON_TRAINING_API tesseract::LIST RemoveInsignificantProtos( tesseract::LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N); +TESS_COMMON_TRAINING_API void CleanUpUnusedData( tesseract::LIST ProtoList); +TESS_COMMON_TRAINING_API void MergeInsignificantProtos( tesseract::LIST ProtoList, const char *label, tesseract::CLUSTERER *Clusterer, tesseract::CLUSTERCONFIG *Config); +TESS_COMMON_TRAINING_API MERGE_CLASS FindClass( 
tesseract::LIST List, const char *Label); +TESS_COMMON_TRAINING_API MERGE_CLASS NewLabeledClass( const char *Label); -void FreeTrainingSamples( - tesseract::LIST CharList); - +TESS_COMMON_TRAINING_API tesseract::CLASS_STRUCT* SetUpForFloat2Int(const tesseract::UNICHARSET& unicharset, tesseract::LIST LabeledClassList); void Normalize( float *Values); +TESS_COMMON_TRAINING_API void FreeNormProtoList( tesseract::LIST CharList); +TESS_COMMON_TRAINING_API void AddToNormProtosList( tesseract::LIST* NormProtoList, tesseract::LIST ProtoList, char *CharName); +TESS_COMMON_TRAINING_API int NumberOfProtos( tesseract::LIST ProtoList, bool CountSigProtos, diff --git a/src/training/ctc.h b/src/training/ctc.h index 47fba6747..49f47a4e6 100644 --- a/src/training/ctc.h +++ b/src/training/ctc.h @@ -27,7 +27,7 @@ namespace tesseract { // Class to encapsulate CTC and simple target generation. -class CTC { +class TESS_COMMON_TRAINING_API CTC { public: // Normalizes the probabilities such that no target has a prob below min_prob, // and, provided that the initial total is at least min_total_prob, then all diff --git a/src/training/fileio.cpp b/src/training/fileio.cpp index a53bdeeb7..778ead5c1 100644 --- a/src/training/fileio.cpp +++ b/src/training/fileio.cpp @@ -33,7 +33,6 @@ #include "host.h" // includes windows.h for BOOL, ... #include "tprintf.h" - namespace tesseract { /////////////////////////////////////////////////////////////////////////////// diff --git a/src/training/fileio.h b/src/training/fileio.h index b86dc5581..ad2811c0a 100644 --- a/src/training/fileio.h +++ b/src/training/fileio.h @@ -40,7 +40,7 @@ inline bool LoadFileLinesToStrings(const char* filename, } // A class to manipulate FILE*s. -class File { +class TESS_UNICHARSET_TRAINING_API File { public: // Try to open the file 'filename' in mode 'mode'. // Stop the program if it cannot open it. @@ -66,7 +66,7 @@ class File { }; // A class to manipulate Files for reading. 
-class InputBuffer { +class TESS_UNICHARSET_TRAINING_API InputBuffer { public: explicit InputBuffer(FILE* stream); // 'size' is ignored. @@ -88,7 +88,7 @@ class InputBuffer { }; // A class to manipulate Files for writing. -class OutputBuffer { +class TESS_UNICHARSET_TRAINING_API OutputBuffer { public: explicit OutputBuffer(FILE* stream); // 'size' is ignored. diff --git a/src/training/intfeaturemap.h b/src/training/intfeaturemap.h index 5c5a54b83..af4b981a4 100644 --- a/src/training/intfeaturemap.h +++ b/src/training/intfeaturemap.h @@ -45,7 +45,7 @@ static const int kNumOffsetMaps = 2; // Although the transformations are reversible, the inverses are lossy and do // not return the exact input INT_FEATURE_STRUCT, due to the many->one nature // of both transformations. -class IntFeatureMap { +class TESS_COMMON_TRAINING_API IntFeatureMap { public: IntFeatureMap(); ~IntFeatureMap(); diff --git a/src/training/lang_model_helpers.cpp b/src/training/lang_model_helpers.cpp index 79f85fc14..1bce9cbb0 100644 --- a/src/training/lang_model_helpers.cpp +++ b/src/training/lang_model_helpers.cpp @@ -12,20 +12,24 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include "lang_model_helpers.h" -#if defined(_WIN32) -#include -#endif -#include -#include -#include #include "dawg.h" #include "fileio.h" #include "tessdatamanager.h" #include "trie.h" #include "unicharcompress.h" +#include + +#include +#include + +#if defined(_WIN32) +#include +#endif + namespace tesseract { // Helper makes a filename (//) and writes data diff --git a/src/training/lang_model_helpers.h b/src/training/lang_model_helpers.h index 08401b7bf..9a568da8f 100644 --- a/src/training/lang_model_helpers.h +++ b/src/training/lang_model_helpers.h @@ -28,11 +28,13 @@ namespace tesseract { // Default writer will overwrite any existing file, but a supplied writer // can do its own thing. 
If lang is empty, returns true but does nothing. // NOTE that suffix should contain any required . for the filename. +TESS_UNICHARSET_TRAINING_API bool WriteFile(const std::string& output_dir, const std::string& lang, const std::string& suffix, const std::vector& data, FileWriter writer); // Helper reads a file with optional reader and returns a STRING. // On failure emits a warning message and returns and empty STRING. +TESS_UNICHARSET_TRAINING_API STRING ReadFile(const std::string& filename, FileReader reader); // Helper writes the unicharset to file and to the traineddata. @@ -70,6 +72,7 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, // puncs must be non-empty. // lang_is_rtl indicates that the language is generally written from right // to left (eg Arabic/Hebrew). +TESS_UNICHARSET_TRAINING_API int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, const std::string& version_str, const std::string& output_dir, const std::string& lang, bool pass_through_recoder, diff --git a/src/training/ligature_table.h b/src/training/ligature_table.h index 97baea8b6..0fd1f40fc 100644 --- a/src/training/ligature_table.h +++ b/src/training/ligature_table.h @@ -35,7 +35,7 @@ class PangoFontInfo; // defined in pango_font_info.h // Map to substitute strings for ligatures. using LigHash = std::unordered_map; -class LigatureTable { +class TESS_PANGO_TRAINING_API LigatureTable { public: // Get a static instance of this class. 
static LigatureTable* Get(); diff --git a/src/training/lstmtester.h b/src/training/lstmtester.h index a7b5be637..3ce491df3 100644 --- a/src/training/lstmtester.h +++ b/src/training/lstmtester.h @@ -25,7 +25,7 @@ namespace tesseract { -class LSTMTester { +class TESS_UNICHARSET_TRAINING_API LSTMTester { public: LSTMTester(int64_t max_memory); diff --git a/src/training/lstmtrainer.h b/src/training/lstmtrainer.h index fad939c90..09164c06a 100644 --- a/src/training/lstmtrainer.h +++ b/src/training/lstmtrainer.h @@ -76,7 +76,7 @@ using TestCallback = std::functioncharsetsize()); } + +// Sets the mapped_features_ from the features using the provided +// feature_map. +static void MapFeatures(TrainingSample &s, const IntFeatureMap& feature_map) { + GenericVector indexed_features; + feature_map.feature_space().IndexAndSortFeatures(s.features(), s.num_features(), + &indexed_features); + feature_map.MapIndexedFeatures(indexed_features, &s.mapped_features_); + s.features_are_indexed_ = false; + s.features_are_mapped_ = true; +} + // Apply the supplied feature_space/feature_map transform to all samples // accessed by this iterator. 
void SampleIterator::MapSampleFeatures(const IntFeatureMap& feature_map) { for (Begin(); !AtEnd(); Next()) { TrainingSample* sample = MutableSample(); - sample->MapFeatures(feature_map); + MapFeatures(*sample, feature_map); } } diff --git a/src/training/stringrenderer.h b/src/training/stringrenderer.h index b1bb8c092..c3d6687c4 100644 --- a/src/training/stringrenderer.h +++ b/src/training/stringrenderer.h @@ -47,7 +47,7 @@ namespace tesseract { class BoxChar; -class StringRenderer { +class TESS_PANGO_TRAINING_API StringRenderer { public: StringRenderer(const std::string& font_desc, int page_width, int page_height); ~StringRenderer(); diff --git a/src/training/tessopt.cpp b/src/training/tessopt.cpp index 15611c4c0..404700c03 100644 --- a/src/training/tessopt.cpp +++ b/src/training/tessopt.cpp @@ -17,9 +17,10 @@ * **********************************************************************/ +#include "tessopt.h" + #include #include -#include "tessopt.h" int tessoptind; char *tessoptarg; @@ -30,10 +31,10 @@ char *tessoptarg; * parse command line args. 
**********************************************************************/ -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +int tessopt( + int argc, + char *argv[], + const char *arglist //string of arg chars ) { const char *arg; //arg char diff --git a/src/training/tessopt.h b/src/training/tessopt.h index 42b27f99e..b42a4d5fd 100644 --- a/src/training/tessopt.h +++ b/src/training/tessopt.h @@ -19,15 +19,14 @@ #ifndef TESSERACT_TRAINING_TESSOPT_H_ #define TESSERACT_TRAINING_TESSOPT_H_ -#include // for int32_t - extern int tessoptind; extern char *tessoptarg; -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +//parse args +int tessopt( + int argc, // arg count + char *argv[], // args + const char *arglist // string of arg chars ); #endif // TESSERACT_TRAINING_TESSOPT_H_ diff --git a/src/training/tlog.h b/src/training/tlog.h index 29db457c9..1af3a88b1 100644 --- a/src/training/tlog.h +++ b/src/training/tlog.h @@ -24,6 +24,7 @@ #include "errcode.h" #include "tprintf.h" +TESS_PANGO_TRAINING_API DECLARE_INT_PARAM_FLAG(tlog_level); // Variant guarded by the numeric logging level parameter FLAGS_tlog_level diff --git a/src/training/trainingsampleset.h b/src/training/trainingsampleset.h index 854786077..3d022eff7 100644 --- a/src/training/trainingsampleset.h +++ b/src/training/trainingsampleset.h @@ -285,5 +285,4 @@ class TrainingSampleSet { } // namespace tesseract. - #endif // TRAININGSAMPLESETSET_H_ diff --git a/src/training/unicharset_training_utils.h b/src/training/unicharset_training_utils.h index 1edc42c88..16aa0c64e 100644 --- a/src/training/unicharset_training_utils.h +++ b/src/training/unicharset_training_utils.h @@ -31,6 +31,7 @@ class UNICHARSET; // Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. 
+TESS_UNICHARSET_TRAINING_API void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET* unicharset); // Default behavior is to compose, until it is proven that decomposed benefits @@ -39,6 +40,7 @@ inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { SetupBasicProperties(report_errors, false, unicharset); } // Helper sets the properties from universal script unicharsets, if found. +TESS_UNICHARSET_TRAINING_API void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset); // Helper gets the combined x-heights string. std::string GetXheightString(const std::string& script_dir, const UNICHARSET& unicharset); @@ -48,6 +50,7 @@ std::string GetXheightString(const std::string& script_dir, const UNICHARSET& un // script_dir directory, then the tops and bottoms are expanded using the // script unicharset. // If non-empty, xheight data for the fonts are written to the xheights_file. +TESS_UNICHARSET_TRAINING_API void SetPropertiesForInputFile(const std::string& script_dir, const std::string& input_unicharset_file, const std::string& output_unicharset_file, diff --git a/src/training/validator.h b/src/training/validator.h index a51c809d1..0e94f2107 100644 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -68,7 +68,7 @@ enum class ViramaScript : char32 { // Base class offers a validation API and protected methods to allow subclasses // to easily build the validated/segmented output. -class Validator { +class TESS_UNICHARSET_TRAINING_API Validator { public: // Validates and cleans the src vector of unicodes to the *dest, according to // g_mode. 
In the case of kSingleString, a single vector containing the whole diff --git a/src/viewer/scrollview.h b/src/viewer/scrollview.h index cbbbda221..a23de6a27 100644 --- a/src/viewer/scrollview.h +++ b/src/viewer/scrollview.h @@ -95,8 +95,7 @@ class SVEventHandler { // Each ScrollView class instance represents one window, and stuff is drawn in // the window through method calls on the class. The constructor is used to // create the class instance (and the window). - -class ScrollView { +class TESS_API ScrollView { public: // Color enum for pens and brushes. enum Color { diff --git a/src/wordrec/params_model.h b/src/wordrec/params_model.h index 431fb3bee..63d1aa1fe 100644 --- a/src/wordrec/params_model.h +++ b/src/wordrec/params_model.h @@ -28,7 +28,7 @@ namespace tesseract { class TFile; // Represents the learned weights for a given language. -class ParamsModel { +class TESS_API ParamsModel { public: // Enum for expressing OCR pass. enum PassEnum { diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h index 3bdaffa14..a9f7d31fc 100644 --- a/src/wordrec/wordrec.h +++ b/src/wordrec/wordrec.h @@ -36,7 +36,7 @@ namespace tesseract { /* ccmain/tstruct.cpp */ -class Wordrec : public Classify { +class TESS_API Wordrec : public Classify { public: // config parameters @@ -190,7 +190,7 @@ class FRAGMENT:public ELIST_LINK ELISTIZEH(FRAGMENT) -class Wordrec : public Classify { +class TESS_API Wordrec : public Classify { public: // config parameters ******************************************************* BOOL_VAR_H(merge_fragments_in_matrix, true, diff --git a/sw.cpp b/sw.cpp index f46aa188d..c30f5c275 100644 --- a/sw.cpp +++ b/sw.cpp @@ -9,11 +9,11 @@ void build(Solution &s) { libtesseract.setChecks("libtesseract"); - libtesseract.ExportAllSymbols = true; libtesseract.PackageDefinitions = true; libtesseract += cppstd; + libtesseract += "TESS_API"_api; libtesseract += "include/.*"_rr; libtesseract += "src/.*"_rr; libtesseract -= "src/lstm/.*\\.cc"_rr; @@ -83,8 +83,6 @@ void 
build(Solution &s) libtesseract.Public += "HAVE_CONFIG_H"_d; libtesseract.Public += "_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1"_d; libtesseract.Public += "HAVE_LIBARCHIVE"_d; - libtesseract.Interface += sw::Shared, "TESS_IMPORTS"_d; - libtesseract.Private += sw::Shared, "TESS_EXPORTS"_d; libtesseract.Public += "org.sw.demo.danbloomberg.leptonica"_dep; libtesseract.Public += "org.sw.demo.libarchive.libarchive"_dep; @@ -124,8 +122,9 @@ void build(Solution &s) } // - auto &common_training = training.addStaticLibrary("common_training"); + auto &common_training = training.addLibrary("common_training"); { + common_training += "TESS_COMMON_TRAINING_API"_api; common_training += cppstd; common_training += "src/training/commandlineflags.cpp", @@ -152,8 +151,9 @@ void build(Solution &s) } // - auto &unicharset_training = training.addStaticLibrary("unicharset_training"); + auto &unicharset_training = training.addLibrary("unicharset_training"); { + unicharset_training += "TESS_UNICHARSET_TRAINING_API"_api; unicharset_training += cppstd; unicharset_training += "src/training/fileio.*"_rr, @@ -177,24 +177,25 @@ void build(Solution &s) n.Public += __VA_ARGS__; \ n - ADD_EXE(ambiguous_words, libtesseract); + ADD_EXE(ambiguous_words, common_training); ADD_EXE(classifier_tester, common_training); ADD_EXE(combine_lang_model, unicharset_training); - ADD_EXE(combine_tessdata, libtesseract); + ADD_EXE(combine_tessdata, common_training); ADD_EXE(cntraining, common_training); - ADD_EXE(dawg2wordlist, libtesseract); + ADD_EXE(dawg2wordlist, common_training); ADD_EXE(mftraining, common_training) += "src/training/mergenf.*"_rr; ADD_EXE(shapeclustering, common_training); ADD_EXE(unicharset_extractor, unicharset_training); - ADD_EXE(wordlist2dawg, libtesseract); + ADD_EXE(wordlist2dawg, common_training); ADD_EXE(lstmeval, unicharset_training); ADD_EXE(lstmtraining, unicharset_training); ADD_EXE(set_unicharset_properties, unicharset_training); - ADD_EXE(merge_unicharsets, tessopt); + 
ADD_EXE(merge_unicharsets, common_training); // - auto &pango_training = training.addStaticLibrary("pango_training"); + auto &pango_training = training.addLibrary("pango_training"); { + pango_training += "TESS_PANGO_TRAINING_API"_api; pango_training += cppstd; pango_training += "src/training/boxchar.cpp", @@ -218,9 +219,6 @@ void build(Solution &s) text2image += "src/training/degradeimage.cpp", "src/training/degradeimage.h", - "src/training/icuerrorcode.h", - "src/training/normstrngs.cpp", - "src/training/normstrngs.h", "src/training/text2image.cpp", "src/training/util.h" ; @@ -229,6 +227,7 @@ void build(Solution &s) if (!s.getExternalVariables()["with-tests"]) return; + // tests { auto &test = tess.addDirectory("test"); test.Scope = TargetScope::Test; diff --git a/unittest/ligature_table_test.cc b/unittest/ligature_table_test.cc index b4d1598c7..49f0ec40b 100644 --- a/unittest/ligature_table_test.cc +++ b/unittest/ligature_table_test.cc @@ -15,9 +15,6 @@ #include "ligature_table.h" #include "pango_font_info.h" -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); - namespace tesseract { const char kEngNonLigatureText[] = "fidelity effigy ſteep"; diff --git a/unittest/pango_font_info_test.cc b/unittest/pango_font_info_test.cc index 614484c56..1d6287363 100644 --- a/unittest/pango_font_info_test.cc +++ b/unittest/pango_font_info_test.cc @@ -22,10 +22,6 @@ #include "util/utf8/unicodetext.h" // for UnicodeText #endif -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); -DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts); - namespace tesseract { // Fonts in testdata directory diff --git a/unittest/stringrenderer_test.cc b/unittest/stringrenderer_test.cc index 8624df2bc..e403e03a0 100644 --- a/unittest/stringrenderer_test.cc +++ b/unittest/stringrenderer_test.cc @@ -24,11 +24,6 @@ BOOL_PARAM_FLAG(display, false, "Display image for inspection"); -// Flags defined in pango_font_info.cpp 
-DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts); -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); - namespace tesseract { const char kEngText[] = "the quick brown fox jumps over the lazy dog";