diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index e86ba571b..b3755142e 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -736,7 +736,7 @@ class TESS_API TessBaseAPI { protected: /** Common code for setting the image. Returns true if Init has been called. */ - TESS_LOCAL bool InternalSetImage(); + bool InternalSetImage(); /** * Run the thresholder to make the thresholded image. If pix is not nullptr, @@ -748,7 +748,7 @@ class TESS_API TessBaseAPI { * Find lines from the image making the BLOCK_LIST. * @return 0 on success. */ - TESS_LOCAL int FindLines(); + int FindLines(); /** Delete the pageres and block list ready for a new page. */ void ClearResults(); @@ -758,7 +758,7 @@ class TESS_API TessBaseAPI { * to ignore all BiDi smarts at that point. * delete once you're done with it. */ - TESS_LOCAL LTRResultIterator* GetLTRIterator(); + LTRResultIterator* GetLTRIterator(); /** * Return the length of the output text string, as UTF8, assuming @@ -766,12 +766,12 @@ class TESS_API TessBaseAPI { * and assuming a single character reject marker for each rejected character. * Also return the number of recognized blobs in blob_count. */ - TESS_LOCAL int TextLength(int* blob_count); + int TextLength(int* blob_count); //// paragraphs.cpp //////////////////////////////////////////////////// - TESS_LOCAL void DetectParagraphs(bool after_text_recognition); + void DetectParagraphs(bool after_text_recognition); - TESS_LOCAL const PAGE_RES* GetPageRes() const { + const PAGE_RES* GetPageRes() const { return page_res_; } diff --git a/include/tesseract/ltrresultiterator.h b/include/tesseract/ltrresultiterator.h index 64c82dc10..56a30a4f4 100644 --- a/include/tesseract/ltrresultiterator.h +++ b/include/tesseract/ltrresultiterator.h @@ -183,7 +183,7 @@ class TESS_API LTRResultIterator : public PageIterator { }; // Class to iterate over the classifier choices for a single RIL_SYMBOL. -class ChoiceIterator { +class TESS_API ChoiceIterator { public: // Construction is from a LTRResultIterator that points to the symbol of // interest. The ChoiceIterator allows a one-shot iteration over the diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h index 436be63cd..92076cc88 100644 --- a/include/tesseract/pageiterator.h +++ b/include/tesseract/pageiterator.h @@ -319,7 +319,7 @@ class TESS_API PageIterator { * Sets up the internal data for iterating the blobs of a new word, then * moves the iterator to the given offset. */ - TESS_LOCAL void BeginWord(int offset); + void BeginWord(int offset); /** Pointer to the page_res owned by the API. */ PAGE_RES* page_res_; diff --git a/include/tesseract/platform.h b/include/tesseract/platform.h index 51ae2963f..0f54fef8a 100644 --- a/include/tesseract/platform.h +++ b/include/tesseract/platform.h @@ -15,10 +15,11 @@ // /////////////////////////////////////////////////////////////////////// -#ifndef TESSERACT_CCUTIL_PLATFORM_H_ -#define TESSERACT_CCUTIL_PLATFORM_H_ +#ifndef TESSERACT_PLATFORM_H_ +#define TESSERACT_PLATFORM_H_ -#if defined(_WIN32) || defined(__CYGWIN__) +#ifndef TESS_API +# if defined(_WIN32) || defined(__CYGWIN__) # if defined(TESS_EXPORTS) # define TESS_API __declspec(dllexport) # elif defined(TESS_IMPORTS) @@ -26,20 +27,11 @@ # else # define TESS_API # endif -# define TESS_LOCAL -#else -# if __GNUC__ >= 4 -# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) +# else +# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS) # define TESS_API __attribute__((visibility("default"))) -# define TESS_LOCAL __attribute__((visibility("hidden"))) -# else -# define TESS_API -# define TESS_LOCAL -# endif -# else -# define TESS_API -# define TESS_LOCAL # endif +# endif #endif -#endif // TESSERACT_CCUTIL_PLATFORM_H_ +#endif // TESSERACT_PLATFORM_H_ diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h index 0f0321d7b..fc3207432 100644 --- a/include/tesseract/resultiterator.h +++ b/include/tesseract/resultiterator.h @@ -144,7 +144,7 @@ class TESS_API ResultIterator : public LTRResultIterator { * it resets to the beginning of the paragraph instead of staying wherever * resit might have pointed. */ - TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit); + explicit ResultIterator(const LTRResultIterator& resit); private: /** diff --git a/include/tesseract/unichar.h b/include/tesseract/unichar.h index 5202173bc..9e97e3d4f 100644 --- a/include/tesseract/unichar.h +++ b/include/tesseract/unichar.h @@ -55,7 +55,7 @@ using char32 = signed int; // a single Unicode character (stored as between 1 and 4 utf8 bytes) or // multiple Unicode characters representing the NFKC expansion of a ligature // such as fi, ffl etc. These are also stored as utf8. -class UNICHAR { +class TESS_API UNICHAR { public: UNICHAR() { memset(chars, 0, UNICHAR_LEN); @@ -105,7 +105,7 @@ class UNICHAR { // int char_len = it.get_utf8(buf); buf[char_len] = '\0'; // tprintf("Char = %s\n", buf); // } - class const_iterator { + class TESS_API const_iterator { using CI = const_iterator; public: diff --git a/src/arch/intsimdmatrix.h b/src/arch/intsimdmatrix.h index f8f13ea8c..b130f5f1d 100644 --- a/src/arch/intsimdmatrix.h +++ b/src/arch/intsimdmatrix.h @@ -61,7 +61,7 @@ class GenericVector; // NOTE that, although the subclasses execute on different SIMD hardware, no // virtual methods are needed, as the constructor sets up everything that // is required to allow the base class implementation to do all the work. -struct IntSimdMatrix { +struct TESS_API IntSimdMatrix { // Computes a reshaped copy of the weight matrix w. void Init(const GENERIC_2D_ARRAY& w, std::vector& shaped_w, @@ -115,12 +115,12 @@ struct IntSimdMatrix { // Number of groups of inputs to be broadcast. // num_input_groups_ = num_inputs_per_register_ / num_inputs_per_group_ - static TESS_API const IntSimdMatrix* intSimdMatrix; + static const IntSimdMatrix* intSimdMatrix; // Only available with NEON. - static TESS_API const IntSimdMatrix intSimdMatrixNEON; + static const IntSimdMatrix intSimdMatrixNEON; // Only available with AVX2 / SSE. - static TESS_API const IntSimdMatrix intSimdMatrixAVX2; - static TESS_API const IntSimdMatrix intSimdMatrixSSE; + static const IntSimdMatrix intSimdMatrixAVX2; + static const IntSimdMatrix intSimdMatrixSSE; }; } // namespace tesseract diff --git a/src/ccmain/equationdetect.h b/src/ccmain/equationdetect.h index 425e46d62..ffa418fee 100644 --- a/src/ccmain/equationdetect.h +++ b/src/ccmain/equationdetect.h @@ -35,7 +35,7 @@ class ColPartition; class ColPartitionGrid; class ColPartitionSet; -class EquationDetect : public EquationDetectBase { +class TESS_API EquationDetect : public EquationDetectBase { public: EquationDetect(const char* equ_datapath, const char* equ_language); diff --git a/src/ccmain/mutableiterator.h b/src/ccmain/mutableiterator.h index 2e6f51bc1..de3a36120 100644 --- a/src/ccmain/mutableiterator.h +++ b/src/ccmain/mutableiterator.h @@ -40,7 +40,7 @@ class Tesseract; // ResultIterator adds text-specific methods for access to OCR output. // MutableIterator adds access to internal data structures. -class MutableIterator : public ResultIterator { +class TESS_API MutableIterator : public ResultIterator { public: // See argument descriptions in ResultIterator() MutableIterator(PAGE_RES* page_res, Tesseract* tesseract, diff --git a/src/ccmain/paragraphs.h b/src/ccmain/paragraphs.h index 1e6e3d78d..edf9b8ccf 100644 --- a/src/ccmain/paragraphs.h +++ b/src/ccmain/paragraphs.h @@ -87,6 +87,7 @@ class RowInfo { // paragraphs - this is the actual list of PARA objects. // models - the list of paragraph models referenced by the PARA objects. // caller is responsible for deleting the models. +TESS_API void DetectParagraphs(int debug_level, std::vector *row_infos, GenericVector *row_owners, @@ -98,6 +99,7 @@ void DetectParagraphs(int debug_level, // saving the ParagraphModels in models. Caller owns the models. // We use unicharset during the function to answer questions such as "is the // first letter of this word upper case?" +TESS_API void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, diff --git a/src/ccmain/paragraphs_internal.h b/src/ccmain/paragraphs_internal.h index 173c6ea17..0a780a90a 100644 --- a/src/ccmain/paragraphs_internal.h +++ b/src/ccmain/paragraphs_internal.h @@ -31,6 +31,7 @@ class UNICHARSET; class WERD_CHOICE; // Return whether the given word is likely to be a list item start word. +TESS_API bool AsciiLikelyListItem(const STRING &word); // Return the first Unicode Codepoint from werd[pos]. @@ -38,11 +39,13 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos); // Set right word attributes given either a unicharset and werd or a utf8 // string. +TESS_API void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea); // Set left word attributes given either a unicharset and werd or a utf8 string. +TESS_API void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea); diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 21cf64222..159b0ea7a 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -175,7 +175,7 @@ struct WordData { using WordRecognizer = void (Tesseract::*)(const WordData&, WERD_RES**, PointerVector*); -class Tesseract : public Wordrec { +class TESS_API Tesseract : public Wordrec { public: Tesseract(); ~Tesseract() override; diff --git a/src/ccstruct/boxread.h b/src/ccstruct/boxread.h index 3843fb688..6a27d7aaf 100644 --- a/src/ccstruct/boxread.h +++ b/src/ccstruct/boxread.h @@ -32,6 +32,7 @@ const int kBoxReadBufSize = 1024; // Open the boxfile based on the given image filename. // Returns nullptr if the box file cannot be opened. +TESS_API FILE* OpenBoxFile(const char* filename); // Reads all boxes from the given filename. @@ -51,6 +52,7 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename, // continue_on_failure allows reading to continue even if an invalid box is // encountered and will return true if it succeeds in reading some boxes. // It otherwise gives up and returns false on encountering an invalid box. +TESS_API bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, bool continue_on_failure, std::vector* boxes, @@ -66,20 +68,24 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data, // for valid utf-8 and allows space or tab between fields. // utf8_str is set with the unichar string, and bounding box with the box. // If there are page numbers in the file, it reads them all. +TESS_API bool ReadNextBox(int *line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box); // As ReadNextBox above, but get a specific page number. (0-based) // Use -1 to read any page number. Files without page number all // read as if they are page 0. +TESS_API bool ReadNextBox(int target_page, int *line_number, FILE* box_file, STRING* utf8_str, TBOX* bounding_box); // Parses the given box file string into a page_number, utf8_str, and // bounding_box. Returns true on a successful parse. +TESS_API bool ParseBoxFileStr(const char* boxfile_str, int* page_number, STRING* utf8_str, TBOX* bounding_box); // Creates a box file string from a unichar string, TBOX and page number. +TESS_API void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num, STRING* box_str); diff --git a/src/ccstruct/ccstruct.h b/src/ccstruct/ccstruct.h index 5d095864d..7af2b8081 100644 --- a/src/ccstruct/ccstruct.h +++ b/src/ccstruct/ccstruct.h @@ -22,7 +22,7 @@ #include "ccutil.h" // for CCUtil namespace tesseract { -class CCStruct : public CCUtil { +class TESS_API CCStruct : public CCUtil { public: CCStruct() = default; ~CCStruct() override; diff --git a/src/ccstruct/fontinfo.h b/src/ccstruct/fontinfo.h index 5daab581f..63d5fdfae 100644 --- a/src/ccstruct/fontinfo.h +++ b/src/ccstruct/fontinfo.h @@ -146,26 +146,34 @@ struct FontSet { // are replaced. class FontInfoTable : public GenericVector { public: + TESS_API // when you remove inheritance from GenericVector, move this on class level FontInfoTable(); + TESS_API ~FontInfoTable(); // Writes to the given file. Returns false in case of error. + TESS_API bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. + TESS_API bool DeSerialize(TFile* fp); // Returns true if the given set of fonts includes one with the same // properties as font_id. + TESS_API bool SetContainsFontProperties( int font_id, const GenericVector& font_set) const; // Returns true if the given set of fonts includes multiple properties. + TESS_API bool SetContainsMultipleFontProperties( const GenericVector& font_set) const; // Moves any non-empty FontSpacingInfo entries from other to this. + TESS_API void MoveSpacingInfoFrom(FontInfoTable* other); // Moves this to the target unicity table. + TESS_API void MoveTo(UnicityTable* target); }; diff --git a/src/ccstruct/imagedata.h b/src/ccstruct/imagedata.h index 4c253cbd2..f5901166a 100644 --- a/src/ccstruct/imagedata.h +++ b/src/ccstruct/imagedata.h @@ -104,7 +104,7 @@ struct FloatWordFeature { // The text transcription is the ground truth UTF-8 text for the image. // Character boxes are optional and indicate the desired segmentation of // the text into recognition units. -class ImageData { +class TESS_API ImageData { public: ImageData(); // Takes ownership of the pix. @@ -213,19 +213,24 @@ class ImageData { // A collection of ImageData that knows roughly how much memory it is using. class DocumentData { public: + TESS_API explicit DocumentData(const STRING& name); + TESS_API ~DocumentData(); // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. + TESS_API bool LoadDocument(const char* filename, int start_page, int64_t max_memory, FileReader reader); // Sets up the document, without actually loading it. void SetDocument(const char* filename, int64_t max_memory, FileReader reader); // Writes all the pages to the given filename. Returns false on error. + TESS_API bool SaveDocument(const char* filename, FileWriter writer); // Adds the given page data to this document, counting up memory. + TESS_API void AddPageToDocument(ImageData* page); const STRING& document_name() const { @@ -257,6 +262,7 @@ class DocumentData { void LoadPageInBackground(int index); // Returns a pointer to the page with the given index, modulo the total // number of pages. Blocks until the background load is completed. + TESS_API const ImageData* GetPage(int index); // Returns true if the requested page is available, and provides a pointer, // which may be nullptr if the document is empty. May block, even though it @@ -325,7 +331,9 @@ class DocumentData { // content. class DocumentCache { public: + TESS_API explicit DocumentCache(int64_t max_memory); + TESS_API ~DocumentCache(); // Deletes all existing documents from the cache. @@ -335,6 +343,7 @@ class DocumentCache { } // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. + TESS_API bool LoadDocuments(const std::vector& filenames, CachingStrategy cache_strategy, FileReader reader); @@ -358,16 +367,19 @@ class DocumentCache { } // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache // strategy, could take a long time. + TESS_API int TotalPages(); private: // Returns a page by serial number, selecting them in a round-robin fashion // from all the documents. Highly disk-intensive, but doesn't need samples // to be shuffled between files to begin with. + TESS_API const ImageData* GetPageRoundRobin(int serial); // Returns a page by serial number, selecting them in sequence from each file. // Requires the samples to be shuffled between the files to give a random or // uniform distribution of data. Less disk-intensive than GetPageRoundRobin. + TESS_API const ImageData* GetPageSequential(int serial); // Helper counts the number of adjacent cached neighbour documents_ of index diff --git a/src/ccstruct/linlsq.h b/src/ccstruct/linlsq.h index eb0272a70..c8654a77d 100644 --- a/src/ccstruct/linlsq.h +++ b/src/ccstruct/linlsq.h @@ -28,7 +28,7 @@ namespace tesseract { template class GenericVector; -class LLSQ { +class TESS_API LLSQ { public: LLSQ() { // constructor clear(); // set to zeros diff --git a/src/ccstruct/normalis.h b/src/ccstruct/normalis.h index 4b5c6b18a..1dc9e867f 100644 --- a/src/ccstruct/normalis.h +++ b/src/ccstruct/normalis.h @@ -46,7 +46,7 @@ enum NormalizationMode { NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. }; -class DENORM { +class TESS_API DENORM { public: DENORM(); diff --git a/src/ccstruct/ocrblock.h b/src/ccstruct/ocrblock.h index fb04967c6..e4062a12b 100644 --- a/src/ccstruct/ocrblock.h +++ b/src/ccstruct/ocrblock.h @@ -27,8 +27,9 @@ namespace tesseract { class BLOCK; //forward decl -ELISTIZEH (BLOCK) -class BLOCK : public ELIST_LINK +ELISTIZEH(BLOCK) + +class TESS_API BLOCK : public ELIST_LINK //page block { friend class BLOCK_RECT_IT; //block iterator diff --git a/src/ccstruct/ocrpara.h b/src/ccstruct/ocrpara.h index 754a3fe27..dec83c2ec 100644 --- a/src/ccstruct/ocrpara.h +++ b/src/ccstruct/ocrpara.h @@ -114,7 +114,7 @@ ELISTIZEH(PARA) // |you can try to identify source | // |code. Ouch! | // +--------------------------------+ -class ParagraphModel { +class TESS_API ParagraphModel { public: ParagraphModel(tesseract::ParagraphJustification justification, int margin, diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 627467d15..814848f69 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -164,7 +164,7 @@ enum CRUNCH_MODE // WERD_RES is a collection of publicly accessible members that gathers // information about a word result. -class WERD_RES : public ELIST_LINK { +class TESS_API WERD_RES : public ELIST_LINK { public: // Which word is which? // There are 3 coordinate spaces in use here: a possibly rotated pixel space, @@ -673,7 +673,7 @@ class WERD_RES : public ELIST_LINK { * PAGE_RES_IT - Page results iterator *************************************************************************/ -class PAGE_RES_IT { +class TESS_API PAGE_RES_IT { public: PAGE_RES * page_res; // page being iterated diff --git a/src/ccstruct/points.h b/src/ccstruct/points.h index 7372a05a6..92e472004 100644 --- a/src/ccstruct/points.h +++ b/src/ccstruct/points.h @@ -189,7 +189,8 @@ class ICOORDELT : public ELIST_LINK, public ICOORD }; ELISTIZEH (ICOORDELT) -class FCOORD + +class TESS_API FCOORD { public: ///empty constructor diff --git a/src/ccstruct/polyblk.h b/src/ccstruct/polyblk.h index 860c2390f..3bbd37e41 100644 --- a/src/ccstruct/polyblk.h +++ b/src/ccstruct/polyblk.h @@ -27,7 +27,7 @@ namespace tesseract { -class POLY_BLOCK { +class TESS_API POLY_BLOCK { public: POLY_BLOCK() = default; // Initialize from box coordinates. diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h index b7206a163..d1467f178 100644 --- a/src/ccstruct/ratngs.h +++ b/src/ccstruct/ratngs.h @@ -263,7 +263,7 @@ enum ScriptPos { const char *ScriptPosToString(ScriptPos script_pos); -class WERD_CHOICE : public ELIST_LINK { +class TESS_API WERD_CHOICE : public ELIST_LINK { public: static const float kBadRating; static const char *permuter_name(uint8_t permuter); diff --git a/src/ccstruct/rect.h b/src/ccstruct/rect.h index 72fff2973..3900f9f22 100644 --- a/src/ccstruct/rect.h +++ b/src/ccstruct/rect.h @@ -35,7 +35,7 @@ namespace tesseract { class STRING; -class TBOX { // bounding box +class TESS_API TBOX { // bounding box public: TBOX (): // empty constructor making a null box bot_left (INT16_MAX, INT16_MAX), top_right (-INT16_MAX, -INT16_MAX) { diff --git a/src/ccstruct/statistc.h b/src/ccstruct/statistc.h index 2534c5092..cc21c60fc 100644 --- a/src/ccstruct/statistc.h +++ b/src/ccstruct/statistc.h @@ -29,7 +29,7 @@ template class GenericVector; // Simple histogram-based statistics for integer values in a known // range, such that the range is small compared to the number of samples. -class STATS { +class TESS_API STATS { public: // The histogram buckets are in the range // [min_bucket_value, max_bucket_value_plus_1 - 1] i.e. diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h index 0203f7514..b1dfcfcfa 100644 --- a/src/ccstruct/stepblob.h +++ b/src/ccstruct/stepblob.h @@ -37,7 +37,7 @@ class DENORM; ELISTIZEH(C_BLOB) -class C_BLOB:public ELIST_LINK +class TESS_API C_BLOB : public ELIST_LINK { public: C_BLOB() = default; diff --git a/src/ccstruct/werd.h b/src/ccstruct/werd.h index 4c5cf616b..fa4f07b93 100644 --- a/src/ccstruct/werd.h +++ b/src/ccstruct/werd.h @@ -56,7 +56,7 @@ enum DISPLAY_FLAGS { class ROW; // forward decl -class WERD : public ELIST2_LINK { +class TESS_API WERD : public ELIST2_LINK { public: WERD() = default; // WERD constructed with: diff --git a/src/ccutil/bitvector.h b/src/ccutil/bitvector.h index 972bbf24a..216ba8452 100644 --- a/src/ccutil/bitvector.h +++ b/src/ccutil/bitvector.h @@ -27,7 +27,7 @@ namespace tesseract { // Trivial class to encapsulate a fixed-length array of bits, with // Serialize/DeSerialize. Replaces the old macros. -class BitVector { +class TESS_API BitVector { public: // Fast lookup table to get the first least significant set bit in a byte. // For zero, the table has 255, but since it is a special case, most code diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h index 5613bf9ce..5cc39f2b8 100644 --- a/src/ccutil/ccutil.h +++ b/src/ccutil/ccutil.h @@ -41,7 +41,7 @@ namespace tesseract { -class CCUtil { +class TESS_API CCUtil { public: CCUtil(); virtual ~CCUtil(); diff --git a/src/ccutil/clst.h b/src/ccutil/clst.h index 667a85d4b..a826d68b3 100644 --- a/src/ccutil/clst.h +++ b/src/ccutil/clst.h @@ -69,7 +69,7 @@ class CLIST_LINK * Generic list class for singly linked CONS cell lists **********************************************************************/ -class CLIST +class TESS_API CLIST { friend class CLIST_ITERATOR; @@ -144,7 +144,7 @@ class CLIST *links **********************************************************************/ -class CLIST_ITERATOR +class TESS_API CLIST_ITERATOR { friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *); diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index ed5c6bdb2..c22b9817b 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -107,7 +107,7 @@ class ELIST_LINK * Generic list class for singly linked lists with embedded links **********************************************************************/ -class ELIST +class TESS_API ELIST { friend class ELIST_ITERATOR; @@ -181,7 +181,7 @@ class ELIST * Generic iterator class for singly linked lists with embedded links **********************************************************************/ -class ELIST_ITERATOR +class TESS_API ELIST_ITERATOR { friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); @@ -849,7 +849,7 @@ ELISTIZEH_C. #define ELISTIZEH_A(CLASSNAME) \ \ -extern void CLASSNAME##_zapper(ELIST_LINK* link); +TESS_API extern void CLASSNAME##_zapper(ELIST_LINK* link); #define ELISTIZEH_B(CLASSNAME) \ \ diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index 8b2befb56..b21a4c0ca 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -88,7 +88,7 @@ class ELIST2_LINK * Generic list class for doubly linked lists with embedded links **********************************************************************/ -class ELIST2 +class TESS_API ELIST2 { friend class ELIST2_ITERATOR; @@ -151,7 +151,7 @@ class ELIST2 *links **********************************************************************/ -class ELIST2_ITERATOR +class TESS_API ELIST2_ITERATOR { friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *); @@ -858,7 +858,7 @@ ELIST2IZEH_C. #define ELIST2IZEH_A(CLASSNAME) \ \ - extern void CLASSNAME##_zapper( /*delete a link*/ \ + TESS_API extern void CLASSNAME##_zapper( /*delete a link*/ \ ELIST2_LINK *link); /*link to delete*/ #define ELIST2IZEH_B(CLASSNAME) \ diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index e358d9853..939577bf5 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -310,14 +310,14 @@ class GenericVector { } // Returns true if all elements of *this are within the given range. // Only uses operator< - bool WithinBounds(const T& rangemin, const T& rangemax) const { + /*bool WithinBounds(const T& rangemin, const T& rangemax) const { for (int i = 0; i < size_used_; ++i) { if (data_[i] < rangemin || rangemax < data_[i]) { return false; } } return true; - } + }*/ protected: // Internal recursive version of choose_nth_item. diff --git a/src/ccutil/indexmapbidi.h b/src/ccutil/indexmapbidi.h index 15bb840af..d42bead5e 100644 --- a/src/ccutil/indexmapbidi.h +++ b/src/ccutil/indexmapbidi.h @@ -39,7 +39,7 @@ class IndexMapBiDi; // IndexMapBiDi below. // NOTE: there are currently no methods to setup an IndexMap on its own! // It must be initialized by copying from an IndexMapBiDi or by DeSerialize. -class IndexMap { +class TESS_API IndexMap { public: virtual ~IndexMap(); @@ -99,7 +99,7 @@ class IndexMap { // for ... Merge(index1, index2); // CompleteMerges(); // Allows a many-to-one mapping by merging compact space indices. -class IndexMapBiDi : public IndexMap { +class TESS_API IndexMapBiDi : public IndexMap { public: ~IndexMapBiDi() override; diff --git a/src/ccutil/params.h b/src/ccutil/params.h index 27e255e56..988c40be6 100644 --- a/src/ccutil/params.h +++ b/src/ccutil/params.h @@ -47,7 +47,7 @@ struct ParamsVectors { }; // Utility functions for working with Tesseract parameters. -class ParamUtils { +class TESS_API ParamUtils { public: // Reads a file of parameter definitions and set/modify the values therein. // If the filename begins with a + or -, the BoolVariables will be @@ -279,6 +279,7 @@ class DoubleParam : public Param { // // TODO(daria): remove GlobalParams() when all global Tesseract // parameters are converted to members. +TESS_API ParamsVectors* GlobalParams(); /************************************************************************* diff --git a/src/ccutil/scanutils.h b/src/ccutil/scanutils.h index 93381c5df..c89d37ff0 100644 --- a/src/ccutil/scanutils.h +++ b/src/ccutil/scanutils.h @@ -27,6 +27,7 @@ * @note Note that scientific floating-point notation is not supported. * */ +TESS_API int tfscanf(FILE* stream, const char *format, ...); #endif // TESSERACT_CCUTIL_SCANUTILS_H_ diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h index f520b8131..951552fa8 100644 --- a/src/ccutil/serialis.h +++ b/src/ccutil/serialis.h @@ -47,7 +47,9 @@ constexpr size_t countof(T const (&)[N]) noexcept { using FileWriter = bool (*)(const std::vector& data, const char* filename); +TESS_API bool LoadDataFromFile(const char* filename, std::vector* data); +TESS_API bool SaveDataToFile(const std::vector& data, const char* filename); // Deserialize data from file. @@ -64,7 +66,7 @@ bool Serialize(FILE *fp, const T *data, size_t n = 1) { // Simple file class. // Allows for portable file input from memory and from foreign file systems. -class TFile { +class TESS_API TFile { public: TFile(); ~TFile(); diff --git a/src/ccutil/tessdatamanager.h b/src/ccutil/tessdatamanager.h index 8eb935264..f8fff9d8c 100644 --- a/src/ccutil/tessdatamanager.h +++ b/src/ccutil/tessdatamanager.h @@ -123,8 +123,7 @@ static const char *const kTessdataFileSuffixes[] = { */ static const int kMaxNumTessdataEntries = 1000; - -class TessdataManager { +class TESS_API TessdataManager { public: TessdataManager(); explicit TessdataManager(FileReader reader); diff --git a/src/ccutil/unicharcompress.h b/src/ccutil/unicharcompress.h index 9c6ac009f..2c1ccbf57 100644 --- a/src/ccutil/unicharcompress.h +++ b/src/ccutil/unicharcompress.h @@ -125,7 +125,7 @@ class RecodedCharID { // position). For non-CJK, the same code value CAN be used in multiple // positions, eg the ff ligature is converted to , where // is the same code as is used for the single f. -class UnicharCompress { +class TESS_API UnicharCompress { public: UnicharCompress(); UnicharCompress(const UnicharCompress& src); diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h index 8c6e81da5..4026fcda3 100644 --- a/src/ccutil/unicharset.h +++ b/src/ccutil/unicharset.h @@ -49,7 +49,7 @@ enum class OldUncleanUnichars { kTrue, }; -class CHAR_FRAGMENT { +class TESS_API CHAR_FRAGMENT { public: // Minimum number of characters used for fragment representation. static const int kMinLen = 6; @@ -146,15 +146,15 @@ class CHAR_FRAGMENT { // The UNICHARSET class is an utility class for Tesseract that holds the // set of characters that are used by the engine. Each character is identified // by a unique number, from 0 to (size - 1). -class UNICHARSET { +class TESS_API UNICHARSET { public: // Custom list of characters and their ligature forms (UTF8) // These map to unicode values in the private use area (PUC) and are supported // by only few font families (eg. Wyld, Adobe Caslon Pro). - static TESS_API const char* kCustomLigatures[][2]; + static const char* kCustomLigatures[][2]; // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. - static TESS_API const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; + static const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; // ICU 2.0 UCharDirection enum (from icu/include/unicode/uchar.h) enum Direction { @@ -893,7 +893,7 @@ class UNICHARSET { private: - struct UNICHAR_PROPERTIES { + struct TESS_API UNICHAR_PROPERTIES { UNICHAR_PROPERTIES(); // Initializes all properties to sensible default values. void Init(); @@ -996,7 +996,7 @@ class UNICHARSET { // The substitutions clean up text that should exists for rendering of // synthetic data, but not in the recognition set. static const char* kCleanupMaps[][2]; - static TESS_API const char* null_script; + static const char* null_script; std::vector unichars; UNICHARMAP ids; diff --git a/src/classify/classify.h b/src/classify/classify.h index af0901473..0ba63a5d3 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -99,7 +99,7 @@ enum CharSegmentationType { CST_NGRAM // Multiple characters. }; -class Classify : public CCStruct { +class TESS_API Classify : public CCStruct { public: Classify(); ~Classify() override; diff --git a/src/classify/cluster.h b/src/classify/cluster.h index f4b2e7912..8a6a270a6 100644 --- a/src/classify/cluster.h +++ b/src/classify/cluster.h @@ -106,14 +106,19 @@ typedef struct { /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ +TESS_API CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]); +TESS_API SAMPLE* MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID); +TESS_API LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config); +TESS_API void FreeClusterer(CLUSTERER* Clusterer); +TESS_API void FreeProtoList(LIST* ProtoList); void FreePrototype(void* arg); // PROTOTYPE *Prototype); @@ -124,6 +129,7 @@ float Mean(PROTOTYPE* Proto, uint16_t Dimension); float StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension); +TESS_API int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2, float m[], float m1[], float m2[]); diff --git a/src/classify/clusttool.h b/src/classify/clusttool.h index c2ceb597e..ead65618e 100644 --- a/src/classify/clusttool.h +++ b/src/classify/clusttool.h @@ -32,8 +32,10 @@ PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N); PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N); +TESS_API void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]); +TESS_API void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto); } // namespace tesseract diff --git a/src/classify/featdefs.h b/src/classify/featdefs.h index f11985774..eb8c66fed 100644 --- a/src/classify/featdefs.h +++ b/src/classify/featdefs.h @@ -50,8 +50,10 @@ using FEATURE_DEFS = FEATURE_DEFS_STRUCT *; /*---------------------------------------------------------------------- Generic functions for manipulating character descriptions ----------------------------------------------------------------------*/ +TESS_API void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); +TESS_API void FreeCharDescription(CHAR_DESC CharDesc); CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs); @@ -62,9 +64,11 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc, STRING* str); +TESS_API CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File); +TESS_API uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName); diff --git a/src/classify/intfeaturespace.h b/src/classify/intfeaturespace.h index af23af9af..3f21e4d3c 100644 --- a/src/classify/intfeaturespace.h +++ b/src/classify/intfeaturespace.h @@ -35,7 +35,7 @@ class IndexMap; // Down-sampling quantization of the INT_FEATURE_STRUCT feature space and // conversion to a single scalar index value, used as a binary feature space. -class IntFeatureSpace { +class TESS_API IntFeatureSpace { public: IntFeatureSpace(); // Default copy constructors and assignment OK! diff --git a/src/classify/intfx.h b/src/classify/intfx.h index 09f34be4f..5a3b893e5 100644 --- a/src/classify/intfx.h +++ b/src/classify/intfx.h @@ -46,20 +46,22 @@ const double kStandardFeatureLength = 64.0 / 5; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ +TESS_API void InitIntegerFX(); // Returns a vector representing the direction of a feature with the given // theta direction in an INT_FEATURE_STRUCT. +TESS_API FCOORD FeatureDirection(uint8_t theta); - // Generates a TrainingSample from a TBLOB. Extracts features and sets - // the bounding box, so classifiers that operate on the image can work. - // TODO(rays) BlobToTrainingSample must remain a global function until - // the FlexFx and FeatureDescription code can be removed and LearnBlob - // made a member of Classify. - TrainingSample* BlobToTrainingSample( - const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, - GenericVector* bl_features); +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) BlobToTrainingSample must remain a global function until +// the FlexFx and FeatureDescription code can be removed and LearnBlob +// made a member of Classify. +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + GenericVector* bl_features); } // namespace tesseract diff --git a/src/classify/intproto.h b/src/classify/intproto.h index 44b42fc91..1ece69988 100644 --- a/src/classify/intproto.h +++ b/src/classify/intproto.h @@ -232,16 +232,19 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs); INT_TEMPLATES NewIntTemplates(); +TESS_API void free_int_templates(INT_TEMPLATES templates); void ShowMatchDisplay(); // Clears the given window and draws the featurespace guides for the // appropriate normalization method. +TESS_API void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window); /*----------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED +TESS_API void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature, ScrollView::Color color); @@ -253,6 +256,7 @@ void InitFeatureDisplayWindowIfReqd(); // Creates a window of the appropriate size for displaying elements // in feature space. +TESS_API ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos); #endif // !GRAPHICS_DISABLED diff --git a/src/classify/kdtree.h b/src/classify/kdtree.h index 832b12cd2..b85121917 100644 --- a/src/classify/kdtree.h +++ b/src/classify/kdtree.h @@ -81,6 +81,7 @@ void FreeKDNode(KDNODE* Node); float DistanceSquared(int k, PARAM_DESC* dim, float p1[], float p2[]); +TESS_API float ComputeDistance(int k, PARAM_DESC* dim, float p1[], float p2[]); int QueryInSearch(KDTREE* tree); diff --git a/src/classify/ocrfeatures.h b/src/classify/ocrfeatures.h index 4af49b994..edf63496f 100644 --- a/src/classify/ocrfeatures.h +++ b/src/classify/ocrfeatures.h @@ -102,10 +102,13 @@ DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName) ----------------------------------------------------------------------*/ bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); +TESS_API void FreeFeature(FEATURE Feature); +TESS_API void FreeFeatureSet(FEATURE_SET FeatureSet); +TESS_API FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc); FEATURE_SET NewFeatureSet(int NumFeatures); diff --git a/src/classify/protos.h b/src/classify/protos.h index 419327314..93bea842c 100644 --- a/src/classify/protos.h +++ b/src/classify/protos.h @@ -82,18 +82,24 @@ using CLASSES = CLASS_STRUCT*; /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ +TESS_API int AddConfigToClass(CLASS_TYPE Class); +TESS_API int AddProtoToClass(CLASS_TYPE Class); +TESS_API void FillABC(PROTO Proto); +TESS_API void FreeClass(CLASS_TYPE Class); +TESS_API void FreeClassFields(CLASS_TYPE Class); void InitPrototypes(); +TESS_API CLASS_TYPE NewClass(int NumProtos, int NumConfigs); } // namespace tesseract diff --git a/src/classify/shapeclassifier.h b/src/classify/shapeclassifier.h index c3860be72..b323554f8 100644 --- a/src/classify/shapeclassifier.h +++ b/src/classify/shapeclassifier.h @@ -38,7 +38,7 @@ class TrainingSampleSet; struct UnicharRating; // Interface base class for classifiers that produce ShapeRating results. -class ShapeClassifier { +class TESS_API ShapeClassifier { public: virtual ~ShapeClassifier() = default; diff --git a/src/classify/shapetable.h b/src/classify/shapetable.h index d6f088a25..4dde3ecde 100644 --- a/src/classify/shapetable.h +++ b/src/classify/shapetable.h @@ -181,7 +181,7 @@ struct UnicharAndFonts { // characters that have a similar or identical shape. Shapes/ShapeTables may // be organized hierarchically from identical shapes at the leaves to vaguely // similar shapes near the root. -class Shape { +class TESS_API Shape { public: Shape() : destination_index_(-1) {} @@ -258,7 +258,7 @@ class Shape { // that the shape represents. // Each UnicharAndFonts also lists the fonts of the unichar_id that were // mapped to the shape during training. -class ShapeTable { +class TESS_API ShapeTable { public: ShapeTable(); // The UNICHARSET reference supplied here, or in set_unicharset below must diff --git a/src/classify/tessclassifier.h b/src/classify/tessclassifier.h index 5c420ba52..07dcfa4df 100644 --- a/src/classify/tessclassifier.h +++ b/src/classify/tessclassifier.h @@ -33,7 +33,7 @@ class TrainingSample; // Due to limitations in the content of TrainingSample, this currently // only works for the static classifier and only works if the ShapeTable // in classify is not nullptr. -class TessClassifier : public ShapeClassifier { +class TESS_API TessClassifier : public ShapeClassifier { public: TessClassifier(bool pruner_only, tesseract::Classify* classify) : pruner_only_(pruner_only), classify_(classify) {} diff --git a/src/classify/trainingsample.cpp b/src/classify/trainingsample.cpp index 575a89dd1..003fb97b8 100644 --- a/src/classify/trainingsample.cpp +++ b/src/classify/trainingsample.cpp @@ -21,13 +21,15 @@ #include "trainingsample.h" -#include // for M_PI -#include "allheaders.h" +#include "intfeaturespace.h" #include "helpers.h" -#include "intfeaturemap.h" #include "normfeat.h" #include "shapetable.h" +#include "allheaders.h" + +#include // for M_PI + namespace tesseract { ELISTIZE(TrainingSample) @@ -281,17 +283,6 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) { features_are_mapped_ = false; } -// Sets the mapped_features_ from the features using the provided -// feature_map. -void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) { - GenericVector indexed_features; - feature_map.feature_space().IndexAndSortFeatures(features_, num_features_, - &indexed_features); - feature_map.MapIndexedFeatures(indexed_features, &mapped_features_); - features_are_indexed_ = false; - features_are_mapped_ = true; -} - // Returns a pix representing the sample. (Int features only.) Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h index 0964e2bee..0ac2cc4fc 100644 --- a/src/classify/trainingsample.h +++ b/src/classify/trainingsample.h @@ -50,7 +50,7 @@ static const int kSampleScaleSize = 3; static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; // ASSERT_IS_PRIME(kSampleRandomSize) !! -class TrainingSample : public ELIST_LINK { +class TESS_API TrainingSample : public ELIST_LINK { public: TrainingSample() : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), @@ -97,9 +97,6 @@ class TrainingSample : public ELIST_LINK { // Sets the mapped_features_ from the features_ using the provided // feature_space to the indexed versions of the features. void IndexFeatures(const IntFeatureSpace& feature_space); - // Sets the mapped_features_ from the features_ using the provided - // feature_map. - void MapFeatures(const IntFeatureMap& feature_map); // Returns a pix representing the sample. (Int features only.) Pix* RenderToPix(const UNICHARSET* unicharset) const; @@ -231,10 +228,15 @@ class TrainingSample : public ELIST_LINK { double max_dist_; // Global index of this sample. int sample_index_; +public: + // both are used in training tools + // hide after refactoring + // Indexed/mapped features, as indicated by the bools below. GenericVector mapped_features_; bool features_are_indexed_; bool features_are_mapped_; +private: // True if the last classification was an error by the current definition. bool is_error_; diff --git a/src/cutil/emalloc.h b/src/cutil/emalloc.h index 9aa1a5d3d..ea0291f86 100644 --- a/src/cutil/emalloc.h +++ b/src/cutil/emalloc.h @@ -20,8 +20,11 @@ namespace tesseract { +TESS_API void *Emalloc(int Size); +TESS_API void *Erealloc(void *ptr, int size); +TESS_API void Efree(void *ptr); } // namespace tesseract diff --git a/src/cutil/oldlist.h b/src/cutil/oldlist.h index 5fa839a2e..61fa035b1 100644 --- a/src/cutil/oldlist.h +++ b/src/cutil/oldlist.h @@ -112,6 +112,7 @@ int count(LIST var_list); LIST delete_d(LIST list, void* key, int_compare is_equal); +TESS_API LIST destroy(LIST list); void destroy_nodes(LIST list, void_dest destructor); @@ -120,8 +121,10 @@ LIST last(LIST var_list); LIST pop(LIST list); +TESS_API LIST push(LIST list, void* element); +TESS_API LIST push_last(LIST list, void* item); LIST search(LIST list, void* key, int_compare is_equal); diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 119643c0a..26d76d1e7 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -108,7 +108,7 @@ static const char kWildcard[] = "*"; /// (since they use only the public methods of SquishedDawg and Trie /// classes that are inherited from the Dawg base class). // -class Dawg { +class TESS_API Dawg { public: /// Magic number to determine endianness when reading the Dawg from file. static const int16_t kDawgMagicNumber = 42; @@ -397,7 +397,7 @@ class DawgPositionVector : public GenericVector { /// is stored as a contiguous EDGE_ARRAY (read from file or given as an /// argument to the constructor). // -class SquishedDawg : public Dawg { +class TESS_API SquishedDawg : public Dawg { public: SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level) diff --git a/src/dict/dict.h b/src/dict/dict.h index c3548230c..2bbd7fecb 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -91,7 +91,7 @@ struct DawgArgs { bool valid_end; }; -class Dict { +class TESS_API Dict { public: Dict(CCUtil* image_ptr); ~Dict(); @@ -313,7 +313,7 @@ class Dict { /// Initialize Dict class - load dawgs from [lang].traineddata and /// user-specified wordlist and parttern list. - static TESS_API DawgCache *GlobalDawgCache(); + static DawgCache *GlobalDawgCache(); // Sets up ready for a Load or LoadLSTM. void SetupForLoad(DawgCache *dawg_cache); // Loads the dawgs needed by Tesseract. Call FinishLoad() after. diff --git a/src/dict/trie.h b/src/dict/trie.h index 182dce5c4..f516b3c5d 100644 --- a/src/dict/trie.h +++ b/src/dict/trie.h @@ -53,7 +53,7 @@ using TRIE_NODES = GenericVector ; * This class stores a vector of pointers to TRIE_NODE_RECORDs, each of * which has a vector of forward and backward edges. */ -class Trie : public Dawg { +class TESS_API Trie : public Dawg { public: enum RTLReversePolicy { RRP_DO_NO_REVERSE, diff --git a/src/lstm/convolve.h b/src/lstm/convolve.h index a3c051e0e..033061f23 100644 --- a/src/lstm/convolve.h +++ b/src/lstm/convolve.h @@ -32,6 +32,7 @@ class Convolve : public Network { public: // The area of convolution is 2*half_x + 1 by 2*half_y + 1, forcing it to // always be odd, so the center is the current pixel. + TESS_API Convolve(const std::string& name, int ni, int half_x, int half_y); ~Convolve() override = default; diff --git a/src/lstm/fullyconnected.h b/src/lstm/fullyconnected.h index c67984efd..eaa437a74 100644 --- a/src/lstm/fullyconnected.h +++ b/src/lstm/fullyconnected.h @@ -26,6 +26,7 @@ namespace tesseract { // C++ Implementation of the Softmax (output) class from lstm.py. class FullyConnected : public Network { public: + TESS_API FullyConnected(const std::string& name, int ni, int no, NetworkType type); ~FullyConnected() override = default; diff --git a/src/lstm/input.h b/src/lstm/input.h index 67cc89366..b9366364c 100644 --- a/src/lstm/input.h +++ b/src/lstm/input.h @@ -26,7 +26,9 @@ class ScrollView; class Input : public Network { public: + TESS_API Input(const std::string& name, int ni, int no); + TESS_API Input(const std::string& name, const StaticShape& shape); ~Input() override = default; diff --git a/src/lstm/lstm.h b/src/lstm/lstm.h index beffafb12..6fb3ce6f8 100644 --- a/src/lstm/lstm.h +++ b/src/lstm/lstm.h @@ -46,6 +46,7 @@ class LSTM : public Network { // 2-d and bidi softmax LSTMs are not rejected, but are impossible to build // in the conventional way because the output feedback both forwards and // backwards in time does become impossible. + TESS_API LSTM(const std::string& name, int num_inputs, int num_states, int num_outputs, bool two_dimensional, NetworkType type); ~LSTM() override; diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h index a7962fce1..ba6854d52 100644 --- a/src/lstm/lstmrecognizer.h +++ b/src/lstm/lstmrecognizer.h @@ -50,7 +50,7 @@ enum TrainingFlags { // Top-level line recognizer class for LSTM-based networks. // Note that a sub-class, LSTMTrainer is used for training. -class LSTMRecognizer { +class TESS_API LSTMRecognizer { public: LSTMRecognizer(); LSTMRecognizer(const STRING language_data_path_prefix); diff --git a/src/lstm/maxpool.h b/src/lstm/maxpool.h index f39035e6c..bae603397 100644 --- a/src/lstm/maxpool.h +++ b/src/lstm/maxpool.h @@ -28,6 +28,7 @@ namespace tesseract { // Backprop propagates only to the position that was the max. class Maxpool : public Reconfig { public: + TESS_API Maxpool(const char* name, int ni, int x_scale, int y_scale); ~Maxpool() override = default; diff --git a/src/lstm/network.h b/src/lstm/network.h index bf200609b..50c416ba0 100644 --- a/src/lstm/network.h +++ b/src/lstm/network.h @@ -277,6 +277,7 @@ class Network { void DisplayBackward(const NetworkIO& matrix); // Creates the window if needed, otherwise clears it. + TESS_API static void ClearWindow(bool tess_coords, const char* window_name, int width, int height, ScrollView** window); diff --git a/src/lstm/networkio.h b/src/lstm/networkio.h index 764e320e6..b8ee3900d 100644 --- a/src/lstm/networkio.h +++ b/src/lstm/networkio.h @@ -36,7 +36,7 @@ namespace tesseract { // Class to contain all the input/output of a network, allowing for fixed or // variable-strided 2d to 1d mapping, and float or int8_t values. Provides // enough calculating functions to hide the detail of the implementation. -class NetworkIO { +class TESS_API NetworkIO { public: NetworkIO() : int_mode_(false) {} // Resizes the array (and stride), avoiding realloc if possible, to the given diff --git a/src/lstm/parallel.h b/src/lstm/parallel.h index 8386a24bf..5311aba93 100644 --- a/src/lstm/parallel.h +++ b/src/lstm/parallel.h @@ -27,6 +27,7 @@ namespace tesseract { class Parallel : public Plumbing { public: // ni_ and no_ will be set by AddToStack. + TESS_API Parallel(const char* name, NetworkType type); ~Parallel() override = default; diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h index 88e8e87b6..d19bec423 100644 --- a/src/lstm/recodebeam.h +++ b/src/lstm/recodebeam.h @@ -177,7 +177,7 @@ using RecodePair = KDPairInc; using RecodeHeap = GenericHeap; // Class that holds the entire beam search for recognition of a text line. -class RecodeBeamSearch { +class TESS_API RecodeBeamSearch { public: // Borrows the pointer, which is expected to survive until *this is deleted. RecodeBeamSearch(const UnicharCompress& recoder, int null_char, diff --git a/src/lstm/reconfig.h b/src/lstm/reconfig.h index 458542273..834632c96 100644 --- a/src/lstm/reconfig.h +++ b/src/lstm/reconfig.h @@ -30,6 +30,7 @@ namespace tesseract { // input stride is a multiple of the y_scale factor! class Reconfig : public Network { public: + TESS_API Reconfig(const char* name, int ni, int x_scale, int y_scale); ~Reconfig() override = default; diff --git a/src/lstm/reversed.h b/src/lstm/reversed.h index 8fd2d8aa2..bcf18546e 100644 --- a/src/lstm/reversed.h +++ b/src/lstm/reversed.h @@ -27,6 +27,7 @@ namespace tesseract { // C++ Implementation of the Reversed class from lstm.py. class Reversed : public Plumbing { public: + TESS_API explicit Reversed(const std::string& name, NetworkType type); ~Reversed() override = default; @@ -65,6 +66,7 @@ class Reversed : public Plumbing { } // Takes ownership of the given network to make it the reversed one. + TESS_API void SetNetwork(Network* network); // Runs forward propagation of activations on the input line. diff --git a/src/lstm/series.h b/src/lstm/series.h index 892ffff28..6b2a68e8b 100644 --- a/src/lstm/series.h +++ b/src/lstm/series.h @@ -27,6 +27,7 @@ namespace tesseract { class Series : public Plumbing { public: // ni_ and no_ will be set by AddToStack. + TESS_API explicit Series(const char* name); ~Series() override = default; @@ -81,10 +82,12 @@ class Series : public Plumbing { // Splits the series after the given index, returning the two parts and // deletes itself. The first part, up to network with index last_start, goes // into start, and the rest goes into end. + TESS_API void SplitAt(int last_start, Series** start, Series** end); // Appends the elements of the src series to this, removing from src and // deleting it. + TESS_API void AppendSeries(Network* src); }; diff --git a/src/textord/alignedblob.h b/src/textord/alignedblob.h index 69142c6ce..cca3b05dc 100644 --- a/src/textord/alignedblob.h +++ b/src/textord/alignedblob.h @@ -79,7 +79,7 @@ struct AlignedBlobParams { // The AlignedBlob class contains code to find vertically aligned blobs. // This is factored out into a separate class, so it can be used by both // vertical line finding (LineFind) and tabstop finding (TabFind). -class AlignedBlob : public BlobGrid { +class TESS_API AlignedBlob : public BlobGrid { public: AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright); ~AlignedBlob() override; diff --git a/src/textord/bbgrid.h b/src/textord/bbgrid.h index 3cf08b85d..5d75aa38d 100644 --- a/src/textord/bbgrid.h +++ b/src/textord/bbgrid.h @@ -49,7 +49,7 @@ template class GridSearch; // The GridBase class is the base class for BBGrid and IntGrid. // It holds the geometry and scale of the grid. -class GridBase { +class TESS_API GridBase { public: GridBase() = default; GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright); diff --git a/src/textord/blobgrid.h b/src/textord/blobgrid.h index 36bfadad7..54b19aebd 100644 --- a/src/textord/blobgrid.h +++ b/src/textord/blobgrid.h @@ -30,7 +30,7 @@ CLISTIZEH(BLOBNBOX) using BlobGridSearch = GridSearch; -class BlobGrid : public BBGrid { +class TESS_API BlobGrid : public BBGrid { public: BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); ~BlobGrid() override; diff --git a/src/textord/colfind.h b/src/textord/colfind.h index acfbb4042..b7d5b672b 100644 --- a/src/textord/colfind.h +++ b/src/textord/colfind.h @@ -47,7 +47,7 @@ class TempColumn_LIST; class EquationDetectBase; // The ColumnFinder class finds columns in the grid. -class ColumnFinder : public TabFind { +class TESS_API ColumnFinder : public TabFind { public: // Gridsize is an estimate of the text size in the image. A suitable value // is in TO_BLOCK::line_size after find_components has been used to make diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index 2009b9fa4..5c299b3e8 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -64,7 +64,7 @@ CLISTIZEH(ColPartition) * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions * emerges, which represents the columns over a wide y-coordinate range. */ -class ColPartition : public ELIST2_LINK { +class TESS_API ColPartition : public ELIST2_LINK { public: // This empty constructor is here only so that the class can be ELISTIZED. // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier diff --git a/src/textord/colpartitiongrid.h b/src/textord/colpartitiongrid.h index 32bb46e30..85ab7f3d4 100644 --- a/src/textord/colpartitiongrid.h +++ b/src/textord/colpartitiongrid.h @@ -29,7 +29,7 @@ class TabFind; // ColPartitionGrid is a BBGrid of ColPartition. // It collects functions that work on the grid. -class ColPartitionGrid : public BBGrid { public: diff --git a/src/textord/equationdetectbase.h b/src/textord/equationdetectbase.h index d0e8c12e4..7f84bd091 100644 --- a/src/textord/equationdetectbase.h +++ b/src/textord/equationdetectbase.h @@ -29,7 +29,7 @@ namespace tesseract { class ColPartitionGrid; class ColPartitionSet; -class EquationDetectBase { +class TESS_API EquationDetectBase { public: EquationDetectBase() = default; virtual ~EquationDetectBase(); diff --git a/src/textord/tabfind.h b/src/textord/tabfind.h index aaccb7633..d16a533cb 100644 --- a/src/textord/tabfind.h +++ b/src/textord/tabfind.h @@ -49,7 +49,7 @@ const int kColumnWidthFactor = 20; * rule/separator lines, and tabstop boundaries, (when available), so * as the holder of the list of TabVectors this class provides the functions. */ -class TabFind : public AlignedBlob { +class TESS_API TabFind : public AlignedBlob { public: TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, TabVector_LIST* vlines, int vertical_x, int vertical_y, diff --git a/src/textord/tablefind.h b/src/textord/tablefind.h index 2f3f44512..dc6ff932d 100644 --- a/src/textord/tablefind.h +++ b/src/textord/tablefind.h @@ -127,7 +127,7 @@ using ColSegmentGridSearch = GridSearch -#include // for M_PI #ifdef DISABLED_LEGACY_ENGINE @@ -32,6 +31,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); STRING_PARAM_FLAG(O, "", "File to write unicharset to"); STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); +STRING_PARAM_FLAG(fonts_dir, "", ""); +STRING_PARAM_FLAG(fontconfig_tmpdir, "", ""); /** * This routine parses the command line arguments that were @@ -96,6 +97,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); STRING_PARAM_FLAG(O, "", "File to write unicharset to"); STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); +STRING_PARAM_FLAG(fonts_dir, "", ""); +STRING_PARAM_FLAG(fontconfig_tmpdir, "", ""); static DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, "Min number of samples per proto as % of total"); static DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, diff --git a/src/training/commontraining.h b/src/training/commontraining.h index f12ab258a..8f9b2ed63 100644 --- a/src/training/commontraining.h +++ b/src/training/commontraining.h @@ -18,17 +18,14 @@ #include "config_auto.h" #endif +#include "commandlineflags.h" +#include "tprintf.h" + #include -#ifdef DISABLED_LEGACY_ENGINE - -#include "tprintf.h" -#include "commandlineflags.h" - - +TESS_COMMON_TRAINING_API void ParseArguments(int* argc, char*** argv); - namespace tesseract { // Check whether the shared tesseract library is the right one. @@ -48,11 +45,9 @@ static inline void CheckSharedLibraryVersion() } // namespace tesseract - -#else +#ifndef DISABLED_LEGACY_ENGINE #include "cluster.h" -#include "commandlineflags.h" #include "featdefs.h" #include "intproto.h" #include "oldlist.h" @@ -67,9 +62,11 @@ class ShapeTable; // Globals /////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// +TESS_COMMON_TRAINING_API extern tesseract::FEATURE_DEFS_STRUCT feature_defs; // Must be defined in the file that "implements" commonTraining facilities. +TESS_COMMON_TRAINING_API extern tesseract::CLUSTERCONFIG Config; ////////////////////////////////////////////////////////////////////////////// @@ -96,28 +93,13 @@ using MERGE_CLASS = MERGE_CLASS_NODE*; ////////////////////////////////////////////////////////////////////////////// // Functions ///////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -void ParseArguments(int* argc, char*** argv); namespace tesseract { -// Check whether the shared tesseract library is the right one. -// This function must be inline because otherwise it would be part of -// the shared library, so it could not compare the versions. -static inline void CheckSharedLibraryVersion() -{ -#ifdef HAVE_CONFIG_H - if (!!strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version())) { - tprintf("ERROR: shared library version mismatch (was %s, expected %s\n" - "Did you use a wrong shared tesseract library?\n", - TessBaseAPI::Version(), TESSERACT_VERSION_STR); - exit(1); - } -#endif -} - // Helper loads shape table from the given file. ShapeTable* LoadShapeTable(const STRING& file_prefix); // Helper to write the shape_table. +TESS_COMMON_TRAINING_API void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); // Creates a MasterTraininer and loads the training data into it: @@ -133,21 +115,26 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table); // Computes canonical and cloud features. // If shape_table is not nullptr, but failed to load, make a fake flat one, // as shape clustering was not run. +TESS_COMMON_TRAINING_API MasterTrainer* LoadTrainingData(int argc, const char* const * argv, bool replication, ShapeTable** shape_table, STRING* file_prefix); + } // namespace tesseract. +TESS_COMMON_TRAINING_API const char *GetNextFilename(int argc, const char* const * argv); LABELEDLIST FindList( tesseract::LIST List, char *Label); +TESS_COMMON_TRAINING_API LABELEDLIST NewLabeledList( const char *Label); +TESS_COMMON_TRAINING_API void ReadTrainingSamples(const tesseract::FEATURE_DEFS_STRUCT& feature_defs, const char *feature_name, int max_samples, tesseract::UNICHARSET* unicharset, @@ -159,59 +146,69 @@ void WriteTrainingSamples( tesseract::LIST CharList, const char *program_feature_type); +TESS_COMMON_TRAINING_API void FreeTrainingSamples( tesseract::LIST CharList); +TESS_COMMON_TRAINING_API void FreeLabeledList( LABELEDLIST LabeledList); +TESS_COMMON_TRAINING_API void FreeLabeledClassList( tesseract::LIST ClassListList); +TESS_COMMON_TRAINING_API tesseract::CLUSTERER *SetUpForClustering( const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type); +TESS_COMMON_TRAINING_API tesseract::LIST RemoveInsignificantProtos( tesseract::LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N); +TESS_COMMON_TRAINING_API void CleanUpUnusedData( tesseract::LIST ProtoList); +TESS_COMMON_TRAINING_API void MergeInsignificantProtos( tesseract::LIST ProtoList, const char *label, tesseract::CLUSTERER *Clusterer, tesseract::CLUSTERCONFIG *Config); +TESS_COMMON_TRAINING_API MERGE_CLASS FindClass( tesseract::LIST List, const char *Label); +TESS_COMMON_TRAINING_API MERGE_CLASS NewLabeledClass( const char *Label); -void FreeTrainingSamples( - tesseract::LIST CharList); - +TESS_COMMON_TRAINING_API tesseract::CLASS_STRUCT* SetUpForFloat2Int(const tesseract::UNICHARSET& unicharset, tesseract::LIST LabeledClassList); void Normalize( float *Values); +TESS_COMMON_TRAINING_API void FreeNormProtoList( tesseract::LIST CharList); +TESS_COMMON_TRAINING_API void AddToNormProtosList( tesseract::LIST* NormProtoList, tesseract::LIST ProtoList, char *CharName); +TESS_COMMON_TRAINING_API int NumberOfProtos( tesseract::LIST ProtoList, bool CountSigProtos, diff --git a/src/training/ctc.h b/src/training/ctc.h index 47fba6747..49f47a4e6 100644 --- a/src/training/ctc.h +++ b/src/training/ctc.h @@ -27,7 +27,7 @@ namespace tesseract { // Class to encapsulate CTC and simple target generation. -class CTC { +class TESS_COMMON_TRAINING_API CTC { public: // Normalizes the probabilities such that no target has a prob below min_prob, // and, provided that the initial total is at least min_total_prob, then all diff --git a/src/training/fileio.cpp b/src/training/fileio.cpp index a53bdeeb7..778ead5c1 100644 --- a/src/training/fileio.cpp +++ b/src/training/fileio.cpp @@ -33,7 +33,6 @@ #include "host.h" // includes windows.h for BOOL, ... #include "tprintf.h" - namespace tesseract { /////////////////////////////////////////////////////////////////////////////// diff --git a/src/training/fileio.h b/src/training/fileio.h index b86dc5581..ad2811c0a 100644 --- a/src/training/fileio.h +++ b/src/training/fileio.h @@ -40,7 +40,7 @@ inline bool LoadFileLinesToStrings(const char* filename, } // A class to manipulate FILE*s. -class File { +class TESS_UNICHARSET_TRAINING_API File { public: // Try to open the file 'filename' in mode 'mode'. // Stop the program if it cannot open it. @@ -66,7 +66,7 @@ class File { }; // A class to manipulate Files for reading. -class InputBuffer { +class TESS_UNICHARSET_TRAINING_API InputBuffer { public: explicit InputBuffer(FILE* stream); // 'size' is ignored. @@ -88,7 +88,7 @@ class InputBuffer { }; // A class to manipulate Files for writing. -class OutputBuffer { +class TESS_UNICHARSET_TRAINING_API OutputBuffer { public: explicit OutputBuffer(FILE* stream); // 'size' is ignored. diff --git a/src/training/intfeaturemap.h b/src/training/intfeaturemap.h index 5c5a54b83..af4b981a4 100644 --- a/src/training/intfeaturemap.h +++ b/src/training/intfeaturemap.h @@ -45,7 +45,7 @@ static const int kNumOffsetMaps = 2; // Although the transformations are reversible, the inverses are lossy and do // not return the exact input INT_FEATURE_STRUCT, due to the many->one nature // of both transformations. -class IntFeatureMap { +class TESS_COMMON_TRAINING_API IntFeatureMap { public: IntFeatureMap(); ~IntFeatureMap(); diff --git a/src/training/lang_model_helpers.cpp b/src/training/lang_model_helpers.cpp index 79f85fc14..1bce9cbb0 100644 --- a/src/training/lang_model_helpers.cpp +++ b/src/training/lang_model_helpers.cpp @@ -12,20 +12,24 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include "lang_model_helpers.h" -#if defined(_WIN32) -#include -#endif -#include -#include -#include #include "dawg.h" #include "fileio.h" #include "tessdatamanager.h" #include "trie.h" #include "unicharcompress.h" +#include + +#include +#include + +#if defined(_WIN32) +#include +#endif + namespace tesseract { // Helper makes a filename (//) and writes data diff --git a/src/training/lang_model_helpers.h b/src/training/lang_model_helpers.h index 08401b7bf..9a568da8f 100644 --- a/src/training/lang_model_helpers.h +++ b/src/training/lang_model_helpers.h @@ -28,11 +28,13 @@ namespace tesseract { // Default writer will overwrite any existing file, but a supplied writer // can do its own thing. If lang is empty, returns true but does nothing. // NOTE that suffix should contain any required . for the filename. +TESS_UNICHARSET_TRAINING_API bool WriteFile(const std::string& output_dir, const std::string& lang, const std::string& suffix, const std::vector& data, FileWriter writer); // Helper reads a file with optional reader and returns a STRING. // On failure emits a warning message and returns and empty STRING. +TESS_UNICHARSET_TRAINING_API STRING ReadFile(const std::string& filename, FileReader reader); // Helper writes the unicharset to file and to the traineddata. @@ -70,6 +72,7 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, // puncs must be non-empty. // lang_is_rtl indicates that the language is generally written from right // to left (eg Arabic/Hebrew). +TESS_UNICHARSET_TRAINING_API int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir, const std::string& version_str, const std::string& output_dir, const std::string& lang, bool pass_through_recoder, diff --git a/src/training/ligature_table.h b/src/training/ligature_table.h index 97baea8b6..0fd1f40fc 100644 --- a/src/training/ligature_table.h +++ b/src/training/ligature_table.h @@ -35,7 +35,7 @@ class PangoFontInfo; // defined in pango_font_info.h // Map to substitute strings for ligatures. using LigHash = std::unordered_map; -class LigatureTable { +class TESS_PANGO_TRAINING_API LigatureTable { public: // Get a static instance of this class. static LigatureTable* Get(); diff --git a/src/training/lstmtester.h b/src/training/lstmtester.h index a7b5be637..3ce491df3 100644 --- a/src/training/lstmtester.h +++ b/src/training/lstmtester.h @@ -25,7 +25,7 @@ namespace tesseract { -class LSTMTester { +class TESS_UNICHARSET_TRAINING_API LSTMTester { public: LSTMTester(int64_t max_memory); diff --git a/src/training/lstmtrainer.h b/src/training/lstmtrainer.h index fad939c90..09164c06a 100644 --- a/src/training/lstmtrainer.h +++ b/src/training/lstmtrainer.h @@ -76,7 +76,7 @@ using TestCallback = std::functioncharsetsize()); } + +// Sets the mapped_features_ from the features using the provided +// feature_map. +static void MapFeatures(TrainingSample &s, const IntFeatureMap& feature_map) { + GenericVector indexed_features; + feature_map.feature_space().IndexAndSortFeatures(s.features(), s.num_features(), + &indexed_features); + feature_map.MapIndexedFeatures(indexed_features, &s.mapped_features_); + s.features_are_indexed_ = false; + s.features_are_mapped_ = true; +} + // Apply the supplied feature_space/feature_map transform to all samples // accessed by this iterator. void SampleIterator::MapSampleFeatures(const IntFeatureMap& feature_map) { for (Begin(); !AtEnd(); Next()) { TrainingSample* sample = MutableSample(); - sample->MapFeatures(feature_map); + MapFeatures(*sample, feature_map); } } diff --git a/src/training/stringrenderer.h b/src/training/stringrenderer.h index b1bb8c092..c3d6687c4 100644 --- a/src/training/stringrenderer.h +++ b/src/training/stringrenderer.h @@ -47,7 +47,7 @@ namespace tesseract { class BoxChar; -class StringRenderer { +class TESS_PANGO_TRAINING_API StringRenderer { public: StringRenderer(const std::string& font_desc, int page_width, int page_height); ~StringRenderer(); diff --git a/src/training/tessopt.cpp b/src/training/tessopt.cpp index 15611c4c0..404700c03 100644 --- a/src/training/tessopt.cpp +++ b/src/training/tessopt.cpp @@ -17,9 +17,10 @@ * **********************************************************************/ +#include "tessopt.h" + #include #include -#include "tessopt.h" int tessoptind; char *tessoptarg; @@ -30,10 +31,10 @@ char *tessoptarg; * parse command line args. **********************************************************************/ -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +int tessopt( + int argc, + char *argv[], + const char *arglist //string of arg chars ) { const char *arg; //arg char diff --git a/src/training/tessopt.h b/src/training/tessopt.h index 42b27f99e..b42a4d5fd 100644 --- a/src/training/tessopt.h +++ b/src/training/tessopt.h @@ -19,15 +19,14 @@ #ifndef TESSERACT_TRAINING_TESSOPT_H_ #define TESSERACT_TRAINING_TESSOPT_H_ -#include // for int32_t - extern int tessoptind; extern char *tessoptarg; -int tessopt ( //parse args -int32_t argc, //arg count -char *argv[], //args -const char *arglist //string of arg chars +//parse args +int tessopt( + int argc, // arg count + char *argv[], // args + const char *arglist // string of arg chars ); #endif // TESSERACT_TRAINING_TESSOPT_H_ diff --git a/src/training/tlog.h b/src/training/tlog.h index 29db457c9..1af3a88b1 100644 --- a/src/training/tlog.h +++ b/src/training/tlog.h @@ -24,6 +24,7 @@ #include "errcode.h" #include "tprintf.h" +TESS_PANGO_TRAINING_API DECLARE_INT_PARAM_FLAG(tlog_level); // Variant guarded by the numeric logging level parameter FLAGS_tlog_level diff --git a/src/training/trainingsampleset.h b/src/training/trainingsampleset.h index 854786077..3d022eff7 100644 --- a/src/training/trainingsampleset.h +++ b/src/training/trainingsampleset.h @@ -285,5 +285,4 @@ class TrainingSampleSet { } // namespace tesseract. - #endif // TRAININGSAMPLESETSET_H_ diff --git a/src/training/unicharset_training_utils.h b/src/training/unicharset_training_utils.h index 1edc42c88..16aa0c64e 100644 --- a/src/training/unicharset_training_utils.h +++ b/src/training/unicharset_training_utils.h @@ -31,6 +31,7 @@ class UNICHARSET; // Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. +TESS_UNICHARSET_TRAINING_API void SetupBasicProperties(bool report_errors, bool decompose, UNICHARSET* unicharset); // Default behavior is to compose, until it is proven that decomposed benefits @@ -39,6 +40,7 @@ inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { SetupBasicProperties(report_errors, false, unicharset); } // Helper sets the properties from universal script unicharsets, if found. +TESS_UNICHARSET_TRAINING_API void SetScriptProperties(const std::string& script_dir, UNICHARSET* unicharset); // Helper gets the combined x-heights string. std::string GetXheightString(const std::string& script_dir, const UNICHARSET& unicharset); @@ -48,6 +50,7 @@ std::string GetXheightString(const std::string& script_dir, const UNICHARSET& un // script_dir directory, then the tops and bottoms are expanded using the // script unicharset. // If non-empty, xheight data for the fonts are written to the xheights_file. +TESS_UNICHARSET_TRAINING_API void SetPropertiesForInputFile(const std::string& script_dir, const std::string& input_unicharset_file, const std::string& output_unicharset_file, diff --git a/src/training/validator.h b/src/training/validator.h index a51c809d1..0e94f2107 100644 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -68,7 +68,7 @@ enum class ViramaScript : char32 { // Base class offers a validation API and protected methods to allow subclasses // to easily build the validated/segmented output. -class Validator { +class TESS_UNICHARSET_TRAINING_API Validator { public: // Validates and cleans the src vector of unicodes to the *dest, according to // g_mode. In the case of kSingleString, a single vector containing the whole diff --git a/src/viewer/scrollview.h b/src/viewer/scrollview.h index cbbbda221..a23de6a27 100644 --- a/src/viewer/scrollview.h +++ b/src/viewer/scrollview.h @@ -95,8 +95,7 @@ class SVEventHandler { // Each ScrollView class instance represents one window, and stuff is drawn in // the window through method calls on the class. The constructor is used to // create the class instance (and the window). - -class ScrollView { +class TESS_API ScrollView { public: // Color enum for pens and brushes. enum Color { diff --git a/src/wordrec/params_model.h b/src/wordrec/params_model.h index 431fb3bee..63d1aa1fe 100644 --- a/src/wordrec/params_model.h +++ b/src/wordrec/params_model.h @@ -28,7 +28,7 @@ namespace tesseract { class TFile; // Represents the learned weights for a given language. -class ParamsModel { +class TESS_API ParamsModel { public: // Enum for expressing OCR pass. enum PassEnum { diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h index 3bdaffa14..a9f7d31fc 100644 --- a/src/wordrec/wordrec.h +++ b/src/wordrec/wordrec.h @@ -36,7 +36,7 @@ namespace tesseract { /* ccmain/tstruct.cpp */ -class Wordrec : public Classify { +class TESS_API Wordrec : public Classify { public: // config parameters @@ -190,7 +190,7 @@ class FRAGMENT:public ELIST_LINK ELISTIZEH(FRAGMENT) -class Wordrec : public Classify { +class TESS_API Wordrec : public Classify { public: // config parameters ******************************************************* BOOL_VAR_H(merge_fragments_in_matrix, true, diff --git a/sw.cpp b/sw.cpp index f46aa188d..c30f5c275 100644 --- a/sw.cpp +++ b/sw.cpp @@ -9,11 +9,11 @@ void build(Solution &s) { libtesseract.setChecks("libtesseract"); - libtesseract.ExportAllSymbols = true; libtesseract.PackageDefinitions = true; libtesseract += cppstd; + libtesseract += "TESS_API"_api; libtesseract += "include/.*"_rr; libtesseract += "src/.*"_rr; libtesseract -= "src/lstm/.*\\.cc"_rr; @@ -83,8 +83,6 @@ void build(Solution &s) libtesseract.Public += "HAVE_CONFIG_H"_d; libtesseract.Public += "_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1"_d; libtesseract.Public += "HAVE_LIBARCHIVE"_d; - libtesseract.Interface += sw::Shared, "TESS_IMPORTS"_d; - libtesseract.Private += sw::Shared, "TESS_EXPORTS"_d; libtesseract.Public += "org.sw.demo.danbloomberg.leptonica"_dep; libtesseract.Public += "org.sw.demo.libarchive.libarchive"_dep; @@ -124,8 +122,9 @@ void build(Solution &s) } // - auto &common_training = training.addStaticLibrary("common_training"); + auto &common_training = training.addLibrary("common_training"); { + common_training += "TESS_COMMON_TRAINING_API"_api; common_training += cppstd; common_training += "src/training/commandlineflags.cpp", @@ -152,8 +151,9 @@ void build(Solution &s) } // - auto &unicharset_training = training.addStaticLibrary("unicharset_training"); + auto &unicharset_training = training.addLibrary("unicharset_training"); { + unicharset_training += "TESS_UNICHARSET_TRAINING_API"_api; unicharset_training += cppstd; unicharset_training += "src/training/fileio.*"_rr, @@ -177,24 +177,25 @@ void build(Solution &s) n.Public += __VA_ARGS__; \ n - ADD_EXE(ambiguous_words, libtesseract); + ADD_EXE(ambiguous_words, common_training); ADD_EXE(classifier_tester, common_training); ADD_EXE(combine_lang_model, unicharset_training); - ADD_EXE(combine_tessdata, libtesseract); + ADD_EXE(combine_tessdata, common_training); ADD_EXE(cntraining, common_training); - ADD_EXE(dawg2wordlist, libtesseract); + ADD_EXE(dawg2wordlist, common_training); ADD_EXE(mftraining, common_training) += "src/training/mergenf.*"_rr; ADD_EXE(shapeclustering, common_training); ADD_EXE(unicharset_extractor, unicharset_training); - ADD_EXE(wordlist2dawg, libtesseract); + ADD_EXE(wordlist2dawg, common_training); ADD_EXE(lstmeval, unicharset_training); ADD_EXE(lstmtraining, unicharset_training); ADD_EXE(set_unicharset_properties, unicharset_training); - ADD_EXE(merge_unicharsets, tessopt); + ADD_EXE(merge_unicharsets, common_training); // - auto &pango_training = training.addStaticLibrary("pango_training"); + auto &pango_training = training.addLibrary("pango_training"); { + pango_training += "TESS_PANGO_TRAINING_API"_api; pango_training += cppstd; pango_training += "src/training/boxchar.cpp", @@ -218,9 +219,6 @@ void build(Solution &s) text2image += "src/training/degradeimage.cpp", "src/training/degradeimage.h", - "src/training/icuerrorcode.h", - "src/training/normstrngs.cpp", - "src/training/normstrngs.h", "src/training/text2image.cpp", "src/training/util.h" ; @@ -229,6 +227,7 @@ void build(Solution &s) if (!s.getExternalVariables()["with-tests"]) return; + // tests { auto &test = tess.addDirectory("test"); test.Scope = TargetScope::Test; diff --git a/unittest/ligature_table_test.cc b/unittest/ligature_table_test.cc index b4d1598c7..49f0ec40b 100644 --- a/unittest/ligature_table_test.cc +++ b/unittest/ligature_table_test.cc @@ -15,9 +15,6 @@ #include "ligature_table.h" #include "pango_font_info.h" -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); - namespace tesseract { const char kEngNonLigatureText[] = "fidelity effigy ſteep"; diff --git a/unittest/pango_font_info_test.cc b/unittest/pango_font_info_test.cc index 614484c56..1d6287363 100644 --- a/unittest/pango_font_info_test.cc +++ b/unittest/pango_font_info_test.cc @@ -22,10 +22,6 @@ #include "util/utf8/unicodetext.h" // for UnicodeText #endif -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); -DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts); - namespace tesseract { // Fonts in testdata directory diff --git a/unittest/stringrenderer_test.cc b/unittest/stringrenderer_test.cc index 8624df2bc..e403e03a0 100644 --- a/unittest/stringrenderer_test.cc +++ b/unittest/stringrenderer_test.cc @@ -24,11 +24,6 @@ BOOL_PARAM_FLAG(display, false, "Display image for inspection"); -// Flags defined in pango_font_info.cpp -DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts); -DECLARE_STRING_PARAM_FLAG(fonts_dir); -DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); - namespace tesseract { const char kEngText[] = "the quick brown fox jumps over the lazy dog";