Use TESS_API for every public symbol. A public symbol is one that is exported from the library. This also applies to symbols used by the unit tests and the training tools. Users will be limited to the public API, but the set of exported symbols will still be wider than that.

Remove TESS_LOCAL.
Fix several symbol issues that were made visible by these changes.

All build systems must set -fvisibility=hidden on *nix systems.
This commit is contained in:
Egor Pugin 2020-12-31 16:31:10 +03:00
parent 4d817d09a5
commit c86325e2f7
116 changed files with 325 additions and 224 deletions

View File

@ -736,7 +736,7 @@ class TESS_API TessBaseAPI {
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
TESS_LOCAL bool InternalSetImage();
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
@ -748,7 +748,7 @@ class TESS_API TessBaseAPI {
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
TESS_LOCAL int FindLines();
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
@ -758,7 +758,7 @@ class TESS_API TessBaseAPI {
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
TESS_LOCAL LTRResultIterator* GetLTRIterator();
LTRResultIterator* GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
@ -766,12 +766,12 @@ class TESS_API TessBaseAPI {
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
TESS_LOCAL int TextLength(int* blob_count);
int TextLength(int* blob_count);
//// paragraphs.cpp ////////////////////////////////////////////////////
TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
void DetectParagraphs(bool after_text_recognition);
TESS_LOCAL const PAGE_RES* GetPageRes() const {
const PAGE_RES* GetPageRes() const {
return page_res_;
}

View File

@ -183,7 +183,7 @@ class TESS_API LTRResultIterator : public PageIterator {
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class ChoiceIterator {
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the

View File

@ -319,7 +319,7 @@ class TESS_API PageIterator {
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
TESS_LOCAL void BeginWord(int offset);
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES* page_res_;

View File

@ -15,10 +15,11 @@
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCUTIL_PLATFORM_H_
#define TESSERACT_CCUTIL_PLATFORM_H_
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
#if defined(_WIN32) || defined(__CYGWIN__)
#ifndef TESS_API
# if defined(_WIN32) || defined(__CYGWIN__)
# if defined(TESS_EXPORTS)
# define TESS_API __declspec(dllexport)
# elif defined(TESS_IMPORTS)
@ -26,20 +27,11 @@
# else
# define TESS_API
# endif
# define TESS_LOCAL
#else
# if __GNUC__ >= 4
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# else
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# define TESS_API __attribute__((visibility("default")))
# define TESS_LOCAL __attribute__((visibility("hidden")))
# else
# define TESS_API
# define TESS_LOCAL
# endif
# else
# define TESS_API
# define TESS_LOCAL
# endif
# endif
#endif
#endif // TESSERACT_CCUTIL_PLATFORM_H_
#endif // TESSERACT_PLATFORM_H_

View File

@ -144,7 +144,7 @@ class TESS_API ResultIterator : public LTRResultIterator {
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
TESS_LOCAL explicit ResultIterator(const LTRResultIterator& resit);
explicit ResultIterator(const LTRResultIterator& resit);
private:
/**

View File

@ -55,7 +55,7 @@ using char32 = signed int;
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class UNICHAR {
class TESS_API UNICHAR {
public:
UNICHAR() {
memset(chars, 0, UNICHAR_LEN);
@ -105,7 +105,7 @@ class UNICHAR {
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
// tprintf("Char = %s\n", buf);
// }
class const_iterator {
class TESS_API const_iterator {
using CI = const_iterator;
public:

View File

@ -61,7 +61,7 @@ class GenericVector;
// NOTE that, although the subclasses execute on different SIMD hardware, no
// virtual methods are needed, as the constructor sets up everything that
// is required to allow the base class implementation to do all the work.
struct IntSimdMatrix {
struct TESS_API IntSimdMatrix {
// Computes a reshaped copy of the weight matrix w.
void Init(const GENERIC_2D_ARRAY<int8_t>& w,
std::vector<int8_t>& shaped_w,
@ -115,12 +115,12 @@ struct IntSimdMatrix {
// Number of groups of inputs to be broadcast.
// num_input_groups_ = num_inputs_per_register_ / num_inputs_per_group_
static TESS_API const IntSimdMatrix* intSimdMatrix;
static const IntSimdMatrix* intSimdMatrix;
// Only available with NEON.
static TESS_API const IntSimdMatrix intSimdMatrixNEON;
static const IntSimdMatrix intSimdMatrixNEON;
// Only available with AVX2 / SSE.
static TESS_API const IntSimdMatrix intSimdMatrixAVX2;
static TESS_API const IntSimdMatrix intSimdMatrixSSE;
static const IntSimdMatrix intSimdMatrixAVX2;
static const IntSimdMatrix intSimdMatrixSSE;
};
} // namespace tesseract

View File

@ -35,7 +35,7 @@ class ColPartition;
class ColPartitionGrid;
class ColPartitionSet;
class EquationDetect : public EquationDetectBase {
class TESS_API EquationDetect : public EquationDetectBase {
public:
EquationDetect(const char* equ_datapath,
const char* equ_language);

View File

@ -40,7 +40,7 @@ class Tesseract;
// ResultIterator adds text-specific methods for access to OCR output.
// MutableIterator adds access to internal data structures.
class MutableIterator : public ResultIterator {
class TESS_API MutableIterator : public ResultIterator {
public:
// See argument descriptions in ResultIterator()
MutableIterator(PAGE_RES* page_res, Tesseract* tesseract,

View File

@ -87,6 +87,7 @@ class RowInfo {
// paragraphs - this is the actual list of PARA objects.
// models - the list of paragraph models referenced by the PARA objects.
// caller is responsible for deleting the models.
TESS_API
void DetectParagraphs(int debug_level,
std::vector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
@ -98,6 +99,7 @@ void DetectParagraphs(int debug_level,
// saving the ParagraphModels in models. Caller owns the models.
// We use unicharset during the function to answer questions such as "is the
// first letter of this word upper case?"
TESS_API
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,

View File

@ -31,6 +31,7 @@ class UNICHARSET;
class WERD_CHOICE;
// Return whether the given word is likely to be a list item start word.
TESS_API
bool AsciiLikelyListItem(const STRING &word);
// Return the first Unicode Codepoint from werd[pos].
@ -38,11 +39,13 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
// Set right word attributes given either a unicharset and werd or a utf8
// string.
TESS_API
void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
const STRING &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);
// Set left word attributes given either a unicharset and werd or a utf8 string.
TESS_API
void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,
const STRING &utf8,
bool *is_list, bool *starts_idea, bool *ends_idea);

View File

@ -175,7 +175,7 @@ struct WordData {
using WordRecognizer = void (Tesseract::*)(const WordData&, WERD_RES**,
PointerVector<WERD_RES>*);
class Tesseract : public Wordrec {
class TESS_API Tesseract : public Wordrec {
public:
Tesseract();
~Tesseract() override;

View File

@ -32,6 +32,7 @@ const int kBoxReadBufSize = 1024;
// Open the boxfile based on the given image filename.
// Returns nullptr if the box file cannot be opened.
TESS_API
FILE* OpenBoxFile(const char* filename);
// Reads all boxes from the given filename.
@ -51,6 +52,7 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename,
// continue_on_failure allows reading to continue even if an invalid box is
// encountered and will return true if it succeeds in reading some boxes.
// It otherwise gives up and returns false on encountering an invalid box.
TESS_API
bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
bool continue_on_failure,
std::vector<TBOX>* boxes,
@ -66,20 +68,24 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char* box_data,
// for valid utf-8 and allows space or tab between fields.
// utf8_str is set with the unichar string, and bounding box with the box.
// If there are page numbers in the file, it reads them all.
TESS_API
bool ReadNextBox(int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// As ReadNextBox above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
TESS_API
bool ReadNextBox(int target_page, int *line_number, FILE* box_file,
STRING* utf8_str, TBOX* bounding_box);
// Parses the given box file string into a page_number, utf8_str, and
// bounding_box. Returns true on a successful parse.
TESS_API
bool ParseBoxFileStr(const char* boxfile_str, int* page_number,
STRING* utf8_str, TBOX* bounding_box);
// Creates a box file string from a unichar string, TBOX and page number.
TESS_API
void MakeBoxFileStr(const char* unichar_str, const TBOX& box, int page_num,
STRING* box_str);

View File

@ -22,7 +22,7 @@
#include "ccutil.h" // for CCUtil
namespace tesseract {
class CCStruct : public CCUtil {
class TESS_API CCStruct : public CCUtil {
public:
CCStruct() = default;
~CCStruct() override;

View File

@ -146,26 +146,34 @@ struct FontSet {
// are replaced.
class FontInfoTable : public GenericVector<FontInfo> {
public:
TESS_API // when inheritance from GenericVector is removed, move this to class level
FontInfoTable();
TESS_API
~FontInfoTable();
// Writes to the given file. Returns false in case of error.
TESS_API
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
TESS_API
bool DeSerialize(TFile* fp);
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
TESS_API
bool SetContainsFontProperties(
int font_id, const GenericVector<ScoredFont>& font_set) const;
// Returns true if the given set of fonts includes multiple properties.
TESS_API
bool SetContainsMultipleFontProperties(
const GenericVector<ScoredFont>& font_set) const;
// Moves any non-empty FontSpacingInfo entries from other to this.
TESS_API
void MoveSpacingInfoFrom(FontInfoTable* other);
// Moves this to the target unicity table.
TESS_API
void MoveTo(UnicityTable<FontInfo>* target);
};

View File

@ -104,7 +104,7 @@ struct FloatWordFeature {
// The text transcription is the ground truth UTF-8 text for the image.
// Character boxes are optional and indicate the desired segmentation of
// the text into recognition units.
class ImageData {
class TESS_API ImageData {
public:
ImageData();
// Takes ownership of the pix.
@ -213,19 +213,24 @@ class ImageData {
// A collection of ImageData that knows roughly how much memory it is using.
class DocumentData {
public:
TESS_API
explicit DocumentData(const STRING& name);
TESS_API
~DocumentData();
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
TESS_API
bool LoadDocument(const char* filename, int start_page, int64_t max_memory,
FileReader reader);
// Sets up the document, without actually loading it.
void SetDocument(const char* filename, int64_t max_memory, FileReader reader);
// Writes all the pages to the given filename. Returns false on error.
TESS_API
bool SaveDocument(const char* filename, FileWriter writer);
// Adds the given page data to this document, counting up memory.
TESS_API
void AddPageToDocument(ImageData* page);
const STRING& document_name() const {
@ -257,6 +262,7 @@ class DocumentData {
void LoadPageInBackground(int index);
// Returns a pointer to the page with the given index, modulo the total
// number of pages. Blocks until the background load is completed.
TESS_API
const ImageData* GetPage(int index);
// Returns true if the requested page is available, and provides a pointer,
// which may be nullptr if the document is empty. May block, even though it
@ -325,7 +331,9 @@ class DocumentData {
// content.
class DocumentCache {
public:
TESS_API
explicit DocumentCache(int64_t max_memory);
TESS_API
~DocumentCache();
// Deletes all existing documents from the cache.
@ -335,6 +343,7 @@ class DocumentCache {
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
TESS_API
bool LoadDocuments(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, FileReader reader);
@ -358,16 +367,19 @@ class DocumentCache {
}
// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache
// strategy, could take a long time.
TESS_API
int TotalPages();
private:
// Returns a page by serial number, selecting them in a round-robin fashion
// from all the documents. Highly disk-intensive, but doesn't need samples
// to be shuffled between files to begin with.
TESS_API
const ImageData* GetPageRoundRobin(int serial);
// Returns a page by serial number, selecting them in sequence from each file.
// Requires the samples to be shuffled between the files to give a random or
// uniform distribution of data. Less disk-intensive than GetPageRoundRobin.
TESS_API
const ImageData* GetPageSequential(int serial);
// Helper counts the number of adjacent cached neighbour documents_ of index

View File

@ -28,7 +28,7 @@ namespace tesseract {
template <typename T> class GenericVector;
class LLSQ {
class TESS_API LLSQ {
public:
LLSQ() { // constructor
clear(); // set to zeros

View File

@ -46,7 +46,7 @@ enum NormalizationMode {
NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode.
};
class DENORM {
class TESS_API DENORM {
public:
DENORM();

View File

@ -27,8 +27,9 @@ namespace tesseract {
class BLOCK; //forward decl
ELISTIZEH (BLOCK)
class BLOCK : public ELIST_LINK
ELISTIZEH(BLOCK)
class TESS_API BLOCK : public ELIST_LINK
//page block
{
friend class BLOCK_RECT_IT; //block iterator

View File

@ -114,7 +114,7 @@ ELISTIZEH(PARA)
// |you can try to identify source |
// |code. Ouch! |
// +--------------------------------+
class ParagraphModel {
class TESS_API ParagraphModel {
public:
ParagraphModel(tesseract::ParagraphJustification justification,
int margin,

View File

@ -164,7 +164,7 @@ enum CRUNCH_MODE
// WERD_RES is a collection of publicly accessible members that gathers
// information about a word result.
class WERD_RES : public ELIST_LINK {
class TESS_API WERD_RES : public ELIST_LINK {
public:
// Which word is which?
// There are 3 coordinate spaces in use here: a possibly rotated pixel space,
@ -673,7 +673,7 @@ class WERD_RES : public ELIST_LINK {
* PAGE_RES_IT - Page results iterator
*************************************************************************/
class PAGE_RES_IT {
class TESS_API PAGE_RES_IT {
public:
PAGE_RES * page_res; // page being iterated

View File

@ -189,7 +189,8 @@ class ICOORDELT : public ELIST_LINK, public ICOORD
};
ELISTIZEH (ICOORDELT)
class FCOORD
class TESS_API FCOORD
{
public:
///empty constructor

View File

@ -27,7 +27,7 @@
namespace tesseract {
class POLY_BLOCK {
class TESS_API POLY_BLOCK {
public:
POLY_BLOCK() = default;
// Initialize from box coordinates.

View File

@ -263,7 +263,7 @@ enum ScriptPos {
const char *ScriptPosToString(ScriptPos script_pos);
class WERD_CHOICE : public ELIST_LINK {
class TESS_API WERD_CHOICE : public ELIST_LINK {
public:
static const float kBadRating;
static const char *permuter_name(uint8_t permuter);

View File

@ -35,7 +35,7 @@ namespace tesseract {
class STRING;
class TBOX { // bounding box
class TESS_API TBOX { // bounding box
public:
TBOX (): // empty constructor making a null box
bot_left (INT16_MAX, INT16_MAX), top_right (-INT16_MAX, -INT16_MAX) {

View File

@ -29,7 +29,7 @@ template <typename T> class GenericVector;
// Simple histogram-based statistics for integer values in a known
// range, such that the range is small compared to the number of samples.
class STATS {
class TESS_API STATS {
public:
// The histogram buckets are in the range
// [min_bucket_value, max_bucket_value_plus_1 - 1] i.e.

View File

@ -37,7 +37,7 @@ class DENORM;
ELISTIZEH(C_BLOB)
class C_BLOB:public ELIST_LINK
class TESS_API C_BLOB : public ELIST_LINK
{
public:
C_BLOB() = default;

View File

@ -56,7 +56,7 @@ enum DISPLAY_FLAGS {
class ROW; // forward decl
class WERD : public ELIST2_LINK {
class TESS_API WERD : public ELIST2_LINK {
public:
WERD() = default;
// WERD constructed with:

View File

@ -27,7 +27,7 @@ namespace tesseract {
// Trivial class to encapsulate a fixed-length array of bits, with
// Serialize/DeSerialize. Replaces the old macros.
class BitVector {
class TESS_API BitVector {
public:
// Fast lookup table to get the first least significant set bit in a byte.
// For zero, the table has 255, but since it is a special case, most code

View File

@ -41,7 +41,7 @@
namespace tesseract {
class CCUtil {
class TESS_API CCUtil {
public:
CCUtil();
virtual ~CCUtil();

View File

@ -69,7 +69,7 @@ class CLIST_LINK
* Generic list class for singly linked CONS cell lists
**********************************************************************/
class CLIST
class TESS_API CLIST
{
friend class CLIST_ITERATOR;
@ -144,7 +144,7 @@ class CLIST
*links
**********************************************************************/
class CLIST_ITERATOR
class TESS_API CLIST_ITERATOR
{
friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *);

View File

@ -107,7 +107,7 @@ class ELIST_LINK
* Generic list class for singly linked lists with embedded links
**********************************************************************/
class ELIST
class TESS_API ELIST
{
friend class ELIST_ITERATOR;
@ -181,7 +181,7 @@ class ELIST
* Generic iterator class for singly linked lists with embedded links
**********************************************************************/
class ELIST_ITERATOR
class TESS_API ELIST_ITERATOR
{
friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *);
@ -849,7 +849,7 @@ ELISTIZEH_C.
#define ELISTIZEH_A(CLASSNAME) \
\
extern void CLASSNAME##_zapper(ELIST_LINK* link);
TESS_API extern void CLASSNAME##_zapper(ELIST_LINK* link);
#define ELISTIZEH_B(CLASSNAME) \
\

View File

@ -88,7 +88,7 @@ class ELIST2_LINK
* Generic list class for doubly linked lists with embedded links
**********************************************************************/
class ELIST2
class TESS_API ELIST2
{
friend class ELIST2_ITERATOR;
@ -151,7 +151,7 @@ class ELIST2
*links
**********************************************************************/
class ELIST2_ITERATOR
class TESS_API ELIST2_ITERATOR
{
friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *);
@ -858,7 +858,7 @@ ELIST2IZEH_C.
#define ELIST2IZEH_A(CLASSNAME) \
\
extern void CLASSNAME##_zapper( /*delete a link*/ \
TESS_API extern void CLASSNAME##_zapper( /*delete a link*/ \
ELIST2_LINK *link); /*link to delete*/
#define ELIST2IZEH_B(CLASSNAME) \

View File

@ -310,14 +310,14 @@ class GenericVector {
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const {
/*bool WithinBounds(const T& rangemin, const T& rangemax) const {
for (int i = 0; i < size_used_; ++i) {
if (data_[i] < rangemin || rangemax < data_[i]) {
return false;
}
}
return true;
}
}*/
protected:
// Internal recursive version of choose_nth_item.

View File

@ -39,7 +39,7 @@ class IndexMapBiDi;
// IndexMapBiDi below.
// NOTE: there are currently no methods to setup an IndexMap on its own!
// It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
class IndexMap {
class TESS_API IndexMap {
public:
virtual ~IndexMap();
@ -99,7 +99,7 @@ class IndexMap {
// for ... Merge(index1, index2);
// CompleteMerges();
// Allows a many-to-one mapping by merging compact space indices.
class IndexMapBiDi : public IndexMap {
class TESS_API IndexMapBiDi : public IndexMap {
public:
~IndexMapBiDi() override;

View File

@ -47,7 +47,7 @@ struct ParamsVectors {
};
// Utility functions for working with Tesseract parameters.
class ParamUtils {
class TESS_API ParamUtils {
public:
// Reads a file of parameter definitions and set/modify the values therein.
// If the filename begins with a + or -, the BoolVariables will be
@ -279,6 +279,7 @@ class DoubleParam : public Param {
//
// TODO(daria): remove GlobalParams() when all global Tesseract
// parameters are converted to members.
TESS_API
ParamsVectors* GlobalParams();
/*************************************************************************

View File

@ -27,6 +27,7 @@
* @note Note that scientific floating-point notation is not supported.
*
*/
TESS_API
int tfscanf(FILE* stream, const char *format, ...);
#endif // TESSERACT_CCUTIL_SCANUTILS_H_

View File

@ -47,7 +47,9 @@ constexpr size_t countof(T const (&)[N]) noexcept {
using FileWriter = bool (*)(const std::vector<char>& data,
const char* filename);
TESS_API
bool LoadDataFromFile(const char* filename, std::vector<char>* data);
TESS_API
bool SaveDataToFile(const std::vector<char>& data, const char* filename);
// Deserialize data from file.
@ -64,7 +66,7 @@ bool Serialize(FILE *fp, const T *data, size_t n = 1) {
// Simple file class.
// Allows for portable file input from memory and from foreign file systems.
class TFile {
class TESS_API TFile {
public:
TFile();
~TFile();

View File

@ -123,8 +123,7 @@ static const char *const kTessdataFileSuffixes[] = {
*/
static const int kMaxNumTessdataEntries = 1000;
class TessdataManager {
class TESS_API TessdataManager {
public:
TessdataManager();
explicit TessdataManager(FileReader reader);

View File

@ -125,7 +125,7 @@ class RecodedCharID {
// position). For non-CJK, the same code value CAN be used in multiple
// positions, eg the ff ligature is converted to <f> <nullchar> <f>, where <f>
// is the same code as is used for the single f.
class UnicharCompress {
class TESS_API UnicharCompress {
public:
UnicharCompress();
UnicharCompress(const UnicharCompress& src);

View File

@ -49,7 +49,7 @@ enum class OldUncleanUnichars {
kTrue,
};
class CHAR_FRAGMENT {
class TESS_API CHAR_FRAGMENT {
public:
// Minimum number of characters used for fragment representation.
static const int kMinLen = 6;
@ -146,15 +146,15 @@ class CHAR_FRAGMENT {
// The UNICHARSET class is an utility class for Tesseract that holds the
// set of characters that are used by the engine. Each character is identified
// by a unique number, from 0 to (size - 1).
class UNICHARSET {
class TESS_API UNICHARSET {
public:
// Custom list of characters and their ligature forms (UTF8)
// These map to unicode values in the private use area (PUC) and are supported
// by only few font families (eg. Wyld, Adobe Caslon Pro).
static TESS_API const char* kCustomLigatures[][2];
static const char* kCustomLigatures[][2];
// List of strings for the SpecialUnicharCodes. Keep in sync with the enum.
static TESS_API const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT];
static const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT];
// ICU 2.0 UCharDirection enum (from icu/include/unicode/uchar.h)
enum Direction {
@ -893,7 +893,7 @@ class UNICHARSET {
private:
struct UNICHAR_PROPERTIES {
struct TESS_API UNICHAR_PROPERTIES {
UNICHAR_PROPERTIES();
// Initializes all properties to sensible default values.
void Init();
@ -996,7 +996,7 @@ class UNICHARSET {
// The substitutions clean up text that should exists for rendering of
// synthetic data, but not in the recognition set.
static const char* kCleanupMaps[][2];
static TESS_API const char* null_script;
static const char* null_script;
std::vector<UNICHAR_SLOT> unichars;
UNICHARMAP ids;

View File

@ -99,7 +99,7 @@ enum CharSegmentationType {
CST_NGRAM // Multiple characters.
};
class Classify : public CCStruct {
class TESS_API Classify : public CCStruct {
public:
Classify();
~Classify() override;

View File

@ -106,14 +106,19 @@ typedef struct {
/*--------------------------------------------------------------------------
Public Function Prototypes
--------------------------------------------------------------------------*/
TESS_API
CLUSTERER* MakeClusterer(int16_t SampleSize, const PARAM_DESC ParamDesc[]);
TESS_API
SAMPLE* MakeSample(CLUSTERER* Clusterer, const float* Feature, int32_t CharID);
TESS_API
LIST ClusterSamples(CLUSTERER* Clusterer, CLUSTERCONFIG* Config);
TESS_API
void FreeClusterer(CLUSTERER* Clusterer);
TESS_API
void FreeProtoList(LIST* ProtoList);
void FreePrototype(void* arg); // PROTOTYPE *Prototype);
@ -124,6 +129,7 @@ float Mean(PROTOTYPE* Proto, uint16_t Dimension);
float StandardDeviation(PROTOTYPE* Proto, uint16_t Dimension);
TESS_API
int32_t MergeClusters(int16_t N, PARAM_DESC ParamDesc[], int32_t n1, int32_t n2,
float m[], float m1[], float m2[]);

View File

@ -32,8 +32,10 @@ PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uint16_t N);
PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uint16_t N);
TESS_API
void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]);
TESS_API
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto);
} // namespace tesseract

View File

@ -50,8 +50,10 @@ using FEATURE_DEFS = FEATURE_DEFS_STRUCT *;
/*----------------------------------------------------------------------
Generic functions for manipulating character descriptions
----------------------------------------------------------------------*/
TESS_API
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs);
TESS_API
void FreeCharDescription(CHAR_DESC CharDesc);
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs);
@ -62,9 +64,11 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs,
CHAR_DESC CharDesc, STRING* str);
TESS_API
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
FILE *File);
TESS_API
uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs,
const char *ShortName);

View File

@ -35,7 +35,7 @@ class IndexMap;
// Down-sampling quantization of the INT_FEATURE_STRUCT feature space and
// conversion to a single scalar index value, used as a binary feature space.
class IntFeatureSpace {
class TESS_API IntFeatureSpace {
public:
IntFeatureSpace();
// Default copy constructors and assignment OK!

View File

@ -46,20 +46,22 @@ const double kStandardFeatureLength = 64.0 / 5;
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
TESS_API
void InitIntegerFX();
// Returns a vector representing the direction of a feature with the given
// theta direction in an INT_FEATURE_STRUCT.
TESS_API
FCOORD FeatureDirection(uint8_t theta);
// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features);
// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features);
} // namespace tesseract

View File

@ -232,16 +232,19 @@ INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs);
INT_TEMPLATES NewIntTemplates();
TESS_API
void free_int_templates(INT_TEMPLATES templates);
void ShowMatchDisplay();
// Clears the given window and draws the featurespace guides for the
// appropriate normalization method.
TESS_API
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window);
/*----------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
TESS_API
void RenderIntFeature(ScrollView* window, const INT_FEATURE_STRUCT* Feature,
ScrollView::Color color);
@ -253,6 +256,7 @@ void InitFeatureDisplayWindowIfReqd();
// Creates a window of the appropriate size for displaying elements
// in feature space.
TESS_API
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos);
#endif // !GRAPHICS_DISABLED

View File

@ -81,6 +81,7 @@ void FreeKDNode(KDNODE* Node);
float DistanceSquared(int k, PARAM_DESC* dim, float p1[], float p2[]);
TESS_API
float ComputeDistance(int k, PARAM_DESC* dim, float p1[], float p2[]);
int QueryInSearch(KDTREE* tree);

View File

@ -102,10 +102,13 @@ DefineFeature (Name, NumLinear, NumCircular, ShortName, ParamName)
----------------------------------------------------------------------*/
bool AddFeature(FEATURE_SET FeatureSet, FEATURE Feature);
TESS_API
void FreeFeature(FEATURE Feature);
TESS_API
void FreeFeatureSet(FEATURE_SET FeatureSet);
TESS_API
FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc);
FEATURE_SET NewFeatureSet(int NumFeatures);

View File

@ -82,18 +82,24 @@ using CLASSES = CLASS_STRUCT*;
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
TESS_API
int AddConfigToClass(CLASS_TYPE Class);
TESS_API
int AddProtoToClass(CLASS_TYPE Class);
TESS_API
void FillABC(PROTO Proto);
TESS_API
void FreeClass(CLASS_TYPE Class);
TESS_API
void FreeClassFields(CLASS_TYPE Class);
void InitPrototypes();
TESS_API
CLASS_TYPE NewClass(int NumProtos, int NumConfigs);
} // namespace tesseract

View File

@ -38,7 +38,7 @@ class TrainingSampleSet;
struct UnicharRating;
// Interface base class for classifiers that produce ShapeRating results.
class ShapeClassifier {
class TESS_API ShapeClassifier {
public:
virtual ~ShapeClassifier() = default;

View File

@ -181,7 +181,7 @@ struct UnicharAndFonts {
// characters that have a similar or identical shape. Shapes/ShapeTables may
// be organized hierarchically from identical shapes at the leaves to vaguely
// similar shapes near the root.
class Shape {
class TESS_API Shape {
public:
Shape() : destination_index_(-1) {}
@ -258,7 +258,7 @@ class Shape {
// that the shape represents.
// Each UnicharAndFonts also lists the fonts of the unichar_id that were
// mapped to the shape during training.
class ShapeTable {
class TESS_API ShapeTable {
public:
ShapeTable();
// The UNICHARSET reference supplied here, or in set_unicharset below must

View File

@ -33,7 +33,7 @@ class TrainingSample;
// Due to limitations in the content of TrainingSample, this currently
// only works for the static classifier and only works if the ShapeTable
// in classify is not nullptr.
class TessClassifier : public ShapeClassifier {
class TESS_API TessClassifier : public ShapeClassifier {
public:
TessClassifier(bool pruner_only, tesseract::Classify* classify)
: pruner_only_(pruner_only), classify_(classify) {}

View File

@ -21,13 +21,15 @@
#include "trainingsample.h"
#include <cmath> // for M_PI
#include "allheaders.h"
#include "intfeaturespace.h"
#include "helpers.h"
#include "intfeaturemap.h"
#include "normfeat.h"
#include "shapetable.h"
#include "allheaders.h"
#include <cmath> // for M_PI
namespace tesseract {
ELISTIZE(TrainingSample)
@ -281,17 +283,6 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace& feature_space) {
features_are_mapped_ = false;
}
// Sets the mapped_features_ from the features using the provided
// feature_map.
void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
GenericVector<int> indexed_features;
feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
&indexed_features);
feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
features_are_indexed_ = false;
features_are_mapped_ = true;
}
// Returns a pix representing the sample. (Int features only.)
Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);

View File

@ -50,7 +50,7 @@ static const int kSampleScaleSize = 3;
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
// ASSERT_IS_PRIME(kSampleRandomSize) !!
class TrainingSample : public ELIST_LINK {
class TESS_API TrainingSample : public ELIST_LINK {
public:
TrainingSample()
: class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
@ -97,9 +97,6 @@ class TrainingSample : public ELIST_LINK {
// Sets the mapped_features_ from the features_ using the provided
// feature_space to the indexed versions of the features.
void IndexFeatures(const IntFeatureSpace& feature_space);
// Sets the mapped_features_ from the features_ using the provided
// feature_map.
void MapFeatures(const IntFeatureMap& feature_map);
// Returns a pix representing the sample. (Int features only.)
Pix* RenderToPix(const UNICHARSET* unicharset) const;
@ -231,10 +228,15 @@ class TrainingSample : public ELIST_LINK {
double max_dist_;
// Global index of this sample.
int sample_index_;
public:
// both are used in training tools
// hide after refactoring
// Indexed/mapped features, as indicated by the bools below.
GenericVector<int> mapped_features_;
bool features_are_indexed_;
bool features_are_mapped_;
private:
// True if the last classification was an error by the current definition.
bool is_error_;

View File

@ -20,8 +20,11 @@
namespace tesseract {
TESS_API
void *Emalloc(int Size);
TESS_API
void *Erealloc(void *ptr, int size);
TESS_API
void Efree(void *ptr);
} // namespace tesseract

View File

@ -112,6 +112,7 @@ int count(LIST var_list);
LIST delete_d(LIST list, void* key, int_compare is_equal);
TESS_API
LIST destroy(LIST list);
void destroy_nodes(LIST list, void_dest destructor);
@ -120,8 +121,10 @@ LIST last(LIST var_list);
LIST pop(LIST list);
TESS_API
LIST push(LIST list, void* element);
TESS_API
LIST push_last(LIST list, void* item);
LIST search(LIST list, void* key, int_compare is_equal);

View File

@ -108,7 +108,7 @@ static const char kWildcard[] = "*";
/// (since they use only the public methods of SquishedDawg and Trie
/// classes that are inherited from the Dawg base class).
//
class Dawg {
class TESS_API Dawg {
public:
/// Magic number to determine endianness when reading the Dawg from file.
static const int16_t kDawgMagicNumber = 42;
@ -397,7 +397,7 @@ class DawgPositionVector : public GenericVector<DawgPosition> {
/// is stored as a contiguous EDGE_ARRAY (read from file or given as an
/// argument to the constructor).
//
class SquishedDawg : public Dawg {
class TESS_API SquishedDawg : public Dawg {
public:
SquishedDawg(DawgType type, const STRING &lang, PermuterType perm,
int debug_level)

View File

@ -91,7 +91,7 @@ struct DawgArgs {
bool valid_end;
};
class Dict {
class TESS_API Dict {
public:
Dict(CCUtil* image_ptr);
~Dict();
@ -313,7 +313,7 @@ class Dict {
/// Initialize Dict class - load dawgs from [lang].traineddata and
/// user-specified wordlist and pattern list.
static TESS_API DawgCache *GlobalDawgCache();
static DawgCache *GlobalDawgCache();
// Sets up ready for a Load or LoadLSTM.
void SetupForLoad(DawgCache *dawg_cache);
// Loads the dawgs needed by Tesseract. Call FinishLoad() after.

View File

@ -53,7 +53,7 @@ using TRIE_NODES = GenericVector<TRIE_NODE_RECORD *> ;
* This class stores a vector of pointers to TRIE_NODE_RECORDs, each of
* which has a vector of forward and backward edges.
*/
class Trie : public Dawg {
class TESS_API Trie : public Dawg {
public:
enum RTLReversePolicy {
RRP_DO_NO_REVERSE,

View File

@ -32,6 +32,7 @@ class Convolve : public Network {
public:
// The area of convolution is 2*half_x + 1 by 2*half_y + 1, forcing it to
// always be odd, so the center is the current pixel.
TESS_API
Convolve(const std::string& name, int ni, int half_x, int half_y);
~Convolve() override = default;

View File

@ -26,6 +26,7 @@ namespace tesseract {
// C++ Implementation of the Softmax (output) class from lstm.py.
class FullyConnected : public Network {
public:
TESS_API
FullyConnected(const std::string& name, int ni, int no, NetworkType type);
~FullyConnected() override = default;

View File

@ -26,7 +26,9 @@ class ScrollView;
class Input : public Network {
public:
TESS_API
Input(const std::string& name, int ni, int no);
TESS_API
Input(const std::string& name, const StaticShape& shape);
~Input() override = default;

View File

@ -46,6 +46,7 @@ class LSTM : public Network {
// 2-d and bidi softmax LSTMs are not rejected, but are impossible to build
// in the conventional way because the output feedback both forwards and
// backwards in time does become impossible.
TESS_API
LSTM(const std::string& name, int num_inputs, int num_states, int num_outputs,
bool two_dimensional, NetworkType type);
~LSTM() override;

View File

@ -50,7 +50,7 @@ enum TrainingFlags {
// Top-level line recognizer class for LSTM-based networks.
// Note that a sub-class, LSTMTrainer is used for training.
class LSTMRecognizer {
class TESS_API LSTMRecognizer {
public:
LSTMRecognizer();
LSTMRecognizer(const STRING language_data_path_prefix);

View File

@ -28,6 +28,7 @@ namespace tesseract {
// Backprop propagates only to the position that was the max.
class Maxpool : public Reconfig {
public:
TESS_API
Maxpool(const char* name, int ni, int x_scale, int y_scale);
~Maxpool() override = default;

View File

@ -277,6 +277,7 @@ class Network {
void DisplayBackward(const NetworkIO& matrix);
// Creates the window if needed, otherwise clears it.
TESS_API
static void ClearWindow(bool tess_coords, const char* window_name,
int width, int height, ScrollView** window);

View File

@ -36,7 +36,7 @@ namespace tesseract {
// Class to contain all the input/output of a network, allowing for fixed or
// variable-strided 2d to 1d mapping, and float or int8_t values. Provides
// enough calculating functions to hide the detail of the implementation.
class NetworkIO {
class TESS_API NetworkIO {
public:
NetworkIO() : int_mode_(false) {}
// Resizes the array (and stride), avoiding realloc if possible, to the given

View File

@ -27,6 +27,7 @@ namespace tesseract {
class Parallel : public Plumbing {
public:
// ni_ and no_ will be set by AddToStack.
TESS_API
Parallel(const char* name, NetworkType type);
~Parallel() override = default;

View File

@ -177,7 +177,7 @@ using RecodePair = KDPairInc<double, RecodeNode>;
using RecodeHeap = GenericHeap<RecodePair>;
// Class that holds the entire beam search for recognition of a text line.
class RecodeBeamSearch {
class TESS_API RecodeBeamSearch {
public:
// Borrows the pointer, which is expected to survive until *this is deleted.
RecodeBeamSearch(const UnicharCompress& recoder, int null_char,

View File

@ -30,6 +30,7 @@ namespace tesseract {
// input stride is a multiple of the y_scale factor!
class Reconfig : public Network {
public:
TESS_API
Reconfig(const char* name, int ni, int x_scale, int y_scale);
~Reconfig() override = default;

View File

@ -27,6 +27,7 @@ namespace tesseract {
// C++ Implementation of the Reversed class from lstm.py.
class Reversed : public Plumbing {
public:
TESS_API
explicit Reversed(const std::string& name, NetworkType type);
~Reversed() override = default;
@ -65,6 +66,7 @@ class Reversed : public Plumbing {
}
// Takes ownership of the given network to make it the reversed one.
TESS_API
void SetNetwork(Network* network);
// Runs forward propagation of activations on the input line.

View File

@ -27,6 +27,7 @@ namespace tesseract {
class Series : public Plumbing {
public:
// ni_ and no_ will be set by AddToStack.
TESS_API
explicit Series(const char* name);
~Series() override = default;
@ -81,10 +82,12 @@ class Series : public Plumbing {
// Splits the series after the given index, returning the two parts and
// deletes itself. The first part, up to network with index last_start, goes
// into start, and the rest goes into end.
TESS_API
void SplitAt(int last_start, Series** start, Series** end);
// Appends the elements of the src series to this, removing from src and
// deleting it.
TESS_API
void AppendSeries(Network* src);
};

View File

@ -79,7 +79,7 @@ struct AlignedBlobParams {
// The AlignedBlob class contains code to find vertically aligned blobs.
// This is factored out into a separate class, so it can be used by both
// vertical line finding (LineFind) and tabstop finding (TabFind).
class AlignedBlob : public BlobGrid {
class TESS_API AlignedBlob : public BlobGrid {
public:
AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
~AlignedBlob() override;

View File

@ -49,7 +49,7 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch;
// The GridBase class is the base class for BBGrid and IntGrid.
// It holds the geometry and scale of the grid.
class GridBase {
class TESS_API GridBase {
public:
GridBase() = default;
GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright);

View File

@ -30,7 +30,7 @@ CLISTIZEH(BLOBNBOX)
using BlobGridSearch = GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>;
class BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
class TESS_API BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
public:
BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
~BlobGrid() override;

View File

@ -47,7 +47,7 @@ class TempColumn_LIST;
class EquationDetectBase;
// The ColumnFinder class finds columns in the grid.
class ColumnFinder : public TabFind {
class TESS_API ColumnFinder : public TabFind {
public:
// Gridsize is an estimate of the text size in the image. A suitable value
// is in TO_BLOCK::line_size after find_components has been used to make

View File

@ -64,7 +64,7 @@ CLISTIZEH(ColPartition)
* to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
* emerges, which represents the columns over a wide y-coordinate range.
*/
class ColPartition : public ELIST2_LINK {
class TESS_API ColPartition : public ELIST2_LINK {
public:
// This empty constructor is here only so that the class can be ELISTIZED.
// TODO(rays) change deep_copy in elst.h line 955 to take a callback copier

View File

@ -29,7 +29,7 @@ class TabFind;
// ColPartitionGrid is a BBGrid of ColPartition.
// It collects functions that work on the grid.
class ColPartitionGrid : public BBGrid<ColPartition,
class TESS_API ColPartitionGrid : public BBGrid<ColPartition,
ColPartition_CLIST,
ColPartition_C_IT> {
public:

View File

@ -29,7 +29,7 @@ namespace tesseract {
class ColPartitionGrid;
class ColPartitionSet;
class EquationDetectBase {
class TESS_API EquationDetectBase {
public:
EquationDetectBase() = default;
virtual ~EquationDetectBase();

View File

@ -49,7 +49,7 @@ const int kColumnWidthFactor = 20;
* rule/separator lines, and tabstop boundaries, (when available), so
* as the holder of the list of TabVectors this class provides the functions.
*/
class TabFind : public AlignedBlob {
class TESS_API TabFind : public AlignedBlob {
public:
TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
TabVector_LIST* vlines, int vertical_x, int vertical_y,

View File

@ -127,7 +127,7 @@ using ColSegmentGridSearch = GridSearch<ColSegment,
// finder.InsertCleanPartitions(/* grid info */)
// finder.LocateTables(/* ColPartitions and Columns */);
// finder.Update TODO(nbeato)
class TableFinder {
class TESS_API TableFinder {
public:
// Constructor is simple initializations
TableFinder();

View File

@ -69,7 +69,7 @@ namespace tesseract {
// // etc.
// }
//
class StructuredTable {
class TESS_API StructuredTable {
public:
StructuredTable();
~StructuredTable() = default;
@ -254,7 +254,7 @@ class StructuredTable {
int max_text_height_;
};
class TableRecognizer {
class TESS_API TableRecognizer {
public:
TableRecognizer();
~TableRecognizer();

View File

@ -30,7 +30,7 @@ class ColPartition;
// horizontally (vertically for components on a vertically written textline)
// and count the number of smeared components in an image, then the resulting
// image shows the density of the textlines at each image position.
class TextlineProjection {
class TESS_API TextlineProjection {
public:
// The down-scaling factor is computed to obtain a projection resolution
// of about 100 dpi, whatever the input.

View File

@ -41,15 +41,25 @@
// Flags from commontraining.cpp
// Command line arguments for font_properties, xheights and unicharset.
TESS_COMMON_TRAINING_API
DECLARE_INT_PARAM_FLAG(debug_level);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(D);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(F);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(O);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(U);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(X);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(fonts_dir);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(output_trainer);
TESS_COMMON_TRAINING_API
DECLARE_STRING_PARAM_FLAG(test_ch);
namespace tesseract {
@ -63,6 +73,7 @@ namespace tesseract {
// eg. If the input *argv is
// { "program", "--foo=4", "--bar=true", "file1", "file2" } with *argc = 5, the
// output *argv is { "program", "file1", "file2" } with *argc = 3
TESS_COMMON_TRAINING_API
void ParseCommandLineFlags(const char* usage, int* argc,
char*** argv, const bool remove_flags);

View File

@ -12,9 +12,8 @@
// limitations under the License.
#define _USE_MATH_DEFINES // for M_PI
#include "commontraining.h"
#include <algorithm>
#include <cmath> // for M_PI
#ifdef DISABLED_LEGACY_ENGINE
@ -32,6 +31,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from");
STRING_PARAM_FLAG(O, "", "File to write unicharset to");
STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to");
STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string");
STRING_PARAM_FLAG(fonts_dir, "", "");
STRING_PARAM_FLAG(fontconfig_tmpdir, "", "");
/**
* This routine parses the command line arguments that were
@ -96,6 +97,8 @@ STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from");
STRING_PARAM_FLAG(O, "", "File to write unicharset to");
STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to");
STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string");
STRING_PARAM_FLAG(fonts_dir, "", "");
STRING_PARAM_FLAG(fontconfig_tmpdir, "", "");
static DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples,
"Min number of samples per proto as % of total");
static DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal,

View File

@ -18,17 +18,14 @@
#include "config_auto.h"
#endif
#include "commandlineflags.h"
#include "tprintf.h"
#include <tesseract/baseapi.h>
#ifdef DISABLED_LEGACY_ENGINE
#include "tprintf.h"
#include "commandlineflags.h"
TESS_COMMON_TRAINING_API
void ParseArguments(int* argc, char*** argv);
namespace tesseract {
// Check whether the shared tesseract library is the right one.
@ -48,11 +45,9 @@ static inline void CheckSharedLibraryVersion()
} // namespace tesseract
#else
#ifndef DISABLED_LEGACY_ENGINE
#include "cluster.h"
#include "commandlineflags.h"
#include "featdefs.h"
#include "intproto.h"
#include "oldlist.h"
@ -67,9 +62,11 @@ class ShapeTable;
// Globals ///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
TESS_COMMON_TRAINING_API
extern tesseract::FEATURE_DEFS_STRUCT feature_defs;
// Must be defined in the file that "implements" commonTraining facilities.
TESS_COMMON_TRAINING_API
extern tesseract::CLUSTERCONFIG Config;
//////////////////////////////////////////////////////////////////////////////
@ -96,28 +93,13 @@ using MERGE_CLASS = MERGE_CLASS_NODE*;
//////////////////////////////////////////////////////////////////////////////
// Functions /////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
void ParseArguments(int* argc, char*** argv);
namespace tesseract {
// Check whether the shared tesseract library is the right one, i.e. that
// the version reported by TessBaseAPI::Version() equals the
// TESSERACT_VERSION_STR this program was built with. On a mismatch an error
// is printed and the process exits with status 1.
// This function must be inline because otherwise it would be part of
// the shared library, so it could not compare the versions.
// The check is only compiled in when HAVE_CONFIG_H is defined; otherwise the
// function is a no-op.
static inline void CheckSharedLibraryVersion()
{
#ifdef HAVE_CONFIG_H
  // strcmp returns non-zero when the two version strings differ.
  if (strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version()) != 0) {
    tprintf("ERROR: shared library version mismatch (was %s, expected %s)\n"
            "Did you use a wrong shared tesseract library?\n",
            TessBaseAPI::Version(), TESSERACT_VERSION_STR);
    exit(1);
  }
#endif
}
// Helper loads shape table from the given file.
ShapeTable* LoadShapeTable(const STRING& file_prefix);
// Helper to write the shape_table.
TESS_COMMON_TRAINING_API
void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
// Creates a MasterTraininer and loads the training data into it:
@ -133,21 +115,26 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
// Computes canonical and cloud features.
// If shape_table is not nullptr, but failed to load, make a fake flat one,
// as shape clustering was not run.
TESS_COMMON_TRAINING_API
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
bool replication,
ShapeTable** shape_table,
STRING* file_prefix);
} // namespace tesseract.
TESS_COMMON_TRAINING_API
const char *GetNextFilename(int argc, const char* const * argv);
LABELEDLIST FindList(
tesseract::LIST List,
char *Label);
TESS_COMMON_TRAINING_API
LABELEDLIST NewLabeledList(
const char *Label);
TESS_COMMON_TRAINING_API
void ReadTrainingSamples(const tesseract::FEATURE_DEFS_STRUCT& feature_defs,
const char *feature_name, int max_samples,
tesseract::UNICHARSET* unicharset,
@ -159,59 +146,69 @@ void WriteTrainingSamples(
tesseract::LIST CharList,
const char *program_feature_type);
TESS_COMMON_TRAINING_API
void FreeTrainingSamples(
tesseract::LIST CharList);
TESS_COMMON_TRAINING_API
void FreeLabeledList(
LABELEDLIST LabeledList);
TESS_COMMON_TRAINING_API
void FreeLabeledClassList(
tesseract::LIST ClassListList);
TESS_COMMON_TRAINING_API
tesseract::CLUSTERER *SetUpForClustering(
const tesseract::FEATURE_DEFS_STRUCT &FeatureDefs,
LABELEDLIST CharSample,
const char *program_feature_type);
TESS_COMMON_TRAINING_API
tesseract::LIST RemoveInsignificantProtos(
tesseract::LIST ProtoList,
bool KeepSigProtos,
bool KeepInsigProtos,
int N);
TESS_COMMON_TRAINING_API
void CleanUpUnusedData(
tesseract::LIST ProtoList);
TESS_COMMON_TRAINING_API
void MergeInsignificantProtos(
tesseract::LIST ProtoList,
const char *label,
tesseract::CLUSTERER *Clusterer,
tesseract::CLUSTERCONFIG *Config);
TESS_COMMON_TRAINING_API
MERGE_CLASS FindClass(
tesseract::LIST List,
const char *Label);
TESS_COMMON_TRAINING_API
MERGE_CLASS NewLabeledClass(
const char *Label);
void FreeTrainingSamples(
tesseract::LIST CharList);
TESS_COMMON_TRAINING_API
tesseract::CLASS_STRUCT* SetUpForFloat2Int(const tesseract::UNICHARSET& unicharset,
tesseract::LIST LabeledClassList);
void Normalize(
float *Values);
TESS_COMMON_TRAINING_API
void FreeNormProtoList(
tesseract::LIST CharList);
TESS_COMMON_TRAINING_API
void AddToNormProtosList(
tesseract::LIST* NormProtoList,
tesseract::LIST ProtoList,
char *CharName);
TESS_COMMON_TRAINING_API
int NumberOfProtos(
tesseract::LIST ProtoList,
bool CountSigProtos,

View File

@ -27,7 +27,7 @@
namespace tesseract {
// Class to encapsulate CTC and simple target generation.
class CTC {
class TESS_COMMON_TRAINING_API CTC {
public:
// Normalizes the probabilities such that no target has a prob below min_prob,
// and, provided that the initial total is at least min_total_prob, then all

View File

@ -33,7 +33,6 @@
#include "host.h" // includes windows.h for BOOL, ...
#include "tprintf.h"
namespace tesseract {
///////////////////////////////////////////////////////////////////////////////

View File

@ -40,7 +40,7 @@ inline bool LoadFileLinesToStrings(const char* filename,
}
// A class to manipulate FILE*s.
class File {
class TESS_UNICHARSET_TRAINING_API File {
public:
// Try to open the file 'filename' in mode 'mode'.
// Stop the program if it cannot open it.
@ -66,7 +66,7 @@ class File {
};
// A class to manipulate Files for reading.
class InputBuffer {
class TESS_UNICHARSET_TRAINING_API InputBuffer {
public:
explicit InputBuffer(FILE* stream);
// 'size' is ignored.
@ -88,7 +88,7 @@ class InputBuffer {
};
// A class to manipulate Files for writing.
class OutputBuffer {
class TESS_UNICHARSET_TRAINING_API OutputBuffer {
public:
explicit OutputBuffer(FILE* stream);
// 'size' is ignored.

View File

@ -45,7 +45,7 @@ static const int kNumOffsetMaps = 2;
// Although the transformations are reversible, the inverses are lossy and do
// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
// of both transformations.
class IntFeatureMap {
class TESS_COMMON_TRAINING_API IntFeatureMap {
public:
IntFeatureMap();
~IntFeatureMap();

View File

@ -12,20 +12,24 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lang_model_helpers.h"
#if defined(_WIN32)
#include <direct.h>
#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <cstdlib>
#include "dawg.h"
#include "fileio.h"
#include "tessdatamanager.h"
#include "trie.h"
#include "unicharcompress.h"
#include <cstdlib>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(_WIN32)
#include <direct.h>
#endif
namespace tesseract {
// Helper makes a filename (<output_dir>/<lang>/<lang><suffix>) and writes data

View File

@ -28,11 +28,13 @@ namespace tesseract {
// Default writer will overwrite any existing file, but a supplied writer
// can do its own thing. If lang is empty, returns true but does nothing.
// NOTE that suffix should contain any required . for the filename.
TESS_UNICHARSET_TRAINING_API
bool WriteFile(const std::string& output_dir, const std::string& lang,
const std::string& suffix, const std::vector<char>& data,
FileWriter writer);
// Helper reads a file with optional reader and returns a STRING.
// On failure emits a warning message and returns an empty STRING.
TESS_UNICHARSET_TRAINING_API
STRING ReadFile(const std::string& filename, FileReader reader);
// Helper writes the unicharset to file and to the traineddata.
@ -70,6 +72,7 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through,
// puncs must be non-empty.
// lang_is_rtl indicates that the language is generally written from right
// to left (eg Arabic/Hebrew).
TESS_UNICHARSET_TRAINING_API
int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir,
const std::string& version_str, const std::string& output_dir,
const std::string& lang, bool pass_through_recoder,

View File

@ -35,7 +35,7 @@ class PangoFontInfo; // defined in pango_font_info.h
// Map to substitute strings for ligatures.
using LigHash = std::unordered_map<std::string, std::string>;
class LigatureTable {
class TESS_PANGO_TRAINING_API LigatureTable {
public:
// Get a static instance of this class.
static LigatureTable* Get();

View File

@ -25,7 +25,7 @@
namespace tesseract {
class LSTMTester {
class TESS_UNICHARSET_TRAINING_API LSTMTester {
public:
LSTMTester(int64_t max_memory);

View File

@ -76,7 +76,7 @@ using TestCallback = std::function<STRING(int, const double*, const TessdataMana
// ideal target outputs from the transcription. A box file is used if it is
// available, otherwise estimates of the char widths from the unicharset are
// used to guide a DP search for the best fit to the transcription.
class LSTMTrainer : public LSTMRecognizer {
class TESS_UNICHARSET_TRAINING_API LSTMTrainer : public LSTMRecognizer {
public:
LSTMTrainer();
LSTMTrainer(const char* model_base, const char* checkpoint_name,

View File

@ -66,7 +66,7 @@ struct ShapeDist {
// Initially supports shape clustering and mftraining.
// Other important features of the MasterTrainer are conditioning the data
// by outlier elimination, replication with perturbation, and serialization.
class MasterTrainer {
class TESS_COMMON_TRAINING_API MasterTrainer {
public:
MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
bool replicate_samples, int debug_level);

View File

@ -31,7 +31,7 @@ class Network;
class Parallel;
class TRand;
class NetworkBuilder {
class TESS_COMMON_TRAINING_API NetworkBuilder {
public:
explicit NetworkBuilder(int num_softmax_outputs)
: num_softmax_outputs_(num_softmax_outputs) {}

View File

@ -54,13 +54,16 @@ enum class GraphemeNorm {
// Normalizes a UTF8 string according to the given modes. Returns true on
// success. If false is returned, some failure or invalidity was present, and
// the result string is produced on a "best effort" basis.
TESS_UNICHARSET_TRAINING_API
bool NormalizeUTF8String(UnicodeNormMode u_mode, OCRNorm ocr_normalize,
GraphemeNorm grapheme_normalize, const char* str8,
std::string* normalized);
// Normalizes a UTF8 string according to the given modes and splits into
// graphemes according to g_mode. Returns true on success. If false is returned,
// some failure or invalidity was present, and the result string is produced on
// a "best effort" basis.
TESS_UNICHARSET_TRAINING_API
bool NormalizeCleanAndSegmentUTF8(UnicodeNormMode u_mode, OCRNorm ocr_normalize,
GraphemeNormMode g_mode, bool report_errors,
const char* str8,
@ -76,26 +79,35 @@ bool IsOCREquivalent(char32 ch1, char32 ch2);
bool IsValidCodepoint(const char32 ch);
// Returns true if a code point has the White_Space Unicode property.
TESS_UNICHARSET_TRAINING_API
bool IsWhitespace(const char32 ch);
// Returns true if every char in the given (null-terminated) string has the
// White_Space Unicode property.
TESS_UNICHARSET_TRAINING_API
bool IsUTF8Whitespace(const char* text);
// Returns the length in bytes of the prefix of 'text' whose characters have
// the White_Space Unicode property.
TESS_UNICHARSET_TRAINING_API
unsigned int SpanUTF8Whitespace(const char* text);
// Returns the length in bytes of the prefix of 'text' whose characters DO NOT
// have the White_Space Unicode property.
TESS_UNICHARSET_TRAINING_API
unsigned int SpanUTF8NotWhitespace(const char* text);
// Returns true if the char is interchange valid i.e. no C0 or C1 control codes
// (other than CR LF HT FF) and no non-characters.
TESS_UNICHARSET_TRAINING_API
bool IsInterchangeValid(const char32 ch);
// Same as above but restricted to 7-bit ASCII.
TESS_UNICHARSET_TRAINING_API
bool IsInterchangeValid7BitAscii(const char32 ch);
// Convert a full-width UTF-8 string to half-width.
TESS_UNICHARSET_TRAINING_API
char32 FullwidthToHalfwidth(const char32 ch);
} // namespace tesseract

Some files were not shown because too many files have changed in this diff Show More