From dc8bd4682b90118af08fc61b82c4433765b20bc5 Mon Sep 17 00:00:00 2001 From: "zdenop@gmail.com" Date: Mon, 24 Sep 2012 05:14:11 +0000 Subject: [PATCH] C-API (fix issue 362) git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@759 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- README | 1 + api/Makefile.am | 4 +- api/capi.cpp | 592 ++++++++++++++++++++++++ api/capi.h | 295 ++++++++++++ ccutil/strngs.h | 5 + contrib/tesseract-c_api-demo.py | 71 +++ vs2008/libtesseract/libtesseract.vcproj | 8 + 7 files changed, 974 insertions(+), 2 deletions(-) create mode 100644 api/capi.cpp create mode 100644 api/capi.h create mode 100644 contrib/tesseract-c_api-demo.py diff --git a/README b/README index c234e2892..51c19f85a 100644 --- a/README +++ b/README @@ -95,6 +95,7 @@ find its data directory. You must either: ./configure make make install +sudo ldconfig to move the data files to the standard place, or: diff --git a/api/Makefile.am b/api/Makefile.am index 2e5f49adc..d1c338803 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -9,7 +9,7 @@ if VISIBILITY AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden endif -include_HEADERS = apitypes.h baseapi.h +include_HEADERS = apitypes.h baseapi.h capi.h noinst_HEADERS = tesseractmain.h lib_LTLIBRARIES = @@ -36,7 +36,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS) if VISIBILITY libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS endif -libtesseract_api_la_SOURCES = baseapi.cpp +libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp lib_LTLIBRARIES += libtesseract.la libtesseract_la_LDFLAGS = diff --git a/api/capi.cpp b/api/capi.cpp new file mode 100644 index 000000000..25ac790a7 --- /dev/null +++ b/api/capi.cpp @@ -0,0 +1,592 @@ +#ifndef TESS_CAPI_INCLUDE_BASEAPI +# define TESS_CAPI_INCLUDE_BASEAPI +#endif +#include "capi.h" + +TESS_API const char* TESS_CALL TessVersion() +{ + return TessBaseAPI::Version(); +} + +TESS_API void TESS_CALL TessDeleteText(char* text) +{ + delete [] text; +} + +TESS_API void TESS_CALL 
TessDeleteTextArray(char** arr) +{ + for (char** pos = arr; *pos != NULL; ++pos) + delete [] *pos; + delete [] arr; +} + +TESS_API void TESS_CALL TessDeleteIntArray(int* arr) +{ + delete [] arr; +} + +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list) +{ + TessBaseAPI::DeleteBlockList(block_list); +} + +TESS_API TessBaseAPI* TESS_CALL TessBaseAPICreate() +{ + return new TessBaseAPI; +} + +TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle) +{ + delete handle; +} + +TESS_API void TESS_CALL TessBaseAPISetInputName(TessBaseAPI* handle, const char* name) +{ + handle->SetInputName(name); +} + +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name) +{ + handle->SetOutputName(name); +} + +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value) +{ + return handle->SetVariable(name, value) ? TRUE : FALSE; +} + +/* C wrapper for TessBaseAPI::SetDebugVariable: passes name and value through and maps the bool result to TRUE/FALSE. It must not forward to SetVariable, which TessBaseAPISetVariable already wraps. */ +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value) +{ + return handle->SetDebugVariable(name, value) ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable(const TessBaseAPI* handle, const char* name, int* value) +{ + return handle->GetIntVariable(name, value) ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable(const TessBaseAPI* handle, const char* name, BOOL* value) +{ + bool boolValue; + if (handle->GetBoolVariable(name, &boolValue)) + { + *value = boolValue ? TRUE : FALSE; + return TRUE; + } + else + { + return FALSE; + } +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value) +{ + return handle->GetDoubleVariable(name, value) ?
TRUE : FALSE; +} + +TESS_API const char* TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name) +{ + return handle->GetStringVariable(name); +} + +TESS_API void TESS_CALL TessBaseAPIPrintVariables(const TessBaseAPI* handle, FILE* fp) +{ + handle->PrintVariables(fp); +} + +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename) +{ + FILE* fp = fopen(filename, "w"); + if (fp != NULL) + { + handle->PrintVariables(fp); + fclose(fp); + return TRUE; + } + return FALSE; +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val) +{ + return handle->GetVariableAsString(name, val) ? TRUE : FALSE; +} + +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, + char** configs, int configs_size) +{ + return handle->Init(datapath, language, oem, configs, configs_size, NULL, NULL, false); +} + +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem) +{ + return handle->Init(datapath, language, oem); +} + +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language) +{ + return handle->Init(datapath, language); +} + +TESS_API const char* TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle) +{ + return handle->GetInitLanguagesAsString(); +} + +/* Returns a new[]-allocated array holding one strdup()'d C string per entry that GetLoadedLanguagesAsVector stores in the vector, followed by a terminating NULL entry. NOTE(review): presumably the caller releases it with TessDeleteTextArray - confirm that STRING::strdup allocates with new[]. */ +TESS_API char** TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle) +{ + GenericVector<STRING> languages; + handle->GetLoadedLanguagesAsVector(&languages); + char** arr = new char*[languages.size() + 1]; + for (int index = 0; index < languages.size(); ++index) + arr[index] = languages[index].strdup(); + arr[languages.size()] = NULL; /* sentinel so callers can find the end without a separate count */ + return arr; +} + +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language) +{ + return
handle->InitLangMod(datapath, language); +} + +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle) +{ + handle->InitForAnalysePage(); +} + +TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename) +{ + handle->ReadConfigFile(filename); +} + +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename) +{ + handle->ReadDebugConfigFile(filename); +} + +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode) +{ + handle->SetPageSegMode(mode); +} + +TESS_API TessPageSegMode TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle) +{ + return handle->GetPageSegMode(); +} + +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height) +{ + return handle->TesseractRect(imagedata, bytes_per_pixel, bytes_per_line, left, top, width, height); +} + +TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle) +{ + handle->ClearAdaptiveClassifier(); +} + +TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line) +{ + handle->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); +} + +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, const Pix* pix) +{ + return handle->SetImage(pix); +} + +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int ppi) +{ + handle->SetSourceResolution(ppi); +} + +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height) +{ + handle->SetRectangle(left, top, width, height); +} + +TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder) +{ + handle->SetThresholder(thresholder); +} + +TESS_API Pix* TESS_CALL 
TessBaseAPIGetThresholdedImage(TessBaseAPI* handle) +{ + return handle->GetThresholdedImage(); +} + +TESS_API Boxa* TESS_CALL TessBaseAPIGetRegions(TessBaseAPI* handle, Pixa** pixa) +{ + return handle->GetRegions(pixa); +} + +TESS_API Boxa* TESS_CALL TessBaseAPIGetTextlines(TessBaseAPI* handle, Pixa** pixa, int** blockids) +{ + return handle->GetTextlines(pixa, blockids); +} + +TESS_API BOXA* TESS_CALL TessBaseAPIGetStrips(TessBaseAPI* handle, PIXA** pixa, int** blockids) +{ + return handle->GetStrips(pixa, blockids); +} + +TESS_API Boxa* TESS_CALL TessBaseAPIGetWords(TessBaseAPI* handle, Pixa** pixa) +{ + return handle->GetWords(pixa); +} + +TESS_API Boxa* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, Pixa** cc) +{ + return handle->GetConnectedComponents(cc); +} + +TESS_API Boxa* TESS_CALL TessBaseAPIGetComponentImages(TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, Pixa** pixa, int** blockids) +{ + return handle->GetComponentImages(level, text_only != FALSE, pixa, blockids); +} + +TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle) +{ + return handle->GetThresholdedImageScaleFactor(); +} + +TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename) +{ + handle->DumpPGM(filename); +} + +TESS_API TessPageIterator* TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle) +{ + return handle->AnalyseLayout(); +} + +TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor) +{ + return handle->Recognize(monitor); +} + +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor) +{ + return handle->RecognizeForChopTest(monitor); +} + +TESS_API char* TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, + int timeout_millisec) +{ + STRING text_out; + if (handle->ProcessPages(filename, retry_config, timeout_millisec, &text_out)) + return text_out.strdup(); + 
else + return NULL; +} + +TESS_API char* TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, Pix* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec) +{ + STRING text_out; + if (handle->ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, &text_out)) + return text_out.strdup(); + else + return NULL; +} + +TESS_API TessResultIterator* TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle) +{ + return handle->GetIterator(); +} + +TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle) +{ + return handle->GetMutableIterator(); +} + +TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle) +{ + return handle->GetUTF8Text(); +} + +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number) +{ + return handle->GetHOCRText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number) +{ + return handle->GetBoxText(page_number); +} + +TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle) +{ + return handle->GetUNLVText(); +} + +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle) +{ + return handle->MeanTextConf(); +} + +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle) +{ + return handle->AllWordConfidences(); +} + +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr) +{ + return handle->AdaptToWordStr(mode, wordstr) ? 
TRUE : FALSE; +} + +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle) +{ + handle->Clear(); +} + +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle) +{ + handle->End(); +} + +TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char *word) +{ + return handle->IsValidWord(word); +} + +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope) +{ + return handle->GetTextDirection(out_offset, out_slope) ? TRUE : FALSE; +} + +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f) +{ + handle->SetDictFunc(f); +} + +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f) +{ + handle->SetProbabilityInContextFunc(f); +} + +TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) +{ + return handle->DetectOS(results) ? TRUE : FALSE; +} + +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, const DENORM* denorm, INT_FEATURE_ARRAY int_features, + int* num_features, int* FeatureOutlineIndex) +{ + handle->GetFeaturesForBlob(blob, *denorm, int_features, num_features, FeatureOutlineIndex); +} + +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom) +{ + return TessBaseAPI::FindRowForBox(blocks, left, top, right, bottom); +} + +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, const DENORM* denorm, int num_max_matches, + int* unichar_ids, float* ratings, int* num_matches_returned) +{ + handle->RunAdaptiveClassifier(blob, *denorm, num_max_matches, unichar_ids, ratings, num_matches_returned); +} + +TESS_API const char* TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id) +{ + return handle->GetUnichar(unichar_id); +} + +TESS_API const TessDawg* TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int i) +{ + return handle->GetDawg(i); 
+} + +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle) +{ + return handle->NumDawgs(); +} + +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender) +{ + return TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); +} + +TESS_API TBLOB* TESS_CALL TessMakeTBLOB(Pix *pix) +{ + return TessBaseAPI::MakeTBLOB(pix); +} + +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB *tblob, ROW *row, BOOL numeric_mode, DENORM *denorm) +{ + TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode != FALSE, denorm); +} + +TESS_API TessOcrEngineMode TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle) +{ + return handle->oem(); +} + +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback *cb) +{ + handle->InitTruthCallback(cb); +} + +TESS_API TessCubeRecoContext* TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle) +{ + return handle->GetCubeRecoContext(); +} + +TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin) +{ + handle->set_min_orientation_margin(margin); +} + +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing) +{ + handle->GetBlockTextOrientations(block_orientation, vertical_writing); +} + +TESS_API BLOCK_LIST* TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle) +{ + return handle->FindLinesCreateBlockList(); +} + +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle) +{ + delete handle; +} + +TESS_API TessPageIterator* TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle) +{ + return new TessPageIterator(*handle); +} + +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle) +{ + handle->Begin(); +} + +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level) +{ + return handle->Next(level) ? 
TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level) +{ + return handle->IsAtBeginningOf(level) ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element) +{ + return handle->IsAtFinalElement(level, element) ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, + int* left, int* top, int* right, int* bottom) +{ + return handle->BoundingBox(level, left, top, right, bottom) ? TRUE : FALSE; +} + +TESS_API TessPolyBlockType TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle) +{ + return handle->BlockType(); +} + +TESS_API Pix* TESS_CALL TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level) +{ + return handle->GetBinaryImage(level); +} + +TESS_API Pix* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + int* left, int* top) +{ + return handle->GetImage(level, padding, left, top); +} + +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, + int* x1, int* y1, int* x2, int* y2) +{ + return handle->Baseline(level, x1, y1, x2, y2) ? 
TRUE : FALSE; +} + +TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation *orientation, + TessWritingDirection *writing_direction, TessTextlineOrder *textline_order, + float *deskew_angle) +{ + handle->Orientation(orientation, writing_direction, textline_order, deskew_angle); +} + +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle) +{ + delete handle; +} + +/* Returns a heap-allocated copy of *handle. The copy is built with the TessResultIterator copy constructor so that its dynamic type matches the returned pointer type; copy-constructing a TessPageIterator and downcasting it would hand back a sliced base-class object. TessResultIteratorDelete deletes a handle of this type. */ +TESS_API TessResultIterator* TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle) +{ + return new TessResultIterator(*handle); +} + +TESS_API TessPageIterator* TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle) +{ + return handle; +} + +TESS_API const TessPageIterator* TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle) +{ + return handle; +} + +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level) +{ + return handle->GetUTF8Text(level); +} + +TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level) +{ + return handle->Confidence(level); +} + +TESS_API const char* TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id) +{ + bool bool_is_bold, bool_is_italic, bool_is_underlined, bool_is_monospace, bool_is_serif, bool_is_smallcaps; + const char* ret = handle->WordFontAttributes(&bool_is_bold, &bool_is_italic, &bool_is_underlined, &bool_is_monospace, &bool_is_serif, + &bool_is_smallcaps, pointsize, font_id); + if (is_bold) + *is_bold = bool_is_bold ? TRUE : FALSE; + if (is_italic) + *is_italic = bool_is_italic ? TRUE : FALSE; + if (is_underlined) + *is_underlined = bool_is_underlined ? TRUE : FALSE; + if (is_monospace) + *is_monospace = bool_is_monospace ?
TRUE : FALSE; + if (is_serif) + *is_serif = bool_is_serif ? TRUE : FALSE; + if (is_smallcaps) + *is_smallcaps = bool_is_smallcaps ? TRUE : FALSE; + return ret; +} + +TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle) +{ + return handle->WordIsFromDictionary() ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle) +{ + return handle->WordIsNumeric() ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle) +{ + return handle->SymbolIsSuperscript() ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle) +{ + return handle->SymbolIsSubscript() ? TRUE : FALSE; +} + +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* handle) +{ + return handle->SymbolIsDropcap() ? TRUE : FALSE; +} diff --git a/api/capi.h b/api/capi.h new file mode 100644 index 000000000..b491a49a8 --- /dev/null +++ b/api/capi.h @@ -0,0 +1,295 @@ +#ifndef TESSERACT_API_CAPI_H__ +#define TESSERACT_API_CAPI_H__ + +/* When TESS_CAPI_INCLUDE_BASEAPI is defined the C++ headers are pulled in and the Tess* names below become typedefs of the tesseract:: types; otherwise only platform.h and the C standard I/O header are needed. */ +#ifdef TESS_CAPI_INCLUDE_BASEAPI +# include "baseapi.h" +# include "pageiterator.h" +# include "resultiterator.h" +#else +# include "platform.h" +# include <stdio.h> /* FILE, used by the TessBaseAPIPrintVariables declaration */ +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef TESS_CALL +# if defined(WIN32) +# define TESS_CALL __cdecl +# else +# define TESS_CALL +# endif +#endif + +#ifndef BOOL +# define BOOL int +# define TRUE 1 +# define FALSE 0 +#endif + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +typedef tesseract::TessBaseAPI TessBaseAPI; +typedef tesseract::PageIterator TessPageIterator; +typedef tesseract::ResultIterator TessResultIterator; +typedef tesseract::MutableIterator TessMutableIterator; +typedef tesseract::OcrEngineMode TessOcrEngineMode; +typedef tesseract::PageSegMode TessPageSegMode; +typedef tesseract::ImageThresholder TessImageThresholder; +typedef tesseract::PageIteratorLevel
TessPageIteratorLevel; +typedef tesseract::DictFunc TessDictFunc; +typedef tesseract::ProbabilityInContextFunc TessProbabilityInContextFunc; +typedef tesseract::FillLatticeFunc TessFillLatticeFunc; +typedef tesseract::Dawg TessDawg; +typedef tesseract::TruthCallback TessTruthCallback; +typedef tesseract::CubeRecoContext TessCubeRecoContext; +typedef tesseract::Orientation TessOrientation; +typedef tesseract::WritingDirection TessWritingDirection; +typedef tesseract::TextlineOrder TessTextlineOrder; +typedef PolyBlockType TessPolyBlockType; +typedef Pix PIX; +typedef Boxa BOXA; +typedef Pixa PIXA; +#else +typedef struct TessBaseAPI TessBaseAPI; +typedef struct TessPageIterator TessPageIterator; +typedef struct TessResultIterator TessResultIterator; +typedef struct TessMutableIterator TessMutableIterator; +typedef enum TessOcrEngineMode { OEM_TESSERACT_ONLY, OEM_CUBE_ONLY, OEM_TESSERACT_CUBE_COMBINED, OEM_DEFAULT } TessOcrEngineMode; +typedef enum TessPageSegMode { PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO, PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, + PSM_SINGLE_BLOCK, PSM_SINGLE_LINE, PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_COUNT } TessPageSegMode; +typedef enum TessPageIteratorLevel { RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} TessPageIteratorLevel; +typedef enum TessPolyBlockType { PT_UNKNOWN, PT_FLOWING_TEXT, PT_HEADING_TEXT, PT_PULLOUT_TEXT, PT_TABLE, PT_VERTICAL_TEXT, + PT_CAPTION_TEXT, PT_FLOWING_IMAGE, PT_HEADING_IMAGE, PT_PULLOUT_IMAGE, PT_HORZ_LINE, PT_VERT_LINE, + PT_NOISE, PT_COUNT } TessPolyBlockType; +typedef enum TessOrientation { ORIENTATION_PAGE_UP, ORIENTATION_PAGE_RIGHT, ORIENTATION_PAGE_DOWN, ORIENTATION_PAGE_LEFT } TessOrientation; +typedef enum TessWritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT, WRITING_DIRECTION_RIGHT_TO_LEFT, WRITING_DIRECTION_TOP_TO_BOTTOM } TessWritingDirection; +typedef enum TessTextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT, TEXTLINE_ORDER_RIGHT_TO_LEFT, 
TEXTLINE_ORDER_TOP_TO_BOTTOM } TessTextlineOrder; +typedef struct ETEXT_DESC ETEXT_DESC; +typedef struct Pix PIX; +typedef struct Boxa BOXA; +typedef struct Pixa PIXA; +#endif + +/* General free functions */ + +TESS_API const char* + TESS_CALL TessVersion(); +TESS_API void TESS_CALL TessDeleteText(char* text); +TESS_API void TESS_CALL TessDeleteTextArray(char** arr); +TESS_API void TESS_CALL TessDeleteIntArray(int* arr); +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessDeleteBlockList(BLOCK_LIST* block_list); +#endif + +/* Base API */ + +TESS_API TessBaseAPI* + TESS_CALL TessBaseAPICreate(); +TESS_API void TESS_CALL TessBaseAPIDelete(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetInputName( TessBaseAPI* handle, const char* name); +TESS_API void TESS_CALL TessBaseAPISetOutputName(TessBaseAPI* handle, const char* name); + +TESS_API BOOL TESS_CALL TessBaseAPISetVariable(TessBaseAPI* handle, const char* name, const char* value); +TESS_API BOOL TESS_CALL TessBaseAPISetDebugVariable(TessBaseAPI* handle, const char* name, const char* value); + +TESS_API BOOL TESS_CALL TessBaseAPIGetIntVariable( const TessBaseAPI* handle, const char* name, int* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetBoolVariable( const TessBaseAPI* handle, const char* name, BOOL* value); +TESS_API BOOL TESS_CALL TessBaseAPIGetDoubleVariable(const TessBaseAPI* handle, const char* name, double* value); +TESS_API const char* + TESS_CALL TessBaseAPIGetStringVariable(const TessBaseAPI* handle, const char* name); + +TESS_API void TESS_CALL TessBaseAPIPrintVariables( const TessBaseAPI* handle, FILE* fp); +TESS_API BOOL TESS_CALL TessBaseAPIPrintVariablesToFile(const TessBaseAPI* handle, const char* filename); +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API BOOL TESS_CALL TessBaseAPIGetVariableAsString(TessBaseAPI* handle, const char* name, STRING* val); +#endif + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API int TESS_CALL TessBaseAPIInit(TessBaseAPI* handle, const char* datapath, 
const char* language, + TessOcrEngineMode mode, char** configs, int configs_size, + const STRING* vars_vec, size_t vars_vec_size, + const STRING* vars_values, size_t vars_values_size, BOOL set_only_init_params); +#endif +TESS_API int TESS_CALL TessBaseAPIInit1(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem, + char** configs, int configs_size); +TESS_API int TESS_CALL TessBaseAPIInit2(TessBaseAPI* handle, const char* datapath, const char* language, TessOcrEngineMode oem); +TESS_API int TESS_CALL TessBaseAPIInit3(TessBaseAPI* handle, const char* datapath, const char* language); + +TESS_API const char* + TESS_CALL TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI* handle); +TESS_API char** + TESS_CALL TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIInitLangMod(TessBaseAPI* handle, const char* datapath, const char* language); +TESS_API void TESS_CALL TessBaseAPIInitForAnalysePage(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPIReadConfigFile(TessBaseAPI* handle, const char* filename); +TESS_API void TESS_CALL TessBaseAPIReadDebugConfigFile(TessBaseAPI* handle, const char* filename); + +TESS_API void TESS_CALL TessBaseAPISetPageSegMode(TessBaseAPI* handle, TessPageSegMode mode); +TESS_API TessPageSegMode + TESS_CALL TessBaseAPIGetPageSegMode(const TessBaseAPI* handle); + +TESS_API char* TESS_CALL TessBaseAPIRect(TessBaseAPI* handle, const unsigned char* imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height); + +TESS_API void TESS_CALL TessBaseAPIClearAdaptiveClassifier(TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPISetImage(TessBaseAPI* handle, const unsigned char* imagedata, int width, int height, + int bytes_per_pixel, int bytes_per_line); +TESS_API void TESS_CALL TessBaseAPISetImage2(TessBaseAPI* handle, const PIX* pix); + +TESS_API void TESS_CALL TessBaseAPISetSourceResolution(TessBaseAPI* handle, int 
ppi); + +TESS_API void TESS_CALL TessBaseAPISetRectangle(TessBaseAPI* handle, int left, int top, int width, int height); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessBaseAPISetThresholder(TessBaseAPI* handle, TessImageThresholder* thresholder); +#endif + +TESS_API PIX* TESS_CALL TessBaseAPIGetThresholdedImage( TessBaseAPI* handle); +TESS_API BOXA* TESS_CALL TessBaseAPIGetRegions( TessBaseAPI* handle, PIXA** pixa); +TESS_API BOXA* TESS_CALL TessBaseAPIGetTextlines( TessBaseAPI* handle, PIXA** pixa, int** blockids); +TESS_API BOXA* TESS_CALL TessBaseAPIGetStrips( TessBaseAPI* handle, PIXA** pixa, int** blockids); +TESS_API BOXA* TESS_CALL TessBaseAPIGetWords( TessBaseAPI* handle, PIXA** pixa); +TESS_API BOXA* TESS_CALL TessBaseAPIGetConnectedComponents(TessBaseAPI* handle, PIXA** cc); +TESS_API BOXA* TESS_CALL TessBaseAPIGetComponentImages( TessBaseAPI* handle, TessPageIteratorLevel level, BOOL text_only, PIXA** pixa, int** blockids); + +TESS_API int TESS_CALL TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI* handle); + +TESS_API void TESS_CALL TessBaseAPIDumpPGM(TessBaseAPI* handle, const char* filename); + +TESS_API TessPageIterator* + TESS_CALL TessBaseAPIAnalyseLayout(TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIRecognize(TessBaseAPI* handle, ETEXT_DESC* monitor); +TESS_API int TESS_CALL TessBaseAPIRecognizeForChopTest(TessBaseAPI* handle, ETEXT_DESC* monitor); +TESS_API char* TESS_CALL TessBaseAPIProcessPages(TessBaseAPI* handle, const char* filename, const char* retry_config, + int timeout_millisec); +TESS_API char* TESS_CALL TessBaseAPIProcessPage(TessBaseAPI* handle, PIX* pix, int page_index, const char* filename, + const char* retry_config, int timeout_millisec); + +TESS_API TessResultIterator* + TESS_CALL TessBaseAPIGetIterator(TessBaseAPI* handle); +TESS_API TessMutableIterator* + TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAPI* handle); + +TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* 
handle); +TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number); +TESS_API char* TESS_CALL TessBaseAPIGetUNLVText(TessBaseAPI* handle); +TESS_API int TESS_CALL TessBaseAPIMeanTextConf(TessBaseAPI* handle); +TESS_API int* TESS_CALL TessBaseAPIAllWordConfidences(TessBaseAPI* handle); +TESS_API BOOL TESS_CALL TessBaseAPIAdaptToWordStr(TessBaseAPI* handle, TessPageSegMode mode, const char* wordstr); + +TESS_API void TESS_CALL TessBaseAPIClear(TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIEnd(TessBaseAPI* handle); + +TESS_API int TESS_CALL TessBaseAPIIsValidWord(TessBaseAPI* handle, const char *word); +TESS_API BOOL TESS_CALL TessBaseAPIGetTextDirection(TessBaseAPI* handle, int* out_offset, float* out_slope); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessBaseAPISetDictFunc(TessBaseAPI* handle, TessDictFunc f); +TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); +TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); +TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results); + +TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, const DENORM* denorm, INT_FEATURE_ARRAY int_features, + int* num_features, int* FeatureOutlineIndex); + +TESS_API ROW* TESS_CALL TessFindRowForBox(BLOCK_LIST* blocks, int left, int top, int right, int bottom); +TESS_API void TESS_CALL TessBaseAPIRunAdaptiveClassifier(TessBaseAPI* handle, TBLOB* blob, const DENORM* denorm, int num_max_matches, + int* unichar_ids, float* ratings, int* num_matches_returned); +#endif + +TESS_API const char* + TESS_CALL TessBaseAPIGetUnichar(TessBaseAPI* handle, int unichar_id); + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API const TessDawg* + TESS_CALL TessBaseAPIGetDawg(const TessBaseAPI* handle, int 
i); +TESS_API int TESS_CALL TessBaseAPINumDawgs(const TessBaseAPI* handle); +#endif + +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API ROW* TESS_CALL TessMakeTessOCRRow(float baseline, float xheight, float descender, float ascender); +TESS_API TBLOB* + TESS_CALL TessMakeTBLOB(Pix *pix); +TESS_API void TESS_CALL TessNormalizeTBLOB(TBLOB *tblob, ROW *row, BOOL numeric_mode, DENORM *denorm); + +TESS_API TessOcrEngineMode + TESS_CALL TessBaseAPIOem(const TessBaseAPI* handle); +TESS_API void TESS_CALL TessBaseAPIInitTruthCallback(TessBaseAPI* handle, TessTruthCallback *cb); + +TESS_API TessCubeRecoContext* + TESS_CALL TessBaseAPIGetCubeRecoContext(const TessBaseAPI* handle); +#endif + +TESS_API void TESS_CALL TessBaseAPISetMinOrientationMargin(TessBaseAPI* handle, double margin); +#ifdef TESS_CAPI_INCLUDE_BASEAPI +TESS_API void TESS_CALL TessBaseGetBlockTextOrientations(TessBaseAPI* handle, int** block_orientation, bool** vertical_writing); + +TESS_API BLOCK_LIST* + TESS_CALL TessBaseAPIFindLinesCreateBlockList(TessBaseAPI* handle); +#endif + +/* Page iterator */ + +TESS_API void TESS_CALL TessPageIteratorDelete(TessPageIterator* handle); +TESS_API TessPageIterator* + TESS_CALL TessPageIteratorCopy(const TessPageIterator* handle); + +TESS_API void TESS_CALL TessPageIteratorBegin(TessPageIterator* handle); +TESS_API BOOL TESS_CALL TessPageIteratorNext(TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API BOOL TESS_CALL TessPageIteratorIsAtBeginningOf(const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API BOOL TESS_CALL TessPageIteratorIsAtFinalElement(const TessPageIterator* handle, TessPageIteratorLevel level, + TessPageIteratorLevel element); + +TESS_API BOOL TESS_CALL TessPageIteratorBoundingBox(const TessPageIterator* handle, TessPageIteratorLevel level, + int* left, int* top, int* right, int* bottom); +TESS_API TessPolyBlockType + TESS_CALL TessPageIteratorBlockType(const TessPageIterator* handle); + +TESS_API PIX* TESS_CALL 
TessPageIteratorGetBinaryImage(const TessPageIterator* handle, TessPageIteratorLevel level); +TESS_API PIX* TESS_CALL TessPageIteratorGetImage(const TessPageIterator* handle, TessPageIteratorLevel level, int padding, + int* left, int* top); + +TESS_API BOOL TESS_CALL TessPageIteratorBaseline(const TessPageIterator* handle, TessPageIteratorLevel level, + int* x1, int* y1, int* x2, int* y2); + +TESS_API void TESS_CALL TessPageIteratorOrientation(TessPageIterator* handle, TessOrientation *orientation, + TessWritingDirection *writing_direction, TessTextlineOrder *textline_order, + float *deskew_angle); + +/* Result iterator */ + +TESS_API void TESS_CALL TessResultIteratorDelete(TessResultIterator* handle); +TESS_API TessResultIterator* + TESS_CALL TessResultIteratorCopy(const TessResultIterator* handle); +TESS_API TessPageIterator* + TESS_CALL TessResultIteratorGetPageIterator(TessResultIterator* handle); +TESS_API const TessPageIterator* + TESS_CALL TessResultIteratorGetPageIteratorConst(const TessResultIterator* handle); + +TESS_API char* TESS_CALL TessResultIteratorGetUTF8Text(const TessResultIterator* handle, TessPageIteratorLevel level); +TESS_API float TESS_CALL TessResultIteratorConfidence(const TessResultIterator* handle, TessPageIteratorLevel level); + +TESS_API const char* + TESS_CALL TessResultIteratorWordFontAttributes(const TessResultIterator* handle, BOOL* is_bold, BOOL* is_italic, + BOOL* is_underlined, BOOL* is_monospace, BOOL* is_serif, + BOOL* is_smallcaps, int* pointsize, int* font_id); + +TESS_API BOOL TESS_CALL TessResultIteratorWordIsFromDictionary(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL TessResultIteratorWordIsNumeric(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSuperscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsSubscript(const TessResultIterator* handle); +TESS_API BOOL TESS_CALL TessResultIteratorSymbolIsDropcap(const TessResultIterator* 
handle); + +#ifdef __cplusplus +} +#endif + +#endif /* TESSERACT_API_CAPI_H__ */ diff --git a/ccutil/strngs.h b/ccutil/strngs.h index 382605220..5c0d61eb0 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -56,6 +56,11 @@ class TESS_API STRING inT32 size() const { return length(); } const char *string() const; + inline char* strdup() const { + inT32 len = length() + 1; + return strncpy(new char[len], GetCStr(), len); + } + #if STRING_IS_PROTECTED const char &operator[] (inT32 index) const; // len is number of chars in s to insert starting at index in this string diff --git a/contrib/tesseract-c_api-demo.py b/contrib/tesseract-c_api-demo.py new file mode 100644 index 000000000..8fed83e23 --- /dev/null +++ b/contrib/tesseract-c_api-demo.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2012 Zdenko Podobný +# Author: Zdenko Podobný +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
Simple python demo script of tesseract-ocr 3.02 c-api

The script loads the tesseract shared library through ctypes, verifies that
the library is recent enough to export the C-API, runs OCR on ``filename``
with language ``lang`` and prints the recognized text.

Exit codes:
    1 -- the tesseract library could not be loaded
    2 -- the library is older than 3.02 (or reports an unparsable version)
    3 -- TessBaseAPIInit3 reported a failure
    4 -- TessBaseAPIProcessPages returned no text
"""

import os
import re
import sys
import ctypes

# Demo variables
lang = "eng"
filename = "../phototest.tif"
libpath = "/usr/local/lib64/"
libpath_w = "../vs2008/DLL_Release/"

if sys.platform == "win32":
    libname = libpath_w + "libtesseract302.dll"
    libname_alt = "libtesseract302.dll"
    os.environ["PATH"] += os.pathsep + libpath_w
else:
    libname = libpath + "libtesseract.so.3.0.2"
    libname_alt = "libtesseract.so.3"


def _to_bytes(value):
    """Return *value* as a byte string usable as a C ``const char*``."""
    return value if isinstance(value, bytes) else value.encode("utf-8")


def _to_text(value):
    """Return *value* as the native ``str`` type of the running interpreter."""
    return value if isinstance(value, str) else value.decode("utf-8", "replace")


# Try the fully qualified name first, then the bare name resolved by the
# system loader.  ctypes reports load failures as OSError on every platform
# (WindowsError is merely a subclass of it and does not exist elsewhere).
tesseract = None
load_error = None
for candidate in (libname, libname_alt):
    try:
        tesseract = ctypes.cdll.LoadLibrary(candidate)
        break
    except OSError as err:
        load_error = err

if tesseract is None:
    print("Trying to load '%s'..." % libname)
    print("Trying to load '%s'..." % libname_alt)
    print(load_error)
    sys.exit(1)

tesseract.TessVersion.restype = ctypes.c_char_p
tesseract.TessVersion.argtypes = []
tesseract_version = _to_text(tesseract.TessVersion())

# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02
# Only the leading "major.minor" part is compared, so that three-part
# versions such as "3.02.02" do not break the float conversion.
version_match = re.match(r"\d+(?:\.\d+)?", tesseract_version)
if version_match is None or float(version_match.group(0)) < 3.02:
    print("Found tesseract-ocr library version %s." % tesseract_version)
    print("C-API is present only in version 3.02!")
    sys.exit(2)

# Declare the prototypes explicitly: ctypes assumes a C int return value by
# default, which truncates pointers on 64-bit platforms.
tesseract.TessBaseAPICreate.restype = ctypes.c_void_p
tesseract.TessBaseAPICreate.argtypes = []
tesseract.TessBaseAPIDelete.restype = None
tesseract.TessBaseAPIDelete.argtypes = [ctypes.c_void_p]
tesseract.TessBaseAPIInit3.restype = ctypes.c_int
tesseract.TessBaseAPIInit3.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p]
# c_void_p (not c_char_p) keeps the raw address so that the buffer can be
# handed back to TessDeleteText once its content has been copied.
tesseract.TessBaseAPIProcessPages.restype = ctypes.c_void_p
tesseract.TessBaseAPIProcessPages.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int]
tesseract.TessDeleteText.restype = None
tesseract.TessDeleteText.argtypes = [ctypes.c_void_p]

api = tesseract.TessBaseAPICreate()
try:
    rc = tesseract.TessBaseAPIInit3(api, _to_bytes(""), _to_bytes(lang))
    if rc:
        print("Could not initialize tesseract.\n")
        sys.exit(3)

    text_out = tesseract.TessBaseAPIProcessPages(
        api, _to_bytes(filename), None, 0)
    if not text_out:
        # string_at() must never be given a NULL pointer.
        print("Could not process '%s'." % filename)
        sys.exit(4)
    try:
        result_text = _to_text(ctypes.string_at(text_out))
    finally:
        # The text buffer is owned by the library; release it through the
        # matching C-API call.
        tesseract.TessDeleteText(text_out)
    print(result_text)
finally:
    # Runs on every path, including sys.exit(), so the handle never leaks.
    tesseract.TessBaseAPIDelete(api)

# ---------------------------------------------------------------------------
# Remainder of the original patch (Visual Studio project-file hunk).  It is
# not part of this script and is retained here only as comments:
#
# diff --git a/vs2008/libtesseract/libtesseract.vcproj b/vs2008/libtesseract/libtesseract.vcproj
# index fcf60fc23..8dbd7de81 100644
# --- a/vs2008/libtesseract/libtesseract.vcproj
# +++ b/vs2008/libtesseract/libtesseract.vcproj
# @@ -444,6 +444,10 @@
#  RelativePath="..\..\cutil\callcpp.cpp"
#  >
# +
# +
# @@ -1510,6 +1514,10 @@
#  RelativePath="..\..\cutil\callcpp.h"
#  >
# +
# +