/////////////////////////////////////////////////////////////////////// // File: tessdll.h // Description: Windows dll interface for Tesseract. // Author: Glen Wernersbach // Created: Tue May 15 10:30:01 PDT 2007 // // (C) Copyright 2007, Jetsoftdev. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifndef __cplusplus typedef BOOL bool; #endif /* __cplusplus */ #include "ocrclass.h" #ifdef TESSDLL_EXPORTS #define TESSDLL_API __declspec(dllexport) #else #define TESSDLL_API __declspec(dllimport) #endif #ifdef __cplusplus #include "baseapi.h" //This is an exposed C++ class TESSDLL_API TessDllAPI : public TessBaseAPI { public: //lang is the code of the language for which the data will be loaded. //(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded. TessDllAPI(const char* lang = NULL) ; ~TessDllAPI (); //xsize should be the width of line in bytes times 8 //ysize is the height //pass through a buffer of bytes for a 1 bit per pixel bitmap //BeginPage assumes the first memory address is the bottom of the image //BeginPageUpright assumes the first memory address is the top of the image int BeginPage(UINT32 xsize,UINT32 ysize,unsigned char *buf); int BeginPageUpright(UINT32 xsize,UINT32 ysize,unsigned char *buf); // This could probably be combined with about in a one function bpp=1 int BeginPage(UINT32 xsize,UINT32 ysize,unsigned char *buf,UINT8 bpp); int BeginPageUpright(UINT32 xsize,UINT32 ysize,unsigned char *buf, UINT8 bpp); void EndPage(); //This allows you to extract one word or section from the bitmap or //the whole page //To extract the whole page just enter zeros for left, right, top, bottom //Note: getting one word at time is not yet optimized for speed. //limit of 32000 character can be returned //see ocrclass.h for a decription of the ETEXT_DESC file ETEXT_DESC *Recognize_a_Block(UINT32 left,UINT32 right, UINT32 top,UINT32 bottom); ETEXT_DESC *Recognize_all_Words(void); private: int ProcessPagePass1(); PAGE_RES *page_res; unsigned char *membuf; BLOCK_LIST* block_list; }; #endif #ifdef __cplusplus extern "C" { #endif //The functions below provide a c wrapper to a global recognize class object //xsize should be the width of line in bytes times 8 //ysize is the height //pass through a buffer of bytes for a 1 bit per pixel bitmap //BeginPage assumes the first memory address is the bottom of the image (MS DIB format) //BeginPageUpright assumes the first memory address is the top of the image (TIFF format) //lang is the code of the language for which the data will be loaded. //(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded. TESSDLL_API int __cdecl TessDllBeginPage(UINT32 xsize,UINT32 ysize, unsigned char *buf); TESSDLL_API int __cdecl TessDllBeginPageLang(UINT32 xsize,UINT32 ysize, unsigned char *buf, const char* lang); TESSDLL_API int __cdecl TessDllBeginPageUpright(UINT32 xsize,UINT32 ysize, unsigned char *buf, const char* lang); //Added in version 2.0 to allow users to specify bytes per pixel to do //1 for binary biptmap //8 for gray //24 bit for color RGB TESSDLL_API int __cdecl TessDllBeginPageBPP(UINT32 xsize,UINT32 ysize, unsigned char *buf,UINT8 bpp); TESSDLL_API int __cdecl TessDllBeginPageLangBPP(UINT32 xsize,UINT32 ysize, unsigned char *buf, const char* lang,UINT8 bpp); TESSDLL_API int __cdecl TessDllBeginPageUprightBPP(UINT32 xsize,UINT32 ysize, unsigned char *buf, const char* lang,UINT8 bpp); TESSDLL_API void __cdecl TessDllEndPage(void); //This allows you to extract one word or section from the bitmap or //the whole page //To extract the whole page just enter zeros for left, right, top, bottom //Note: getting one word at time is not yet optimized for speed. //limit of 32000 character can be returned //see ocrclass.h for a decription of the ETEXT_DESC file TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_a_Block(UINT32 left, UINT32 right, UINT32 top, UINT32 bottom); TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_all_Words(); //This will release any memory associated with the recognize class object TESSDLL_API void __cdecl TessDllRelease(); #ifdef __cplusplus } #endif