tesseract/tessdll.h

144 lines
5.3 KiB
C
Raw Normal View History

///////////////////////////////////////////////////////////////////////
// File: tessdll.h
// Description: Windows dll interface for Tesseract.
// Author: Glen Wernersbach
// Created: Tue May 15 10:30:01 PDT 2007
//
// (C) Copyright 2007, Jetsoftdev.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
// Protect against multiple inclusion: this header defines a C++ class and,
// for C callers, a typedef, and had no include guard of its own.
#pragma once
// C callers get `bool` as an alias of BOOL. BOOL is not declared by this
// header, so it must already be in scope (presumably the Win32 BOOL from the
// Windows headers -- confirm against the including code).
// The typedef is skipped when `bool` is already defined as a macro (for
// example by <stdbool.h>), because `typedef BOOL bool;` would then expand to
// a typedef of the built-in boolean type and fail to compile.
// NOTE(review): BOOL and the C++ `bool` may differ in size; verify that no
// struct shared across the C/C++ boundary contains a `bool` member.
#if !defined(__cplusplus) && !defined(bool)
typedef BOOL bool;
#endif /* !__cplusplus && !bool */
#include "ocrclass.h"
// TESSDLL_API decorates every symbol that crosses the DLL boundary.
// It expands to dllimport unless TESSDLL_EXPORTS is defined, in which case
// it expands to dllexport (TESSDLL_EXPORTS is presumably set only by the
// DLL's own build -- confirm against the project settings).
#if !defined(TESSDLL_EXPORTS)
#  define TESSDLL_API __declspec(dllimport)
#else
#  define TESSDLL_API __declspec(dllexport)
#endif  /* TESSDLL_EXPORTS */
#ifdef __cplusplus
#include "baseapi.h"

// C++ class exposed by the DLL; derives from TessBaseAPI.
class TESSDLL_API TessDllAPI : public TessBaseAPI
{
 public:
  // lang is the code (ISO 639-3) of the language whose data will be loaded.
  // Passing NULL loads English ("eng").
  TessDllAPI(const char* lang = NULL);
  ~TessDllAPI();

  // Start a page from a bitmap with 1 bit per pixel.
  //   xsize - width of one line in bytes, times 8
  //   ysize - height of the image
  //   buf   - buffer of bytes holding the bitmap
  // BeginPage assumes the first memory address is the bottom of the image.
  // BeginPageUpright assumes the first memory address is the top of the image.
  int BeginPage(uinT32 xsize, uinT32 ysize, unsigned char* buf);
  int BeginPageUpright(uinT32 xsize, uinT32 ysize, unsigned char* buf);

  // Overloads that additionally take bpp.
  // These could probably be combined with the overloads above into a single
  // function whose bpp defaults to 1.
  int BeginPage(uinT32 xsize, uinT32 ysize, unsigned char* buf, uinT8 bpp);
  int BeginPageUpright(uinT32 xsize, uinT32 ysize, unsigned char* buf,
                       uinT8 bpp);

  void EndPage();

  // Extract one word, a section of the bitmap, or the whole page.
  // To extract the whole page, pass zero for left, right, top and bottom.
  // Note: getting one word at a time is not yet optimized for speed.
  // At most 32000 characters can be returned.
  // See ocrclass.h for a description of ETEXT_DESC.
  ETEXT_DESC* Recognize_a_Block(uinT32 left, uinT32 right,
                                uinT32 top, uinT32 bottom);
  ETEXT_DESC* Recognize_all_Words(void);

 private:
  int ProcessPagePass1();

  // NOTE(review): ownership and lifetime of these pointers are not visible
  // in this header; check the implementation before copying instances.
  PAGE_RES* page_res;
  unsigned char* membuf;
  BLOCK_LIST* block_list;
};
#endif  // __cplusplus
#ifdef __cplusplus
extern "C"
{
#endif
// The functions below provide a C wrapper around a global recognize class
// object.
//
// Parameters shared by the TessDllBeginPage* functions:
//   xsize - width of one line in bytes, times 8
//   ysize - height of the image
//   buf   - buffer of bytes holding the bitmap; 1 bit per pixel for the
//           functions that take no bpp argument
//   lang  - code (ISO 639-3) of the language whose data will be loaded;
//           NULL loads English ("eng")
//
// TessDllBeginPage assumes the first memory address is the bottom of the
// image (MS DIB format).
// TessDllBeginPageUpright assumes the first memory address is the top of the
// image (TIFF format). Note that it takes a lang argument even though its
// name has no "Lang" suffix.
// NOTE(review): the Lang/BPP variants presumably follow the orientation their
// name implies; confirm against the implementation.
TESSDLL_API int __cdecl TessDllBeginPage(uinT32 xsize, uinT32 ysize,
                                         unsigned char *buf);
TESSDLL_API int __cdecl TessDllBeginPageLang(uinT32 xsize, uinT32 ysize,
                                             unsigned char *buf,
                                             const char* lang);
TESSDLL_API int __cdecl TessDllBeginPageUpright(uinT32 xsize, uinT32 ysize,
                                                unsigned char *buf,
                                                const char* lang);
// Added in version 2.0 to let callers specify the bits per pixel (bpp) of
// the bitmap:
//   1  for a binary bitmap
//   8  for gray
//   24 for color RGB
TESSDLL_API int __cdecl TessDllBeginPageBPP(uinT32 xsize, uinT32 ysize,
                                            unsigned char *buf, uinT8 bpp);
TESSDLL_API int __cdecl TessDllBeginPageLangBPP(uinT32 xsize, uinT32 ysize,
                                                unsigned char *buf,
                                                const char* lang, uinT8 bpp);
TESSDLL_API int __cdecl TessDllBeginPageUprightBPP(uinT32 xsize, uinT32 ysize,
                                                   unsigned char *buf,
                                                   const char* lang, uinT8 bpp);
TESSDLL_API void __cdecl TessDllEndPage(void);
// Extract one word, a section of the bitmap, or the whole page.
// To extract the whole page, pass zero for left, right, top and bottom.
// Note: getting one word at a time is not yet optimized for speed.
// At most 32000 characters can be returned.
// See ocrclass.h for a description of ETEXT_DESC.
TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_a_Block(uinT32 left,
                                                          uinT32 right,
                                                          uinT32 top,
                                                          uinT32 bottom);
// Declared with (void) so that C callers get a real prototype; an empty
// parameter list in C leaves the arguments unchecked.
TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_all_Words(void);
// Releases any memory associated with the recognize class object.
TESSDLL_API void __cdecl TessDllRelease(void);
#ifdef __cplusplus
}
#endif