2007-07-18 08:59:35 +08:00
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
// File: tessdll.h
|
|
|
|
// Description: Windows dll interface for Tesseract.
|
|
|
|
// Author: Glen Wernersbach
|
|
|
|
// Created: Tue May 15 10:30:01 PDT 2007
|
|
|
|
//
|
|
|
|
// (C) Copyright 2007, Jetsoftdev.
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
//
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef __cplusplus
|
|
|
|
typedef BOOL bool;
|
|
|
|
#endif /* __cplusplus */
|
|
|
|
|
|
|
|
#include "ocrclass.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef TESSDLL_EXPORTS
|
|
|
|
#define TESSDLL_API __declspec(dllexport)
|
|
|
|
#else
|
|
|
|
#define TESSDLL_API __declspec(dllimport)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
|
|
|
#include "baseapi.h"
|
|
|
|
|
|
|
|
|
|
|
|
//This is an exposed C++
|
|
|
|
class TESSDLL_API TessDllAPI : public TessBaseAPI
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
//lang is the code of the language for which the data will be loaded.
|
|
|
|
//(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded.
|
|
|
|
TessDllAPI(const char* lang = NULL) ;
|
|
|
|
~TessDllAPI ();
|
|
|
|
|
|
|
|
//xsize should be the width of line in bytes times 8
|
|
|
|
//ysize is the height
|
2007-08-31 02:22:12 +08:00
|
|
|
//pass through a buffer of bytes for a 1 bit per pixel bitmap
|
|
|
|
//BeginPage assumes the first memory address is the bottom of the image
|
|
|
|
//BeginPageUpright assumes the first memory address is the top of the image
|
|
|
|
int BeginPage(UINT32 xsize,UINT32 ysize,unsigned char *buf);
|
|
|
|
int BeginPageUpright(UINT32 xsize,UINT32 ysize,unsigned char *buf);
|
|
|
|
|
|
|
|
// This could probably be combined with about in a one function bpp=1
|
|
|
|
int BeginPage(UINT32 xsize,UINT32 ysize,unsigned char *buf,UINT8 bpp);
|
|
|
|
int BeginPageUpright(UINT32 xsize,UINT32 ysize,unsigned char *buf, UINT8 bpp);
|
2007-07-18 08:59:35 +08:00
|
|
|
void EndPage();
|
|
|
|
|
|
|
|
//This allows you to extract one word or section from the bitmap or
|
|
|
|
//the whole page
|
|
|
|
//To extract the whole page just enter zeros for left, right, top, bottom
|
|
|
|
//Note: getting one word at time is not yet optimized for speed.
|
|
|
|
//limit of 32000 character can be returned
|
|
|
|
//see ocrclass.h for a decription of the ETEXT_DESC file
|
|
|
|
ETEXT_DESC *Recognize_a_Block(UINT32 left,UINT32 right,
|
|
|
|
UINT32 top,UINT32 bottom);
|
|
|
|
ETEXT_DESC *Recognize_all_Words(void);
|
|
|
|
|
2007-08-31 02:22:12 +08:00
|
|
|
private:
|
2007-07-18 08:59:35 +08:00
|
|
|
int ProcessPagePass1();
|
|
|
|
|
|
|
|
PAGE_RES *page_res;
|
|
|
|
unsigned char *membuf;
|
|
|
|
BLOCK_LIST* block_list;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C"
|
|
|
|
{
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//The functions below provide a c wrapper to a global recognize class object
|
|
|
|
|
|
|
|
//xsize should be the width of line in bytes times 8
|
|
|
|
//ysize is the height
|
|
|
|
//pass through a buffer of bytes for a 1 bit per pixel bitmap
|
2007-08-31 02:22:12 +08:00
|
|
|
//BeginPage assumes the first memory address is the bottom of the image (MS DIB format)
|
|
|
|
//BeginPageUpright assumes the first memory address is the top of the image (TIFF format)
|
2007-07-18 08:59:35 +08:00
|
|
|
//lang is the code of the language for which the data will be loaded.
|
|
|
|
//(Codes follow ISO 639-2.) If it is NULL, english (eng) will be loaded.
|
2007-08-31 02:22:12 +08:00
|
|
|
TESSDLL_API int __cdecl TessDllBeginPage(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf);
|
|
|
|
|
|
|
|
TESSDLL_API int __cdecl TessDllBeginPageLang(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf,
|
|
|
|
const char* lang);
|
|
|
|
TESSDLL_API int __cdecl TessDllBeginPageUpright(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf,
|
|
|
|
const char* lang);
|
|
|
|
//Added in version 2.0 to allow users to specify bytes per pixel to do
|
|
|
|
//1 for binary biptmap
|
|
|
|
//8 for gray
|
|
|
|
//24 bit for color RGB
|
|
|
|
TESSDLL_API int __cdecl TessDllBeginPageBPP(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf,UINT8 bpp);
|
|
|
|
|
|
|
|
TESSDLL_API int __cdecl TessDllBeginPageLangBPP(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf,
|
|
|
|
const char* lang,UINT8 bpp);
|
|
|
|
TESSDLL_API int __cdecl TessDllBeginPageUprightBPP(UINT32 xsize,UINT32 ysize,
|
|
|
|
unsigned char *buf,
|
|
|
|
const char* lang,UINT8 bpp);
|
|
|
|
|
2007-07-18 08:59:35 +08:00
|
|
|
TESSDLL_API void __cdecl TessDllEndPage(void);
|
|
|
|
|
|
|
|
//This allows you to extract one word or section from the bitmap or
|
|
|
|
//the whole page
|
|
|
|
//To extract the whole page just enter zeros for left, right, top, bottom
|
|
|
|
//Note: getting one word at time is not yet optimized for speed.
|
|
|
|
//limit of 32000 character can be returned
|
|
|
|
//see ocrclass.h for a decription of the ETEXT_DESC file
|
|
|
|
TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_a_Block(UINT32 left,
|
|
|
|
UINT32 right,
|
|
|
|
UINT32 top,
|
|
|
|
UINT32 bottom);
|
|
|
|
TESSDLL_API ETEXT_DESC * __cdecl TessDllRecognize_all_Words();
|
|
|
|
|
|
|
|
//This will release any memory associated with the recognize class object
|
|
|
|
TESSDLL_API void __cdecl TessDllRelease();
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|