tesseract/ccutil/ocrclass.h

/**********************************************************************
 * File:        ocrclass.h
 * Description: Class definitions and constants for the OCR API.
 * Author:					Hewlett-Packard Co
 *
 * (C) Copyright 1996, Hewlett-Packard Co.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

/**********************************************************************
 * This file contains typedefs for all the structures used by
 * the HP OCR interface.
 * The code is designed to be used with either a C or C++ compiler.
 * The structures are designed to allow them to be used with any
 * structure alignment up to 8.
 **********************************************************************/

#ifndef           OCRCLASS_H
#define           OCRCLASS_H

#include          <time.h>
#ifdef __MSW32__
#include          <windows.h>
#endif
#include          "host.h"

/*Maximum lengths of various strings.
The arrays in EFONT_DESC and EOCR_DESC that use these limits are declared
with MAX_xxx + 1 elements, presumably to leave room for a terminator.*/
#define MAX_FONT_NAME   34       /*name of font */
#define MAX_OCR_NAME    32       /*name of engine */
#define MAX_OCR_VERSION   17     /*version code of engine */

/*Image parameters.
The ESTRIP_DESC description states that the image width and the full image
height are always multiples of 32, which matches IMAGE_ROUNDING.
NOTE(review): units are presumably pixels - confirm.*/
#define MIN_IMAGE_SIZE    64     /*smallest image that will be passed */
#define IMAGE_ROUNDING    32     /*all sizes are multiple of this */

/*Two alternative sets of timeouts, selected at compile time.
Defining __SLOW_TIMES__ picks the longer set (roughly ten times the default
values); BADTIMES_TIMEOUT is the same in both sets.*/
#if defined(__SLOW_TIMES__)
/*Maximum timeouts of various functions (in secs)*/
#define STARTUP_TIMEOUT   100    /*start of OCR engine */
#define SHUTDOWN_TIMEOUT  50     /*end of OCR engine */
#define SENDIM_TIMEOUT    50     /*send of image */
#define RELEASE_TIMEOUT   50     /*release of semaphore */
#define READIM_TIMEOUT    100    /*read of image */
#define READTEXT_TIMEOUT  50     /*read of text */
#define PROGRESS_TIMEOUT  30     /*progress every 30 seconds */
#define BADTIMES_TIMEOUT  7      /*max lack of progress */
#else
/*Maximum timeouts of various functions (in secs)*/
#define STARTUP_TIMEOUT   10     /*start of OCR engine */
#define SHUTDOWN_TIMEOUT  6      /*end of OCR engine */
#define SENDIM_TIMEOUT    5      /*send of image */
#define RELEASE_TIMEOUT   5      /*release of semaphore */
#define READIM_TIMEOUT    10     /*read of image */
#define READTEXT_TIMEOUT  5      /*read of text */
#define PROGRESS_TIMEOUT  3      /*progress every 3 seconds */
#define BADTIMES_TIMEOUT  7      /*max lack of progress */
#endif

/*language definitions are identical to RTF.
Each value is a 16 bit code, which fits the uinT16 language members of
EFONT_DESC and EOCR_DESC.
NOTE(review): the values look like Windows language identifiers as used by
the RTF language control word - confirm against the RTF specification.
The macro names (including the LANGE_ prefix and LANGE_PORTUGESE) are part
of the interface, so their spelling is kept as is.*/
#define LANGE_NONE      0x0400   /*no language */
#define LANGE_ALBANIAN    0x041c /*Albanian */
#define LANGE_BRITISH   0x0809   /*International English */
#define LANGE_BULGARIAN   0x0402 /*Bulgarian */
#define LANGE_CROATIAN    0x041a /*Croatian(latin alphabet) */
#define LANGE_CZECH     0x0405   /*Czech */
#define LANGE_DANISH    0x0406   /*Danish */
#define LANGE_DUTCH     0x0413   /*Dutch */
#define LANGE_FINNISH   0x040b   /*Finnish */
#define LANGE_FRENCH    0x040c   /*French */
#define LANGE_GERMAN    0x0407   /*German */
#define LANGE_GREEK     0x0408   /*Greek */
#define LANGE_HUNGARIAN   0x040e /*Hungarian */
#define LANGE_ITALIAN   0x0410   /*Italian */
#define LANGE_JAPANESE    0x0411 /*Japanese */
#define LANGE_KOREAN    0x0412   /*Korean */
#define LANGE_NORWEGIAN   0x0414 /*Bokmal */
#define LANGE_POLISH    0x0415   /*Polish */
#define LANGE_PORTUGESE   0x0416 /*Brazilian Portuguese */
#define LANGE_ROMANIAN    0x0418 /*Romanian */
#define LANGE_RUSSIAN   0x0419   /*Russian */
#define LANGE_SCHINESE    0x0804 /*Simplified Chinese */
#define LANGE_SLOVAK    0x041b   /*Slovak */
#define LANGE_SPANISH   0x040a   /*Castilian */
#define LANGE_SWEDISH   0x041d   /*Swedish */
#define LANGE_TCHINESE    0x0404 /*Traditional Chinese */
#define LANGE_TURKISH   0x041f   /*Turkish */
#define LANGE_USENGLISH   0x0409 /*American */

/*font family definitions are identical to RTF.
NOTE(review): presumably the values stored in EFONT_DESC.font_family, whose
comment reads "serif/not, fixed/not" - confirm against callers.*/
#define FFAM_NONE     0          /*unknown */
#define FFAM_ROMAN      1        /*serifed proportional */
#define FFAM_SWISS      2        /*sans-serif proportional */
#define FFAM_MODERN     3        /*fixed pitch */

/*character set definitions are identical to RTF.
All values fit in 8 bits, matching the uinT8 char_set member of EFONT_DESC.
NOTE(review): presumably these are the values stored in that member -
confirm against callers.*/
#define CHSET_ANSI      0        /*Ansi efigs (NOTE(review): presumably English/French/Italian/German/Spanish) */
#define CHSET_SHIFT_JIS   128    /*JIS X 0208-1990 */
#define CHSET_KOREAN    129      /*KS C 5601-1992 */
#define CHSET_SCHINESE    134    /*GB 2312-80 */
#define CHSET_BIG5      136      /*Big Five */
#define CHSET_CYRILLIC    204    /*Cyrillic */
#define CHSET_EEUROPE   238      /*Eastern Europe */

/*pitch set definitions are identical to RTF.
NOTE(review): presumably the values stored in EFONT_DESC.pitch, whose
comment reads "fixed or prop" - confirm against callers.*/
#define PITCH_DEF     0          /*default */
#define PITCH_FIXED     1        /*fixed pitch */
#define PITCH_VAR     2          /*variable (proportional) pitch */

/*Bitmasks for character enhancements.
OR these together for enhancement in ocr_append_char.
Each mask is a distinct single bit and together they occupy the low five
bits (0x1f). The EANYCODE_CHAR description says its formatting member
combines the enhancement value with direction information held in the
top 3 bits, so the two do not overlap in a uinT8.*/
#define EUC_BOLD      1          /*bold character */
#define EUC_ITALIC      2        /*italic char */
#define EUC_UNDERLINE   4        /*underlined char */
#define EUC_SUBSCRIPT   8        /*subscript char */
#define EUC_SUPERSCRIPT   16     /*superscript char */

/*Direction in which successive characters are rendered.
The numeric values are written out explicitly; they are the same values the
compiler assigns by default. Note that the packed coding described for
EANYCODE_CHAR.formatting uses different numbers (6 and 7 for the vertical
directions), so these values are not that coding.*/
enum OCR_CHAR_DIRECTION {
  OCR_CDIR_RIGHT_LEFT = 0,       /*horizontal text, right to left */
  OCR_CDIR_LEFT_RIGHT = 1,       /*horizontal text, left to right */
  OCR_CDIR_TOP_BOTTOM = 2,       /*vertical text, top to bottom */
  OCR_CDIR_BOTTOM_TOP = 3        /*vertical text, bottom to top */
};

/*Direction in which successive text lines are laid out.
Values are spelled out explicitly and equal the compiler defaults.*/
enum OCR_LINE_DIRECTION {
  /*horizontal lines go down, vertical lines go right */
  OCR_LDIR_DOWN_RIGHT = 0,
  /*horizontal lines go up (the name implies vertical lines go left) */
  OCR_LDIR_UP_LEFT = 1
};

/*Kind of line break associated with a character.
Values are spelled out explicitly and equal the compiler defaults.*/
enum OCR_NEWLINE_TYPE {
  OCR_NL_NONE    = 0,            /*no line break */
  OCR_NL_NEWLINE = 1,            /*line break inside a paragraph */
  OCR_NL_NEWPARA = 2             /*line break that also starts a new paragraph */
};

/*error codes that can be returned from the API functions other than OKAY and HPERR.
OKAY and HPERR are not defined in this file.
The values are negative and parenthesised so that they expand safely inside
larger expressions.*/
#define OCR_API_NO_MEM    (-2)   /*filled output buffer */
#define OCR_API_BAD_CHAR  (-3)   /*whitespace sent to ocr_append_char */
#define OCR_API_BAD_STATE (-4)   /*invalid call sequence */

/*Error codes used for passing errors back to the HP side (for calls to
ocr_error). The numeric values are written out explicitly; they are the
same values the compiler assigns by default and must not be renumbered.*/
enum OCR_ERR_CODE {
  OCR_ERR_NONE       = 0,        /*no error */
  OCR_ERR_CLEAN_EXIT = 1,        /*no error, clean exit */
  OCR_ERR_NO_MEM     = 2,        /*out of memory */
  OCR_ERR_FILE_READ  = 3,        /*failed to read data file */
  OCR_ERR_TMP_WRITE  = 4,        /*failed to write temp file */
  OCR_ERR_TMP_READ   = 5,        /*failed to read temp file */
  OCR_ERR_BAD_DLL    = 6,        /*missing or invalid dll subcomponent */
  OCR_ERR_BAD_EXE    = 7,        /*missing or invalid exe subcomponent */
  OCR_ERR_BAD_LOAD   = 8,        /*failed to load subcomponent */
  OCR_ERR_BAD_LANG   = 9,        /*unable to recognize requested language */
  OCR_ERR_BAD_STATE  = 10,       /*engine did call out of sequence */
  OCR_ERR_INTERNAL1  = 11,       /*internal error type 1 */
  OCR_ERR_INTERNAL2  = 12,       /*internal error type 2 */
  OCR_ERR_INTERNAL3  = 13,       /*internal error type 3 */
  OCR_ERR_INTERNAL4  = 14,       /*internal error type 4 */
  OCR_ERR_INTERNAL5  = 15,       /*internal error type 5 */
  OCR_ERR_INTERNAL6  = 16,       /*internal error type 6 */
  OCR_ERR_INTERNAL7  = 17,       /*internal error type 7 */
  OCR_ERR_INTERNAL8  = 18,       /*internal error type 8 */
  OCR_ERR_TIMEOUT    = 19        /*timed out in comms */
};

/**********************************************************************
 * EFONT_DESC
 * Description of one font.
 * The information required is basically that used by RTF.
 * The name may be either a valid font on the system or the empty string.
 * NOTE(review): the language, font_family, char_set and pitch members are
 * presumably filled from the LANGE_, FFAM_, CHSET_ and PITCH_ constants
 * defined earlier in this file - confirm against callers.
 * The member order and types define the layout of this structure, so they
 * must not be rearranged.
 **********************************************************************/

typedef struct                   /*font description */
{
  uinT16 language;               /*default language */
  uinT8 font_family;             /*serif/not, fixed/not */
  uinT8 char_set;                /*character set standard */
  uinT8 pitch;                   /*fixed or proportional */
  inT8 name[MAX_FONT_NAME + 1];  /*plain ascii name; may be the empty string */
} EFONT_DESC;                    /*font description */

/**********************************************************************
 * EOCR_DESC
 * Description of the OCR engine provided at startup.
 * The name and version may be reported to the user at some point.
 * The fonts array should indicate the fonts that the OCR system
 * can recognize.
 * The fonts member is declared with a single element while font_count
 * carries the number of fonts.
 * NOTE(review): presumably the structure is allocated with room for
 * font_count entries following the header (trailing-array idiom) - confirm.
 * NOTE(review): name and version are arrays of 16 bit units, unlike the
 * 8 bit font name in EFONT_DESC; the intended encoding is not stated here.
 **********************************************************************/

typedef struct                   /*startup info */
{
  inT32 protocol;                /*interface version */
  uinT32 font_count;             /*number of fonts */
  uinT16 language;               /*default language */
  uinT16 name[MAX_OCR_NAME + 1]; /*name of engine */
                                 /*version of engine */
  uinT16 version[MAX_OCR_VERSION + 1];
  EFONT_DESC fonts[1];           /*array of fonts, first element only declared */
} EOCR_DESC;                     /*startup info */

/**********************************************************************
 * ESTRIP_DESC
 * Description of the image strip as it is passed to the engine.
 * The image is always 1 bit, with 1=black.
 * The width is always a multiple of 32, so padding is always OK.
 * The height of the full image is always a multiple of 32.
 * The top y coordinate is 0, and increases down.
 * The top leftmost pixel is in the most significant bit of the first byte.
 * The data member is declared with 8 bytes.
 * NOTE(review): presumably the actual pixel data of a strip extends past
 * the declared 8 bytes (trailing-array idiom) - confirm against the code
 * that fills this structure.
 **********************************************************************/

typedef struct                   /*bitmap strip */
{
  inT16 x_size;                  /*width in pixels */
  inT16 y_size;                  /*height of full image */
  inT16 strip_size;              /*of this strip; NOTE(review): presumably its height in rows */
  inT16 resolution;              /*pixels per inch */
  uinT8 data[8];                 /*image data, 1 bit per pixel, most significant bit first */
} ESTRIP_DESC;                   /*bitmap strip */

/**********************************************************************
 * EANYCODE_CHAR
 * Description of a single character. The character code is defined by
 * the character set of the current font.
 * Output text is sent as an array of these structures.
 * Spaces and line endings in the output are represented in the
 * structures of the surrounding characters. They are not directly
 * represented as characters.
 * The first character in a word has a positive value of blanks.
 * Missing information should be set to the defaults in the comments
 * (the value shown in parentheses after each member).
 * If word bounds are known, but not character bounds, then the top and
 * bottom of each character should be those of the word. The left of the
 * first and right of the last char in each word should be set. All other
 * lefts and rights should be set to -1.
 * If set, the values of right and bottom are left+width and top+height.
 * Most of the members come directly from the parameters to ocr_append_char.
 * The formatting member uses the enhancement parameter and combines the
 * line direction stuff into the top 3 bits.
 * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
 * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
 * the coding is, only that it is backwards compatible with the previous
 * version.
 **********************************************************************/

typedef struct                   /*single character */
{
// It should be noted that the format for char_code for version 2.0 and beyond is UTF8
// which means that ASCII characters will come out as one structure but other characters
// will be returned in two or more instances of this structure with a single byte of the
// UTF8 code in each, but each will have the same bounding box.
// Programs which want to handle languages with different character sets will need to
// handle extended characters appropriately, but *all* code needs to be prepared to
// receive UTF8 coded characters for characters such as bullet and fancy quotes.
// Note that char_code is a 16 bit member even though, per the above, it carries one
// UTF8 byte per structure.
  uinT16 char_code;              /*character itself */
  inT16 left;                    /*of char (-1) */
  inT16 right;                   /*of char (-1) */
  inT16 top;                     /*of char (-1) */
  inT16 bottom;                  /*of char (-1) */
  inT16 font_index;              /*what font (0) */
  uinT8 confidence;              /*0=perfect, 100=reject (0/100) */
  uinT8 point_size;              /*of char, 72=1 inch, (10) */
  inT8 blanks;                   /*no of spaces before this char (1) */
  uinT8 formatting;              /*char formatting: EUC_ bits plus direction in top 3 bits (0) */
} EANYCODE_CHAR;                 /*single character */

/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 * The text member is declared with a single element while count carries
 * the number of characters in the buffer.
 * NOTE(review): presumably the character data extends past the declared
 * element (trailing-array idiom) - confirm.
 **********************************************************************/
/*Cancel callback: receives the cancel_this pointer stored in the
structure and the number of user words found; returns true to cancel.
NOTE(review): bool is only available in C++ or in C with <stdbool.h>, which
this header does not include - confirm that host.h provides it if the
header is ever compiled as C.*/
typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);

typedef struct ETEXT_STRUCT      /*output header */
{
  inT16 count;                   /*chars in this buffer(0) */
  inT16 progress;                /*percent complete increasing (0-100) */
  inT8 more_to_come;             /*true if not last */
  inT8 ocr_alive;                /*ocr sets to 1, HP 0 */
  inT8 err_code;                 /*for errcode use; NOTE(review): presumably an OCR_ERR_CODE value */
  CANCEL_FUNC cancel;            /*returns true to cancel; may be null */
  void* cancel_this;             /*this or other data for cancel*/
  clock_t end_time;              /*time to stop if not 0*/
  EANYCODE_CHAR text[1];         /*character data */
} ETEXT_DESC;                    /*output header */

/*ESHM_INFO has one definition per platform, selected by __MSW32__, __MAC__
or __UNIX__. If none of these macros is defined, ESHM_INFO is not declared
at all.*/
#ifdef __MSW32__
/**********************************************************************
 * ESHM_INFO
 * This data structure is used internally to the API to hold the handles
 * to the operating system tools used for interprocess communications.
 * API users do not access this structure directly.
 **********************************************************************/
typedef struct                   /*shared mem info */
{
  HANDLE shm_hand;               /*handle to shm */
  HANDLE mutex;                  /*alive check */
  HANDLE ocr_sem;                /*ocr semaphore */
  HANDLE hp_sem;                 /*hp semaphore */
  void *shm_mem;                 /*shared memory */
  inT32 shm_size;                /*size of shm */
} ESHM_INFO;                     /*shared mem info */
#elif defined (__MAC__)
/*Mac variant. Boolean and ProcessSerialNumber are not declared by anything
this header includes directly.
NOTE(review): presumably they come from host.h or a header included before
this one - confirm.*/
typedef struct                   /*shared mem info */
{
  Boolean mutex;                 /*alive check */
  Boolean ocr_sem;               /*ocr semaphore */
  Boolean hp_sem;                /*hp semaphore */
  void *shm_mem;                 /*shared memory */
  inT32 shm_size;                /*size of shm */
  inT16 language;                /*NOTE(review): presumably a LANGE_xxx code - confirm */

  // Process management information follows:
  ProcessSerialNumber IPEProcess;
  ProcessSerialNumber OCRProcess;
} ESHM_INFO;
#elif defined (__UNIX__)
/*Unix variant: only the shared memory pointer and its size are kept.*/
typedef struct                   /*shared mem info */
{
  void *shm_mem;                 /*shared memory */
  inT32 shm_size;                /*size of shm */
} ESHM_INFO;
#endif
#endif