tesseract/ccutil/ocrshell.cpp
2007-03-07 20:03:40 +00:00

760 lines
27 KiB
C++

/**********************************************************************
* File: ocrshell.cpp
* Description: Code for the OCR side of the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains code for the OCR side of the HP OCR interface.
* The code is designed to be used with either an ANSI C or C++ compiler.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#include "mfcpch.h"
#include "ocrshell.h"
#include "tprintf.h"
#include <stdlib.h>
/* EXTERN expands to nothing, so the "EXTERN ESHM_INFO shm" lines below are
 * definitions of the shared-memory bookkeeping object in this file.
 * TICKS is the factor applied to timeouts before they are handed to the
 * platform wait call (see wait_for_hp and release_ocr). */
#define EXTERN
#ifdef __UNIX__
EXTERN ESHM_INFO shm; /*shared memory / semaphore bookkeeping */
#define TICKS 1
#endif
#ifdef __MSW32__
EXTERN ESHM_INFO shm; /*shared memory / semaphore bookkeeping */
/* release_ocr pairs "timeout -= 50" with Sleep (50), i.e. the scaled
 * timeout is counted in milliseconds on this platform. */
#define TICKS 1000
#endif
#ifdef __MAC__
#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__)
#pragma import on
#endif
/* On the Mac build shm is only declared here (volatile, defined elsewhere),
 * and the Win32-style wait/release primitives are supplied externally. */
extern volatile ESHM_INFO shm; /*shared memory / semaphore bookkeeping */
extern unsigned short WaitForSingleObject( /*"C" */
volatile Boolean &semaphore,
unsigned long timeout);
extern unsigned short ReleaseSemaphore( /*"C" */
volatile Boolean &semaphore);
#if defined(__CFM68K__) && !defined(__USING_STATIC_LIBS__)
#pragma import reset
#endif
/* Value the wait functions below compare against to detect success. */
#define WAIT_OBJECT_0 1
/* NOTE(review): presumably 60 timer ticks per second - confirm. */
#define TICKS 60
#endif
/* Protocol phase of the OCR side; every public entry point checks or
 * advances the file-level ocr_state variable using these values. */
typedef enum {
OCS_UNINIT, /*initial value; ocr_open_shm not yet successful */
OCS_SETUP_SHM, /*set by ocr_open_shm; fonts/startinfo may be written */
OCS_SETUP_INFO, /*startinfo sent, or a page's text fully sent */
OCS_READING_STRIPS, /*a strip was read and more lines are expected */
OCS_READ_STRIPS, /*all image lines read, monitor not yet set up */
OCS_RECOGNIZING, /*set by ocr_setup_monitor; text may be appended */
OCS_SENDING_TEXT, /*a text buffer was sent with more_to_come set */
OCS_DEAD /*set by ocr_internal_shutdown; nothing more is sent */
} OCR_STATE;
/* forward declarations - not in .h file as not needed outside this file*/
INT16 ocr_internal_shutdown(); /*mark dead and free handles, no messaging */
INT16 wait_for_mutex(); /*block until the HP mutex is obtained */
INT16 wait_for_hp( /*wait for the HP semaphore */
INT32 timeout /*in seconds (scaled by TICKS) */
);
INT16 release_mutex(); /*release the HP mutex */
INT16 release_ocr(); /*release the OCR semaphore */
/* File-private protocol state shared by all functions below. */
static INT32 font_count = 0; /*fonts appended via ocr_append_fontinfo */
static INT16 lines_read = 0; /*image lines received for the current image */
/*current protocol phase */
static OCR_STATE ocr_state = OCS_UNINIT;
#ifdef __MAC__
/* Apple event handler (installed by ocr_open_shm for kAEQuitApplication):
 * cleans up the local state and then exits the application.
 * The three parameters are not used.
 * NOTE(review): the function is declared to return short but has no return
 * statement; presumably ExitToShell never returns - confirm. */
pascal short TerminateOCR(AppleEvent *theEvent,
AppleEvent *theReply,
long refCon) {
ocr_internal_shutdown();
ExitToShell();
}
#endif
/**********************************************************************
* ocr_open_shm
*
* Attempt to connect to the shared memory segment and semaphores used
* in talking to the OCR engine. Called from OCR engine.
* The parameters are the command line arguments in order.
**********************************************************************/
/* Two signatures share one body: the Mac build takes only the language
 * output pointer, all other builds take the command-line strings.
 * Returns OKAY on success, OCR_API_BAD_STATE if called when the state is not
 * OCS_UNINIT, or HPERR if the platform-specific attach step fails. */
#ifdef __MAC__
INT16
ocr_open_shm (UINT16 * lang)
#else
INT16
ocr_open_shm ( /*open the shm */
const char *shm_h, /*shm handle, as a decimal string */
const char *shm_size, /*size of shm segment, as a decimal string */
const char *mutex_h, /*hp mutex handle, as a decimal string */
const char *ocr_h, /*ocr semaphore handle, as a decimal string */
const char *hp_h, /*hp semaphore handle, as a decimal string */
const char *lang_str, /*language code, as a decimal string */
UINT16 * lang /*out: required language (may be NULL) */
)
#endif
{
/* Note: the font count is reset and *lang is written before the state
 * check below, so both happen even when OCR_API_BAD_STATE is returned. */
font_count = 0; /*no fonts yet */
#ifdef __MAC__
/* Refuse to attach when both halves of the recorded OCR process serial
 * number are non-zero. */
if (shm.OCRProcess.lowLongOfPSN && shm.OCRProcess.highLongOfPSN)
return HPERR;
*lang = shm.language;
GetCurrentProcess (&shm.OCRProcess);
if (WakeUpProcess (&shm.IPEProcess))
ExitToShell();
AEInstallEventHandler (kCoreEventClass, kAEQuitApplication,
(AEEventHandlerUPP) TerminateOCR, 0, FALSE);
#else
if (lang != NULL)
/*get language */
*lang = (UINT16) strtol (lang_str, NULL, 10);
#endif
if (ocr_state != OCS_UNINIT) {
ocr_error(OCR_ERR_BAD_STATE);
return OCR_API_BAD_STATE; /*incorrect state */
}
#ifdef __MSW32__
/* The handles arrive as decimal text; convert each and map the segment. */
shm.shm_size = strtol (shm_size, NULL, 10);
/*convert to handle */
shm.shm_hand = (HANDLE) strtol (shm_h, NULL, 10);
shm.shm_mem = MapViewOfFile (shm.shm_hand, FILE_MAP_WRITE, 0, 0, 0);
if (shm.shm_mem == NULL)
return HPERR; /*mapping failed; state stays OCS_UNINIT */
/*convert to handle */
shm.mutex = (HANDLE) strtol (mutex_h, NULL, 10);
/*convert to handle */
shm.ocr_sem = (HANDLE) strtol (ocr_h, NULL, 10);
/*convert to handle */
shm.hp_sem = (HANDLE) strtol (hp_h, NULL, 10);
#endif
ocr_state = OCS_SETUP_SHM; /*record state */
return OKAY;
}
/**********************************************************************
* ocr_error
*
* Inform the HP side of an error.
* The OCR engine should do any cleanup of its own and exit afterwards.
* Uses the current state to determine how to send it and cleanup.
**********************************************************************/
void ocr_error(                    /*send an error code */
               OCR_ERR_CODE code   /*error code */
              ) {
  /* The shared segment is viewed either as a strip header or as a text /
     monitor header; which view is written depends on ocr_state. */
  ESTRIP_DESC *strip_hdr = (ESTRIP_DESC *) shm.shm_mem;
  ETEXT_DESC *text_hdr = (ETEXT_DESC *) shm.shm_mem;

  switch (ocr_state) {
    case OCS_UNINIT:
    case OCS_DEAD:
      /* No channel to report on and nothing to shut down. */
      return;

    case OCS_SETUP_SHM:
      /* ocr_setup_startinfo_ansi refuses to run with font_count < 1, so
         make sure it sees at least one; the negated code is carried in the
         protocol field. */
      if (font_count < 1)
        font_count = 1;
      ocr_setup_startinfo_ansi (-code, LANGE_NONE, "", "");
      break;

    case OCS_SETUP_INFO:
      /* Wait for the first strip; if one arrives, answer it exactly as in
         the OCS_READING_STRIPS case, otherwise there is nobody to tell. */
      if (ocr_get_first_image_strip () != NULL) {
        strip_hdr->x_size = -code;
        release_ocr();
        release_mutex();
      }
      break;

    case OCS_READING_STRIPS:
      /* The negated code is carried in the strip's x_size field. */
      strip_hdr->x_size = -code;
      release_ocr();
      release_mutex();
      break;

    case OCS_READ_STRIPS:
      /* No monitor exists yet: build a complete one carrying the error. */
      text_hdr->count = 0;
      text_hdr->progress = 0;
      text_hdr->more_to_come = FALSE;
      text_hdr->ocr_alive = TRUE;
      text_hdr->err_code = -code;
      text_hdr->cancel = FALSE;
      release_ocr();
      break;

    case OCS_RECOGNIZING:
    case OCS_SENDING_TEXT:
      /* Monitor already set up: only the error field needs to change. */
      text_hdr->err_code = -code;
      release_ocr();
      break;

    default:
      break;
  }

  /* Whatever was (or was not) reported, the connection is finished. */
  ocr_internal_shutdown();
}
/**********************************************************************
* ocr_append_fontinfo
*
* Initialize one of the font descriptors.
**********************************************************************/
INT16 ocr_append_fontinfo(                    /*put info into shm */
                          UINT16 language,    /*default language */
                          UINT8 font_family,  /*serif/not, fixed/not */
                          UINT8 char_set,     /*character set standard */
                          UINT8 pitch,        /*fixed or prop */
                          const char *name    /*plain ascii name, may be NULL */
                         ) {
  EOCR_DESC *info;      /*startup descriptor at the start of the shm */
  EFONT_DESC *slot;     /*font entry being filled in */
  INT32 max_extra;      /*font entries that fit after the descriptor */
  int len;              /*characters of name copied */

  /* Fonts may only be added between ocr_open_shm and the startinfo call. */
  if (ocr_state != OCS_SETUP_SHM) {
    ocr_error(OCR_ERR_BAD_STATE);
    return OCR_API_BAD_STATE;
  }

  info = (EOCR_DESC *) shm.shm_mem;
  max_extra =
    (INT32) ((shm.shm_size - sizeof (EOCR_DESC)) / sizeof (EFONT_DESC));
  if (font_count > max_extra)
    return OCR_API_NO_MEM;      /*segment cannot hold another font */

  /* Claim the next entry and fill in its scalar fields. */
  slot = &info->fonts[font_count++];
  slot->language = language;
  slot->font_family = font_family;
  slot->char_set = char_set;
  slot->pitch = pitch;

  /* Copy at most MAX_FONT_NAME characters; a NULL name gives an empty
     string. The terminator is always written. */
  len = 0;
  if (name != NULL) {
    while (len < MAX_FONT_NAME && name[len] != 0) {
      slot->name[len] = name[len];
      len++;
    }
  }
  slot->name[len] = 0;
  return OKAY;
}
/**********************************************************************
* ocr_setup_startinfo
*
* Setup the info on the OCR engine. Uses 16 bit chars to name the
* engine.
**********************************************************************/
INT16 ocr_setup_startinfo(                        /*put info into shm */
                          INT32 protocol,         /*interface version */
                          UINT16 language,        /*default language */
                          const UINT16 *name,     /*name of engine */
                          const UINT16 *version   /*version of engine */
                         ) {
  EOCR_DESC *info;      /*startup descriptor in the shm */
  int pos;              /*character position while copying */
  INT16 status;         /*outcome of the semaphore release */

  /* Needs an opened segment and at least one font already appended. */
  if (font_count < 1 || ocr_state != OCS_SETUP_SHM) {
    ocr_error(OCR_ERR_BAD_STATE);
    return OCR_API_BAD_STATE;
  }

  info = (EOCR_DESC *) shm.shm_mem;
  info->protocol = protocol;
  info->font_count = font_count;
  info->language = language;

  /* Copy the 16 bit engine name, truncated to MAX_OCR_NAME characters. */
  pos = 0;
  while (pos < MAX_OCR_NAME && name[pos] != 0) {
    info->name[pos] = name[pos];
    pos++;
  }
  info->name[pos] = 0;

  /* Same for the version string, truncated to MAX_OCR_VERSION. */
  pos = 0;
  while (pos < MAX_OCR_VERSION && version[pos] != 0) {
    info->version[pos] = version[pos];
    pos++;
  }
  info->version[pos] = 0;

  /* Hand the descriptor over; only advance the state if that worked. */
  status = release_ocr ();
  if (status == OKAY)
    ocr_state = OCS_SETUP_INFO;
  return status;
}
/**********************************************************************
* ocr_setup_startinfo_ansi
*
* Setup the info on the OCR engine. Uses 8 bit chars to name the
* engine.
**********************************************************************/
INT16 ocr_setup_startinfo_ansi(                     /*put info into shm */
                               UINT32 protocol,     /*interface version */
                               UINT16 language,     /*default language */
                               const char *name,    /*name of engine */
                               const char *version  /*version of engine */
                              ) {
  EOCR_DESC *info;      /*startup descriptor in the shm */
  int pos;              /*character position while copying */
  INT16 status;         /*outcome of the semaphore release */

  /* Needs an opened segment and at least one font already appended. */
  if (font_count < 1 || ocr_state != OCS_SETUP_SHM) {
    ocr_error(OCR_ERR_BAD_STATE);
    return OCR_API_BAD_STATE;
  }

  info = (EOCR_DESC *) shm.shm_mem;
  info->protocol = protocol;
  info->font_count = font_count;
  info->language = language;

  /* Copy the 8 bit engine name into the descriptor's name field,
     truncated to MAX_OCR_NAME characters. */
  pos = 0;
  while (pos < MAX_OCR_NAME && name[pos] != 0) {
    info->name[pos] = name[pos];
    pos++;
  }
  info->name[pos] = 0;

  /* Same for the version string, truncated to MAX_OCR_VERSION. */
  pos = 0;
  while (pos < MAX_OCR_VERSION && version[pos] != 0) {
    info->version[pos] = version[pos];
    pos++;
  }
  info->version[pos] = 0;

  /* Hand the descriptor over; only advance the state if that worked. */
  status = release_ocr ();
  if (status == OKAY)
    ocr_state = OCS_SETUP_INFO;
  return status;
}
/**********************************************************************
* ocr_get_first_image_strip
*
* Wait for the master to send the first image strip and return a
* pointer to it. The result is NULL if it is time to exit.
**********************************************************************/
ESTRIP_DESC *ocr_get_first_image_strip() {  /*get image strip */
  ESTRIP_DESC *hdr;     /*strip header at the start of the shm */

  if (ocr_state != OCS_SETUP_INFO) {
    tprintf ("Bad state reading strip");
    ocr_error(OCR_ERR_BAD_STATE);
    return NULL;
  }

  hdr = (ESTRIP_DESC *) shm.shm_mem;
  lines_read = 0;

  /* Take and immediately give back the HP mutex, then wait for the HP side
     to signal that a strip is available. Any failure means the HP side is
     gone (the helpers have already shut down). */
  if (wait_for_mutex () != OKAY) {
    tprintf ("Mutax wait failed reading strip");
    return NULL;
  }
  if (release_mutex () != OKAY) {
    tprintf ("Mutax release failed reading strip");
    return NULL;
  }
  if (wait_for_hp (READIM_TIMEOUT) != OKAY) {
    tprintf ("Wait for HP failed reading strip");
    return NULL;
  }

  /* Record progress through the image and whether more strips follow. */
  lines_read = hdr->strip_size;
  ocr_state =
    (lines_read < hdr->y_size) ? OCS_READING_STRIPS : OCS_READ_STRIPS;

  /* A zero-sized image is the end-of-job marker. */
  if (hdr->x_size == 0 || hdr->y_size == 0)
    return NULL;
  return hdr;
}
/**********************************************************************
* ocr_get_next_image_strip
*
* Wait for the master to send the next image strip and return a
* pointer to it. The result is NULL if it is time to exit.
**********************************************************************/
ESTRIP_DESC *ocr_get_next_image_strip() {  /*get image strip */
  ESTRIP_DESC *hdr;     /*strip header at the start of the shm */

  /* Only legal while a multi-strip image is still being received. */
  if (ocr_state != OCS_READING_STRIPS) {
    ocr_error(OCR_ERR_BAD_STATE);
    return NULL;
  }

  hdr = (ESTRIP_DESC *) shm.shm_mem;

  /* Acknowledge the previous strip, then wait for the next one; either
     step failing means the HP side is gone. */
  if (release_ocr () != OKAY || wait_for_hp (READIM_TIMEOUT) != OKAY)
    return NULL;

  /* Accumulate the lines received and decide whether more are due. */
  lines_read += hdr->strip_size;
  ocr_state =
    (lines_read < hdr->y_size) ? OCS_READING_STRIPS : OCS_READ_STRIPS;
  return hdr;
}
/**********************************************************************
* ocr_setup_monitor
*
* Setup the progress monitor. Call before starting the recognize task.
**********************************************************************/
ETEXT_DESC *ocr_setup_monitor() {  /*setup monitor */
  /* The monitor occupies the start of the shared segment. */
  ETEXT_DESC *mon = (ETEXT_DESC *) shm.shm_mem;

  /* Start from an empty buffer with no progress, no error, no cancel. */
  mon->count = 0;
  mon->progress = 0;
  mon->more_to_come = TRUE;
  mon->ocr_alive = TRUE;
  mon->err_code = 0;            /*later written by ocr_error */
  mon->cancel = FALSE;

  /* Release the OCR semaphore; recognition starts only if that worked. */
  if (release_ocr () == OKAY) {
    ocr_state = OCS_RECOGNIZING;
    return mon;
  }
  return NULL;
}
/**********************************************************************
* ocr_char_space
*
* Return the number of chars that can be fitted into the buffer.
**********************************************************************/
INT32 ocr_char_space() {  /*free char slots in shm */
  /* Text buffer header at the start of the shared segment. */
  ETEXT_DESC *text_buf = (ETEXT_DESC *) shm.shm_mem;
  /* Slots that fit after the header, less those in use, plus one. */
  int free_slots =
    (shm.shm_size - sizeof (ETEXT_DESC)) / sizeof (EANYCODE_CHAR) -
    text_buf->count + 1;

  /* A wait on the HP side used to be here but is disabled:
     while (buf->hp_alive==-1) Sleep(50); */
  return free_slots;
}
/**********************************************************************
* ocr_append_char
*
* Add a character to the output. Returns OKAY if successful, OCR_API_NO_MEM
* if there was insufficient room in the buffer.
**********************************************************************/
INT16 ocr_append_char(                              /*put char into shm */
                      UINT16 char_code,             /*character itself */
                      INT16 left,                   /*of char (-1) */
                      INT16 right,                  /*of char (-1) */
                      INT16 top,                    /*of char (-1) */
                      INT16 bottom,                 /*of char (-1) */
                      INT16 font_index,             /*what font (-1) */
                      UINT8 confidence,             /*0=perfect, 100=reject (0/100) */
                      UINT8 point_size,             /*of char, 72=i inch, (10) */
                      INT8 blanks,                  /*no of spaces before this char (1) */
                      UINT8 enhancement,            /*char enhancement (0) */
                      OCR_CHAR_DIRECTION text_dir,  /*rendering direction (OCR_CDIR_RIGHT_LEFT) */
                      OCR_LINE_DIRECTION line_dir,  /*line rendering direction (OCR_LDIR_DOWN_RIGHT) */
                      OCR_NEWLINE_TYPE nl_type      /*type of newline (if any) (OCR_NL_NONE) */
                     ) {
  ETEXT_DESC *buf;      /*text buffer */
  int index;            /*slot being written */
  INT32 space;          /*free slots remaining */

  /* Text may only be appended once the monitor is set up. */
  if (ocr_state != OCS_RECOGNIZING && ocr_state != OCS_SENDING_TEXT) {
    ocr_error(OCR_ERR_BAD_STATE);
    return OCR_API_BAD_STATE;   /*incorrect state */
  }
  /* Whitespace is expressed through blanks / nl_type, never as a char. */
  if (char_code == ' ' || char_code == '\n' || char_code == '\r'
      || char_code == '\t')
    return OCR_API_BAD_CHAR;    /*illegal char */

  buf = (ETEXT_DESC *) shm.shm_mem;
  /* Free slot count, kept in 32 bits: storing it in an INT16 made a
     segment with more than 32767 free slots wrap to a non-positive value
     and report OCR_API_NO_MEM although there was room.
     NOTE(review): ocr_char_space reports one slot more than is accepted
     here; confirm against the ETEXT_DESC layout which bound is intended. */
  space =
    (INT32) ((shm.shm_size - sizeof (ETEXT_DESC)) / sizeof (EANYCODE_CHAR)) -
    buf->count;
  if (space < 1)
    return OCR_API_NO_MEM;      /*insufficient room */

  index = buf->count++;         /*claim the next slot */
  buf->text[index].char_code = char_code;
  buf->text[index].left = left;
  buf->text[index].right = right;
  buf->text[index].top = top;
  buf->text[index].bottom = bottom;
  buf->text[index].font_index = font_index;
  buf->text[index].confidence = confidence;
  buf->text[index].point_size = point_size;
  buf->text[index].blanks = blanks;

  /* Build the formatting byte: without a newline it carries the character
     direction (shifted by 5, with 128 added for the two vertical
     directions); with a newline it carries the newline type (shifted by 6)
     and the line direction (shifted by 5). */
  if (nl_type == OCR_NL_NONE) {
    if (text_dir == OCR_CDIR_TOP_BOTTOM || text_dir == OCR_CDIR_BOTTOM_TOP)
      buf->text[index].formatting = (text_dir << 5) | 128;
    else
      buf->text[index].formatting = text_dir << 5;
  }
  else {
    buf->text[index].formatting = (nl_type << 6) | (line_dir << 5);
  }
  /* Enhancement bits are merged in, masked so they cannot overwrite the
     bits covered by EUC_FORMAT_MASK. */
  buf->text[index].formatting |= enhancement & (~EUC_FORMAT_MASK);
  return OKAY;
}
/**********************************************************************
* ocr_send_text
*
* Send the text to the host and wait for the ack.
* Use this function after a sequence of ocr_append_char calls to
* actually send the text to the master process.
* Set more to come TRUE if there is more text in this page, FALSE
* if the OCR engine is now ready to receive another image.
**********************************************************************/
INT16 ocr_send_text(                     /*send shm */
                    BOOL8 more_to_come   /*any text left */
                   ) {
  ETEXT_DESC *buf;      /*text buffer */

  /* Text can only be sent once the monitor is set up. */
  if (ocr_state != OCS_RECOGNIZING && ocr_state != OCS_SENDING_TEXT) {
    ocr_error(OCR_ERR_BAD_STATE);
    return OCR_API_BAD_STATE;   /*incorrect state */
  }

  buf = (ETEXT_DESC *) shm.shm_mem;
  buf->more_to_come = more_to_come;

  /* The top two bits of a char's formatting byte hold its newline type;
     "&= 63" clears them before a new type is OR-ed in. */
  if (more_to_come) {
    /* Force the last char of a partial buffer to end a line, unless it
       already ends a line or paragraph. An empty buffer has no last char:
       without the count check this would touch text[-1]. */
    if (buf->count >= 1
        && (buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWLINE
        && (buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWPARA) {
      buf->text[buf->count - 1].formatting &= 63;
      buf->text[buf->count - 1].formatting |= OCR_NL_NEWLINE << 6;
    }
  }
  else {
    if (buf->count < 1)
      /* Nothing was recognized: send a single dummy, fully rejected
         character that ends the paragraph. */
      ocr_append_char ('~', -1, -1, -1, -1, 0, 100, 10, 0,
                       0, OCR_CDIR_RIGHT_LEFT, OCR_LDIR_DOWN_RIGHT,
                       OCR_NL_NEWPARA);
    else if ((buf->text[buf->count - 1].formatting >> 6) != OCR_NL_NEWPARA) {
      /* Final buffer of the page must end with a paragraph break. */
      buf->text[buf->count - 1].formatting &= 63;
      buf->text[buf->count - 1].formatting |= OCR_NL_NEWPARA << 6;
    }
  }

  /* Hand the buffer to the HP side and wait for its acknowledgement. */
  if (release_ocr () != OKAY)
    return HPERR;               /*release failed */
  if (wait_for_hp (READTEXT_TIMEOUT) != OKAY)
    return HPERR;

  if (more_to_come) {
    buf->count = 0;             /*buffer is free for the next batch */
    ocr_state = OCS_SENDING_TEXT;
  }
  else
    ocr_state = OCS_SETUP_INFO; /*ready for the next image */
  return OKAY;
}
/**********************************************************************
* ocr_shutdown
*
* Closedown communications with the HP side and free up handles.
**********************************************************************/
/* Public clean exit: reports OCR_ERR_CLEAN_EXIT through ocr_error, which
 * sends it in whatever way the current state allows and then calls
 * ocr_internal_shutdown. Always returns OKAY. */
INT16 ocr_shutdown() { /*closedown */
#ifdef __MAC__
/* Clear the recorded OCR process serial number before signalling. */
shm.OCRProcess.lowLongOfPSN = kNoProcess;
shm.OCRProcess.highLongOfPSN = 0;
#endif
ocr_error(OCR_ERR_CLEAN_EXIT); /*signal exit */
return OKAY;
}
/**********************************************************************
* ocr_internal_shutdown
*
* Free up handles or whatever to clean up without attempting to communicate.
**********************************************************************/
INT16 ocr_internal_shutdown() {  /*closedown */
  /* From here on every entry point sees a dead connection. */
  ocr_state = OCS_DEAD;
#ifdef __MSW32__
  /* Already unmapped (or never mapped): nothing left to free. */
  if (shm.shm_mem == NULL)
    return OKAY;

  UnmapViewOfFile (shm.shm_mem);
  CloseHandle (shm.shm_hand);
  CloseHandle (shm.mutex);
  CloseHandle (shm.ocr_sem);
  CloseHandle (shm.hp_sem);
  /* Cleared so a repeated call does not free the handles again. */
  shm.shm_mem = NULL;
#elif defined (__MAC__)
  /* Clear the recorded OCR process serial number. */
  shm.OCRProcess.lowLongOfPSN = kNoProcess;
  shm.OCRProcess.highLongOfPSN = 0;
#endif
  return OKAY;
}
/**********************************************************************
* wait_for_mutex
*
* Wait for the HP side to release its mutex.
* The return value is HPERR if the HP side has terminated.
**********************************************************************/
INT16 wait_for_mutex() {  /*wait for HP to be ready */
  /* Stays HPERR on builds with no wait primitive compiled in. */
  INT16 status = HPERR;

#if defined (__MSW32__) || defined (__MAC__)
  /* Wait with the largest possible timeout value; anything other than
     WAIT_OBJECT_0 counts as failure. */
  if (WaitForSingleObject (shm.mutex, (unsigned long) -1) == WAIT_OBJECT_0)
    status = OKAY;
#endif

  if (status == OKAY)
    return OKAY;

  /* The HP side is unreachable: tear down local state. */
  ocr_internal_shutdown();
  return status;
}
/**********************************************************************
* wait_for_hp
*
* Wait for the HP side to release its semaphore.
* The return value is HPERR if the timeout (in seconds) elapsed.
**********************************************************************/
INT16 wait_for_hp(                 /*wait for semaphore */
                  INT32 timeout    /*in seconds */
                 ) {
  /* Stays HPERR on builds with no wait primitive compiled in. */
  INT16 status = HPERR;

#if defined (__MSW32__) || defined (__MAC__)
  /* TICKS converts the timeout from seconds to the platform's wait unit;
     anything other than WAIT_OBJECT_0 (e.g. a timeout) is a failure. */
  if (WaitForSingleObject (shm.hp_sem, timeout * TICKS) == WAIT_OBJECT_0)
    status = OKAY;
#endif

  if (status == OKAY)
    return OKAY;

  /* The HP side did not answer: tear down local state. */
  ocr_internal_shutdown();
  return status;
}
/**********************************************************************
* release_mutex
*
* Release the HP mutex.
* The return value is OKAY if the call succeeds.
**********************************************************************/
INT16 release_mutex() {  /*release mutex */
  /* Stays HPERR on builds with no release primitive compiled in. */
  INT16 status = HPERR;

#ifdef __MSW32__
  if (ReleaseMutex (shm.mutex))
    status = OKAY;
#elif defined (__MAC__)
  if (ReleaseSemaphore (shm.mutex))
    status = OKAY;
#endif

  /* A failed release means the connection is unusable. */
  if (status != OKAY)
    ocr_internal_shutdown();
  return status;
}
/**********************************************************************
* release_ocr
*
* Release the OCR semaphore.
* The return value is OKAY if the call succeeds.
**********************************************************************/
/* Release the OCR semaphore so the HP side can proceed.
 * Returns OKAY on success. On failure the connection is shut down via
 * ocr_internal_shutdown and HPERR is returned, so callers never continue
 * (and never overwrite OCS_DEAD) after a failed release. */
INT16 release_ocr() {  /*release semaphore */
  INT32 timeout;        //time allowed, in TICKS units

  timeout = RELEASE_TIMEOUT * TICKS;
#ifdef __MSW32__
  BOOL result = 0;      //of release

  // Retry every 50ms until the release succeeds or the time runs out.
  do {
    result = ReleaseSemaphore (shm.ocr_sem, 1, NULL);
    if (result == FALSE) {
      timeout -= 50;
      Sleep (50);
    }
  }
  while (result == FALSE && timeout > 0);

  if (!result) {
    // Previously OKAY was returned here too, hiding the failure.
    ocr_internal_shutdown();
    return HPERR;
  }
  return OKAY;
#elif defined (__MAC__)
  INT16 result = HPERR; /*return code */

  /*single attempt; timeout is not used on this platform */
  result = ReleaseSemaphore (shm.ocr_sem) ? OKAY : HPERR;
  if (result != OKAY)
    ocr_internal_shutdown();
  return result;
#elif defined (__UNIX__)
  /*no semaphore on this build: nothing to release */
  return 0;
#endif
}