tesseract/api/tesseractmain.cpp

/**********************************************************************
 * File:        tessedit.cpp  (Formerly tessedit.c)
 * Description: Main program for merge of tess and editor.
 * Author:                  Ray Smith
 * Created:                 Tue Jan 07 15:21:46 GMT 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "applybox.h"
#include "control.h"
#include "tessvars.h"
#include "tessedit.h"
#include "baseapi.h"
#include "thresholder.h"
#include "pageres.h"
#include "imgs.h"
#include "varabled.h"
#include "tprintf.h"
#include "tesseractmain.h"
#include "stderr.h"
#include "notdll.h"
#include "mainblk.h"
#include "output.h"
#include "globals.h"
#include "helpers.h"
#include "blread.h"
#include "tfacep.h"
#include "callnet.h"

// Include automatically generated configuration file if running autoconf
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifdef HAVE_LIBTIFF
#include "tiffio.h"
#endif
#ifdef HAVE_LIBLEPT
#include "allheaders.h"
#else
class Pix;
#endif

#ifdef _TIFFIO_
void read_tiff_image(TIFF* tif, IMAGE* image);
#endif

#define VARDIR        "configs/" /*variables files */
                                 //config under api
#define API_CONFIG      "configs/api_config"
#define EXTERN

BOOL_VAR(tessedit_create_boxfile, FALSE, "Output text with boxes");
BOOL_VAR(tessedit_read_image, TRUE, "Ensure the image is read");
INT_VAR(tessedit_serial_unlv, 0,
        "0->Whole page, 1->serial no adapt, 2->serial with adapt");
INT_VAR(tessedit_page_number, -1,
        "-1 -> All pages, else specifc page to process");
BOOL_VAR(tessedit_write_images, FALSE, "Capture the image from the IPE");
BOOL_VAR(tessedit_debug_to_screen, FALSE, "Dont use debug file");

const int kMaxIntSize = 22;
const ERRCODE USAGE = "Usage";
char szAppName[] = "Tessedit";   //app name

// Recognize a single page, given by the (const) image, and output the text,
// as controlled by global flag variables into the output text_out STRING:
// tessedit_serial_unlv is the top-level control, and provides 3 ways of
// treating the UNLV zones with the adaptive classifier:
// case 0: if there is a unlv zone file present, use it to segment the page
// and process the zones in parallel (pass 1 on all, then pass2 on all),
// otherwise, treat the whole page as a single zone.
// Independently of the existence of the unlv zone file:
// if tessedit_create_boxfile, output text in ".box" training file format, with
// one recognizable unit (as UTF8 characters) per line and its bounding box
// coded in UTF8(equivalent to ascii) for generating training data by hand.
// else if tessedit_write_unlv, output text in Latin-1, with a few special
// hacks for the UNLV test environment. Only works for latin!
// else (default mode) write plain text in UTF-8.
// case 1:(tessedit_serial_unlv) Read a unlv zone file (and fail if not found)
// and treat each zone as an independent "page", including resetting the
// adaptive classifier between zones.
// case 2: Read a unlv zone file (fail if not found) and treat each zone as
// a page of a document, i.e. DON'T reset the adaptive classifier between
// zones.
// In case 1 and 2, the UNLV zone file name is derived from input_file, by
// replacing the last 4 characters with ".uzn". In case 0, the unlv zone
// file name is derived from the 2nd parameter to InitWithLanguage, and
// the value of input_file is ignored - ugly, but true - a consequence of
// the way that unlv zone file reading takes the place of a page layout
// analyzer.
void TesseractImage(const char* input_file, IMAGE* image, Pix* pix,
                    tesseract::TessBaseAPI* api, STRING* text_out) {
  api->SetInputName(input_file);
#ifdef HAVE_LIBLEPT
  if (pix != NULL) {
    api->SetImage(pix);
  } else {
#endif
    int bytes_per_line = check_legal_image_size(image->get_xsize(),
                                                image->get_ysize(),
                                                image->get_bpp());
    api->SetImage(image->get_buffer(), image->get_xsize(), image->get_ysize(),
                  image->get_bpp() / 8, bytes_per_line);
#ifdef HAVE_LIBLEPT
  }
#endif
  if (tessedit_serial_unlv == 0) {
    char* text;
    if (tessedit_create_boxfile)
      text = api->GetBoxText();
    else if (tessedit_write_unlv)
      text = api->GetUNLVText();
    else
      text = api->GetUTF8Text();
    *text_out += text;
    delete [] text;
  } else {
    BLOCK_LIST blocks;
    STRING filename = input_file;
    const char* lastdot = strrchr(filename.string(), '.');
    if (lastdot != NULL) {
      filename[lastdot - filename.string()] = '\0';
    }
    if (!read_unlv_file(filename, image->get_xsize(), image->get_ysize(),
                        &blocks)) {
      fprintf(stderr, "Error: Must have a unlv zone file %s to read!\n",
              filename.string());
      return;
    }
    BLOCK_IT b_it = &blocks;
    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
      BLOCK* block = b_it.data();
      TBOX box = block->bounding_box();
      api->SetRectangle(box.left(), image->get_ysize() - box.top(),
                        box.width(), box.height());
      char* text = api->GetUNLVText();
      *text_out += text;
      delete [] text;
      if (tessedit_serial_unlv == 1)
        api->ClearAdaptiveClassifier();
    }
  }
  if (tessedit_write_images) {
    page_image.write("tessinput.tif");
  }
}

/**********************************************************************
 *  main()
 *
 **********************************************************************/

int main(int argc, char **argv) {
  STRING outfile;               //output file

  if (argc < 3) {
    USAGE.error (argv[0], EXIT,
      "%s imagename outputbase [-l lang] [configfile [[+|-]varfile]...]\n"
#if !defined(HAVE_LIBLEPT) && !defined(_TIFFIO_)
      "Warning - no liblept or libtiff - cannot read compressed tiff files.\n"
#endif
      , argv[0]);
  }
  // Find the required language.
  const char* lang = "eng";
  int arg = 3;
  if (argc >= 5 && strcmp(argv[3], "-l") == 0) {
    lang = argv[4];
    arg = 5;
  }

  tesseract::TessBaseAPI  api;

  api.SetOutputName(argv[2]);
  api.Init(argv[0], lang, &(argv[arg]), argc-arg, false);
  api.SetPageSegMode(tesseract::PSM_AUTO);

  tprintf ("Tesseract Open Source OCR Engine %s\n",
#if defined(HAVE_LIBLEPT)
           "with Leptonica");
#elif defined(_TIFFIO_)
           "with LibTiff");
#else
           "");
#endif

  IMAGE image;
  STRING text_out;
#ifdef HAVE_LIBLEPT
  // Use leptonica to read images.
  // If the image fails to read, try it as a list of filenames.
  PIX* pix = pixRead(argv[1]);
  if (pix == NULL) {
    FILE* fp = fopen(argv[1], "r");
    if (fp == NULL)
      READFAILED.error(argv[0], EXIT, argv[1]);
    char filename[MAX_PATH];
    while (fgets(filename, sizeof(filename), fp) != NULL) {
      chomp_string(filename);
      pix = pixRead(filename);
      if (pix == NULL)
        READFAILED.error(argv[0], EXIT, argv[1]);
      TesseractImage(argv[1], NULL, pix, &api, &text_out);
      pixDestroy(&pix);
    }
    fclose(fp);
  } else {
    TesseractImage(argv[1], NULL, pix, &api, &text_out);
    pixDestroy(&pix);
  }
#else
#ifdef _TIFFIO_
  int len = strlen(argv[1]);
  if (len > 3 && strcmp("tif", argv[1] + len - 3) == 0) {
    // Use libtiff to read a tif file so multi-page can be handled.
    // The page number so the tiff file can be closed and reopened.
    int page_number = tessedit_page_number;
    if (page_number < 0)
      page_number = 0;
    TIFF* archive = NULL;
    do {
      // Since libtiff keeps all read images in memory we have to close the
      // file and reopen it for every page, and seek to the appropriate page.
      if (archive != NULL)
        TIFFClose(archive);
      archive = TIFFOpen(argv[1], "r");
      if (archive == NULL) {
        READFAILED.error (argv[0], EXIT, argv[1]);
        return 1;
      }
      if (page_number > 0)
        tprintf("Page %d\n", page_number);

      // Seek to the appropriate page.
      for (int i = 0; i < page_number; ++i) {
        TIFFReadDirectory(archive);
      }
      char page_str[kMaxIntSize];
      snprintf(page_str, kMaxIntSize - 1, "%d", page_number);
      api.SetVariable("applybox_page", page_str);
      ++page_number;
      // Read the current page into the Tesseract image.
      IMAGE image;
      read_tiff_image(archive, &image);

      // Run tesseract on the page!
      TesseractImage(argv[1], &image, NULL, &api, &text_out);
    // Do this while there are more pages in the tiff file.
    } while (TIFFReadDirectory(archive) &&
             (page_number <= tessedit_page_number || tessedit_page_number < 0));
    TIFFClose(archive);
  } else {
#endif
    // Using built-in image library to read bmp, or tiff without libtiff.
    if (image.read_header(argv[1]) < 0)
      READFAILED.error (argv[0], EXIT, argv[1]);
    if (image.read(image.get_ysize ()) < 0)
      MEMORY_OUT.error(argv[0], EXIT, "Read of image %s", argv[1]);
    TesseractImage(argv[1], &image, NULL, &api, &text_out);
#ifdef _TIFFIO_
  }
#endif
#endif  // HAVE_LIBLEPT

  outfile = argv[2];
  outfile += ".txt";
  FILE* fp = fopen(outfile.string(), "w");
  if (fp != NULL) {
    fwrite(text_out.string(), 1, text_out.length(), fp);
    fclose(fp);
  }

  return 0;                      //Normal exit
}

#ifdef __MSW32__
int initialized = 0;

/**********************************************************************
 * WinMain
 *
 * Main function for a windows program.
 **********************************************************************/

int WINAPI WinMain(  //main for windows //command line
                   HINSTANCE hInstance,
                   HINSTANCE hPrevInstance,
                   LPSTR lpszCmdLine,
                   int nCmdShow) {
  WNDCLASS wc;
  HWND hwnd;
  MSG msg;

  char **argv;
  char *argsin[2];
  int argc;
  int exit_code;

  wc.style = CS_NOCLOSE | CS_OWNDC;
  wc.lpfnWndProc = (WNDPROC) WndProc;
  wc.cbClsExtra = 0;
  wc.cbWndExtra = 0;
  wc.hInstance = hInstance;
  wc.hIcon = NULL;               //LoadIcon (NULL, IDI_APPLICATION);
  wc.hCursor = NULL;             //LoadCursor (NULL, IDC_ARROW);
  wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1);
  wc.lpszMenuName = NULL;
  wc.lpszClassName = szAppName;

  RegisterClass(&wc);

  hwnd = CreateWindow (szAppName, szAppName,
    WS_OVERLAPPEDWINDOW | WS_DISABLED,
    CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT,
    CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL);

  argsin[0] = strdup (szAppName);
  argsin[1] = strdup (lpszCmdLine);
  /*allocate memory for the args. There can never be more than half*/
  /*the total number of characters in the arguments.*/
  argv =
    (char **) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) *
    sizeof (char *));

  /*now construct argv as it should be for C.*/
  argc = parse_args (2, argsin, argv);

  //  ShowWindow (hwnd, nCmdShow);
  //  UpdateWindow (hwnd);

  if (initialized) {
    exit_code = main (argc, argv);
    free (argsin[0]);
    free (argsin[1]);
    free(argv);
    return exit_code;
  }
  while (GetMessage (&msg, NULL, 0, 0)) {
    TranslateMessage(&msg);
    DispatchMessage(&msg);
    if (initialized) {
      exit_code = main (argc, argv);
      break;
    }
    else
      exit_code = msg.wParam;
  }
  free (argsin[0]);
  free (argsin[1]);
  free(argv);
  return exit_code;
}


/**********************************************************************
 * WndProc
 *
 * Function to respond to messages.
 **********************************************************************/

LONG WINAPI WndProc(            //message handler
                    HWND hwnd,  //window with message
                    UINT msg,   //message typ
                    WPARAM wParam,
                    LPARAM lParam) {
  HDC hdc;

  if (msg == WM_CREATE) {
    //
    // Create a rendering context.
    //
    hdc = GetDC (hwnd);
    ReleaseDC(hwnd, hdc);
    initialized = 1;
    return 0;
  }
  return DefWindowProc (hwnd, msg, wParam, lParam);
}


/**********************************************************************
 * parse_args
 *
 * Turn a list of args into a new list of args with each separate
 * whitespace spaced string being an arg.
 **********************************************************************/

int
parse_args (                     /*refine arg list */
int argc,                        /*no of input args */
char *argv[],                    /*input args */
char *arglist[]                  /*output args */
) {
  int argcount;                  /*converted argc */
  char *testchar;                /*char in option string */
  int arg;                       /*current argument */

  argcount = 0;                  /*no of options */
  for (arg = 0; arg < argc; arg++) {
    testchar = argv[arg];        /*start of arg */
    do {
      while (*testchar
        && (*testchar == ' ' || *testchar == '\n'
        || *testchar == '\t'))
        testchar++;              /*skip white space */
      if (*testchar) {
                                 /*new arg */
        arglist[argcount++] = testchar;
                                 /*skip to white space */
        for (testchar++; *testchar && *testchar != ' ' && *testchar != '\n' && *testchar != '\t'; testchar++);
        if (*testchar)
          *testchar++ = '\0';    /*turn to separate args */
      }
    }
    while (*testchar);
  }
  return argcount;               /*new number of args */
}
#endif
New files in api for 3.00 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@285 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2009-07-11 10:03:09 +08:00			`/**********************************************************************`
			`* File: tessedit.cpp (Formerly tessedit.c)`
			`* Description: Main program for merge of tess and editor.`
			`* Author: Ray Smith`
			`* Created: Tue Jan 07 15:21:46 GMT 1992`
			`*`
			`* (C) Copyright 1992, Hewlett-Packard Ltd.`
			`** Licensed under the Apache License, Version 2.0 (the "License");`
			`** you may not use this file except in compliance with the License.`
			`** You may obtain a copy of the License at`
			`** http://www.apache.org/licenses/LICENSE-2.0`
			`** Unless required by applicable law or agreed to in writing, software`
			`** distributed under the License is distributed on an "AS IS" BASIS,`
			`** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`** See the License for the specific language governing permissions and`
			`** limitations under the License.`
			`*`
			`**********************************************************************/`

			`#include "mfcpch.h"`
			`#include "applybox.h"`
			`#include "control.h"`
			`#include "tessvars.h"`
			`#include "tessedit.h"`
			`#include "baseapi.h"`
			`#include "thresholder.h"`
			`#include "pageres.h"`
			`#include "imgs.h"`
			`#include "varabled.h"`
			`#include "tprintf.h"`
			`#include "tesseractmain.h"`
			`#include "stderr.h"`
			`#include "notdll.h"`
			`#include "mainblk.h"`
			`#include "output.h"`
			`#include "globals.h"`
			`#include "helpers.h"`
			`#include "blread.h"`
			`#include "tfacep.h"`
			`#include "callnet.h"`

			`// Include automatically generated configuration file if running autoconf`
			`#ifdef HAVE_CONFIG_H`
			`#include "config_auto.h"`
			`#endif`
			`#ifdef HAVE_LIBTIFF`
			`#include "tiffio.h"`
			`#endif`
			`#ifdef HAVE_LIBLEPT`
			`#include "allheaders.h"`
			`#else`
			`class Pix;`
			`#endif`

			`#ifdef _TIFFIO_`
			`void read_tiff_image(TIFF* tif, IMAGE* image);`
			`#endif`

			`#define VARDIR "configs/" /variables files /`
			`//config under api`
			`#define API_CONFIG "configs/api_config"`
			`#define EXTERN`

			`BOOL_VAR(tessedit_create_boxfile, FALSE, "Output text with boxes");`
			`BOOL_VAR(tessedit_read_image, TRUE, "Ensure the image is read");`
			`INT_VAR(tessedit_serial_unlv, 0,`
			`"0->Whole page, 1->serial no adapt, 2->serial with adapt");`
			`INT_VAR(tessedit_page_number, -1,`
			`"-1 -> All pages, else specifc page to process");`
			`BOOL_VAR(tessedit_write_images, FALSE, "Capture the image from the IPE");`
			`BOOL_VAR(tessedit_debug_to_screen, FALSE, "Dont use debug file");`

			`const int kMaxIntSize = 22;`
			`const ERRCODE USAGE = "Usage";`
			`char szAppName[] = "Tessedit"; //app name`

			`// Recognize a single page, given by the (const) image, and output the text,`
			`// as controlled by global flag variables into the output text_out STRING:`
			`// tessedit_serial_unlv is the top-level control, and provides 3 ways of`
			`// treating the UNLV zones with the adaptive classifier:`
			`// case 0: if there is a unlv zone file present, use it to segment the page`
			`// and process the zones in parallel (pass 1 on all, then pass2 on all),`
			`// otherwise, treat the whole page as a single zone.`
			`// Independently of the existence of the unlv zone file:`
			`// if tessedit_create_boxfile, output text in ".box" training file format, with`
			`// one recognizable unit (as UTF8 characters) per line and its bounding box`
			`// coded in UTF8(equivalent to ascii) for generating training data by hand.`
			`// else if tessedit_write_unlv, output text in Latin-1, with a few special`
			`// hacks for the UNLV test environment. Only works for latin!`
			`// else (default mode) write plain text in UTF-8.`
			`// case 1:(tessedit_serial_unlv) Read a unlv zone file (and fail if not found)`
			`// and treat each zone as an independent "page", including resetting the`
			`// adaptive classifier between zones.`
			`// case 2: Read a unlv zone file (fail if not found) and treat each zone as`
			`// a page of a document, i.e. DON'T reset the adaptive classifier between`
			`// zones.`
			`// In case 1 and 2, the UNLV zone file name is derived from input_file, by`
			`// replacing the last 4 characters with ".uzn". In case 0, the unlv zone`
			`// file name is derived from the 2nd parameter to InitWithLanguage, and`
			`// the value of input_file is ignored - ugly, but true - a consequence of`
			`// the way that unlv zone file reading takes the place of a page layout`
			`// analyzer.`
			`void TesseractImage(const char* input_file, IMAGE* image, Pix* pix,`
			`tesseract::TessBaseAPI* api, STRING* text_out) {`
			`api->SetInputName(input_file);`
			`#ifdef HAVE_LIBLEPT`
			`if (pix != NULL) {`
			`api->SetImage(pix);`
			`} else {`
			`#endif`
			`int bytes_per_line = check_legal_image_size(image->get_xsize(),`
			`image->get_ysize(),`
			`image->get_bpp());`
			`api->SetImage(image->get_buffer(), image->get_xsize(), image->get_ysize(),`
			`image->get_bpp() / 8, bytes_per_line);`
			`#ifdef HAVE_LIBLEPT`
			`}`
			`#endif`
			`if (tessedit_serial_unlv == 0) {`
			`char* text;`
			`if (tessedit_create_boxfile)`
			`text = api->GetBoxText();`
			`else if (tessedit_write_unlv)`
			`text = api->GetUNLVText();`
			`else`
			`text = api->GetUTF8Text();`
			`*text_out += text;`
			`delete [] text;`
			`} else {`
			`BLOCK_LIST blocks;`
			`STRING filename = input_file;`
			`const char* lastdot = strrchr(filename.string(), '.');`
			`if (lastdot != NULL) {`
			`filename[lastdot - filename.string()] = '\0';`
			`}`
			`if (!read_unlv_file(filename, image->get_xsize(), image->get_ysize(),`
			`&blocks)) {`
			`fprintf(stderr, "Error: Must have a unlv zone file %s to read!\n",`
			`filename.string());`
			`return;`
			`}`
			`BLOCK_IT b_it = &blocks;`
			`for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {`
			`BLOCK* block = b_it.data();`
			`TBOX box = block->bounding_box();`
			`api->SetRectangle(box.left(), image->get_ysize() - box.top(),`
			`box.width(), box.height());`
			`char* text = api->GetUNLVText();`
			`*text_out += text;`
			`delete [] text;`
			`if (tessedit_serial_unlv == 1)`
			`api->ClearAdaptiveClassifier();`
			`}`
			`}`
			`if (tessedit_write_images) {`
			`page_image.write("tessinput.tif");`
			`}`
			`}`

			`/**********************************************************************`
			`* main()`
			`*`
			`**********************************************************************/`

			`int main(int argc, char **argv) {`
			`STRING outfile; //output file`

			`if (argc < 3) {`
			`USAGE.error (argv[0], EXIT,`
			`"%s imagename outputbase [-l lang] [configfile [[+\|-]varfile]...]\n"`
			`#if !defined(HAVE_LIBLEPT) && !defined(_TIFFIO_)`
			`"Warning - no liblept or libtiff - cannot read compressed tiff files.\n"`
			`#endif`
			`, argv[0]);`
			`}`
			`// Find the required language.`
			`const char* lang = "eng";`
			`int arg = 3;`
			`if (argc >= 5 && strcmp(argv[3], "-l") == 0) {`
			`lang = argv[4];`
			`arg = 5;`
			`}`

			`tesseract::TessBaseAPI api;`

			`api.SetOutputName(argv[2]);`
			`api.Init(argv[0], lang, &(argv[arg]), argc-arg, false);`
			`api.SetPageSegMode(tesseract::PSM_AUTO);`

			`tprintf ("Tesseract Open Source OCR Engine %s\n",`
			`#if defined(HAVE_LIBLEPT)`
			`"with Leptonica");`
			`#elif defined(_TIFFIO_)`
			`"with LibTiff");`
			`#else`
			`"");`
			`#endif`

			`IMAGE image;`
			`STRING text_out;`
			`#ifdef HAVE_LIBLEPT`
			`// Use leptonica to read images.`
			`// If the image fails to read, try it as a list of filenames.`
			`PIX* pix = pixRead(argv[1]);`
			`if (pix == NULL) {`
			`FILE* fp = fopen(argv[1], "r");`
			`if (fp == NULL)`
			`READFAILED.error(argv[0], EXIT, argv[1]);`
			`char filename[MAX_PATH];`
			`while (fgets(filename, sizeof(filename), fp) != NULL) {`
			`chomp_string(filename);`
			`pix = pixRead(filename);`
			`if (pix == NULL)`
			`READFAILED.error(argv[0], EXIT, argv[1]);`
			`TesseractImage(argv[1], NULL, pix, &api, &text_out);`
			`pixDestroy(&pix);`
			`}`
			`fclose(fp);`
			`} else {`
			`TesseractImage(argv[1], NULL, pix, &api, &text_out);`
			`pixDestroy(&pix);`
			`}`
			`#else`
			`#ifdef _TIFFIO_`
			`int len = strlen(argv[1]);`
			`if (len > 3 && strcmp("tif", argv[1] + len - 3) == 0) {`
			`// Use libtiff to read a tif file so multi-page can be handled.`
			`// The page number so the tiff file can be closed and reopened.`
			`int page_number = tessedit_page_number;`
			`if (page_number < 0)`
			`page_number = 0;`
			`TIFF* archive = NULL;`
			`do {`
			`// Since libtiff keeps all read images in memory we have to close the`
			`// file and reopen it for every page, and seek to the appropriate page.`
			`if (archive != NULL)`
			`TIFFClose(archive);`
			`archive = TIFFOpen(argv[1], "r");`
			`if (archive == NULL) {`
			`READFAILED.error (argv[0], EXIT, argv[1]);`
			`return 1;`
			`}`
			`if (page_number > 0)`
			`tprintf("Page %d\n", page_number);`

			`// Seek to the appropriate page.`
			`for (int i = 0; i < page_number; ++i) {`
			`TIFFReadDirectory(archive);`
			`}`
			`char page_str[kMaxIntSize];`
			`snprintf(page_str, kMaxIntSize - 1, "%d", page_number);`
			`api.SetVariable("applybox_page", page_str);`
			`++page_number;`
			`// Read the current page into the Tesseract image.`
			`IMAGE image;`
			`read_tiff_image(archive, &image);`

			`// Run tesseract on the page!`
			`TesseractImage(argv[1], &image, NULL, &api, &text_out);`
			`// Do this while there are more pages in the tiff file.`
			`} while (TIFFReadDirectory(archive) &&`
			`(page_number <= tessedit_page_number \|\| tessedit_page_number < 0));`
			`TIFFClose(archive);`
			`} else {`
			`#endif`
			`// Using built-in image library to read bmp, or tiff without libtiff.`
			`if (image.read_header(argv[1]) < 0)`
			`READFAILED.error (argv[0], EXIT, argv[1]);`
			`if (image.read(image.get_ysize ()) < 0)`
			`MEMORY_OUT.error(argv[0], EXIT, "Read of image %s", argv[1]);`
			`TesseractImage(argv[1], &image, NULL, &api, &text_out);`
			`#ifdef _TIFFIO_`
			`}`
			`#endif`
			`#endif // HAVE_LIBLEPT`

			`outfile = argv[2];`
			`outfile += ".txt";`
			`FILE* fp = fopen(outfile.string(), "w");`
			`if (fp != NULL) {`
			`fwrite(text_out.string(), 1, text_out.length(), fp);`
			`fclose(fp);`
			`}`

			`return 0; //Normal exit`
			`}`

			`#ifdef __MSW32__`
			`int initialized = 0;`

			`/**********************************************************************`
			`* WinMain`
			`*`
			`* Main function for a windows program.`
			`**********************************************************************/`

			`int WINAPI WinMain( //main for windows //command line`
			`HINSTANCE hInstance,`
			`HINSTANCE hPrevInstance,`
			`LPSTR lpszCmdLine,`
			`int nCmdShow) {`
			`WNDCLASS wc;`
			`HWND hwnd;`
			`MSG msg;`

			`char **argv;`
			`char *argsin[2];`
			`int argc;`
			`int exit_code;`

			`wc.style = CS_NOCLOSE \| CS_OWNDC;`
			`wc.lpfnWndProc = (WNDPROC) WndProc;`
			`wc.cbClsExtra = 0;`
			`wc.cbWndExtra = 0;`
			`wc.hInstance = hInstance;`
			`wc.hIcon = NULL; //LoadIcon (NULL, IDI_APPLICATION);`
			`wc.hCursor = NULL; //LoadCursor (NULL, IDC_ARROW);`
			`wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1);`
			`wc.lpszMenuName = NULL;`
			`wc.lpszClassName = szAppName;`

			`RegisterClass(&wc);`

			`hwnd = CreateWindow (szAppName, szAppName,`
			`WS_OVERLAPPEDWINDOW \| WS_DISABLED,`
			`CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT,`
			`CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL);`

			`argsin[0] = strdup (szAppName);`
			`argsin[1] = strdup (lpszCmdLine);`
			`/allocate memory for the args. There can never be more than half/`
			`/the total number of characters in the arguments./`
			`argv =`
			`(char *) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) `
			`sizeof (char *));`

			`/now construct argv as it should be for C./`
			`argc = parse_args (2, argsin, argv);`

			`// ShowWindow (hwnd, nCmdShow);`
			`// UpdateWindow (hwnd);`

			`if (initialized) {`
			`exit_code = main (argc, argv);`
			`free (argsin[0]);`
			`free (argsin[1]);`
			`free(argv);`
			`return exit_code;`
			`}`
			`while (GetMessage (&msg, NULL, 0, 0)) {`
			`TranslateMessage(&msg);`
			`DispatchMessage(&msg);`
			`if (initialized) {`
			`exit_code = main (argc, argv);`
			`break;`
			`}`
			`else`
			`exit_code = msg.wParam;`
			`}`
			`free (argsin[0]);`
			`free (argsin[1]);`
			`free(argv);`
			`return exit_code;`
			`}`


			`/**********************************************************************`
			`* WndProc`
			`*`
			`* Function to respond to messages.`
			`**********************************************************************/`

			`LONG WINAPI WndProc( //message handler`
			`HWND hwnd, //window with message`
			`UINT msg, //message typ`
			`WPARAM wParam,`
			`LPARAM lParam) {`
			`HDC hdc;`

			`if (msg == WM_CREATE) {`
			`//`
			`// Create a rendering context.`
			`//`
			`hdc = GetDC (hwnd);`
			`ReleaseDC(hwnd, hdc);`
			`initialized = 1;`
			`return 0;`
			`}`
			`return DefWindowProc (hwnd, msg, wParam, lParam);`
			`}`


			`/**********************************************************************`
			`* parse_args`
			`*`
			`* Turn a list of args into a new list of args with each separate`
			`* whitespace spaced string being an arg.`
			`**********************************************************************/`

			`int`
			`parse_args ( /refine arg list /`
			`int argc, /no of input args /`
			`char argv[], /input args */`
			`char arglist[] /output args */`
			`) {`
			`int argcount; /converted argc /`
			`char testchar; /char in option string */`
			`int arg; /current argument /`

			`argcount = 0; /no of options /`
			`for (arg = 0; arg < argc; arg++) {`
			`testchar = argv[arg]; /start of arg /`
			`do {`
			`while (*testchar`
			`&& (testchar == ' ' \|\| testchar == '\n'`
			`\|\| *testchar == '\t'))`
			`testchar++; /skip white space /`
			`if (*testchar) {`
			`/new arg /`
			`arglist[argcount++] = testchar;`
			`/skip to white space /`
			`for (testchar++; testchar && testchar != ' ' && testchar != '\n' && testchar != '\t'; testchar++);`
			`if (*testchar)`
			`testchar++ = '\0'; /turn to separate args */`
			`}`
			`}`
			`while (*testchar);`
			`}`
			`return argcount; /new number of args /`
			`}`
			`#endif`