mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-29 06:09:04 +08:00
Fix for linking with leptonica on Linux.
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@548 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
137f4806b6
commit
a7db6dada9
@ -52,6 +52,7 @@
|
||||
#include "edgblob.h"
|
||||
#include "tessbox.h"
|
||||
#include "imgs.h"
|
||||
#include "imgtiff.h"
|
||||
#include "makerow.h"
|
||||
#include "tstruct.h"
|
||||
#include "permute.h"
|
||||
@ -79,8 +80,6 @@ const char* kInputFile = "noname.tif";
|
||||
const char* kOldVarsFile = "failed_vars.txt";
|
||||
// Max string length of an int.
|
||||
const int kMaxIntSize = 22;
|
||||
// Buffer size for leptonica to discover image file format.
|
||||
const int kImageHeaderSize = 32;
|
||||
|
||||
TessBaseAPI::TessBaseAPI()
|
||||
: tesseract_(NULL),
|
||||
@ -618,28 +617,13 @@ bool TessBaseAPI::ProcessPages(const char* filename,
|
||||
int page = tesseract_->tessedit_page_number;
|
||||
if (page < 0)
|
||||
page = 0;
|
||||
int npages = 1;
|
||||
FILE* fp = fopen(filename, "rb");
|
||||
if (fp == NULL) {
|
||||
tprintf(_("Image file %s cannot be opened!\n"), filename);
|
||||
return false;
|
||||
}
|
||||
// Find the image file type without incurring errors.
|
||||
bool is_tiff = false;
|
||||
l_uint8 header_buffer[kImageHeaderSize];
|
||||
l_int32 image_type = IFF_UNKNOWN;
|
||||
if (fread(header_buffer, 1, kImageHeaderSize, fp) == kImageHeaderSize &&
|
||||
findFileFormatBuffer(header_buffer, &image_type) == 0 &&
|
||||
image_type == IFF_TIFF) {
|
||||
rewind(fp);
|
||||
int tiffstat = tiffGetCount(fp, &npages);
|
||||
if (tiffstat == 1) {
|
||||
fprintf(stderr, _("Error reading file %s!\n"), filename);
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
is_tiff = true;
|
||||
}
|
||||
// Find the number of pages if a tiff file, or zero otherwise.
|
||||
int npages = CountTiffPages(fp);
|
||||
fclose(fp);
|
||||
|
||||
if (tesseract_->tessedit_create_hocr) {
|
||||
@ -656,7 +640,7 @@ bool TessBaseAPI::ProcessPages(const char* filename,
|
||||
|
||||
bool success = true;
|
||||
Pix *pix;
|
||||
if (is_tiff) {
|
||||
if (npages > 0) {
|
||||
for (; page < npages && (pix = pixReadTiff(filename, page)) != NULL;
|
||||
++page) {
|
||||
if (page > 0)
|
||||
@ -671,44 +655,42 @@ bool TessBaseAPI::ProcessPages(const char* filename,
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (image_type == IFF_UNKNOWN) {
|
||||
// The file is not an image file, so try it as a list of filenames.
|
||||
FILE* fimg = fopen(filename, "r");
|
||||
if (fimg == NULL) {
|
||||
tprintf(_("File %s cannot be opened!\n"), filename);
|
||||
fclose(fimg);
|
||||
return false;
|
||||
}
|
||||
char pagename[MAX_PATH];
|
||||
// Skip to the requested page number.
|
||||
for (int i = 0; i < page &&
|
||||
fgets(pagename, sizeof(pagename), fimg) != NULL;
|
||||
++i);
|
||||
while (fgets(pagename, sizeof(pagename), fimg) != NULL) {
|
||||
chomp_string(pagename);
|
||||
pix = pixRead(pagename);
|
||||
if (pix == NULL) {
|
||||
tprintf(_("Image file %s cannot be read!\n"), pagename);
|
||||
fclose(fimg);
|
||||
return false;
|
||||
}
|
||||
tprintf(_("Page %d : %s\n"), page, pagename);
|
||||
success &= ProcessPage(pix, page, pagename, retry_config,
|
||||
timeout_millisec, text_out);
|
||||
pixDestroy(&pix);
|
||||
++page;
|
||||
}
|
||||
fclose(fimg);
|
||||
} else {
|
||||
// The file is not a tiff file, so use the general pixRead function.
|
||||
pix = pixRead(filename);
|
||||
if (pix == NULL) {
|
||||
tprintf(_("Image file %s cannot be read!\n"), filename);
|
||||
return false;
|
||||
if (pix != NULL) {
|
||||
success &= ProcessPage(pix, 0, filename, retry_config,
|
||||
timeout_millisec, text_out);
|
||||
pixDestroy(&pix);
|
||||
} else {
|
||||
// The file is not an image file, so try it as a list of filenames.
|
||||
FILE* fimg = fopen(filename, "r");
|
||||
if (fimg == NULL) {
|
||||
tprintf(_("File %s cannot be opened!\n"), filename);
|
||||
return false;
|
||||
}
|
||||
tprintf(_("Reading %s as a list of filenames...\n"), filename);
|
||||
char pagename[MAX_PATH];
|
||||
// Skip to the requested page number.
|
||||
for (int i = 0; i < page &&
|
||||
fgets(pagename, sizeof(pagename), fimg) != NULL;
|
||||
++i);
|
||||
while (fgets(pagename, sizeof(pagename), fimg) != NULL) {
|
||||
chomp_string(pagename);
|
||||
pix = pixRead(pagename);
|
||||
if (pix == NULL) {
|
||||
tprintf(_("Image file %s cannot be read!\n"), pagename);
|
||||
fclose(fimg);
|
||||
return false;
|
||||
}
|
||||
tprintf(_("Page %d : %s\n"), page, pagename);
|
||||
success &= ProcessPage(pix, page, pagename, retry_config,
|
||||
timeout_millisec, text_out);
|
||||
pixDestroy(&pix);
|
||||
++page;
|
||||
}
|
||||
fclose(fimg);
|
||||
}
|
||||
success &= ProcessPage(pix, 0, filename, retry_config,
|
||||
timeout_millisec, text_out);
|
||||
pixDestroy(&pix);
|
||||
}
|
||||
if (tesseract_->tessedit_create_hocr)
|
||||
*text_out += "</body>\n</html>\n";
|
||||
|
@ -180,6 +180,45 @@ const uinT8 long_black_lengths[LONG_CODE_SIZE] = {
|
||||
12, 12, 12, 12, 12, 12, 12, 12
|
||||
};
|
||||
|
||||
// CountTiffPages
|
||||
// Returns the number of pages in the file if it is a tiff file, otherwise 0.
|
||||
// WARNING: requires __MOTO__ to be #defined on a big-endian system.
|
||||
int CountTiffPages(FILE* fp) {
|
||||
if (fp == NULL) return 0;
|
||||
// Read header
|
||||
inT16 filetype = 0;
|
||||
if (fread(&filetype, sizeof(filetype), 1, fp) != 1 ||
|
||||
(filetype != INTEL && filetype != MOTO)) {
|
||||
return 0;
|
||||
}
|
||||
fseek(fp, 4L, SEEK_SET);
|
||||
int npages = 0;
|
||||
do {
|
||||
inT32 start; // Start of tiff directory.
|
||||
if (fread(&start, sizeof(start), 1, fp) != 1) {
|
||||
return npages;
|
||||
}
|
||||
if (filetype != __NATIVE__)
|
||||
ReverseN(&start, sizeof(start));
|
||||
if (start <= 0) {
|
||||
return npages;
|
||||
}
|
||||
fseek(fp, start, SEEK_SET);
|
||||
inT16 entries; // No of tiff entries.
|
||||
if (fread(&entries, sizeof(entries), 1, fp) != 1) {
|
||||
return npages;
|
||||
}
|
||||
if (filetype != __NATIVE__)
|
||||
ReverseN(&entries, sizeof(entries));
|
||||
// Skip the tags and get to the next start.
|
||||
fseek(fp, entries * sizeof(TIFFENTRY), SEEK_CUR);
|
||||
++npages;
|
||||
} while (1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(rays) The rest of this file is redundant and should be deleted.
|
||||
|
||||
/**********************************************************************
|
||||
* open_tif_image
|
||||
*
|
||||
|
@ -23,6 +23,10 @@
|
||||
#include "host.h"
|
||||
#include "bitstrm.h"
|
||||
|
||||
// CountTiffPages
|
||||
// Returns the number of pages in the file if it is a tiff file, otherwise 0.
|
||||
int CountTiffPages(FILE* fp);
|
||||
|
||||
inT8 open_tif_image( //read header
|
||||
int fd, //file to read
|
||||
inT32 *xsize, //size of image
|
||||
|
Loading…
Reference in New Issue
Block a user