mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 23:49:05 +08:00
embed pdf.ttf to tesseract library #2551
This commit is contained in:
parent
5ca4e0062d
commit
044921267f
75
src/api/pdf_ttf.h
Normal file
75
src/api/pdf_ttf.h
Normal file
@ -0,0 +1,75 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: pdf_ttf.h
|
||||
// Description: pdf.ttf (GlyphLessFont) replacement.
|
||||
// Generated with: "bin2cpp pdf.ttf pdf_ttf cpp17"
|
||||
// Author: Zdenko Podobny
|
||||
//
|
||||
// (C) Copyright 2020, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Include guard. The generated name "pdf_ttf__H" contained a double
// underscore, which C++ reserves for the implementation, so a
// path-based name is used instead.
#ifndef TESSERACT_API_PDF_TTF_H_
#define TESSERACT_API_PDF_TTF_H_

// Size in bytes of the embedded pdf.ttf (GlyphLessFont) image:
// 0x23c == 572. This matches the end of the last table ('post', offset
// 0x21c, length 0x20) listed in the font's table directory below.
// Kept as a non-const C++17 inline variable so the declared type that
// existing users see is unchanged.
inline unsigned long pdf_ttf_length = 0x23c;

// Raw bytes of pdf.ttf as emitted by bin2cpp, 12 bytes per row.
// The array has 573 elements: the 572 font bytes plus one trailing 0x0
// that is not part of the font. Always use pdf_ttf_length, not
// sizeof(pdf_ttf), when copying the font.
// Intentionally not const: the PDF renderer passes this array through
// reinterpret_cast<char*>, which cannot drop a const qualifier.
inline unsigned char pdf_ttf[] = {
    // Offset table and table directory (10 tables).
    0x0,  0x1,  0x0,  0x0,  0x0,  0xa,  0x0,  0x80, 0x0,  0x3,  0x0,  0x20,
    0x4f, 0x53, 0x2f, 0x32, 0x56, 0xde, 0xc8, 0x94, 0x0,  0x0,  0x1,  0x28,
    0x0,  0x0,  0x0,  0x60, 0x63, 0x6d, 0x61, 0x70, 0x0,  0xa,  0x0,  0x34,
    0x0,  0x0,  0x1,  0x90, 0x0,  0x0,  0x0,  0x1e, 0x67, 0x6c, 0x79, 0x66,
    0x15, 0x22, 0x41, 0x24, 0x0,  0x0,  0x1,  0xb8, 0x0,  0x0,  0x0,  0x18,
    0x68, 0x65, 0x61, 0x64, 0xb,  0x78, 0xf1, 0x65, 0x0,  0x0,  0x0,  0xac,
    0x0,  0x0,  0x0,  0x36, 0x68, 0x68, 0x65, 0x61, 0xc,  0x2,  0x4,  0x2,
    0x0,  0x0,  0x0,  0xe4, 0x0,  0x0,  0x0,  0x24, 0x68, 0x6d, 0x74, 0x78,
    0x4,  0x0,  0x0,  0x0,  0x0,  0x0,  0x1,  0x88, 0x0,  0x0,  0x0,  0x8,
    0x6c, 0x6f, 0x63, 0x61, 0x0,  0xc,  0x0,  0x0,  0x0,  0x0,  0x1,  0xb0,
    0x0,  0x0,  0x0,  0x6,  0x6d, 0x61, 0x78, 0x70, 0x0,  0x4,  0x0,  0x5,
    0x0,  0x0,  0x1,  0x8,  0x0,  0x0,  0x0,  0x20, 0x6e, 0x61, 0x6d, 0x65,
    0xf2, 0xeb, 0x16, 0xda, 0x0,  0x0,  0x1,  0xd0, 0x0,  0x0,  0x0,  0x4b,
    0x70, 0x6f, 0x73, 0x74, 0x0,  0x1,  0x0,  0x1,  0x0,  0x0,  0x2,  0x1c,
    // Table data starts at byte 4 of the next row (offset 0xac, 'head').
    0x0,  0x0,  0x0,  0x20, 0x0,  0x1,  0x0,  0x0,  0x0,  0x1,  0x0,  0x0,
    0xb0, 0x94, 0x71, 0x10, 0x5f, 0xf,  0x3c, 0xf5, 0x4,  0x7,  0x8,  0x0,
    0x0,  0x0,  0x0,  0x0,  0xcf, 0x9a, 0xfc, 0x6e, 0x0,  0x0,  0x0,  0x0,
    0xd4, 0xc3, 0xa7, 0xf2, 0x0,  0x0,  0x0,  0x0,  0x4,  0x0,  0x8,  0x0,
    0x0,  0x0,  0x0,  0x10, 0x0,  0x2,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x1,  0x0,  0x0,  0x8,  0x0,  0xff, 0xff, 0x0,  0x0,  0x4,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x4,  0x0,  0x0,  0x1,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x2,
    0x0,  0x1,  0x0,  0x0,  0x0,  0x2,  0x0,  0x4,  0x0,  0x1,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x1,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x3,  0x0,  0x0,
    0x1,  0x90, 0x0,  0x5,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x5,  0x0,  0x1,  0x0,  0x1,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x47, 0x4f, 0x4f, 0x47, 0x0,  0x40,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x1,  0xff, 0xff, 0x0,  0x0,  0x0,  0x1,
    0x0,  0x1,  0x80, 0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0x1,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x4,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x2,  0x0,  0x1,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x14, 0x0,  0x3,  0x0,  0x0,  0x0,  0x0,  0x0,  0x14,
    0x0,  0x6,  0x0,  0xa,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x0,  0x0,  0xc,  0x0,  0x0,  0x0,  0x1,  0x0,  0x0,
    0x0,  0x0,  0x4,  0x0,  0x8,  0x0,  0x0,  0x3,  0x0,  0x0,  0x31, 0x21,
    0x11, 0x21, 0x4,  0x0,  0xfc, 0x0,  0x8,  0x0,  0x0,  0x0,  0x0,  0x3,
    0x0,  0x2a, 0x0,  0x0,  0x0,  0x3,  0x0,  0x0,  0x0,  0x5,  0x0,  0x16,
    0x0,  0x0,  0x0,  0x1,  0x0,  0x0,  0x0,  0x0,  0x0,  0x5,  0x0,  0xb,
    0x0,  0x16, 0x0,  0x3,  0x0,  0x1,  0x4,  0x9,  0x0,  0x5,  0x0,  0x16,
    // 'name' strings: "Version 1.0" as UTF-16BE, then as ASCII.
    0x0,  0x0,  0x0,  0x56, 0x0,  0x65, 0x0,  0x72, 0x0,  0x73, 0x0,  0x69,
    0x0,  0x6f, 0x0,  0x6e, 0x0,  0x20, 0x0,  0x31, 0x0,  0x2e, 0x0,  0x30,
    0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x31, 0x2e, 0x30, 0x0,
    0x0,  0x1,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    0x0,  0x0,  0x0,  0x1,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,
    // Last 8 font bytes followed by the single extra trailing 0x0.
    0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0,  0x0};

#endif  // TESSERACT_API_PDF_TTF_H_
|
@ -23,12 +23,14 @@
|
||||
#include <locale> // for std::locale::classic
|
||||
#include <memory> // std::unique_ptr
|
||||
#include <sstream> // for std::stringstream
|
||||
#include <fstream> // for std::ifstream
|
||||
#include "allheaders.h"
|
||||
#include <tesseract/baseapi.h>
|
||||
#include <cmath>
|
||||
#include <tesseract/renderer.h>
|
||||
#include <cstring>
|
||||
#include "tprintf.h"
|
||||
#include "pdf_ttf.h"
|
||||
|
||||
/*
|
||||
|
||||
@ -623,24 +625,21 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
|
||||
stream.str("");
|
||||
stream << datadir_.c_str() << "/pdf.ttf";
|
||||
FILE *fp = fopen(stream.str().c_str(), "rb");
|
||||
if (!fp) {
|
||||
tprintf("Cannot open file \"%s\"!\n", stream.str().c_str());
|
||||
return false;
|
||||
const char* font;
|
||||
std::ifstream input(stream.str().c_str(), std::ios::in | std::ios::binary);
|
||||
std::vector<unsigned char> buffer(std::istreambuf_iterator<char>(input), {});
|
||||
auto size = buffer.size();
|
||||
if (size) {
|
||||
font = reinterpret_cast<char*>(buffer.data());
|
||||
} else {
|
||||
#if defined(DEBUG)
|
||||
tprintf("Cannot open file \"%s\"!\nUsing internal glyphless font.\n",
|
||||
stream.str().c_str());
|
||||
#endif
|
||||
font = reinterpret_cast<char*>(pdf_ttf);
|
||||
size = pdf_ttf_length;
|
||||
}
|
||||
fseek(fp, 0, SEEK_END);
|
||||
auto size = std::ftell(fp);
|
||||
if (size < 0) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
const std::unique_ptr<char[]> buffer(new char[size]);
|
||||
if (!tesseract::DeSerialize(fp, buffer.get(), size)) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
// FONTFILE2
|
||||
stream.str("");
|
||||
stream <<
|
||||
@ -652,7 +651,7 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
"stream\n";
|
||||
AppendString(stream.str().c_str());
|
||||
objsize = stream.str().size();
|
||||
AppendData(buffer.get(), size);
|
||||
AppendData(font, size);
|
||||
objsize += size;
|
||||
AppendString(endstream_endobj);
|
||||
objsize += strlen(endstream_endobj);
|
||||
|
Loading…
Reference in New Issue
Block a user