mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
[pdfrenderer] Add some debugging capabilities. Do not compress symbol table and text data during debug.
This commit is contained in:
parent
b8961a7375
commit
94bd98b7ef
@ -139,6 +139,12 @@ protected:
|
||||
// This method will grow the output buffer if needed.
|
||||
void AppendData(const char *s, int len);
|
||||
|
||||
template <typename T>
|
||||
auto AppendData(T &&d) {
|
||||
AppendData(d.data(), d.size());
|
||||
return d.size();
|
||||
}
|
||||
|
||||
private:
|
||||
TessResultRenderer *next_; // Can link multiple renderers together
|
||||
FILE *fout_; // output file pointer
|
||||
|
@ -34,6 +34,16 @@
|
||||
#include <locale> // for std::locale::classic
|
||||
#include <memory> // std::unique_ptr
|
||||
#include <sstream> // for std::stringstream
|
||||
#include <string_view>
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define DEBUG_PDF
|
||||
#endif
|
||||
#ifdef DEBUG_PDF
|
||||
#define NO_PDF_COMPRESSION
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
@ -443,6 +453,9 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
|
||||
if (fontsize != old_fontsize) {
|
||||
pdf_str << "/f-0-0 " << fontsize << " Tf ";
|
||||
old_fontsize = fontsize;
|
||||
#ifdef DEBUG_PDF
|
||||
pdf_str << "\n";
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -469,9 +482,13 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
|
||||
}
|
||||
if (word_length > 0 && pdf_word_len > 0) {
|
||||
double h_stretch = kCharWidth * prec(100.0 * word_length / (fontsize * pdf_word_len));
|
||||
pdf_str << h_stretch << " Tz" // horizontal stretch
|
||||
<< " [ <" << pdf_word // UTF-16BE representation
|
||||
<< "> ] TJ"; // show the text
|
||||
pdf_str << h_stretch << " Tz"; // horizontal stretch
|
||||
pdf_str
|
||||
<< " [ <" << pdf_word // UTF-16BE representation
|
||||
<< "> ] TJ"; // show the text
|
||||
#ifdef DEBUG_PDF
|
||||
pdf_str << "\n";
|
||||
#endif
|
||||
}
|
||||
if (last_word_in_line) {
|
||||
pdf_str << " \n";
|
||||
@ -545,26 +562,36 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
for (int i = 0; i < kCIDToGIDMapSize; i++) {
|
||||
cidtogidmap[i] = (i % 2) ? 1 : 0;
|
||||
}
|
||||
size_t len;
|
||||
unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
|
||||
size_t len = kCIDToGIDMapSize;
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
auto comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
|
||||
#endif
|
||||
stream.str("");
|
||||
stream << "5 0 obj\n"
|
||||
"<<\n"
|
||||
" /Length "
|
||||
<< len
|
||||
<< " /Filter /FlateDecode\n"
|
||||
<< ""
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
" /Filter /FlateDecode"
|
||||
#endif
|
||||
"\n"
|
||||
">>\n"
|
||||
"stream\n";
|
||||
"stream\n"
|
||||
;
|
||||
AppendString(stream.str().c_str());
|
||||
long objsize = stream.str().size();
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
AppendData(reinterpret_cast<char *>(comp), len);
|
||||
#else
|
||||
AppendData(reinterpret_cast<char *>(cidtogidmap.get()), len);
|
||||
#endif
|
||||
objsize += len;
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
lept_free(comp);
|
||||
const char *endstream_endobj =
|
||||
"endstream\n"
|
||||
"endobj\n";
|
||||
AppendString(endstream_endobj);
|
||||
objsize += strlen(endstream_endobj);
|
||||
#endif
|
||||
objsize += AppendData("endstream\n"sv);
|
||||
objsize += AppendData("endobj\n"sv);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
|
||||
const char stream2[] =
|
||||
@ -654,8 +681,8 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
objsize = stream.str().size();
|
||||
AppendData(reinterpret_cast<const char *>(font), size);
|
||||
objsize += size;
|
||||
AppendString(endstream_endobj);
|
||||
objsize += strlen(endstream_endobj);
|
||||
objsize += AppendData("endstream\n"sv);
|
||||
objsize += AppendData("endobj\n"sv);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
return true;
|
||||
}
|
||||
@ -848,29 +875,37 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
// CONTENTS
|
||||
const std::unique_ptr<char[]> pdftext(GetPDFTextObjects(api, width, height));
|
||||
const size_t pdftext_len = strlen(pdftext.get());
|
||||
size_t len;
|
||||
unsigned char *comp_pdftext =
|
||||
zlibCompress(reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
|
||||
long comp_pdftext_len = len;
|
||||
size_t len = pdftext_len;
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
auto comp_pdftext = zlibCompress(reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
|
||||
#endif
|
||||
stream.str("");
|
||||
stream << obj_
|
||||
<< " 0 obj\n"
|
||||
"<<\n"
|
||||
" /Length "
|
||||
<< comp_pdftext_len
|
||||
<< " /Filter /FlateDecode\n"
|
||||
<< len
|
||||
<< ""
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
" /Filter /FlateDecode"
|
||||
#endif
|
||||
"\n"
|
||||
">>\n"
|
||||
"stream\n";
|
||||
"stream\n"
|
||||
;
|
||||
AppendString(stream.str().c_str());
|
||||
long objsize = stream.str().size();
|
||||
AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
|
||||
objsize += comp_pdftext_len;
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
AppendData(reinterpret_cast<char *>(comp_pdftext), len);
|
||||
#else
|
||||
AppendData(reinterpret_cast<char *>(pdftext.get()), len);
|
||||
#endif
|
||||
objsize += len;
|
||||
#ifndef NO_PDF_COMPRESSION
|
||||
lept_free(comp_pdftext);
|
||||
const char *b2 =
|
||||
"endstream\n"
|
||||
"endobj\n";
|
||||
AppendString(b2);
|
||||
objsize += strlen(b2);
|
||||
#endif
|
||||
objsize += AppendData("endstream\n"sv);
|
||||
objsize += AppendData("endobj\n"sv);
|
||||
AppendPDFObjectDIY(objsize);
|
||||
|
||||
if (!textonly_) {
|
||||
|
Loading…
Reference in New Issue
Block a user