mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-27 10:34:12 +08:00
48713f7df2
Signed-off-by: Stefan Weil <sw@weilnetz.de>
227 lines
6.3 KiB
C++
227 lines
6.3 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: renderer.cpp
|
|
// Description: Rendering interface to inject into TessBaseAPI
|
|
//
|
|
// (C) Copyright 2011, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
|
|
#include <cstring>
|
|
#include <memory> // std::unique_ptr
|
|
#include "baseapi.h"
|
|
#include "genericvector.h"
|
|
#include "renderer.h"
|
|
|
|
namespace tesseract {
|
|
|
|
/**********************************************************************
|
|
* Base Renderer interface implementation
|
|
**********************************************************************/
|
|
TessResultRenderer::TessResultRenderer(const char *outputbase,
|
|
const char* extension)
|
|
: file_extension_(extension),
|
|
title_(""), imagenum_(-1),
|
|
fout_(stdout),
|
|
next_(nullptr),
|
|
happy_(true) {
|
|
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
|
|
STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
|
|
fout_ = fopen(outfile.string(), "wb");
|
|
if (fout_ == nullptr) {
|
|
happy_ = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
TessResultRenderer::~TessResultRenderer() {
|
|
if (fout_ != nullptr) {
|
|
if (fout_ != stdout)
|
|
fclose(fout_);
|
|
else
|
|
clearerr(fout_);
|
|
}
|
|
delete next_;
|
|
}
|
|
|
|
void TessResultRenderer::insert(TessResultRenderer* next) {
|
|
if (next == nullptr) return;
|
|
|
|
TessResultRenderer* remainder = next_;
|
|
next_ = next;
|
|
if (remainder) {
|
|
while (next->next_ != nullptr) {
|
|
next = next->next_;
|
|
}
|
|
next->next_ = remainder;
|
|
}
|
|
}
|
|
|
|
bool TessResultRenderer::BeginDocument(const char* title) {
|
|
if (!happy_) return false;
|
|
title_ = title;
|
|
imagenum_ = -1;
|
|
bool ok = BeginDocumentHandler();
|
|
if (next_) {
|
|
ok = next_->BeginDocument(title) && ok;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
bool TessResultRenderer::AddImage(TessBaseAPI* api) {
|
|
if (!happy_) return false;
|
|
++imagenum_;
|
|
bool ok = AddImageHandler(api);
|
|
if (next_) {
|
|
ok = next_->AddImage(api) && ok;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
bool TessResultRenderer::EndDocument() {
|
|
if (!happy_) return false;
|
|
bool ok = EndDocumentHandler();
|
|
if (next_) {
|
|
ok = next_->EndDocument() && ok;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
void TessResultRenderer::AppendString(const char* s) {
|
|
AppendData(s, strlen(s));
|
|
}
|
|
|
|
void TessResultRenderer::AppendData(const char* s, int len) {
|
|
if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
|
|
}
|
|
|
|
bool TessResultRenderer::BeginDocumentHandler() {
|
|
return happy_;
|
|
}
|
|
|
|
bool TessResultRenderer::EndDocumentHandler() {
|
|
return happy_;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* UTF8 Text Renderer interface implementation
|
|
**********************************************************************/
|
|
TessTextRenderer::TessTextRenderer(const char *outputbase)
|
|
: TessResultRenderer(outputbase, "txt") {
|
|
}
|
|
|
|
bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
|
|
const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
|
|
if (utf8 == nullptr) {
|
|
return false;
|
|
}
|
|
|
|
AppendString(utf8.get());
|
|
|
|
const char* pageSeparator = api->GetStringVariable("page_separator");
|
|
if (pageSeparator != nullptr && *pageSeparator != '\0') {
|
|
AppendString(pageSeparator);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* TSV Text Renderer interface implementation
|
|
**********************************************************************/
|
|
TessTsvRenderer::TessTsvRenderer(const char* outputbase)
|
|
: TessResultRenderer(outputbase, "tsv") {
|
|
font_info_ = false;
|
|
}
|
|
|
|
TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
|
|
: TessResultRenderer(outputbase, "tsv") {
|
|
font_info_ = font_info;
|
|
}
|
|
|
|
bool TessTsvRenderer::BeginDocumentHandler() {
|
|
// Output TSV column headings
|
|
AppendString(
|
|
"level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
|
|
"num\tleft\ttop\twidth\theight\tconf\ttext\n");
|
|
return true;
|
|
}
|
|
|
|
bool TessTsvRenderer::EndDocumentHandler() { return true; }
|
|
|
|
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
|
|
const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
|
|
if (tsv == nullptr) return false;
|
|
|
|
AppendString(tsv.get());
|
|
|
|
return true;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* UNLV Text Renderer interface implementation
|
|
**********************************************************************/
|
|
TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
|
|
: TessResultRenderer(outputbase, "unlv") {
|
|
}
|
|
|
|
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
|
|
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
|
|
if (unlv == nullptr) return false;
|
|
|
|
AppendString(unlv.get());
|
|
|
|
return true;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* BoxText Renderer interface implementation
|
|
**********************************************************************/
|
|
TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
|
|
: TessResultRenderer(outputbase, "box") {
|
|
}
|
|
|
|
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
|
|
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
|
|
if (text == nullptr) return false;
|
|
|
|
AppendString(text.get());
|
|
|
|
return true;
|
|
}
|
|
|
|
#ifndef DISABLED_LEGACY_ENGINE
|
|
|
|
/**********************************************************************
|
|
* Osd Text Renderer interface implementation
|
|
**********************************************************************/
|
|
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
|
|
: TessResultRenderer(outputbase, "osd") {}
|
|
|
|
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
|
|
char* osd = api->GetOsdText(imagenum());
|
|
if (osd == nullptr) return false;
|
|
|
|
AppendString(osd);
|
|
delete[] osd;
|
|
|
|
return true;
|
|
}
|
|
|
|
#endif // ndef DISABLED_LEGACY_ENGINE
|
|
|
|
} // namespace tesseract
|