mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-29 20:08:19 +08:00
103 lines
2.8 KiB
C
103 lines
2.8 KiB
C
|
///////////////////////////////////////////////////////////////////////
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
|
||
|
|
||
|
#ifndef TESSERACT_CCMAIN_OSDETECT_H__
|
||
|
#define TESSERACT_CCMAIN_OSDETECT_H__
|
||
|
|
||
|
#include <stddef.h>

#include "strngs.h"
#include "unicharset.h"
|
||
|
|
||
|
// Forward declarations: this header only refers to these types through
// pointers, so their full definitions are not needed here.
class TO_BLOCK_LIST;
class BLOBNBOX;
class BLOB_CHOICE_LIST;

namespace tesseract {
class Tesseract;
}  // namespace tesseract
|
||
|
|
||
|
// Upper bound on the number of distinct scripts that can be scored:
//   116  scripts in ICU
//   + 1  the "NULL" script
//   + 2  Japanese and Korean
//   + 1  Fraktur
// Used as the second dimension of OSResults::scripts_na.
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
|
||
|
|
||
|
// Plain aggregate describing a single "best" orientation/script guess.
// It deliberately has no constructor so that it stays brace-initialisable;
// OSResults embeds one instance as its best_result member.
// NOTE(review): field meanings below are inferred from the names only -
// confirm against the code that fills this struct in.
struct OSBestResult {
  int orientation;     // presumably an index into OSResults::orientations
  const char* script;  // presumably a script name; ownership not shown here
  float sconfidence;   // presumably the confidence of the script guess
  float oconfidence;   // presumably the confidence of the orientation guess
};
|
||
|
|
||
|
struct OSResults {
|
||
|
OSResults() {
|
||
|
for (int i = 0; i < 4; ++i) {
|
||
|
for (int j = 0; j < kMaxNumberOfScripts; ++j)
|
||
|
scripts_na[i][j] = 0;
|
||
|
orientations[i] = 0;
|
||
|
}
|
||
|
}
|
||
|
float orientations[4];
|
||
|
float scripts_na[4][kMaxNumberOfScripts];
|
||
|
|
||
|
UNICHARSET* unicharset;
|
||
|
OSBestResult best_result;
|
||
|
};
|
||
|
|
||
|
class OrientationDetector {
|
||
|
public:
|
||
|
OrientationDetector(OSResults*);
|
||
|
bool detect_blob(BLOB_CHOICE_LIST* scores);
|
||
|
void update_best_orientation();
|
||
|
int get_orientation();
|
||
|
private:
|
||
|
OSResults* osr_;
|
||
|
};
|
||
|
|
||
|
class ScriptDetector {
|
||
|
public:
|
||
|
ScriptDetector(OSResults*, tesseract::Tesseract* tess);
|
||
|
void detect_blob(BLOB_CHOICE_LIST* scores);
|
||
|
void update_best_script(int);
|
||
|
void get_script() ;
|
||
|
bool must_stop(int orientation);
|
||
|
private:
|
||
|
OSResults* osr_;
|
||
|
static const char* korean_script_;
|
||
|
static const char* japanese_script_;
|
||
|
static const char* fraktur_script_;
|
||
|
int korean_id_;
|
||
|
int japanese_id_;
|
||
|
int katakana_id_;
|
||
|
int hiragana_id_;
|
||
|
int han_id_;
|
||
|
int hangul_id_;
|
||
|
int latin_id_;
|
||
|
int fraktur_id_;
|
||
|
tesseract::Tesseract* tess_;
|
||
|
};
|
||
|
|
||
|
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably runs orientation and script detection on the
// input named by `filename` and reports through the OSResults argument -
// confirm against the definition, including what the bool result signals
// and whether `filename` is modified (it is taken by non-const reference).
bool orientation_and_script_detection(STRING& filename,
                                      OSResults*,
                                      tesseract::Tesseract*);
|
||
|
|
||
|
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably runs detection over the blobs of the given
// block list and accumulates into `osr` - confirm against the definition,
// including what the bool result signals.
bool os_detect(TO_BLOCK_LIST* port_blocks,
               OSResults* osr,
               tesseract::Tesseract* tess);
|
||
|
|
||
|
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably classifies a single blob and feeds the outcome
// to both detectors `o` and `s` - confirm against the definition,
// including what the bool result signals.
bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
                    ScriptDetector* s, OSResults*,
                    tesseract::Tesseract* tess);
|
||
|
#endif // TESSERACT_CCMAIN_OSDETECT_H__
|