// Mirror of https://github.com/tesseract-ocr/tesseract.git
// (synced 2024-12-23 23:17:49 +08:00), commit cdc35338c5:
//   "Wrote a function to throw an error if PSM is outside 0-13 or OEM is
//   outside 0-5. fixes #1234"
// File listing: 288 lines, 12 KiB, C++.
///////////////////////////////////////////////////////////////////////
// File:        publictypes.h
// Description: Types used in both the API and internally
// Author:      Ray Smith
// Created:     Wed Mar 03 09:22:53 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_

// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
// API-level code should include apitypes.h in preference to this file.

/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;
/**
 * Minimum believable resolution. Used as a default if there is no other
 * information, as it is safer to under-estimate than over-estimate.
 */
constexpr int kMinCredibleResolution = 70;
/** Maximum believable resolution. */
constexpr int kMaxCredibleResolution = 2400;
/**
 * Ratio between median blob size and likely resolution. Used to estimate
 * resolution when none is provided. This is basically 1/usual text size in
 * inches.
 */
constexpr int kResolutionEstimationFactor = 10;

/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
 * below, as well as kPolyBlockNames in publictypes.cpp.
 * Used extensively by ColPartition, and POLY_BLOCK.
 *
 * Enumerators take their implicit values, starting at 0 for PT_UNKNOWN, so
 * PT_COUNT equals the number of real block types.
 */
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT             // Number of block types. Keep as the last element.
};

/**
 * Returns true if PolyBlockType is a line type, i.e. either PT_HORZ_LINE or
 * PT_VERT_LINE.
 */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/**
 * Returns true if PolyBlockType is of image type: PT_FLOWING_IMAGE,
 * PT_HEADING_IMAGE or PT_PULLOUT_IMAGE.
 */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if PolyBlockType is of text type. Besides the *_TEXT values
 * this also accepts PT_TABLE and PT_INLINE_EQUATION; PT_EQUATION is not
 * treated as text.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/**
 * Returns true if PolyBlockType is of pullout (inter-column) type:
 * PT_PULLOUT_IMAGE or PT_PULLOUT_TEXT.
 */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}

/** String name for each block type. Keep in sync with PolyBlockType. */
extern const char* kPolyBlockNames[];

namespace tesseract {
/**
 * +------------------+  Orientation Example:
 * | 1 Aaaa Aaaa Aaaa |  ====================
 * | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 * | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 * |                2 |
 * |   #######  c c C |  Upright Latin characters are represented as A and a.
 * |   #######  c c c |  '<' represents a latin character rotated
 * | < #######  c c c |      anti-clockwise 90 degrees.
 * | < #######  c   c |
 * | < #######  .   c |  Upright Chinese characters are represented C and c.
 * | 3 #######      c |
 * +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * If you orient your head so that "up" aligns with Orientation,
 * then the characters will appear "right side up" and readable.
 *
 * In the example above, both the English and Chinese paragraphs are oriented
 * so their "up" is the top of the page (page up).  The photo credit is read
 * with one's head turned leftward ("up" is to page left).
 *
 * The values of this enum match the convention of Tesseract's osdetect.h
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3,
};

/**
 * The grapheme clusters within a line of text are laid out logically
 * in this direction, judged when looking at the text line rotated so that
 * its Orientation is "page up".
 *
 * For English text, the writing direction is left-to-right.  For the
 * Chinese text in the above example, the writing direction is top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};

/**
 * The text lines are read in the given sequence.
 *
 * In English, the order is top-to-bottom.
 * In Chinese, vertical text lines are read right-to-left.  Mongolian is
 * written in vertical columns top to bottom like Chinese, but the lines
 * order left-to-right.
 *
 * Note that only some combinations make sense.  For example,
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};

/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the PSM_* helper functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY,       ///< Orientation and script detection only.
  PSM_AUTO_OSD,       ///< Automatic page segmentation with orientation and
                      ///< script detection. (OSD)
  PSM_AUTO_ONLY,      ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO,           ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN,  ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT,  ///< Assume a single uniform block of vertically
                               ///< aligned text.
  PSM_SINGLE_BLOCK,   ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE,    ///< Treat the image as a single text line.
  PSM_SINGLE_WORD,    ///< Treat the image as a single word.
  PSM_CIRCLE_WORD,    ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR,    ///< Treat the image as a single character.
  PSM_SPARSE_TEXT,    ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD,  ///< Sparse text with orientation and script det.
  PSM_RAW_LINE,       ///< Treat the image as a single text line, bypassing
                      ///< hacks that are Tesseract-specific.

  PSM_COUNT           ///< Number of enum entries.
};

/**
 * Inline functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 * The argument is not range-checked here: each function simply applies the
 * comparisons described on it to whatever integer it is given.
 */

/** True for any mode <= PSM_AUTO_OSD, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for any mode <= PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_AUTO]. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD only. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_COLUMN]. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_BLOCK]. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_LINE], and for
 * the two sparse modes PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD.
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/**
 * enum of the elements of the page hierarchy, used in ResultIterator
 * to provide functions that operate on each level without having to
 * have 5x as many functions.
 */
enum PageIteratorLevel {
  RIL_BLOCK,     // Block of text/image/separator line.
  RIL_PARA,      // Paragraph within a block.
  RIL_TEXTLINE,  // Line within a paragraph.
  RIL_WORD,      // Word within a textline.
  RIL_SYMBOL     // Symbol/character within a word.
};

/**
 * JUSTIFICATION_UNKNOWN
 *   The alignment is not clearly one of the other options.  This could happen
 *   for example if there are only one or two lines of text or the text looks
 *   like source code or poetry.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 *    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
 *    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
 *    their text is written in a right-to-left script.
 *
 * Interpretation for text read in vertical lines:
 *   "Left" is wherever the starting reading position is.
 *
 * JUSTIFICATION_LEFT
 *   Each line, except possibly the first, is flush to the same left tab stop.
 *
 * JUSTIFICATION_CENTER
 *   The text lines of the paragraph are centered about a line going
 *   down through the middle of the text lines.
 *
 * JUSTIFICATION_RIGHT
 *   Each line, except possibly the first, is flush to the same right tab stop.
 */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT,
};

/**
 * When Tesseract/Cube is initialized we can choose to instantiate/load/run
 * only the Tesseract part, only the Cube part or both along with the combiner.
 * The preference of which engine to use is stored in tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, please make sure to make the
 * appropriate changes to all the enums mirroring it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums will
 * mention the connection to OcrEngineMode in the comments.
 */
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest
  OEM_LSTM_ONLY,                // Run just the LSTM line recognizer.
  OEM_TESSERACT_LSTM_COMBINED,  // Run the LSTM recognizer, but allow fallback
                                // to Tesseract when things get difficult.
  OEM_DEFAULT,                  // Specify this mode when calling init_*(),
                                // to indicate that any of the above modes
                                // should be automatically inferred from the
                                // variables in the language-specific config,
                                // command-line configs, or if not specified
                                // in any of the above should be set to the
                                // default OEM_TESSERACT_ONLY.
  OEM_CUBE_ONLY,                // Run Cube only - better accuracy, but slower
  OEM_TESSERACT_CUBE_COMBINED,  // Run both and combine results - best accuracy
  OEM_COUNT                     // Number of OEMs
};

}  // namespace tesseract.

#endif  // TESSERACT_CCSTRUCT_PUBLICTYPES_H_