mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 10:17:50 +08:00
dfb81e163e
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@484 d0cd1f9f-072b-0410-8dd7-cf729c803f20
1049 lines
20 KiB
HTML
1049 lines
20 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
|
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
<meta name="generator" content="AsciiDoc 8.5.2" />
|
|
<title>TESSERACT(1)</title>
|
|
<style type="text/css">
|
|
/* Debug borders */
|
|
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
|
/*
|
|
border: 1px solid red;
|
|
*/
|
|
}
|
|
|
|
body {
|
|
margin: 1em 5% 1em 5%;
|
|
}
|
|
|
|
a {
|
|
color: blue;
|
|
text-decoration: underline;
|
|
}
|
|
a:visited {
|
|
color: fuchsia;
|
|
}
|
|
|
|
em {
|
|
font-style: italic;
|
|
color: navy;
|
|
}
|
|
|
|
strong {
|
|
font-weight: bold;
|
|
color: #083194;
|
|
}
|
|
|
|
tt {
|
|
color: navy;
|
|
}
|
|
|
|
h1, h2, h3, h4, h5, h6 {
|
|
color: #527bbd;
|
|
font-family: sans-serif;
|
|
margin-top: 1.2em;
|
|
margin-bottom: 0.5em;
|
|
line-height: 1.3;
|
|
}
|
|
|
|
h1, h2, h3 {
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
h2 {
|
|
padding-top: 0.5em;
|
|
}
|
|
h3 {
|
|
float: left;
|
|
}
|
|
h3 + * {
|
|
clear: left;
|
|
}
|
|
|
|
div.sectionbody {
|
|
font-family: serif;
|
|
margin-left: 0;
|
|
}
|
|
|
|
hr {
|
|
border: 1px solid silver;
|
|
}
|
|
|
|
p {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
|
|
ul, ol, li > p {
|
|
margin-top: 0;
|
|
}
|
|
|
|
pre {
|
|
padding: 0;
|
|
margin: 0;
|
|
}
|
|
|
|
span#author {
|
|
color: #527bbd;
|
|
font-family: sans-serif;
|
|
font-weight: bold;
|
|
font-size: 1.1em;
|
|
}
|
|
span#email {
|
|
}
|
|
span#revnumber, span#revdate, span#revremark {
|
|
font-family: sans-serif;
|
|
}
|
|
|
|
div#footer {
|
|
font-family: sans-serif;
|
|
font-size: small;
|
|
border-top: 2px solid silver;
|
|
padding-top: 0.5em;
|
|
margin-top: 4.0em;
|
|
}
|
|
div#footer-text {
|
|
float: left;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
div#footer-badges {
|
|
float: right;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
|
|
div#preamble {
|
|
margin-top: 1.5em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
|
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
|
div.admonitionblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.admonitionblock {
|
|
margin-top: 2.0em;
|
|
margin-bottom: 2.0em;
|
|
margin-right: 10%;
|
|
color: #606060;
|
|
}
|
|
|
|
div.content { /* Block element content. */
|
|
padding: 0;
|
|
}
|
|
|
|
/* Block element titles. */
|
|
div.title, caption.title {
|
|
color: #527bbd;
|
|
font-family: sans-serif;
|
|
font-weight: bold;
|
|
text-align: left;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
div.title + * {
|
|
margin-top: 0;
|
|
}
|
|
|
|
td div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content + div.title {
|
|
margin-top: 0.0em;
|
|
}
|
|
|
|
div.sidebarblock > div.content {
|
|
background: #ffffee;
|
|
border: 1px solid silver;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.listingblock > div.content {
|
|
border: 1px solid silver;
|
|
background: #f4f4f4;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.quoteblock, div.verseblock {
|
|
padding-left: 1.0em;
|
|
margin-left: 1.0em;
|
|
margin-right: 10%;
|
|
border-left: 5px solid #dddddd;
|
|
color: #777777;
|
|
}
|
|
|
|
div.quoteblock > div.attribution {
|
|
padding-top: 0.5em;
|
|
text-align: right;
|
|
}
|
|
|
|
div.verseblock > div.content {
|
|
white-space: pre;
|
|
}
|
|
div.verseblock > div.attribution {
|
|
padding-top: 0.75em;
|
|
text-align: left;
|
|
}
|
|
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
|
div.verseblock + div.attribution {
|
|
text-align: left;
|
|
}
|
|
|
|
div.admonitionblock .icon {
|
|
vertical-align: top;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
text-decoration: underline;
|
|
color: #527bbd;
|
|
padding-right: 0.5em;
|
|
}
|
|
div.admonitionblock td.content {
|
|
padding-left: 0.5em;
|
|
border-left: 3px solid #dddddd;
|
|
}
|
|
|
|
div.exampleblock > div.content {
|
|
border-left: 3px solid #dddddd;
|
|
padding-left: 0.5em;
|
|
}
|
|
|
|
div.imageblock div.content { padding-left: 0; }
|
|
span.image img { border-style: none; }
|
|
a.image:visited { color: white; }
|
|
|
|
dl {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
dt {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0;
|
|
font-style: normal;
|
|
color: navy;
|
|
}
|
|
dd > *:first-child {
|
|
margin-top: 0.1em;
|
|
}
|
|
|
|
ul, ol {
|
|
list-style-position: outside;
|
|
}
|
|
ol.arabic {
|
|
list-style-type: decimal;
|
|
}
|
|
ol.loweralpha {
|
|
list-style-type: lower-alpha;
|
|
}
|
|
ol.upperalpha {
|
|
list-style-type: upper-alpha;
|
|
}
|
|
ol.lowerroman {
|
|
list-style-type: lower-roman;
|
|
}
|
|
ol.upperroman {
|
|
list-style-type: upper-roman;
|
|
}
|
|
|
|
/* Tighter vertical spacing for lists, paragraphs and nested blocks inside
   a "compact" container. Each selector is listed once; repeating a selector
   in a group has no additional effect. */
div.compact ul, div.compact ol,
div.compact p,
div.compact div {
  margin-top: 0.1em;
  margin-bottom: 0.1em;
}
|
|
|
|
div.tableblock > table {
|
|
border: 3px solid #527bbd;
|
|
}
|
|
thead, p.table.header {
|
|
font-family: sans-serif;
|
|
font-weight: bold;
|
|
}
|
|
tfoot {
|
|
font-weight: bold;
|
|
}
|
|
td > div.verse {
|
|
white-space: pre;
|
|
}
|
|
p.table {
|
|
margin-top: 0;
|
|
}
|
|
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
|
div.tableblock > table[frame="void"] {
|
|
border-style: none;
|
|
}
|
|
div.tableblock > table[frame="hsides"] {
|
|
border-left-style: none;
|
|
border-right-style: none;
|
|
}
|
|
div.tableblock > table[frame="vsides"] {
|
|
border-top-style: none;
|
|
border-bottom-style: none;
|
|
}
|
|
|
|
|
|
div.hdlist {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
div.hdlist tr {
|
|
padding-bottom: 15px;
|
|
}
|
|
dt.hdlist1.strong, td.hdlist1.strong {
|
|
font-weight: bold;
|
|
}
|
|
td.hdlist1 {
|
|
vertical-align: top;
|
|
font-style: normal;
|
|
padding-right: 0.8em;
|
|
color: navy;
|
|
}
|
|
td.hdlist2 {
|
|
vertical-align: top;
|
|
}
|
|
div.hdlist.compact tr {
|
|
margin: 0;
|
|
padding-bottom: 0;
|
|
}
|
|
|
|
.comment {
|
|
background: yellow;
|
|
}
|
|
|
|
.footnote, .footnoteref {
|
|
font-size: 0.8em;
|
|
}
|
|
|
|
span.footnote, span.footnoteref {
|
|
vertical-align: super;
|
|
}
|
|
|
|
#footnotes {
|
|
margin: 20px 0 20px 0;
|
|
padding: 7px 0 0 0;
|
|
}
|
|
|
|
#footnotes div.footnote {
|
|
margin: 0 0 5px 0;
|
|
}
|
|
|
|
#footnotes hr {
|
|
border: none;
|
|
border-top: 1px solid silver;
|
|
height: 1px;
|
|
text-align: left;
|
|
margin-left: 0;
|
|
width: 20%;
|
|
min-width: 100px;
|
|
}
|
|
|
|
|
|
@media print {
|
|
div#footer-badges { display: none; }
|
|
}
|
|
|
|
div#toc {
|
|
margin-bottom: 2.5em;
|
|
}
|
|
|
|
div#toctitle {
|
|
color: #527bbd;
|
|
font-family: sans-serif;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.1em;
|
|
}
|
|
|
|
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
|
margin-top: 0;
|
|
margin-bottom: 0;
|
|
}
|
|
div.toclevel2 {
|
|
margin-left: 2em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel3 {
|
|
margin-left: 4em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel4 {
|
|
margin-left: 6em;
|
|
font-size: 0.9em;
|
|
}
|
|
/* Overrides for manpage documents */
|
|
h1 {
|
|
padding-top: 0.5em;
|
|
padding-bottom: 0.5em;
|
|
border-top: 2px solid silver;
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
h2 {
|
|
border-style: none;
|
|
}
|
|
div.sectionbody {
|
|
margin-left: 5%;
|
|
}
|
|
|
|
@media print {
|
|
div#toc { display: none; }
|
|
}
|
|
|
|
/* Workarounds for IE6's broken and incomplete CSS2. */
|
|
|
|
div.sidebar-content {
|
|
background: #ffffee;
|
|
border: 1px solid silver;
|
|
padding: 0.5em;
|
|
}
|
|
div.sidebar-title, div.image-title {
|
|
color: #527bbd;
|
|
font-family: sans-serif;
|
|
font-weight: bold;
|
|
margin-top: 0.0em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
|
|
div.listingblock div.content {
|
|
border: 1px solid silver;
|
|
background: #f4f4f4;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.quoteblock-attribution {
|
|
padding-top: 0.5em;
|
|
text-align: right;
|
|
}
|
|
|
|
div.verseblock-content {
|
|
white-space: pre;
|
|
}
|
|
div.verseblock-attribution {
|
|
padding-top: 0.75em;
|
|
text-align: left;
|
|
}
|
|
|
|
div.exampleblock-content {
|
|
border-left: 3px solid #dddddd;
|
|
padding-left: 0.5em;
|
|
}
|
|
|
|
/* IE6 sets dynamically generated links as visited. */
|
|
div#toc a:visited { color: blue; }
|
|
</style>
|
|
<script type="text/javascript">
|
|
/*<![CDATA[*/
|
|
// Once the page has finished loading, convert inline footnote spans into
// numbered links and collect their text in the #footnotes container.
window.onload = function () {
  asciidoc.footnotes();
};
|
|
var asciidoc = {  // Namespace.

/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
/////////////////////////////////////////////////////////////////////

/* Author: Mihai Bazon, September 2002
 * http://students.infoiasi.ro/~mishoo
 *
 * Table Of Content generator
 * Version: 0.4
 *
 * Feel free to use this script under the terms of the GNU General Public
 * License, as long as you do not remove or alter this notice.
 */

 /* modified by Troy D. Hanson, September 2006. License: GPL */
 /* modified by Stuart Rackham, 2006, 2009. License: GPL */

/*
 * Fills the element with id "toc" with one link per heading found under
 * the element with id "content".
 *
 * toclevels = 1..4. Headings h2 .. h(toclevels+1) are collected, so level 1
 * corresponds to h2. Headings whose class attribute is "float" are skipped.
 * A heading without an id is given the id "_toc_<index>" so it can be
 * linked to. Each entry is a <div class="toclevelN"> wrapping the link.
 *
 * Does nothing when there is no "toc" element. The "toc" element is removed
 * when no headings are found (including when "content" is missing).
 */
toc: function (toclevels) {

  // Returns the concatenated data of all text nodes beneath el, in
  // document order.
  function getText(el) {
    var text = "";
    for (var node = el.firstChild; node != null; node = node.nextSibling) {
      if (node.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
        text += node.data;
      else if (node.firstChild != null)
        text += getText(node);
    }
    return text;
  }

  // Record describing one heading: its element, its text and its TOC level.
  function TocEntry(el, text, toclevel) {
    this.element = el;
    this.text = text;
    this.toclevel = toclevel;
  }

  // Collects a TocEntry for every qualifying heading beneath el.
  function tocEntries(el, toclevels) {
    var result = [];
    // Anchored so that only a complete tag name h2..h(toclevels+1) matches.
    var re = new RegExp('^[hH]([2-' + (toclevels + 1) + '])$');
    // Function that scans the DOM tree for header elements (the DOM2
    // nodeIterator API would be a better technique but not supported by all
    // browsers).
    var iterate = function (el) {
      for (var node = el.firstChild; node != null; node = node.nextSibling) {
        if (node.nodeType == 1 /* Node.ELEMENT_NODE */) {
          var mo = re.exec(node.tagName);
          // Both attribute names are tried; either may hold the class value.
          if (mo && (node.getAttribute("class") || node.getAttribute("className")) != "float") {
            // h2 maps to TOC level 1, h3 to level 2, and so on.
            result.push(new TocEntry(node, getText(node), mo[1] - 1));
          }
          iterate(node);
        }
      }
    };
    iterate(el);
    return result;
  }

  var tocEl = document.getElementById("toc");
  if (!tocEl)
    return;  // No TOC container in this document: nothing to build.
  var content = document.getElementById("content");
  var entries = content ? tocEntries(content, toclevels) : [];
  for (var i = 0; i < entries.length; ++i) {
    var entry = entries[i];
    if (entry.element.id == "")
      entry.element.id = "_toc_" + i;
    var a = document.createElement("a");
    a.href = "#" + entry.element.id;
    a.appendChild(document.createTextNode(entry.text));
    var div = document.createElement("div");
    div.appendChild(a);
    div.className = "toclevel" + entry.toclevel;
    tocEl.appendChild(div);
  }
  if (entries.length == 0)
    tocEl.parentNode.removeChild(tocEl);
},


/////////////////////////////////////////////////////////////////////
// Footnotes generator
/////////////////////////////////////////////////////////////////////

/* Based on footnote generation code from:
 * http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
 */

/*
 * Converts every <span class="footnote">[text]</span> under the "content"
 * element into a numbered link, and appends the note text as
 * <div class="footnote" id="_footnote_N"> to the "footnotes" element.
 * Every <span class="footnoteref"> is then rewritten to link to the number
 * of the footnote whose span id matches the fragment of its anchor's href.
 *
 * Does nothing when there is no "footnotes" element. The "footnotes"
 * element is removed when no footnote was found. Footnote spans that are
 * not in "[text]" form, and footnoterefs without a resolvable target, are
 * left untouched.
 */
footnotes: function () {
  var noteholder = document.getElementById("footnotes");
  if (!noteholder)
    return;
  var cont = document.getElementById("content");
  var spans = cont ? cont.getElementsByTagName("span") : [];
  var refs = {};   // "#<span id>" -> footnote number.
  var notes = "";  // Markup for all footnotes, written to the DOM once.
  var n = 0;
  var i;
  for (i = 0; i < spans.length; i++) {
    if (spans[i].className == "footnote") {
      // Use [\s\S] in place of . so multi-line matches work.
      // Because JavaScript has no s (dotall) regex flag.
      var mo = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/);
      if (!mo)
        continue;  // Not "[text]": skip without consuming a number.
      n++;
      notes +=
        "<div class='footnote' id='_footnote_" + n + "'>" +
        "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
        n + "</a>. " + mo[1] + "</div>";
      spans[i].innerHTML =
        "[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
        "' title='View footnote' class='footnote'>" + n + "</a>]";
      var id = spans[i].getAttribute("id");
      if (id != null) refs["#" + id] = n;
    }
  }
  if (n == 0) {
    noteholder.parentNode.removeChild(noteholder);
    return;
  }
  noteholder.innerHTML += notes;
  // Process footnoterefs.
  for (i = 0; i < spans.length; i++) {
    if (spans[i].className == "footnoteref") {
      var anchor = spans[i].getElementsByTagName("a")[0];
      var href = anchor && anchor.getAttribute("href");
      var hash = href && href.match(/#.*/);  // Because IE return full URL.
      if (!hash || !refs.hasOwnProperty(hash[0]))
        continue;  // No anchor, no fragment, or unknown footnote id.
      var num = refs[hash[0]];
      spans[i].innerHTML =
        "[<a href='#_footnote_" + num +
        "' title='View footnote' class='footnote'>" + num + "</a>]";
    }
  }
}

};
|
|
/*]]>*/
|
|
</script>
|
|
</head>
|
|
<body>
|
|
<div id="header">
|
|
<h1>
|
|
TESSERACT(1) Manual Page
|
|
</h1>
|
|
<h2>NAME</h2>
|
|
<div class="sectionbody">
|
|
<p>tesseract -
|
|
command-line OCR engine
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="content">
|
|
<h2 id="_synopsis">SYNOPSIS</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p><strong>tesseract</strong> <em>imagename</em> <em>textbase</em> [<em>configfile</em>] [<em>-l lang</em>]</p></div>
|
|
</div>
|
|
<h2 id="_description">DESCRIPTION</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>tesseract(1) is a commercial quality OCR engine originally developed at HP
|
|
between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
|
|
UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
|
|
by Google since then.</p></div>
|
|
</div>
|
|
<h2 id="_options">OPTIONS</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p><em>imagename</em>
|
|
The name of the input image</p></div>
|
|
<div class="paragraph"><p><em>textbase</em>
|
|
The basename of the output file (to which the appropriate extension
|
|
will be appended)</p></div>
|
|
<div class="paragraph"><p><em>configfile</em>
|
|
The config to use. A config is a plaintext file which contains a list
|
|
of variables and their values, one per line, with a space separating
|
|
variable from value.</p></div>
|
|
<div class="paragraph"><p><em>-l lang</em>
|
|
The language to use. If none is specified, English is assumed.
|
|
Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)</p></div>
|
|
<div class="paragraph"><p><em>-v</em>
|
|
Returns the current version of the tesseract(1) executable.</p></div>
|
|
</div>
|
|
<h2 id="_languages">LANGUAGES</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>There are currently language packs available for the following languages:</p></div>
|
|
<div class="hdlist"><table>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
bul
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Bulgarian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
cat
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Catalan
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ces
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Czech
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
chi_sim
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Simplified Chinese
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
chi_tra
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Traditional Chinese
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
dan
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Danish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
dan-frak
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Danish (Fraktur)
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
deu
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
German
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ell
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Greek
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
eng
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
English
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
fin
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Finnish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
fra
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
French
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
hun
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Hungarian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ind
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Indonesian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ita
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Italian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
jpn
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Japanese
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
kor
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Korean
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
lav
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Latvian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
lit
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Lithuanian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
nld
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Dutch
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
nor
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Norwegian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
pol
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Polish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
por
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Portuguese
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ron
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Romanian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
rus
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Russian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
slk
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Slovak
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
slv
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Slovenian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
spa
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Spanish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
srp
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Serbian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
swe
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Swedish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
tgl
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Tagalog
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
tha
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Thai
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
tur
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Turkish
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
ukr
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Ukrainian
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
<tr>
|
|
<td class="hdlist1">
|
|
vie
|
|
<br />
|
|
</td>
|
|
<td class="hdlist2">
|
|
<p style="margin-top: 0;">
|
|
Vietnamese
|
|
</p>
|
|
</td>
|
|
</tr>
|
|
</table></div>
|
|
</div>
|
|
<h2 id="_history">HISTORY</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>The engine was developed at Hewlett Packard Laboratories Bristol and at
|
|
Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
|
|
changes made in 1996 to port to Windows, and some C++izing in 1998. A
|
|
lot of the code was written in C, and then some more was written in C++.
|
|
Since then all the code has been converted to at least compile with a
|
|
C++ compiler. Currently it builds under Linux with gcc4.0, gcc4.1 and
|
|
under Windows with VC++6 and VC++Express. The C++ code makes heavy use of
|
|
a list system using macros. This predates stl, was portable before stl, and
|
|
is more efficient than stl lists, but has the big negative that if you do get
|
|
a segmentation violation, it is hard to debug. Another "feature" of the
|
|
C/C++ split is that the C++ data structures get converted to C data
|
|
structures to call the low-level C code. This is ugly, and the C++izing of
|
|
the C code is a step towards eliminating the conversion, but it has not
|
|
happened yet.</p></div>
|
|
<div class="paragraph"><p>The most important changes in version 2.00 were that Tesseract can now
|
|
recognize 6 languages, is fully UTF8 capable, and is fully trainable. See
|
|
<a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract</a> for more
|
|
information on training.</p></div>
|
|
<div class="paragraph"><p>Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy.
|
|
See <a href="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</a>. With Tesseract 2.00,
|
|
scripts are now included to allow anyone to reproduce some of these tests.
|
|
See <a href="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</a> for more
|
|
details.</p></div>
|
|
<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
|
|
and Korean. It also introduces a new, single-file based system of managing
|
|
language data. For further details, see the file ReleaseNotes included with
|
|
the distribution.</p></div>
|
|
</div>
|
|
<h2 id="_see_also">SEE ALSO</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>tesseract(1)</p></div>
|
|
</div>
|
|
<h2 id="_copying">COPYING</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph"><p>Licensed under the Apache License, Version 2.0</p></div>
|
|
</div>
|
|
</div>
|
|
<div id="footnotes"><hr /></div>
|
|
<div id="footer">
|
|
<div id="footer-text">
|
|
Last updated 2010-09-29 19:55:57 IST
|
|
</div>
|
|
</div>
|
|
</body>
|
|
</html>
|