fix links in doc; autotools requires README

This commit is contained in:
Zdenko Podobný 2015-06-13 00:08:05 +02:00
parent 0ee178d79b
commit 9b7f2527f1
43 changed files with 3130 additions and 1330 deletions

View File

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: ambiguous_words .\" Title: ambiguous_words
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "AMBIGUOUS_WORDS" "1" "02/09/2012" "\ \&" "\ \&" .TH "AMBIGUOUS_WORDS" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>AMBIGUOUS_WORDS(1)</title> <title>AMBIGUOUS_WORDS(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
AMBIGUOUS_WORDS(1) Manual Page AMBIGUOUS_WORDS(1) Manual Page
@ -580,10 +744,13 @@ AMBIGUOUS_WORDS(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div> <div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word <div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
@ -591,25 +758,32 @@ in word list, produces a set of words which Tesseract thinks might be
ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of
a directory containing <em>tessdata/lang.traineddata</em>.</p></div> a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1)</p></div> <div class="paragraph"><p>tesseract(1)</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc. <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-07 13:38:29 PDT Last updated 2015-05-13 19:59:45 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>AMBIGUOUS_WORDS(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>ambiguous_words</refentrytitle> <refentrytitle>ambiguous_words</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>ambiguous_words</refname> <refname>ambiguous_words</refname>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: cntraining .\" Title: cntraining
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "CNTRAINING" "1" "02/09/2012" "\ \&" "\ \&" .TH "CNTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -45,7 +45,7 @@ Directory to write output files to\&.
.sp .sp
tesseract(1), shapeclustering(1), mftraining(1) tesseract(1), shapeclustering(1), mftraining(1)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING" .SH "COPYING"
.sp .sp
Copyright (c) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0 Copyright (c) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0

View File

@ -24,7 +24,7 @@ SEE ALSO
-------- --------
tesseract(1), shapeclustering(1), mftraining(1) tesseract(1), shapeclustering(1), mftraining(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING COPYING
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>CNTRAINING(1)</title> <title>CNTRAINING(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
CNTRAINING(1) Manual Page CNTRAINING(1) Manual Page
@ -580,16 +744,21 @@ CNTRAINING(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div> <div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the <div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
<strong>normproto</strong> data file (the character normalization sensitivity <strong>normproto</strong> data file (the character normalization sensitivity
prototypes).</p></div> prototypes).</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="dlist"><dl> <div class="dlist"><dl>
@ -603,26 +772,33 @@ prototypes).</p></div>
</dd> </dd>
</dl></div> </dl></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div> <div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988 <div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-09 11:37:31 PDT Last updated 2015-06-12 23:50:30 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>CNTRAINING(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>cntraining</refentrytitle> <refentrytitle>cntraining</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>cntraining</refname> <refname>cntraining</refname>
@ -40,7 +43,7 @@ prototypes).</simpara>
<refsect1 id="_see_also"> <refsect1 id="_see_also">
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara> <simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_copying"> <refsect1 id="_copying">
<title>COPYING</title> <title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: combine_tessdata .\" Title: combine_tessdata
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "COMBINE_TESSDATA" "1" "02/09/2012" "\ \&" "\ \&" .TH "COMBINE_TESSDATA" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -107,7 +107,7 @@ This will create /home/$USER/temp/eng\&.* files with individual tessdata compone
\fIPrefix\fR refers to the full file prefix, including period (\&.) \fIPrefix\fR refers to the full file prefix, including period (\&.)
.SH "COMPONENTS" .SH "COMPONENTS"
.sp .sp
The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.PP .PP
lang\&.config lang\&.config
.RS 4 .RS 4

View File

@ -76,7 +76,7 @@ COMPONENTS
The components in a Tesseract lang.traineddata file as of The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on Tesseract 3.02 are briefly described below; For more information on
many of these files, see many of these files, see
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
lang.config:: lang.config::
(Optional) Language-specific overrides to default config variables. (Optional) Language-specific overrides to default config variables.

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>COMBINE_TESSDATA(1)</title> <title>COMBINE_TESSDATA(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
COMBINE_TESSDATA(1) Manual Page COMBINE_TESSDATA(1) Manual Page
@ -580,10 +744,13 @@ COMBINE_TESSDATA(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div> <div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite <div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite
@ -593,7 +760,7 @@ classifier templates, ambiguities, language configs) located at, say,
/home/$USER/temp/eng.* run:</p></div> /home/$USER/temp/eng.* run:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>combine_tessdata /home/$USER/temp/eng.</tt></pre> <pre><code>combine_tessdata /home/$USER/temp/eng.</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div> <div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
<div class="paragraph"><p>Specify option -e if you would like to extract individual components <div class="paragraph"><p>Specify option -e if you would like to extract individual components
@ -601,8 +768,8 @@ from a combined traineddata file. For example, to extract language config
file and the unicharset from tessdata/eng.traineddata run:</p></div> file and the unicharset from tessdata/eng.traineddata run:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>combine_tessdata -e tessdata/eng.traineddata \ <pre><code>combine_tessdata -e tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</tt></pre> /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>The desired config file and unicharset will be written to <div class="paragraph"><p>The desired config file and unicharset will be written to
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div> /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
@ -611,8 +778,8 @@ file and the unicharset from tessdata/eng.traineddata run:</p></div>
and unichar ambiguities files in tessdata/eng.traineddata use:</p></div> and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>combine_tessdata -o tessdata/eng.traineddata \ <pre><code>combine_tessdata -o tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</tt></pre> /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config <div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div> and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
@ -623,11 +790,13 @@ ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
<div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div> <div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</tt></pre> <pre><code>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>This will create /home/$USER/temp/eng.* files with individual tessdata <div class="paragraph"><p>This will create /home/$USER/temp/eng.* files with individual tessdata
components from tessdata/eng.traineddata.</p></div> components from tessdata/eng.traineddata.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;: <div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
@ -638,16 +807,20 @@ components from tessdata/eng.traineddata.</p></div>
<div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em> <div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
Unpacks the .traineddata using the provided prefix.</p></div> Unpacks the .traineddata using the provided prefix.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_caveats">CAVEATS</h2> <h2 id="_caveats">CAVEATS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div> <div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_components">COMPONENTS</h2> <h2 id="_components">COMPONENTS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of <div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on Tesseract 3.02 are briefly described below; For more information on
many of these files, see many of these files, see
<a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
<div class="dlist"><dl> <div class="dlist"><dl>
<dt class="hdlist1"> <dt class="hdlist1">
lang.config lang.config
@ -802,30 +975,39 @@ lang.params-training-model
</dd> </dd>
</dl></div> </dl></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2> <h2 id="_history">HISTORY</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div> <div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), <div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
unicharambigs(5)</p></div> unicharambigs(5)</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2009, Google Inc. <div class="paragraph"><p>Copyright (C) 2009, Google Inc.
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-08 10:52:17 PDT Last updated 2015-06-12 23:52:02 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>COMBINE_TESSDATA(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>combine_tessdata</refentrytitle> <refentrytitle>combine_tessdata</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>combine_tessdata</refname> <refname>combine_tessdata</refname>
@ -67,7 +70,7 @@ components from tessdata/eng.traineddata.</simpara>
<simpara>The components in a Tesseract lang.traineddata file as of <simpara>The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on Tesseract 3.02 are briefly described below; For more information on
many of these files, see many of these files, see
<ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
<variablelist> <variablelist>
<varlistentry> <varlistentry>
<term> <term>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: dawg2wordlist .\" Title: dawg2wordlist
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "DAWG2WORDLIST" "1" "02/09/2012" "\ \&" "\ \&" .TH "DAWG2WORDLIST" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -46,7 +46,7 @@ dawg2wordlist(1) converts a Tesseract Directed Acyclic Word Graph (DAWG) to a li
.sp .sp
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), combine_tessdata(1) tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), combine_tessdata(1)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING" .SH "COPYING"
.sp .sp
Copyright (C) 2012 Google, Inc\&. Licensed under the Apache License, Version 2\&.0 Copyright (C) 2012 Google, Inc\&. Licensed under the Apache License, Version 2\&.0

View File

@ -32,7 +32,7 @@ SEE ALSO
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1) combine_tessdata(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING COPYING
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>DAWG2WORDLIST(1)</title> <title>DAWG2WORDLIST(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnotes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
DAWG2WORDLIST(1) Manual Page DAWG2WORDLIST(1) Manual Page
@ -580,15 +744,20 @@ DAWG2WORDLIST(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div> <div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word <div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
Graph (DAWG) to a list of words using a unicharset as key.</p></div> Graph (DAWG) to a list of words using a unicharset as key.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><em>UNICHARSET</em> <div class="paragraph"><p><em>UNICHARSET</em>
@ -599,27 +768,34 @@ Graph (DAWG) to a list of words using a unicharset as key.</p></div>
<div class="paragraph"><p><em>WORDLIST</em> <div class="paragraph"><p><em>WORDLIST</em>
Plain text (output) file in UTF-8, one word per line</p></div> Plain text (output) file in UTF-8, one word per line</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), <div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1)</p></div> combine_tessdata(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc. <div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-07 13:38:01 PDT Last updated 2015-06-12 23:52:09 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>DAWG2WORDLIST(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>dawg2wordlist</refentrytitle> <refentrytitle>dawg2wordlist</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>dawg2wordlist</refname> <refname>dawg2wordlist</refname>
@ -35,7 +38,7 @@ Graph (DAWG) to a list of words using a unicharset as key.</simpara>
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), <simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1)</simpara> combine_tessdata(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_copying"> <refsect1 id="_copying">
<title>COPYING</title> <title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: mftraining .\" Title: mftraining
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "MFTRAINING" "1" "02/09/2012" "\ \&" "\ \&" .TH "MFTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -85,7 +85,7 @@ Directory to write output files to\&.
.sp .sp
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), shapeclustering(1), unicharset(5) tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), shapeclustering(1), unicharset(5)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING" .SH "COPYING"
.sp .sp
Copyright (C) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0 Copyright (C) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0

View File

@ -43,7 +43,7 @@ SEE ALSO
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5) shapeclustering(1), unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING COPYING
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>MFTRAINING(1)</title> <title>MFTRAINING(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
MFTRAINING(1) Manual Page MFTRAINING(1) Manual Page
@ -580,10 +744,13 @@ MFTRAINING(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>&#8230;</p></div> <div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>&#8230;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the <div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
@ -591,6 +758,8 @@ files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</stron
(the number of expected features for each character). (A fourth file (the number of expected features for each character). (A fourth file
called Microfeat is also written by this program, but it is not used.)</p></div> called Microfeat is also written by this program, but it is not used.)</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="dlist"><dl> <div class="dlist"><dl>
@ -611,7 +780,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</p> </p>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</tt></pre> <pre><code>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</code></pre>
</div></div> </div></div>
</dd> </dd>
<dt class="hdlist1"> <dt class="hdlist1">
@ -623,7 +792,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</p> </p>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>*font_name* *xheight*</tt></pre> <pre><code>*font_name* *xheight*</code></pre>
</div></div> </div></div>
</dd> </dd>
<dt class="hdlist1"> <dt class="hdlist1">
@ -644,27 +813,34 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</dd> </dd>
</dl></div> </dl></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), <div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5)</p></div> shapeclustering(1), unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988 <div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-09 14:23:49 PDT Last updated 2015-06-12 23:52:19 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>MFTRAINING(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>mftraining</refentrytitle> <refentrytitle>mftraining</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>mftraining</refname> <refname>mftraining</refname>
@ -84,7 +87,7 @@ called Microfeat is also written by this program, but it is not used.)</simpara>
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), <simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5)</simpara> shapeclustering(1), unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_copying"> <refsect1 id="_copying">
<title>COPYING</title> <title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: shapeclustering .\" Title: shapeclustering
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "SHAPECLUSTERING" "1" "02/09/2012" "\ \&" "\ \&" .TH "SHAPECLUSTERING" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -85,7 +85,7 @@ The output unicharset that will be given to combine_tessdata(1)\&.
.sp .sp
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), unicharset(5) tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), unicharset(5)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING" .SH "COPYING"
.sp .sp
Copyright (C) Google, 2011 Licensed under the Apache License, Version 2\&.0 Copyright (C) Google, 2011 Licensed under the Apache License, Version 2\&.0

View File

@ -46,7 +46,7 @@ SEE ALSO
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5) unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING COPYING
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>SHAPECLUSTERING(1)</title> <title>SHAPECLUSTERING(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnotes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
SHAPECLUSTERING(1) Manual Page SHAPECLUSTERING(1) Manual Page
@ -580,6 +744,7 @@ SHAPECLUSTERING(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>shapeclustering -D <em>output_dir</em> <div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
@ -587,6 +752,8 @@ SHAPECLUSTERING(1) Manual Page
-F <em>font_props</em> -X <em>xheights</em> -F <em>font_props</em> -X <em>xheights</em>
<em>FILE</em>&#8230;</p></div> <em>FILE</em>&#8230;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by <div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
@ -594,6 +761,8 @@ tesseract(1) run in a special mode from box files) and produces a
file <strong>shapetable</strong> and an enhanced unicharset. This program is still file <strong>shapetable</strong> and an enhanced unicharset. This program is still
experimental, and is not required (yet) for training Tesseract.</p></div> experimental, and is not required (yet) for training Tesseract.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="dlist"><dl> <div class="dlist"><dl>
@ -622,7 +791,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</p> </p>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</tt></pre> <pre><code>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</code></pre>
</div></div> </div></div>
</dd> </dd>
<dt class="hdlist1"> <dt class="hdlist1">
@ -634,7 +803,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</p> </p>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>'font_name' 'xheight'</tt></pre> <pre><code>'font_name' 'xheight'</code></pre>
</div></div> </div></div>
</dd> </dd>
<dt class="hdlist1"> <dt class="hdlist1">
@ -647,27 +816,34 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</dd> </dd>
</dl></div> </dl></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), <div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5)</p></div> unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) Google, 2011 <div class="paragraph"><p>Copyright (C) Google, 2011
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-09 14:19:44 PDT Last updated 2015-06-12 23:52:24 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>SHAPECLUSTERING(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>shapeclustering</refentrytitle> <refentrytitle>shapeclustering</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>shapeclustering</refname> <refname>shapeclustering</refname>
@ -87,7 +90,7 @@ experimental, and is not required (yet) for training Tesseract.</simpara>
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), <simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5)</simpara> unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_copying"> <refsect1 id="_copying">
<title>COPYING</title> <title>COPYING</title>

View File

@ -2,12 +2,12 @@
.\" Title: tesseract .\" Title: tesseract
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 08/02/2014 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "TESSERACT" "1" "08/02/2014" "\ \&" "\ \&" .TH "TESSERACT" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -224,7 +224,7 @@ The engine was developed at Hewlett Packard Laboratories Bristol and at Hewlett
.sp .sp
Version 2\&.00 brought Unicode (UTF\-8) support, six languages, and the ability to train Tesseract\&. Version 2\&.00 brought Unicode (UTF\-8) support, six languages, and the ability to train Tesseract\&.
.sp .sp
Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TestingTesseract\fR\m[] for more details\&. Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TestingTesseract\fR\m[] for more details\&.
.sp .sp
Tesseract 3\&.00 adds a number of new languages, including Chinese, Japanese, and Korean\&. It also introduces a new, single\-file based system of managing language data\&. Tesseract 3\&.00 adds a number of new languages, including Chinese, Japanese, and Korean\&. It also introduces a new, single\-file based system of managing language data\&.
.sp .sp
@ -233,7 +233,7 @@ Tesseract 3\&.02 adds BiDirectional text support, the ability to recognize multi
For further details, see the file ReleaseNotes included with the distribution\&. For further details, see the file ReleaseNotes included with the distribution\&.
.SH "RESOURCES" .SH "RESOURCES"
.sp .sp
Main web site: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/\fR\m[] Information on training: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] Main web site: \m[blue]\fBhttps://github\&.com/tesseract\-ocr\fR\m[] Information on training: \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "SEE ALSO" .SH "SEE ALSO"
.sp .sp
ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), unicharset_extractor(1), wordlist2dawg(1) ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), unicharset_extractor(1), wordlist2dawg(1)

View File

@ -218,9 +218,9 @@ Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
to train Tesseract. to train Tesseract.
Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy. Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
See <http://www.isri.unlv.edu/downloads/AT-1995.pdf>. With Tesseract 2.00, See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests. scripts are now included to allow anyone to reproduce some of these tests.
See <http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract> for more See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more
details. details.
Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
@ -234,8 +234,8 @@ For further details, see the file ReleaseNotes included with the distribution.
RESOURCES RESOURCES
--------- ---------
Main web site: <http://code.google.com/p/tesseract-ocr/> + Main web site: <https://github.com/tesseract-ocr> +
Information on training: <http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> Information on training: <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
SEE ALSO SEE ALSO
-------- --------

View File

@ -3,7 +3,7 @@
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.8" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>TESSERACT(1)</title> <title>TESSERACT(1)</title>
<style type="text/css"> <style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
@ -94,7 +94,9 @@ ul > li > * { color: black; }
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
#author { #author {
color: #527bbd; color: #527bbd;
@ -223,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -1047,7 +1049,7 @@ to train Tesseract.</p></div>
<div class="paragraph"><p>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy. <div class="paragraph"><p>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
See <a href="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</a>. With Tesseract 2.00, See <a href="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</a>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests. scripts are now included to allow anyone to reproduce some of these tests.
See <a href="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</a> for more See <a href="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</a> for more
details.</p></div> details.</p></div>
<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, <div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
and Korean. It also introduces a new, single-file based system of managing and Korean. It also introduces a new, single-file based system of managing
@ -1060,8 +1062,8 @@ multiple languages in a single image, and improved layout analysis.</p></div>
<div class="sect1"> <div class="sect1">
<h2 id="_resources">RESOURCES</h2> <h2 id="_resources">RESOURCES</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Main web site: <a href="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</a><br /> <div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br />
Information on training: <a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> Information on training: <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div> </div>
<div class="sect1"> <div class="sect1">
@ -1095,7 +1097,7 @@ Lloyd, Shobhit Saxena, and Thomas Kielbus.</p></div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2014-08-02 22:51:16 CEST Last updated 2015-06-12 23:49:44 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -327,7 +327,7 @@ to train Tesseract.</simpara>
<simpara>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy. <simpara>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
See <ulink url="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</ulink>. With Tesseract 2.00, See <ulink url="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</ulink>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests. scripts are now included to allow anyone to reproduce some of these tests.
See <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</ulink> for more See <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</ulink> for more
details.</simpara> details.</simpara>
<simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, <simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
and Korean. It also introduces a new, single-file based system of managing and Korean. It also introduces a new, single-file based system of managing
@ -338,8 +338,8 @@ multiple languages in a single image, and improved layout analysis.</simpara>
</refsect1> </refsect1>
<refsect1 id="_resources"> <refsect1 id="_resources">
<title>RESOURCES</title> <title>RESOURCES</title>
<simpara>Main web site: <ulink url="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</ulink><?asciidoc-br?> <simpara>Main web site: <ulink url="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</ulink><?asciidoc-br?>
Information on training: <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> Information on training: <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_see_also"> <refsect1 id="_see_also">
<title>SEE ALSO</title> <title>SEE ALSO</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: unicharambigs .\" Title: unicharambigs
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "UNICHARAMBIGS" "5" "02/09/2012" "\ \&" "\ \&" .TH "UNICHARAMBIGS" "5" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARAMBIGS(5)</title> <title>UNICHARAMBIGS(5)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
UNICHARAMBIGS(5) Manual Page UNICHARAMBIGS(5) Manual Page
@ -580,6 +744,7 @@ UNICHARAMBIGS(5) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) <div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
@ -588,7 +753,7 @@ or groups of characters.</p></div>
<div class="paragraph"><p>The file contains a number of lines, laid out as follow:</p></div> <div class="paragraph"><p>The file contains a number of lines, laid out as follow:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</tt></pre> <pre><code>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</code></pre>
</div></div> </div></div>
<div class="hdlist"><table> <div class="hdlist"><table>
<tr> <tr>
@ -652,13 +817,15 @@ replacement, 0 denotes an optional replacement.
unicharset. The numbers in fields one and three refer to the unicharset. The numbers in fields one and three refer to the
number of unichars (not bytes).</p></div> number of unichars (not bytes).</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_example">EXAMPLE</h2> <h2 id="_example">EXAMPLE</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>2 ' ' 1 " 1 <pre><code>2 ' ' 1 " 1
1 m 2 r n 0 1 m 2 r n 0
3 i i i 1 m 0</tt></pre> 3 i i i 1 m 0</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>'</em>' will <div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>'</em>' will
<strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character <strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character
@ -666,6 +833,8 @@ sequence <em>m</em> <strong>may</strong> be replaced by the <em>2</em> character
the <em>3</em> character sequence <strong>may</strong> be replaced by the <em>1</em> character the <em>3</em> character sequence <strong>may</strong> be replaced by the <em>1</em> character
sequence <em>m</em>.</p></div> sequence <em>m</em>.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2> <h2 id="_history">HISTORY</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a <div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
@ -673,26 +842,33 @@ similar format, called DangAmbigs (<em>dangerous ambiguities</em>) was used: the
format was almost identical, except only mandatory replacements could be format was almost identical, except only mandatory replacements could be
specified, and field 5 was absent.</p></div> specified, and field 5 was absent.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_bugs">BUGS</h2> <h2 id="_bugs">BUGS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>This is a documentation "bug": it&#8217;s not currently clear what should be done <div class="paragraph"><p>This is a documentation "bug": it&#8217;s not currently clear what should be done
in the case of ligatures (such as <em>fi</em>) which may also appear as regular in the case of ligatures (such as <em>fi</em>) which may also appear as regular
letters in the unicharset.</p></div> letters in the unicharset.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div> <div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-08 10:59:49 PDT Last updated 2015-05-13 19:59:45 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>UNICHARAMBIGS(5)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>unicharambigs</refentrytitle> <refentrytitle>unicharambigs</refentrytitle>
<manvolnum>5</manvolnum> <manvolnum>5</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>unicharambigs</refname> <refname>unicharambigs</refname>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: unicharset .\" Title: unicharset
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "UNICHARSET" "5" "02/09/2012" "\ \&" "\ \&" .TH "UNICHARSET" "5" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -214,7 +214,7 @@ The unicharset format first appeared with Tesseract 2\&.00, which was the first
.sp .sp
tesseract(1), combine_tessdata(1), unicharset_extractor(1) tesseract(1), combine_tessdata(1), unicharset_extractor(1)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "AUTHOR" .SH "AUTHOR"
.sp .sp
The Tesseract OCR engine was written by Ray Smith and his research groups at Hewlett Packard (1985\-1995) and Google (2006\-present)\&. The Tesseract OCR engine was written by Ray Smith and his research groups at Hewlett Packard (1985\-1995) and Google (2006\-present)\&.

View File

@ -124,7 +124,7 @@ SEE ALSO
-------- --------
tesseract(1), combine_tessdata(1), unicharset_extractor(1) tesseract(1), combine_tessdata(1), unicharset_extractor(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
AUTHOR AUTHOR

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARSET(5)</title> <title>UNICHARSET(5)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
// Schedule generation of the footnotes and, when requested, the table of contents.
// toclevels: forwarded unchanged to asciidoc.toc(); when it is falsy the TOC step is skipped.
install: function(toclevels) {
// Id of the polling timer started below; kept so the timer can be cancelled later.
var timerId;
// Rebuild the footnotes, then the TOC if toclevels was supplied.
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
// Final pass: stop the polling timer and rebuild one more time.
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
// Call reinstall every 500 ms until reinstallAndRemoveTimer cancels the timer.
// NOTE(review): presumably this lets partially loaded pages show footnotes/TOC early -- confirm.
timerId = setInterval(reinstall, 500);
// Run the final pass on DOMContentLoaded when addEventListener exists,
// otherwise fall back to assigning window.onload.
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
UNICHARSET(5) Manual Page UNICHARSET(5) Manual Page
@ -580,6 +744,7 @@ UNICHARSET(5) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Tesseract&#8217;s unicharset file contains information on each symbol <div class="paragraph"><p>Tesseract&#8217;s unicharset file contains information on each symbol
@ -596,12 +761,12 @@ Therefore, space gets unichar 0.</p></div>
<div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div> <div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>'character' 'properties' 'script' 'id'</tt></pre> <pre><code>'character' 'properties' 'script' 'id'</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div> <div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</tt></pre> <pre><code>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>Entries:</p></div> <div class="paragraph"><p>Entries:</p></div>
<div class="dlist"><dl> <div class="dlist"><dl>
@ -712,15 +877,17 @@ The UTF-8 representation of a "normalized form" of this unichar
</dd> </dd>
</dl></div> </dl></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_example_v2">EXAMPLE (v2)</h2> <h2 id="_example_v2">EXAMPLE (v2)</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>; 10 Common 46 <pre><code>; 10 Common 46
b 3 Latin 59 b 3 Latin 59
W 5 Latin 40 W 5 Latin 40
7 8 Common 66 7 8 Common 66
= 0 Common 93</tt></pre> = 0 Common 93</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the <div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the
binary number 10000 (10 in hexadecimal).</p></div> binary number 10000 (10 in hexadecimal).</p></div>
@ -736,20 +903,24 @@ are thus represented by the binary number 00000 (0 in hexadecimal).</p></div>
binary number 00001 (1 in hexadecimal): they are alphabetic, but neither binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
upper nor lower case.</p></div> upper nor lower case.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_example_v3_02">EXAMPLE (v3.02)</h2> <h2 id="_example_v3_02">EXAMPLE (v3.02)</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>110 <pre><code>110
NULL 0 NULL 0 NULL 0 NULL 0
N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1 1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9 9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
. . .</tt></pre> . . .</code></pre>
</div></div> </div></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_caveats">CAVEATS</h2> <h2 id="_caveats">CAVEATS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets <div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets
@ -759,6 +930,8 @@ the accuracy will be degraded.</p></div>
so changing it without re-generating the others is likely to have dire so changing it without re-generating the others is likely to have dire
consequences.</p></div> consequences.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2> <h2 id="_history">HISTORY</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the <div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the
@ -766,21 +939,26 @@ first version to support languages other than English. The unicharset file
contained only the first two fields, and the "ispunctuation" property was contained only the first two fields, and the "ispunctuation" property was
absent (punctuation was regarded as "0", as "=" is in the above example).</p></div> absent (punctuation was regarded as "0", as "=" is in the above example).</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div> <div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-08 11:01:57 PDT Last updated 2015-06-12 23:52:34 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>UNICHARSET(5)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>unicharset</refentrytitle> <refentrytitle>unicharset</refentrytitle>
<manvolnum>5</manvolnum> <manvolnum>5</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>unicharset</refname> <refname>unicharset</refname>
@ -206,7 +209,7 @@ absent (punctuation was regarded as "0", as "=" is in the above example.</simpar
<refsect1 id="_see_also"> <refsect1 id="_see_also">
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara> <simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_author"> <refsect1 id="_author">
<title>AUTHOR</title> <title>AUTHOR</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: unicharset_extractor .\" Title: unicharset_extractor
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "UNICHARSET_EXTRACTOR" "1" "02/09/2012" "\ \&" "\ \&" .TH "UNICHARSET_EXTRACTOR" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -57,7 +57,7 @@ If your system supports the wctype functions, these values will be set automatic
.sp .sp
tesseract(1), unicharset(5) tesseract(1), unicharset(5)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "HISTORY" .SH "HISTORY"
.sp .sp
unicharset_extractor first appeared in Tesseract 2\&.00\&. unicharset_extractor first appeared in Tesseract 2\&.00\&.

View File

@ -40,7 +40,7 @@ SEE ALSO
-------- --------
tesseract(1), unicharset(5) tesseract(1), unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
HISTORY HISTORY
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARSET_EXTRACTOR(1)</title> <title>UNICHARSET_EXTRACTOR(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnotes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
// Schedule generation of the footnotes and, when requested, the table of contents.
// toclevels: forwarded unchanged to asciidoc.toc(); when it is falsy the TOC step is skipped.
install: function(toclevels) {
// Id of the polling timer started below; kept so the timer can be cancelled later.
var timerId;
// Rebuild the footnotes, then the TOC if toclevels was supplied.
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
// Final pass: stop the polling timer and rebuild one more time.
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
// Call reinstall every 500 ms until reinstallAndRemoveTimer cancels the timer.
// NOTE(review): presumably this lets partially loaded pages show footnotes/TOC early -- confirm.
timerId = setInterval(reinstall, 500);
// Run the final pass on DOMContentLoaded when addEventListener exists,
// otherwise fall back to assigning window.onload.
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
UNICHARSET_EXTRACTOR(1) Manual Page UNICHARSET_EXTRACTOR(1) Manual Page
@ -580,10 +744,13 @@ UNICHARSET_EXTRACTOR(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>&#8230;</p></div> <div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>&#8230;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output. <div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output.
@ -592,7 +759,7 @@ program on the same training pages bounding box files as used for
clustering:</p></div> clustering:</p></div>
<div class="literalblock"> <div class="literalblock">
<div class="content"> <div class="content">
<pre><tt>unicharset_extractor fontfile_1.box fontfile_2.box ...</tt></pre> <pre><code>unicharset_extractor fontfile_1.box fontfile_2.box ...</code></pre>
</div></div> </div></div>
<div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply <div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply
<em>./unicharset</em> if no output directory is provided.</p></div> <em>./unicharset</em> if no output directory is provided.</p></div>
@ -609,30 +776,39 @@ file is changed) as they have to be in sync. This is made easier than in
previous versions by running unicharset_extractor before mftraining and previous versions by running unicharset_extractor before mftraining and
cntraining, and giving the unicharset to mftraining.</p></div> cntraining, and giving the unicharset to mftraining.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div> <div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2> <h2 id="_history">HISTORY</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div> <div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2006, Google Inc. <div class="paragraph"><p>Copyright (C) 2006, Google Inc.
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-09 09:19:05 PDT Last updated 2015-06-12 23:52:38 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>UNICHARSET_EXTRACTOR(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>unicharset_extractor</refentrytitle> <refentrytitle>unicharset_extractor</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>unicharset_extractor</refname> <refname>unicharset_extractor</refname>
@ -41,7 +44,7 @@ cntraining, and giving the unicharset to mftraining.</simpara>
<refsect1 id="_see_also"> <refsect1 id="_see_also">
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), unicharset(5)</simpara> <simpara>tesseract(1), unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_history"> <refsect1 id="_history">
<title>HISTORY</title> <title>HISTORY</title>

View File

@ -1,13 +1,13 @@
'\" t '\" t
.\" Title: wordlist2dawg .\" Title: wordlist2dawg
.\" Author: [see the "AUTHOR" section] .\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/> .\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 02/09/2012 .\" Date: 06/12/2015
.\" Manual: \ \& .\" Manual: \ \&
.\" Source: \ \& .\" Source: \ \&
.\" Language: English .\" Language: English
.\" .\"
.TH "WORDLIST2DAWG" "1" "02/09/2012" "\ \&" "\ \&" .TH "WORDLIST2DAWG" "1" "06/12/2015" "\ \&" "\ \&"
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
.\" * Define some portability stuff .\" * Define some portability stuff
.\" ----------------------------------------------------------------- .\" -----------------------------------------------------------------
@ -63,7 +63,7 @@ wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph (DAWG) for
.sp .sp
tesseract(1), combine_tessdata(1), dawg2wordlist(1) tesseract(1), combine_tessdata(1), dawg2wordlist(1)
.sp .sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[] \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING" .SH "COPYING"
.sp .sp
Copyright (C) 2006 Google, Inc\&. Licensed under the Apache License, Version 2\&.0 Copyright (C) 2006 Google, Inc\&. Licensed under the Apache License, Version 2\&.0

View File

@ -56,7 +56,7 @@ SEE ALSO
-------- --------
tesseract(1), combine_tessdata(1), dawg2wordlist(1) tesseract(1), combine_tessdata(1), dawg2wordlist(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3> <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING COPYING
------- -------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head> <head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" /> <meta name="generator" content="AsciiDoc 8.6.9" />
<title>WORDLIST2DAWG(1)</title> <title>WORDLIST2DAWG(1)</title>
<style type="text/css"> <style type="text/css">
/* Debug borders */ /* Shared CSS for AsciiDoc xhtml11 and html5 backends */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/* /* Default font. */
border: 1px solid red; body {
*/ font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
} }
body { body {
@ -35,13 +45,8 @@ strong {
color: #083194; color: #083194;
} }
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em; margin-top: 1.2em;
margin-bottom: 0.5em; margin-bottom: 0.5em;
line-height: 1.3; line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * { h3 + * {
clear: left; clear: left;
} }
h5 {
font-size: 1.0em;
}
div.sectionbody { div.sectionbody {
font-family: serif;
margin-left: 0; margin-left: 0;
} }
@ -77,45 +84,50 @@ p {
ul, ol, li > p { ul, ol, li > p {
margin-top: 0; margin-top: 0;
} }
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre { .monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
pre {
white-space: pre-wrap;
}
span#author { #author {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
font-size: 1.1em; font-size: 1.1em;
} }
span#email { #email {
} }
span#revnumber, span#revdate, span#revremark { #revnumber, #revdate, #revremark {
font-family: sans-serif;
} }
div#footer { #footer {
font-family: sans-serif;
font-size: small; font-size: small;
border-top: 2px solid silver; border-top: 2px solid silver;
padding-top: 0.5em; padding-top: 0.5em;
margin-top: 4.0em; margin-top: 4.0em;
} }
div#footer-text { #footer-text {
float: left; float: left;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#footer-badges { #footer-badges {
float: right; float: right;
padding-bottom: 0.5em; padding-bottom: 0.5em;
} }
div#preamble { #preamble {
margin-top: 1.5em; margin-top: 1.5em;
margin-bottom: 1.5em; margin-bottom: 1.5em;
} }
div.tableblock, div.imageblock, div.exampleblock, div.verseblock, div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock, div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock { div.admonitionblock {
margin-top: 1.0em; margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */ /* Block element titles. */
div.title, caption.title { div.title, caption.title {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-weight: bold; font-weight: bold;
text-align: left; text-align: left;
margin-top: 1.0em; margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content { div.sidebarblock > div.content {
background: #ffffee; background: #ffffee;
border: 1px solid silver; border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em; padding: 0.5em;
} }
div.listingblock > div.content { div.listingblock > div.content {
border: 1px solid silver; border: 1px solid #dddddd;
background: #f4f4f4; border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em; padding: 0.5em;
} }
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em; padding-left: 1.0em;
margin-left: 1.0em; margin-left: 1.0em;
margin-right: 10%; margin-right: 10%;
border-left: 5px solid #dddddd; border-left: 5px solid #f0f0f0;
color: #777777; color: #888;
} }
div.quoteblock > div.attribution { div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right; text-align: right;
} }
div.verseblock > div.content { div.verseblock > pre.content {
white-space: pre; font-family: inherit;
font-size: inherit;
} }
div.verseblock > div.attribution { div.verseblock > div.attribution {
padding-top: 0.75em; padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
} }
div.imageblock div.content { padding-left: 0; } div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; } span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; } a.image:visited { color: white; }
dl { dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot { tfoot {
font-weight: bold; font-weight: bold;
} }
td > div.verse { td > div.verse {
white-space: pre; white-space: pre;
} }
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist { div.hdlist {
margin-top: 0.8em; margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px; min-width: 100px;
} }
div.colist td {
@media print { padding-right: 0.5em;
div#footer-badges { display: none; } padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
} }
div#toc { @media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em; margin-bottom: 2.5em;
} }
div#toctitle { #toctitle {
color: #527bbd; color: #527bbd;
font-family: sans-serif;
font-size: 1.1em; font-size: 1.1em;
font-weight: bold; font-weight: bold;
margin-top: 1.0em; margin-top: 1.0em;
margin-bottom: 0.1em; margin-bottom: 0.1em;
} }
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 { div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0; margin-top: 0;
margin-bottom: 0; margin-bottom: 0;
} }
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em; margin-left: 6em;
font-size: 0.9em; font-size: 0.9em;
} }
/* Overrides for manpage documents */
h1 { span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em; padding-top: 0.5em;
padding-bottom: 0.5em; padding-bottom: 0.5em;
border-top: 2px solid silver; border-top: 2px solid silver;
border-bottom: 2px solid silver; border-bottom: 2px solid silver;
} }
h2 { body.manpage h2 {
border-style: none; border-style: none;
} }
div.sectionbody { body.manpage div.sectionbody {
margin-left: 5%; margin-left: 3em;
} }
@media print { @media print {
div#toc { display: none; } body.manpage div#toc { display: none; }
} }
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style> </style>
<script type="text/javascript"> <script type="text/javascript">
/*<![CDATA[*/ /*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace. var asciidoc = { // Namespace.
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) { function tocEntries(el, toclevels) {
var result = new Array; var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])'); var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2 // Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all // nodeIterator API would be a better technique but not supported by all
// browsers). // browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
} }
var toc = document.getElementById("toc"); var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels); var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) { for (var i = 0; i < entries.length; ++i) {
var entry = entries[i]; var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/ */
footnotes: function () { footnotes: function () {
var cont = document.getElementById("content"); // Delete existing footnote entries in case we're reloading the footnotes.
var i;
var noteholder = document.getElementById("footnotes"); var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span"); var spans = cont.getElementsByTagName("span");
var refs = {}; var refs = {};
var n = 0; var n = 0;
for (i=0; i<spans.length; i++) { for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") { if (spans[i].className == "footnote") {
n++; n++;
// Use [\s\S] in place of . so multi-line matches work. var note = spans[i].getAttribute("data-note");
// Because JavaScript has no s (dotall) regex flag. if (!note) {
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1]; // Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML += noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" + "<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" + "<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>"; n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id"); var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n; if (id != null) refs["#"+id] = n;
} }
@ -561,13 +702,36 @@ footnotes: function () {
} }
} }
} }
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
} }
} }
asciidoc.install();
/*]]>*/ /*]]>*/
</script> </script>
</head> </head>
<body> <body class="manpage">
<div id="header"> <div id="header">
<h1> <h1>
WORDLIST2DAWG(1) Manual Page WORDLIST2DAWG(1) Manual Page
@ -580,6 +744,7 @@ WORDLIST2DAWG(1) Manual Page
</div> </div>
</div> </div>
<div id="content"> <div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2> <h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div> <div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
@ -588,12 +753,16 @@ WORDLIST2DAWG(1) Manual Page
<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div> <div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
<div class="paragraph"><p><strong>wordlist2dawg</strong> -l &lt;short&gt; &lt;long&gt; <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div> <div class="paragraph"><p><strong>wordlist2dawg</strong> -l &lt;short&gt; &lt;long&gt; <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2> <h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph <div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
(DAWG) for use with Tesseract. A DAWG is a compressed, space and time (DAWG) for use with Tesseract. A DAWG is a compressed, space and time
efficient representation of a word list.</p></div> efficient representation of a word list.</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2> <h2 id="_options">OPTIONS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>-t <div class="paragraph"><p>-t
@ -606,6 +775,8 @@ efficient representation of a word list.</p></div>
Produce a file with several dawgs in it, one each for words Produce a file with several dawgs in it, one each for words
of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</p></div> of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_arguments">ARGUMENTS</h2> <h2 id="_arguments">ARGUMENTS</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p><em>WORDLIST</em> <div class="paragraph"><p><em>WORDLIST</em>
@ -616,26 +787,33 @@ efficient representation of a word list.</p></div>
The unicharset of the language. This is the unicharset The unicharset of the language. This is the unicharset
generated by mftraining(1).</p></div> generated by mftraining(1).</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2> <h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div> <div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div> <div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2> <h2 id="_copying">COPYING</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2006 Google, Inc. <div class="paragraph"><p>Copyright (C) 2006 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div> Licensed under the Apache License, Version 2.0</p></div>
</div> </div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2> <h2 id="_author">AUTHOR</h2>
<div class="sectionbody"> <div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups <div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div> at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div> </div>
</div> </div>
</div>
<div id="footnotes"><hr /></div> <div id="footnotes"><hr /></div>
<div id="footer"> <div id="footer">
<div id="footer-text"> <div id="footer-text">
Last updated 2012-02-07 13:43:35 PDT Last updated 2015-06-12 23:52:50 CEST
</div> </div>
</div> </div>
</body> </body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?> <?asciidoc-toc?>
<?asciidoc-numbered?> <?asciidoc-numbered?>
<refentry lang="en"> <refentry lang="en">
<refentryinfo>
<title>WORDLIST2DAWG(1)</title>
</refentryinfo>
<refmeta> <refmeta>
<refentrytitle>wordlist2dawg</refentrytitle> <refentrytitle>wordlist2dawg</refentrytitle>
<manvolnum>1</manvolnum> <manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo> <refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo> <refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta> </refmeta>
<refnamediv> <refnamediv>
<refname>wordlist2dawg</refname> <refname>wordlist2dawg</refname>
@ -51,7 +54,7 @@ efficient representation of a word list.</simpara>
<refsect1 id="_see_also"> <refsect1 id="_see_also">
<title>SEE ALSO</title> <title>SEE ALSO</title>
<simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara> <simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara> <simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1> </refsect1>
<refsect1 id="_copying"> <refsect1 id="_copying">
<title>COPYING</title> <title>COPYING</title>