fix links in doc; autotools requires README

This commit is contained in:
Zdenko Podobný 2015-06-13 00:08:05 +02:00
parent 0ee178d79b
commit 9b7f2527f1
43 changed files with 3130 additions and 1330 deletions

View File

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: ambiguous_words
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "AMBIGUOUS_WORDS" "1" "02/09/2012" "\ \&" "\ \&"
.TH "AMBIGUOUS_WORDS" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>AMBIGUOUS_WORDS(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
AMBIGUOUS_WORDS(1) Manual Page
@ -580,10 +744,13 @@ AMBIGUOUS_WORDS(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
@ -591,25 +758,32 @@ in word list, produces a set of words which Tesseract thinks might be
ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of
a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1)</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-07 13:38:29 PDT
Last updated 2015-05-13 19:59:45 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>AMBIGUOUS_WORDS(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>ambiguous_words</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>ambiguous_words</refname>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: cntraining
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "CNTRAINING" "1" "02/09/2012" "\ \&" "\ \&"
.TH "CNTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -45,7 +45,7 @@ Directory to write output files to\&.
.sp
tesseract(1), shapeclustering(1), mftraining(1)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING"
.sp
Copyright (c) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0

View File

@ -24,7 +24,7 @@ SEE ALSO
--------
tesseract(1), shapeclustering(1), mftraining(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>CNTRAINING(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
CNTRAINING(1) Manual Page
@ -580,16 +744,21 @@ CNTRAINING(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>&#8230;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
<strong>normproto</strong> data file (the character normalization sensitivity
prototypes).</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="dlist"><dl>
@ -603,26 +772,33 @@ prototypes).</p></div>
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-09 11:37:31 PDT
Last updated 2015-06-12 23:50:30 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>CNTRAINING(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>cntraining</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>cntraining</refname>
@ -40,7 +43,7 @@ prototypes).</simpara>
<refsect1 id="_see_also">
<title>SEE ALSO</title>
<simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_copying">
<title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: combine_tessdata
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "COMBINE_TESSDATA" "1" "02/09/2012" "\ \&" "\ \&"
.TH "COMBINE_TESSDATA" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -107,7 +107,7 @@ This will create /home/$USER/temp/eng\&.* files with individual tessdata compone
\fIPrefix\fR refers to the full file prefix, including period (\&.)
.SH "COMPONENTS"
.sp
The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.PP
lang\&.config
.RS 4

View File

@ -76,7 +76,7 @@ COMPONENTS
The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on
many of these files, see
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
lang.config::
(Optional) Language-specific overrides to default config variables.

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>COMBINE_TESSDATA(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
COMBINE_TESSDATA(1) Manual Page
@ -580,10 +744,13 @@ COMBINE_TESSDATA(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>&#8230;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite
@ -593,7 +760,7 @@ classifier templates, ambiguities, language configs) located at, say,
/home/$USER/temp/eng.* run:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>combine_tessdata /home/$USER/temp/eng.</tt></pre>
<pre><code>combine_tessdata /home/$USER/temp/eng.</code></pre>
</div></div>
<div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
<div class="paragraph"><p>Specify option -e if you would like to extract individual components
@ -601,8 +768,8 @@ from a combined traineddata file. For example, to extract language config
file and the unicharset from tessdata/eng.traineddata run:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>combine_tessdata -e tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</tt></pre>
<pre><code>combine_tessdata -e tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</code></pre>
</div></div>
<div class="paragraph"><p>The desired config file and unicharset will be written to
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
@ -611,8 +778,8 @@ file and the unicharset from tessdata/eng.traineddata run:</p></div>
and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>combine_tessdata -o tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</tt></pre>
<pre><code>combine_tessdata -o tessdata/eng.traineddata \
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</code></pre>
</div></div>
<div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
@ -623,11 +790,13 @@ ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
<div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</tt></pre>
<pre><code>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</code></pre>
</div></div>
<div class="paragraph"><p>This will create /home/$USER/temp/eng.* files with individual tessdata
components from tessdata/eng.traineddata.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>&#8230;:
@ -638,16 +807,20 @@ components from tessdata/eng.traineddata.</p></div>
<div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
Unpacks the .traineddata using the provided prefix.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_caveats">CAVEATS</h2>
<div class="sectionbody">
<div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_components">COMPONENTS</h2>
<div class="sectionbody">
<div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on
many of these files, see
<a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
<div class="dlist"><dl>
<dt class="hdlist1">
lang.config
@ -802,30 +975,39 @@ lang.params-training-model
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2>
<div class="sectionbody">
<div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
unicharambigs(5)</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2009, Google Inc.
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-08 10:52:17 PDT
Last updated 2015-06-12 23:52:02 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>COMBINE_TESSDATA(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>combine_tessdata</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>combine_tessdata</refname>
@ -67,7 +70,7 @@ components from tessdata/eng.traineddata.</simpara>
<simpara>The components in a Tesseract lang.traineddata file as of
Tesseract 3.02 are briefly described below; For more information on
many of these files, see
<ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
<variablelist>
<varlistentry>
<term>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: dawg2wordlist
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "DAWG2WORDLIST" "1" "02/09/2012" "\ \&" "\ \&"
.TH "DAWG2WORDLIST" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -46,7 +46,7 @@ dawg2wordlist(1) converts a Tesseract Directed Acyclic Word Graph (DAWG) to a li
.sp
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), combine_tessdata(1)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING"
.sp
Copyright (C) 2012 Google, Inc\&. Licensed under the Apache License, Version 2\&.0

View File

@ -32,7 +32,7 @@ SEE ALSO
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>DAWG2WORDLIST(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
DAWG2WORDLIST(1) Manual Page
@ -580,15 +744,20 @@ DAWG2WORDLIST(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
Graph (DAWG) to a list of words using a unicharset as key.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="paragraph"><p><em>UNICHARSET</em>
@ -599,27 +768,34 @@ Graph (DAWG) to a list of words using a unicharset as key.</p></div>
<div class="paragraph"><p><em>WORDLIST</em>
Plain text (output) file in UTF-8, one word per line</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-07 13:38:01 PDT
Last updated 2015-06-12 23:52:09 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>DAWG2WORDLIST(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>dawg2wordlist</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>dawg2wordlist</refname>
@ -35,7 +38,7 @@ Graph (DAWG) to a list of words using a unicharset as key.</simpara>
<title>SEE ALSO</title>
<simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
combine_tessdata(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_copying">
<title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: mftraining
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "MFTRAINING" "1" "02/09/2012" "\ \&" "\ \&"
.TH "MFTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -85,7 +85,7 @@ Directory to write output files to\&.
.sp
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), shapeclustering(1), unicharset(5)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING"
.sp
Copyright (C) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0

View File

@ -43,7 +43,7 @@ SEE ALSO
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>MFTRAINING(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
MFTRAINING(1) Manual Page
@ -580,10 +744,13 @@ MFTRAINING(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>&#8230;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
@ -591,6 +758,8 @@ files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</stron
(the number of expected features for each character). (A fourth file
called Microfeat is also written by this program, but it is not used.)</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="dlist"><dl>
@ -611,7 +780,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</p>
<div class="literalblock">
<div class="content">
<pre><tt>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</tt></pre>
<pre><code>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</code></pre>
</div></div>
</dd>
<dt class="hdlist1">
@ -623,7 +792,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</p>
<div class="literalblock">
<div class="content">
<pre><tt>*font_name* *xheight*</tt></pre>
<pre><code>*font_name* *xheight*</code></pre>
</div></div>
</dd>
<dt class="hdlist1">
@ -644,27 +813,34 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-09 14:23:49 PDT
Last updated 2015-06-12 23:52:19 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>MFTRAINING(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>mftraining</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>mftraining</refname>
@ -84,7 +87,7 @@ called Microfeat is also written by this program, but it is not used.)</simpara>
<title>SEE ALSO</title>
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
shapeclustering(1), unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_copying">
<title>COPYING</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: shapeclustering
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "SHAPECLUSTERING" "1" "02/09/2012" "\ \&" "\ \&"
.TH "SHAPECLUSTERING" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -85,7 +85,7 @@ The output unicharset that will be given to combine_tessdata(1)\&.
.sp
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), unicharset(5)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING"
.sp
Copyright (C) Google, 2011 Licensed under the Apache License, Version 2\&.0

View File

@ -46,7 +46,7 @@ SEE ALSO
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>SHAPECLUSTERING(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
SHAPECLUSTERING(1) Manual Page
@ -580,6 +744,7 @@ SHAPECLUSTERING(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
@ -587,6 +752,8 @@ SHAPECLUSTERING(1) Manual Page
-F <em>font_props</em> -X <em>xheights</em>
<em>FILE</em>&#8230;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
@ -594,6 +761,8 @@ tesseract(1) run in a special mode from box files) and produces a
file <strong>shapetable</strong> and an enhanced unicharset. This program is still
experimental, and is not required (yet) for training Tesseract.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="dlist"><dl>
@ -622,7 +791,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</p>
<div class="literalblock">
<div class="content">
<pre><tt>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</tt></pre>
<pre><code>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</code></pre>
</div></div>
</dd>
<dt class="hdlist1">
@ -634,7 +803,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</p>
<div class="literalblock">
<div class="content">
<pre><tt>'font_name' 'xheight'</tt></pre>
<pre><code>'font_name' 'xheight'</code></pre>
</div></div>
</dd>
<dt class="hdlist1">
@ -647,27 +816,34 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) Google, 2011
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-09 14:19:44 PDT
Last updated 2015-06-12 23:52:24 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>SHAPECLUSTERING(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>shapeclustering</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>shapeclustering</refname>
@ -87,7 +90,7 @@ experimental, and is not required (yet) for training Tesseract.</simpara>
<title>SEE ALSO</title>
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_copying">
<title>COPYING</title>

View File

@ -2,12 +2,12 @@
.\" Title: tesseract
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 08/02/2014
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "TESSERACT" "1" "08/02/2014" "\ \&" "\ \&"
.TH "TESSERACT" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -224,7 +224,7 @@ The engine was developed at Hewlett Packard Laboratories Bristol and at Hewlett
.sp
Version 2\&.00 brought Unicode (UTF\-8) support, six languages, and the ability to train Tesseract\&.
.sp
Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TestingTesseract\fR\m[] for more details\&.
Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TestingTesseract\fR\m[] for more details\&.
.sp
Tesseract 3\&.00 adds a number of new languages, including Chinese, Japanese, and Korean\&. It also introduces a new, single\-file based system of managing language data\&.
.sp
@ -233,7 +233,7 @@ Tesseract 3\&.02 adds BiDirectional text support, the ability to recognize multi
For further details, see the file ReleaseNotes included with the distribution\&.
.SH "RESOURCES"
.sp
Main web site: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/\fR\m[] Information on training: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
Main web site: \m[blue]\fBhttps://github\&.com/tesseract\-ocr\fR\m[] Information on training: \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "SEE ALSO"
.sp
ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), unicharset_extractor(1), wordlist2dawg(1)

View File

@ -218,9 +218,9 @@ Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
to train Tesseract.
Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
See <http://www.isri.unlv.edu/downloads/AT-1995.pdf>. With Tesseract 2.00,
See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests.
See <http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract> for more
See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more
details.
Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
@ -234,8 +234,8 @@ For further details, see the file ReleaseNotes included with the distribution.
RESOURCES
---------
Main web site: <http://code.google.com/p/tesseract-ocr/> +
Information on training: <http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
Main web site: <https://github.com/tesseract-ocr> +
Information on training: <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
SEE ALSO
--------

View File

@ -3,7 +3,7 @@
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>TESSERACT(1)</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
@ -94,7 +94,9 @@ ul > li > * { color: black; }
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
#author {
color: #527bbd;
@ -223,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -1047,7 +1049,7 @@ to train Tesseract.</p></div>
<div class="paragraph"><p>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
See <a href="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</a>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests.
See <a href="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</a> for more
See <a href="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</a> for more
details.</p></div>
<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
and Korean. It also introduces a new, single-file based system of managing
@ -1060,8 +1062,8 @@ multiple languages in a single image, and improved layout analysis.</p></div>
<div class="sect1">
<h2 id="_resources">RESOURCES</h2>
<div class="sectionbody">
<div class="paragraph"><p>Main web site: <a href="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</a><br />
Information on training: <a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br />
Information on training: <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
@ -1095,7 +1097,7 @@ Lloyd, Shobhit Saxena, and Thomas Kielbus.</p></div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2014-08-02 22:51:16 CEST
Last updated 2015-06-12 23:49:44 CEST
</div>
</div>
</body>

View File

@ -327,7 +327,7 @@ to train Tesseract.</simpara>
<simpara>Tesseract was included in UNLV&#8217;s Fourth Annual Test of OCR Accuracy.
See <ulink url="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</ulink>. With Tesseract 2.00,
scripts are now included to allow anyone to reproduce some of these tests.
See <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</ulink> for more
See <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</ulink> for more
details.</simpara>
<simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
and Korean. It also introduces a new, single-file based system of managing
@ -338,8 +338,8 @@ multiple languages in a single image, and improved layout analysis.</simpara>
</refsect1>
<refsect1 id="_resources">
<title>RESOURCES</title>
<simpara>Main web site: <ulink url="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</ulink><?asciidoc-br?>
Information on training: <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara>Main web site: <ulink url="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</ulink><?asciidoc-br?>
Information on training: <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_see_also">
<title>SEE ALSO</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: unicharambigs
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "UNICHARAMBIGS" "5" "02/09/2012" "\ \&" "\ \&"
.TH "UNICHARAMBIGS" "5" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARAMBIGS(5)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
UNICHARAMBIGS(5) Manual Page
@ -580,6 +744,7 @@ UNICHARAMBIGS(5) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
@ -588,7 +753,7 @@ or groups of characters.</p></div>
<div class="paragraph"><p>The file contains a number of lines, laid out as follow:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</tt></pre>
<pre><code>[num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num] &lt;TAB&gt; [char(s)] &lt;TAB&gt; [num]</code></pre>
</div></div>
<div class="hdlist"><table>
<tr>
@ -652,13 +817,15 @@ replacement, 0 denotes an optional replacement.
unicharset. The numbers in fields one and three refer to the
number of unichars (not bytes).</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_example">EXAMPLE</h2>
<div class="sectionbody">
<div class="literalblock">
<div class="content">
<pre><tt>2 ' ' 1 " 1
<pre><code>2 ' ' 1 " 1
1 m 2 r n 0
3 i i i 1 m 0</tt></pre>
3 i i i 1 m 0</code></pre>
</div></div>
<div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>'</em>' will
<strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character
@ -666,6 +833,8 @@ sequence <em>m</em> <strong>may</strong> be replaced by the <em>2</em> character
the <em>3</em> character sequence <strong>may</strong> be replaced by the <em>1</em> character
sequence <em>m</em>.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2>
<div class="sectionbody">
<div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
@ -673,26 +842,33 @@ similar format, called DangAmbigs (<em>dangerous ambiguities</em>) was used: the
format was almost identical, except only mandatory replacements could be
specified, and field 5 was absent.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_bugs">BUGS</h2>
<div class="sectionbody">
<div class="paragraph"><p>This is a documentation "bug": it&#8217;s not currently clear what should be done
in the case of ligatures (such as <em>fi</em>) which may also appear as regular
letters in the unicharset.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-08 10:59:49 PDT
Last updated 2015-05-13 19:59:45 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>UNICHARAMBIGS(5)</title>
</refentryinfo>
<refmeta>
<refentrytitle>unicharambigs</refentrytitle>
<manvolnum>5</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>unicharambigs</refname>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: unicharset
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "UNICHARSET" "5" "02/09/2012" "\ \&" "\ \&"
.TH "UNICHARSET" "5" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -214,7 +214,7 @@ The unicharset format first appeared with Tesseract 2\&.00, which was the first
.sp
tesseract(1), combine_tessdata(1), unicharset_extractor(1)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "AUTHOR"
.sp
The Tesseract OCR engine was written by Ray Smith and his research groups at Hewlett Packard (1985\-1995) and Google (2006\-present)\&.

View File

@ -124,7 +124,7 @@ SEE ALSO
--------
tesseract(1), combine_tessdata(1), unicharset_extractor(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
AUTHOR

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARSET(5)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
UNICHARSET(5) Manual Page
@ -580,6 +744,7 @@ UNICHARSET(5) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>Tesseract&#8217;s unicharset file contains information on each symbol
@ -596,12 +761,12 @@ Therefore, space gets unichar 0.</p></div>
<div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>'character' 'properties' 'script' 'id'</tt></pre>
<pre><code>'character' 'properties' 'script' 'id'</code></pre>
</div></div>
<div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</tt></pre>
<pre><code>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</code></pre>
</div></div>
<div class="paragraph"><p>Entries:</p></div>
<div class="dlist"><dl>
@ -712,15 +877,17 @@ The UTF-8 representation of a "normalized form" of this unichar
</dd>
</dl></div>
</div>
</div>
<div class="sect1">
<h2 id="_example_v2">EXAMPLE (v2)</h2>
<div class="sectionbody">
<div class="literalblock">
<div class="content">
<pre><tt>; 10 Common 46
<pre><code>; 10 Common 46
b 3 Latin 59
W 5 Latin 40
7 8 Common 66
= 0 Common 93</tt></pre>
= 0 Common 93</code></pre>
</div></div>
<div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the
binary number 10000 (10 in hexadecimal).</p></div>
@ -736,20 +903,24 @@ are thus represented by the binary number 00000 (0 in hexadecimal).</p></div>
binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
upper nor lower case.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_example_v3_02">EXAMPLE (v3.02)</h2>
<div class="sectionbody">
<div class="literalblock">
<div class="content">
<pre><tt>110
<pre><code>110
NULL 0 NULL 0
N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
. . .</tt></pre>
. . .</code></pre>
</div></div>
</div>
</div>
<div class="sect1">
<h2 id="_caveats">CAVEATS</h2>
<div class="sectionbody">
<div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets
@ -759,6 +930,8 @@ the accuracy will be degraded.</p></div>
so changing it without re-generating the others is likely to have dire
consequences.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2>
<div class="sectionbody">
<div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the
@ -766,21 +939,26 @@ first version to support languages other than English. The unicharset file
contained only the first two fields, and the "ispunctuation" property was
absent (punctuation was regarded as "0", as "=" is in the above example.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-08 11:01:57 PDT
Last updated 2015-06-12 23:52:34 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>UNICHARSET(5)</title>
</refentryinfo>
<refmeta>
<refentrytitle>unicharset</refentrytitle>
<manvolnum>5</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>unicharset</refname>
@ -206,7 +209,7 @@ absent (punctuation was regarded as "0", as "=" is in the above example.</simpar
<refsect1 id="_see_also">
<title>SEE ALSO</title>
<simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_author">
<title>AUTHOR</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: unicharset_extractor
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "UNICHARSET_EXTRACTOR" "1" "02/09/2012" "\ \&" "\ \&"
.TH "UNICHARSET_EXTRACTOR" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -57,7 +57,7 @@ If your system supports the wctype functions, these values will be set automatic
.sp
tesseract(1), unicharset(5)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "HISTORY"
.sp
unicharset_extractor first appeared in Tesseract 2\&.00\&.

View File

@ -40,7 +40,7 @@ SEE ALSO
--------
tesseract(1), unicharset(5)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
HISTORY
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>UNICHARSET_EXTRACTOR(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
UNICHARSET_EXTRACTOR(1) Manual Page
@ -580,10 +744,13 @@ UNICHARSET_EXTRACTOR(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>&#8230;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output.
@ -592,7 +759,7 @@ program on the same training pages bounding box files as used for
clustering:</p></div>
<div class="literalblock">
<div class="content">
<pre><tt>unicharset_extractor fontfile_1.box fontfile_2.box ...</tt></pre>
<pre><code>unicharset_extractor fontfile_1.box fontfile_2.box ...</code></pre>
</div></div>
<div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply
<em>./unicharset</em> if no output directory is provided.</p></div>
@ -609,30 +776,39 @@ file is changed) as they have to be in sync. This is made easier than in
previous versions by running unicharset_extractor before mftraining and
cntraining, and giving the unicharset to mftraining.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_history">HISTORY</h2>
<div class="sectionbody">
<div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2006, Google Inc.
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-09 09:19:05 PDT
Last updated 2015-06-12 23:52:38 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>UNICHARSET_EXTRACTOR(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>unicharset_extractor</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>unicharset_extractor</refname>
@ -41,7 +44,7 @@ cntraining, and giving the unicharset to mftraining.</simpara>
<refsect1 id="_see_also">
<title>SEE ALSO</title>
<simpara>tesseract(1), unicharset(5)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_history">
<title>HISTORY</title>

View File

@ -1,13 +1,13 @@
'\" t
.\" Title: wordlist2dawg
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
.\" Date: 02/09/2012
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 06/12/2015
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "WORDLIST2DAWG" "1" "02/09/2012" "\ \&" "\ \&"
.TH "WORDLIST2DAWG" "1" "06/12/2015" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
@ -63,7 +63,7 @@ wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph (DAWG) for
.sp
tesseract(1), combine_tessdata(1), dawg2wordlist(1)
.sp
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
.SH "COPYING"
.sp
Copyright (C) 2006 Google, Inc\&. Licensed under the Apache License, Version 2\&.0

View File

@ -56,7 +56,7 @@ SEE ALSO
--------
tesseract(1), combine_tessdata(1), dawg2wordlist(1)
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
COPYING
-------

View File

@ -2,15 +2,25 @@
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.5.2" />
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.9" />
<title>WORDLIST2DAWG(1)</title>
<style type="text/css">
/* Debug borders */
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
/*
border: 1px solid red;
*/
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
@ -35,13 +45,8 @@ strong {
color: #083194;
}
tt {
color: navy;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
font-family: sans-serif;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
@ -59,9 +64,11 @@ h3 {
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
font-family: serif;
margin-left: 0;
}
@ -77,45 +84,50 @@ p {
ul, ol, li > p {
margin-top: 0;
}
ul > li { color: #aaa; }
ul > li > * { color: black; }
pre {
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
span#author {
#author {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
font-size: 1.1em;
}
span#email {
#email {
}
span#revnumber, span#revdate, span#revremark {
font-family: sans-serif;
#revnumber, #revdate, #revremark {
}
div#footer {
font-family: sans-serif;
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
div#footer-text {
#footer-text {
float: left;
padding-bottom: 0.5em;
}
div#footer-badges {
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
div#preamble {
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
@ -135,7 +147,6 @@ div.content { /* Block element content. */
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
@ -157,13 +168,15 @@ div.content + div.title {
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid silver;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid silver;
background: #f4f4f4;
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #dddddd;
color: #777777;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
text-align: right;
}
div.verseblock > div.content {
white-space: pre;
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
@ -211,7 +225,7 @@ div.exampleblock > div.content {
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
@ -254,35 +268,12 @@ div.compact div, div.compact div {
margin-bottom: 0.1em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-family: sans-serif;
font-weight: bold;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
div.hdlist {
margin-top: 0.8em;
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
min-width: 100px;
}
@media print {
div#footer-badges { display: none; }
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
div#toc {
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
div#toctitle {
#toctitle {
color: #527bbd;
font-family: sans-serif;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
@ -373,69 +371,173 @@ div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
/* Overrides for manpage documents */
h1 {
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overriden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
h2 {
body.manpage h2 {
border-style: none;
}
div.sectionbody {
margin-left: 5%;
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
div#toc { display: none; }
body.manpage div#toc { display: none; }
}
/* Workarounds for IE6's broken and incomplete CSS2. */
div.sidebar-content {
background: #ffffee;
border: 1px solid silver;
padding: 0.5em;
}
div.sidebar-title, div.image-title {
color: #527bbd;
font-family: sans-serif;
font-weight: bold;
margin-top: 0.0em;
margin-bottom: 0.5em;
}
div.listingblock div.content {
border: 1px solid silver;
background: #f4f4f4;
padding: 0.5em;
}
div.quoteblock-attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock-content {
white-space: pre;
}
div.verseblock-attribution {
padding-top: 0.75em;
text-align: left;
}
div.exampleblock-content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
/* IE6 sets dynamically generated links as visited. */
div#toc a:visited { color: blue; }
</style>
<script type="text/javascript">
/*<![CDATA[*/
window.onload = function(){asciidoc.footnotes();}
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
@ -477,7 +579,7 @@ toc: function (toclevels) {
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
@ -497,6 +599,25 @@ toc: function (toclevels) {
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
@ -524,24 +645,44 @@ toc: function (toclevels) {
*/
footnotes: function () {
var cont = document.getElementById("content");
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
@ -561,13 +702,36 @@ footnotes: function () {
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body>
<body class="manpage">
<div id="header">
<h1>
WORDLIST2DAWG(1) Manual Page
@ -580,6 +744,7 @@ WORDLIST2DAWG(1) Manual Page
</div>
</div>
<div id="content">
<div class="sect1">
<h2 id="_synopsis">SYNOPSIS</h2>
<div class="sectionbody">
<div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
@ -588,12 +753,16 @@ WORDLIST2DAWG(1) Manual Page
<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
<div class="paragraph"><p><strong>wordlist2dawg</strong> -l &lt;short&gt; &lt;long&gt; <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_description">DESCRIPTION</h2>
<div class="sectionbody">
<div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
(DAWG) for use with Tesseract. A DAWG is a compressed, space and time
efficient representation of a word list.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_options">OPTIONS</h2>
<div class="sectionbody">
<div class="paragraph"><p>-t
@ -606,6 +775,8 @@ efficient representation of a word list.</p></div>
Produce a file with several dawgs in it, one each for words
of length &lt;short&gt;, &lt;short+1&gt;,&#8230; &lt;long&gt;</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_arguments">ARGUMENTS</h2>
<div class="sectionbody">
<div class="paragraph"><p><em>WORDLIST</em>
@ -616,26 +787,33 @@ efficient representation of a word list.</p></div>
The unicharset of the language. This is the unicharset
generated by mftraining(1).</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_see_also">SEE ALSO</h2>
<div class="sectionbody">
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div>
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
</div>
</div>
<div class="sect1">
<h2 id="_copying">COPYING</h2>
<div class="sectionbody">
<div class="paragraph"><p>Copyright (C) 2006 Google, Inc.
Licensed under the Apache License, Version 2.0</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_author">AUTHOR</h2>
<div class="sectionbody">
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated 2012-02-07 13:43:35 PDT
Last updated 2015-06-12 23:52:50 CEST
</div>
</div>
</body>

View File

@ -3,11 +3,14 @@
<?asciidoc-toc?>
<?asciidoc-numbered?>
<refentry lang="en">
<refentryinfo>
<title>WORDLIST2DAWG(1)</title>
</refentryinfo>
<refmeta>
<refentrytitle>wordlist2dawg</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo class="source">&nbsp;</refmiscinfo>
<refmiscinfo class="manual">&nbsp;</refmiscinfo>
<refmiscinfo class="source">&#160;</refmiscinfo>
<refmiscinfo class="manual">&#160;</refmiscinfo>
</refmeta>
<refnamediv>
<refname>wordlist2dawg</refname>
@ -51,7 +54,7 @@ efficient representation of a word list.</simpara>
<refsect1 id="_see_also">
<title>SEE ALSO</title>
<simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara>
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
</refsect1>
<refsect1 id="_copying">
<title>COPYING</title>