mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
fix links in doc; autotools requires README
This commit is contained in:
parent
0ee178d79b
commit
9b7f2527f1
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: ambiguous_words
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "AMBIGUOUS_WORDS" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "AMBIGUOUS_WORDS" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>AMBIGUOUS_WORDS(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
AMBIGUOUS_WORDS(1) Manual Page
|
||||
@ -580,10 +744,13 @@ AMBIGUOUS_WORDS(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>ambiguous_words</strong> [-l lang] <em>TESSDATADIR</em> <em>WORDLIST</em> <em>AMBIGUOUSFILE</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>ambiguous_words(1) runs Tesseract in a special mode, and for each word
|
||||
@ -591,25 +758,32 @@ in word list, produces a set of words which Tesseract thinks might be
|
||||
ambiguous with it. <em>TESSDATADIR</em> must be set to the absolute path of
|
||||
a directory containing <em>tessdata/lang.traineddata</em>.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-07 13:38:29 PDT
|
||||
Last updated 2015-05-13 19:59:45 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>AMBIGUOUS_WORDS(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>ambiguous_words</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>ambiguous_words</refname>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: cntraining
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "CNTRAINING" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "CNTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -45,7 +45,7 @@ Directory to write output files to\&.
|
||||
.sp
|
||||
tesseract(1), shapeclustering(1), mftraining(1)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "COPYING"
|
||||
.sp
|
||||
Copyright (c) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0
|
||||
|
@ -24,7 +24,7 @@ SEE ALSO
|
||||
--------
|
||||
tesseract(1), shapeclustering(1), mftraining(1)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>CNTRAINING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
CNTRAINING(1) Manual Page
|
||||
@ -580,16 +744,21 @@ CNTRAINING(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>cntraining</strong> [-D <em>dir</em>] <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>cntraining takes a list of .tr files, from which it generates the
|
||||
<strong>normproto</strong> data file (the character normalization sensitivity
|
||||
prototypes).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
@ -603,26 +772,33 @@ prototypes).</p></div>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), shapeclustering(1), mftraining(1)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (c) Hewlett-Packard Company, 1988
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-09 11:37:31 PDT
|
||||
Last updated 2015-06-12 23:50:30 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>CNTRAINING(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>cntraining</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>cntraining</refname>
|
||||
@ -40,7 +43,7 @@ prototypes).</simpara>
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), shapeclustering(1), mftraining(1)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: combine_tessdata
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "COMBINE_TESSDATA" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "COMBINE_TESSDATA" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -107,7 +107,7 @@ This will create /home/$USER/temp/eng\&.* files with individual tessdata compone
|
||||
\fIPrefix\fR refers to the full file prefix, including period (\&.)
|
||||
.SH "COMPONENTS"
|
||||
.sp
|
||||
The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
The components in a Tesseract lang\&.traineddata file as of Tesseract 3\&.02 are briefly described below; For more information on many of these files, see \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.PP
|
||||
lang\&.config
|
||||
.RS 4
|
||||
|
@ -76,7 +76,7 @@ COMPONENTS
|
||||
The components in a Tesseract lang.traineddata file as of
|
||||
Tesseract 3.02 are briefly described below; For more information on
|
||||
many of these files, see
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
lang.config::
|
||||
(Optional) Language-specific overrides to default config variables.
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>COMBINE_TESSDATA(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
COMBINE_TESSDATA(1) Manual Page
|
||||
@ -580,10 +744,13 @@ COMBINE_TESSDATA(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>combine_tessdata</strong> [<em>OPTION</em>] <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>combine_tessdata(1) is the main program to combine/extract/overwrite
|
||||
@ -593,7 +760,7 @@ classifier templates, ambiguities, language configs) located at, say,
|
||||
/home/$USER/temp/eng.* run:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>combine_tessdata /home/$USER/temp/eng.</tt></pre>
|
||||
<pre><code>combine_tessdata /home/$USER/temp/eng.</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>The result will be a combined tessdata file /home/$USER/temp/eng.traineddata</p></div>
|
||||
<div class="paragraph"><p>Specify option -e if you would like to extract individual components
|
||||
@ -601,8 +768,8 @@ from a combined traineddata file. For example, to extract language config
|
||||
file and the unicharset from tessdata/eng.traineddata run:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>combine_tessdata -e tessdata/eng.traineddata \
|
||||
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</tt></pre>
|
||||
<pre><code>combine_tessdata -e tessdata/eng.traineddata \
|
||||
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>The desired config file and unicharset will be written to
|
||||
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset</p></div>
|
||||
@ -611,8 +778,8 @@ file and the unicharset from tessdata/eng.traineddata run:</p></div>
|
||||
and unichar ambiguities files in tessdata/eng.traineddata use:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>combine_tessdata -o tessdata/eng.traineddata \
|
||||
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</tt></pre>
|
||||
<pre><code>combine_tessdata -o tessdata/eng.traineddata \
|
||||
/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>As a result, tessdata/eng.traineddata will contain the new language config
|
||||
and unichar ambigs, plus all the original DAWGs, classifier templates, etc.</p></div>
|
||||
@ -623,11 +790,13 @@ ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.</p></div>
|
||||
<div class="paragraph"><p>Specify option -u to unpack all the components to the specified path:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</tt></pre>
|
||||
<pre><code>combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>This will create /home/$USER/temp/eng.* files with individual tessdata
|
||||
components from tessdata/eng.traineddata.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>-e</strong> <em>.traineddata</em> <em>FILE</em>…:
|
||||
@ -638,16 +807,20 @@ components from tessdata/eng.traineddata.</p></div>
|
||||
<div class="paragraph"><p><strong>-u</strong> <em>.traineddata</em> <em>PATHPREFIX</em>
|
||||
Unpacks the .traineddata using the provided prefix.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_caveats">CAVEATS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><em>Prefix</em> refers to the full file prefix, including period (.)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_components">COMPONENTS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The components in a Tesseract lang.traineddata file as of
|
||||
Tesseract 3.02 are briefly described below; For more information on
|
||||
many of these files, see
|
||||
<a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
<div class="dlist"><dl>
|
||||
<dt class="hdlist1">
|
||||
lang.config
|
||||
@ -802,30 +975,39 @@ lang.params-training-model
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>combine_tessdata(1) first appeared in version 3.00 of Tesseract</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5),
|
||||
unicharambigs(5)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2009, Google Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-08 10:52:17 PDT
|
||||
Last updated 2015-06-12 23:52:02 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>COMBINE_TESSDATA(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>combine_tessdata</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>combine_tessdata</refname>
|
||||
@ -67,7 +70,7 @@ components from tessdata/eng.traineddata.</simpara>
|
||||
<simpara>The components in a Tesseract lang.traineddata file as of
|
||||
Tesseract 3.02 are briefly described below; For more information on
|
||||
many of these files, see
|
||||
<ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: dawg2wordlist
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "DAWG2WORDLIST" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "DAWG2WORDLIST" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -46,7 +46,7 @@ dawg2wordlist(1) converts a Tesseract Directed Acyclic Word Graph (DAWG) to a li
|
||||
.sp
|
||||
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), combine_tessdata(1)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "COPYING"
|
||||
.sp
|
||||
Copyright (C) 2012 Google, Inc\&. Licensed under the Apache License, Version 2\&.0
|
||||
|
@ -32,7 +32,7 @@ SEE ALSO
|
||||
tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
|
||||
combine_tessdata(1)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>DAWG2WORDLIST(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
DAWG2WORDLIST(1) Manual Page
|
||||
@ -580,15 +744,20 @@ DAWG2WORDLIST(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>dawg2wordlist</strong> <em>UNICHARSET</em> <em>DAWG</em> <em>WORDLIST</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>dawg2wordlist(1) converts a Tesseract Directed Acyclic Word
|
||||
Graph (DAWG) to a list of words using a unicharset as key.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><em>UNICHARSET</em>
|
||||
@ -599,27 +768,34 @@ Graph (DAWG) to a list of words using a unicharset as key.</p></div>
|
||||
<div class="paragraph"><p><em>WORDLIST</em>
|
||||
Plain text (output) file in UTF-8, one word per line</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
|
||||
combine_tessdata(1)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2012 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-07 13:38:01 PDT
|
||||
Last updated 2015-06-12 23:52:09 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>DAWG2WORDLIST(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>dawg2wordlist</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>dawg2wordlist</refname>
|
||||
@ -35,7 +38,7 @@ Graph (DAWG) to a list of words using a unicharset as key.</simpara>
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5),
|
||||
combine_tessdata(1)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: mftraining
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "MFTRAINING" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "MFTRAINING" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -85,7 +85,7 @@ Directory to write output files to\&.
|
||||
.sp
|
||||
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), shapeclustering(1), unicharset(5)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "COPYING"
|
||||
.sp
|
||||
Copyright (C) Hewlett\-Packard Company, 1988 Licensed under the Apache License, Version 2\&.0
|
||||
|
@ -43,7 +43,7 @@ SEE ALSO
|
||||
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
shapeclustering(1), unicharset(5)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>MFTRAINING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
MFTRAINING(1) Manual Page
|
||||
@ -580,10 +744,13 @@ MFTRAINING(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>mftraining -U <em>unicharset</em> -O <em>lang.unicharset</em> <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>mftraining takes a list of .tr files, from which it generates the
|
||||
@ -591,6 +758,8 @@ files <strong>inttemp</strong> (the shape prototypes), <strong>shapetable</stron
|
||||
(the number of expected features for each character). (A fourth file
|
||||
called Microfeat is also written by this program, but it is not used.)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
@ -611,7 +780,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</tt></pre>
|
||||
<pre><code>*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*</code></pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
@ -623,7 +792,7 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>*font_name* *xheight*</tt></pre>
|
||||
<pre><code>*font_name* *xheight*</code></pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
@ -644,27 +813,34 @@ called Microfeat is also written by this program, but it is not used.)</p></div>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
shapeclustering(1), unicharset(5)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) Hewlett-Packard Company, 1988
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-09 14:23:49 PDT
|
||||
Last updated 2015-06-12 23:52:19 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>MFTRAINING(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>mftraining</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>mftraining</refname>
|
||||
@ -84,7 +87,7 @@ called Microfeat is also written by this program, but it is not used.)</simpara>
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
shapeclustering(1), unicharset(5)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: shapeclustering
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "SHAPECLUSTERING" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "SHAPECLUSTERING" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -85,7 +85,7 @@ The output unicharset that will be given to combine_tessdata(1)\&.
|
||||
.sp
|
||||
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), unicharset(5)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "COPYING"
|
||||
.sp
|
||||
Copyright (C) Google, 2011 Licensed under the Apache License, Version 2\&.0
|
||||
|
@ -46,7 +46,7 @@ SEE ALSO
|
||||
tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
unicharset(5)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>SHAPECLUSTERING(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
SHAPECLUSTERING(1) Manual Page
|
||||
@ -580,6 +744,7 @@ SHAPECLUSTERING(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>shapeclustering -D <em>output_dir</em>
|
||||
@ -587,6 +752,8 @@ SHAPECLUSTERING(1) Manual Page
|
||||
-F <em>font_props</em> -X <em>xheights</em>
|
||||
<em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>shapeclustering(1) takes extracted feature .tr files (generated by
|
||||
@ -594,6 +761,8 @@ tesseract(1) run in a special mode from box files) and produces a
|
||||
file <strong>shapetable</strong> and an enhanced unicharset. This program is still
|
||||
experimental, and is not required (yet) for training Tesseract.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="dlist"><dl>
|
||||
@ -622,7 +791,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</tt></pre>
|
||||
<pre><code>'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'</code></pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
@ -634,7 +803,7 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
|
||||
</p>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>'font_name' 'xheight'</tt></pre>
|
||||
<pre><code>'font_name' 'xheight'</code></pre>
|
||||
</div></div>
|
||||
</dd>
|
||||
<dt class="hdlist1">
|
||||
@ -647,27 +816,34 @@ experimental, and is not required (yet) for training Tesseract.</p></div>
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
unicharset(5)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) Google, 2011
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-09 14:19:44 PDT
|
||||
Last updated 2015-06-12 23:52:24 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>SHAPECLUSTERING(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>shapeclustering</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>shapeclustering</refname>
|
||||
@ -87,7 +90,7 @@ experimental, and is not required (yet) for training Tesseract.</simpara>
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1),
|
||||
unicharset(5)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
|
@ -2,12 +2,12 @@
|
||||
.\" Title: tesseract
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 08/02/2014
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "TESSERACT" "1" "08/02/2014" "\ \&" "\ \&"
|
||||
.TH "TESSERACT" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -224,7 +224,7 @@ The engine was developed at Hewlett Packard Laboratories Bristol and at Hewlett
|
||||
.sp
|
||||
Version 2\&.00 brought Unicode (UTF\-8) support, six languages, and the ability to train Tesseract\&.
|
||||
.sp
|
||||
Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TestingTesseract\fR\m[] for more details\&.
|
||||
Tesseract was included in UNLV\(cqs Fourth Annual Test of OCR Accuracy\&. See \m[blue]\fBhttp://www\&.isri\&.unlv\&.edu/downloads/AT\-1995\&.pdf\fR\m[]\&. With Tesseract 2\&.00, scripts are now included to allow anyone to reproduce some of these tests\&. See \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TestingTesseract\fR\m[] for more details\&.
|
||||
.sp
|
||||
Tesseract 3\&.00 adds a number of new languages, including Chinese, Japanese, and Korean\&. It also introduces a new, single\-file based system of managing language data\&.
|
||||
.sp
|
||||
@ -233,7 +233,7 @@ Tesseract 3\&.02 adds BiDirectional text support, the ability to recognize multi
|
||||
For further details, see the file ReleaseNotes included with the distribution\&.
|
||||
.SH "RESOURCES"
|
||||
.sp
|
||||
Main web site: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/\fR\m[] Information on training: \m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
Main web site: \m[blue]\fBhttps://github\&.com/tesseract\-ocr\fR\m[] Information on training: \m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "SEE ALSO"
|
||||
.sp
|
||||
ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), unicharset_extractor(1), wordlist2dawg(1)
|
||||
|
@ -218,9 +218,9 @@ Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
|
||||
to train Tesseract.
|
||||
|
||||
Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
|
||||
See <http://www.isri.unlv.edu/downloads/AT-1995.pdf>. With Tesseract 2.00,
|
||||
See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>. With Tesseract 2.00,
|
||||
scripts are now included to allow anyone to reproduce some of these tests.
|
||||
See <http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract> for more
|
||||
See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more
|
||||
details.
|
||||
|
||||
Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
|
||||
@ -234,8 +234,8 @@ For further details, see the file ReleaseNotes included with the distribution.
|
||||
|
||||
RESOURCES
|
||||
---------
|
||||
Main web site: <http://code.google.com/p/tesseract-ocr/> +
|
||||
Information on training: <http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
Main web site: <https://github.com/tesseract-ocr> +
|
||||
Information on training: <https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
|
@ -3,7 +3,7 @@
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>TESSERACT(1)</title>
|
||||
<style type="text/css">
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
@ -94,7 +94,9 @@ ul > li > * { color: black; }
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
#author {
|
||||
color: #527bbd;
|
||||
@ -223,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -1047,7 +1049,7 @@ to train Tesseract.</p></div>
|
||||
<div class="paragraph"><p>Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy.
|
||||
See <a href="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</a>. With Tesseract 2.00,
|
||||
scripts are now included to allow anyone to reproduce some of these tests.
|
||||
See <a href="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</a> for more
|
||||
See <a href="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</a> for more
|
||||
details.</p></div>
|
||||
<div class="paragraph"><p>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
|
||||
and Korean. It also introduces a new, single-file based system of managing
|
||||
@ -1060,8 +1062,8 @@ multiple languages in a single image, and improved layout analysis.</p></div>
|
||||
<div class="sect1">
|
||||
<h2 id="_resources">RESOURCES</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Main web site: <a href="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</a><br />
|
||||
Information on training: <a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p>Main web site: <a href="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</a><br />
|
||||
Information on training: <a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
@ -1095,7 +1097,7 @@ Lloyd, Shobhit Saxena, and Thomas Kielbus.</p></div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2014-08-02 22:51:16 CEST
|
||||
Last updated 2015-06-12 23:49:44 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -327,7 +327,7 @@ to train Tesseract.</simpara>
|
||||
<simpara>Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy.
|
||||
See <ulink url="http://www.isri.unlv.edu/downloads/AT-1995.pdf">http://www.isri.unlv.edu/downloads/AT-1995.pdf</ulink>. With Tesseract 2.00,
|
||||
scripts are now included to allow anyone to reproduce some of these tests.
|
||||
See <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract">http://code.google.com/p/tesseract-ocr/wiki/TestingTesseract</ulink> for more
|
||||
See <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract</ulink> for more
|
||||
details.</simpara>
|
||||
<simpara>Tesseract 3.00 adds a number of new languages, including Chinese, Japanese,
|
||||
and Korean. It also introduces a new, single-file based system of managing
|
||||
@ -338,8 +338,8 @@ multiple languages in a single image, and improved layout analysis.</simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_resources">
|
||||
<title>RESOURCES</title>
|
||||
<simpara>Main web site: <ulink url="http://code.google.com/p/tesseract-ocr/">http://code.google.com/p/tesseract-ocr/</ulink><?asciidoc-br?>
|
||||
Information on training: <ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara>Main web site: <ulink url="https://github.com/tesseract-ocr">https://github.com/tesseract-ocr</ulink><?asciidoc-br?>
|
||||
Information on training: <ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: unicharambigs
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "UNICHARAMBIGS" "5" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "UNICHARAMBIGS" "5" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>UNICHARAMBIGS(5)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
UNICHARAMBIGS(5) Manual Page
|
||||
@ -580,6 +744,7 @@ UNICHARAMBIGS(5) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The unicharambigs file (a component of traineddata, see combine_tessdata(1) )
|
||||
@ -588,7 +753,7 @@ or groups of characters.</p></div>
|
||||
<div class="paragraph"><p>The file contains a number of lines, laid out as follow:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num]</tt></pre>
|
||||
<pre><code>[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num]</code></pre>
|
||||
</div></div>
|
||||
<div class="hdlist"><table>
|
||||
<tr>
|
||||
@ -652,13 +817,15 @@ replacement, 0 denotes an optional replacement.
|
||||
unicharset. The numbers in fields one and three refer to the
|
||||
number of unichars (not bytes).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_example">EXAMPLE</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>2 ' ' 1 " 1
|
||||
<pre><code>2 ' ' 1 " 1
|
||||
1 m 2 r n 0
|
||||
3 i i i 1 m 0</tt></pre>
|
||||
3 i i i 1 m 0</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>In this example, all instances of the <em>2</em> character sequence <em>'</em>' will
|
||||
<strong>always</strong> be replaced by the <em>1</em> character sequence <em>"</em>; a <em>1</em> character
|
||||
@ -666,6 +833,8 @@ sequence <em>m</em> <strong>may</strong> be replaced by the <em>2</em> character
|
||||
the <em>3</em> character sequence <strong>may</strong> be replaced by the <em>1</em> character
|
||||
sequence <em>m</em>.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The unicharambigs file first appeared in Tesseract 3.00; prior to that, a
|
||||
@ -673,26 +842,33 @@ similar format, called DangAmbigs (<em>dangerous ambiguities</em>) was used: the
|
||||
format was almost identical, except only mandatory replacements could be
|
||||
specified, and field 5 was absent.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_bugs">BUGS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>This is a documentation "bug": it’s not currently clear what should be done
|
||||
in the case of ligatures (such as <em>fi</em>) which may also appear as regular
|
||||
letters in the unicharset.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-08 10:59:49 PDT
|
||||
Last updated 2015-05-13 19:59:45 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>UNICHARAMBIGS(5)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>unicharambigs</refentrytitle>
|
||||
<manvolnum>5</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>unicharambigs</refname>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: unicharset
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "UNICHARSET" "5" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "UNICHARSET" "5" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -214,7 +214,7 @@ The unicharset format first appeared with Tesseract 2\&.00, which was the first
|
||||
.sp
|
||||
tesseract(1), combine_tessdata(1), unicharset_extractor(1)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "AUTHOR"
|
||||
.sp
|
||||
The Tesseract OCR engine was written by Ray Smith and his research groups at Hewlett Packard (1985\-1995) and Google (2006\-present)\&.
|
||||
|
@ -124,7 +124,7 @@ SEE ALSO
|
||||
--------
|
||||
tesseract(1), combine_tessdata(1), unicharset_extractor(1)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
|
||||
AUTHOR
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>UNICHARSET(5)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
UNICHARSET(5) Manual Page
|
||||
@ -580,6 +744,7 @@ UNICHARSET(5) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Tesseract’s unicharset file contains information on each symbol
|
||||
@ -596,12 +761,12 @@ Therefore, space gets unichar 0.</p></div>
|
||||
<div class="paragraph"><p>Each unichar line in the unicharset file (v2+) may have four space-separated fields:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>'character' 'properties' 'script' 'id'</tt></pre>
|
||||
<pre><code>'character' 'properties' 'script' 'id'</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>Starting with Tesseract v3.02, more information may be given for each unichar:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</tt></pre>
|
||||
<pre><code>'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>Entries:</p></div>
|
||||
<div class="dlist"><dl>
|
||||
@ -712,15 +877,17 @@ The UTF-8 representation of a "normalized form" of this unichar
|
||||
</dd>
|
||||
</dl></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_example_v2">EXAMPLE (v2)</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>; 10 Common 46
|
||||
<pre><code>; 10 Common 46
|
||||
b 3 Latin 59
|
||||
W 5 Latin 40
|
||||
7 8 Common 66
|
||||
= 0 Common 93</tt></pre>
|
||||
= 0 Common 93</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>";" is a punctuation character. Its properties are thus represented by the
|
||||
binary number 10000 (10 in hexadecimal).</p></div>
|
||||
@ -736,20 +903,24 @@ are thus represented by the binary number 00000 (0 in hexadecimal).</p></div>
|
||||
binary number 00001 (1 in hexadecimal): they are alphabetic, but neither
|
||||
upper nor lower case.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_example_v3_02">EXAMPLE (v3.02)</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>110
|
||||
<pre><code>110
|
||||
NULL 0 NULL 0
|
||||
N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
|
||||
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
|
||||
1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
|
||||
9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
|
||||
a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
|
||||
. . .</tt></pre>
|
||||
. . .</code></pre>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_caveats">CAVEATS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Although the unicharset reader maintains the ability to read unicharsets
|
||||
@ -759,6 +930,8 @@ the accuracy will be degraded.</p></div>
|
||||
so changing it without re-generating the others is likely to have dire
|
||||
consequences.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The unicharset format first appeared with Tesseract 2.00, which was the
|
||||
@ -766,21 +939,26 @@ first version to support languages other than English. The unicharset file
|
||||
contained only the first two fields, and the "ispunctuation" property was
|
||||
absent (punctuation was regarded as "0", as "=" is in the above example.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-08 11:01:57 PDT
|
||||
Last updated 2015-06-12 23:52:34 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>UNICHARSET(5)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>unicharset</refentrytitle>
|
||||
<manvolnum>5</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>unicharset</refname>
|
||||
@ -206,7 +209,7 @@ absent (punctuation was regarded as "0", as "=" is in the above example.</simpar
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), combine_tessdata(1), unicharset_extractor(1)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_author">
|
||||
<title>AUTHOR</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: unicharset_extractor
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "UNICHARSET_EXTRACTOR" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "UNICHARSET_EXTRACTOR" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -57,7 +57,7 @@ If your system supports the wctype functions, these values will be set automatic
|
||||
.sp
|
||||
tesseract(1), unicharset(5)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "HISTORY"
|
||||
.sp
|
||||
unicharset_extractor first appeared in Tesseract 2\&.00\&.
|
||||
|
@ -40,7 +40,7 @@ SEE ALSO
|
||||
--------
|
||||
tesseract(1), unicharset(5)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
HISTORY
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>UNICHARSET_EXTRACTOR(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
UNICHARSET_EXTRACTOR(1) Manual Page
|
||||
@ -580,10 +744,13 @@ UNICHARSET_EXTRACTOR(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>unicharset_extractor</strong> <em>[-D dir]</em> <em>FILE</em>…</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Tesseract needs to know the set of possible characters it can output.
|
||||
@ -592,7 +759,7 @@ program on the same training pages bounding box files as used for
|
||||
clustering:</p></div>
|
||||
<div class="literalblock">
|
||||
<div class="content">
|
||||
<pre><tt>unicharset_extractor fontfile_1.box fontfile_2.box ...</tt></pre>
|
||||
<pre><code>unicharset_extractor fontfile_1.box fontfile_2.box ...</code></pre>
|
||||
</div></div>
|
||||
<div class="paragraph"><p>The unicharset will be put into the file <em>dir/unicharset</em>, or simply
|
||||
<em>./unicharset</em> if no output directory is provided.</p></div>
|
||||
@ -609,30 +776,39 @@ file is changed) as they have to be in sync. This is made easier than in
|
||||
previous versions by running unicharset_extractor before mftraining and
|
||||
cntraining, and giving the unicharset to mftraining.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), unicharset(5)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_history">HISTORY</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>unicharset_extractor first appeared in Tesseract 2.00.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2006, Google Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-09 09:19:05 PDT
|
||||
Last updated 2015-06-12 23:52:38 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>UNICHARSET_EXTRACTOR(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>unicharset_extractor</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>unicharset_extractor</refname>
|
||||
@ -41,7 +44,7 @@ cntraining, and giving the unicharset to mftraining.</simpara>
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), unicharset(5)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_history">
|
||||
<title>HISTORY</title>
|
||||
|
@ -1,13 +1,13 @@
|
||||
'\" t
|
||||
.\" Title: wordlist2dawg
|
||||
.\" Author: [see the "AUTHOR" section]
|
||||
.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
|
||||
.\" Date: 02/09/2012
|
||||
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
|
||||
.\" Date: 06/12/2015
|
||||
.\" Manual: \ \&
|
||||
.\" Source: \ \&
|
||||
.\" Language: English
|
||||
.\"
|
||||
.TH "WORDLIST2DAWG" "1" "02/09/2012" "\ \&" "\ \&"
|
||||
.TH "WORDLIST2DAWG" "1" "06/12/2015" "\ \&" "\ \&"
|
||||
.\" -----------------------------------------------------------------
|
||||
.\" * Define some portability stuff
|
||||
.\" -----------------------------------------------------------------
|
||||
@ -63,7 +63,7 @@ wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph (DAWG) for
|
||||
.sp
|
||||
tesseract(1), combine_tessdata(1), dawg2wordlist(1)
|
||||
.sp
|
||||
\m[blue]\fBhttp://code\&.google\&.com/p/tesseract\-ocr/wiki/TrainingTesseract3\fR\m[]
|
||||
\m[blue]\fBhttps://github\&.com/tesseract\-ocr/tesseract/wiki/TrainingTesseract\fR\m[]
|
||||
.SH "COPYING"
|
||||
.sp
|
||||
Copyright (C) 2006 Google, Inc\&. Licensed under the Apache License, Version 2\&.0
|
||||
|
@ -56,7 +56,7 @@ SEE ALSO
|
||||
--------
|
||||
tesseract(1), combine_tessdata(1), dawg2wordlist(1)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
<https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
|
@ -2,15 +2,25 @@
|
||||
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.5.2" />
|
||||
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
||||
<meta name="generator" content="AsciiDoc 8.6.9" />
|
||||
<title>WORDLIST2DAWG(1)</title>
|
||||
<style type="text/css">
|
||||
/* Debug borders */
|
||||
p, li, dt, dd, div, pre, h1, h2, h3, h4, h5, h6 {
|
||||
/*
|
||||
border: 1px solid red;
|
||||
*/
|
||||
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
||||
|
||||
/* Default font. */
|
||||
body {
|
||||
font-family: Georgia,serif;
|
||||
}
|
||||
|
||||
/* Title font. */
|
||||
h1, h2, h3, h4, h5, h6,
|
||||
div.title, caption.title,
|
||||
thead, p.table.header,
|
||||
#toctitle,
|
||||
#author, #revnumber, #revdate, #revremark,
|
||||
#footer {
|
||||
font-family: Arial,Helvetica,sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
@ -35,13 +45,8 @@ strong {
|
||||
color: #083194;
|
||||
}
|
||||
|
||||
tt {
|
||||
color: navy;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
margin-top: 1.2em;
|
||||
margin-bottom: 0.5em;
|
||||
line-height: 1.3;
|
||||
@ -59,9 +64,11 @@ h3 {
|
||||
h3 + * {
|
||||
clear: left;
|
||||
}
|
||||
h5 {
|
||||
font-size: 1.0em;
|
||||
}
|
||||
|
||||
div.sectionbody {
|
||||
font-family: serif;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
@ -77,45 +84,50 @@ p {
|
||||
ul, ol, li > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
ul > li { color: #aaa; }
|
||||
ul > li > * { color: black; }
|
||||
|
||||
pre {
|
||||
.monospaced, code, pre {
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
font-size: inherit;
|
||||
color: navy;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
pre {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
span#author {
|
||||
#author {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
span#email {
|
||||
#email {
|
||||
}
|
||||
span#revnumber, span#revdate, span#revremark {
|
||||
font-family: sans-serif;
|
||||
#revnumber, #revdate, #revremark {
|
||||
}
|
||||
|
||||
div#footer {
|
||||
font-family: sans-serif;
|
||||
#footer {
|
||||
font-size: small;
|
||||
border-top: 2px solid silver;
|
||||
padding-top: 0.5em;
|
||||
margin-top: 4.0em;
|
||||
}
|
||||
div#footer-text {
|
||||
#footer-text {
|
||||
float: left;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
div#footer-badges {
|
||||
#footer-badges {
|
||||
float: right;
|
||||
padding-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div#preamble {
|
||||
#preamble {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock, div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.imageblock, div.exampleblock, div.verseblock,
|
||||
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
||||
div.admonitionblock {
|
||||
margin-top: 1.0em;
|
||||
@ -135,7 +147,6 @@ div.content { /* Block element content. */
|
||||
/* Block element titles. */
|
||||
div.title, caption.title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
text-align: left;
|
||||
margin-top: 1.0em;
|
||||
@ -157,13 +168,15 @@ div.content + div.title {
|
||||
|
||||
div.sidebarblock > div.content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 4px solid #f0f0f0;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock > div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
border: 1px solid #dddddd;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
background: #f8f8f8;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
@ -171,8 +184,8 @@ div.quoteblock, div.verseblock {
|
||||
padding-left: 1.0em;
|
||||
margin-left: 1.0em;
|
||||
margin-right: 10%;
|
||||
border-left: 5px solid #dddddd;
|
||||
color: #777777;
|
||||
border-left: 5px solid #f0f0f0;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
div.quoteblock > div.attribution {
|
||||
@ -180,8 +193,9 @@ div.quoteblock > div.attribution {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock > div.content {
|
||||
white-space: pre;
|
||||
div.verseblock > pre.content {
|
||||
font-family: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
div.verseblock > div.attribution {
|
||||
padding-top: 0.75em;
|
||||
@ -211,7 +225,7 @@ div.exampleblock > div.content {
|
||||
}
|
||||
|
||||
div.imageblock div.content { padding-left: 0; }
|
||||
span.image img { border-style: none; }
|
||||
span.image img { border-style: none; vertical-align: text-bottom; }
|
||||
a.image:visited { color: white; }
|
||||
|
||||
dl {
|
||||
@ -254,35 +268,12 @@ div.compact div, div.compact div {
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
tfoot {
|
||||
font-weight: bold;
|
||||
}
|
||||
td > div.verse {
|
||||
white-space: pre;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
div.hdlist {
|
||||
margin-top: 0.8em;
|
||||
@ -339,25 +330,32 @@ span.footnote, span.footnoteref {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
div#footer-badges { display: none; }
|
||||
div.colist td {
|
||||
padding-right: 0.5em;
|
||||
padding-bottom: 0.3em;
|
||||
vertical-align: top;
|
||||
}
|
||||
div.colist td img {
|
||||
margin-top: 0.3em;
|
||||
}
|
||||
|
||||
div#toc {
|
||||
@media print {
|
||||
#footer-badges { display: none; }
|
||||
}
|
||||
|
||||
#toc {
|
||||
margin-bottom: 2.5em;
|
||||
}
|
||||
|
||||
div#toctitle {
|
||||
#toctitle {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 0.1em;
|
||||
}
|
||||
|
||||
div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
@ -373,69 +371,173 @@ div.toclevel4 {
|
||||
margin-left: 6em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
/* Overrides for manpage documents */
|
||||
h1 {
|
||||
|
||||
span.aqua { color: aqua; }
|
||||
span.black { color: black; }
|
||||
span.blue { color: blue; }
|
||||
span.fuchsia { color: fuchsia; }
|
||||
span.gray { color: gray; }
|
||||
span.green { color: green; }
|
||||
span.lime { color: lime; }
|
||||
span.maroon { color: maroon; }
|
||||
span.navy { color: navy; }
|
||||
span.olive { color: olive; }
|
||||
span.purple { color: purple; }
|
||||
span.red { color: red; }
|
||||
span.silver { color: silver; }
|
||||
span.teal { color: teal; }
|
||||
span.white { color: white; }
|
||||
span.yellow { color: yellow; }
|
||||
|
||||
span.aqua-background { background: aqua; }
|
||||
span.black-background { background: black; }
|
||||
span.blue-background { background: blue; }
|
||||
span.fuchsia-background { background: fuchsia; }
|
||||
span.gray-background { background: gray; }
|
||||
span.green-background { background: green; }
|
||||
span.lime-background { background: lime; }
|
||||
span.maroon-background { background: maroon; }
|
||||
span.navy-background { background: navy; }
|
||||
span.olive-background { background: olive; }
|
||||
span.purple-background { background: purple; }
|
||||
span.red-background { background: red; }
|
||||
span.silver-background { background: silver; }
|
||||
span.teal-background { background: teal; }
|
||||
span.white-background { background: white; }
|
||||
span.yellow-background { background: yellow; }
|
||||
|
||||
span.big { font-size: 2em; }
|
||||
span.small { font-size: 0.6em; }
|
||||
|
||||
span.underline { text-decoration: underline; }
|
||||
span.overline { text-decoration: overline; }
|
||||
span.line-through { text-decoration: line-through; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
|
||||
/*
|
||||
* xhtml11 specific
|
||||
*
|
||||
* */
|
||||
|
||||
div.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
div.tableblock > table {
|
||||
border: 3px solid #527bbd;
|
||||
}
|
||||
thead, p.table.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.table {
|
||||
margin-top: 0;
|
||||
}
|
||||
/* Because the table frame attribute is overriden by CSS in most browsers. */
|
||||
div.tableblock > table[frame="void"] {
|
||||
border-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="hsides"] {
|
||||
border-left-style: none;
|
||||
border-right-style: none;
|
||||
}
|
||||
div.tableblock > table[frame="vsides"] {
|
||||
border-top-style: none;
|
||||
border-bottom-style: none;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* html5 specific
|
||||
*
|
||||
* */
|
||||
|
||||
table.tableblock {
|
||||
margin-top: 1.0em;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
thead, p.tableblock.header {
|
||||
font-weight: bold;
|
||||
color: #527bbd;
|
||||
}
|
||||
p.tableblock {
|
||||
margin-top: 0;
|
||||
}
|
||||
table.tableblock {
|
||||
border-width: 3px;
|
||||
border-spacing: 0px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th.tableblock, td.tableblock {
|
||||
border-width: 1px;
|
||||
padding: 4px;
|
||||
border-style: solid;
|
||||
border-color: #527bbd;
|
||||
}
|
||||
|
||||
table.tableblock.frame-topbot {
|
||||
border-left-style: hidden;
|
||||
border-right-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-sides {
|
||||
border-top-style: hidden;
|
||||
border-bottom-style: hidden;
|
||||
}
|
||||
table.tableblock.frame-none {
|
||||
border-style: hidden;
|
||||
}
|
||||
|
||||
th.tableblock.halign-left, td.tableblock.halign-left {
|
||||
text-align: left;
|
||||
}
|
||||
th.tableblock.halign-center, td.tableblock.halign-center {
|
||||
text-align: center;
|
||||
}
|
||||
th.tableblock.halign-right, td.tableblock.halign-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.tableblock.valign-top, td.tableblock.valign-top {
|
||||
vertical-align: top;
|
||||
}
|
||||
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
||||
vertical-align: middle;
|
||||
}
|
||||
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* manpage specific
|
||||
*
|
||||
* */
|
||||
|
||||
body.manpage h1 {
|
||||
padding-top: 0.5em;
|
||||
padding-bottom: 0.5em;
|
||||
border-top: 2px solid silver;
|
||||
border-bottom: 2px solid silver;
|
||||
}
|
||||
h2 {
|
||||
body.manpage h2 {
|
||||
border-style: none;
|
||||
}
|
||||
div.sectionbody {
|
||||
margin-left: 5%;
|
||||
body.manpage div.sectionbody {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
@media print {
|
||||
div#toc { display: none; }
|
||||
body.manpage div#toc { display: none; }
|
||||
}
|
||||
|
||||
/* Workarounds for IE6's broken and incomplete CSS2. */
|
||||
|
||||
div.sidebar-content {
|
||||
background: #ffffee;
|
||||
border: 1px solid silver;
|
||||
padding: 0.5em;
|
||||
}
|
||||
div.sidebar-title, div.image-title {
|
||||
color: #527bbd;
|
||||
font-family: sans-serif;
|
||||
font-weight: bold;
|
||||
margin-top: 0.0em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
div.listingblock div.content {
|
||||
border: 1px solid silver;
|
||||
background: #f4f4f4;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.quoteblock-attribution {
|
||||
padding-top: 0.5em;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
div.verseblock-content {
|
||||
white-space: pre;
|
||||
}
|
||||
div.verseblock-attribution {
|
||||
padding-top: 0.75em;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
div.exampleblock-content {
|
||||
border-left: 3px solid #dddddd;
|
||||
padding-left: 0.5em;
|
||||
}
|
||||
|
||||
/* IE6 sets dynamically generated links as visited. */
|
||||
div#toc a:visited { color: blue; }
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/*<![CDATA[*/
|
||||
window.onload = function(){asciidoc.footnotes();}
|
||||
var asciidoc = { // Namespace.
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
@ -477,7 +579,7 @@ toc: function (toclevels) {
|
||||
|
||||
function tocEntries(el, toclevels) {
|
||||
var result = new Array;
|
||||
var re = new RegExp('[hH]([2-'+(toclevels+1)+'])');
|
||||
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
|
||||
// Function that scans the DOM tree for header elements (the DOM2
|
||||
// nodeIterator API would be a better technique but not supported by all
|
||||
// browsers).
|
||||
@ -497,6 +599,25 @@ toc: function (toclevels) {
|
||||
}
|
||||
|
||||
var toc = document.getElementById("toc");
|
||||
if (!toc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete existing TOC entries in case we're reloading the TOC.
|
||||
var tocEntriesToRemove = [];
|
||||
var i;
|
||||
for (i = 0; i < toc.childNodes.length; i++) {
|
||||
var entry = toc.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div'
|
||||
&& entry.getAttribute("class")
|
||||
&& entry.getAttribute("class").match(/^toclevel/))
|
||||
tocEntriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
||||
toc.removeChild(tocEntriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild TOC entries.
|
||||
var entries = tocEntries(document.getElementById("content"), toclevels);
|
||||
for (var i = 0; i < entries.length; ++i) {
|
||||
var entry = entries[i];
|
||||
@ -524,24 +645,44 @@ toc: function (toclevels) {
|
||||
*/
|
||||
|
||||
footnotes: function () {
|
||||
var cont = document.getElementById("content");
|
||||
// Delete existing footnote entries in case we're reloading the footnodes.
|
||||
var i;
|
||||
var noteholder = document.getElementById("footnotes");
|
||||
if (!noteholder) {
|
||||
return;
|
||||
}
|
||||
var entriesToRemove = [];
|
||||
for (i = 0; i < noteholder.childNodes.length; i++) {
|
||||
var entry = noteholder.childNodes[i];
|
||||
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
||||
entriesToRemove.push(entry);
|
||||
}
|
||||
for (i = 0; i < entriesToRemove.length; i++) {
|
||||
noteholder.removeChild(entriesToRemove[i]);
|
||||
}
|
||||
|
||||
// Rebuild footnote entries.
|
||||
var cont = document.getElementById("content");
|
||||
var spans = cont.getElementsByTagName("span");
|
||||
var refs = {};
|
||||
var n = 0;
|
||||
for (i=0; i<spans.length; i++) {
|
||||
if (spans[i].className == "footnote") {
|
||||
n++;
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
var note = spans[i].getAttribute("data-note");
|
||||
if (!note) {
|
||||
// Use [\s\S] in place of . so multi-line matches work.
|
||||
// Because JavaScript has no s (dotall) regex flag.
|
||||
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
spans[i].setAttribute("data-note", note);
|
||||
}
|
||||
noteholder.innerHTML +=
|
||||
"<div class='footnote' id='_footnote_" + n + "'>" +
|
||||
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
||||
n + "</a>. " + note + "</div>";
|
||||
spans[i].innerHTML =
|
||||
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
||||
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
||||
var id =spans[i].getAttribute("id");
|
||||
if (id != null) refs["#"+id] = n;
|
||||
}
|
||||
@ -561,13 +702,36 @@ footnotes: function () {
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
install: function(toclevels) {
|
||||
var timerId;
|
||||
|
||||
function reinstall() {
|
||||
asciidoc.footnotes();
|
||||
if (toclevels) {
|
||||
asciidoc.toc(toclevels);
|
||||
}
|
||||
}
|
||||
|
||||
function reinstallAndRemoveTimer() {
|
||||
clearInterval(timerId);
|
||||
reinstall();
|
||||
}
|
||||
|
||||
timerId = setInterval(reinstall, 500);
|
||||
if (document.addEventListener)
|
||||
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
||||
else
|
||||
window.onload = reinstallAndRemoveTimer;
|
||||
}
|
||||
|
||||
}
|
||||
asciidoc.install();
|
||||
/*]]>*/
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<body class="manpage">
|
||||
<div id="header">
|
||||
<h1>
|
||||
WORDLIST2DAWG(1) Manual Page
|
||||
@ -580,6 +744,7 @@ WORDLIST2DAWG(1) Manual Page
|
||||
</div>
|
||||
</div>
|
||||
<div id="content">
|
||||
<div class="sect1">
|
||||
<h2 id="_synopsis">SYNOPSIS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><strong>wordlist2dawg</strong> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
|
||||
@ -588,12 +753,16 @@ WORDLIST2DAWG(1) Manual Page
|
||||
<div class="paragraph"><p><strong>wordlist2dawg</strong> -r 2 <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
|
||||
<div class="paragraph"><p><strong>wordlist2dawg</strong> -l <short> <long> <em>WORDLIST</em> <em>DAWG</em> <em>lang.unicharset</em></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_description">DESCRIPTION</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph
|
||||
(DAWG) for use with Tesseract. A DAWG is a compressed, space and time
|
||||
efficient representation of a word list.</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_options">OPTIONS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>-t
|
||||
@ -606,6 +775,8 @@ efficient representation of a word list.</p></div>
|
||||
Produce a file with several dawgs in it, one each for words
|
||||
of length <short>, <short+1>,… <long></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_arguments">ARGUMENTS</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p><em>WORDLIST</em>
|
||||
@ -616,26 +787,33 @@ efficient representation of a word list.</p></div>
|
||||
The unicharset of the language. This is the unicharset
|
||||
generated by mftraining(1).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_see_also">SEE ALSO</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</p></div>
|
||||
<div class="paragraph"><p><a href="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</a></p></div>
|
||||
<div class="paragraph"><p><a href="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</a></p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_copying">COPYING</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>Copyright (C) 2006 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</p></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sect1">
|
||||
<h2 id="_author">AUTHOR</h2>
|
||||
<div class="sectionbody">
|
||||
<div class="paragraph"><p>The Tesseract OCR engine was written by Ray Smith and his research groups
|
||||
at Hewlett Packard (1985-1995) and Google (2006-present).</p></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="footnotes"><hr /></div>
|
||||
<div id="footer">
|
||||
<div id="footer-text">
|
||||
Last updated 2012-02-07 13:43:35 PDT
|
||||
Last updated 2015-06-12 23:52:50 CEST
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
|
@ -3,11 +3,14 @@
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refentryinfo>
|
||||
<title>WORDLIST2DAWG(1)</title>
|
||||
</refentryinfo>
|
||||
<refmeta>
|
||||
<refentrytitle>wordlist2dawg</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>wordlist2dawg</refname>
|
||||
@ -51,7 +54,7 @@ efficient representation of a word list.</simpara>
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), combine_tessdata(1), dawg2wordlist(1)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
<simpara><ulink url="https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract">https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
|
Loading…
Reference in New Issue
Block a user