mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-03 00:49:01 +08:00
47 lines
1.9 KiB
XML
47 lines
1.9 KiB
XML
|
<?xml version="1.0" encoding="UTF-8"?>
|
||
|
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
|
||
|
<?asciidoc-toc?>
|
||
|
<?asciidoc-numbered?>
|
||
|
<refentry lang="en">
|
||
|
<refmeta>
|
||
|
<refentrytitle>wordlist2dawg</refentrytitle>
|
||
|
<manvolnum>1</manvolnum>
|
||
|
<refmiscinfo class="source"> </refmiscinfo>
|
||
|
<refmiscinfo class="manual"> </refmiscinfo>
|
||
|
</refmeta>
|
||
|
<refnamediv>
|
||
|
<refname>wordlist2dawg</refname>
|
||
|
<refpurpose>convert a wordlist to a DAWG for Tesseract</refpurpose>
|
||
|
</refnamediv>
|
||
|
<refsynopsisdiv id="_synopsis">
|
||
|
<simpara><emphasis role="strong">wordlist2dawg</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
|
||
|
</refsynopsisdiv>
|
||
|
<refsect1 id="_description">
|
||
|
<title>DESCRIPTION</title>
|
||
|
<simpara>wordlist2dawg(1) converts a wordlist to a Directed Acyclic
Word Graph (DAWG) for use with Tesseract.</simpara>
|
||
|
<simpara>The wordlists are split into two: one with high-frequency
words, and one with the rest.</simpara>
|
||
|
</refsect1>
|
||
|
<refsect1 id="_options">
|
||
|
<title>OPTIONS</title>
|
||
|
<simpara><emphasis>WORDLIST</emphasis>
A plain text file in UTF-8, one word per line.</simpara>
|
||
|
<simpara><emphasis>DAWG</emphasis>
The output DAWG to write.</simpara>
|
||
|
<simpara><emphasis>lang.unicharset</emphasis>
The unicharset of the language. This is the unicharset
generated by mftraining(1).</simpara>
|
||
|
</refsect1>
|
||
|
<refsect1 id="_see_also">
|
||
|
<title>SEE ALSO</title>
|
||
|
<simpara>tesseract(1), mftraining(1)</simpara>
|
||
|
<simpara><ulink url="https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html">https://tesseract-ocr.github.io/tessdoc/Training-Tesseract.html</ulink></simpara>
|
||
|
</refsect1>
|
||
|
<refsect1 id="_copying">
|
||
|
<title>COPYING</title>
|
||
|
<simpara>Copyright (c) 2006 Google, Inc.
Licensed under the Apache License, Version 2.0.</simpara>
|
||
|
</refsect1>
|
||
|
</refentry>
|