mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
last one
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@483 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
0759ee7e17
commit
5575d8db02
41
doc/wordlist2dawg.1.asc
Normal file
41
doc/wordlist2dawg.1.asc
Normal file
@ -0,0 +1,41 @@
|
||||
WORDLIST2DAWG(1)
|
||||
================
|
||||
|
||||
NAME
|
||||
----
|
||||
wordlist2dawg - convert a wordlist to a DAWG for Tesseract
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
*wordlist2dawg* 'WORDLIST' 'DAWG' 'lang.unicharset'
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
wordlist2dawg(1) converts a wordlist to a Directed Acyclic
|
||||
Word Graph (DAWG) for use with Tesseract.
|
||||
|
||||
The wordlists are split into two: one with high-frequency
|
||||
words, and one with the rest.
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
'WORDLIST'::
|
||||
A plain text file in UTF-8, one word per line
|
||||
|
||||
'DAWG'::
|
||||
The output DAWG to write
|
||||
|
||||
'lang.unicharset'::
|
||||
The unicharset of the language. This is the unicharset
|
||||
generated by mftraining(1).
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
tesseract(1), mftraining(1)
|
||||
|
||||
<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
|
||||
|
||||
COPYING
|
||||
-------
|
||||
Copyright (c) 2006 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0
|
46
doc/wordlist2dawg.1.xml
Normal file
46
doc/wordlist2dawg.1.xml
Normal file
@ -0,0 +1,46 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
|
||||
<?asciidoc-toc?>
|
||||
<?asciidoc-numbered?>
|
||||
<refentry lang="en">
|
||||
<refmeta>
|
||||
<refentrytitle>wordlist2dawg</refentrytitle>
|
||||
<manvolnum>1</manvolnum>
|
||||
<refmiscinfo class="source"> </refmiscinfo>
|
||||
<refmiscinfo class="manual"> </refmiscinfo>
|
||||
</refmeta>
|
||||
<refnamediv>
|
||||
<refname>wordlist2dawg</refname>
|
||||
<refpurpose>convert a wordlist to a DAWG for Tesseract</refpurpose>
|
||||
</refnamediv>
|
||||
<refsynopsisdiv id="_synopsis">
|
||||
<simpara><emphasis role="strong">wordlist2dawg</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
|
||||
</refsynopsisdiv>
|
||||
<refsect1 id="_description">
|
||||
<title>DESCRIPTION</title>
|
||||
<simpara>wordlist2dawg(1) converts a wordlist to a Directed Acyclic
|
||||
Word Graph (DAWG) for use with Tesseract.</simpara>
|
||||
<simpara>The wordlists are split into two: one with high-frequency
|
||||
words, and one with the rest.</simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_options">
|
||||
<title>OPTIONS</title>
|
||||
<simpara><emphasis>WORDLIST</emphasis>
|
||||
A plain text file in UTF-8, one word per line</simpara>
|
||||
<simpara><emphasis>DAWG</emphasis>
|
||||
The output DAWG to write</simpara>
|
||||
<simpara><emphasis>lang.unicharset</emphasis>
|
||||
The unicharset of the language. This is the unicharset
|
||||
generated by mftraining(1).</simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_see_also">
|
||||
<title>SEE ALSO</title>
|
||||
<simpara>tesseract(1), mftraining(1)</simpara>
|
||||
<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
|
||||
</refsect1>
|
||||
<refsect1 id="_copying">
|
||||
<title>COPYING</title>
|
||||
<simpara>Copyright (c) 2006 Google, Inc.
|
||||
Licensed under the Apache License, Version 2.0</simpara>
|
||||
</refsect1>
|
||||
</refentry>
|
Loading…
Reference in New Issue
Block a user