last one

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@483 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2024-11-24 02:59:07 +08:00 · 2010-09-30 02:18:45 +00:00 · 2010-09-30 02:18:45 +00:00 · 5575d8db02
commit 5575d8db02
parent 0759ee7e17
2 changed files with 87 additions and 0 deletions
--- a/doc/wordlist2dawg.1.asc
+++ b/doc/wordlist2dawg.1.asc
@@ -0,0 +1,41 @@
+WORDLIST2DAWG(1)
+================
+
+NAME
+----
+wordlist2dawg - convert a wordlist to a DAWG for Tesseract
+
+SYNOPSIS
+--------
+*wordlist2dawg* 'WORDLIST' 'DAWG' 'lang.unicharset'
+
+DESCRIPTION
+-----------
+wordlist2dawg(1) converts a wordlist to a Directed Acyclic
+Word Graph (DAWG) for use with Tesseract.
+
+The wordlists are split into two: one with high-frequency
+words, and one with the rest.
+
+OPTIONS
+-------
+'WORDLIST'::
+	A plain text file in UTF-8, one word per line
+
+'DAWG'::
+	The output DAWG to write
+
+'lang.unicharset'::
+	The unicharset of the language. This is the unicharset
+	generated by mftraining(1)
+
+SEE ALSO
+--------
+tesseract(1), mftraining(1)
+
+<http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3>
+
+COPYING
+-------
+Copyright (c) 2006 Google, Inc.
+Licensed under the Apache License, Version 2.0
--- a/doc/wordlist2dawg.1.xml
+++ b/doc/wordlist2dawg.1.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<?asciidoc-toc?>
+<?asciidoc-numbered?>
+<refentry lang="en">
+<refmeta>
+<refentrytitle>wordlist2dawg</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class="source">&#160;</refmiscinfo>
+<refmiscinfo class="manual">&#160;</refmiscinfo>
+</refmeta>
+<refnamediv>
+    <refname>wordlist2dawg</refname>
+    <refpurpose>convert a wordlist to a DAWG for Tesseract</refpurpose>
+</refnamediv>
+<refsynopsisdiv id="_synopsis">
+<simpara><emphasis role="strong">wordlist2dawg</emphasis> <emphasis>WORDLIST</emphasis> <emphasis>DAWG</emphasis> <emphasis>lang.unicharset</emphasis></simpara>
+</refsynopsisdiv>
+<refsect1 id="_description">
+<title>DESCRIPTION</title>
+<simpara>wordlist2dawg(1) converts a wordlist to a Directed Acyclic
+Word Graph (DAWG) for use with Tesseract.</simpara>
+<simpara>The wordlists are split into two: one with high-frequency
+words, and one with the rest.</simpara>
+</refsect1>
+<refsect1 id="_options">
+<title>OPTIONS</title>
+<simpara><emphasis>WORDLIST</emphasis>
+        A plain text file in UTF-8, one word per line</simpara>
+<simpara><emphasis>DAWG</emphasis>
+        The output DAWG to write</simpara>
+<simpara><emphasis>lang.unicharset</emphasis>
+        The unicharset of the language. This is the unicharset
+        generated by mftraining(1)</simpara>
+</refsect1>
+<refsect1 id="_see_also">
+<title>SEE ALSO</title>
+<simpara>tesseract(1), mftraining(1)</simpara>
+<simpara><ulink url="http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3">http://code.google.com/p/tesseract-ocr/wiki/TrainingTesseract3</ulink></simpara>
+</refsect1>
+<refsect1 id="_copying">
+<title>COPYING</title>
+<simpara>Copyright (c) 2006 Google, Inc.
+Licensed under the Apache License, Version 2.0</simpara>
+</refsect1>
+</refentry>