mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-20 12:50:55 +08:00
425d593ebe
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk/trunk@2 d0cd1f9f-072b-0410-8dd7-cf729c803f20
107 lines
3.6 KiB
C++
107 lines
3.6 KiB
C++
/******************************************************************************
 ** Filename: badwords.c
 ** Purpose: Routines to keep the bad words in sorted order.
 ** Author: Dan Johnson
 ** History: Thu Apr 25 08:40:19 1991, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
|
|
|
|
/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
|
|
#include "general.h"
|
|
#include "oldheap.h"
|
|
#include "callcpp.h"
|
|
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#ifdef __UNIX__
|
|
#include <assert.h>
|
|
#endif
|
|
|
|
/* Size passed to MakeHeap when SaveBadWord first creates the BadWords heap. */
#define MAX_NUM_BAD_WERDS 1000
|
|
|
|
/**----------------------------------------------------------------------------
          Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
|
|
/* Heap of saved bad words; stays NULL until SaveBadWord creates it on its
   first call. PrintBadWords returns immediately while it is still NULL. */
static HEAP *BadWords = NULL;
|
|
/* Variable defined through the project's BOOL_VAR macro with initial value
   FALSE. It is not referenced by the routines visible in this file.
   NOTE(review): presumably a user-settable control flag - confirm against
   the BOOL_VAR definition. */
BOOL_VAR (tessedit_save_stats, FALSE, "Save final recognition statistics");
|
|
|
|
/**----------------------------------------------------------------------------
          Public Code
----------------------------------------------------------------------------**/
|
|
/*---------------------------------------------------------------------------*/
|
|
void PrintBadWords(FILE *File) {
|
|
/*
|
|
** Parameters:
|
|
** File open text file to print bad words to
|
|
** Globals:
|
|
** BadWords heap that bad words are stored in
|
|
** Operation: This routine prints the bad words stored in BadWords
|
|
** to file ordered by certainty (worst certainty first).
|
|
** Return: none
|
|
** Exceptions: none
|
|
** History: Thu Apr 25 08:57:08 1991, DSJ, Created.
|
|
*/
|
|
HEAPENTRY NextWord;
|
|
|
|
if (BadWords == NULL)
|
|
return;
|
|
|
|
fprintf (File, "\n");
|
|
fprintf (File, "Bad Word Certainty\n");
|
|
fprintf (File, "---------------- ---------\n");
|
|
while (GetTopOfHeap (BadWords, &NextWord) != EMPTY) {
|
|
fprintf (File, "%16s %6.2f\n", (const char *) NextWord.Data,
|
|
NextWord.Key);
|
|
c_free_string ((char *) NextWord.Data);
|
|
}
|
|
fprintf (File, "\n");
|
|
|
|
} /* PrintBadWords */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
void SaveBadWord(const char *Word, FLOAT32 Certainty) {
/*
 ** Parameters:
 **   Word       bad word to be saved (must not be NULL)
 **   Certainty  certainty of the word; used as the heap key
 ** Globals:
 **   BadWords   heap the bad words are kept in
 ** Operation: Stores a private copy of Word in the BadWords heap,
 **   keyed by Certainty. The heap is created (with room for
 **   MAX_NUM_BAD_WERDS entries) and initialised on the first
 **   call. If an already existing heap is full, the word is
 **   silently dropped. The saved words can be written to a file
 **   by calling PrintBadWords (File).
 ** Return: none
 ** Exceptions: none
 ** History: Thu Apr 25 08:41:00 1991, DSJ, Created.
 */
  HEAPENTRY Entry;
  char *Copy;

  assert (Word != NULL);

  if (BadWords != NULL) {
    /* Existing heap: nothing can be added once it is full. */
    if (HeapFull (BadWords))
      return;
  } else {
    /* First bad word ever seen: build the heap now. */
    BadWords = MakeHeap (MAX_NUM_BAD_WERDS);
    InitHeap (BadWords);
  }

  /* The heap keeps its own copy of the word (+1 for the terminator). */
  Copy = c_alloc_string (strlen (Word) + 1);
  strcpy (Copy, Word);

  Entry.Data = Copy;
  Entry.Key = Certainty;
  HeapStore (BadWords, &Entry);

}                                /* SaveBadWord */
|