mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-28 11:08:15 +08:00
570af48b8b
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@87 d0cd1f9f-072b-0410-8dd7-cf729c803f20
85 lines
3.1 KiB
C++
85 lines
3.1 KiB
C++
/* -*-C-*-
|
|
********************************************************************************
|
|
*
|
|
* File: hyphen.c (Formerly hyphen.c)
|
|
* Description:
|
|
* Author: Mark Seaman, OCR Technology
|
|
* Created: Fri Oct 16 14:37:00 1987
|
|
* Modified: Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt
|
|
* Language: C
|
|
* Package: N/A
|
|
* Status: Reusable Software Component
|
|
*
|
|
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
*********************************************************************************/
|
|
/*----------------------------------------------------------------------
|
|
I n c l u d e s
|
|
----------------------------------------------------------------------*/
|
|
#include "const.h"
|
|
#include "hyphen.h"
|
|
#include "tordvars.h"
|
|
#include "callcpp.h"
|
|
#include <math.h>
|
|
|
|
/*----------------------------------------------------------------------
|
|
V a r i a b l e s
|
|
----------------------------------------------------------------------*/
|
|
int last_word_on_line = 0;
|
|
char *hyphen_string = 0;
|
|
char *hyphen_unichar_lengths = 0;
|
|
int *hyphen_unichar_offsets = NULL;
|
|
float hyphen_rating = MAXFLOAT;
|
|
int hyphen_state = 0;
|
|
|
|
/*----------------------------------------------------------------------
|
|
F u n c t i o n s
|
|
---------------------------------------------------------------------*/
|
|
/**********************************************************************
|
|
* set_hyphen_word
|
|
*
|
|
* If this hyphenated word choice is better than the last one then add
|
|
* it as the new word choice. This string can be used on the next
|
|
* line to permute the other half of the word.
|
|
**********************************************************************/
|
|
void set_hyphen_word(char *word, char *unichar_lengths, int *unichar_offsets,
|
|
float rating, int state) {
|
|
int char_index = strlen (unichar_lengths) - 1;
|
|
|
|
if (display_ratings)
|
|
cprintf ("set hyphen word = %s\n", word);
|
|
|
|
if (hyphen_rating > rating && char_index > 0) {
|
|
word[unichar_offsets[char_index]] = '\0';
|
|
unichar_lengths[char_index] = 0;
|
|
|
|
if (hyphen_string)
|
|
{
|
|
strfree(hyphen_string);
|
|
strfree(hyphen_unichar_lengths);
|
|
Efree(hyphen_unichar_offsets);
|
|
}
|
|
hyphen_string = strsave (word);
|
|
hyphen_unichar_lengths = strsave (unichar_lengths);
|
|
hyphen_unichar_offsets = (int *)
|
|
Emalloc((strlen(unichar_lengths)) * sizeof (int));
|
|
memcpy(hyphen_unichar_offsets, unichar_offsets,
|
|
(strlen(unichar_lengths)) * sizeof (int));
|
|
|
|
hyphen_state = state;
|
|
hyphen_rating = rating;
|
|
|
|
word[unichar_offsets[char_index]] = '-';
|
|
unichar_lengths[char_index] = 1;
|
|
}
|
|
}
|