mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-05 02:47:00 +08:00
425d593ebe
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk/trunk@2 d0cd1f9f-072b-0410-8dd7-cf729c803f20
73 lines
3.5 KiB
C
73 lines
3.5 KiB
C
/******************************************************************
 * File:        fixspace.h  (Formerly fixspace.h)
 * Description: Implements a pass over the page res, exploring the alternative
 *              spacing possibilities, trying to use context to improve the
 *              word spacing
 * Author:      Phil Cheatle
 * Created:     Thu Oct 21 11:38:43 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// Include guard: the matching #endif closes it at the end of the header.
#ifndef FIXSPACE_H
#define FIXSPACE_H

// Project headers required by the declarations below.
// NOTE(review): "varable.h" is spelled exactly as in the original include;
// confirm it matches the on-disk header name before "correcting" it.
#include "pageres.h"
#include "varable.h"
#include "ocrclass.h"
#include "notdll.h"

extern BOOL_VAR_H (fixsp_check_for_fp_noise_space, TRUE,
|
|
"Try turning noise to space in fixed pitch");
|
|
extern BOOL_VAR_H (fixsp_fp_eval, TRUE, "Use alternate evaluation for fp");
|
|
extern BOOL_VAR_H (fixsp_noise_score_fixing, TRUE, "More sophisticated?");
|
|
extern INT_VAR_H (fixsp_non_noise_limit, 1,
|
|
"How many non-noise blbs either side?");
|
|
extern double_VAR_H (fixsp_small_outlines_size, 0.28,
|
|
"Small if lt xht x this");
|
|
extern BOOL_VAR_H (fixsp_ignore_punct, TRUE, "In uniform spacing calc");
|
|
extern BOOL_VAR_H (fixsp_numeric_fix, TRUE, "Try to deal with numeric punct");
|
|
extern BOOL_VAR_H (fixsp_prefer_joined_1s, TRUE, "Arbitrary boost");
|
|
extern BOOL_VAR_H (tessedit_test_uniform_wd_spacing, FALSE,
|
|
"Limit context word spacing");
|
|
extern BOOL_VAR_H (tessedit_prefer_joined_punct, FALSE,
|
|
"Reward punctation joins");
|
|
extern INT_VAR_H (fixsp_done_mode, 1, "What constitues done for spacing");
|
|
extern INT_VAR_H (debug_fix_space_level, 0, "Contextual fixspace debug");
|
|
extern STRING_VAR_H (numeric_punctuation, ".,",
|
|
"Punct. chs expected WITHIN numbers");
|
|
void fix_fuzzy_spaces( //find fuzzy words
|
|
volatile ETEXT_DESC *monitor, //progress monitor
|
|
INT32 word_count, //count of words in doc
|
|
PAGE_RES *page_res);
|
|
// Declaration of the "space explorer" for one list of word results
// (definition is outside this header).
//   best_perm - word-result list, passed by non-const reference
//   row       - row passed alongside the list
// NOTE(review): the parameter name suggests best_perm receives the best
// permutation found -- confirm against the definition.
void fix_fuzzy_space_list(              //space explorer
    WERD_RES_LIST &best_perm,
    ROW *row);
// Declaration only; both lists are passed by non-const reference.
// NOTE(review): from the names, presumably sets up new_list from src_list
// before alternatives are explored -- confirm against the definition.
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list);
// Declaration only; takes a word-result list by non-const reference and the
// row passed with it.
// NOTE(review): presumably runs matching on the words currently in the
// list -- confirm against the definition.
void match_current_words(WERD_RES_LIST &words, ROW *row);
// Declaration only; evaluates a word-result list and returns an INT16.
// NOTE(review): presumably the return value is a spacing score for the
// list -- confirm against the definition.
INT16 eval_word_spacing(WERD_RES_LIST &word_res_list);
// Declaration only; returns a BOOL8 for the character ch in the context of
// the given word result.
// NOTE(review): presumably true when ch is a digit or one of the characters
// in numeric_punctuation -- confirm against the definition.
BOOL8 digit_or_numeric_punct(WERD_RES *word, char ch);
// Declaration only; takes the word-result list by non-const reference.
// NOTE(review): presumably rewrites the list in place into the next spacing
// permutation to try -- confirm against the definition.
void transform_to_next_perm(WERD_RES_LIST &words);
// Declaration only; receives a word-result list together with a score, a
// mode and an "improved" flag.
// NOTE(review): presumably a debug dump; the meaning of the mode values is
// not visible in this header -- confirm against the definition.
void dump_words(WERD_RES_LIST &perm, INT16 score, INT16 mode, BOOL8 improved);
// Declaration of the "sensible word" check; returns a BOOL8 for the given
// word result (definition is outside this header).
// NOTE(review): presumably true when the word's internal spacing is
// uniform -- confirm against the definition.
BOOL8 uniformly_spaced(                 //sensible word
    WERD_RES *word);
// Declaration only; returns a BOOL8 for the given word result.
// NOTE(review): presumably the criterion depends on fixsp_done_mode ("What
// constitues done for spacing") -- confirm against the definition.
BOOL8 fixspace_thinks_word_done(WERD_RES *word);
// Declaration only; takes a word-result iterator by non-const reference and
// the row passed with it.
// NOTE(review): "fp" presumably stands for fixed pitch, as in the variable
// help strings -- confirm against the definition.
void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row);
// Declaration only; same parameter shape as fix_fuzzy_space_list (list by
// non-const reference plus a row).
// NOTE(review): presumably explores turning noise into spaces for this
// list -- confirm against the definition.
void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row);
// Declaration only; takes the word-result list by non-const reference.
// NOTE(review): presumably splits a word at its noisiest blob, modifying
// the list in place -- confirm against the definition.
void break_noisiest_blob_word(WERD_RES_LIST &words);
// Declaration only; returns an INT16 and takes a float pointer in addition
// to the word result.
// NOTE(review): presumably returns the index of the worst noise blob and
// writes its score through worst_noise_score -- confirm against the
// definition, including what is returned when no blob qualifies.
INT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score);
// Declaration only; computes a float for a single blob.
// NOTE(review): the scale and direction of the score are not visible in
// this header -- confirm against the definition.
float blob_noise_score(PBLOB *blob);
// Declaration only; takes a single word result and returns nothing.
// NOTE(review): presumably a debug/diagnostic helper -- confirm against the
// definition.
void fixspace_dbg(WERD_RES *word);
// Declaration only; same signature shape as eval_word_spacing (list by
// non-const reference, INT16 result).
// NOTE(review): presumably the alternate evaluation selected by
// fixsp_fp_eval ("Use alternate evaluation for fp") -- confirm against the
// definition.
INT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list);
#endif  // FIXSPACE_H