/********************************************************************** * File: tordmain.h (Formerly textordp.h) * Description: C++ top level textord code. * Author: Ray Smith * Created: Tue Jul 28 17:12:33 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #ifndef TORDMAIN_H #define TORDMAIN_H #include #include "varable.h" #include "ocrblock.h" #include "tessclas.h" #include "blobbox.h" #include "notdll.h" extern BOOL_VAR_H (textord_show_blobs, FALSE, "Display unsorted blobs"); extern BOOL_VAR_H (textord_new_initial_xheight, TRUE, "Use test xheight mechanism"); extern BOOL_VAR_H (textord_exit_after, FALSE, "Exit after completing textord"); extern INT_VAR_H (textord_max_noise_size, 7, "Pixel size of noise"); extern double_VAR_H (textord_blob_size_bigile, 95, "Percentile for large blobs"); extern double_VAR_H (textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise"); extern double_VAR_H (textord_blob_size_smallile, 20, "Percentile for small blobs"); extern double_VAR_H (textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); extern double_VAR_H (textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); extern INT_VAR_H (textord_noise_sizefraction, 10, "Fraction of size for maxima"); extern double_VAR_H (textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); extern INT_VAR_H (textord_noise_translimit, 16, "Transitions for normal blob"); extern double_VAR_H (textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); extern BOOL_VAR_H (textord_noise_rejwords, TRUE, "Reject noise-like words"); extern BOOL_VAR_H (textord_noise_rejrows, TRUE, "Reject noise-like rows"); extern double_VAR_H (textord_noise_syfract, 0.2, "xh fract error for norm blobs"); extern double_VAR_H (textord_noise_sxfract, 0.4, "xh fract width error for norm blobs"); extern INT_VAR_H (textord_noise_sncount, 1, "super norm blobs to save row"); extern double_VAR_H (textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); extern BOOL_VAR_H (textord_noise_debug, FALSE, "Debug row garbage detector"); extern double_VAR_H (textord_blshift_maxshift, 0.00, "Max baseline shift"); extern double_VAR_H (textord_blshift_xfraction, 9.99, "Min size of baseline shift"); //xiaofan extern STRING_EVAR_H (tessedit_image_ext, ".tif", "Externsion for image file"); extern clock_t previous_cpu; void make_blocks_from_blobs( //convert & textord TBLOB *tessblobs, //tess style input const char *filename, //blob file ICOORD page_tr, //top right BOOL8 do_shift, //shift tess coords BLOCK_LIST *blocks //block list ); void read_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks); void edges_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks); void assign_blobs_to_blocks( //split into groups PBLOB_LIST *blobs, //blobs to distribute BLOCK_LIST *blocks, //block list TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ); void assign_blobs_to_blocks2( //split into groups BLOCK_LIST *blocks, //blocks to process TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ); void filter_blobs( //split into groups ICOORD page_tr, //top right TO_BLOCK_LIST *blocks, //output list BOOL8 testing_on //for plotting ); float filter_noise_blobs( //separate noise BLOBNBOX_LIST *src_list, //origonal list BLOBNBOX_LIST *noise_list, //noise list BLOBNBOX_LIST *small_list, //small blobs BLOBNBOX_LIST *large_list //large blobs ); float filter_noise_blobs2( //separate noise BLOBNBOX_LIST *src_list, //origonal list BLOBNBOX_LIST *noise_list, //noise list BLOBNBOX_LIST *small_list, //small blobs BLOBNBOX_LIST *large_list //large blobs ); void textord_page( //make rows & words ICOORD page_tr, //top right BLOCK_LIST *blocks, //block list TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ); void cleanup_blocks( //remove empties BLOCK_LIST *blocks //list ); BOOL8 clean_noise_from_row( //remove empties ROW *row //row to clean ); void clean_noise_from_words( //remove empties ROW *row //row to clean ); void tweak_row_baseline( //remove empties ROW *row //row to clean ); inT32 blob_y_order( //sort function void *item1, //items to compare void *item2); #endif