diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am index c2a79ed44..af6cc8d23 100644 --- a/ccmain/Makefile.am +++ b/ccmain/Makefile.am @@ -12,25 +12,25 @@ AM_CPPFLAGS = \ EXTRA_DIST = tessembedded.cpp include_HEADERS = \ - charcut.h control.h cube_reco_context.h \ + control.h cube_reco_context.h \ docqual.h fixspace.h \ imgscale.h osdetect.h output.h \ paramsd.h pgedit.h reject.h scaleimg.h \ tessbox.h tessedit.h tessembedded.h tesseractclass.h \ tesseract_cube_combiner.h \ - tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \ + tessvars.h tfacep.h tfacepp.h thresholder.h \ werdit.h lib_LTLIBRARIES = libtesseract_main.la libtesseract_main_la_SOURCES = \ adaptions.cpp applybox.cpp \ - charcut.cpp control.cpp cube_control.cpp cube_reco_context.cpp \ + control.cpp cube_control.cpp cube_reco_context.cpp \ docqual.cpp fixspace.cpp fixxht.cpp \ imgscale.cpp osdetect.cpp output.cpp pagesegmain.cpp \ pagewalk.cpp paramsd.cpp pgedit.cpp reject.cpp scaleimg.cpp \ recogtraining.cpp tesseract_cube_combiner.cpp \ tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \ - tfacepp.cpp thresholder.cpp tstruct.cpp \ + tfacepp.cpp thresholder.cpp \ werdit.cpp libtesseract_main_la_LIBADD = \ ../wordrec/libtesseract_wordrec.la diff --git a/ccmain/adaptions.cpp b/ccmain/adaptions.cpp index 0c0d13975..5db337f07 100644 --- a/ccmain/adaptions.cpp +++ b/ccmain/adaptions.cpp @@ -33,7 +33,6 @@ #include "tessbox.h" #include "tessvars.h" #include "memry.h" -#include "charcut.h" #include "imgs.h" #include "scaleimg.h" #include "reject.h" diff --git a/ccmain/charcut.cpp b/ccmain/charcut.cpp deleted file mode 100644 index bba782210..000000000 --- a/ccmain/charcut.cpp +++ /dev/null @@ -1,693 +0,0 @@ -/********************************************************************** - * File: charcut.cpp (Formerly charclip.c) - * Description: Code for character clipping - * Author: Phil Cheatle - * Created: Wed Nov 11 08:35:15 GMT 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mfcpch.h" -#include "charcut.h" -#include "imgs.h" -#include "scrollview.h" -#include "svshowim.h" -#include "notdll.h" -#include "helpers.h" - -// Include automatically generated configuration file if running autoconf. -#ifdef HAVE_CONFIG_H -#include "config_auto.h" -#endif - -#define LARGEST(a,b) ( (a) > (b) ? (a) : (b) ) -#define SMALLEST(a,b) ( (a) > (b) ? (b) : (a) ) -#define BUG_OFFSET 1 -#define EXTERN - -ELISTIZE (PIXROW) -/************************************************************************* - * PIXROW::PIXROW() - * - * Constructor for a specified size PIXROW from a blob - *************************************************************************/ -PIXROW::PIXROW(inT16 pos, inT16 count, PBLOB *blob) { - OUTLINE_LIST *outline_list; - OUTLINE_IT outline_it; - POLYPT_LIST *pts_list; - POLYPT_IT pts_it; - inT16 i; - FCOORD pt; - FCOORD vec; - float y_coord; - inT16 x_coord; - - row_offset = pos; - row_count = count; - min = (inT16 *) alloc_mem(count * sizeof(inT16)); - max = (inT16 *) alloc_mem(count * sizeof(inT16)); - outline_list = blob->out_list (); - outline_it.set_to_list (outline_list); - - for (i = 0; i < count; i++) { - min[i] = MAX_INT16 - 1; - max[i] = -MAX_INT16 + 1; - y_coord = row_offset + i + 0.5; - for (outline_it.mark_cycle_pt(); - !outline_it.cycled_list(); outline_it.forward()) { - pts_list = outline_it.data()->polypts(); - pts_it.set_to_list(pts_list); - for (pts_it.mark_cycle_pt(); !pts_it.cycled_list(); pts_it.forward()) { - pt = pts_it.data()->pos; - vec = pts_it.data()->vec; - if ((vec.y() != 0) && - (((pt.y() <= y_coord) && (pt.y() + vec.y() >= y_coord)) - || ((pt.y() >= y_coord) && (pt.y() + vec.y() <= y_coord)))) { - /* The segment crosses y_coord so find x-point and check for min/max. */ - x_coord = (inT16) floor((y_coord - pt.y()) * vec.x() / vec.y() + - pt.x() + 0.5); - // x_coord - 1 to get pix to left of line - UpdateRange(x_coord, x_coord - 1, &min[i], &max[i]); - } - } - } - } -} - - -/************************************************************************* - * PIXROW::plot() - * - * Draw the PIXROW - *************************************************************************/ - -#ifndef GRAPHICS_DISABLED -void PIXROW::plot(ScrollView* fd //where to paint - ) const { - inT16 i; - inT16 y_coord; - - for (i = 0; i < row_count; i++) { - y_coord = row_offset + i; - if (min[i] <= max[i]) { - fd->Rectangle(min[i], y_coord, max[i] + 1, y_coord + 1); - } - } -} -#endif - -/************************************************************************* - * PIXROW::bounding_box() - * - * Generate bounding box for blob image - *************************************************************************/ - -bool PIXROW::bad_box( //return true if box exceeds image - int xsize, - int ysize) const { - TBOX bbox = bounding_box (); - if (bbox.left () < 0 || bbox.right () > xsize - || bbox.top () > ysize || bbox.bottom () < 0) { - tprintf("Box (%d,%d)->(%d,%d) bad compared to %d,%d\n", - bbox.left(),bbox.bottom(), bbox.right(), bbox.top(), - xsize, ysize); - return true; - } - return false; -} - - -/************************************************************************* - * PIXROW::bounding_box() - * - * Generate bounding box for blob image - *************************************************************************/ - -TBOX PIXROW::bounding_box() const { - inT16 i; - inT16 y_coord; - inT16 min_x = MAX_INT16 - 1; - inT16 min_y = MAX_INT16 - 1; - inT16 max_x = -MAX_INT16 + 1; - inT16 max_y = -MAX_INT16 + 1; - - for (i = 0; i < row_count; i++) { - y_coord = row_offset + i; - if (min[i] <= max[i]) { - UpdateRange(y_coord, y_coord + 1, &min_y, &max_y); - UpdateRange(min[i], max[i] + 1, &min_x, &max_x); - } - } - if (min_x > max_x || min_y > max_y) - return TBOX(); - else - return TBOX(ICOORD(min_x, min_y), ICOORD(max_x, max_y)); -} - - -/************************************************************************* - * PIXROW::contract() - * - * Reduce the mins and maxs so that they end on black pixels - *************************************************************************/ - -void PIXROW::contract( //image array - IMAGELINE *imlines, - inT16 x_offset, //of pixels[0] - inT16 foreground_colour //0 or 1 - ) { - inT16 i; - uinT8 *line_pixels; - - for (i = 0; i < row_count; i++) { - if (min[i] > max[i]) - continue; - - line_pixels = imlines[i].pixels; - while (line_pixels[min[i] - x_offset] != foreground_colour) { - if (min[i] == max[i]) { - min[i] = MAX_INT16 - 1; - max[i] = -MAX_INT16 + 1; - goto nextline; - } - else - min[i]++; - } - while (line_pixels[max[i] - x_offset] != foreground_colour) { - if (min[i] == max[i]) { - min[i] = MAX_INT16 - 1; - max[i] = -MAX_INT16 + 1; - goto nextline; - } - else - max[i]--; - } - nextline:; - //goto label! - } -} - - -/************************************************************************* - * PIXROW::extend() - * - * 1 pixel extension in each direction to cover extra black area - *************************************************************************/ - -BOOL8 PIXROW::extend( //image array - IMAGELINE *imlines, - TBOX &imbox, - PIXROW *prev, //for prev blob - PIXROW *next, //for next blob - inT16 foreground_colour) { - inT16 i; - inT16 x_offset = imbox.left (); - inT16 limit; - inT16 left_limit; - inT16 right_limit; - uinT8 *pixels = NULL; - uinT8 *pixels_below = NULL; //row below current - uinT8 *pixels_above = NULL; //row above current - BOOL8 changed = FALSE; - - pixels_above = imlines[0].pixels; - for (i = 0; i < row_count; i++) { - pixels_below = pixels; - pixels = pixels_above; - if (i < (row_count - 1)) - pixels_above = imlines[i + 1].pixels; - else - pixels_above = NULL; - - /* Extend Left by one pixel*/ - if (prev == NULL || prev->max[i] < prev->min[i]) - limit = imbox.left (); - else - limit = prev->max[i] + 1; - if ((min[i] <= max[i]) && - (min[i] > limit) && - (pixels[min[i] - 1 - x_offset] == foreground_colour)) { - min[i]--; - changed = TRUE; - } - - /* Extend Right by one pixel*/ - if (next == NULL || next->min[i] > next->max[i]) - limit = imbox.right () - 1;//-1 to index inside pix - else - limit = next->min[i] - 1; - if ((min[i] <= max[i]) && - (max[i] < limit) && - (pixels[max[i] + 1 - x_offset] == foreground_colour)) { - max[i]++; - changed = TRUE; - } - - /* Extend down by one row */ - if (pixels_below != NULL) { - if (min[i] < min[i - 1]) { //row goes left of row below - if (prev == NULL || prev->max[i - 1] < prev->min[i - 1]) - left_limit = min[i]; - else - left_limit = LARGEST (min[i], prev->max[i - 1] + 1); - } - else - left_limit = min[i - 1]; - - if (max[i] > max[i - 1]) { //row goes right of row below - if (next == NULL || next->min[i - 1] > next->max[i - 1]) - right_limit = max[i]; - else - right_limit = SMALLEST (max[i], next->min[i - 1] - 1); - } - else - right_limit = max[i - 1]; - - while ((left_limit <= right_limit) && - (pixels_below[left_limit - x_offset] != foreground_colour)) - left_limit++; //find black extremity - - if ((left_limit <= right_limit) && (left_limit < min[i - 1])) { - min[i - 1] = left_limit; //widen left if poss - changed = TRUE; - } - - while ((left_limit <= right_limit) && - (pixels_below[right_limit - x_offset] != foreground_colour)) - right_limit--; //find black extremity - - if ((left_limit <= right_limit) && (right_limit > max[i - 1])) { - max[i - 1] = right_limit;//widen right if poss - changed = TRUE; - } - } - - /* Extend up by one row */ - if (pixels_above != NULL) { - if (min[i] < min[i + 1]) { //row goes left of row above - if (prev == NULL || prev->min[i + 1] > prev->max[i + 1]) - left_limit = min[i]; - else - left_limit = LARGEST (min[i], prev->max[i + 1] + 1); - } - else - left_limit = min[i + 1]; - - if (max[i] > max[i + 1]) { //row goes right of row above - if (next == NULL || next->min[i + 1] > next->max[i + 1]) - right_limit = max[i]; - else - right_limit = SMALLEST (max[i], next->min[i + 1] - 1); - } - else - right_limit = max[i + 1]; - - while ((left_limit <= right_limit) && - (pixels_above[left_limit - x_offset] != foreground_colour)) - left_limit++; //find black extremity - - if ((left_limit <= right_limit) && (left_limit < min[i + 1])) { - min[i + 1] = left_limit; //widen left if poss - changed = TRUE; - } - - while ((left_limit <= right_limit) && - (pixels_above[right_limit - x_offset] != foreground_colour)) - right_limit--; //find black extremity - - if ((left_limit <= right_limit) && (right_limit > max[i + 1])) { - max[i + 1] = right_limit;//widen right if poss - changed = TRUE; - } - } - } - return changed; -} - - -/************************************************************************* - * PIXROW::char_clip_image() - * Cut out a sub image for a character - *************************************************************************/ - -void PIXROW::char_clip_image( //box of imlines extnt - IMAGELINE *imlines, - TBOX &im_box, - ROW *row, //row containing word - IMAGE &clip_image, //unscaled sq subimage - float &baseline_pos //baseline ht in image - ) { - inT16 clip_image_xsize; //sub image x size - inT16 clip_image_ysize; //sub image y size - inT16 x_shift; //from pixrow to subim - inT16 y_shift; //from pixrow to subim - TBOX char_pix_box; //bbox of char pixels - inT16 y_dest; - inT16 x_min; - inT16 x_max; - inT16 x_min_dest; - inT16 x_max_dest; - inT16 x_width; - inT16 y; - - clip_image_xsize = clip_image.get_xsize (); - clip_image_ysize = clip_image.get_ysize (); - - char_pix_box = bounding_box (); - /* - The y shift is calculated by first finding the coord of the bottom of the - image relative to the image lines. Then reducing this so by the amount - relative to the clip image size, necessary to vertically position the - character. - */ - y_shift = char_pix_box.bottom () - row_offset - - (inT16) floor ((clip_image_ysize - char_pix_box.height () + 0.5) / 2); - - /* - The x_shift is the shift to be applied to the page coord in the pixrow to - generate a centred char in the clip image. Thus the left hand edge of the - char is shifted to the margin width of the centred character. - */ - x_shift = char_pix_box.left () - - (inT16) floor ((clip_image_xsize - char_pix_box.width () + 0.5) / 2); - - for (y = 0; y < row_count; y++) { - /* - Check that there is something in this row of the source that will fit in the - sub image. If there is, reduce x range if necessary, then copy it - */ - y_dest = y - y_shift; - if ((min[y] <= max[y]) && (y_dest >= 0) && (y_dest < clip_image_ysize)) { - x_min = min[y]; - x_min_dest = x_min - x_shift; - if (x_min_dest < 0) { - x_min = x_min - x_min_dest; - x_min_dest = 0; - } - x_max = max[y]; - x_max_dest = x_max - x_shift; - if (x_max_dest > clip_image_xsize - 1) { - x_max = x_max - (x_max_dest - (clip_image_xsize - 1)); - x_max_dest = clip_image_xsize - 1; - } - x_width = x_max - x_min + 1; - if (x_width > 0) { - x_min -= im_box.left (); - //offset pixel ptr - imlines[y].pixels += x_min; - clip_image.put_line (x_min_dest, y_dest, x_width, imlines + y, - 0); - imlines[y].init (); //reset pixel ptr - } - } - } - /* - Baseline position relative to clip image: First find the baseline relative - to the page origin at the x coord of the centre of the character. Then - make this relative to the character bottom. Finally shift by the margin - between the bottom of the character and the bottom of the clip image. - */ - if (row == NULL) - baseline_pos = 0; //Not needed - else - baseline_pos = row->base_line ((char_pix_box.left () + - char_pix_box.right ()) / 2.0) - - char_pix_box.bottom () - + ((clip_image_ysize - char_pix_box.height ()) / 2); -} - - -/************************************************************************* - * char_clip_word() - * - * Generate a PIXROW_LIST with one element for each blob in the word, together - * with the image lines for the whole word. - *************************************************************************/ - -void char_clip_word( // - WERD *word, //word to be processed - IMAGE &bin_image, //whole image - PIXROW_LIST *&pixrow_list, //pixrows built - IMAGELINE *&imlines, //lines cut from image - TBOX &pix_box //box defining imlines - ) { - TBOX word_box = word->bounding_box (); - PBLOB_LIST *blob_list; - PBLOB_IT blob_it; - PIXROW_IT pixrow_it; - inT16 pix_offset; //Y pos of pixrow[0] - inT16 row_height; //No of pix rows - inT16 imlines_x_offset; - PIXROW *prev; - PIXROW *next; - PIXROW *current; - BOOL8 changed; //still improving - BOOL8 just_changed; //still improving - inT16 iteration_count = 0; - inT16 foreground_colour; - - if (word->flag (W_INVERSE)) - foreground_colour = 1; - else - foreground_colour = 0; - - /* Define region for max pixrow expansion */ - pix_box = word_box; - pix_box.move_bottom_edge (-kPixWordMargin); - pix_box.move_top_edge (kPixWordMargin); - pix_box.move_left_edge (-kPixWordMargin); - pix_box.move_right_edge (kPixWordMargin); - pix_box -= TBOX (ICOORD (0, 0 + BUG_OFFSET), - ICOORD (bin_image.get_xsize (), - bin_image.get_ysize () - BUG_OFFSET)); - - /* Generate pixrows list */ - - pix_offset = pix_box.bottom (); - row_height = pix_box.height (); - blob_list = word->blob_list (); - blob_it.set_to_list (blob_list); - - pixrow_list = new PIXROW_LIST; - pixrow_it.set_to_list (pixrow_list); - - for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { - PIXROW *row = new PIXROW (pix_offset, row_height, blob_it.data ()); - ASSERT_HOST (!row-> - bad_box (bin_image.get_xsize (), bin_image.get_ysize ())); - pixrow_it.add_after_then_move (row); - } - - imlines = generate_imlines (bin_image, pix_box); - - /* Contract pixrows - shrink min and max back to black pixels */ - - imlines_x_offset = pix_box.left (); - - pixrow_it.move_to_first (); - for (pixrow_it.mark_cycle_pt (); - !pixrow_it.cycled_list (); pixrow_it.forward ()) { - ASSERT_HOST (!pixrow_it.data ()-> - bad_box (bin_image.get_xsize (), bin_image.get_ysize ())); - pixrow_it.data ()->contract (imlines, imlines_x_offset, - foreground_colour); - ASSERT_HOST (!pixrow_it.data ()-> - bad_box (bin_image.get_xsize (), bin_image.get_ysize ())); - } - - /* Expand pixrows iteratively 1 pixel at a time */ - do { - changed = FALSE; - pixrow_it.move_to_first (); - prev = NULL; - current = NULL; - next = pixrow_it.data (); - for (pixrow_it.mark_cycle_pt (); - !pixrow_it.cycled_list (); pixrow_it.forward ()) { - prev = current; - current = next; - if (pixrow_it.at_last ()) - next = NULL; - else - next = pixrow_it.data_relative (1); - just_changed = current->extend (imlines, pix_box, prev, next, - foreground_colour); - ASSERT_HOST (!current-> - bad_box (bin_image.get_xsize (), - bin_image.get_ysize ())); - changed = changed || just_changed; - } - iteration_count++; - } - while (changed); -} - - -/************************************************************************* - * generate_imlines() - * Get an array of IMAGELINES holding a portion of an image - *************************************************************************/ - -IMAGELINE *generate_imlines( //get some imagelines - IMAGE &bin_image, //from here - TBOX &pix_box) { - IMAGELINE *imlines; //array of lines - int i; - - imlines = new IMAGELINE[pix_box.height ()]; - for (i = 0; i < pix_box.height (); i++) { - imlines[i].init (pix_box.width ()); - //coord to start at - bin_image.fast_get_line (pix_box.left (), - pix_box.bottom () + i + BUG_OFFSET, - //line to get - pix_box.width (), //width to get - imlines + i); //dest imline - } - return imlines; -} - - -/************************************************************************* - * display_clip_image() - * All the boring user interface bits to let you see what's going on - *************************************************************************/ - -#ifndef GRAPHICS_DISABLED -ScrollView* display_clip_image(WERD *word, //word to be processed - IMAGE &bin_image, //whole image - PIXROW_LIST *pixrow_list, //pixrows built - TBOX &pix_box //box of subimage - ) { - ScrollView* clip_window; //window for debug - TBOX word_box = word->bounding_box (); - int border = word_box.height () / 2; - TBOX display_box = word_box; - - display_box.move_bottom_edge (-border); - display_box.move_top_edge (border); - display_box.move_left_edge (-border); - display_box.move_right_edge (border); - display_box -= TBOX (ICOORD (0, 0 - BUG_OFFSET), - ICOORD (bin_image.get_xsize (), - bin_image.get_ysize () - BUG_OFFSET)); - - pgeditor_msg ("Creating Clip window..."); - clip_window = new ScrollView("Clipped Blobs", - editor_word_xpos, editor_word_ypos, - 3 * (word_box.width () + 2 * border), - 3 * (word_box.height () + 2 * border), - display_box.left () + display_box.right (), - display_box.bottom () - BUG_OFFSET + - display_box.top () - BUG_OFFSET, - true); - // ymin, ymax - pgeditor_msg ("Creating Clip window...Done"); - - clip_window->Clear(); - sv_show_sub_image (&bin_image, - display_box.left (), - display_box.bottom (), - display_box.width (), - display_box.height (), - clip_window, - display_box.left (), display_box.bottom () - BUG_OFFSET); - - word->plot (clip_window, ScrollView::RED); - word_box.plot (clip_window, ScrollView::BLUE, ScrollView::BLUE); - pix_box.plot (clip_window, ScrollView::BLUE, ScrollView::BLUE); - plot_pixrows(pixrow_list, clip_window); - return clip_window; -} - - -/************************************************************************* - * display_images() - * Show a pair of clip and scaled character images and wait for key before - * continuing. - *************************************************************************/ - -void display_images(IMAGE &clip_image, IMAGE &scaled_image) { - ScrollView* clip_im_window; //window for debug - ScrollView* scale_im_window; //window for debug - inT16 i; - - // xmin xmax ymin ymax - clip_im_window = new ScrollView ("Clipped Blob", editor_word_xpos - 20, - editor_word_ypos - 100, 5 * clip_image.get_xsize (), - 5 * clip_image.get_ysize (), clip_image.get_xsize (), - clip_image.get_ysize (), true); - - sv_show_sub_image (&clip_image, - 0, 0, - clip_image.get_xsize (), clip_image.get_ysize (), - clip_im_window, 0, 0); - - clip_im_window->Pen(255,0,0); - for (i = 1; i < clip_image.get_xsize (); i++) { - clip_im_window->SetCursor(i,0); - clip_im_window->DrawTo(i, clip_image.get_xsize ()); - } - for (i = 1; i < clip_image.get_ysize (); i++) { - clip_im_window->SetCursor(0,i); - clip_im_window->DrawTo(clip_image.get_xsize (),i); - - } - - // xmin xmax ymin ymax - scale_im_window = new ScrollView ("Scaled Blob", editor_word_xpos + 300, - editor_word_ypos - 100, 5 * scaled_image.get_xsize (), - 5 * scaled_image.get_ysize (), scaled_image.get_xsize (), - scaled_image.get_ysize (), true); - - sv_show_sub_image (&scaled_image, - 0, 0, - scaled_image.get_xsize (), scaled_image.get_ysize (), - scale_im_window, 0, 0); - - scale_im_window->Pen(255,0,0); - for (i = 1; i < scaled_image.get_xsize (); i++) { - scale_im_window->SetCursor(i,0); - scale_im_window->DrawTo(i, scaled_image.get_xsize ()); - } - for (i = 1; i < scaled_image.get_ysize (); i++) { - scale_im_window->SetCursor(0,i); - scale_im_window->DrawTo(scaled_image.get_xsize (),i); - } - - ScrollView::Update(); -} - - -/************************************************************************* - * plot_pixrows() - * Display a list of pixrows - *************************************************************************/ - -void plot_pixrows( //plot for all blobs - PIXROW_LIST *pixrow_list, - ScrollView* win) { - PIXROW_IT pixrow_it(pixrow_list); - inT16 colour = ScrollView::RED; - - for (pixrow_it.mark_cycle_pt (); - !pixrow_it.cycled_list (); pixrow_it.forward ()) { - if (colour > ScrollView::RED + 7) - colour = ScrollView::RED; - - win->Pen((ScrollView::Color) colour); - pixrow_it.data ()->plot (win); - colour++; - } -} -#endif diff --git a/ccmain/charcut.h b/ccmain/charcut.h deleted file mode 100644 index 838ce8692..000000000 --- a/ccmain/charcut.h +++ /dev/null @@ -1,146 +0,0 @@ -/********************************************************************** - * File: charcut.h (Formerly charclip.h) - * Description: Code for character clipping - * Author: Phil Cheatle - * Created: Wed Nov 11 08:35:15 GMT 1992 - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef CHARCUT_H -#define CHARCUT_H - -#include "pgedit.h" -#include "notdll.h" -#include "notdll.h" -class ScrollView; - -/** - * @class PIXROW - * - * This class describes the pixels occupied by a blob. It uses two arrays, (min - * and max), each with one element per row, to identify the min and max x - * coordinates of the black pixels in the character on that row of the image. - * The number of rows used to describe the blob is held in row_count - note that - * some rows may be unoccupied - signified by max < min. The page coordinate of - * the row defined by min[0] and max[0] is held in row_offset. - */ - -const int kPixWordMargin = 3; // how far outside word BB to grow - -class PIXROW:public ELIST_LINK -{ - public: - inT16 row_offset; ///< y coord of min[0] - inT16 row_count; ///< length of arrays - inT16 *min; ///< array of min x - inT16 *max; ///< array of max x - /** empty constructor */ - PIXROW() { - row_offset = 0; - row_count = 0; - min = NULL; - max = NULL; - } - /** specified size */ - PIXROW( - inT16 pos, - inT16 count, - PBLOB *blob); - /** destructor */ - ~PIXROW () { - if (min != NULL) - free_mem(min); - if (max != NULL) - free_mem(max); - max = NULL; - } - - /** - * use current settings - * @param fd where to paint - */ - void plot(ScrollView* fd) const; - - /** - * return bounding box - * @return true if box exceeds image - */ - TBOX bounding_box() const; - - bool bad_box(int xsize, int ysize) const; - - /** - * force end on black - * @param imlines image array - * @param x_offset of pixels[0] - * @param foreground_colour 0 or 1 - */ - void contract( - IMAGELINE *imlines, - inT16 x_offset, - inT16 foreground_colour); - - /** - * @param imlines image array - * @param imbox image box - * @param prev for prev blob - * @param next for next blob - * @param foreground_colour 0 or 1 - */ - BOOL8 extend(IMAGELINE *imlines, - TBOX &imbox, - PIXROW *prev, - PIXROW *next, - inT16 foreground_colour); - - /** - * @param imlines box of imlines extnt - * @param imbox image box - * @param row row containing word - * @param clip_image unscaled char image - * @param baseline_pos baseline ht in image - */ - void char_clip_image(IMAGELINE *imlines, - TBOX &im_box, - ROW *row, - IMAGE &clip_image, - float &baseline_pos); - -}; - -ELISTIZEH (PIXROW) -void char_clip_word( - WERD *word, ///< word to be processed - IMAGE &bin_image, ///< whole image - PIXROW_LIST *&pixrow_list, ///< pixrows built - IMAGELINE *&imlines, ///< lines cut from image - TBOX &pix_box ///< box defining imlines - ); -/** get some imagelines */ -IMAGELINE *generate_imlines( - IMAGE &bin_image, ///< from here - TBOX &pix_box); - -ScrollView* display_clip_image(WERD *word, ///< word to be processed - IMAGE &bin_image, ///< whole image - PIXROW_LIST *pixrow_list, ///< pixrows built - TBOX &pix_box ///< box of subimage - ); -void display_images(IMAGE &clip_image, IMAGE &scaled_image); - -/** plot for all blobs */ -void plot_pixrows( - PIXROW_LIST *pixrow_list, - ScrollView* win); -#endif diff --git a/ccmain/control.cpp b/ccmain/control.cpp index d895fbebe..075b9fa39 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -36,9 +36,7 @@ #include "tessvars.h" #include "pgedit.h" #include "reject.h" -#include "charcut.h" #include "fixspace.h" -#include "genblob.h" #include "docqual.h" #include "control.h" #include "secname.h" @@ -61,12 +59,6 @@ const char* const kBackUpConfigFile = "tempconfigdata.config"; // Multiple of x-height to make a repeated word have spaces in it. const double kRepcharGapThreshold = 0.5; -CLISTIZEH (PBLOB) CLISTIZE (PBLOB) -/* DEBUGGING */ -inT16 blob_count(WERD *w) { - return w->blob_list ()->length (); -} - /** * recog_pseudo_word @@ -974,23 +966,6 @@ void Tesseract::fix_hyphens(WERD_RES *word_res, } // namespace tesseract -/** - * merge_blobs - * - * Add the outlines from blob2 to blob1. Blob2 is emptied but not deleted. - */ - -void merge_blobs( //combine 2 blobs - PBLOB *blob1, //dest blob - PBLOB *blob2 //source blob - ) { - OUTLINE_IT outline_it = blob1->out_list (); - //iterator - - outline_it.move_to_last (); //go to end - //do it - outline_it.add_list_after (blob2->out_list ()); -} namespace tesseract { diff --git a/ccmain/control.h b/ccmain/control.h index 5a1615436..080c82aa8 100644 --- a/ccmain/control.h +++ b/ccmain/control.h @@ -42,21 +42,4 @@ enum ACCEPTABLE_WERD_TYPE AC_UC_ABBREV ///< A.B.C. }; -typedef BOOL8 (*BLOB_REJECTOR) (PBLOB *, BLOB_CHOICE_IT *, void *); - -/** - * combine 2 blobs - * @param blob1 dest blob - * @param blob2 source blob - */ -void merge_blobs(PBLOB *blob1, PBLOB *blob2); -/** dump chars in word */ -void choice_dump_tester( - PBLOB *, ///< blob - DENORM *, ///< de-normaliser - BOOL8 correct, ///< ly segmented - char *text, ///< correct text - inT32 count, ///< chars in text - BLOB_CHOICE_LIST *ratings ///< list of results - ); #endif diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index a97157e5a..19287499f 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -24,12 +24,10 @@ #include "mfcpch.h" #include #include "docqual.h" -#include "tstruct.h" #include "tfacep.h" #include "reject.h" #include "tesscallback.h" #include "tessvars.h" -#include "genblob.h" #include "secname.h" #include "globals.h" #include "tesseractclass.h" diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index a7076164f..99cc34745 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -23,9 +23,9 @@ #include #include "reject.h" #include "statistc.h" -#include "genblob.h" #include "control.h" #include "fixspace.h" +#include "genblob.h" #include "tessvars.h" #include "tessbox.h" #include "secname.h" @@ -90,7 +90,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, if (!word_res_it_from.at_last()) { word_res_it_to = word_res_it_from; prevent_null_wd_fixsp = - word_res->word->gblob_list()->empty(); + word_res->word->cblob_list()->empty(); if (check_debug_pt(word_res, 60)) debug_fix_space_level.set_value(10); word_res_it_to.forward(); @@ -108,13 +108,13 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) { if (check_debug_pt(word_res, 60)) debug_fix_space_level.set_value(10); - if (word_res->word->gblob_list()->empty()) + if (word_res->word->cblob_list()->empty()) prevent_null_wd_fixsp = TRUE; word_res = word_res_it_to.forward(); } if (check_debug_pt(word_res, 60)) debug_fix_space_level.set_value(10); - if (word_res->word->gblob_list()->empty()) + if (word_res->word->cblob_list()->empty()) prevent_null_wd_fixsp = TRUE; if (prevent_null_wd_fixsp) { word_res_it_from = word_res_it_to; @@ -634,13 +634,11 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, if (blob_index < 0) return; - #ifndef SECURE_NAMES if (debug_fix_space_level > 1) { tprintf("FP fixspace working on \"%s\"\n", word_res->best_choice->unichar_string().string()); } - #endif - gblob_sort_list((PBLOB_LIST *)word_res->word->rej_cblob_list(), FALSE); + word_res->word->rej_cblob_list()->sort(c_blob_comparator); sub_word_list_it.add_after_stay_put(word_res_it.extract()); fix_noisy_space_list(sub_word_list, row, block); new_length = sub_word_list.length(); @@ -881,7 +879,7 @@ void fixspace_dbg(WERD_RES *word) { box.print(); tprintf(" \"%s\" ", word->best_choice->unichar_string().string()); tprintf("Blob count: %d (word); %d/%d (rebuild word)\n", - word->word->gblob_list()->length(), + word->word->cblob_list()->length(), word->rebuild_word->NumBlobs(), word->box_word->length()); word->reject_map.print(debug_fp); @@ -911,7 +909,6 @@ namespace tesseract { inT16 Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) { WERD_RES_IT word_it(&word_res_list); WERD_RES *word; - PBLOB_IT blob_it; inT16 word_length; inT16 score = 0; inT16 i; diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp index cafd80e43..b4e5224e4 100644 --- a/ccmain/osdetect.cpp +++ b/ccmain/osdetect.cpp @@ -31,7 +31,6 @@ #include "tabvector.h" #include "tesseractclass.h" #include "textord.h" -#include "tstruct.h" const int kMinCharactersToTry = 50; const int kMaxCharactersToTry = 5 * kMinCharactersToTry; diff --git a/ccmain/output.cpp b/ccmain/output.cpp index 7a361aa5b..dce3d478c 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -150,7 +150,6 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it, char map_chs[32]; //Only for unlv_tilde_crunch int txt_index = 0; BOOL8 need_reject = FALSE; - PBLOB_IT blob_it; //blobs UNICHAR_ID space = unicharset.unichar_to_id(" "); if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->length() == 0) && diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index e63b30dfb..28e28b517 100644 --- a/ccmain/pagesegmain.cpp +++ b/ccmain/pagesegmain.cpp @@ -135,8 +135,8 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, block->set_right_to_left(right_to_left()); block_it.add_to_end(block); } else { - // UNLV file present. Use PSM_SINGLE_COLUMN. - pageseg_mode = PSM_SINGLE_COLUMN; + // UNLV file present. Use PSM_SINGLE_BLOCK. + pageseg_mode = PSM_SINGLE_BLOCK; } bool single_column = !PSM_COL_FIND_ENABLED(pageseg_mode); bool osd_enabled = PSM_OSD_ENABLED(pageseg_mode); diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp index 6d9a2b3ef..f99acb801 100755 --- a/ccmain/pgedit.cpp +++ b/ccmain/pgedit.cpp @@ -31,7 +31,6 @@ #include #include -#include "genblob.h" #include "tordmain.h" #include "statistc.h" #include "debugwin.h" diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index 471f66a41..f3052008f 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -35,7 +35,6 @@ #include "memry.h" #include "reject.h" #include "tfacep.h" -#include "charcut.h" #include "imgs.h" #include "control.h" #include "docqual.h" diff --git a/ccmain/tfacep.h b/ccmain/tfacep.h index 80c9bd8fa..dd3e7850f 100644 --- a/ccmain/tfacep.h +++ b/ccmain/tfacep.h @@ -23,7 +23,6 @@ #include "hosthplb.h" #include "blobs.h" #include "tessarray.h" -#include "tstruct.h" #include "notdll.h" #include "oldlist.h" #include "permute.h" diff --git a/ccmain/tfacepp.cpp b/ccmain/tfacepp.cpp index 72c3661ba..8f7c74f47 100644 --- a/ccmain/tfacepp.cpp +++ b/ccmain/tfacepp.cpp @@ -32,7 +32,6 @@ #include "reject.h" #include "werd.h" #include "tfacep.h" -#include "tstruct.h" #include "tfacepp.h" #include "tessvars.h" #include "globals.h" diff --git a/ccmain/tfacepp.h b/ccmain/tfacepp.h index c9fd31a32..7484eec53 100644 --- a/ccmain/tfacepp.h +++ b/ccmain/tfacepp.h @@ -20,7 +20,6 @@ #ifndef TFACEPP_H #define TFACEPP_H -#include "tstruct.h" #include "ratngs.h" #include "blobs.h" #include "notdll.h" diff --git a/ccmain/tstruct.cpp b/ccmain/tstruct.cpp deleted file mode 100644 index 9f6074e8d..000000000 --- a/ccmain/tstruct.cpp +++ /dev/null @@ -1,147 +0,0 @@ -/********************************************************************** - * File: tstruct.cpp (Formerly tstruct.c) - * Description: Code to manipulate the structures of the C++/C interface. - * Author: Ray Smith - * Created: Thu Apr 23 15:49:29 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "mfcpch.h" -#include "ccstruct.h" -#include "helpers.h" -#include "tfacep.h" -#include "tstruct.h" -#include "makerow.h" -#include "ocrblock.h" - -/********************************************************************** - * make_tess_blob - * - * Make a single Tess style blob - **********************************************************************/ - -TBLOB *make_tess_blob(PBLOB *blob) { - TBLOB* tessblob = new TBLOB; - tessblob->outlines = make_tess_outlines(blob->out_list(), false); - tessblob->next = NULL; - return tessblob; -} - - -/********************************************************************** - * make_tess_outlines - * - * Make Tess style outlines from a list of OUTLINEs. - **********************************************************************/ - -TESSLINE *make_tess_outlines(OUTLINE_LIST *outlinelist, // List to convert. - bool is_holes) { // These are hole outlines. - OUTLINE_IT it = outlinelist; //iterator - OUTLINE *outline; //current outline - TESSLINE *head; //output list - TESSLINE *tail; //end of list - TESSLINE *tessoutline; - - head = NULL; - tail = NULL; - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - outline = it.data(); - tessoutline = new TESSLINE; - tessoutline->loop = make_tess_edgepts(outline->polypts(), - tessoutline->topleft, - tessoutline->botright); - if (tessoutline->loop == NULL) { - delete tessoutline; - continue; - } - tessoutline->start = tessoutline->loop->pos; - tessoutline->next = NULL; - tessoutline->is_hole = is_holes; - if (!outline->child()->empty()) - tessoutline->next = make_tess_outlines(outline->child(), true); - else - tessoutline->next = NULL; - if (head) - tail->next = tessoutline; - else - head = tessoutline; - while (tessoutline->next != NULL) - tessoutline = tessoutline->next; - tail = tessoutline; - } - return head; -} - - -/********************************************************************** - * make_tess_edgepts - * - * Make Tess style edgepts from a list of POLYPTs. - **********************************************************************/ - -EDGEPT *make_tess_edgepts( //make tess edgepts - POLYPT_LIST *edgeptlist, //list to convert - TPOINT &tl, //bounding box - TPOINT &br) { - inT32 index; - POLYPT_IT it = edgeptlist; //iterator - POLYPT *edgept; //current edgept - EDGEPT *head; //output list - EDGEPT *tail; //end of list - EDGEPT *tessedgept; - - head = NULL; - tail = NULL; - tl.x = MAX_INT16; - tl.y = -MAX_INT16; - br.x = -MAX_INT16; - br.y = MAX_INT16; - for (it.mark_cycle_pt(); !it.cycled_list ();) { - edgept = it.data(); - tessedgept = new EDGEPT; - tessedgept->pos.x = (inT16) edgept->pos.x(); - tessedgept->pos.y = (inT16) edgept->pos.y(); - UpdateRange(tessedgept->pos.x, &tl.x, &br.x); - UpdateRange(tessedgept->pos.y, &br.y, &tl.y); - if (head != NULL && - tessedgept->pos.x == tail->pos.x && - tessedgept->pos.y == tail->pos.y) { - delete tessedgept; - } - else { - for (index = 0; index < EDGEPTFLAGS; index++) - tessedgept->flags[index] = 0; - if (head != NULL) { - tail->vec.x = tessedgept->pos.x - tail->pos.x; - tail->vec.y = tessedgept->pos.y - tail->pos.y; - tessedgept->prev = tail; - } - tessedgept->next = head; - if (head) - tail->next = tessedgept; - else - head = tessedgept; - tail = tessedgept; - } - it.forward (); - } - head->prev = tail; - tail->vec.x = head->pos.x - tail->pos.x; - tail->vec.y = head->pos.y - tail->pos.y; - if (head == tail) { - delete head; - return NULL; //empty - } - return head; -} diff --git a/ccmain/tstruct.h b/ccmain/tstruct.h deleted file mode 100644 index 204f94c53..000000000 --- a/ccmain/tstruct.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************** - * File: tstruct.h (Formerly tstruct.h) - * Description: Code to manipulate the structures of the C++/C interface. - * Author: Ray Smith - * Created: Thu Apr 23 15:49:29 BST 1992 - * - * (C) Copyright 1992, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef TSTRUCT_H -#define TSTRUCT_H - -#include "werd.h" -#include "blobs.h" -#include "ratngs.h" -#include "notdll.h" - -TBLOB *make_tess_blob(PBLOB *blob); -TESSLINE *make_tess_outlines(OUTLINE_LIST *outlinelist, // List to convert - bool is_holes); // These are hole outlines. -EDGEPT *make_tess_edgepts( //make tess edgepts - POLYPT_LIST *edgeptlist, //list to convert - TPOINT &tl, //bounding box - TPOINT &br); -#endif