/********************************************************************** * File: edgblob.c (Formerly edgeloop.c) * Description: Functions to clean up an outline before approximation. * Author: Ray Smith * Created: Tue Mar 26 16:56:25 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #include "mfcpch.h" //#include "dirtab.h" #include "scanedg.h" #include "drawedg.h" #include "edgloop.h" #include "edgblob.h" #define EXTERN EXTERN INT_VAR (edges_children_per_grandchild, 10, "Importance ratio for chucking outlines"); EXTERN INT_VAR (edges_children_count_limit, 45, "Max holes allowed in blob"); EXTERN BOOL_VAR (edges_children_fix, FALSE, "Remove boxy parents of char-like children"); EXTERN INT_VAR (edges_min_nonhole, 12, "Min pixels for potential char in box"); EXTERN INT_VAR (edges_patharea_ratio, 40, "Max lensq/area for acceptable child outline"); EXTERN double_VAR (edges_childarea, 0.5, "Max area fraction of child outline"); EXTERN double_VAR (edges_boxarea, 0.8, "Min area fraction of grandchild for box"); /********************************************************************** * OL_BUCKETS::OL_BUCKETS * * Construct an array of buckets for associating outlines into blobs. **********************************************************************/ OL_BUCKETS::OL_BUCKETS ( ////constructor ICOORD bleft, //corners ICOORD tright): bl (bleft), tr (tright) { bxdim = (tright.x () - bleft.x ()) / BUCKETSIZE + 1; bydim = (tright.y () - bleft.y ()) / BUCKETSIZE + 1; //make array buckets = new C_OUTLINE_LIST[bxdim * bydim]; index = 0; } /********************************************************************** * OL_BUCKETS::operator( * * Return a pointer to a list of C_OUTLINEs corresponding to the * given pixel coordinates. **********************************************************************/ C_OUTLINE_LIST * OL_BUCKETS::operator () ( //array access INT16 x, //image coords INT16 y) { return &buckets[(y - bl.y ()) / BUCKETSIZE * bxdim + (x - bl.x ()) / BUCKETSIZE]; } /********************************************************************** * OL_BUCKETS::count_children * * Find number of descendants of this outline. **********************************************************************/ INT32 OL_BUCKETS::count_children( //recursive count C_OUTLINE *outline, //parent outline INT32 max_count //max output ) { BOOL8 parent_box; //could it be boxy INT16 xmin, xmax; //coord limits INT16 ymin, ymax; INT16 xindex, yindex; //current bucket C_OUTLINE *child; //current child INT32 child_count; //no of children INT32 grandchild_count; //no of grandchildren INT32 parent_area; //potential box FLOAT32 max_parent_area; //potential box INT32 child_area; //current child INT32 child_length; //current child BOX olbox; C_OUTLINE_IT child_it; //search iterator olbox = outline->bounding_box (); xmin = (olbox.left () - bl.x ()) / BUCKETSIZE; xmax = (olbox.right () - bl.x ()) / BUCKETSIZE; ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE; ymax = (olbox.top () - bl.y ()) / BUCKETSIZE; child_count = 0; grandchild_count = 0; parent_area = 0; max_parent_area = 0; parent_box = TRUE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list (&buckets[yindex * bxdim + xindex]); if (child_it.empty ()) continue; for (child_it.mark_cycle_pt (); !child_it.cycled_list (); child_it.forward ()) { child = child_it.data (); if (child != outline && *child < *outline) { child_count++; if (child_count <= max_count) grandchild_count += count_children (child, (max_count - child_count) / edges_children_per_grandchild) * edges_children_per_grandchild; if (child_count + grandchild_count > max_count) { /* err.log(RESULT_OKAY,E_LOC,ERR_OCR, ERR_SCROLLING,ERR_CONTINUE,ERR_DEBUG, "Discarding parent with child count=%d, gc=%d", child_count,grandchild_count);*/ return child_count + grandchild_count; } if (parent_area == 0) { parent_area = outline->outer_area (); if (parent_area < 0) parent_area = -parent_area; max_parent_area = outline->bounding_box ().width () * outline->bounding_box ().height () * edges_boxarea; if (parent_area < max_parent_area) parent_box = FALSE; } if (parent_box && (!edges_children_fix || child->bounding_box ().height () > edges_min_nonhole) /**/) { child_area = child->outer_area (); if (child_area < 0) child_area = -child_area; if (edges_children_fix) { if (parent_area - child_area < max_parent_area) { parent_box = FALSE; continue; } if (grandchild_count > 0) { /* err.log(RESULT_OKAY,E_LOC,ERR_OCR, ERR_SCROLLING,ERR_CONTINUE,ERR_DEBUG, "Discarding parent of area %d, child area=%d, max%g with gc=%d", parent_area,child_area,max_parent_area,grandchild_count);*/ return max_count + 1; } child_length = child->pathlength (); if (child_length * child_length > child_area * edges_patharea_ratio) { /*/ err.log(RESULT_OKAY,E_LOC,ERR_OCR, ERR_SCROLLING,ERR_CONTINUE,ERR_DEBUG, "Discarding parent of area %d, child area=%d, max%g with child length=%d", parent_area,child_area,max_parent_area,child_length);*/ return max_count + 1; } } if (child_area < child->bounding_box ().width () * child->bounding_box ().height () * edges_childarea) { /* err.log(RESULT_OKAY,E_LOC,ERR_OCR, ERR_SCROLLING,ERR_CONTINUE,ERR_DEBUG, "Discarding parent of area %d, child area=%d, max%g with child rect=%d", parent_area,child_area,max_parent_area,child->bounding_box().width() *child->bounding_box().height()); */ return max_count + 1; } } } } } } return child_count + grandchild_count; } /********************************************************************** * OL_BUCKETS::extract_children * * Find number of descendants of this outline. **********************************************************************/ void OL_BUCKETS::extract_children( //recursive count C_OUTLINE *outline, //parent outline C_OUTLINE_IT *it //destination iterator ) { INT16 xmin, xmax; //coord limits INT16 ymin, ymax; INT16 xindex, yindex; //current bucket BOX olbox; C_OUTLINE_IT child_it; //search iterator olbox = outline->bounding_box (); xmin = (olbox.left () - bl.x ()) / BUCKETSIZE; xmax = (olbox.right () - bl.x ()) / BUCKETSIZE; ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE; ymax = (olbox.top () - bl.y ()) / BUCKETSIZE; for (yindex = ymin; yindex <= ymax; yindex++) { for (xindex = xmin; xindex <= xmax; xindex++) { child_it.set_to_list (&buckets[yindex * bxdim + xindex]); for (child_it.mark_cycle_pt (); !child_it.cycled_list (); child_it.forward ()) { if (*child_it.data () < *outline) { it->add_after_then_move (child_it.extract ()); } } } } } /********************************************************************** * extract_edges * * Run the edge detector over the block and return a list of blobs. **********************************************************************/ void extract_edges( //find blobs #ifndef GRAPHICS_DISABLED WINDOW window, //window for output #endif IMAGE *image, //image to scan IMAGE *t_image, //thresholded image ICOORD page_tr, //corner of page BLOCK *block //block to scan ) { ICOORD bleft; //block box ICOORD tright; C_OUTLINE_LIST outlines; //outlines in block //iterator C_OUTLINE_IT out_it = &outlines; #ifndef GRAPHICS_DISABLED get_outlines (window, image, t_image, page_tr, (PDBLK *) block, &out_it); #else get_outlines (image, t_image, page_tr, (PDBLK *) block, &out_it); #endif //block box block->bounding_box (bleft, tright); //make blobs outlines_to_blobs(block, bleft, tright, &outlines); } /********************************************************************** * outlines_to_blobs * * Gather together outlines into blobs using the usual bucket sort. **********************************************************************/ void outlines_to_blobs( //find blobs BLOCK *block, //block to scan ICOORD bleft, //block box //outlines in block ICOORD tright, C_OUTLINE_LIST *outlines) { //make buckets OL_BUCKETS buckets(bleft, tright); fill_buckets(outlines, &buckets); empty_buckets(block, &buckets); } /********************************************************************** * fill_buckets * * Run the edge detector over the block and return a list of blobs. **********************************************************************/ void fill_buckets( //find blobs C_OUTLINE_LIST *outlines, //outlines in block OL_BUCKETS *buckets //output buckets ) { BOX ol_box; //outline box C_OUTLINE_IT out_it = outlines;//iterator C_OUTLINE_IT bucket_it; //iterator in bucket C_OUTLINE *outline; //current outline for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { outline = out_it.extract (); //take off list //get box ol_box = outline->bounding_box (); bucket_it.set_to_list ((*buckets) (ol_box.left (), ol_box.bottom ())); bucket_it.add_to_end (outline); } } /********************************************************************** * empty_buckets * * Run the edge detector over the block and return a list of blobs. **********************************************************************/ void empty_buckets( //find blobs BLOCK *block, //block to scan OL_BUCKETS *buckets //output buckets ) { BOOL8 good_blob; //healthy blob C_OUTLINE_LIST outlines; //outlines in block //iterator C_OUTLINE_IT out_it = &outlines; C_OUTLINE_IT bucket_it = buckets->start_scan (); C_OUTLINE_IT parent_it; //parent outline C_BLOB *blob; //new blob C_BLOB_IT good_blobs = block->blob_list (); C_BLOB_IT junk_blobs = block->reject_blobs (); while (!bucket_it.empty ()) { out_it.set_to_list (&outlines); do { parent_it = bucket_it; //find outermost do bucket_it.forward (); while (!bucket_it.at_first () && !(*parent_it.data () < *bucket_it.data ())); } while (!bucket_it.at_first ()); //move to new list out_it.add_after_then_move (parent_it.extract ()); good_blob = capture_children (buckets, &junk_blobs, &out_it); blob = new C_BLOB (&outlines); if (good_blob) good_blobs.add_after_then_move (blob); else junk_blobs.add_after_then_move (blob); bucket_it.set_to_list (buckets->scan_next ()); } } /********************************************************************** * capture_children * * Find all neighbouring outlines that are children of this outline * and either move them to the output list or declare this outline * illegal and return FALSE. **********************************************************************/ BOOL8 capture_children( //find children OL_BUCKETS *buckets, //bucket sort clanss C_BLOB_IT *reject_it, //dead grandchildren C_OUTLINE_IT *blob_it //output outlines ) { BOOL8 anydone; //anything canned C_OUTLINE *outline; //master outline C_OUTLINE *child; //child under test C_OUTLINE_IT test_it; //for grandchildren INT32 child_count; //no of children C_BLOB *blob; //reject C_OUTLINE_LIST r_list; //rejects C_OUTLINE_IT r_it; //iterator outline = blob_it->data (); child_count = buckets->count_children (outline, edges_children_count_limit); if (child_count > edges_children_count_limit) return FALSE; if (child_count == 0) return TRUE; //get single level buckets->extract_children (outline, blob_it); if (child_count == 1) return TRUE; do { anydone = FALSE; blob_it->move_to_first (); for (blob_it->mark_cycle_pt (); !blob_it->cycled_list (); blob_it->forward ()) { child = blob_it->data (); if (child != outline) { for (test_it = *blob_it, test_it.mark_cycle_pt (); !test_it.cycled_list (); test_it.forward ()) { if (test_it.data () != child && *test_it.data () < *child) { r_it.set_to_list (&r_list); r_it.add_after_then_move (test_it.extract ()); //turn to blob blob = new C_BLOB (&r_list); reject_it->add_after_then_move (blob); anydone = TRUE; } } if (anydone) break; //got to reatart } } } while (anydone); //got to restart return TRUE; }