/**********************************************************************
 * File:        tstruct.cpp  (Formerly tstruct.c)
 * Description: Code to manipulate the structures of the C++/C interface.
 * Author:      Ray Smith
 * Created:     Thu Apr 23 15:49:29 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include "tfacep.h"
#include "tstruct.h"
#include "makerow.h"
#include "ocrblock.h"
//#include "structures.h"

static ERRCODE BADFRAGMENTS = "Couldn't find matching fragment ends";

ELISTIZE (FRAGMENT)
//extern /*"C"*/ oldoutline(TESSLINE*);

/**********************************************************************
 * FRAGMENT::FRAGMENT
 *
 * Constructor for fragments.
 * Records the coordinates of the two end points in head/tail and keeps
 * the original EDGEPT pointers in headpt/tailpt.
 **********************************************************************/
FRAGMENT::FRAGMENT(EDGEPT *head_pt,   // start point
                   EDGEPT *tail_pt)   // end point
    : head(head_pt->pos.x, head_pt->pos.y),
      tail(tail_pt->pos.x, tail_pt->pos.y) {
  // Remember the edge points themselves as well as their coordinates.
  headpt = head_pt;
  tailpt = tail_pt;
}

// Helper for MakeRectBlob: appends a new POLYPT built from the given
// position and vector after the iterator's current element and moves the
// iterator onto it.
static void AppendRectCorner(POLYPT_IT* corner_it, FCOORD pos, FCOORD vec) {
  corner_it->add_after_then_move(new POLYPT(pos, vec));
}

// Helper function to make a fake PBLOB formed from the bounding box
// of the given old-format outline.
// The four corners are visited in the order: (left, top), (left, bottom),
// (right, bottom), (right, top); each point's vector leads to the next
// corner in that order.
static PBLOB* MakeRectBlob(TESSLINE* ol) {
  POLYPT_LIST corners;
  POLYPT_IT corner_it = &corners;

  // (left, top), heading to (left, bottom).
  AppendRectCorner(&corner_it,
                   FCOORD(ol->topleft.x, ol->topleft.y),
                   FCOORD(0.0f, ol->botright.y - ol->topleft.y));
  // (left, bottom), heading to (right, bottom).
  AppendRectCorner(&corner_it,
                   FCOORD(ol->topleft.x, ol->botright.y),
                   FCOORD(ol->botright.x - ol->topleft.x, 0.0f));
  // (right, bottom), heading to (right, top).
  AppendRectCorner(&corner_it,
                   FCOORD(ol->botright.x, ol->botright.y),
                   FCOORD(0.0f, ol->topleft.y - ol->botright.y));
  // (right, top), heading back to (left, top).
  AppendRectCorner(&corner_it,
                   FCOORD(ol->botright.x, ol->topleft.y),
                   FCOORD(ol->topleft.x - ol->botright.x, 0.0f));

  // Wrap the four points in a single outline and that outline in a blob.
  OUTLINE_LIST rect_outlines;
  OUTLINE_IT rect_it = &rect_outlines;
  rect_it.add_after_then_move(new OUTLINE(&corner_it));
  return new PBLOB(&rect_outlines);
}

/**********************************************************************
 * make_ed_word
 *
 * Make an editor format word from the tess style word.
 * Each TBLOB in tessword->blobs is converted with make_ed_blob. When
 * that conversion fails but the blob still has outlines, a rectangle
 * built from the first outline's bounding box stands in for it.
 * Returns NULL if no blob at all could be produced.
 **********************************************************************/
WERD *make_ed_word(TWERD *tessword,   // word to convert
                   WERD *clone) {     // clone this one
  PBLOB_LIST converted;                    // blobs gathered so far
  PBLOB_IT converted_it = &converted;

  for (TBLOB *src = tessword->blobs; src != NULL; src = src->next) {
    PBLOB *blob = make_ed_blob(src);
    if (blob == NULL) {
      if (src->outlines == NULL)
        continue;                          // nothing to fall back on
      // Make a fake blob using the bounding box rectangle of the 1st outline.
      blob = MakeRectBlob(src->outlines);
    }
    if (blob != NULL)
      converted_it.add_after_then_move(blob);
  }

  if (converted.empty())
    return NULL;
  return new WERD(&converted, clone);
}

/**********************************************************************
 * make_ed_blob
 *
 * Make an editor format blob from the tess style blob.
**********************************************************************/ PBLOB *make_ed_blob( //construct blob TBLOB *tessblob //blob to convert ) { TESSLINE *tessol; //tess outline FRAGMENT_LIST fragments; //list of fragments OUTLINE *outline; //current outline OUTLINE_LIST out_list; //list of outlines OUTLINE_IT out_it = &out_list; //iterator for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) { //stick in list register_outline(tessol, &fragments); } while (!fragments.empty ()) { outline = make_ed_outline (&fragments); if (outline != NULL) { out_it.add_after_then_move (outline); } } if (out_it.empty()) return NULL; //couldn't do it return new PBLOB (&out_list); //turn to blob } /********************************************************************** * make_ed_outline * * Make an editor format outline from the list of fragments. **********************************************************************/ OUTLINE *make_ed_outline( //constructoutline FRAGMENT_LIST *list //list of fragments ) { FRAGMENT *fragment; //current fragment EDGEPT *edgept; //current point ICOORD headpos; //coords of head ICOORD tailpos; //coords of tail FCOORD pos; //coords of edgept FCOORD vec; //empty POLYPT *polypt; //current point POLYPT_LIST poly_list; //list of point POLYPT_IT poly_it = &poly_list;//iterator FRAGMENT_IT fragment_it = list;//fragment headpos = fragment_it.data ()->head; do { fragment = fragment_it.data (); edgept = fragment->headpt; //start of segment do { pos = FCOORD (edgept->pos.x, edgept->pos.y); vec = FCOORD (edgept->vec.x, edgept->vec.y); polypt = new POLYPT (pos, vec); //add to list poly_it.add_after_then_move (polypt); edgept = edgept->next; } while (edgept != fragment->tailpt); tailpos = ICOORD (edgept->pos.x, edgept->pos.y); //get rid of it delete fragment_it.extract (); if (tailpos != headpos) { if (fragment_it.empty ()) { return NULL; } fragment_it.forward (); //find next segment for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () && 
fragment_it.data ()->head != tailpos; fragment_it.forward ()); if (fragment_it.data ()->head != tailpos) { // It is legitimate for the heads to not all match to tails, // since not all combinations of seams always make sense. for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list (); fragment_it.forward ()) { fragment = fragment_it.extract (); delete fragment; } return NULL; //can't do it } } } while (tailpos != headpos); return new OUTLINE (&poly_it); //turn to outline } /********************************************************************** * register_outline * * Add the fragments in the given outline to the list **********************************************************************/ void register_outline( //add fragments TESSLINE *outline, //tess format FRAGMENT_LIST *list //list to add to ) { EDGEPT *startpt; //start of outline EDGEPT *headpt; //start of fragment EDGEPT *tailpt; //end of fragment FRAGMENT *fragment; //new fragment FRAGMENT_IT it = list; //iterator startpt = outline->loop; do { startpt = startpt->next; if (startpt == NULL) return; //illegal! } while (startpt->flags[0] == 0 && startpt != outline->loop); headpt = startpt; do startpt = startpt->next; while (startpt->flags[0] != 0 && startpt != headpt); if (startpt->flags[0] != 0) return; //all hidden! headpt = startpt; do { tailpt = headpt; do tailpt = tailpt->next; while (tailpt->flags[0] == 0 && tailpt != startpt); fragment = new FRAGMENT (headpt, tailpt); it.add_after_then_move (fragment); while (tailpt->flags[0] != 0) tailpt = tailpt->next; headpt = tailpt; } while (tailpt != startpt); } /********************************************************************** * make_tess_row * * Make a fake row structure to pass to the tesseract matchers. 
**********************************************************************/ void make_tess_row( //make fake row DENORM *denorm, //row info TEXTROW *tessrow //output row ) { tessrow->baseline.segments = 1; tessrow->baseline.xstarts[0] = -32767; tessrow->baseline.xstarts[1] = 32767; tessrow->baseline.quads[0].a = 0; tessrow->baseline.quads[0].b = 0; tessrow->baseline.quads[0].c = bln_baseline_offset; tessrow->xheight.segments = 1; tessrow->xheight.xstarts[0] = -32767; tessrow->xheight.xstarts[1] = 32767; tessrow->xheight.quads[0].a = 0; tessrow->xheight.quads[0].b = 0; tessrow->xheight.quads[0].c = bln_x_height + bln_baseline_offset; tessrow->lineheight = bln_x_height; if (denorm != NULL) { tessrow->ascrise = denorm->row ()->ascenders () * denorm->scale (); tessrow->descdrop = denorm->row ()->descenders () * denorm->scale (); } else { tessrow->ascrise = bln_baseline_offset; tessrow->descdrop = -bln_baseline_offset; } } /********************************************************************** * make_tess_word * * Convert the word to Tess format. **********************************************************************/ TWERD *make_tess_word( //convert word WERD *word, //word to do TEXTROW *row //fake row ) { TWERD *tessword; //tess format tessword = newword (); //use old allocator tessword->row = row; //give them something //copy string tessword->correct = strsave (word->text ()); tessword->guess = NULL; tessword->blobs = make_tess_blobs (word->blob_list ()); tessword->blanks = 1; tessword->blobcount = word->blob_list ()->length (); tessword->next = NULL; return tessword; } /********************************************************************** * make_tess_blobs * * Make Tess style blobs from a list of BLOBs. 
**********************************************************************/ TBLOB *make_tess_blobs( //make tess blobs PBLOB_LIST *bloblist //list to convert ) { PBLOB_IT it = bloblist; //iterator PBLOB *blob; //current blob TBLOB *head; //output list TBLOB *tail; //end of list TBLOB *tessblob; head = NULL; tail = NULL; for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { blob = it.data (); tessblob = make_tess_blob (blob, TRUE); if (head) tail->next = tessblob; else head = tessblob; tail = tessblob; } return head; } /********************************************************************** * make_rotated_tess_blob * * Make a single Tess style blob, applying the given rotation and * renormalizing. **********************************************************************/ TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob, BOOL8 flatten) { if (denorm != NULL && denorm->block() != NULL && denorm->block()->classify_rotation().y() != 0.0) { TBOX box = blob->bounding_box(); int src_width = box.width(); int src_height = box.height(); src_width = static_cast(src_width / denorm->scale() + 0.5); src_height = static_cast(src_height / denorm->scale() + 0.5); int x_middle = (box.left() + box.right()) / 2; int y_middle = (box.top() + box.bottom()) / 2; PBLOB* rotated_blob = PBLOB::deep_copy(blob); rotated_blob->move(FCOORD(-x_middle, -y_middle)); rotated_blob->rotate(denorm->block()->classify_rotation()); ICOORD median_size = denorm->block()->median_size(); int tolerance = median_size.x() / 8; // TODO(dsl/rays) find a better normalization solution. In the mean time // make it work for CJK by normalizing for Cap height in the same way // as is applied in compute_block_xheight when the row is presumed to // be ALLCAPS, i.e. 
the x-height is the fixed fraction // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc) if (NearlyEqual(src_width, static_cast(median_size.x()), tolerance) && NearlyEqual(src_height, static_cast(median_size.y()), tolerance)) { float target_height = bln_x_height * (textord_merge_x + textord_merge_asc) / textord_merge_x; rotated_blob->scale(target_height / box.width()); rotated_blob->move(FCOORD(0.0f, bln_baseline_offset - rotated_blob->bounding_box().bottom())); } TBLOB* result = make_tess_blob(rotated_blob, flatten); delete rotated_blob; return result; } else { return make_tess_blob(blob, flatten); } } /********************************************************************** * make_tess_blob * * Make a single Tess style blob **********************************************************************/ TBLOB *make_tess_blob( //make tess blob PBLOB *blob, //blob to convert BOOL8 flatten //flatten outline structure ) { inT32 index; TBLOB *tessblob; tessblob = newblob (); tessblob->outlines = (struct olinestruct *) make_tess_outlines (blob->out_list (), flatten); for (index = 0; index < TBLOBFLAGS; index++) tessblob->flags[index] = 0; //!! tessblob->correct = 0; tessblob->guess = 0; for (index = 0; index < MAX_WO_CLASSES; index++) { tessblob->classes[index] = 0; tessblob->values[index] = 0; } tessblob->next = NULL; return tessblob; } /********************************************************************** * make_tess_outlines * * Make Tess style outlines from a list of OUTLINEs. 
**********************************************************************/ TESSLINE *make_tess_outlines( //make tess outlines OUTLINE_LIST *outlinelist, //list to convert BOOL8 flatten //flatten outline structure ) { OUTLINE_IT it = outlinelist; //iterator OUTLINE *outline; //current outline TESSLINE *head; //output list TESSLINE *tail; //end of list TESSLINE *tessoutline; head = NULL; tail = NULL; for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { outline = it.data (); tessoutline = newoutline (); tessoutline->compactloop = NULL; tessoutline->loop = make_tess_edgepts (outline->polypts (), tessoutline->topleft, tessoutline->botright); if (tessoutline->loop == NULL) { oldoutline(tessoutline); continue; } tessoutline->start = tessoutline->loop->pos; tessoutline->node = NULL; tessoutline->next = NULL; tessoutline->child = NULL; if (!outline->child ()->empty ()) { if (flatten) tessoutline->next = (struct olinestruct *) make_tess_outlines (outline->child (), flatten); else { tessoutline->next = NULL; tessoutline->child = (struct olinestruct *) make_tess_outlines (outline->child (), flatten); } } else tessoutline->next = NULL; if (head) tail->next = tessoutline; else head = tessoutline; while (tessoutline->next != NULL) tessoutline = tessoutline->next; tail = tessoutline; } return head; } /********************************************************************** * make_tess_edgepts * * Make Tess style edgepts from a list of POLYPTs. 
**********************************************************************/
// Builds a circular, doubly linked loop of EDGEPTs from the POLYPTs.
// Positions are cast to inT16. A point whose cast position equals that
// of the previously kept point is dropped (returned via oldedgept()).
// Each kept point's 'vec' is the difference between the following
// point's position and its own; the last point's vec leads back to the
// first.
//
// tl and br receive the bounding box of all visited points:
//   tl.x = smallest x, tl.y = largest y, br.x = largest x,
//   br.y = smallest y.
//
// Returns the first point of the loop, or NULL when the input list is
// empty or only a single distinct point remains. With an empty list,
// tl/br are left at their initial extreme values.
EDGEPT *make_tess_edgepts(POLYPT_LIST *edgeptlist,  // list to convert
                          TPOINT &tl,               // bounding box
                          TPOINT &br) {
  inT32 index;
  POLYPT_IT it = edgeptlist;
  POLYPT *edgept;                       // current source point
  EDGEPT *head = NULL;                  // first point of the output loop
  EDGEPT *tail = NULL;                  // last point added so far
  EDGEPT *tessedgept;

  // Start from an inverted box so that any point shrinks/expands it.
  tl.x = MAX_INT16;
  tl.y = -MAX_INT16;
  br.x = -MAX_INT16;
  br.y = MAX_INT16;

  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    edgept = it.data();
    tessedgept = newedgept();
    tessedgept->pos.x = (inT16) edgept->pos.x();
    tessedgept->pos.y = (inT16) edgept->pos.y();

    // Grow the bounding box.
    if (tessedgept->pos.x < tl.x)
      tl.x = tessedgept->pos.x;
    if (tessedgept->pos.x > br.x)
      br.x = tessedgept->pos.x;
    if (tessedgept->pos.y > tl.y)
      tl.y = tessedgept->pos.y;
    if (tessedgept->pos.y < br.y)
      br.y = tessedgept->pos.y;

    if (head != NULL && tessedgept->pos.x == tail->pos.x &&
        tessedgept->pos.y == tail->pos.y) {
      // Same position as the previous kept point: discard it.
      oldedgept(tessedgept);
    } else {
      for (index = 0; index < EDGEPTFLAGS; index++)
        tessedgept->flags[index] = 0;
      if (head != NULL) {
        // The previous point's vector now leads to this point.
        tail->vec.x = tessedgept->pos.x - tail->pos.x;
        tail->vec.y = tessedgept->pos.y - tail->pos.y;
        tessedgept->prev = tail;
      }
      // Keep the loop circular at all times.
      tessedgept->next = head;
      if (head)
        tail->next = tessedgept;
      else
        head = tessedgept;
      tail = tessedgept;
    }
  }

  // An empty input list produces no points at all; there is no loop to
  // close, so report it the same way as a degenerate outline.
  if (head == NULL)
    return NULL;

  // Close the loop: first point links back to the last one.
  head->prev = tail;
  tail->vec.x = head->pos.x - tail->pos.x;
  tail->vec.y = head->pos.y - tail->pos.y;
  if (head == tail) {
    // A single point is not an outline.
    oldedgept(head);
    return NULL;                        // empty
  }
  return head;
}