mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
109d1c8f21
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@286 d0cd1f9f-072b-0410-8dd7-cf729c803f20
533 lines
18 KiB
C++
533 lines
18 KiB
C++
/**********************************************************************
|
|
* File: tstruct.cpp (Formerly tstruct.c)
|
|
* Description: Code to manipulate the structures of the C++/C interface.
|
|
* Author: Ray Smith
|
|
* Created: Thu Apr 23 15:49:29 BST 1992
|
|
*
|
|
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
#include "mfcpch.h"
|
|
#include "tfacep.h"
|
|
#include "tstruct.h"
|
|
#include "makerow.h"
|
|
#include "ocrblock.h"
|
|
//#include "structures.h"
|
|
|
|
static ERRCODE BADFRAGMENTS = "Couldn't find matching fragment ends";
|
|
|
|
ELISTIZE (FRAGMENT)
|
|
//extern /*"C"*/ oldoutline(TESSLINE*);
|
|
/**********************************************************************
|
|
* FRAGMENT::FRAGMENT
|
|
*
|
|
* Constructor for fragments.
|
|
**********************************************************************/
|
|
FRAGMENT::FRAGMENT ( //constructor
|
|
EDGEPT * head_pt, //start point
|
|
EDGEPT * tail_pt //end point
|
|
):head (head_pt->pos.x, head_pt->pos.y), tail (tail_pt->pos.x,
|
|
tail_pt->pos.y) {
|
|
headpt = head_pt; // save ptrs
|
|
tailpt = tail_pt;
|
|
}
|
|
|
|
// Helper function to make a fake PBLOB formed from the bounding box
|
|
// of the given old-format outline.
|
|
static PBLOB* MakeRectBlob(TESSLINE* ol) {
|
|
POLYPT_LIST poly_list;
|
|
POLYPT_IT poly_it = &poly_list;
|
|
FCOORD pos, vec;
|
|
POLYPT *polypt;
|
|
|
|
// Create points at each of the 4 corners of the rectangle in turn.
|
|
pos = FCOORD(ol->topleft.x, ol->topleft.y);
|
|
vec = FCOORD(0.0f, ol->botright.y - ol->topleft.y);
|
|
polypt = new POLYPT(pos, vec);
|
|
poly_it.add_after_then_move(polypt);
|
|
pos = FCOORD(ol->topleft.x, ol->botright.y);
|
|
vec = FCOORD(ol->botright.x - ol->topleft.x, 0.0f);
|
|
polypt = new POLYPT(pos, vec);
|
|
poly_it.add_after_then_move(polypt);
|
|
pos = FCOORD(ol->botright.x, ol->botright.y);
|
|
vec = FCOORD(0.0f, ol->topleft.y - ol->botright.y);
|
|
polypt = new POLYPT(pos, vec);
|
|
poly_it.add_after_then_move(polypt);
|
|
pos = FCOORD(ol->botright.x, ol->topleft.y);
|
|
vec = FCOORD(ol->topleft.x - ol->botright.x, 0.0f);
|
|
polypt = new POLYPT(pos, vec);
|
|
poly_it.add_after_then_move(polypt);
|
|
|
|
OUTLINE_LIST out_list;
|
|
OUTLINE_IT out_it = &out_list;
|
|
out_it.add_after_then_move(new OUTLINE(&poly_it));
|
|
return new PBLOB(&out_list);
|
|
}
|
|
|
|
/**********************************************************************
|
|
* make_ed_word
|
|
*
|
|
* Make an editor format word from the tess style word.
|
|
**********************************************************************/
|
|
|
|
WERD *make_ed_word( //construct word
|
|
TWERD *tessword, //word to convert
|
|
WERD *clone //clone this one
|
|
) {
|
|
WERD *word; //converted word
|
|
TBLOB *tblob; //current blob
|
|
PBLOB *blob; //new blob
|
|
PBLOB_LIST blobs; //list of blobs
|
|
PBLOB_IT blob_it = &blobs; //iterator
|
|
|
|
for (tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) {
|
|
blob = make_ed_blob (tblob);
|
|
if (blob == NULL && tblob->outlines != NULL) {
|
|
// Make a fake blob using the bounding box rectangle of the 1st outline.
|
|
blob = MakeRectBlob(tblob->outlines);
|
|
}
|
|
if (blob != NULL) {
|
|
blob_it.add_after_then_move (blob);
|
|
}
|
|
}
|
|
if (!blobs.empty ())
|
|
word = new WERD (&blobs, clone);
|
|
else
|
|
word = NULL;
|
|
return word;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_ed_blob
|
|
*
|
|
* Make an editor format blob from the tess style blob.
|
|
**********************************************************************/
|
|
|
|
PBLOB *make_ed_blob( //construct blob
|
|
TBLOB *tessblob //blob to convert
|
|
) {
|
|
TESSLINE *tessol; //tess outline
|
|
FRAGMENT_LIST fragments; //list of fragments
|
|
OUTLINE *outline; //current outline
|
|
OUTLINE_LIST out_list; //list of outlines
|
|
OUTLINE_IT out_it = &out_list; //iterator
|
|
|
|
for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) {
|
|
//stick in list
|
|
register_outline(tessol, &fragments);
|
|
}
|
|
while (!fragments.empty ()) {
|
|
outline = make_ed_outline (&fragments);
|
|
if (outline != NULL) {
|
|
out_it.add_after_then_move (outline);
|
|
}
|
|
}
|
|
if (out_it.empty())
|
|
return NULL; //couldn't do it
|
|
return new PBLOB (&out_list); //turn to blob
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_ed_outline
|
|
*
|
|
* Make an editor format outline from the list of fragments.
|
|
**********************************************************************/
|
|
|
|
OUTLINE *make_ed_outline( //constructoutline
|
|
FRAGMENT_LIST *list //list of fragments
|
|
) {
|
|
FRAGMENT *fragment; //current fragment
|
|
EDGEPT *edgept; //current point
|
|
ICOORD headpos; //coords of head
|
|
ICOORD tailpos; //coords of tail
|
|
FCOORD pos; //coords of edgept
|
|
FCOORD vec; //empty
|
|
POLYPT *polypt; //current point
|
|
POLYPT_LIST poly_list; //list of point
|
|
POLYPT_IT poly_it = &poly_list;//iterator
|
|
FRAGMENT_IT fragment_it = list;//fragment
|
|
|
|
headpos = fragment_it.data ()->head;
|
|
do {
|
|
fragment = fragment_it.data ();
|
|
edgept = fragment->headpt; //start of segment
|
|
do {
|
|
pos = FCOORD (edgept->pos.x, edgept->pos.y);
|
|
vec = FCOORD (edgept->vec.x, edgept->vec.y);
|
|
polypt = new POLYPT (pos, vec);
|
|
//add to list
|
|
poly_it.add_after_then_move (polypt);
|
|
edgept = edgept->next;
|
|
}
|
|
while (edgept != fragment->tailpt);
|
|
tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
|
|
//get rid of it
|
|
delete fragment_it.extract ();
|
|
if (tailpos != headpos) {
|
|
if (fragment_it.empty ()) {
|
|
return NULL;
|
|
}
|
|
fragment_it.forward ();
|
|
//find next segment
|
|
for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
|
|
fragment_it.data ()->head != tailpos;
|
|
fragment_it.forward ());
|
|
if (fragment_it.data ()->head != tailpos) {
|
|
// It is legitimate for the heads to not all match to tails,
|
|
// since not all combinations of seams always make sense.
|
|
for (fragment_it.mark_cycle_pt ();
|
|
!fragment_it.cycled_list (); fragment_it.forward ()) {
|
|
fragment = fragment_it.extract ();
|
|
delete fragment;
|
|
}
|
|
return NULL; //can't do it
|
|
}
|
|
}
|
|
}
|
|
while (tailpos != headpos);
|
|
return new OUTLINE (&poly_it); //turn to outline
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* register_outline
|
|
*
|
|
* Add the fragments in the given outline to the list
|
|
**********************************************************************/
|
|
|
|
void register_outline( //add fragments
|
|
TESSLINE *outline, //tess format
|
|
FRAGMENT_LIST *list //list to add to
|
|
) {
|
|
EDGEPT *startpt; //start of outline
|
|
EDGEPT *headpt; //start of fragment
|
|
EDGEPT *tailpt; //end of fragment
|
|
FRAGMENT *fragment; //new fragment
|
|
FRAGMENT_IT it = list; //iterator
|
|
|
|
startpt = outline->loop;
|
|
do {
|
|
startpt = startpt->next;
|
|
if (startpt == NULL)
|
|
return; //illegal!
|
|
}
|
|
while (startpt->flags[0] == 0 && startpt != outline->loop);
|
|
headpt = startpt;
|
|
do
|
|
startpt = startpt->next;
|
|
while (startpt->flags[0] != 0 && startpt != headpt);
|
|
if (startpt->flags[0] != 0)
|
|
return; //all hidden!
|
|
|
|
headpt = startpt;
|
|
do {
|
|
tailpt = headpt;
|
|
do
|
|
tailpt = tailpt->next;
|
|
while (tailpt->flags[0] == 0 && tailpt != startpt);
|
|
fragment = new FRAGMENT (headpt, tailpt);
|
|
it.add_after_then_move (fragment);
|
|
while (tailpt->flags[0] != 0)
|
|
tailpt = tailpt->next;
|
|
headpt = tailpt;
|
|
}
|
|
while (tailpt != startpt);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_tess_row
|
|
*
|
|
* Make a fake row structure to pass to the tesseract matchers.
|
|
**********************************************************************/
|
|
|
|
void make_tess_row( //make fake row
|
|
DENORM *denorm, //row info
|
|
TEXTROW *tessrow //output row
|
|
) {
|
|
tessrow->baseline.segments = 1;
|
|
tessrow->baseline.xstarts[0] = -32767;
|
|
tessrow->baseline.xstarts[1] = 32767;
|
|
tessrow->baseline.quads[0].a = 0;
|
|
tessrow->baseline.quads[0].b = 0;
|
|
tessrow->baseline.quads[0].c = bln_baseline_offset;
|
|
tessrow->xheight.segments = 1;
|
|
tessrow->xheight.xstarts[0] = -32767;
|
|
tessrow->xheight.xstarts[1] = 32767;
|
|
tessrow->xheight.quads[0].a = 0;
|
|
tessrow->xheight.quads[0].b = 0;
|
|
tessrow->xheight.quads[0].c = bln_x_height + bln_baseline_offset;
|
|
tessrow->lineheight = bln_x_height;
|
|
if (denorm != NULL) {
|
|
tessrow->ascrise = denorm->row ()->ascenders () * denorm->scale ();
|
|
tessrow->descdrop = denorm->row ()->descenders () * denorm->scale ();
|
|
} else {
|
|
tessrow->ascrise = bln_baseline_offset;
|
|
tessrow->descdrop = -bln_baseline_offset;
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_tess_word
|
|
*
|
|
* Convert the word to Tess format.
|
|
**********************************************************************/
|
|
|
|
TWERD *make_tess_word( //convert word
|
|
WERD *word, //word to do
|
|
TEXTROW *row //fake row
|
|
) {
|
|
TWERD *tessword; //tess format
|
|
|
|
tessword = newword (); //use old allocator
|
|
tessword->row = row; //give them something
|
|
//copy string
|
|
tessword->correct = strsave (word->text ());
|
|
tessword->guess = NULL;
|
|
tessword->blobs = make_tess_blobs (word->blob_list ());
|
|
tessword->blanks = 1;
|
|
tessword->blobcount = word->blob_list ()->length ();
|
|
tessword->next = NULL;
|
|
return tessword;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_tess_blobs
|
|
*
|
|
* Make Tess style blobs from a list of BLOBs.
|
|
**********************************************************************/
|
|
|
|
TBLOB *make_tess_blobs( //make tess blobs
|
|
PBLOB_LIST *bloblist //list to convert
|
|
) {
|
|
PBLOB_IT it = bloblist; //iterator
|
|
PBLOB *blob; //current blob
|
|
TBLOB *head; //output list
|
|
TBLOB *tail; //end of list
|
|
TBLOB *tessblob;
|
|
|
|
head = NULL;
|
|
tail = NULL;
|
|
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
|
|
blob = it.data ();
|
|
tessblob = make_tess_blob (blob, TRUE);
|
|
if (head)
|
|
tail->next = tessblob;
|
|
else
|
|
head = tessblob;
|
|
tail = tessblob;
|
|
}
|
|
return head;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* make_rotated_tess_blob
|
|
*
|
|
* Make a single Tess style blob, applying the given rotation and
|
|
* renormalizing.
|
|
**********************************************************************/
|
|
TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
|
|
BOOL8 flatten) {
|
|
if (denorm != NULL && denorm->block() != NULL &&
|
|
denorm->block()->classify_rotation().y() != 0.0) {
|
|
TBOX box = blob->bounding_box();
|
|
int src_width = box.width();
|
|
int src_height = box.height();
|
|
src_width = static_cast<int>(src_width / denorm->scale() + 0.5);
|
|
src_height = static_cast<int>(src_height / denorm->scale() + 0.5);
|
|
int x_middle = (box.left() + box.right()) / 2;
|
|
int y_middle = (box.top() + box.bottom()) / 2;
|
|
PBLOB* rotated_blob = PBLOB::deep_copy(blob);
|
|
rotated_blob->move(FCOORD(-x_middle, -y_middle));
|
|
rotated_blob->rotate(denorm->block()->classify_rotation());
|
|
ICOORD median_size = denorm->block()->median_size();
|
|
int tolerance = median_size.x() / 8;
|
|
// TODO(dsl/rays) find a better normalization solution. In the mean time
|
|
// make it work for CJK by normalizing for Cap height in the same way
|
|
// as is applied in compute_block_xheight when the row is presumed to
|
|
// be ALLCAPS, i.e. the x-height is the fixed fraction
|
|
// blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
|
|
if (NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
|
|
NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance)) {
|
|
float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
|
|
/ textord_merge_x;
|
|
rotated_blob->scale(target_height / box.width());
|
|
rotated_blob->move(FCOORD(0.0f,
|
|
bln_baseline_offset -
|
|
rotated_blob->bounding_box().bottom()));
|
|
}
|
|
TBLOB* result = make_tess_blob(rotated_blob, flatten);
|
|
delete rotated_blob;
|
|
return result;
|
|
} else {
|
|
return make_tess_blob(blob, flatten);
|
|
}
|
|
}
|
|
|
|
/**********************************************************************
|
|
* make_tess_blob
|
|
*
|
|
* Make a single Tess style blob
|
|
**********************************************************************/
|
|
|
|
TBLOB *make_tess_blob( //make tess blob
|
|
PBLOB *blob, //blob to convert
|
|
BOOL8 flatten //flatten outline structure
|
|
) {
|
|
inT32 index;
|
|
TBLOB *tessblob;
|
|
|
|
tessblob = newblob ();
|
|
tessblob->outlines = (struct olinestruct *)
|
|
make_tess_outlines (blob->out_list (), flatten);
|
|
for (index = 0; index < TBLOBFLAGS; index++)
|
|
tessblob->flags[index] = 0; //!!
|
|
tessblob->correct = 0;
|
|
tessblob->guess = 0;
|
|
for (index = 0; index < MAX_WO_CLASSES; index++) {
|
|
tessblob->classes[index] = 0;
|
|
tessblob->values[index] = 0;
|
|
}
|
|
tessblob->next = NULL;
|
|
return tessblob;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_tess_outlines
|
|
*
|
|
* Make Tess style outlines from a list of OUTLINEs.
|
|
**********************************************************************/
|
|
|
|
TESSLINE *make_tess_outlines( //make tess outlines
|
|
OUTLINE_LIST *outlinelist, //list to convert
|
|
BOOL8 flatten //flatten outline structure
|
|
) {
|
|
OUTLINE_IT it = outlinelist; //iterator
|
|
OUTLINE *outline; //current outline
|
|
TESSLINE *head; //output list
|
|
TESSLINE *tail; //end of list
|
|
TESSLINE *tessoutline;
|
|
|
|
head = NULL;
|
|
tail = NULL;
|
|
for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
|
|
outline = it.data ();
|
|
tessoutline = newoutline ();
|
|
tessoutline->compactloop = NULL;
|
|
tessoutline->loop = make_tess_edgepts (outline->polypts (),
|
|
tessoutline->topleft,
|
|
tessoutline->botright);
|
|
if (tessoutline->loop == NULL) {
|
|
oldoutline(tessoutline);
|
|
continue;
|
|
}
|
|
tessoutline->start = tessoutline->loop->pos;
|
|
tessoutline->node = NULL;
|
|
tessoutline->next = NULL;
|
|
tessoutline->child = NULL;
|
|
if (!outline->child ()->empty ()) {
|
|
if (flatten)
|
|
tessoutline->next = (struct olinestruct *)
|
|
make_tess_outlines (outline->child (), flatten);
|
|
else {
|
|
tessoutline->next = NULL;
|
|
tessoutline->child = (struct olinestruct *)
|
|
make_tess_outlines (outline->child (), flatten);
|
|
}
|
|
}
|
|
else
|
|
tessoutline->next = NULL;
|
|
if (head)
|
|
tail->next = tessoutline;
|
|
else
|
|
head = tessoutline;
|
|
while (tessoutline->next != NULL)
|
|
tessoutline = tessoutline->next;
|
|
tail = tessoutline;
|
|
}
|
|
return head;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* make_tess_edgepts
|
|
*
|
|
* Make Tess style edgepts from a list of POLYPTs.
|
|
**********************************************************************/
|
|
|
|
EDGEPT *make_tess_edgepts( //make tess edgepts
|
|
POLYPT_LIST *edgeptlist, //list to convert
|
|
TPOINT &tl, //bounding box
|
|
TPOINT &br) {
|
|
inT32 index;
|
|
POLYPT_IT it = edgeptlist; //iterator
|
|
POLYPT *edgept; //current edgept
|
|
EDGEPT *head; //output list
|
|
EDGEPT *tail; //end of list
|
|
EDGEPT *tessedgept;
|
|
|
|
head = NULL;
|
|
tail = NULL;
|
|
tl.x = MAX_INT16;
|
|
tl.y = -MAX_INT16;
|
|
br.x = -MAX_INT16;
|
|
br.y = MAX_INT16;
|
|
for (it.mark_cycle_pt (); !it.cycled_list ();) {
|
|
edgept = it.data ();
|
|
tessedgept = newedgept ();
|
|
tessedgept->pos.x = (inT16) edgept->pos.x ();
|
|
tessedgept->pos.y = (inT16) edgept->pos.y ();
|
|
if (tessedgept->pos.x < tl.x)
|
|
tl.x = tessedgept->pos.x;
|
|
if (tessedgept->pos.x > br.x)
|
|
br.x = tessedgept->pos.x;
|
|
if (tessedgept->pos.y > tl.y)
|
|
tl.y = tessedgept->pos.y;
|
|
if (tessedgept->pos.y < br.y)
|
|
br.y = tessedgept->pos.y;
|
|
if (head != NULL && tessedgept->pos.x == tail->pos.x
|
|
&& tessedgept->pos.y == tail->pos.y) {
|
|
oldedgept(tessedgept);
|
|
}
|
|
else {
|
|
for (index = 0; index < EDGEPTFLAGS; index++)
|
|
tessedgept->flags[index] = 0;
|
|
if (head != NULL) {
|
|
tail->vec.x = tessedgept->pos.x - tail->pos.x;
|
|
tail->vec.y = tessedgept->pos.y - tail->pos.y;
|
|
tessedgept->prev = tail;
|
|
}
|
|
tessedgept->next = head;
|
|
if (head)
|
|
tail->next = tessedgept;
|
|
else
|
|
head = tessedgept;
|
|
tail = tessedgept;
|
|
}
|
|
it.forward ();
|
|
}
|
|
head->prev = tail;
|
|
tail->vec.x = head->pos.x - tail->pos.x;
|
|
tail->vec.y = head->pos.y - tail->pos.y;
|
|
if (head == tail) {
|
|
oldedgept(head);
|
|
return NULL; //empty
|
|
}
|
|
return head;
|
|
}
|