mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
524a61452d
Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits:6317305
doxygen9f42f69
doxygen0fc4d52
doxygen37b4b55
fix typobded8f1
some more doxy020eb00
slight tweak524666d
doxygenify2a36a3e
doxygenify229d218
doxygenify7fd28ae
doxygenifya8c64bc
doxygenifyf5d21b6
fix5d8ede8
doxygenifya58a4e0
language_model.cppfa85709
lm_pain_points.cpp lm_state.cpp6418da3
merge06190ba
Merge branch 'old_doxygen_merge' into more-doxygen84acf08
Merge branch 'master' into more-doxygen50fe1ff
pagewalk.cpp cube_reco_context.cpp2982583
change to relative192a24a
applybox.cpp, take one8eeb053
delete docs for obsolete params52e4c77
modernise classify/ocrfeatures.cpp2a1cba6
modernise cutil/emalloc.cpp773e006
silence doxygen warningaeb1731
silence doxygen warningf18387f
silence doxygen; new params are unused?15ad6bd
doxygenify cutil/efio.cppc8b5dad
doxygenify cutil/danerror.cpp784450f
the globals and exceptions parts are obsolete; remove8bca324
doxygen classify/normfeat.cpp9bcbe16
doxygen classify/normmatch.cppaa9a971
doxygen ccmain/cube_control.cppc083ff2
doxygen ccmain/cube_reco_context.cppf842850
params changed5c94f12
doxygen ccmain/cubeclassifier.cpp15ba750
case sensitivef5c71d4
case sensitivef85655b
doxygen classify/intproto.cpp4bbc7aa
partial doxygen classify/mfx.cppdbb6041
partial doxygen classify/intproto.cpp2aa72db
finish doxygen classify/intproto.cpp0b8de99
doxygen training/mftraining.cpp0b5b35c
partial doxygen ccstruct/coutln.cppb81c766
partial doxygen ccstruct/coutln.cpp40fc415
finished? doxygen ccstruct/coutln.cpp6e4165c
doxygen classify/clusttool.cpp0267dec
doxygen classify/cutoffs.cpp7f0c70c
doxygen classify/fpoint.cpp512f3bd
ignore ~ files5668a52
doxygen classify/intmatcher.cpp84788d4
doxygen classify/kdtree.cpp29f36ca
doxygen classify/mfoutline.cpp40b94b1
silence doxygen warnings6c511b9
doxygen classify/mfx.cppf9b4080
doxygen classify/outfeat.cppaa1df05
doxygen classify/picofeat.cppcc5f466
doxygen training/cntraining.cppcce044f
doxygen training/commontraining.cpp167e216
missing param9498383
renamed params37eeac2
renamed paramd87b5dd
casec8ee174
renamed paramsb858db8
typo4c2a838
h2 context?81a2c0c
fix some param names; add some missing params, no docsbcf8a4c
add some missing params, no docsaf77f86
add some missing params, no docs; fix some param names01df24e
fix some params6161056
fix some params68508b6
fix some params285aeb6
doxygen complains here no matter what529bcfa
rm some missing params, typoscd21226
rm some missing params, add some new ones48a4bc2
fix paramsc844628
missing param312ce37
missing param; rename oneec2fdec
missing param05e15e0
missing paramsd515858
change "<" to < to make doxygen happyb476a28
wrong place
481 lines
16 KiB
C++
481 lines
16 KiB
C++
/******************************************************************************
|
|
** Filename: mfoutline.c
|
|
** Purpose: Interface to outline struct used for extracting features
|
|
** Author: Dan Johnson
|
|
** History: Thu May 17 08:14:18 1990, DSJ, Created.
|
|
**
|
|
** (c) Copyright Hewlett-Packard Company, 1988.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
******************************************************************************/
|
|
/*----------------------------------------------------------------------------
|
|
Include Files and Type Defines
|
|
----------------------------------------------------------------------------*/
|
|
#include "clusttool.h" //If removed you get caught in a loop somewhere
|
|
#include "emalloc.h"
|
|
#include "mfoutline.h"
|
|
#include "blobs.h"
|
|
#include "const.h"
|
|
#include "mfx.h"
|
|
#include "params.h"
|
|
#include "classify.h"
|
|
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
|
|
/*----------------------------------------------------------------------------
|
|
Public Code
|
|
----------------------------------------------------------------------------*/
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */
|
|
LIST ConvertBlob(TBLOB *blob) {
|
|
LIST outlines = NIL_LIST;
|
|
return (blob == NULL)
|
|
? NIL_LIST
|
|
: ConvertOutlines(blob->outlines, outlines, outer);
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/** Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. */
|
|
MFOUTLINE ConvertOutline(TESSLINE *outline) {
|
|
MFEDGEPT *NewPoint;
|
|
MFOUTLINE MFOutline = NIL_LIST;
|
|
EDGEPT *EdgePoint;
|
|
EDGEPT *StartPoint;
|
|
EDGEPT *NextPoint;
|
|
|
|
if (outline == NULL || outline->loop == NULL)
|
|
return MFOutline;
|
|
|
|
StartPoint = outline->loop;
|
|
EdgePoint = StartPoint;
|
|
do {
|
|
NextPoint = EdgePoint->next;
|
|
|
|
/* filter out duplicate points */
|
|
if (EdgePoint->pos.x != NextPoint->pos.x ||
|
|
EdgePoint->pos.y != NextPoint->pos.y) {
|
|
NewPoint = NewEdgePoint();
|
|
ClearMark(NewPoint);
|
|
NewPoint->Hidden = EdgePoint->IsHidden();
|
|
NewPoint->Point.x = EdgePoint->pos.x;
|
|
NewPoint->Point.y = EdgePoint->pos.y;
|
|
MFOutline = push(MFOutline, NewPoint);
|
|
}
|
|
EdgePoint = NextPoint;
|
|
} while (EdgePoint != StartPoint);
|
|
|
|
if (MFOutline != NULL)
|
|
MakeOutlineCircular(MFOutline);
|
|
return MFOutline;
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs).
|
|
*
|
|
* @param outline first outline to be converted
|
|
* @param mf_outlines list to add converted outlines to
|
|
* @param outline_type are the outlines outer or holes?
|
|
*/
|
|
LIST ConvertOutlines(TESSLINE *outline,
|
|
LIST mf_outlines,
|
|
OUTLINETYPE outline_type) {
|
|
MFOUTLINE mf_outline;
|
|
|
|
while (outline != NULL) {
|
|
mf_outline = ConvertOutline(outline);
|
|
if (mf_outline != NULL)
|
|
mf_outlines = push(mf_outlines, mf_outline);
|
|
outline = outline->next;
|
|
}
|
|
return mf_outlines;
|
|
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* This routine searches thru the specified outline, computes
|
|
* a slope for each vector in the outline, and marks each
|
|
* vector as having one of the following directions:
|
|
* N, S, E, W, NE, NW, SE, SW
|
|
* This information is then stored in the outline and the
|
|
* outline is returned.
|
|
* @param Outline micro-feature outline to analyze
|
|
* @param MinSlope controls "snapping" of segments to horizontal
|
|
* @param MaxSlope controls "snapping" of segments to vertical
|
|
* @return none
|
|
* @note Exceptions: none
|
|
* @note History: 7/21/89, DSJ, Created.
|
|
*/
|
|
void FindDirectionChanges(MFOUTLINE Outline,
|
|
FLOAT32 MinSlope,
|
|
FLOAT32 MaxSlope) {
|
|
MFEDGEPT *Current;
|
|
MFEDGEPT *Last;
|
|
MFOUTLINE EdgePoint;
|
|
|
|
if (DegenerateOutline (Outline))
|
|
return;
|
|
|
|
Last = PointAt (Outline);
|
|
Outline = NextPointAfter (Outline);
|
|
EdgePoint = Outline;
|
|
do {
|
|
Current = PointAt (EdgePoint);
|
|
ComputeDirection(Last, Current, MinSlope, MaxSlope);
|
|
|
|
Last = Current;
|
|
EdgePoint = NextPointAfter (EdgePoint);
|
|
}
|
|
while (EdgePoint != Outline);
|
|
|
|
} /* FindDirectionChanges */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* This routine deallocates all of the memory consumed by
|
|
* a micro-feature outline.
|
|
* @param arg micro-feature outline to be freed
|
|
* @return none
|
|
* @note Exceptions: none
|
|
* @note History: 7/27/89, DSJ, Created.
|
|
*/
|
|
void FreeMFOutline(void *arg) { //MFOUTLINE Outline)
|
|
MFOUTLINE Start;
|
|
MFOUTLINE Outline = (MFOUTLINE) arg;
|
|
|
|
/* break the circular outline so we can use std. techniques to deallocate */
|
|
Start = list_rest (Outline);
|
|
set_rest(Outline, NIL_LIST);
|
|
while (Start != NULL) {
|
|
free_struct (first_node (Start), sizeof (MFEDGEPT), "MFEDGEPT");
|
|
Start = pop (Start);
|
|
}
|
|
|
|
} /* FreeMFOutline */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* Release all memory consumed by the specified list
|
|
* of outlines.
|
|
* @param Outlines list of mf-outlines to be freed
|
|
* @return none
|
|
* @note Exceptions: none
|
|
* @note History: Thu Dec 13 16:14:50 1990, DSJ, Created.
|
|
*/
|
|
void FreeOutlines(LIST Outlines) {
|
|
destroy_nodes(Outlines, FreeMFOutline);
|
|
} /* FreeOutlines */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* This routine searches thru the specified outline and finds
|
|
* the points at which the outline changes direction. These
|
|
* points are then marked as "extremities". This routine is
|
|
* used as an alternative to FindExtremities(). It forces the
|
|
* endpoints of the microfeatures to be at the direction
|
|
* changes rather than at the midpoint between direction
|
|
* changes.
|
|
* @param Outline micro-feature outline to analyze
|
|
* @return none
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: 6/29/90, DSJ, Created.
|
|
*/
|
|
void MarkDirectionChanges(MFOUTLINE Outline) {
|
|
MFOUTLINE Current;
|
|
MFOUTLINE Last;
|
|
MFOUTLINE First;
|
|
|
|
if (DegenerateOutline (Outline))
|
|
return;
|
|
|
|
First = NextDirectionChange (Outline);
|
|
Last = First;
|
|
do {
|
|
Current = NextDirectionChange (Last);
|
|
MarkPoint (PointAt (Current));
|
|
Last = Current;
|
|
}
|
|
while (Last != First);
|
|
|
|
} /* MarkDirectionChanges */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/** Return a new edge point for a micro-feature outline. */
|
|
MFEDGEPT *NewEdgePoint() {
|
|
return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT"));
|
|
}
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* This routine returns the next point in the micro-feature
|
|
* outline that is an extremity. The search starts after
|
|
* EdgePoint. The routine assumes that the outline being
|
|
* searched is not a degenerate outline (i.e. it must have
|
|
* 2 or more edge points).
|
|
* @param EdgePoint start search from this point
|
|
* @return Next extremity in the outline after EdgePoint.
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: 7/26/89, DSJ, Created.
|
|
*/
|
|
MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) {
|
|
EdgePoint = NextPointAfter(EdgePoint);
|
|
while (!PointAt(EdgePoint)->ExtremityMark)
|
|
EdgePoint = NextPointAfter(EdgePoint);
|
|
|
|
return (EdgePoint);
|
|
|
|
} /* NextExtremity */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
/**
|
|
* This routine normalizes the coordinates of the specified
|
|
* outline so that the outline is deskewed down to the
|
|
* baseline, translated so that x=0 is at XOrigin, and scaled
|
|
* so that the height of a character cell from descender to
|
|
* ascender is 1. Of this height, 0.25 is for the descender,
|
|
* 0.25 for the ascender, and 0.5 for the x-height. The
|
|
* y coordinate of the baseline is 0.
|
|
* @param Outline outline to be normalized
|
|
* @param XOrigin x-origin of text
|
|
* @return none
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: 8/2/89, DSJ, Created.
|
|
*/
|
|
void NormalizeOutline(MFOUTLINE Outline,
|
|
FLOAT32 XOrigin) {
|
|
if (Outline == NIL_LIST)
|
|
return;
|
|
|
|
MFOUTLINE EdgePoint = Outline;
|
|
do {
|
|
MFEDGEPT *Current = PointAt(EdgePoint);
|
|
Current->Point.y = MF_SCALE_FACTOR *
|
|
(Current->Point.y - kBlnBaselineOffset);
|
|
Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
|
|
EdgePoint = NextPointAfter(EdgePoint);
|
|
} while (EdgePoint != Outline);
|
|
} /* NormalizeOutline */
|
|
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
namespace tesseract {
|
|
/**
|
|
* This routine normalizes every outline in Outlines
|
|
* according to the currently selected normalization method.
|
|
* It also returns the scale factors that it used to do this
|
|
* scaling. The scale factors returned represent the x and
|
|
* y sizes in the normalized coordinate system that correspond
|
|
* to 1 pixel in the original coordinate system.
|
|
*
|
|
* Globals:
|
|
* - classify_norm_method method being used for normalization
|
|
* - classify_char_norm_range map radius of gyration to this value
|
|
* @param Outlines list of outlines to be normalized
|
|
* @param XScale x-direction scale factor used by routine
|
|
* @param YScale y-direction scale factor used by routine
|
|
* @return none (Outlines are changed and XScale and YScale are updated)
|
|
* @note Exceptions: none
|
|
* @note History: Fri Dec 14 08:14:55 1990, DSJ, Created.
|
|
*/
|
|
void Classify::NormalizeOutlines(LIST Outlines,
|
|
FLOAT32 *XScale,
|
|
FLOAT32 *YScale) {
|
|
MFOUTLINE Outline;
|
|
|
|
switch (classify_norm_method) {
|
|
case character:
|
|
ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
|
|
break;
|
|
|
|
case baseline:
|
|
iterate(Outlines) {
|
|
Outline = (MFOUTLINE) first_node(Outlines);
|
|
NormalizeOutline(Outline, 0.0);
|
|
}
|
|
*XScale = *YScale = MF_SCALE_FACTOR;
|
|
break;
|
|
}
|
|
} /* NormalizeOutlines */
|
|
} // namespace tesseract
|
|
|
|
/*----------------------------------------------------------------------------
|
|
Private Code
|
|
----------------------------------------------------------------------------*/
|
|
/**
|
|
* Change the direction of every vector in the specified
|
|
* outline segment to Direction. The segment to be changed
|
|
* starts at Start and ends at End. Note that the previous
|
|
* direction of End must also be changed to reflect the
|
|
* change in direction of the point before it.
|
|
* @param Start, End defines segment of outline to be modified
|
|
* @param Direction new direction to assign to segment
|
|
* @return none
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: Fri May 4 10:42:04 1990, DSJ, Created.
|
|
*/
|
|
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
|
|
MFOUTLINE Current;
|
|
|
|
for (Current = Start; Current != End; Current = NextPointAfter (Current))
|
|
PointAt (Current)->Direction = Direction;
|
|
|
|
PointAt (End)->PreviousDirection = Direction;
|
|
|
|
} /* ChangeDirection */
|
|
|
|
|
|
/**
|
|
* This routine normalizes each point in Outline by
|
|
* translating it to the specified center and scaling it
|
|
* anisotropically according to the given scale factors.
|
|
* @param Outline outline to be character normalized
|
|
* @param cn_denorm
|
|
* @return none
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: Fri Dec 14 10:27:11 1990, DSJ, Created.
|
|
*/
|
|
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
|
|
MFOUTLINE First, Current;
|
|
MFEDGEPT *CurrentPoint;
|
|
|
|
if (Outline == NIL_LIST)
|
|
return;
|
|
|
|
First = Outline;
|
|
Current = First;
|
|
do {
|
|
CurrentPoint = PointAt(Current);
|
|
FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
|
|
cn_denorm.LocalNormTransform(pos, &pos);
|
|
CurrentPoint->Point.x = (pos.x() - MAX_UINT8 / 2) * MF_SCALE_FACTOR;
|
|
CurrentPoint->Point.y = (pos.y() - MAX_UINT8 / 2) * MF_SCALE_FACTOR;
|
|
|
|
Current = NextPointAfter(Current);
|
|
}
|
|
while (Current != First);
|
|
|
|
} /* CharNormalizeOutline */
|
|
|
|
|
|
/**
|
|
* This routine computes the slope from Start to Finish and
|
|
* and then computes the approximate direction of the line
|
|
* segment from Start to Finish. The direction is quantized
|
|
* into 8 buckets:
|
|
* N, S, E, W, NE, NW, SE, SW
|
|
* Both the slope and the direction are then stored into
|
|
* the appropriate fields of the Start edge point. The
|
|
* direction is also stored into the PreviousDirection field
|
|
* of the Finish edge point.
|
|
* @param Start starting point to compute direction from
|
|
* @param Finish finishing point to compute direction to
|
|
* @param MinSlope slope below which lines are horizontal
|
|
* @param MaxSlope slope above which lines are vertical
|
|
* @return none
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: 7/25/89, DSJ, Created.
|
|
*/
|
|
void ComputeDirection(MFEDGEPT *Start,
|
|
MFEDGEPT *Finish,
|
|
FLOAT32 MinSlope,
|
|
FLOAT32 MaxSlope) {
|
|
FVECTOR Delta;
|
|
|
|
Delta.x = Finish->Point.x - Start->Point.x;
|
|
Delta.y = Finish->Point.y - Start->Point.y;
|
|
if (Delta.x == 0)
|
|
if (Delta.y < 0) {
|
|
Start->Slope = -MAX_FLOAT32;
|
|
Start->Direction = south;
|
|
}
|
|
else {
|
|
Start->Slope = MAX_FLOAT32;
|
|
Start->Direction = north;
|
|
}
|
|
else {
|
|
Start->Slope = Delta.y / Delta.x;
|
|
if (Delta.x > 0)
|
|
if (Delta.y > 0)
|
|
if (Start->Slope > MinSlope)
|
|
if (Start->Slope < MaxSlope)
|
|
Start->Direction = northeast;
|
|
else
|
|
Start->Direction = north;
|
|
else
|
|
Start->Direction = east;
|
|
else if (Start->Slope < -MinSlope)
|
|
if (Start->Slope > -MaxSlope)
|
|
Start->Direction = southeast;
|
|
else
|
|
Start->Direction = south;
|
|
else
|
|
Start->Direction = east;
|
|
else if (Delta.y > 0)
|
|
if (Start->Slope < -MinSlope)
|
|
if (Start->Slope > -MaxSlope)
|
|
Start->Direction = northwest;
|
|
else
|
|
Start->Direction = north;
|
|
else
|
|
Start->Direction = west;
|
|
else if (Start->Slope > MinSlope)
|
|
if (Start->Slope < MaxSlope)
|
|
Start->Direction = southwest;
|
|
else
|
|
Start->Direction = south;
|
|
else
|
|
Start->Direction = west;
|
|
}
|
|
Finish->PreviousDirection = Start->Direction;
|
|
}
|
|
|
|
/**
|
|
* This routine returns the next point in the micro-feature
|
|
* outline that has a direction different than EdgePoint. The
|
|
* routine assumes that the outline being searched is not a
|
|
* degenerate outline (i.e. it must have 2 or more edge points).
|
|
* @param EdgePoint start search from this point
|
|
* @return Point of next direction change in micro-feature outline.
|
|
* @note Globals: none
|
|
* @note Exceptions: none
|
|
* @note History: 7/25/89, DSJ, Created.
|
|
*/
|
|
MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
|
|
DIRECTION InitialDirection;
|
|
|
|
InitialDirection = PointAt (EdgePoint)->Direction;
|
|
|
|
MFOUTLINE next_pt = NULL;
|
|
do {
|
|
EdgePoint = NextPointAfter(EdgePoint);
|
|
next_pt = NextPointAfter(EdgePoint);
|
|
} while (PointAt(EdgePoint)->Direction == InitialDirection &&
|
|
!PointAt(EdgePoint)->Hidden &&
|
|
next_pt != NULL && !PointAt(next_pt)->Hidden);
|
|
|
|
return (EdgePoint);
|
|
}
|