tesseract/src/textord/scanedg.cpp

370 lines
13 KiB
C++
Raw Normal View History

/**********************************************************************
2017-07-03 05:35:47 +08:00
* File: scanedg.cpp (Formerly scanedge.c)
* Description: Raster scanning crack based edge extractor.
* Author: Ray Smith
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include "scanedg.h"
#include <memory> // std::unique_ptr
#include "allheaders.h"
#include "edgloop.h"
#define WHITE_PIX 1 /*thresholded colours */
#define BLACK_PIX 0
// Flips between WHITE_PIX and BLACK_PIX.
#define FLIP_COLOUR(pix) (1-(pix))
/**********************************************************************
* block_edges
*
* Extract edges from a PDBLK.
**********************************************************************/
void block_edges(Pix *t_pix, // thresholded image
PDBLK *block, // block in image
C_OUTLINE_IT* outline_it) {
ICOORD bleft; // bounding box
ICOORD tright;
BLOCK_LINE_IT line_it = block; // line iterator
int width = pixGetWidth(t_pix);
int height = pixGetHeight(t_pix);
int wpl = pixGetWpl(t_pix);
// lines in progress
2018-05-22 22:55:45 +08:00
std::unique_ptr<CRACKEDGE*[]> ptrline(new CRACKEDGE*[width + 1]);
CRACKEDGE *free_cracks = nullptr;
block->bounding_box(bleft, tright); // block box
ASSERT_HOST(tright.x() <= width);
ASSERT_HOST(tright.y() <= height);
int block_width = tright.x() - bleft.x();
for (int x = block_width; x >= 0; x--)
ptrline[x] = nullptr; // no lines in progress
2018-05-22 22:55:45 +08:00
std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]);
const uint8_t margin = WHITE_PIX;
for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) {
if (y >= bleft.y() && y < tright.y()) {
// Get the binary pixels from the image.
l_uint32* line = pixGetData(t_pix) + wpl * (height - 1 - y);
for (int x = 0; x < block_width; ++x) {
bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1;
}
2018-05-22 22:55:45 +08:00
make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y);
} else {
2018-05-22 22:55:45 +08:00
memset(bwline.get(), margin, block_width * sizeof(bwline[0]));
}
line_edges(bleft.x(), y, block_width,
2018-05-22 22:55:45 +08:00
margin, bwline.get(), ptrline.get(), &free_cracks, outline_it);
}
free_crackedges(free_cracks); // really free them
}
/**********************************************************************
* make_margins
*
* Get an image line and set to margin non-text pixels.
**********************************************************************/
void make_margins( //get a line
PDBLK *block, //block in image
BLOCK_LINE_IT *line_it, //for old style
Use POSIX data types and macros (#878) * api: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * cutil: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * training: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract data types by POSIX data types Now all Tesseract data types which are no longer needed can be removed from ccutil/host.h. Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Remove the macros which are now unused from ccutil/host.h. Remove also the obsolete history comments. Signed-off-by: Stefan Weil <sw@weilnetz.de> * Fix build error caused by ambiguous ClipToRange Error message vom Appveyor CI: C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj] C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj] c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int' Signed-off-by: Stefan Weil <sw@weilnetz.de> * unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
uint8_t *pixels, //pixels to strip
uint8_t margin, //white-out pixel
int16_t left, //block edges
int16_t right,
int16_t y //line coord
) {
ICOORDELT_IT seg_it;
Use POSIX data types and macros (#878) * api: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * cutil: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * training: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract data types by POSIX data types Now all Tesseract data types which are no longer needed can be removed from ccutil/host.h. Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Remove the macros which are now unused from ccutil/host.h. Remove also the obsolete history comments. Signed-off-by: Stefan Weil <sw@weilnetz.de> * Fix build error caused by ambiguous ClipToRange Error message vom Appveyor CI: C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj] C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj] c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int' Signed-off-by: Stefan Weil <sw@weilnetz.de> * unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
int32_t start; //of segment
int16_t xext; //of segment
int xindex; //index to pixel
if (block->poly_block () != nullptr) {
2018-05-22 22:55:45 +08:00
std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT (block->poly_block ()));
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
lines->get_line(y));
if (!segments->empty ()) {
seg_it.set_to_list(segments.get());
seg_it.mark_cycle_pt ();
start = seg_it.data ()->x ();
xext = seg_it.data ()->y ();
for (xindex = left; xindex < right; xindex++) {
if (xindex >= start && !seg_it.cycled_list ()) {
xindex = start + xext - 1;
seg_it.forward ();
start = seg_it.data ()->x ();
xext = seg_it.data ()->y ();
}
else
pixels[xindex - left] = margin;
}
}
else {
for (xindex = left; xindex < right; xindex++)
pixels[xindex - left] = margin;
}
}
else {
start = line_it->get_line (y, xext);
for (xindex = left; xindex < start; xindex++)
pixels[xindex - left] = margin;
for (xindex = start + xext; xindex < right; xindex++)
pixels[xindex - left] = margin;
}
}
/**********************************************************************
* line_edges
*
* Scan a line for edges and update the edges in progress.
* When edges close into loops, send them for approximation.
**********************************************************************/
Use POSIX data types and macros (#878) * api: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * cutil: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * training: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract data types by POSIX data types Now all Tesseract data types which are no longer needed can be removed from ccutil/host.h. Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Remove the macros which are now unused from ccutil/host.h. Remove also the obsolete history comments. Signed-off-by: Stefan Weil <sw@weilnetz.de> * Fix build error caused by ambiguous ClipToRange Error message vom Appveyor CI: C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj] C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj] c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int' Signed-off-by: Stefan Weil <sw@weilnetz.de> * unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de> * arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
void line_edges(int16_t x, // coord of line start
int16_t y, // coord of line
int16_t xext, // width of line
uint8_t uppercolour, // start of prev line
uint8_t * bwpos, // thresholded line
CRACKEDGE ** prevline, // edges in progress
CRACKEDGE **free_cracks,
C_OUTLINE_IT* outline_it) {
CrackPos pos = {free_cracks, x, y };
int xmax; // max x coord
int prevcolour; // of previous pixel
CRACKEDGE *current; // current h edge
CRACKEDGE *newcurrent; // new h edge
xmax = x + xext; // max allowable coord
prevcolour = uppercolour; // forced plain margin
current = nullptr; // nothing yet
// do each pixel
for (; pos.x < xmax; pos.x++, prevline++) {
const int colour = *bwpos++; // current pixel
if (*prevline != nullptr) {
// changed above
// change colour
uppercolour = FLIP_COLOUR(uppercolour);
if (colour == prevcolour) {
if (colour == uppercolour) {
// finish a line
join_edges(current, *prevline, free_cracks, outline_it);
current = nullptr; // no edge now
} else {
// new horiz edge
current = h_edge(uppercolour - colour, *prevline, &pos);
}
*prevline = nullptr; // no change this time
} else {
if (colour == uppercolour)
*prevline = v_edge(colour - prevcolour, *prevline, &pos);
// 8 vs 4 connection
else if (colour == WHITE_PIX) {
join_edges(current, *prevline, free_cracks, outline_it);
current = h_edge(uppercolour - colour, nullptr, &pos);
*prevline = v_edge(colour - prevcolour, current, &pos);
} else {
newcurrent = h_edge(uppercolour - colour, *prevline, &pos);
*prevline = v_edge(colour - prevcolour, current, &pos);
current = newcurrent; // right going h edge
}
prevcolour = colour; // remember new colour
}
} else {
if (colour != prevcolour) {
*prevline = current = v_edge(colour - prevcolour, current, &pos);
prevcolour = colour;
}
if (colour != uppercolour)
current = h_edge(uppercolour - colour, current, &pos);
else
current = nullptr; // no edge now
}
}
if (current != nullptr) {
// out of block
if (*prevline != nullptr) { // got one to join to?
join_edges(current, *prevline, free_cracks, outline_it);
*prevline = nullptr; // tidy now
} else {
// fake vertical
*prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos);
}
} else if (*prevline != nullptr) {
//continue fake
*prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos);
}
}
/**********************************************************************
* h_edge
*
* Create a new horizontal CRACKEDGE and join it to the given edge.
**********************************************************************/
CRACKEDGE *h_edge(int sign, // sign of edge
CRACKEDGE* join, // edge to join to
CrackPos* pos) {
CRACKEDGE *newpt; // return value
if (*pos->free_cracks != nullptr) {
newpt = *pos->free_cracks;
*pos->free_cracks = newpt->next; // get one fast
} else {
newpt = new CRACKEDGE;
}
newpt->pos.set_y(pos->y + 1); // coords of pt
newpt->stepy = 0; // edge is horizontal
if (sign > 0) {
newpt->pos.set_x(pos->x + 1); // start location
newpt->stepx = -1;
newpt->stepdir = 0;
} else {
newpt->pos.set_x(pos->x); // start location
newpt->stepx = 1;
newpt->stepdir = 2;
}
if (join == nullptr) {
newpt->next = newpt; // ptrs to other ends
newpt->prev = newpt;
} else {
if (newpt->pos.x() + newpt->stepx == join->pos.x()
&& newpt->pos.y() == join->pos.y()) {
newpt->prev = join->prev; // update other ends
newpt->prev->next = newpt;
newpt->next = join; // join up
join->prev = newpt;
} else {
newpt->next = join->next; // update other ends
newpt->next->prev = newpt;
newpt->prev = join; // join up
join->next = newpt;
}
}
return newpt;
}
/**********************************************************************
* v_edge
*
* Create a new vertical CRACKEDGE and join it to the given edge.
**********************************************************************/
CRACKEDGE *v_edge(int sign, // sign of edge
CRACKEDGE* join,
CrackPos* pos) {
CRACKEDGE *newpt; // return value
if (*pos->free_cracks != nullptr) {
newpt = *pos->free_cracks;
*pos->free_cracks = newpt->next; // get one fast
} else {
newpt = new CRACKEDGE;
}
newpt->pos.set_x(pos->x); // coords of pt
newpt->stepx = 0; // edge is vertical
if (sign > 0) {
newpt->pos.set_y(pos->y); // start location
newpt->stepy = 1;
newpt->stepdir = 3;
} else {
newpt->pos.set_y(pos->y + 1); // start location
newpt->stepy = -1;
newpt->stepdir = 1;
}
if (join == nullptr) {
newpt->next = newpt; //ptrs to other ends
newpt->prev = newpt;
} else {
if (newpt->pos.x() == join->pos.x()
&& newpt->pos.y() + newpt->stepy == join->pos.y()) {
newpt->prev = join->prev; // update other ends
newpt->prev->next = newpt;
newpt->next = join; // join up
join->prev = newpt;
} else {
newpt->next = join->next; // update other ends
newpt->next->prev = newpt;
newpt->prev = join; // join up
join->next = newpt;
}
}
return newpt;
}
/**********************************************************************
* join_edges
*
* Join 2 edges together. Send the outline for approximation when a
* closed loop is formed.
**********************************************************************/
void join_edges(CRACKEDGE *edge1, // edges to join
CRACKEDGE *edge2, // no specific order
CRACKEDGE **free_cracks,
C_OUTLINE_IT* outline_it) {
if (edge1->pos.x() + edge1->stepx != edge2->pos.x()
|| edge1->pos.y() + edge1->stepy != edge2->pos.y()) {
CRACKEDGE *tempedge = edge1;
edge1 = edge2; // swap around
edge2 = tempedge;
}
if (edge1->next == edge2) {
// already closed
complete_edge(edge1, outline_it);
// attach freelist to end
edge1->prev->next = *free_cracks;
*free_cracks = edge1; // and free list
} else {
// update opposite ends
edge2->prev->next = edge1->next;
edge1->next->prev = edge2->prev;
edge1->next = edge2; // make joins
edge2->prev = edge1;
}
}
/**********************************************************************
* free_crackedges
*
* Really free the CRACKEDGEs by giving them back to delete.
**********************************************************************/
void free_crackedges(CRACKEDGE *start) {
CRACKEDGE *current; // current edge to free
CRACKEDGE *next; // next one to free
for (current = start; current != nullptr; current = next) {
next = current->next;
delete current; // delete them all
}
}