Fixed issue #557

This commit is contained in:
Ray Smith 2017-01-25 16:05:59 -08:00
parent b453f74e01
commit a1c22fb0d0
23 changed files with 131 additions and 423 deletions

View File

@ -403,7 +403,7 @@ int main(int argc, char** argv) {
#if !defined(DEBUG)
// Disable debugging and informational messages from Leptonica.
setMsgSeverity(L_SEVERITY_WARNING);
setMsgSeverity(L_SEVERITY_ERROR);
#endif
#if defined(HAVE_TIFFIO_H) && defined(_WIN32)

View File

@ -176,7 +176,7 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
&vertical_x, &vertical_y,
NULL, &v_lines, &h_lines);
Pix* im_pix = tesseract::ImageFind::FindImages(pix);
Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
if (im_pix != NULL) {
pixSubtract(pix, pix, im_pix);
pixDestroy(&im_pix);

View File

@ -37,6 +37,7 @@
#include "blobbox.h"
#include "blread.h"
#include "colfind.h"
#include "debugpixa.h"
#include "equationdetect.h"
#include "imagefind.h"
#include "linefind.h"
@ -176,28 +177,6 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
return auto_page_seg_ret_val;
}
// Helper writes a grey image to a file for use by scrollviewer.
// Normally for speed we don't display the image in the layout debug windows.
// If textord_debug_images is true, we draw the image as a background to some
// of the debug windows. printable determines whether these
// images are optimized for printing instead of screen display.
static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
pixGetHeight(pix_binary), 8);
// Printable images are light grey on white, but for screen display
// they are black on dark grey so the other colors show up well.
if (printable) {
pixSetAll(grey_pix);
pixSetMasked(grey_pix, pix_binary, 192);
} else {
pixSetAllArbitrary(grey_pix, 64);
pixSetMasked(grey_pix, pix_binary, 0);
}
AlignedBlob::IncrementDebugPix();
pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
pixDestroy(&grey_pix);
}
/**
* Auto page segmentation. Divide the page image into blocks of uniform
* text linespacing and images.
@ -226,9 +205,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks,
BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
OSResults* osr) {
if (textord_debug_images) {
WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
}
Pix* photomask_pix = NULL;
Pix* musicmask_pix = NULL;
// The blocks made by the ColumnFinder. Moved to blocks before return.
@ -250,9 +226,10 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
if (equ_detect_) {
finder->SetEquationDetect(equ_detect_);
}
result = finder->FindBlocks(
pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
to_block, photomask_pix, pix_thresholds_,
pix_grey_, &pixa_debug_, &found_blocks,
diacritic_blobs, to_blocks);
if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_);
delete finder;
@ -265,11 +242,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
BLOCK_IT block_it(blocks);
// Move the found blocks to the input/output blocks.
block_it.add_list_after(&found_blocks);
if (textord_debug_images) {
// The debug image is no longer needed so delete it.
unlink(AlignedBlob::textord_debug_pix().string());
}
return result;
}
@ -311,19 +283,21 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
ASSERT_HOST(pix_binary_ != NULL);
if (tessedit_dump_pageseg_images) {
pixWrite("tessinput.png", pix_binary_, IFF_PNG);
pixa_debug_.AddPix(pix_binary_, "PageSegInput");
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_,
textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix,
&v_lines, &h_lines);
if (tessedit_dump_pageseg_images)
pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_);
if (tessedit_dump_pageseg_images)
pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) {
pixa_debug_.AddPix(pix_binary_, "NoImages");
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
// The rest of the algorithm uses the usual connected components.

View File

@ -12,7 +12,7 @@ endif
include_HEADERS = publictypes.h
noinst_HEADERS = \
blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \
detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
debugpixa.h detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
imagedata.h \
ipoints.h \
linlsq.h matrix.h mod128.h normalis.h \

52
ccstruct/debugpixa.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "allheaders.h"
namespace tesseract {
// Class to hold a Pixa collection of debug images with captions and save them
// to a PDF file.
class DebugPixa {
public:
// TODO(rays) add another constructor with size control.
DebugPixa() {
pixa_ = pixaCreate(0);
fonts_ = bmfCreate(nullptr, 14);
}
// If the filename_ has been set and there are any debug images, they are
// written to the set filename_.
~DebugPixa() {
pixaDestroy(&pixa_);
bmfDestroy(&fonts_);
}
// Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top.
void AddPix(const Pix* pix, const char* caption) {
int depth = pixGetDepth(const_cast<Pix*>(pix));
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix* pix_debug = pixAddSingleTextblock(
const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT);
}
// Sets the destination filename and enables images to be written to a PDF
// on destruction.
void WritePDF(const char* filename) {
if (pixaGetCount(pixa_) > 0) {
pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
pixaClear(pixa_);
}
}
private:
// The collection of images to put in the PDF.
Pixa* pixa_;
// The fonts used to draw text captions.
L_Bmf* fonts_;
};
} // namespace tesseract
#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_

View File

@ -487,81 +487,6 @@ void TrainingSampleSet::IndexFeatures(const IntFeatureSpace& feature_space) {
samples_[s]->IndexFeatures(feature_space);
}
// Delete outlier samples with few features that are shared with others.
// IndexFeatures must have been called already.
void TrainingSampleSet::DeleteOutliers(const IntFeatureSpace& feature_space,
bool debug) {
if (font_class_array_ == NULL)
OrganizeByFontAndClass();
Pixa* pixa = NULL;
if (debug)
pixa = pixaCreate(0);
GenericVector<int> feature_counts;
int fs_size = feature_space.Size();
int font_size = font_id_map_.CompactSize();
for (int font_index = 0; font_index < font_size; ++font_index) {
for (int c = 0; c < unicharset_size_; ++c) {
// Create a histogram of the features used by all samples of this
// font/class combination.
feature_counts.init_to_size(fs_size, 0);
FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
int sample_count = fcinfo.samples.size();
if (sample_count < kMinOutlierSamples)
continue;
for (int i = 0; i < sample_count; ++i) {
int s = fcinfo.samples[i];
const GenericVector<int>& features = samples_[s]->indexed_features();
for (int f = 0; f < features.size(); ++f) {
++feature_counts[features[f]];
}
}
for (int i = 0; i < sample_count; ++i) {
int s = fcinfo.samples[i];
const TrainingSample& sample = *samples_[s];
const GenericVector<int>& features = sample.indexed_features();
// A feature that has a histogram count of 1 is only used by this
// sample, making it 'bad'. All others are 'good'.
int good_features = 0;
int bad_features = 0;
for (int f = 0; f < features.size(); ++f) {
if (feature_counts[features[f]] > 1)
++good_features;
else
++bad_features;
}
// If more than 1/3 features are bad, then this is an outlier.
if (bad_features * 2 > good_features) {
tprintf("Deleting outlier sample of %s, %d good, %d bad\n",
SampleToString(sample).string(),
good_features, bad_features);
if (debug) {
pixaAddPix(pixa, sample.RenderToPix(&unicharset_), L_INSERT);
// Add the previous sample as well, so it is easier to see in
// the output what is wrong with this sample.
int t;
if (i == 0)
t = fcinfo.samples[1];
else
t = fcinfo.samples[i - 1];
const TrainingSample &csample = *samples_[t];
pixaAddPix(pixa, csample.RenderToPix(&unicharset_), L_INSERT);
}
// Mark the sample for deletion.
KillSample(samples_[s]);
}
}
}
}
// Truly delete all bad samples and renumber everything.
DeleteDeadSamples();
if (pixa != NULL) {
Pix* pix = pixaDisplayTiledInRows(pixa, 1, 2600, 1.0, 0, 10, 10);
pixaDestroy(&pixa);
pixWrite("outliers.png", pix, IFF_PNG);
pixDestroy(&pix);
}
}
// Marks the given sample index for deletion.
// Deletion is actually completed by DeleteDeadSamples.
void TrainingSampleSet::KillSample(TrainingSample* sample) {
@ -745,12 +670,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map,
if (debug) {
tprintf("Global worst dist = %g, between sample %d and %d\n",
global_worst_dist, worst_s1, worst_s2);
Pix* pix1 = DebugSample(unicharset_, samples_[worst_s1]);
Pix* pix2 = DebugSample(unicharset_, samples_[worst_s2]);
pixOr(pix1, pix1, pix2);
pixWrite("worstpair.png", pix1, IFF_PNG);
pixDestroy(&pix1);
pixDestroy(&pix2);
}
}

View File

@ -171,10 +171,6 @@ class TrainingSampleSet {
// Generates indexed features for all samples with the supplied feature_space.
void IndexFeatures(const IntFeatureSpace& feature_space);
// Delete outlier samples with few features that are shared with others.
// IndexFeatures must have been called already.
void DeleteOutliers(const IntFeatureSpace& feature_space, bool debug);
// Marks the given sample for deletion.
// Deletion is actually completed by DeleteDeadSamples.
void KillSample(TrainingSample* sample);

View File

@ -30,7 +30,6 @@ INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle");
INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle");
INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle");
INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle");
BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug");
BOOL_VAR(textord_debug_printable, false, "Make debug windows printable");
namespace tesseract {
@ -64,25 +63,6 @@ const double kMinTabGradient = 4.0;
// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor.
const int kMaxSkewFactor = 15;
// Constant part of textord_debug_pix_.
const char* kTextordDebugPix = "psdebug_pix";
// Name of image file to use if textord_debug_images is true.
STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix;
// Index to image file to use if textord_debug_images is true.
int AlignedBlob::debug_pix_index_ = 0;
// Advances the serial number counter and rebuilds the debug image filename
// from it, for use if textord_debug_images is true. The name is the constant
// kTextordDebugPix followed by the new serial number and ".pix".
void AlignedBlob::IncrementDebugPix() {
  debug_pix_index_ += 1;
  // Format the serial number and the extension in one go.
  char suffix[32];
  snprintf(suffix, sizeof(suffix), "%d.pix", debug_pix_index_);
  textord_debug_pix_ = kTextordDebugPix;
  textord_debug_pix_ += suffix;
}
// Constructor to set the parameters for finding aligned and ragged tabs.
// Vertical_x and vertical_y are the current estimates of the true vertical
// direction (up) in the image. Height is the height of the starter blob.

View File

@ -29,8 +29,6 @@
extern INT_VAR_H(textord_debug_bugs, 0,
"Turn on output related to bugs in tab finding");
extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding");
extern BOOL_VAR_H(textord_debug_images, false,
"Use greyed image background for debug");
extern BOOL_VAR_H(textord_debug_printable, false,
"Make debug windows printable");
@ -102,17 +100,6 @@ class AlignedBlob : public BlobGrid {
BLOBNBOX* bbox,
int* vertical_x, int* vertical_y);
// Increment the serial number counter and set the string to use
// for a filename if textord_debug_images is true.
static void IncrementDebugPix();
// Returns the filename to use for the debug image if textord_debug_images is
// true. Call IncrementDebugPix first to set the filename, and again each time
// a new filename is required.
static const STRING& textord_debug_pix() {
return textord_debug_pix_;
}
private:
// Find a set of blobs that are aligned in the given vertical
// direction with the given blob. Returns a list of aligned
@ -132,11 +119,6 @@ class AlignedBlob : public BlobGrid {
BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p,
bool top_to_bottom, BLOBNBOX* bbox,
int x_start, int* end_y);
// Name of image file to use if textord_debug_images is true.
static STRING textord_debug_pix_;
// Index to image file to use if textord_debug_images is true.
static int debug_pix_index_;
};
} // namespace tesseract.

View File

@ -782,11 +782,9 @@ double BaselineBlock::FitLineSpacingModel(
return rms_error;
}
BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
TO_BLOCK_LIST* blocks)
: page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL),
debug_file_prefix_("") {
: page_skew_(page_skew), debug_level_(debug_level) {
TO_BLOCK_IT it(blocks);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
TO_BLOCK* to_block = it.data();
@ -804,7 +802,6 @@ BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
}
BaselineDetect::~BaselineDetect() {
pixDestroy(&pix_debug_);
}
// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
@ -847,31 +844,15 @@ void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
bool remove_noise,
bool show_final_rows,
Textord* textord) {
Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
for (int i = 0; i < blocks_.size(); ++i) {
BaselineBlock* bl_block = blocks_[i];
if (enable_splines)
bl_block->PrepareForSplineFitting(page_tr, remove_noise);
bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
if (pix_spline) {
bl_block->DrawPixSpline(pix_spline);
}
if (show_final_rows) {
bl_block->DrawFinalRows(page_tr);
}
}
if (pix_spline) {
STRING outfile_name = debug_file_prefix_ + "_spline.png";
pixWrite(outfile_name.string(), pix_spline, IFF_PNG);
pixDestroy(&pix_spline);
}
}
// Records the debug output prefix and replaces any previously held debug
// image with a clone of pixIn.
void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) {
  debug_file_prefix_ = output_path;
  // Release the old image before taking a clone of the new one.
  pixDestroy(&pix_debug_);
  pix_debug_ = pixClone(pixIn);
}
} // namespace tesseract.

View File

@ -262,10 +262,6 @@ class BaselineDetect {
bool show_final_rows,
Textord* textord);
// Set up the image and filename, so that a debug image with the detected
// baseline rendered will be saved.
void SetDebugImage(Pix* pixIn, const STRING& output_path);
private:
// Average (median) skew of the blocks on the page among those that have
// a good angle of their own.
@ -274,9 +270,6 @@ class BaselineDetect {
int debug_level_;
// The blocks that we are working with.
PointerVector<BaselineBlock> blocks_;
Pix* pix_debug_;
STRING debug_file_prefix_;
};
} // namespace tesseract

View File

@ -290,8 +290,8 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
int scaled_factor, TO_BLOCK* input_block,
Pix* photo_mask_pix, Pix* thresholds_pix,
Pix* grey_pix, BLOCK_LIST* blocks,
BLOBNBOX_LIST* diacritic_blobs,
Pix* grey_pix, DebugPixa* pixa_debug,
BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs,
TO_BLOCK_LIST* to_blocks) {
pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
@ -304,11 +304,13 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
&projection_, diacritic_blobs, &part_grid_, &big_parts_);
if (!PSM_SPARSE(pageseg_mode)) {
ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
input_block, this, &part_grid_, &big_parts_);
input_block, this, pixa_debug, &part_grid_,
&big_parts_);
ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
photo_mask_pix);
ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
input_block, this, &part_grid_, &big_parts_);
input_block, this, pixa_debug, &part_grid_,
&big_parts_);
}
part_grid_.ReTypeBlobs(&image_bblobs_);
TidyBlobs(input_block);
@ -441,9 +443,6 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
if (textord_tabfind_show_partitions) {
ScrollView* window = MakeWindow(400, 300, "Partitions");
if (window != NULL) {
if (textord_debug_images)
window->Image(AlignedBlob::textord_debug_pix().string(),
image_origin().x(), image_origin().y());
part_grid_.DisplayBoxes(window);
if (!textord_debug_printable)
DisplayTabVectors(window);
@ -519,11 +518,7 @@ void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
blocks_win_ = MakeWindow(700, 300, "Blocks");
else
blocks_win_->Clear();
if (textord_debug_images)
blocks_win_->Image(AlignedBlob::textord_debug_pix().string(),
image_origin().x(), image_origin().y());
else
DisplayBoxes(blocks_win_);
DisplayBoxes(blocks_win_);
BLOCK_IT block_it(blocks);
int serial = 1;
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
@ -543,11 +538,7 @@ void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) {
#ifndef GRAPHICS_DISABLED
ScrollView* col_win = MakeWindow(50, 300, "Columns");
if (textord_debug_images)
col_win->Image(AlignedBlob::textord_debug_pix().string(),
image_origin().x(), image_origin().y());
else
DisplayBoxes(col_win);
DisplayBoxes(col_win);
col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN);
for (int i = 0; i < gridheight_; ++i) {
ColPartitionSet* columns = best_columns_[i];

View File

@ -20,11 +20,12 @@
#ifndef TESSERACT_TEXTORD_COLFIND_H_
#define TESSERACT_TEXTORD_COLFIND_H_
#include "tabfind.h"
#include "imagefind.h"
#include "colpartitiongrid.h"
#include "colpartitionset.h"
#include "debugpixa.h"
#include "imagefind.h"
#include "ocrblock.h"
#include "tabfind.h"
#include "textlineprojection.h"
class BLOCK_LIST;
@ -163,7 +164,7 @@ class ColumnFinder : public TabFind {
// in debug mode, which requests a retry with more debug info.
int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor,
TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix,
Pix* grey_pix, BLOCK_LIST* blocks,
Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks,
BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks);
// Get the rotation required to deskew, and its inverse rotation.

View File

@ -645,46 +645,6 @@ bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
return any_changed;
}
// Computes, for every ColPartition in the grid, the mean RGB of its light and
// dark pixels and the rms error in the linearity of color, by calling
// ImageFind::ComputeRectangleColors on the rerotated bounding box.
// Does nothing if scaled_color is NULL.
// If textord_tabfind_show_color_fit is set, debug maps are also created and
// written to swcolorinput.png, swcolor1.png, swcolor2.png and swrms.png.
void ColPartitionGrid::ComputePartitionColors(Pix* scaled_color,
                                              int scaled_factor,
                                              const FCOORD& rerotation) {
  if (scaled_color == NULL)
    return;
  // Optional debug maps; they stay NULL unless the debug flag is on.
  Pix* color_map1 = NULL;
  Pix* color_map2 = NULL;
  Pix* rms_map = NULL;
  if (textord_tabfind_show_color_fit) {
    const int map_width = pixGetWidth(scaled_color);
    const int map_height = pixGetHeight(scaled_color);
    color_map1 = pixCreate(map_width, map_height, 32);
    color_map2 = pixCreate(map_width, map_height, 32);
    rms_map = pixCreate(map_width, map_height, 8);
  }
  // Visit every ColPartition in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  for (ColPartition* part = gsearch.NextFullSearch(); part != NULL;
       part = gsearch.NextFullSearch()) {
    // The colors are measured in the coordinates of the rerotated box.
    TBOX part_box = part->bounding_box();
    part_box.rotate_large(rerotation);
    ImageFind::ComputeRectangleColors(part_box, scaled_color,
                                      scaled_factor,
                                      color_map1, color_map2, rms_map,
                                      part->color1(), part->color2());
  }
  if (color_map1 == NULL)
    return;
  // Dump the input and the debug maps, then release the maps.
  pixWrite("swcolorinput.png", scaled_color, IFF_PNG);
  pixWrite("swcolor1.png", color_map1, IFF_PNG);
  pixWrite("swcolor2.png", color_map2, IFF_PNG);
  pixWrite("swrms.png", rms_map, IFF_PNG);
  pixDestroy(&color_map1);
  pixDestroy(&color_map2);
  pixDestroy(&rms_map);
}
// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
void ColPartitionGrid::ReflectInYAxis() {

View File

@ -106,11 +106,6 @@ class ColPartitionGrid : public BBGrid<ColPartition,
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
const TBOX& im_box, const FCOORD& rerotation);
// Compute the mean RGB of the light and dark pixels in each ColPartition
// and also the rms error in the linearity of color.
void ComputePartitionColors(Pix* scaled_color, int scaled_factor,
const FCOORD& rerotation);
// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
void ReflectInYAxis();

View File

@ -22,10 +22,11 @@
#include "config_auto.h"
#endif
#include "devanagari_processing.h"
#include "allheaders.h"
#include "tordmain.h"
#include "debugpixa.h"
#include "devanagari_processing.h"
#include "statistc.h"
#include "tordmain.h"
// Flags controlling the debugging information for shiro-rekha splitting
// strategies.
@ -63,11 +64,6 @@ void ShiroRekhaSplitter::Clear() {
perform_close_ = false;
}
// Writes debug_image_ to the given filename in PNG format (IFF_PNG) using
// pixWrite. The object itself is not modified.
// NOTE(review): debug_image_ is passed to pixWrite unchecked; presumably it
// has been populated before this is called - confirm against the callers.
void ShiroRekhaSplitter::DumpDebugImage(const char* filename) const {
pixWrite(filename, debug_image_, IFF_PNG);
}
// On setting the input image, a clone of it is owned by this class.
void ShiroRekhaSplitter::set_orig_pix(Pix* pix) {
if (orig_pix_) {
@ -81,7 +77,7 @@ void ShiroRekhaSplitter::set_orig_pix(Pix* pix) {
// split_for_pageseg should be true if the splitting is being done prior to
// page segmentation. This mode uses the flag
// pageseg_devanagari_split_strategy to determine the splitting strategy.
bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) {
SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
ocr_split_strategy_;
if (split_strategy == NO_SPLIT) {
@ -163,9 +159,9 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
}
boxaDestroy(&regions_to_clear);
pixaDestroy(&ccs);
if (devanagari_split_debugimage) {
DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" :
"ocr_split_debug.png");
if (devanagari_split_debugimage && pixa_debug != nullptr) {
pixa_debug->AddPix(debug_image_,
split_for_pageseg ? "pageseg_split" : "ocr_split");
}
return true;
}

View File

@ -13,6 +13,8 @@
#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#include "allheaders.h"
#include "debugpixa.h"
#include "ocrblock.h"
#include "params.h"
@ -84,7 +86,7 @@ class ShiroRekhaSplitter {
// Returns true if a split was actually performed.
// If split_for_pageseg is true, the pageseg_split_strategy_ is used for
// splitting. If false, the ocr_split_strategy_ is used.
bool Split(bool split_for_pageseg);
bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
// Clears the memory held by this object.
void Clear();
@ -152,9 +154,6 @@ class ShiroRekhaSplitter {
return segmentation_block_list_;
}
// This method dumps a debug image to the specified location.
void DumpDebugImage(const char* filename) const;
// This method returns the computed mode-height of blobs in the pix.
// It also prunes very small blobs from calculation. Could be used to provide
// a global xheight estimate for images which have the same point-size text.

View File

@ -62,7 +62,8 @@ const int kNoisePadding = 4;
// the image regions as a mask image.
// The returned pix may be NULL, meaning no images found.
// If not NULL, it must be PixDestroyed by the caller.
Pix* ImageFind::FindImages(Pix* pix) {
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
// Not worth looking at small images.
if (pixGetWidth(pix) < kMinImageFindSize ||
pixGetHeight(pix) < kMinImageFindSize)
@ -70,7 +71,8 @@ Pix* ImageFind::FindImages(Pix* pix) {
// Reduce by factor 2.
Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
pixDisplayWrite(pixr, textord_tabfind_show_images);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixr, "CascadeReduced");
// Get the halftone mask directly from Leptonica.
//
@ -93,7 +95,8 @@ Pix* ImageFind::FindImages(Pix* pix) {
// Expand back up again.
Pix *pixht = pixExpandReplicate(pixht2, 2);
pixDisplayWrite(pixht, textord_tabfind_show_images);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixht, "HalftoneReplicated");
pixDestroy(&pixht2);
// Fill to capture pixels near the mask edges that were missed
@ -104,14 +107,16 @@ Pix* ImageFind::FindImages(Pix* pix) {
// Eliminate lines and bars that may be joined to images.
Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixfinemask, "FineMask");
Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
pixDestroy(&pixreduced);
pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
pixDestroy(&pixreduced2);
pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
// Combine the coarse and fine image masks.
pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
pixDestroy(&pixfinemask);
@ -119,13 +124,13 @@ Pix* ImageFind::FindImages(Pix* pix) {
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
pixDestroy(&pixcoarsemask);
if (textord_tabfind_show_images)
pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixmask, "MaskDilated");
// And the image mask with the line and bar remover.
pixAnd(pixht, pixht, pixmask);
pixDestroy(&pixmask);
if (textord_tabfind_show_images)
pixWrite("junkfinalimagemask.png", pixht, IFF_PNG);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pixht, "FinalMask");
// Make the result image the same size as the input.
Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
pixOr(result, result, pixht);
@ -140,12 +145,13 @@ Pix* ImageFind::FindImages(Pix* pix) {
// If not NULL, they must be destroyed by the caller.
// Resolution of pix should match the source image (Tesseract::pix_binary_)
// so the output coordinate systems match.
void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) {
void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
Boxa** boxa, Pixa** pixa) {
*boxa = NULL;
*pixa = NULL;
if (textord_tabfind_show_images)
pixWrite("junkconncompimage.png", pix, IFF_PNG);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(pix, "Conncompimage");
// Find the individual image regions in the mask image.
*boxa = pixConnComp(pix, pixa, 8);
// Rectangularize the individual images. If a sharp edge in vertical and/or
@ -156,7 +162,8 @@ void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) {
for (int i = 0; i < npixes; ++i) {
int x_start, x_end, y_start, y_end;
Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
pixDisplayWrite(img_pix, textord_tabfind_show_images);
if (textord_tabfind_show_images && pixa_debug != nullptr)
pixa_debug->AddPix(img_pix, "A component");
if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
kMaxRectangularFraction,
kMaxRectangularGradient,
@ -1282,17 +1289,15 @@ static void DeleteSmallImages(ColPartitionGrid* part_grid) {
// Since the other blobs in the other partitions will be owned by the block,
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
// situation and collect the image blobs.
void ImageFind::FindImagePartitions(Pix* image_pix,
const FCOORD& rotation,
const FCOORD& rerotation,
TO_BLOCK* block,
TabFind* tab_grid,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts) {
void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
const FCOORD& rerotation, TO_BLOCK* block,
TabFind* tab_grid, DebugPixa* pixa_debug,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts) {
int imageheight = pixGetHeight(image_pix);
Boxa* boxa;
Pixa* pixa;
ConnCompAndRectangularize(image_pix, &boxa, &pixa);
ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
// Iterate the connected components in the image regions mask.
int nboxes = 0;
if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa);
@ -1307,8 +1312,8 @@ void ImageFind::FindImagePartitions(Pix* image_pix,
ColPartition_LIST part_list;
DivideImageIntoParts(im_box, rotation, rerotation, pix,
&rectsearch, &part_list);
if (textord_tabfind_show_images) {
pixWrite("junkimagecomponent.png", pix, IFF_PNG);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pix, "ImageComponent");
tprintf("Component has %d parts\n", part_list.length());
}
pixDestroy(&pix);

View File

@ -21,6 +21,7 @@
#ifndef TESSERACT_TEXTORD_IMAGEFIND_H_
#define TESSERACT_TEXTORD_IMAGEFIND_H_
#include "debugpixa.h"
#include "host.h"
struct Boxa;
@ -45,7 +46,8 @@ class ImageFind {
// the image regions as a mask image.
// The returned pix may be NULL, meaning no images found.
// If not NULL, it must be PixDestroyed by the caller.
static Pix* FindImages(Pix* pix);
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug);
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
// analogous to pixConnComp, except that connected components which are nearly
@ -54,7 +56,8 @@ class ImageFind {
// If not NULL, they must be destroyed by the caller.
// Resolution of pix should match the source image (Tesseract::pix_binary_)
// so the output coordinate systems match.
static void ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa);
static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
Boxa** boxa, Pixa** pixa);
// Returns true if there is a rectangle in the source pix, such that all
// pixel rows and column slices outside of it have less than
@ -144,11 +147,9 @@ class ImageFind {
// Since the other blobs in the other partitions will be owned by the block,
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
// situation and collect the image blobs.
static void FindImagePartitions(Pix* image_pix,
const FCOORD& rotation,
const FCOORD& rerotation,
TO_BLOCK* block,
TabFind* tab_grid,
static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
const FCOORD& rerotation, TO_BLOCK* block,
TabFind* tab_grid, DebugPixa* pixa_debug,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts);
};

View File

@ -440,13 +440,8 @@ bool TabFind::FindTabVectors(TabVector_LIST* hlines,
#ifndef GRAPHICS_DISABLED
if (textord_tabfind_show_finaltabs) {
tab_win = MakeWindow(640, 50, "FinalTabs");
if (textord_debug_images) {
tab_win->Image(AlignedBlob::textord_debug_pix().string(),
image_origin_.x(), image_origin_.y());
} else {
DisplayBoxes(tab_win);
DisplayTabs("FinalTabs", tab_win);
}
DisplayBoxes(tab_win);
DisplayTabs("FinalTabs", tab_win);
tab_win = DisplayTabVectors(tab_win);
}
#endif // GRAPHICS_DISABLED
@ -1277,32 +1272,6 @@ bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
RotateBlobList(*deskew, &block->blobs);
RotateBlobList(*deskew, &block->small_blobs);
RotateBlobList(*deskew, &block->noise_blobs);
if (textord_debug_images) {
// Rotate the debug pix and arrange for it to be drawn at the correct
// pixel offset.
Pix* pix_grey = pixRead(AlignedBlob::textord_debug_pix().string());
int width = pixGetWidth(pix_grey);
int height = pixGetHeight(pix_grey);
float angle = atan2(deskew->y(), deskew->x());
// Positive angle is clockwise to pixRotate.
Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP,
L_BRING_IN_WHITE, width, height);
// The image must be translated by the rotation of its center, since it
// has just been rotated about its center.
ICOORD center_offset(width / 2, height / 2);
ICOORD new_center_offset(center_offset);
new_center_offset.rotate(*deskew);
image_origin_ += new_center_offset - center_offset;
// The image grew as it was rotated, so offset the (top/left) origin
// by half the change in size. y is opposite to x because it is drawn
// at its top/left, not bottom/left.
ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2,
(pixGetHeight(pix_rot) - height) / 2);
image_origin_ += corner_offset;
pixWrite(AlignedBlob::textord_debug_pix().string(), pix_rot, IFF_PNG);
pixDestroy(&pix_grey);
pixDestroy(&pix_rot);
}
// Rotate the horizontal vectors. The vertical vectors don't need
// rotating as they can just be refitted.

View File

@ -143,7 +143,6 @@ const double kMaxXProjectionGapFactor = 2.0;
const double kStrokeWidthFractionalTolerance = 0.25;
const double kStrokeWidthConstantTolerance = 2.0;
BOOL_VAR(textord_dump_table_images, false, "Paint table detection output");
BOOL_VAR(textord_show_tables, false, "Show table regions");
BOOL_VAR(textord_tablefind_show_mark, false,
"Debug table marking steps in detail");
@ -371,9 +370,6 @@ void TableFinder::LocateTables(ColPartitionGrid* grid,
#endif // GRAPHICS_DISABLED
}
if (textord_dump_table_images)
WriteToPix(reskew);
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.
@ -1999,80 +1995,6 @@ void TableFinder::DisplayColPartitionConnections(
#endif
}
// Write debug image and text file.
// Note: This method is only for debug purpose during development and
// would not be part of checked in code
void TableFinder::WriteToPix(const FCOORD& reskew) {
// Input file must be named test1.tif
PIX* pix = pixRead("test1.tif");
if (!pix) {
tprintf("Input file test1.tif not found.\n");
return;
}
int img_height = pixGetHeight(pix);
int img_width = pixGetWidth(pix);
// Maximum number of text or table partitions
int num_boxes = 10;
BOXA* text_box_array = boxaCreate(num_boxes);
BOXA* table_box_array = boxaCreate(num_boxes);
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
gsearch(&clean_part_grid_);
gsearch.StartFullSearch();
ColPartition* part;
// load colpartitions into text_box_array and table_box_array
while ((part = gsearch.NextFullSearch()) != NULL) {
TBOX box = part->bounding_box();
box.rotate_large(reskew);
BOX* lept_box = boxCreate(box.left(), img_height - box.top(),
box.right() - box.left(),
box.top() - box.bottom());
if (part->type() == PT_TABLE)
boxaAddBox(table_box_array, lept_box, L_INSERT);
else
boxaAddBox(text_box_array, lept_box, L_INSERT);
}
// draw colpartitions on the output image
PIX* out = pixDrawBoxa(pix, text_box_array, 3, 0xff000000);
out = pixDrawBoxa(out, table_box_array, 3, 0x0000ff00);
BOXA* table_array = boxaCreate(num_boxes);
// text file containing detected table bounding boxes
FILE* fptr = fopen("tess-table.txt", "wb");
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
table_search(&table_grid_);
table_search.StartFullSearch();
ColSegment* table;
// load table boxes to table_array and write them to text file as well
while ((table = table_search.NextFullSearch()) != NULL) {
TBOX box = table->bounding_box();
box.rotate_large(reskew);
// Since deskewing introduces negative coordinates, reskewing
// might not completely recover from that since both steps enlarge
// the actual box. Hence a box that undergoes deskewing/reskewing
// may go out of image boundaries. Crop a table box if needed to
// contain it inside the image dimensions.
box = box.intersection(TBOX(0, 0, img_width - 1, img_height - 1));
BOX* lept_box = boxCreate(box.left(), img_height - box.top(),
box.right() - box.left(),
box.top() - box.bottom());
boxaAddBox(table_array, lept_box, L_INSERT);
fprintf(fptr, "%d %d %d %d TABLE\n", box.left(),
img_height - box.top(), box.right(), img_height - box.bottom());
}
fclose(fptr);
// paint table boxes on the debug image
out = pixDrawBoxa(out, table_array, 5, 0x7fff0000);
pixWrite("out.png", out, IFF_PNG);
// memory cleanup
boxaDestroy(&text_box_array);
boxaDestroy(&table_box_array);
boxaDestroy(&table_array);
pixDestroy(&pix);
pixDestroy(&out);
}
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.

View File

@ -389,11 +389,6 @@ class TableFinder {
void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid,
ScrollView::Color color);
// Write ColPartitions and Tables to a PIX image
// Note: This method is only for debug purpose during development and
// would not be part of checked in code
void WriteToPix(const FCOORD& reskew);
// Merge all colpartitions in table regions to make them a single
// colpartition and revert types of isolated table cells not
// assigned to any table to their original types.

View File

@ -119,6 +119,7 @@ void TextlineProjection::MoveNonTextlineBlobs(
// Create a window and display the projection in it.
void TextlineProjection::DisplayProjection() const {
#ifndef GRAPHICS_DISABLED
int width = pixGetWidth(pix_);
int height = pixGetHeight(pix_);
Pix* pixc = pixCreate(width, height, 32);
@ -139,16 +140,12 @@ void TextlineProjection::DisplayProjection() const {
col_data[x] = result;
}
}
#if 0
// TODO(rays) uncomment when scrollview can display non-binary images.
ScrollView* win = new ScrollView("Projection", 0, 0,
width, height, width, height);
win->Image(pixc, 0, 0);
win->Update();
#else
pixWrite("projection.png", pixc, IFF_PNG);
#endif
pixDestroy(&pixc);
#endif // GRAPHICS_DISABLED
}
// Compute the distance of the box from the partition using curved projection