Generalized feature extractor to allow fx from greyscale

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@875 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2013-09-23 15:19:50 +00:00
parent dfc1a92628
commit b0fb616299
5 changed files with 121 additions and 31 deletions

View File

@ -326,7 +326,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
tess->tess_cn_matching.set_value(true); // turn it on
tess->tess_bn_matching.set_value(false);
C_BLOB* blob = bbox->cblob();
TBLOB* tblob = TBLOB::PolygonalCopy(blob);
TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
TBOX box = tblob->bounding_box();
FCOORD current_rotation(1.0f, 0.0f);
FCOORD rotation90(0.0f, 1.0f);
@ -347,13 +347,12 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
scaling = static_cast<float>(kBlnXHeight) / box.width();
x_origin = i == 1 ? box.left() : box.right();
}
DENORM denorm;
denorm.SetupNormalization(NULL, NULL, &current_rotation, NULL, NULL, 0,
x_origin, y_origin, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset));
TBLOB* rotated_blob = new TBLOB(*tblob);
rotated_blob->Normalize(denorm);
tess->AdaptiveClassifier(rotated_blob, denorm, ratings + i, NULL);
rotated_blob->Normalize(NULL, &current_rotation, NULL,
x_origin, y_origin, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset),
false, NULL);
tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL);
delete rotated_blob;
current_rotation.rotate(rotation90);
}

View File

@ -163,8 +163,12 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
bool splitting =
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
bool cjk_mode = textord_use_cjk_fp_model;
textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
pix_thresholds_, pix_grey_, splitting || cjk_mode,
blocks, &to_blocks);
return auto_page_seg_ret_val;
}
@ -243,6 +247,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode,
}
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
to_block, photomask_pix,
pix_thresholds_, pix_grey_,
&found_blocks, to_blocks);
if (result >= 0)
finder->GetDeskewVectors(&deskew_, &reskew_);

View File

@ -41,6 +41,7 @@
#include "svmnode.h"
#include "statistc.h"
#include "tesseractclass.h"
#include "werdit.h"
#ifndef GRAPHICS_DISABLED
#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight)
@ -73,6 +74,7 @@ enum CMD_EVENTS
QUIT_CMD_EVENT,
RECOG_WERDS,
RECOG_PSEUDO,
SHOW_BLOB_FEATURES,
SHOW_SUBSCRIPT_CMD_EVENT,
SHOW_SUPERSCRIPT_CMD_EVENT,
SHOW_ITALIC_CMD_EVENT,
@ -266,6 +268,7 @@ SVMenuNode *Tesseract::build_menu_new() {
modes_menu_item->AddChild("Config Words", DEBUG_WERD_CMD_EVENT);
modes_menu_item->AddChild("Recog Words", RECOG_WERDS);
modes_menu_item->AddChild("Recog Blobs", RECOG_PSEUDO);
modes_menu_item->AddChild("Show Blob Features", SHOW_BLOB_FEATURES);
parent_menu = root_menu_item->AddChild("DISPLAY");
@ -433,6 +436,7 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics
case SHOW_BLN_WERD_CMD_EVENT:
case RECOG_WERDS:
case RECOG_PSEUDO:
case SHOW_BLOB_FEATURES:
mode =(CMD_EVENTS) cmd_event;
break;
case DEBUG_WERD_CMD_EVENT:
@ -617,6 +621,9 @@ void Tesseract::process_image_event( // action in image win
image_win->AddMessage("Recogging selected blobs");
recog_pseudo_word(current_page_res, selection_box);
break;
case SHOW_BLOB_FEATURES:
blob_feature_display(current_page_res, selection_box);
break;
default:
sprintf(msg, "Mode %d not yet implemented", mode);
@ -725,14 +732,21 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
TWERD *bln_word = word_res->chopped_word;
if (bln_word == NULL) {
word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
this->textord_use_cjk_fp_model,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
bln_word = word_res->chopped_word;
}
bln_word_window_handle()->Clear();
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
1.0, 0.0f, -1000.0f, 1000.0f);
bln_word->plot(bln_word_window_handle());
C_BLOB_IT it(word_res->word->cblob_list());
ScrollView::Color color = WERD::NextColor(ScrollView::BLACK);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->plot_normed(word_res->denorm, color, ScrollView::BROWN,
bln_word_window_handle());
color = WERD::NextColor(color);
}
bln_word_window_handle()->Update();
return TRUE;
}
@ -754,6 +768,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
BoxWord* box_word = word_res->box_word;
WERD_CHOICE* best_choice = word_res->best_choice;
int length = box_word->length();
if (word_res->fontinfo == NULL) return false;
const FontInfo& font_info = *word_res->fontinfo;
@ -761,11 +776,11 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
ScrollView::Color color = ScrollView::GREEN;
switch (color_mode) {
case CM_SUBSCRIPT:
if (box_word->BlobPosition(i) == SP_SUBSCRIPT)
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT)
color = ScrollView::RED;
break;
case CM_SUPERSCRIPT:
if (box_word->BlobPosition(i) == SP_SUPERSCRIPT)
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
color = ScrollView::RED;
break;
case CM_ITALIC:
@ -789,7 +804,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
color = ScrollView::RED;
break;
case CM_DROPCAPS:
if (box_word->BlobPosition(i) == SP_DROPCAP)
if (best_choice->BlobPosition(i) == SP_DROPCAP)
color = ScrollView::RED;
break;
// TODO(rays) underline is currently completely unsupported.
@ -833,7 +848,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
// display poly approx
if (word->display_flag(DF_POLYGONAL)) {
// need to convert
TWERD* tword = TWERD::PolygonalCopy(word);
TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
tword->plot(image_win);
delete tword;
displayed_something = TRUE;
@ -847,15 +862,13 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
}
if (word->display_flag(DF_BLAMER) &&
!(word_res->blamer_bundle != NULL &&
word_res->blamer_bundle->incorrect_result_reason == IRR_CORRECT)) {
word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
text = "";
const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
if (blamer_bundle == NULL) {
text += "NULL";
} else {
for (int i = 0; i < blamer_bundle->truth_text.length(); ++i) {
text += blamer_bundle->truth_text[i];
}
text = blamer_bundle->TruthString();
}
text += " -> ";
STRING best_choice_str;
@ -866,7 +879,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
}
text += best_choice_str;
IncorrectResultReason reason = (blamer_bundle == NULL) ?
IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason;
IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
ASSERT_HOST(reason < IRR_NUM_REASONS)
blame += " [";
blame += BlamerBundle::IncorrectReasonName(reason);
@ -915,9 +928,9 @@ BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
tprintf("\nWord data...\n");
word_res->word->print();
if (word_res->blamer_bundle != NULL && wordrec_debug_blamer &&
word_res->blamer_bundle->incorrect_result_reason != IRR_CORRECT) {
word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) {
tprintf("Current blamer debug: %s\n",
word_res->blamer_bundle->debug.string());
word_res->blamer_bundle->debug().string());
}
return TRUE;
}
@ -939,6 +952,46 @@ BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER));
return word_display(block, row, word_res);
}
// Shows the integer features extracted from the first blob of the pseudo-word
// that make_pseudo_word returns for selection_box. The baseline ("BL") and
// character-normalized ("CN") feature sets are each rendered in green in a
// feature-space window of their own.
// Does nothing if no pseudo-word can be made for the selection, and only
// prints a message if the word set up for recognition contains no blobs.
// NOTE(review): CreateFeatureSpaceWindow is called on every invocation and
// the returned windows are not deleted here - confirm who owns them.
// page_res is non-const because the iterator doesn't know if you are going
// to change the items it points to! Really a const here though.
void Tesseract::blob_feature_display(PAGE_RES* page_res,
                                     const TBOX& selection_box) {
  // Both are filled in (by reference) by make_pseudo_word. Start from NULL so
  // that they never hold indeterminate values.
  ROW* row = NULL;      // row of word
  BLOCK* block = NULL;  // block of word
  WERD* word = make_pseudo_word(page_res, selection_box, block, row);
  if (word == NULL)
    return;  // No pseudo-word for this selection, so nothing to display.

  WERD_RES word_res(word);
  word_res.x_height = row->x_height();
  word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
                                   textord_use_cjk_fp_model,
                                   poly_allow_detailed_fx,
                                   row, block);
  TWERD* bln_word = word_res.chopped_word;
  // Guard the blobs[0] access below: without a chopped word, or with an empty
  // one, there is no blob to extract features from.
  if (bln_word == NULL || bln_word->blobs.size() < 1) {
    tprintf("No blobs available for feature display\n");
    delete word;
    return;
  }
  TBLOB* bln_blob = bln_word->blobs[0];
  INT_FX_RESULT_STRUCT fx_info;
  GenericVector<INT_FEATURE_STRUCT> bl_features;
  GenericVector<INT_FEATURE_STRUCT> cn_features;
  Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features,
                            &cn_features, &fx_info, NULL);
  // Display baseline features.
  ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
  ClearFeatureSpaceWindow(baseline, bl_win);
  for (int f = 0; f < bl_features.size(); ++f)
    RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN);
  bl_win->Update();
  // Display cn features.
  ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
  ClearFeatureSpaceWindow(character, cn_win);
  for (int f = 0; f < cn_features.size(); ++f)
    RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
  cn_win->Update();
  // The pseudo-word is released here, before word_res goes out of scope, as
  // in the original ordering.
  delete word;
}
#endif // GRAPHICS_DISABLED
} // namespace tesseract

View File

@ -169,6 +169,32 @@ void ImageThresholder::ThresholdToPix(Pix** pix) {
}
}
// Produces an 8 bit-per-pixel image that stores a threshold value at every
// pixel. The contract allows the result to be an integer reduction of the
// binary image (the scale factor then follows from the ratio of the sizes),
// all the way down to a single 1x1 pixel. Ideally the stored value is exactly
// the threshold that ThresholdToPix used to make the binary image, but that
// is not a hard constraint.
// This implementation creates an image with the width and height of the grey
// rectangle and fills it with one value: the first threshold reported by
// OtsuThreshold, or 128 when that threshold is not positive.
// Returns NULL if the input is binary. The caller must pixDestroy the result.
Pix* ImageThresholder::GetPixRectThresholds() {
  if (IsBinary())
    return NULL;

  Pix* grey = GetPixRectGrey();
  const int grey_width = pixGetWidth(grey);
  const int grey_height = pixGetHeight(grey);
  const unsigned char* grey_bytes =
      reinterpret_cast<const unsigned char*>(pixGetData(grey));
  int* otsu_thresholds;
  int* otsu_hi_values;
  // One byte per pixel; the line stride is the word count per line times the
  // size of a word.
  OtsuThreshold(grey_bytes, 1, pixGetWpl(grey) * sizeof(l_uint32),
                0, 0, grey_width, grey_height,
                &otsu_thresholds, &otsu_hi_values);
  // Only the size of the grey image is needed from here on.
  pixDestroy(&grey);

  Pix* result = pixCreate(grey_width, grey_height, 8);
  int fill_value = 128;  // Fallback when no positive threshold was found.
  if (otsu_thresholds[0] > 0)
    fill_value = otsu_thresholds[0];
  pixSetAllArbitrary(result, fill_value);

  delete [] otsu_thresholds;
  delete [] otsu_hi_values;
  return result;
}
// Common initialization shared between SetImage methods.
void ImageThresholder::Init() {
SetRectangle(0, 0, image_width_, image_height_);
@ -198,11 +224,10 @@ Pix* ImageThresholder::GetPixRect() {
return raw_pix;
}
// Get a clone/copy of the source image rectangle, reduced to greyscale.
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
// Provided to the classifier to extract features from the greyscale image.
Pix* ImageThresholder::GetPixRectGrey() {
Pix* pix = GetPixRect(); // May have to be reduced to grey.
int depth = pixGetDepth(pix);

View File

@ -122,6 +122,15 @@ class ImageThresholder {
/// Caller must use pixDestroy to free the created Pix.
virtual void ThresholdToPix(Pix** pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns NULL if the input is binary. PixDestroy after use.
virtual Pix* GetPixRectThresholds();
/// Get a clone/copy of the source image rectangle.
/// The returned Pix must be pixDestroyed.
/// This function will be used in the future by the page layout analysis, and
@ -129,12 +138,11 @@ class ImageThresholder {
/// so there is no raw equivalent.
Pix* GetPixRect();
/// Get a clone/copy of the source image rectangle, reduced to greyscale.
/// The returned Pix must be pixDestroyed.
/// This function will be used in the future by the page layout analysis, and
/// the layout analysis that uses it will only be available with Leptonica,
/// so there is no raw equivalent.
Pix* GetPixRectGrey();
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
virtual Pix* GetPixRectGrey();
protected:
// ----------------------------------------------------------------------