mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Generalized feature extractor to allow fx from greyscale
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@875 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
dfc1a92628
commit
b0fb616299
@ -326,7 +326,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
tess->tess_cn_matching.set_value(true); // turn it on
|
||||
tess->tess_bn_matching.set_value(false);
|
||||
C_BLOB* blob = bbox->cblob();
|
||||
TBLOB* tblob = TBLOB::PolygonalCopy(blob);
|
||||
TBLOB* tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);
|
||||
TBOX box = tblob->bounding_box();
|
||||
FCOORD current_rotation(1.0f, 0.0f);
|
||||
FCOORD rotation90(0.0f, 1.0f);
|
||||
@ -347,13 +347,12 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
scaling = static_cast<float>(kBlnXHeight) / box.width();
|
||||
x_origin = i == 1 ? box.left() : box.right();
|
||||
}
|
||||
DENORM denorm;
|
||||
denorm.SetupNormalization(NULL, NULL, ¤t_rotation, NULL, NULL, 0,
|
||||
x_origin, y_origin, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset));
|
||||
TBLOB* rotated_blob = new TBLOB(*tblob);
|
||||
rotated_blob->Normalize(denorm);
|
||||
tess->AdaptiveClassifier(rotated_blob, denorm, ratings + i, NULL);
|
||||
rotated_blob->Normalize(NULL, ¤t_rotation, NULL,
|
||||
x_origin, y_origin, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset),
|
||||
false, NULL);
|
||||
tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL);
|
||||
delete rotated_blob;
|
||||
current_rotation.rotate(rotation90);
|
||||
}
|
||||
|
@ -163,8 +163,12 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
|
||||
tprintf("Empty page\n");
|
||||
return 0; // AutoPageSeg found an empty page.
|
||||
}
|
||||
bool splitting =
|
||||
pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
|
||||
bool cjk_mode = textord_use_cjk_fp_model;
|
||||
|
||||
textord_.TextordPage(pageseg_mode, width, height, pix_binary_,
|
||||
textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,
|
||||
pix_thresholds_, pix_grey_, splitting || cjk_mode,
|
||||
blocks, &to_blocks);
|
||||
return auto_page_seg_ret_val;
|
||||
}
|
||||
@ -243,6 +247,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode,
|
||||
}
|
||||
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
|
||||
to_block, photomask_pix,
|
||||
pix_thresholds_, pix_grey_,
|
||||
&found_blocks, to_blocks);
|
||||
if (result >= 0)
|
||||
finder->GetDeskewVectors(&deskew_, &reskew_);
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "svmnode.h"
|
||||
#include "statistc.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "werdit.h"
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
#define ASC_HEIGHT (2 * kBlnBaselineOffset + kBlnXHeight)
|
||||
@ -73,6 +74,7 @@ enum CMD_EVENTS
|
||||
QUIT_CMD_EVENT,
|
||||
RECOG_WERDS,
|
||||
RECOG_PSEUDO,
|
||||
SHOW_BLOB_FEATURES,
|
||||
SHOW_SUBSCRIPT_CMD_EVENT,
|
||||
SHOW_SUPERSCRIPT_CMD_EVENT,
|
||||
SHOW_ITALIC_CMD_EVENT,
|
||||
@ -266,6 +268,7 @@ SVMenuNode *Tesseract::build_menu_new() {
|
||||
modes_menu_item->AddChild("Config Words", DEBUG_WERD_CMD_EVENT);
|
||||
modes_menu_item->AddChild("Recog Words", RECOG_WERDS);
|
||||
modes_menu_item->AddChild("Recog Blobs", RECOG_PSEUDO);
|
||||
modes_menu_item->AddChild("Show Blob Features", SHOW_BLOB_FEATURES);
|
||||
|
||||
parent_menu = root_menu_item->AddChild("DISPLAY");
|
||||
|
||||
@ -433,6 +436,7 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics
|
||||
case SHOW_BLN_WERD_CMD_EVENT:
|
||||
case RECOG_WERDS:
|
||||
case RECOG_PSEUDO:
|
||||
case SHOW_BLOB_FEATURES:
|
||||
mode =(CMD_EVENTS) cmd_event;
|
||||
break;
|
||||
case DEBUG_WERD_CMD_EVENT:
|
||||
@ -617,6 +621,9 @@ void Tesseract::process_image_event( // action in image win
|
||||
image_win->AddMessage("Recogging selected blobs");
|
||||
recog_pseudo_word(current_page_res, selection_box);
|
||||
break;
|
||||
case SHOW_BLOB_FEATURES:
|
||||
blob_feature_display(current_page_res, selection_box);
|
||||
break;
|
||||
|
||||
default:
|
||||
sprintf(msg, "Mode %d not yet implemented", mode);
|
||||
@ -725,14 +732,21 @@ BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
TWERD *bln_word = word_res->chopped_word;
|
||||
if (bln_word == NULL) {
|
||||
word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
|
||||
this->textord_use_cjk_fp_model,
|
||||
textord_use_cjk_fp_model,
|
||||
poly_allow_detailed_fx,
|
||||
row, block);
|
||||
bln_word = word_res->chopped_word;
|
||||
}
|
||||
bln_word_window_handle()->Clear();
|
||||
display_bln_lines(bln_word_window_handle(), ScrollView::CYAN,
|
||||
1.0, 0.0f, -1000.0f, 1000.0f);
|
||||
bln_word->plot(bln_word_window_handle());
|
||||
C_BLOB_IT it(word_res->word->cblob_list());
|
||||
ScrollView::Color color = WERD::NextColor(ScrollView::BLACK);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
it.data()->plot_normed(word_res->denorm, color, ScrollView::BROWN,
|
||||
bln_word_window_handle());
|
||||
color = WERD::NextColor(color);
|
||||
}
|
||||
bln_word_window_handle()->Update();
|
||||
return TRUE;
|
||||
}
|
||||
@ -754,6 +768,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
|
||||
if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
|
||||
BoxWord* box_word = word_res->box_word;
|
||||
WERD_CHOICE* best_choice = word_res->best_choice;
|
||||
int length = box_word->length();
|
||||
if (word_res->fontinfo == NULL) return false;
|
||||
const FontInfo& font_info = *word_res->fontinfo;
|
||||
@ -761,11 +776,11 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
ScrollView::Color color = ScrollView::GREEN;
|
||||
switch (color_mode) {
|
||||
case CM_SUBSCRIPT:
|
||||
if (box_word->BlobPosition(i) == SP_SUBSCRIPT)
|
||||
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT)
|
||||
color = ScrollView::RED;
|
||||
break;
|
||||
case CM_SUPERSCRIPT:
|
||||
if (box_word->BlobPosition(i) == SP_SUPERSCRIPT)
|
||||
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
|
||||
color = ScrollView::RED;
|
||||
break;
|
||||
case CM_ITALIC:
|
||||
@ -789,7 +804,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
color = ScrollView::RED;
|
||||
break;
|
||||
case CM_DROPCAPS:
|
||||
if (box_word->BlobPosition(i) == SP_DROPCAP)
|
||||
if (best_choice->BlobPosition(i) == SP_DROPCAP)
|
||||
color = ScrollView::RED;
|
||||
break;
|
||||
// TODO(rays) underline is currently completely unsupported.
|
||||
@ -833,7 +848,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
// display poly approx
|
||||
if (word->display_flag(DF_POLYGONAL)) {
|
||||
// need to convert
|
||||
TWERD* tword = TWERD::PolygonalCopy(word);
|
||||
TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);
|
||||
tword->plot(image_win);
|
||||
delete tword;
|
||||
displayed_something = TRUE;
|
||||
@ -847,15 +862,13 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
}
|
||||
if (word->display_flag(DF_BLAMER) &&
|
||||
!(word_res->blamer_bundle != NULL &&
|
||||
word_res->blamer_bundle->incorrect_result_reason == IRR_CORRECT)) {
|
||||
word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {
|
||||
text = "";
|
||||
const BlamerBundle *blamer_bundle = word_res->blamer_bundle;
|
||||
if (blamer_bundle == NULL) {
|
||||
text += "NULL";
|
||||
} else {
|
||||
for (int i = 0; i < blamer_bundle->truth_text.length(); ++i) {
|
||||
text += blamer_bundle->truth_text[i];
|
||||
}
|
||||
text = blamer_bundle->TruthString();
|
||||
}
|
||||
text += " -> ";
|
||||
STRING best_choice_str;
|
||||
@ -866,7 +879,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
}
|
||||
text += best_choice_str;
|
||||
IncorrectResultReason reason = (blamer_bundle == NULL) ?
|
||||
IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason;
|
||||
IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();
|
||||
ASSERT_HOST(reason < IRR_NUM_REASONS)
|
||||
blame += " [";
|
||||
blame += BlamerBundle::IncorrectReasonName(reason);
|
||||
@ -915,9 +928,9 @@ BOOL8 Tesseract::word_dumper(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
tprintf("\nWord data...\n");
|
||||
word_res->word->print();
|
||||
if (word_res->blamer_bundle != NULL && wordrec_debug_blamer &&
|
||||
word_res->blamer_bundle->incorrect_result_reason != IRR_CORRECT) {
|
||||
word_res->blamer_bundle->incorrect_result_reason() != IRR_CORRECT) {
|
||||
tprintf("Current blamer debug: %s\n",
|
||||
word_res->blamer_bundle->debug.string());
|
||||
word_res->blamer_bundle->debug().string());
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
@ -939,6 +952,46 @@ BOOL8 Tesseract::word_set_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
|
||||
word->set_display_flag(DF_BLAMER, word_display_mode.bit(DF_BLAMER));
|
||||
return word_display(block, row, word_res);
|
||||
}
|
||||
|
||||
// page_res is non-const because the iterator doesn't know if you are going
|
||||
// to change the items it points to! Really a const here though.
|
||||
void Tesseract::blob_feature_display(PAGE_RES* page_res,
|
||||
const TBOX& selection_box) {
|
||||
ROW* row; // row of word
|
||||
BLOCK* block; // block of word
|
||||
WERD* word = make_pseudo_word(page_res, selection_box, block, row);
|
||||
if (word != NULL) {
|
||||
WERD_RES word_res(word);
|
||||
word_res.x_height = row->x_height();
|
||||
word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
|
||||
textord_use_cjk_fp_model,
|
||||
poly_allow_detailed_fx,
|
||||
row, block);
|
||||
TWERD* bln_word = word_res.chopped_word;
|
||||
TBLOB* bln_blob = bln_word->blobs[0];
|
||||
INT_FX_RESULT_STRUCT fx_info;
|
||||
GenericVector<INT_FEATURE_STRUCT> bl_features;
|
||||
GenericVector<INT_FEATURE_STRUCT> cn_features;
|
||||
Classify::ExtractFeatures(*bln_blob, classify_nonlinear_norm, &bl_features,
|
||||
&cn_features, &fx_info, NULL);
|
||||
// Display baseline features.
|
||||
ScrollView* bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
|
||||
ClearFeatureSpaceWindow(baseline, bl_win);
|
||||
for (int f = 0; f < bl_features.size(); ++f)
|
||||
RenderIntFeature(bl_win, &bl_features[f], ScrollView::GREEN);
|
||||
bl_win->Update();
|
||||
// Display cn features.
|
||||
ScrollView* cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
|
||||
ClearFeatureSpaceWindow(character, cn_win);
|
||||
for (int f = 0; f < cn_features.size(); ++f)
|
||||
RenderIntFeature(cn_win, &cn_features[f], ScrollView::GREEN);
|
||||
cn_win->Update();
|
||||
|
||||
delete word;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -169,6 +169,32 @@ void ImageThresholder::ThresholdToPix(Pix** pix) {
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
||||
// returned pix may be an integer reduction of the binary image such that
|
||||
// the scale factor may be inferred from the ratio of the sizes, even down
|
||||
// to the extreme of a 1x1 pixel thresholds image.
|
||||
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||
// Returns NULL if the input is binary. PixDestroy after use.
|
||||
Pix* ImageThresholder::GetPixRectThresholds() {
|
||||
if (IsBinary()) return NULL;
|
||||
Pix* pix_grey = GetPixRectGrey();
|
||||
int width = pixGetWidth(pix_grey);
|
||||
int height = pixGetHeight(pix_grey);
|
||||
int* thresholds;
|
||||
int* hi_values;
|
||||
OtsuThreshold(reinterpret_cast<const unsigned char*>(pixGetData(pix_grey)),
|
||||
1, pixGetWpl(pix_grey) * sizeof(l_uint32),
|
||||
0, 0, width, height, &thresholds, &hi_values);
|
||||
pixDestroy(&pix_grey);
|
||||
Pix* pix_thresholds = pixCreate(width, height, 8);
|
||||
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
|
||||
pixSetAllArbitrary(pix_thresholds, threshold);
|
||||
delete [] thresholds;
|
||||
delete [] hi_values;
|
||||
return pix_thresholds;
|
||||
}
|
||||
|
||||
// Common initialization shared between SetImage methods.
|
||||
void ImageThresholder::Init() {
|
||||
SetRectangle(0, 0, image_width_, image_height_);
|
||||
@ -198,11 +224,10 @@ Pix* ImageThresholder::GetPixRect() {
|
||||
return raw_pix;
|
||||
}
|
||||
|
||||
// Get a clone/copy of the source image rectangle, reduced to greyscale.
|
||||
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
||||
// and at the same resolution as the output binary.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// This function will be used in the future by the page layout analysis, and
|
||||
// the layout analysis that uses it will only be available with Leptonica,
|
||||
// so there is no raw equivalent.
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
Pix* ImageThresholder::GetPixRectGrey() {
|
||||
Pix* pix = GetPixRect(); // May have to be reduced to grey.
|
||||
int depth = pixGetDepth(pix);
|
||||
|
@ -122,6 +122,15 @@ class ImageThresholder {
|
||||
/// Caller must use pixDestroy to free the created Pix.
|
||||
virtual void ThresholdToPix(Pix** pix);
|
||||
|
||||
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
||||
// returned pix may be an integer reduction of the binary image such that
|
||||
// the scale factor may be inferred from the ratio of the sizes, even down
|
||||
// to the extreme of a 1x1 pixel thresholds image.
|
||||
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||
// Returns NULL if the input is binary. PixDestroy after use.
|
||||
virtual Pix* GetPixRectThresholds();
|
||||
|
||||
/// Get a clone/copy of the source image rectangle.
|
||||
/// The returned Pix must be pixDestroyed.
|
||||
/// This function will be used in the future by the page layout analysis, and
|
||||
@ -129,12 +138,11 @@ class ImageThresholder {
|
||||
/// so there is no raw equivalent.
|
||||
Pix* GetPixRect();
|
||||
|
||||
/// Get a clone/copy of the source image rectangle, reduced to greyscale.
|
||||
/// The returned Pix must be pixDestroyed.
|
||||
/// This function will be used in the future by the page layout analysis, and
|
||||
/// the layout analysis that uses it will only be available with Leptonica,
|
||||
/// so there is no raw equivalent.
|
||||
Pix* GetPixRectGrey();
|
||||
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
||||
// and at the same resolution as the output binary.
|
||||
// The returned Pix must be pixDestroyed.
|
||||
// Provided to the classifier to extract features from the greyscale image.
|
||||
virtual Pix* GetPixRectGrey();
|
||||
|
||||
protected:
|
||||
// ----------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user