/********************************************************************** * File: cube_search_object.cpp * Description: Implementation of the Cube Search Object Class * Author: Ahmad Abdulkader * Created: 2007 * * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #include "cube_search_object.h" #include "cube_utils.h" #include "ndminx.h" namespace tesseract { const bool CubeSearchObject::kUseCroppedChars = true; CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp) : SearchObject(cntxt) { init_ = false; reco_cache_ = NULL; samp_cache_ = NULL; segments_ = NULL; segment_cnt_ = 0; samp_ = samp; left_ = 0; itop_ = 0; space_cost_ = NULL; no_space_cost_ = NULL; wid_ = samp_->Width(); hgt_ = samp_->Height(); max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar(); rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L); min_spc_gap_ = static_cast(hgt_ * cntxt_->Params()->MinSpaceHeightRatio()); max_spc_gap_ = static_cast(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio()); } CubeSearchObject::~CubeSearchObject() { Cleanup(); } // Cleanup void CubeSearchObject::Cleanup() { // delete Recognition Cache if (reco_cache_) { for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { if (reco_cache_[strt_seg]) { for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { if (reco_cache_[strt_seg][end_seg]) { delete reco_cache_[strt_seg][end_seg]; } } delete []reco_cache_[strt_seg]; } } delete []reco_cache_; reco_cache_ = NULL; } // delete CharSamp Cache if (samp_cache_) { for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) { if (samp_cache_[strt_seg]) { for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) { if (samp_cache_[strt_seg][end_seg]) { delete samp_cache_[strt_seg][end_seg]; } } delete []samp_cache_[strt_seg]; } } delete []samp_cache_; samp_cache_ = NULL; } // delete segment list if (segments_) { for (int seg = 0; seg < segment_cnt_; seg++) { if (segments_[seg]) { delete segments_[seg]; } } delete []segments_; segments_ = NULL; } if (space_cost_) { delete []space_cost_; space_cost_ = NULL; } if (no_space_cost_) { delete []no_space_cost_; no_space_cost_ = NULL; } segment_cnt_ = 0; init_ = false; } // # of segmentation points. One less than the count of segments int CubeSearchObject::SegPtCnt() { if (!init_ && !Init()) return -1; return segment_cnt_ - 1; } // init and allocate variables, perform segmentation bool CubeSearchObject::Init() { if (init_) return true; if (!Segment()) { return false; } // init cache reco_cache_ = new CharAltList **[segment_cnt_]; if (reco_cache_ == NULL) { fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " "allocate CharAltList array\n"); return false; } samp_cache_ = new CharSamp **[segment_cnt_]; if (samp_cache_ == NULL) { fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " "allocate CharSamp array\n"); return false; } for (int seg = 0; seg < segment_cnt_; seg++) { reco_cache_[seg] = new CharAltList *[segment_cnt_]; if (reco_cache_[seg] == NULL) { fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " "allocate a single segment's CharAltList array\n"); return false; } memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg])); samp_cache_[seg] = new CharSamp *[segment_cnt_]; if (samp_cache_[seg] == NULL) { fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " "allocate a single segment's CharSamp array\n"); return false; } memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg])); } init_ = true; return true; } // returns a char sample corresponding to the bitmap between 2 seg pts CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) { // init if necessary if (!init_ && !Init()) return NULL; // validate segment range if (!IsValidSegmentRange(start_pt, end_pt)) return NULL; // look for the samp in the cache if (samp_cache_ && samp_cache_[start_pt + 1] && samp_cache_[start_pt + 1][end_pt]) { return samp_cache_[start_pt + 1][end_pt]; } // create a char samp object from the specified range of segments bool left_most; bool right_most; CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, end_pt - start_pt, NULL, &left_most, &right_most, hgt_); if (!samp) return NULL; if (kUseCroppedChars) { CharSamp *cropped_samp = samp->Crop(); // we no longer need the orig sample delete samp; if (!cropped_samp) return NULL; samp = cropped_samp; } // get the dimensions of the new cropped sample int char_top = samp->Top(); int char_wid = samp->Width(); int char_hgt = samp->Height(); // for cursive languages, these features correspond to whether // the charsamp is at the beginning or end of conncomp if (cntxt_->Cursive() == true) { // first and last char flags depend on reading order bool first_char = rtl_ ? right_most : left_most; bool last_char = rtl_ ? left_most : right_most; samp->SetFirstChar(first_char ? 255 : 0); samp->SetLastChar(last_char ? 255 : 0); } else { // for non cursive languages, these features correspond // to whether the charsamp is at the beginning or end of the word samp->SetFirstChar((start_pt == -1) ? 255 : 0); samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0); } samp->SetNormTop(255 * char_top / hgt_); samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_); samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt)); // add to cache & return samp_cache_[start_pt + 1][end_pt] = samp; return samp; } Box *CubeSearchObject::CharBox(int start_pt, int end_pt) { if (!init_ && !Init()) return NULL; if (!IsValidSegmentRange(start_pt, end_pt)) { fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid " "segment range (%d, %d)\n", start_pt, end_pt); return NULL; } // create a char samp object from the specified range of segments, // extract its dimensions into a leptonica box, and delete it bool left_most; bool right_most; CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1, end_pt - start_pt, NULL, &left_most, &right_most, hgt_); if (!samp) return NULL; if (kUseCroppedChars) { CharSamp *cropped_samp = samp->Crop(); delete samp; if (!cropped_samp) { return NULL; } samp = cropped_samp; } Box *box = boxCreate(samp->Left(), samp->Top(), samp->Width(), samp->Height()); delete samp; return box; } // call from Beam Search to return the alt list corresponding to // recognizing the bitmap between two segmentation pts CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) { // init if necessary if (!init_ && !Init()) { fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " "not initialize CubeSearchObject\n"); return NULL; } // validate segment range if (!IsValidSegmentRange(start_pt, end_pt)) { fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid " "segment range (%d, %d)\n", start_pt, end_pt); return NULL; } // look for the recognition results in cache in the cache if (reco_cache_ && reco_cache_[start_pt + 1] && reco_cache_[start_pt + 1][end_pt]) { return reco_cache_[start_pt + 1][end_pt]; } // create the char sample corresponding to the blob CharSamp *samp = CharSample(start_pt, end_pt); if (!samp) { fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could " "not construct CharSamp\n"); return NULL; } // recognize the char sample CharClassifier *char_classifier = cntxt_->Classifier(); if (char_classifier) { reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp); } else { // no classifer: all characters are equally probable; add a penalty // that favors 2-segment characters and aspect ratios (w/h) > 1 fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube " "context has no character classifier!! Inventing a probability " "distribution.\n"); int class_cnt = cntxt_->CharacterSet()->ClassCount(); CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt); int seg_cnt = end_pt - start_pt; double prob_val = (1.0 / class_cnt) * exp(-fabs(seg_cnt - 2.0)) * exp(-samp->Width() / static_cast(samp->Height())); if (alt_list) { for (int class_idx = 0; class_idx < class_cnt; class_idx++) { alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val)); } reco_cache_[start_pt + 1][end_pt] = alt_list; } } return reco_cache_[start_pt + 1][end_pt]; } // Perform segmentation of the bitmap by detecting connected components, // segmenting each connected component using windowed vertical pixel density // histogram and sorting the resulting segments in reading order bool CubeSearchObject::Segment() { if (!samp_) return false; segment_cnt_ = 0; segments_ = samp_->Segment(&segment_cnt_, rtl_, cntxt_->Params()->HistWindWid(), cntxt_->Params()->MinConCompSize()); if (!segments_ || segment_cnt_ <= 0) { return false; } if (segment_cnt_ >= kMaxSegmentCnt) { return false; } return true; } // computes the space and no space costs at gaps between segments bool CubeSearchObject::ComputeSpaceCosts() { // init if necessary if (!init_ && !Init()) return false; // Already computed if (space_cost_) return true; // No segmentation points if (segment_cnt_ < 2) return false; // Compute the maximum x to the left of and minimum x to the right of each // segmentation point int *max_left_x = new int[segment_cnt_ - 1]; int *min_right_x = new int[segment_cnt_ - 1]; if (!max_left_x || !min_right_x) { delete []min_right_x; delete []max_left_x; return false; } if (rtl_) { min_right_x[0] = segments_[0]->Left(); max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right(); for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { min_right_x[pt_idx] = MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left()); max_left_x[segment_cnt_ - pt_idx - 2] = MAX(max_left_x[segment_cnt_ - pt_idx - 1], segments_[segment_cnt_ - pt_idx - 1]->Right()); } } else { min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left(); max_left_x[0] = segments_[0]->Right(); for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) { min_right_x[segment_cnt_ - pt_idx - 2] = MIN(min_right_x[segment_cnt_ - pt_idx - 1], segments_[segment_cnt_ - pt_idx - 1]->Left()); max_left_x[pt_idx] = MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right()); } } // Allocate memory for space and no space costs // trivial cases space_cost_ = new int[segment_cnt_ - 1]; no_space_cost_ = new int[segment_cnt_ - 1]; if (!space_cost_ || !no_space_cost_) { delete []min_right_x; delete []max_left_x; return false; } // go through all segmentation points determining the horizontal gap between // the images on both sides of each break points. Use the gap to estimate // the probability of a space. The probability is modeled a linear function // of the gap width for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) { // determine the gap at the segmentation point int gap = min_right_x[pt_idx] - max_left_x[pt_idx]; float prob = 0.0; // gap is too small => no space if (gap < min_spc_gap_) { prob = 0.0; } else if (gap > max_spc_gap_) { // gap is too big => definite space prob = 1.0; } else { // gap is somewhere in between, compute probability prob = (gap - min_spc_gap_) / static_cast(max_spc_gap_ - min_spc_gap_); } // compute cost of space and non-space space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) + CubeUtils::Prob2Cost(0.1); no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob); } delete []min_right_x; delete []max_left_x; return true; } // Returns the cost of having a space before the specified segmentation point int CubeSearchObject::SpaceCost(int pt_idx) { if (!space_cost_ && !ComputeSpaceCosts()) { // Failed to compute costs return a zero prob return CubeUtils::Prob2Cost(0.0); } return space_cost_[pt_idx]; } // Returns the cost of not having a space before the specified // segmentation point int CubeSearchObject::NoSpaceCost(int pt_idx) { // If failed to compute costs, return a 1.0 prob if (!space_cost_ && !ComputeSpaceCosts()) return CubeUtils::Prob2Cost(0.0); return no_space_cost_[pt_idx]; } // Returns the cost of not having any spaces within the specified range // of segmentation points int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) { // If fail to compute costs, return a 1.0 prob if (!space_cost_ && !ComputeSpaceCosts()) return CubeUtils::Prob2Cost(1.0); int no_spc_cost = 0; for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++) no_spc_cost += NoSpaceCost(pt_idx); return no_spc_cost; } }