tesseract/cube/beam_search.cpp

/**********************************************************************
 * File:        beam_search.cpp
 * Description: Class to implement Beam Word Search Algorithm
 * Author:    Ahmad Abdulkader
 * Created:   2007
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include <algorithm>

#include "beam_search.h"
#include "tesseractclass.h"

namespace tesseract {

// Constructs a beam search object bound to the given recognition context.
// word_mode selects word mode (true) or phrase mode (false); Search() only
// evaluates inter-character space costs when word_mode is false.
// No lattice is allocated here; that happens in Search().
BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) {
  cntxt_ = cntxt;
  seg_pt_cnt_ = 0;
  col_cnt_ = 1;
  col_ = NULL;
  word_mode_ = word_mode;
  // Give the best-node index a defined value so that it is never read
  // uninitialized before the first search assigns it.
  best_presorted_node_idx_ = 0;
}

// Releases the lattice built by the last search: each column object first,
// then the array that holds them. Does nothing when no lattice exists.
void BeamSearch::Cleanup() {
  if (col_ == NULL)
    return;

  int idx = 0;
  while (idx < col_cnt_) {
    delete col_[idx];
    ++idx;
  }
  delete []col_;
  col_ = NULL;
}

// Destroys the search object, freeing any lattice left over from the
// last search.
BeamSearch::~BeamSearch() {
  this->Cleanup();
}

// Expands parent_node into out_col: asks the language model for every edge
// that can follow lm_parent_edge given char_alt_list, and adds one child node
// per acceptable edge. extra_cost is added to the recognition cost of each
// child when a non-empty character alternate list is available.
// Every edge returned by the language model is either handed to the column
// through AddNode or deleted here.
void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
                                SearchNode *parent_node,
                                LangModEdge *lm_parent_edge,
                                CharAltList *char_alt_list, int extra_cost) {
  // ask the language model for the outgoing edges of this parent
  int num_edges;
  LangModEdge **edges = lang_mod->GetEdges(char_alt_list, lm_parent_edge,
                                           &num_edges);
  if (!edges)
    return;

  for (int i = 0; i < num_edges; ++i) {
    LangModEdge *candidate = edges[i];

    // Unless the input is noisy, a node in the last column (or beyond) is
    // only acceptable if its edge is a valid end-of-word.
    const bool must_end_word = !cntxt_->NoisyInput() &&
        out_col->ColIdx() >= seg_pt_cnt_;
    bool accept = !(must_end_word && !candidate->IsEOW());

    int reco_cost = MIN_PROB_COST;
    if (accept) {
      // recognition cost of the edge's class, never below zero
      if (char_alt_list && char_alt_list->AltCount() > 0) {
        reco_cost = MAX(0, char_alt_list->ClassCost(candidate->ClassID()));
        // add the no-space cost (expected to be zero in word mode)
        reco_cost += extra_cost;
      }
      accept = (reco_cost >= 0);
    }

    if (accept) {
      // the column takes care of freeing the edge from here on
      out_col->AddNode(candidate, reco_cost, parent_node, cntxt_);
    } else {
      // nobody is going to own this edge, so free it now
      delete candidate;
    }
  }

  // the array itself is always ours to free
  delete []edges;
}

// Runs a beam search over the segmentation points of srch_obj, using
// lang_mod as the language model (the context's language model is used when
// lang_mod is NULL). One lattice column is built per possible end segment;
// the word alternates derived from the last column are returned.
// Returns NULL when no language model is available, when the segment point
// count is negative or above 128, or when an allocation check fails.
WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
  // fall back on the context's language model if none was supplied
  if (lang_mod == NULL) {
    lang_mod = cntxt_->LangMod();
    if (lang_mod == NULL) {
      fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
              "LangModel\n");
      return NULL;
    }
  }

  // drop the lattice of any previous search
  Cleanup();

  // the lattice has one more column than there are segmentation points
  seg_pt_cnt_ = srch_obj->SegPtCnt();
  if (seg_pt_cnt_ < 0) {
    return NULL;
  }
  col_cnt_ = seg_pt_cnt_ + 1;

  // refuse to process suspiciously large inputs
  if (seg_pt_cnt_ > 128) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
            "suspiciously high; bailing out\n");
    return NULL;
  }

  // allocate the (initially empty) column array
  col_ = new SearchColumn *[col_cnt_];
  if (col_ == NULL) {
    fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
            "SearchColumn array\n");
    return NULL;
  }
  memset(col_, 0, col_cnt_ * sizeof(*col_));

  // build the lattice one column (i.e. one end segment) at a time
  for (int col_idx = 0; col_idx < col_cnt_; ++col_idx) {
    const int end_seg = col_idx + 1;

    SearchColumn *cur_col = new SearchColumn(col_idx,
                                             cntxt_->Params()->BeamWidth());
    col_[col_idx] = cur_col;
    if (cur_col == NULL) {
      fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
              "SearchColumn for column %d\n", col_idx);
      return NULL;
    }

    // a character may span at most MaxSegPerChar segments
    int first_start = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
    for (int start_seg = first_start; start_seg < end_seg; ++start_seg) {
      // The root segment has a single, implicit (NULL) parent; any other
      // segment is expanded from the nodes of the preceding column.
      const bool at_root = (start_seg == 0);
      int num_parents = 1;
      SearchNode **parents = NULL;
      if (!at_root) {
        SearchColumn *prev_col = col_[start_seg - 1];
        num_parents = prev_col->NodeCount();
        parents = prev_col->Nodes();
      }

      // run the shape recognizer on this segment range
      CharAltList *alts = srch_obj->RecognizeSegment(start_seg - 1, col_idx);

      // spaces are only considered in phrase mode and away from the root
      const bool may_split_word = !word_mode_ && !at_root;

      for (int p = 0; p < num_parents; ++p) {
        SearchNode *parent = (parents != NULL) ? parents[p] : NULL;
        LangModEdge *parent_edge = (parent != NULL) ? parent->LangModelEdge()
            : lang_mod->Root();

        // cost of having no space inside the segment range
        const int contig_cost = srch_obj->NoSpaceCost(start_seg - 1, col_idx);

        // cost of having no space right before this character
        int no_space_cost = 0;
        if (may_split_word) {
          no_space_cost = srch_obj->NoSpaceCost(start_seg - 1);
        }

        // continue the current word if that is cheap enough
        const int joined_cost = contig_cost + no_space_cost;
        if (joined_cost < MIN_PROB_COST) {
          CreateChildren(cur_col, lang_mod, parent, parent_edge, alts,
                         joined_cost);
        }

        if (!may_split_word)
          continue;

        // Here the parent comes from a previous column. If it can end a
        // word, also try starting a new word after a space.
        if (!parent->LangModelEdge()->IsEOW())
          continue;

        const int split_cost = contig_cost +
            srch_obj->SpaceCost(start_seg - 1);
        if (split_cost < MIN_PROB_COST) {
          // a NULL parent edge restarts the language model
          CreateChildren(cur_col, lang_mod, parent, NULL, alts, split_cost);
        }
      }  // parents
    }  // start_seg

    // prune the column, then release its hash table (no longer needed)
    cur_col->Prune();
    cur_col->FreeHashTable();
  }  // col_idx

  return CreateWordAltList(srch_obj);
}

// Builds the word alternate list from the nodes of the last lattice column.
// Each alternate's cost is the weighted sum of its size, character bigram,
// word unigram and recognition costs. Also records, in
// best_presorted_node_idx_, the index of the cheapest node as it stood before
// the list was sorted. Returns NULL if the last column has no nodes.
WordAltList *BeamSearch::CreateWordAltList(SearchObject *srch_obj) {
  // the alternates come from the last column of the lattice
  SearchColumn *last_col = col_[col_cnt_ - 1];
  const int total_nodes = last_col->NodeCount();
  SearchNode **nodes = last_col->Nodes();
  CharBigrams *bigrams = cntxt_->Bigrams();
  WordUnigrams *word_unigrams = cntxt_->WordUnigramsObj();

  // Remember which node is cheapest before sorting reorders the alt list,
  // so that it can be retrieved from the node list when backtracking.
  best_presorted_node_idx_ = 0;
  if (total_nodes <= 0)
    return NULL;

  int lowest_cost = -1;
  WordAltList *alternates = new WordAltList(total_nodes + 1);
  for (int n = 0; n < total_nodes; ++n) {
    SearchNode *node = nodes[n];
    const int reco_cost = node->BestCost();

    // size cost; this also yields the alternate's string
    char_32 *text = NULL;
    const int size_cost = SizeCost(srch_obj, node, &text);
    if (text == NULL)
      continue;

    // character bigram cost (zero when no bigram model is present)
    int bigram_cost = 0;
    if (bigrams) {
      bigram_cost = bigrams->Cost(text, cntxt_->CharacterSet());
    }

    // word unigram cost (zero when no unigram model is present)
    int unigram_cost = 0;
    if (word_unigrams) {
      unigram_cost = word_unigrams->Cost(text, cntxt_->LangMod(),
                                         cntxt_->CharacterSet());
    }

    // weighted combination of all the costs
    const int total_cost = static_cast<int>(
        (size_cost * cntxt_->Params()->SizeWgt()) +
        (bigram_cost * cntxt_->Params()->CharBigramWgt()) +
        (unigram_cost * cntxt_->Params()->WordUnigramWgt()) +
        (reco_cost * cntxt_->Params()->RecoWgt()));

    alternates->Insert(text, total_cost, static_cast<void *>(node));

    // The comparison has to be strict, because WordAltList::Sort() uses a
    // strict comparison in its bubble sort when swapping entries.
    if (lowest_cost < 0 || total_cost < lowest_cost) {
      best_presorted_node_idx_ = n;
      lowest_cost = total_cost;
    }
    delete []text;
  }

  // order the alternates by cost
  alternates->Sort();
  return alternates;
}

// Looks up a lattice column by index. Returns NULL when there is no
// lattice or when col lies outside [0, col_cnt_).
SearchColumn *BeamSearch::Column(int col) const {
  const bool in_range = (col >= 0) && (col < col_cnt_);
  if (in_range && col_ != NULL) {
    return col_[col];
  }
  return NULL;
}

// Returns the first node of the last lattice column of the last search,
// or NULL when there is no lattice, no last column, or no nodes in it.
SearchNode *BeamSearch::BestNode() const {
  if (col_cnt_ < 1 || col_ == NULL)
    return NULL;

  SearchColumn *last_col = col_[col_cnt_ - 1];
  if (last_col == NULL)
    return NULL;

  const int total_nodes = last_col->NodeCount();
  SearchNode **nodes = last_col->Nodes();
  if (total_nodes >= 1 && nodes != NULL) {
    // may itself be NULL, in which case NULL is what the caller gets
    return nodes[0];
  }
  return NULL;
}

// Returns the string corresponding to the specified alt, i.e. the path
// string of node number `alt` in the last column of the lattice.
// Returns NULL if there is no lattice, if alt is out of range, or if the
// node or its path string is unavailable.
// The buffer comes from SearchNode::PathString(); elsewhere in this file such
// buffers are released by the caller with delete [].
char_32 *BeamSearch::Alt(int alt) const {
  // There is no lattice before the first search, after Cleanup(), or after
  // a search that bailed out early; col_cnt_ alone does not tell us that.
  if (col_cnt_ <= 0 || !col_)
    return NULL;

  // get the last column of the lattice
  SearchColumn *srch_col = col_[col_cnt_ - 1];
  if (!srch_col)
    return NULL;

  // point to the last node in the selected path; reject indices on either
  // side of the valid range
  if (alt < 0 || alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
    return NULL;
  }

  SearchNode *srch_node = srch_col->Nodes()[alt];
  if (!srch_node)
    return NULL;

  // get string (NULL is passed through to the caller on failure)
  return srch_node->PathString();
}

// Backtracks from the node at node_index in the last lattice column and
// returns the corresponding character mapped segments, storing the
// character count in *char_cnt. Optional return arguments are the char_32
// result string and character bounding boxes, if non-NULL values are
// passed in.
// Returns NULL if there is no lattice, if node_index is out of range, or if
// backtracking from the selected node fails.
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, int node_index,
                                 int *char_cnt, char_32 **str32,
                                 Boxa **char_boxes) const {
  // There is no lattice before the first search, after Cleanup(), or after
  // a search that bailed out early; col_cnt_ alone does not tell us that.
  if (col_cnt_ <= 0 || !col_)
    return NULL;

  // get the last column of the lattice
  SearchColumn *srch_col = col_[col_cnt_ - 1];
  if (!srch_col)
    return NULL;

  // point to the last node in the selected path; reject indices on either
  // side of the valid range
  if (node_index < 0 || node_index >= srch_col->NodeCount() ||
      !srch_col->Nodes())
    return NULL;

  SearchNode *srch_node = srch_col->Nodes()[node_index];
  if (!srch_node)
    return NULL;
  return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
}

// Backtracks from the specified lattice node and returns the corresponding
// character mapped segments, storing the character count in *char_cnt.
// Optional return arguments are the char_32 result string and character
// bounding boxes, if non-NULL values are passed in.
// Any value already held in *str32 is freed with delete [] and any Boxa
// already held in *char_boxes is destroyed, so both must be initialized
// (e.g. to NULL) by the caller.
// Returns NULL on failure; in that case *str32 (if requested) is left NULL,
// so the caller has nothing to free.
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node,
                                 int *char_cnt, char_32 **str32,
                                 Boxa **char_boxes) const {
  // srch_obj and char_cnt are dereferenced unconditionally further down
  if (!srch_obj || !srch_node || !char_cnt)
    return NULL;

  if (str32) {
    delete [](*str32);  // clear existing value
    *str32 = srch_node->PathString();
    if (!*str32)
      return NULL;
  }

  if (char_boxes && *char_boxes) {
    boxaDestroy(char_boxes);  // clear existing value
  }

  CharSamp **chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
  if (!chars && str32) {
    // Free the string and reset the pointer: leaving it dangling would make
    // callers that test *str32 for NULL read and free released memory.
    delete [](*str32);
    *str32 = NULL;
  }
  return chars;
}

// Walks the lattice path that ends at srch_node back to its root and returns
// an array with one character sample per node on the path, in path order.
// *char_cnt receives the number of nodes on the path. If char_boxes is
// non-NULL, it receives a Boxa holding one bounding box per character, in
// the same order; any Boxa it already holds is destroyed first.
// Returns NULL if the path is empty or if any step fails; in the latter
// case the Boxa (if requested) is destroyed again.
CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj,
                                   SearchNode *srch_node,
                                   int *char_cnt,
                                   Boxa **char_boxes) const {
  // The character count is the number of nodes on the path (it could be
  // less than the path length when in phrase mode).
  int path_nodes = 0;
  for (SearchNode *walker = srch_node; walker != NULL;
       walker = walker->ParentNode()) {
    ++path_nodes;
  }
  *char_cnt = path_nodes;
  if (path_nodes == 0)
    return NULL;

  // Set up the box array, discarding whatever the caller passed in.
  if (char_boxes) {
    if (*char_boxes)
      boxaDestroy(char_boxes);
    *char_boxes = boxaCreate(*char_cnt);
    if (*char_boxes == NULL)
      return NULL;
  }

  // Set up the sample array.
  CharSamp **samples = new CharSamp *[*char_cnt];
  if (samples == NULL) {
    if (char_boxes)
      boxaDestroy(char_boxes);
    return NULL;
  }

  // Fill the sample array back to front while walking towards the root.
  const int max_seg_pt = srch_obj->SegPtCnt();
  int slot = *char_cnt - 1;
  while (srch_node && slot >= 0) {
    // the parent is NULL for the first character of the path
    SearchNode *parent = srch_node->ParentNode();

    // starting segmentation point: -1 unless the parent's column says
    // otherwise
    int first_col = 0;
    if (parent) {
      first_col = parent->ColIdx() + 1;
    }
    int first_seg_pt = -1;
    if (first_col > 0) {
      first_seg_pt = first_col - 1;
    }

    // ending segmentation point: the node's column, capped at the count
    int last_seg_pt = srch_node->ColIdx();
    if (last_seg_pt >= max_seg_pt) {
      last_seg_pt = max_seg_pt;
    }

    // fetch and label the sample for this segment range
    CharSamp *sample = srch_obj->CharSample(first_seg_pt, last_seg_pt);
    if (sample == NULL) {
      delete []samples;
      if (char_boxes)
        boxaDestroy(char_boxes);
      return NULL;
    }
    sample->SetLabel(srch_node->NodeString());
    samples[slot] = sample;

    // append the character's bounding box, if boxes were requested
    if (char_boxes) {
      Box *bounds = boxCreate(sample->Left(), sample->Top(),
                              sample->Width(), sample->Height());
      if (bounds == NULL) {
        delete []samples;
        boxaDestroy(char_boxes);
        return NULL;
      }
      boxaAddBox(*char_boxes, bounds, L_INSERT);
    }

    srch_node = parent;
    --slot;
  }

  // Boxes were appended last character first, so swap them end for end.
  if (char_boxes) {
    const int box_total = boxaGetCount(*char_boxes);
    for (int lo = 0, hi = box_total - 1; lo < hi; ++lo, --hi) {
      Box *front = boxaGetBox(*char_boxes, lo, L_CLONE);
      Box *back = boxaGetBox(*char_boxes, hi, L_CLONE);
      boxaReplaceBox(*char_boxes, hi, front);
      boxaReplaceBox(*char_boxes, lo, back);
    }
  }
  return samples;
}

// Computes the size cost of the string spelled by the lattice path ending
// at node. The path string is handed back through str32 by BackTrack.
// Returns 0 for a NULL node or when the context has no size model, and
// WORST_COST when backtracking fails.
int BeamSearch::SizeCost(SearchObject *srch_obj, SearchNode *node,
                         char_32 **str32) const {
  if (node == NULL)
    return 0;

  // recover the string and the character segmentation of the path
  int num_chars = 0;
  CharSamp **samples = BackTrack(srch_obj, node, &num_chars, str32, NULL);
  if (samples == NULL)
    return WORST_COST;

  int cost = 0;
  if (cntxt_->SizeModel() != NULL) {
    cost = cntxt_->SizeModel()->Cost(samples, num_chars);
  }

  // only the array is freed here, not the samples it points to
  delete []samples;
  return cost;
}
}  // namespace tesseract