tesseract/ccstruct/blamer.h

///////////////////////////////////////////////////////////////////////
// File:        blamer.h
// Description: Module allowing precise error causes to be allocated.
// Author:      Rike Antonova
// Refactored:  Ray Smith
// Created:     Mon Feb 04 14:37:01 PST 2013
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_BLAMER_H_
#define TESSERACT_CCSTRUCT_BLAMER_H_

#include <stdio.h>
#include "boxword.h"
#include "genericvector.h"
#include "matrix.h"
#include "params_training_featdef.h"
#include "ratngs.h"
#include "strngs.h"
#include "tesscallback.h"

// NOTE(review): presumably the slack allowed when comparing blob bounding
// boxes against ground-truth boxes; the uses are not visible in this header,
// so confirm the exact meaning and units against the implementation file.
static const inT16 kBlamerBoxTolerance = 5;

// Identifies which stage of the recognition pipeline is held responsible for
// an incorrect OCR result.
// Note: Please update kIncorrectResultReasonNames when modifying this enum.
enum IncorrectResultReason {
  // Best choice text matches the truth text: nothing to blame.
  IRR_CORRECT = 0,
  // Classifier is at fault. Either the top choice is wrong yet is a
  // dictionary word (so the language model is unlikely to correct it), or the
  // correct unichar never made it into the classifier's shortlist.
  IRR_CLASSIFIER,
  // The chopper failed to find one or more of the splits that correspond to
  // the correct character bounding boxes recorded in
  // BlamerBundle::truth_word.
  IRR_CHOPPER,
  // The classifier offered the correct unichar for every blob of the correct
  // segmentation, but either its rating was too bad for the language model to
  // pull out the correct choice, or the language model was too weak to favor
  // the correct answer. We call this a classifier-language model tradeoff
  // error.
  IRR_CLASS_LM_TRADEOFF,
  // Page layout did not produce the correct bounding box. Used when no truth
  // was found for the word, which implies the word's bounding box was wrong
  // (no truth word had a similar bounding box).
  IRR_PAGE_LAYOUT,
  // A SegSearch heuristic stopped one or more blobs of the correct
  // segmentation state from being classified (e.g. the blob was too wide).
  IRR_SEGSEARCH_HEUR,
  // The correct segmentation state was never explored because of poor
  // SegSearch pain point prioritization. Blamed when the best rating of a
  // choice built from the correct segmentation beats that of the best choice
  // (i.e. had the correct segmentation state been explored, the language
  // model would have picked the correct choice).
  IRR_SEGSEARCH_PP,
  // As IRR_CLASS_LM_TRADEOFF, but for words on which only the chopper was
  // run, and which therefore used the old language model (permuters).
  // TODO(antonova): integrate the new language model with chopper
  IRR_CLASS_OLD_LM_TRADEOFF,
  // An incorrect adaptive template match scored better than a correct one
  // (either pre-trained or adapted); recorded as an adaption error.
  IRR_ADAPTION,
  // split_and_recog_word() failed to find a suitable split in truth.
  IRR_NO_TRUTH_SPLIT,
  // No truth is available for this word (e.g. when words in the corrected
  // content file are turned into ~~~~ because an appropriate alignment was
  // not found).
  IRR_NO_TRUTH,
  // Best choice text differs from the truth text, yet none of the reasons
  // above has been set.
  IRR_UNKNOWN,

  // Number of reasons above; must remain the last enumerator.
  IRR_NUM_REASONS
};

// Blamer-related information to determine the source of errors.
struct BlamerBundle {
  static const char *IncorrectReasonName(IncorrectResultReason irr);
  BlamerBundle() : truth_has_char_boxes_(false),
      incorrect_result_reason_(IRR_CORRECT),
      lattice_data_(NULL) { ClearResults(); }
  BlamerBundle(const BlamerBundle &other) {
    this->CopyTruth(other);
    this->CopyResults(other);
  }
  ~BlamerBundle() { delete[] lattice_data_; }

  // Accessors.
  STRING TruthString() const {
    STRING truth_str;
    for (int i = 0; i < truth_text_.length(); ++i)
      truth_str += truth_text_[i];
    return truth_str;
  }
  IncorrectResultReason incorrect_result_reason() const {
    return incorrect_result_reason_;
  }
  bool NoTruth() const {
    return incorrect_result_reason_ == IRR_NO_TRUTH ||
           incorrect_result_reason_ == IRR_PAGE_LAYOUT;
  }
  bool HasDebugInfo() const {
    return debug_.length() > 0 || misadaption_debug_.length() > 0;
  }
  const STRING& debug() const {
    return debug_;
  }
  const STRING& misadaption_debug() const {
    return misadaption_debug_;
  }
  void UpdateBestRating(float rating) {
    if (rating < best_correctly_segmented_rating_)
      best_correctly_segmented_rating_ = rating;
  }
  int correct_segmentation_length() const {
    return correct_segmentation_cols_.length();
  }
  // Returns true if the given ratings matrix col,row position is included
  // in the correct segmentation path at the given index.
  bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) {
    return correct_segmentation_cols_[index] == coord.col &&
        correct_segmentation_rows_[index] == coord.row;
  }
  void set_best_choice_is_dict_and_top_choice(bool value) {
    best_choice_is_dict_and_top_choice_ = value;
  }
  const char* lattice_data() const {
    return lattice_data_;
  }
  int lattice_size() const {
    return lattice_size_;  // size of lattice_data in bytes
  }
  void set_lattice_data(const char* data, int size) {
    lattice_size_ = size;
    delete [] lattice_data_;
    lattice_data_ = new char[lattice_size_];
    memcpy(lattice_data_, data, lattice_size_);
  }
  const tesseract::ParamsTrainingBundle& params_training_bundle() const {
    return params_training_bundle_;
  }
  // Adds a new ParamsTrainingHypothesis to the current hypothesis list.
  void AddHypothesis(const tesseract::ParamsTrainingHypothesis& hypo) {
    params_training_bundle_.AddHypothesis(hypo);
  }

  // Functions to setup the blamer.
  // Whole word string, whole word bounding box.
  void SetWordTruth(const UNICHARSET& unicharset,
                    const char* truth_str, const TBOX& word_box);
  // Single "character" string, "character" bounding box.
  // May be called multiple times to indicate the characters in a word.
  void SetSymbolTruth(const UNICHARSET& unicharset,
                      const char* char_str, const TBOX& char_box);
  // Marks that there is something wrong with the truth text, like it contains
  // reject characters.
  void SetRejectedTruth();

  // Returns true if the provided word_choice is correct.
  bool ChoiceIsCorrect(const WERD_CHOICE* word_choice) const;

  void ClearResults() {
    norm_truth_word_.DeleteAllBoxes();
    norm_box_tolerance_ = 0;
    if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
    debug_ = "";
    segsearch_is_looking_for_blame_ = false;
    best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
    correct_segmentation_cols_.clear();
    correct_segmentation_rows_.clear();
    best_choice_is_dict_and_top_choice_ = false;
    delete[] lattice_data_;
    lattice_data_ = NULL;
    lattice_size_ = 0;
  }
  void CopyTruth(const BlamerBundle &other) {
    truth_has_char_boxes_ = other.truth_has_char_boxes_;
    truth_word_ = other.truth_word_;
    truth_text_ = other.truth_text_;
    incorrect_result_reason_ =
        (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
  }
  void CopyResults(const BlamerBundle &other) {
    norm_truth_word_ = other.norm_truth_word_;
    norm_box_tolerance_ = other.norm_box_tolerance_;
    incorrect_result_reason_ = other.incorrect_result_reason_;
    segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
    best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
    correct_segmentation_cols_ = other.correct_segmentation_cols_;
    correct_segmentation_rows_ = other.correct_segmentation_rows_;
    best_choice_is_dict_and_top_choice_ =
        other.best_choice_is_dict_and_top_choice_;
    if (other.lattice_data_ != NULL) {
      lattice_data_ = new char[other.lattice_size_];
      memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
      lattice_size_ = other.lattice_size_;
    } else {
      lattice_data_ = NULL;
    }
  }
  const char *IncorrectReason() const;

  // Appends choice and truth details to the given debug string.
  void FillDebugString(const STRING &msg, const WERD_CHOICE *choice,
                       STRING *debug);

  // Sets up the norm_truth_word from truth_word using the given DENORM.
  void SetupNormTruthWord(const DENORM& denorm);

  // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
  // bundles) where the right edge/ of the left-hand word is word1_right,
  // and the left edge of the right-hand word is word2_left.
  void SplitBundle(int word1_right, int word2_left, bool debug,
                   BlamerBundle* bundle1, BlamerBundle* bundle2) const;
  // "Joins" the blames from bundle1 and bundle2 into *this.
  void JoinBlames(const BlamerBundle& bundle1, const BlamerBundle& bundle2,
                  bool debug);

  // If a blob with the same bounding box as one of the truth character
  // bounding boxes is not classified as the corresponding truth character
  // blames character classifier for incorrect answer.
  void BlameClassifier(const UNICHARSET& unicharset,
                       const TBOX& blob_box,
                       const BLOB_CHOICE_LIST& choices,
                       bool debug);


  // Checks whether chops were made at all the character bounding box
  // boundaries in word->truth_word. If not - blames the chopper for an
  // incorrect answer.
  void SetChopperBlame(const WERD_RES* word, bool debug);
  // Blames the classifier or the language model if, after running only the
  // chopper, best_choice is incorrect and no blame has been yet set.
  // Blames the classifier if best_choice is classifier's top choice and is a
  // dictionary word (i.e. language model could not have helped).
  // Otherwise, blames the language model (formerly permuter word adjustment).
  void BlameClassifierOrLangModel(
      const WERD_RES* word,
      const UNICHARSET& unicharset, bool valid_permuter, bool debug);
  // Sets up the correct_segmentation_* to mark the correct bounding boxes.
  void SetupCorrectSegmentation(const TWERD* word, bool debug);

  // Returns true if a guided segmentation search is needed.
  bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
  // Setup ready to guide the segmentation search to the correct segmentation.
  // The callback pp_cb is used to avoid a cyclic dependency.
  // It calls into LMPainPoints::GenerateForBlamer by pre-binding the
  // WERD_RES, and the LMPainPoints itself.
  // pp_cb must be a permanent callback, and should be deleted by the caller.
  void InitForSegSearch(const WERD_CHOICE *best_choice,
                        MATRIX* ratings, UNICHAR_ID wildcard_id,
                        bool debug, STRING *debug_str,
                        TessResultCallback2<bool, int, int>* pp_cb);
  // Returns true if the guided segsearch is in progress.
  bool GuidedSegsearchStillGoing() const;
  // The segmentation search has ended. Sets the blame appropriately.
  void FinishSegSearch(const WERD_CHOICE *best_choice,
                       bool debug, STRING *debug_str);

  // If the bundle is null or still does not indicate the correct result,
  // fix it and use some backup reason for the blame.
  static void LastChanceBlame(bool debug, WERD_RES* word);

  // Sets the misadaption debug if this word is incorrect, as this word is
  // being adapted to.
  void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);

 private:
  void SetBlame(IncorrectResultReason irr, const STRING &msg,
                const WERD_CHOICE *choice, bool debug) {
    incorrect_result_reason_ = irr;
    debug_ = IncorrectReason();
    debug_ += " to blame: ";
    FillDebugString(msg, choice, &debug_);
    if (debug) tprintf("SetBlame(): %s", debug_.string());
  }

 private:
  // Set to true when bounding boxes for individual unichars are recorded.
  bool truth_has_char_boxes_;
  // The true_word (in the original image coordinate space) contains ground
  // truth bounding boxes for this WERD_RES.
  tesseract::BoxWord truth_word_;
  // Same as above, but in normalized coordinates
  // (filled in by WERD_RES::SetupForRecognition()).
  tesseract::BoxWord norm_truth_word_;
  // Tolerance for bounding box comparisons in normalized space.
  int norm_box_tolerance_;
  // Contains ground truth unichar for each of the bounding boxes in truth_word.
  GenericVector<STRING> truth_text_;
  // The reason for incorrect OCR result.
  IncorrectResultReason incorrect_result_reason_;
  // Debug text associated with the blame.
  STRING debug_;
  // Misadaption debug information (filled in if this word was misadapted to).
  STRING misadaption_debug_;
  // Variables used by the segmentation search when looking for the blame.
  // Set to true while segmentation search is continued after the usual
  // termination condition in order to look for the blame.
  bool segsearch_is_looking_for_blame_;
  // Best rating for correctly segmented path
  // (set and used by SegSearch when looking for blame).
  float best_correctly_segmented_rating_;
  // Vectors populated by SegSearch to indicate column and row indices that
  // correspond to blobs with correct bounding boxes.
  GenericVector<int> correct_segmentation_cols_;
  GenericVector<int> correct_segmentation_rows_;
  // Set to true if best choice is a dictionary word and
  // classifier's top choice.
  bool best_choice_is_dict_and_top_choice_;
  // Serialized segmentation search lattice.
  char *lattice_data_;
  int lattice_size_;  // size of lattice_data in bytes
  // Information about hypotheses (paths) explored by the segmentation search.
  tesseract::ParamsTrainingBundle params_training_bundle_;
};


#endif  // TESSERACT_CCSTRUCT_BLAMER_H_
Major refactor of beam search, elimination of dead code, misc bug fixes, updates to Makefile.am, Changelog etc. git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@878 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2013-09-23 23:26:50 +08:00			`///////////////////////////////////////////////////////////////////////`
			`// File: blamer.h`
			`// Description: Module allowing precise error causes to be allocated.`
			`// Author: Rike Antonova`
			`// Refactored: Ray Smith`
			`// Created: Mon Feb 04 14:37:01 PST 2013`
			`//`
			`// (C) Copyright 2013, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`//`
			`///////////////////////////////////////////////////////////////////////`

			`#ifndef TESSERACT_CCSTRUCT_BLAMER_H_`
			`#define TESSERACT_CCSTRUCT_BLAMER_H_`

			`#include <stdio.h>`
			`#include "boxword.h"`
			`#include "genericvector.h"`
			`#include "matrix.h"`
			`#include "params_training_featdef.h"`
			`#include "ratngs.h"`
			`#include "strngs.h"`
			`#include "tesscallback.h"`

			`static const inT16 kBlamerBoxTolerance = 5;`

			`// Enum for expressing the source of error.`
			`// Note: Please update kIncorrectResultReasonNames when modifying this enum.`
			`enum IncorrectResultReason {`
			`// The text recorded in best choice == truth text`
			`IRR_CORRECT,`
			`// Either: Top choice is incorrect and is a dictionary word (language model`
			`// is unlikely to help correct such errors, so blame the classifier).`
			`// Or: the correct unichar was not included in shortlist produced by the`
			`// classifier at all.`
			`IRR_CLASSIFIER,`
			`// Chopper have not found one or more splits that correspond to the correct`
			`// character bounding boxes recorded in BlamerBundle::truth_word.`
			`IRR_CHOPPER,`
			`// Classifier did include correct unichars for each blob in the correct`
			`// segmentation, however its rating could have been too bad to allow the`
			`// language model to pull out the correct choice. On the other hand the`
			`// strength of the language model might have been too weak to favor the`
			`// correct answer, this we call this case a classifier-language model`
			`// tradeoff error.`
			`IRR_CLASS_LM_TRADEOFF,`
			`// Page layout failed to produce the correct bounding box. Blame page layout`
			`// if the truth was not found for the word, which implies that the bounding`
			`// box of the word was incorrect (no truth word had a similar bounding box).`
			`IRR_PAGE_LAYOUT,`
			`// SegSearch heuristic prevented one or more blobs from the correct`
			`// segmentation state to be classified (e.g. the blob was too wide).`
			`IRR_SEGSEARCH_HEUR,`
			`// The correct segmentaiton state was not explored because of poor SegSearch`
			`// pain point prioritization. We blame SegSearch pain point prioritization`
			`// if the best rating of a choice constructed from correct segmentation is`
			`// better than that of the best choice (i.e. if we got to explore the correct`
			`// segmentation state, language model would have picked the correct choice).`
			`IRR_SEGSEARCH_PP,`
			`// Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,`
			`// and thus use the old language model (permuters).`
			`// TODO(antonova): integrate the new language mode with chopper`
			`IRR_CLASS_OLD_LM_TRADEOFF,`
			`// If there is an incorrect adaptive template match with a better score than`
			`// a correct one (either pre-trained or adapted), mark this as adaption error.`
			`IRR_ADAPTION,`
			`// split_and_recog_word() failed to find a suitable split in truth.`
			`IRR_NO_TRUTH_SPLIT,`
			`// Truth is not available for this word (e.g. when words in corrected content`
			`// file are turned into ~~~~ because an appropriate alignment was not found.`
			`IRR_NO_TRUTH,`
			`// The text recorded in best choice != truth text, but none of the above`
			`// reasons are set.`
			`IRR_UNKNOWN,`

			`IRR_NUM_REASONS`
			`};`

			`// Blamer-related information to determine the source of errors.`
			`struct BlamerBundle {`
			`static const char *IncorrectReasonName(IncorrectResultReason irr);`
			`BlamerBundle() : truth_has_char_boxes_(false),`
			`incorrect_result_reason_(IRR_CORRECT),`
			`lattice_data_(NULL) { ClearResults(); }`
			`BlamerBundle(const BlamerBundle &other) {`
			`this->CopyTruth(other);`
			`this->CopyResults(other);`
			`}`
			`~BlamerBundle() { delete[] lattice_data_; }`

			`// Accessors.`
			`STRING TruthString() const {`
			`STRING truth_str;`
			`for (int i = 0; i < truth_text_.length(); ++i)`
			`truth_str += truth_text_[i];`
			`return truth_str;`
			`}`
			`IncorrectResultReason incorrect_result_reason() const {`
			`return incorrect_result_reason_;`
			`}`
			`bool NoTruth() const {`
			`return incorrect_result_reason_ == IRR_NO_TRUTH \|\|`
			`incorrect_result_reason_ == IRR_PAGE_LAYOUT;`
			`}`
			`bool HasDebugInfo() const {`
			`return debug_.length() > 0 \|\| misadaption_debug_.length() > 0;`
			`}`
			`const STRING& debug() const {`
			`return debug_;`
			`}`
			`const STRING& misadaption_debug() const {`
			`return misadaption_debug_;`
			`}`
			`void UpdateBestRating(float rating) {`
			`if (rating < best_correctly_segmented_rating_)`
			`best_correctly_segmented_rating_ = rating;`
			`}`
			`int correct_segmentation_length() const {`
			`return correct_segmentation_cols_.length();`
			`}`
			`// Returns true if the given ratings matrix col,row position is included`
			`// in the correct segmentation path at the given index.`
			`bool MatrixPositionCorrect(int index, const MATRIX_COORD& coord) {`
			`return correct_segmentation_cols_[index] == coord.col &&`
			`correct_segmentation_rows_[index] == coord.row;`
			`}`
			`void set_best_choice_is_dict_and_top_choice(bool value) {`
			`best_choice_is_dict_and_top_choice_ = value;`
			`}`
			`const char* lattice_data() const {`
			`return lattice_data_;`
			`}`
			`int lattice_size() const {`
			`return lattice_size_; // size of lattice_data in bytes`
			`}`
			`void set_lattice_data(const char* data, int size) {`
			`lattice_size_ = size;`
			`delete [] lattice_data_;`
			`lattice_data_ = new char[lattice_size_];`
			`memcpy(lattice_data_, data, lattice_size_);`
			`}`
			`const tesseract::ParamsTrainingBundle& params_training_bundle() const {`
			`return params_training_bundle_;`
			`}`
			`// Adds a new ParamsTrainingHypothesis to the current hypothesis list.`
			`void AddHypothesis(const tesseract::ParamsTrainingHypothesis& hypo) {`
			`params_training_bundle_.AddHypothesis(hypo);`
			`}`

			`// Functions to setup the blamer.`
			`// Whole word string, whole word bounding box.`
			`void SetWordTruth(const UNICHARSET& unicharset,`
			`const char* truth_str, const TBOX& word_box);`
			`// Single "character" string, "character" bounding box.`
			`// May be called multiple times to indicate the characters in a word.`
			`void SetSymbolTruth(const UNICHARSET& unicharset,`
			`const char* char_str, const TBOX& char_box);`
			`// Marks that there is something wrong with the truth text, like it contains`
			`// reject characters.`
			`void SetRejectedTruth();`

			`// Returns true if the provided word_choice is correct.`
			`bool ChoiceIsCorrect(const WERD_CHOICE* word_choice) const;`

			`void ClearResults() {`
			`norm_truth_word_.DeleteAllBoxes();`
			`norm_box_tolerance_ = 0;`
			`if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;`
			`debug_ = "";`
			`segsearch_is_looking_for_blame_ = false;`
			`best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;`
			`correct_segmentation_cols_.clear();`
			`correct_segmentation_rows_.clear();`
			`best_choice_is_dict_and_top_choice_ = false;`
			`delete[] lattice_data_;`
			`lattice_data_ = NULL;`
			`lattice_size_ = 0;`
			`}`
			`void CopyTruth(const BlamerBundle &other) {`
			`truth_has_char_boxes_ = other.truth_has_char_boxes_;`
			`truth_word_ = other.truth_word_;`
			`truth_text_ = other.truth_text_;`
			`incorrect_result_reason_ =`
			`(other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);`
			`}`
			`void CopyResults(const BlamerBundle &other) {`
			`norm_truth_word_ = other.norm_truth_word_;`
			`norm_box_tolerance_ = other.norm_box_tolerance_;`
			`incorrect_result_reason_ = other.incorrect_result_reason_;`
			`segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;`
			`best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;`
			`correct_segmentation_cols_ = other.correct_segmentation_cols_;`
			`correct_segmentation_rows_ = other.correct_segmentation_rows_;`
			`best_choice_is_dict_and_top_choice_ =`
			`other.best_choice_is_dict_and_top_choice_;`
			`if (other.lattice_data_ != NULL) {`
			`lattice_data_ = new char[other.lattice_size_];`
			`memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);`
			`lattice_size_ = other.lattice_size_;`
			`} else {`
			`lattice_data_ = NULL;`
			`}`
			`}`
			`const char *IncorrectReason() const;`

			`// Appends choice and truth details to the given debug string.`
			`void FillDebugString(const STRING &msg, const WERD_CHOICE *choice,`
			`STRING *debug);`

			`// Sets up the norm_truth_word from truth_word using the given DENORM.`
			`void SetupNormTruthWord(const DENORM& denorm);`

			`// Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty`
			`// bundles) where the right edge/ of the left-hand word is word1_right,`
			`// and the left edge of the right-hand word is word2_left.`
			`void SplitBundle(int word1_right, int word2_left, bool debug,`
			`BlamerBundle* bundle1, BlamerBundle* bundle2) const;`
			`// "Joins" the blames from bundle1 and bundle2 into *this.`
			`void JoinBlames(const BlamerBundle& bundle1, const BlamerBundle& bundle2,`
			`bool debug);`

			`// If a blob with the same bounding box as one of the truth character`
			`// bounding boxes is not classified as the corresponding truth character`
			`// blames character classifier for incorrect answer.`
			`void BlameClassifier(const UNICHARSET& unicharset,`
			`const TBOX& blob_box,`
			`const BLOB_CHOICE_LIST& choices,`
			`bool debug);`


			`// Checks whether chops were made at all the character bounding box`
			`// boundaries in word->truth_word. If not - blames the chopper for an`
			`// incorrect answer.`
			`void SetChopperBlame(const WERD_RES* word, bool debug);`
			`// Blames the classifier or the language model if, after running only the`
			`// chopper, best_choice is incorrect and no blame has been yet set.`
			`// Blames the classifier if best_choice is classifier's top choice and is a`
			`// dictionary word (i.e. language model could not have helped).`
			`// Otherwise, blames the language model (formerly permuter word adjustment).`
			`void BlameClassifierOrLangModel(`
			`const WERD_RES* word,`
			`const UNICHARSET& unicharset, bool valid_permuter, bool debug);`
			`// Sets up the correct_segmentation_* to mark the correct bounding boxes.`
			`void SetupCorrectSegmentation(const TWERD* word, bool debug);`

			`// Returns true if a guided segmentation search is needed.`
			`bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;`
			`// Setup ready to guide the segmentation search to the correct segmentation.`
			`// The callback pp_cb is used to avoid a cyclic dependency.`
			`// It calls into LMPainPoints::GenerateForBlamer by pre-binding the`
			`// WERD_RES, and the LMPainPoints itself.`
			`// pp_cb must be a permanent callback, and should be deleted by the caller.`
			`void InitForSegSearch(const WERD_CHOICE *best_choice,`
			`MATRIX* ratings, UNICHAR_ID wildcard_id,`
			`bool debug, STRING *debug_str,`
			`TessResultCallback2<bool, int, int>* pp_cb);`
			`// Returns true if the guided segsearch is in progress.`
			`bool GuidedSegsearchStillGoing() const;`
			`// The segmentation search has ended. Sets the blame appropriately.`
			`void FinishSegSearch(const WERD_CHOICE *best_choice,`
			`bool debug, STRING *debug_str);`

			`// If the bundle is null or still does not indicate the correct result,`
			`// fix it and use some backup reason for the blame.`
			`static void LastChanceBlame(bool debug, WERD_RES* word);`

			`// Sets the misadaption debug if this word is incorrect, as this word is`
			`// being adapted to.`
			`void SetMisAdaptionDebug(const WERD_CHOICE *best_choice, bool debug);`

			`private:`
			`void SetBlame(IncorrectResultReason irr, const STRING &msg,`
			`const WERD_CHOICE *choice, bool debug) {`
			`incorrect_result_reason_ = irr;`
			`debug_ = IncorrectReason();`
			`debug_ += " to blame: ";`
			`FillDebugString(msg, choice, &debug_);`
			`if (debug) tprintf("SetBlame(): %s", debug_.string());`
			`}`

			`private:`
			`// Set to true when bounding boxes for individual unichars are recorded.`
			`bool truth_has_char_boxes_;`
			`// The true_word (in the original image coordinate space) contains ground`
			`// truth bounding boxes for this WERD_RES.`
			`tesseract::BoxWord truth_word_;`
			`// Same as above, but in normalized coordinates`
			`// (filled in by WERD_RES::SetupForRecognition()).`
			`tesseract::BoxWord norm_truth_word_;`
			`// Tolerance for bounding box comparisons in normalized space.`
			`int norm_box_tolerance_;`
			`// Contains ground truth unichar for each of the bounding boxes in truth_word.`
			`GenericVector<STRING> truth_text_;`
			`// The reason for incorrect OCR result.`
			`IncorrectResultReason incorrect_result_reason_;`
			`// Debug text associated with the blame.`
			`STRING debug_;`
			`// Misadaption debug information (filled in if this word was misadapted to).`
			`STRING misadaption_debug_;`
			`// Variables used by the segmentation search when looking for the blame.`
			`// Set to true while segmentation search is continued after the usual`
			`// termination condition in order to look for the blame.`
			`bool segsearch_is_looking_for_blame_;`
			`// Best rating for correctly segmented path`
			`// (set and used by SegSearch when looking for blame).`
			`float best_correctly_segmented_rating_;`
			`// Vectors populated by SegSearch to indicate column and row indices that`
			`// correspond to blobs with correct bounding boxes.`
			`GenericVector<int> correct_segmentation_cols_;`
			`GenericVector<int> correct_segmentation_rows_;`
			`// Set to true if best choice is a dictionary word and`
			`// classifier's top choice.`
			`bool best_choice_is_dict_and_top_choice_;`
			`// Serialized segmentation search lattice.`
			`char *lattice_data_;`
			`int lattice_size_; // size of lattice_data in bytes`
			`// Information about hypotheses (paths) explored by the segmentation search.`
			`tesseract::ParamsTrainingBundle params_training_bundle_;`
			`};`


			`#endif // TESSERACT_CCSTRUCT_BLAMER_H_`