tesseract/cube/word_size_model.cpp

/**********************************************************************
 * File:        word_size_model.cpp
 * Description: Implementation of the Word Size Model Class
 * Author:    Ahmad Abdulkader
 * Created:   2008
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include <math.h>
#include <string>
#include <vector>
#include "word_size_model.h"
#include "cube_utils.h"

namespace tesseract {

// Constructs a model bound to the given character set. `contextual` records
// whether size codes also depend on the character position (see Init()).
// No size data is loaded here; Create() calls Init() for that.
WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual)
    : char_set_(char_set),
      contextual_(contextual) {
}

// Releases the per-font size tables allocated by Init(). Each table is a
// single contiguous block (owned through row pointer 0) plus the array of
// row pointers, so exactly two deletes per font are required.
WordSizeModel::~WordSizeModel() {
  for (size_t fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
    // bind by reference: copying the entry would also copy its font name
    const FontPairSizeInfo &fnt_info = font_pair_size_models_[fnt];
    delete []fnt_info.pair_size_info[0];
    delete []fnt_info.pair_size_info;
  }
}

// Factory: allocates a WordSizeModel and loads its statistics via Init().
// Returns the initialized object, or NULL if allocation or loading fails
// (in which case nothing is leaked). The caller owns the returned pointer.
WordSizeModel *WordSizeModel::Create(const string &data_file_path,
                                     const string &lang,
                                     CharSet *char_set,
                                     bool contextual) {
  WordSizeModel *model = new WordSizeModel(char_set, contextual);
  if (model == NULL) {
    fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "
            "new word size model object\n");
    return NULL;
  }

  // hand the object back only once its size data loaded successfully
  if (model->Init(data_file_path, lang)) {
    return model;
  }

  delete model;
  return NULL;
}

bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
  string stats_file_name;
  stats_file_name = data_file_path + lang;
  stats_file_name += ".cube.size";

  // read file to memory
  string str_data;

  if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {
    return false;
  }

  // split to words
  vector<string> tokens;
  CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);
  if (tokens.size() < 1) {
    fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "
            "file contents: %s\n", stats_file_name.c_str());
    return false;
  }

  font_pair_size_models_.clear();

  // token count per line depends on whether the language is contextual or not
  int token_cnt = contextual_ ?
      (kExpectedTokenCount + 4) : kExpectedTokenCount;
  // the count of size classes depends on whether the language is contextual
  // or not. For non contextual languages (Ex: Eng), it is equal to the class
  // count. For contextual languages (Ex: Ara), it is equal to the class count
  // multiplied by the position count (4: start, middle, final, isolated)
  int size_class_cnt = contextual_ ?
      (char_set_->ClassCount() * 4) : char_set_->ClassCount();
  string fnt_name = "";

  for (int tok = 0; tok < tokens.size(); tok += token_cnt) {
    // a new font, write the old font data and re-init
    if (tok == 0 || fnt_name != tokens[tok]) {
      FontPairSizeInfo fnt_info;

      fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
      if (!fnt_info.pair_size_info) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating "
                "memory for font pair size info\n");
        return false;
      }

      fnt_info.pair_size_info[0] =
          new PairSizeInfo[size_class_cnt * size_class_cnt];
      if (!fnt_info.pair_size_info[0]) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating "
                "memory for font pair size info\n");
        return false;
      }

      memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
             sizeof(PairSizeInfo));

      for (int cls = 1; cls < size_class_cnt; cls++) {
        fnt_info.pair_size_info[cls] =
            fnt_info.pair_size_info[cls - 1] + size_class_cnt;
      }

      // strip out path and extension
      string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));
      string::size_type strt_pos = stripped_font_name.find_last_of("/\\");
      if (strt_pos != string::npos) {
        fnt_info.font_name = stripped_font_name.substr(strt_pos);
      } else {
        fnt_info.font_name = stripped_font_name;
      }
      font_pair_size_models_.push_back(fnt_info);
    }

    // parse the data
    int cls_0;
    int cls_1;
    double delta_top;
    double wid_0;
    double hgt_0;
    double wid_1;
    double hgt_1;
    int size_code_0;
    int size_code_1;

    // read and parse the tokens
    if (contextual_) {
      int start_0;
      int end_0;
      int start_1;
      int end_1;
      // The expected format for a character size bigram is as follows:
      // ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)
      // Width0<delim>Height0<delim>
      // ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)
      // HeightDelta<delim>Width1<delim>Height0<delim>
      // In case of non-contextual languages, the Start and End flags are
      // omitted
      if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
          sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 ||
          sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 ||
          sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 ||
          sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 ||
          sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 ||
          sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 ||
          sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 ||
          sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 ||
          sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 ||
          sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 ||
          (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
          (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
                "line %d\n", 1 + (tok / token_cnt));
        return false;
      }
      size_code_0 = SizeCode(cls_0, start_0, end_0);
      size_code_1 = SizeCode(cls_1, start_1, end_1);
    } else {
      if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
          sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 ||
          sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 ||
          sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 ||
          sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 ||
          sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 ||
          sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {
        fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
                "line %d\n", 1 + (tok / token_cnt));
        return false;
      }
      size_code_0 = cls_0;
      size_code_1 = cls_1;
    }

    // copy the data to the size tables
    FontPairSizeInfo fnt_info = font_pair_size_models_.back();
    fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
        static_cast<int>(delta_top * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
        static_cast<int>(wid_0 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
        static_cast<int>(hgt_0 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
        static_cast<int>(wid_1 * kShapeModelScale);
    fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
        static_cast<int>(hgt_1 * kShapeModelScale);

    fnt_name = tokens[tok];
  }

  return true;
}

// Computes the size cost of a sequence of character samples.
//
// For every loaded font, every ordered pair (i < j) of samples whose labels
// map to a class id >= 1 is scored with PairCost(); pairs scoring exactly 0
// are not counted. The font's score is the mean over its counted pairs, and
// the result is the smallest such mean across fonts, truncated to int.
//
// Returns 0 if there are fewer than two samples, and WORST_COST if no font
// yields any counted pair.
int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {
  if (samp_cnt < 2) {
    return 0;
  }

  // The size code of a sample depends only on the sample itself (class id
  // and, for contextual languages, position), so resolve it once up front
  // instead of once per font and per pair.
  const int kNoSizeCode = -1;  // marks samples without a usable class id
  vector<int> size_codes(samp_cnt, kNoSizeCode);
  for (int smp = 0; smp < samp_cnt; smp++) {
    int cls = char_set_->ClassID(samp_array[smp]->StrLabel());
    if (cls < 1) {
      continue;
    }
    if (contextual_) {
      size_codes[smp] = SizeCode(cls,
                                 samp_array[smp]->FirstChar() == 0 ? 0 : 1,
                                 samp_array[smp]->LastChar() == 0 ? 0 : 1);
    } else {
      size_codes[smp] = cls;
    }
  }

  double best_dist = static_cast<double>(WORST_COST);
  int best_fnt = -1;
  const int fnt_cnt = static_cast<int>(font_pair_size_models_.size());
  for (int fnt = 0; fnt < fnt_cnt; fnt++) {
    const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
    double mean_dist = 0;
    int pair_cnt = 0;

    for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
      const int size_code_0 = size_codes[smp_0];
      if (size_code_0 == kNoSizeCode) {
        continue;
      }

      int char0_height = samp_array[smp_0]->Height();
      int char0_width = samp_array[smp_0]->Width();
      int char0_top = samp_array[smp_0]->Top();

      for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
        const int size_code_1 = size_codes[smp_1];
        if (size_code_1 == kNoSizeCode) {
          continue;
        }
        double dist = PairCost(
            char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
            samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
            fnt_info->pair_size_info[size_code_0][size_code_1]);
        // a zero distance is what PairCost returns when it has nothing to
        // compare against, so such pairs do not contribute to the mean
        if (dist > 0) {
          mean_dist += dist;
          pair_cnt++;
        }
      }  // smp_1
    }  // smp_0
    if (pair_cnt == 0) {
      continue;
    }
    mean_dist /= pair_cnt;
    if (best_fnt == -1 || mean_dist < best_dist) {
      best_dist = mean_dist;
      best_fnt = fnt;
    }
  }
  if (best_fnt == -1) {
    return static_cast<int>(WORST_COST);
  } else {
    return static_cast<int>(best_dist);
  }
}

// Scores how well an observed pair of character boxes matches the model
// entry `pair_info`.
//
// The observed pair is rescaled so that the first character's height equals
// the model's hgt_0; the result is the sum of absolute differences between
// the model and the rescaled observation over wid_0, wid_1, hgt_1 and the
// top offset (top_1 - top_0).
//
// Returns 0 when no comparison is possible: the observed first height is 0,
// or the resulting scale factor is not positive (e.g. the model entry is
// empty, hgt_0 == 0).
double WordSizeModel::PairCost(int width_0, int height_0, int top_0,
                               int width_1, int height_1, int top_1,
                               const PairSizeInfo& pair_info) {
  // A zero observed height would make the scale factor infinite (or NaN),
  // and the resulting non-finite distance would later be converted to int
  // by Cost(). Treat it as "nothing to compare" instead.
  if (height_0 == 0) {
    return 0.0;
  }

  double scale_factor = static_cast<double>(pair_info.hgt_0) /
      static_cast<double>(height_0);
  double dist = 0.0;
  if (scale_factor > 0) {
    double norm_width_0 = width_0 * scale_factor;
    double norm_width_1 = width_1 * scale_factor;
    double norm_height_1 = height_1 * scale_factor;
    double norm_delta_top = (top_1 - top_0) * scale_factor;

    // accumulate the distance between the model character and the
    // predicted one on all dimensions of the pair
    dist += fabs(pair_info.wid_0 - norm_width_0);
    dist += fabs(pair_info.wid_1 - norm_width_1);
    dist += fabs(pair_info.hgt_1 - norm_height_1);
    dist += fabs(pair_info.delta_top - norm_delta_top);
  }
  return dist;
}
}  // namespace tesseract
3.01 code from http://github.com/jimregan/tesseract-ocr with adaptations related to Linux and Windows (VC2008) compile process git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2010-11-24 02:34:14 +08:00			`/**********************************************************************`
			`* File: word_size_model.cpp`
			`* Description: Implementation of the Word Size Model Class`
			`* Author: Ahmad Abdulkader`
			`* Created: 2008`
			`*`
			`* (C) Copyright 2008, Google Inc.`
			`** Licensed under the Apache License, Version 2.0 (the "License");`
			`** you may not use this file except in compliance with the License.`
			`** You may obtain a copy of the License at`
			`** http://www.apache.org/licenses/LICENSE-2.0`
			`** Unless required by applicable law or agreed to in writing, software`
			`** distributed under the License is distributed on an "AS IS" BASIS,`
			`** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`** See the License for the specific language governing permissions and`
			`** limitations under the License.`
			`*`
			`**********************************************************************/`

			`#include <math.h>`
			`#include <string>`
			`#include <vector>`
			`#include "word_size_model.h"`
			`#include "cube_utils.h"`

			`namespace tesseract {`

			`WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) {`
			`char_set_ = char_set;`
			`contextual_ = contextual;`
			`}`

			`WordSizeModel::~WordSizeModel() {`
			`for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {`
			`FontPairSizeInfo fnt_info = font_pair_size_models_[fnt];`
			`delete []fnt_info.pair_size_info[0];`
			`delete []fnt_info.pair_size_info;`
			`}`
			`}`

			`WordSizeModel *WordSizeModel::Create(const string &data_file_path,`
			`const string &lang,`
			`CharSet *char_set,`
			`bool contextual) {`
			`WordSizeModel *obj = new WordSizeModel(char_set, contextual);`
			`if (!obj) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate "`
			`"new word size model object\n");`
			`return NULL;`
			`}`

			`if (!obj->Init(data_file_path, lang)) {`
			`delete obj;`
			`return NULL;`
			`}`
			`return obj;`
			`}`

			`bool WordSizeModel::Init(const string &data_file_path, const string &lang) {`
			`string stats_file_name;`
			`stats_file_name = data_file_path + lang;`
			`stats_file_name += ".cube.size";`

			`// read file to memory`
			`string str_data;`

			`if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {`
			`return false;`
			`}`

			`// split to words`
			`vector<string> tokens;`
			`CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);`
			`if (tokens.size() < 1) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "`
			`"file contents: %s\n", stats_file_name.c_str());`
			`return false;`
			`}`

			`font_pair_size_models_.clear();`

			`// token count per line depends on whether the language is contextual or not`
			`int token_cnt = contextual_ ?`
			`(kExpectedTokenCount + 4) : kExpectedTokenCount;`
			`// the count of size classes depends on whether the language is contextual`
			`// or not. For non contextual languages (Ex: Eng), it is equal to the class`
			`// count. For contextual languages (Ex: Ara), it is equal to the class count`
			`// multiplied by the position count (4: start, middle, final, isolated)`
			`int size_class_cnt = contextual_ ?`
			`(char_set_->ClassCount() * 4) : char_set_->ClassCount();`
			`string fnt_name = "";`

			`for (int tok = 0; tok < tokens.size(); tok += token_cnt) {`
			`// a new font, write the old font data and re-init`
			`if (tok == 0 \|\| fnt_name != tokens[tok]) {`
			`FontPairSizeInfo fnt_info;`

			`fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];`
			`if (!fnt_info.pair_size_info) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating "`
			`"memory for font pair size info\n");`
			`return false;`
			`}`

			`fnt_info.pair_size_info[0] =`
			`new PairSizeInfo[size_class_cnt * size_class_cnt];`
			`if (!fnt_info.pair_size_info[0]) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating "`
			`"memory for font pair size info\n");`
			`return false;`
			`}`

			`memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *`
			`sizeof(PairSizeInfo));`

			`for (int cls = 1; cls < size_class_cnt; cls++) {`
			`fnt_info.pair_size_info[cls] =`
			`fnt_info.pair_size_info[cls - 1] + size_class_cnt;`
			`}`

			`// strip out path and extension`
			`string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));`
			`string::size_type strt_pos = stripped_font_name.find_last_of("/\\");`
			`if (strt_pos != string::npos) {`
			`fnt_info.font_name = stripped_font_name.substr(strt_pos);`
			`} else {`
			`fnt_info.font_name = stripped_font_name;`
			`}`
			`font_pair_size_models_.push_back(fnt_info);`
			`}`

			`// parse the data`
			`int cls_0;`
			`int cls_1;`
			`double delta_top;`
			`double wid_0;`
			`double hgt_0;`
			`double wid_1;`
			`double hgt_1;`
			`int size_code_0;`
			`int size_code_1;`

			`// read and parse the tokens`
			`if (contextual_) {`
			`int start_0;`
			`int end_0;`
			`int start_1;`
			`int end_1;`
			`// The expected format for a character size bigram is as follows:`
			`// ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)`
			`// Width0<delim>Height0<delim>`
			`// ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)`
			`// HeightDelta<delim>Width1<delim>Height0<delim>`
			`// In case of non-contextual languages, the Start and End flags are`
			`// omitted`
			`if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 \|\|`
			`sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 \|\|`
			`sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 \|\|`
			`sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 \|\|`
			`sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 \|\|`
			`sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 \|\|`
			`sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 \|\|`
			`sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 \|\|`
			`sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 \|\|`
			`sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 \|\|`
			`sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 \|\|`
			`(start_0 != 0 && start_0 != 1) \|\| (end_0 != 0 && end_0 != 1) \|\|`
			`(start_1 != 0 && start_1 != 1) \|\| (end_1 != 0 && end_1 != 1)) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "`
			`"line %d\n", 1 + (tok / token_cnt));`
			`return false;`
			`}`
			`size_code_0 = SizeCode(cls_0, start_0, end_0);`
			`size_code_1 = SizeCode(cls_1, start_1, end_1);`
			`} else {`
			`if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 \|\|`
			`sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 \|\|`
			`sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 \|\|`
			`sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 \|\|`
			`sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 \|\|`
			`sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 \|\|`
			`sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {`
			`fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "`
			`"line %d\n", 1 + (tok / token_cnt));`
			`return false;`
			`}`
			`size_code_0 = cls_0;`
			`size_code_1 = cls_1;`
			`}`

			`// copy the data to the size tables`
			`FontPairSizeInfo fnt_info = font_pair_size_models_.back();`
			`fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =`
			`static_cast<int>(delta_top * kShapeModelScale);`
			`fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =`
			`static_cast<int>(wid_0 * kShapeModelScale);`
			`fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =`
			`static_cast<int>(hgt_0 * kShapeModelScale);`
			`fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =`
			`static_cast<int>(wid_1 * kShapeModelScale);`
			`fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =`
			`static_cast<int>(hgt_1 * kShapeModelScale);`

			`fnt_name = tokens[tok];`
			`}`

			`return true;`
			`}`

			`int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {`
			`if (samp_cnt < 2) {`
			`return 0;`
			`}`
			`double best_dist = static_cast<double>(WORST_COST);`
			`int best_fnt = -1;`
			`for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {`
			`const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];`
			`double mean_dist = 0;`
			`int pair_cnt = 0;`

			`for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {`
			`int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());`
			`if (cls_0 < 1) {`
			`continue;`
			`}`
			`// compute size code for samp 0 based on class id and position`
			`int size_code_0;`
			`if (contextual_) {`
			`size_code_0 = SizeCode(cls_0,`
			`samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,`
			`samp_array[smp_0]->LastChar() == 0 ? 0 : 1);`
			`} else {`
			`size_code_0 = cls_0;`
			`}`

			`int char0_height = samp_array[smp_0]->Height();`
			`int char0_width = samp_array[smp_0]->Width();`
			`int char0_top = samp_array[smp_0]->Top();`

			`for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {`
			`int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());`
			`if (cls_1 < 1) {`
			`continue;`
			`}`
			`// compute size code for samp 0 based on class id and position`
			`int size_code_1;`
			`if (contextual_) {`
			`size_code_1 = SizeCode(cls_1,`
			`samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,`
			`samp_array[smp_1]->LastChar() == 0 ? 0 : 1);`
			`} else {`
			`size_code_1 = cls_1;`
			`}`
			`double dist = PairCost(`
			`char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),`
			`samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),`
			`fnt_info->pair_size_info[size_code_0][size_code_1]);`
			`if (dist > 0) {`
			`mean_dist += dist;`
			`pair_cnt++;`
			`}`
			`} // smp_1`
			`} // smp_0`
			`if (pair_cnt == 0) {`
			`continue;`
			`}`
			`mean_dist /= pair_cnt;`
			`if (best_fnt == -1 \|\| mean_dist < best_dist) {`
			`best_dist = mean_dist;`
			`best_fnt = fnt;`
			`}`
			`}`
			`if (best_fnt == -1) {`
			`return static_cast<int>(WORST_COST);`
			`} else {`
			`return static_cast<int>(best_dist);`
			`}`
			`}`

			`double WordSizeModel::PairCost(int width_0, int height_0, int top_0,`
			`int width_1, int height_1, int top_1,`
			`const PairSizeInfo& pair_info) {`
			`double scale_factor = static_cast<double>(pair_info.hgt_0) /`
			`static_cast<double>(height_0);`
			`double dist = 0.0;`
			`if (scale_factor > 0) {`
			`double norm_width_0 = width_0 * scale_factor;`
			`double norm_width_1 = width_1 * scale_factor;`
			`double norm_height_1 = height_1 * scale_factor;`
			`double norm_delta_top = (top_1 - top_0) * scale_factor;`

			`// accumulate the distance between the model character and the`
			`// predicted one on all dimensions of the pair`
			`dist += fabs(pair_info.wid_0 - norm_width_0);`
			`dist += fabs(pair_info.wid_1 - norm_width_1);`
			`dist += fabs(pair_info.hgt_1 - norm_height_1);`
			`dist += fabs(pair_info.delta_top - norm_delta_top);`
			`}`
			`return dist;`
			`}`
			`} // namespace tesseract`