This commit is contained in:
Jim O'Regan 2014-09-12 21:54:57 +01:00
parent 9f42f69782
commit 0fc4d528a3
3 changed files with 55 additions and 21 deletions

View File

@ -31,7 +31,9 @@ CubeUtils::CubeUtils() {
// Destructor: the body is empty, so no explicit cleanup is performed here.
CubeUtils::~CubeUtils() {}
// convert a prob to a cost (-ve log prob)
/**
* convert a prob to a cost (-ve log prob)
*/
int CubeUtils::Prob2Cost(double prob_val) {
if (prob_val < MIN_PROB) {
return MIN_PROB_COST;
@ -39,12 +41,16 @@ int CubeUtils::Prob2Cost(double prob_val) {
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
}
// converts a cost to probability
/**
* converts a cost to probability
*/
double CubeUtils::Cost2Prob(int cost) {
// Mirrors the formula used by Prob2Cost (cost = -log(prob) * PROB2COST_SCALE):
// the negated cost is divided by the scale factor and then exponentiated.
// NOTE(review): if PROB2COST_SCALE is an integral constant this division
// truncates before exp() is applied -- confirm it is a floating-point value.
return exp(-cost / PROB2COST_SCALE);
}
// computes the length of a NULL terminated char_32 string
/**
* computes the length of a NULL terminated char_32 string
*/
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
if (char_32_ptr == NULL) {
return 0;
@ -54,7 +60,9 @@ int CubeUtils::StrLen(const char_32 *char_32_ptr) {
return len;
}
// compares two char_32 strings
/**
* compares two char_32 strings
*/
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
const char_32 *pch1 = str1;
const char_32 *pch2 = str2;
@ -76,7 +84,9 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
}
}
// Duplicates a 32-bit char buffer
/**
* Duplicates a 32-bit char buffer
*/
char_32 *CubeUtils::StrDup(const char_32 *str32) {
int len = StrLen(str32);
char_32 *new_str = new char_32[len + 1];
@ -88,7 +98,9 @@ char_32 *CubeUtils::StrDup(const char_32 *str32) {
return new_str;
}
// creates a char samp from a specified portion of the image
/**
* creates a char samp from a specified portion of the image
*/
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
int wid, int hgt) {
// get the raw img data from the image
@ -105,7 +117,9 @@ CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
return char_samp;
}
// create a B/W image from a char_sample
/**
* create a B/W image from a char_sample
*/
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
// parameter check
if (char_samp == NULL) {
@ -137,7 +151,9 @@ Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
return pix;
}
// creates a raw buffer from the specified location of the pix
/**
* creates a raw buffer from the specified location of the pix
*/
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
int wid, int hgt) {
// skip invalid dimensions
@ -173,7 +189,9 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
return temp_buff;
}
// read file contents to a string
/**
* read file contents to a string
*/
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
str->clear();
FILE *fp = fopen(file_name.c_str(), "rb");
@ -206,7 +224,9 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
return (read_bytes == file_size);
}
// splits a string into vectors based on specified delimiters
/**
* splits a string into vectors based on specified delimiters
*/
void CubeUtils::SplitStringUsing(const string &str,
const string &delims,
vector<string> *str_vec) {
@ -240,7 +260,9 @@ void CubeUtils::SplitStringUsing(const string &str,
}
}
// UTF-8 to UTF-32 conversion function
/**
* UTF-8 to UTF-32 conversion function
*/
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
str32->clear();
int len = strlen(utf8_str);
@ -254,7 +276,9 @@ void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
}
}
// UTF-32 to UTF-8 conversion function
/**
* UTF-32 to UTF-8 conversion function
*/
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
str->clear();
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {

View File

@ -37,7 +37,9 @@ WordAltList::~WordAltList() {
}
}
// insert an alternate word with the specified cost and tag
/**
* insert an alternate word with the specified cost and tag
*/
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
if (word_alt_ == NULL || alt_cost_ == NULL) {
word_alt_ = new char_32*[max_alt_];
@ -84,7 +86,9 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
return true;
}
// sort the alternate in descending order based on the cost
/**
* sort the alternate in descending order based on the cost
*/
void WordAltList::Sort() {
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {

View File

@ -50,8 +50,10 @@ WordUnigrams::~WordUnigrams() {
}
}
// Load the word-list and unigrams from file and create an object
// The word list is assumed to be sorted in lexicographic order.
/**
* Load the word-list and unigrams from file and create an object
* The word list is assumed to be sorted in lexicographic order.
*/
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
const string &lang) {
string file_name;
@ -143,10 +145,12 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path,
return word_unigrams_obj;
}
// Split input into space-separated tokens, strip trailing punctuation
// from each, determine case properties, call UTF-8 flavor of cost
// function on each word, and aggregate all into single mean word
// cost.
/**
* Split input into space-separated tokens, strip trailing punctuation
* from each, determine case properties, call UTF-8 flavor of cost
* function on each word, and aggregate all into single mean word
* cost.
*/
int WordUnigrams::Cost(const char_32 *key_str32,
LangModel *lang_mod,
CharSet *char_set) const {
@ -239,7 +243,9 @@ int WordUnigrams::Cost(const char_32 *key_str32,
return static_cast<int>(cost / static_cast<double>(words.size()));
}
// Search for UTF-8 string using binary search of sorted words_ array.
/**
* Search for UTF-8 string using binary search of sorted words_ array.
*/
int WordUnigrams::CostInternal(const char *key_str) const {
if (strlen(key_str) == 0)
return not_in_list_cost_;