mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-28 01:05:03 +08:00
doxygen
This commit is contained in:
parent
9f42f69782
commit
0fc4d528a3
@ -31,7 +31,9 @@ CubeUtils::CubeUtils() {
|
|||||||
CubeUtils::~CubeUtils() {
|
CubeUtils::~CubeUtils() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert a prob to a cost (-ve log prob)
|
/**
|
||||||
|
* convert a prob to a cost (-ve log prob)
|
||||||
|
*/
|
||||||
int CubeUtils::Prob2Cost(double prob_val) {
|
int CubeUtils::Prob2Cost(double prob_val) {
|
||||||
if (prob_val < MIN_PROB) {
|
if (prob_val < MIN_PROB) {
|
||||||
return MIN_PROB_COST;
|
return MIN_PROB_COST;
|
||||||
@ -39,12 +41,16 @@ int CubeUtils::Prob2Cost(double prob_val) {
|
|||||||
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
|
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// converts a cost to probability
|
/**
|
||||||
|
* converts a cost to probability
|
||||||
|
*/
|
||||||
double CubeUtils::Cost2Prob(int cost) {
|
double CubeUtils::Cost2Prob(int cost) {
|
||||||
return exp(-cost / PROB2COST_SCALE);
|
return exp(-cost / PROB2COST_SCALE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// computes the length of a NULL terminated char_32 string
|
/**
|
||||||
|
* computes the length of a NULL terminated char_32 string
|
||||||
|
*/
|
||||||
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
||||||
if (char_32_ptr == NULL) {
|
if (char_32_ptr == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -54,7 +60,9 @@ int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
// compares two char_32 strings
|
/**
|
||||||
|
* compares two char_32 strings
|
||||||
|
*/
|
||||||
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
||||||
const char_32 *pch1 = str1;
|
const char_32 *pch1 = str1;
|
||||||
const char_32 *pch2 = str2;
|
const char_32 *pch2 = str2;
|
||||||
@ -76,7 +84,9 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Duplicates a 32-bit char buffer
|
/**
|
||||||
|
* Duplicates a 32-bit char buffer
|
||||||
|
*/
|
||||||
char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
||||||
int len = StrLen(str32);
|
int len = StrLen(str32);
|
||||||
char_32 *new_str = new char_32[len + 1];
|
char_32 *new_str = new char_32[len + 1];
|
||||||
@ -88,7 +98,9 @@ char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
|||||||
return new_str;
|
return new_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
// creates a char samp from a specified portion of the image
|
/**
|
||||||
|
* creates a char samp from a specified portion of the image
|
||||||
|
*/
|
||||||
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
||||||
int wid, int hgt) {
|
int wid, int hgt) {
|
||||||
// get the raw img data from the image
|
// get the raw img data from the image
|
||||||
@ -105,7 +117,9 @@ CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
|||||||
return char_samp;
|
return char_samp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// create a B/W image from a char_sample
|
/**
|
||||||
|
* create a B/W image from a char_sample
|
||||||
|
*/
|
||||||
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
||||||
// parameter check
|
// parameter check
|
||||||
if (char_samp == NULL) {
|
if (char_samp == NULL) {
|
||||||
@ -137,7 +151,9 @@ Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
|||||||
return pix;
|
return pix;
|
||||||
}
|
}
|
||||||
|
|
||||||
// creates a raw buffer from the specified location of the pix
|
/**
|
||||||
|
* creates a raw buffer from the specified location of the pix
|
||||||
|
*/
|
||||||
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
||||||
int wid, int hgt) {
|
int wid, int hgt) {
|
||||||
// skip invalid dimensions
|
// skip invalid dimensions
|
||||||
@ -173,7 +189,9 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
|||||||
return temp_buff;
|
return temp_buff;
|
||||||
}
|
}
|
||||||
|
|
||||||
// read file contents to a string
|
/**
|
||||||
|
* read file contents to a string
|
||||||
|
*/
|
||||||
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
||||||
str->clear();
|
str->clear();
|
||||||
FILE *fp = fopen(file_name.c_str(), "rb");
|
FILE *fp = fopen(file_name.c_str(), "rb");
|
||||||
@ -206,7 +224,9 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
|||||||
return (read_bytes == file_size);
|
return (read_bytes == file_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// splits a string into vectors based on specified delimiters
|
/**
|
||||||
|
* splits a string into vectors based on specified delimiters
|
||||||
|
*/
|
||||||
void CubeUtils::SplitStringUsing(const string &str,
|
void CubeUtils::SplitStringUsing(const string &str,
|
||||||
const string &delims,
|
const string &delims,
|
||||||
vector<string> *str_vec) {
|
vector<string> *str_vec) {
|
||||||
@ -240,7 +260,9 @@ void CubeUtils::SplitStringUsing(const string &str,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// UTF-8 to UTF-32 conversion functions
|
/**
|
||||||
|
* UTF-8 to UTF-32 conversion functions
|
||||||
|
*/
|
||||||
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
||||||
str32->clear();
|
str32->clear();
|
||||||
int len = strlen(utf8_str);
|
int len = strlen(utf8_str);
|
||||||
@ -254,7 +276,9 @@ void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// UTF-32 to UTF-8 conversion functions
|
/**
|
||||||
|
* UTF-32 to UTF-8 conversion functions
|
||||||
|
*/
|
||||||
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
|
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
|
||||||
str->clear();
|
str->clear();
|
||||||
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
|
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
|
||||||
|
@ -37,7 +37,9 @@ WordAltList::~WordAltList() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// insert an alternate word with the specified cost and tag
|
/**
|
||||||
|
* insert an alternate word with the specified cost and tag
|
||||||
|
*/
|
||||||
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
||||||
if (word_alt_ == NULL || alt_cost_ == NULL) {
|
if (word_alt_ == NULL || alt_cost_ == NULL) {
|
||||||
word_alt_ = new char_32*[max_alt_];
|
word_alt_ = new char_32*[max_alt_];
|
||||||
@ -84,7 +86,9 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort the alternate in descending order based on the cost
|
/**
|
||||||
|
* sort the alternate in descending order based on the cost
|
||||||
|
*/
|
||||||
void WordAltList::Sort() {
|
void WordAltList::Sort() {
|
||||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||||
|
@ -50,8 +50,10 @@ WordUnigrams::~WordUnigrams() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the word-list and unigrams from file and create an object
|
/**
|
||||||
// The word list is assumed to be sorted in lexicographic order.
|
* Load the word-list and unigrams from file and create an object
|
||||||
|
* The word list is assumed to be sorted in lexicographic order.
|
||||||
|
*/
|
||||||
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
||||||
const string &lang) {
|
const string &lang) {
|
||||||
string file_name;
|
string file_name;
|
||||||
@ -143,10 +145,12 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
|||||||
return word_unigrams_obj;
|
return word_unigrams_obj;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split input into space-separated tokens, strip trailing punctuation
|
/**
|
||||||
// from each, determine case properties, call UTF-8 flavor of cost
|
* Split input into space-separated tokens, strip trailing punctuation
|
||||||
// function on each word, and aggregate all into single mean word
|
* from each, determine case properties, call UTF-8 flavor of cost
|
||||||
// cost.
|
* function on each word, and aggregate all into single mean word
|
||||||
|
* cost.
|
||||||
|
*/
|
||||||
int WordUnigrams::Cost(const char_32 *key_str32,
|
int WordUnigrams::Cost(const char_32 *key_str32,
|
||||||
LangModel *lang_mod,
|
LangModel *lang_mod,
|
||||||
CharSet *char_set) const {
|
CharSet *char_set) const {
|
||||||
@ -239,7 +243,9 @@ int WordUnigrams::Cost(const char_32 *key_str32,
|
|||||||
return static_cast<int>(cost / static_cast<double>(words.size()));
|
return static_cast<int>(cost / static_cast<double>(words.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search for UTF-8 string using binary search of sorted words_ array.
|
/**
|
||||||
|
* Search for UTF-8 string using binary search of sorted words_ array.
|
||||||
|
*/
|
||||||
int WordUnigrams::CostInternal(const char *key_str) const {
|
int WordUnigrams::CostInternal(const char *key_str) const {
|
||||||
if (strlen(key_str) == 0)
|
if (strlen(key_str) == 0)
|
||||||
return not_in_list_cost_;
|
return not_in_list_cost_;
|
||||||
|
Loading…
Reference in New Issue
Block a user