mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
commit
40637384d3
@ -1419,12 +1419,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
|
||||
#ifdef _WIN32
|
||||
// convert input name from ANSI encoding to utf-8
|
||||
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
|
||||
NULL, NULL);
|
||||
NULL, 0);
|
||||
wchar_t *uni16_str = new WCHAR[str16_len];
|
||||
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
|
||||
uni16_str, str16_len);
|
||||
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
|
||||
NULL, NULL, NULL);
|
||||
0, NULL, NULL);
|
||||
char *utf8_str = new char[utf8_len];
|
||||
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
|
||||
utf8_len, NULL, NULL);
|
||||
|
@ -736,11 +736,11 @@ class TESS_API TessBaseAPI {
|
||||
*/
|
||||
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
|
||||
|
||||
Tesseract* const tesseract() const {
|
||||
Tesseract* tesseract() const {
|
||||
return tesseract_;
|
||||
}
|
||||
|
||||
OcrEngineMode const oem() const {
|
||||
OcrEngineMode oem() const {
|
||||
return last_oem_requested_;
|
||||
}
|
||||
|
||||
|
@ -544,9 +544,9 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
n = snprintf(buf, sizeof(buf),
|
||||
"5 0 obj\n"
|
||||
"<<\n"
|
||||
" /Length %ld /Filter /FlateDecode\n"
|
||||
" /Length %lu /Filter /FlateDecode\n"
|
||||
">>\n"
|
||||
"stream\n", len);
|
||||
"stream\n", (unsigned long)len);
|
||||
if (n >= sizeof(buf)) {
|
||||
lept_free(comp);
|
||||
return false;
|
||||
|
@ -560,7 +560,7 @@ bool ScriptDetector::must_stop(int orientation) {
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
const int OrientationIdToValue(const int& id) {
|
||||
int OrientationIdToValue(const int& id) {
|
||||
switch (id) {
|
||||
case 0:
|
||||
return 0;
|
||||
|
@ -134,6 +134,6 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
// Helper method to convert an orientation index to its value in degrees.
|
||||
// The value represents the amount of clockwise rotation in degrees that must be
|
||||
// applied for the text to be upright (readable).
|
||||
TESS_API const int OrientationIdToValue(const int& id);
|
||||
TESS_API int OrientationIdToValue(const int& id);
|
||||
|
||||
#endif // TESSERACT_CCMAIN_OSDETECT_H__
|
||||
|
@ -82,7 +82,7 @@ class BoxWord {
|
||||
const TBOX& bounding_box() const {
|
||||
return bbox_;
|
||||
}
|
||||
const int length() const {
|
||||
int length() const {
|
||||
return length_;
|
||||
}
|
||||
const TBOX& BlobBox(int index) const {
|
||||
|
@ -94,6 +94,8 @@ DIR128::DIR128( //from fcoord
|
||||
* Convert a direction to a vector.
|
||||
**********************************************************************/
|
||||
|
||||
#if 0 // code is buggy for negative dir and unused
|
||||
ICOORD DIR128::vector() const { //convert to vector
|
||||
return dirtab[dir]; //easy really
|
||||
}
|
||||
#endif
|
||||
|
@ -339,7 +339,7 @@ class WERD_RES : public ELIST_LINK {
|
||||
// This matters for mirrorable characters such as parentheses. We recognize
|
||||
// characters purely based on their shape on the page, and by default produce
|
||||
// the corresponding unicode for a left-to-right context.
|
||||
const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
|
||||
const char* BestUTF8(int blob_index, bool in_rtl_context) const {
|
||||
if (blob_index < 0 || best_choice == NULL ||
|
||||
blob_index >= best_choice->length())
|
||||
return NULL;
|
||||
@ -352,7 +352,7 @@ class WERD_RES : public ELIST_LINK {
|
||||
return uch_set->id_to_unichar_ext(id);
|
||||
}
|
||||
// Returns the UTF-8 string for the given blob index in the raw_choice word.
|
||||
const char* const RawUTF8(int blob_index) const {
|
||||
const char* RawUTF8(int blob_index) const {
|
||||
if (blob_index < 0 || blob_index >= raw_choice->length())
|
||||
return NULL;
|
||||
UNICHAR_ID id = raw_choice->unichar_id(blob_index);
|
||||
|
@ -309,7 +309,7 @@ class WERD_CHOICE : public ELIST_LINK {
|
||||
inline const UNICHAR_ID *unichar_ids() const {
|
||||
return unichar_ids_;
|
||||
}
|
||||
inline const UNICHAR_ID unichar_id(int index) const {
|
||||
inline UNICHAR_ID unichar_id(int index) const {
|
||||
assert(index < length_);
|
||||
return unichar_ids_[index];
|
||||
}
|
||||
|
@ -190,14 +190,14 @@ void UNICHARSET::reserve(int unichars_number) {
|
||||
}
|
||||
}
|
||||
|
||||
const UNICHAR_ID
|
||||
UNICHAR_ID
|
||||
UNICHARSET::unichar_to_id(const char* const unichar_repr) const {
|
||||
return ids.contains(unichar_repr) ?
|
||||
ids.unichar_to_id(unichar_repr) : INVALID_UNICHAR_ID;
|
||||
}
|
||||
|
||||
const UNICHAR_ID UNICHARSET::unichar_to_id(const char* const unichar_repr,
|
||||
int length) const {
|
||||
UNICHAR_ID UNICHARSET::unichar_to_id(const char* const unichar_repr,
|
||||
int length) const {
|
||||
assert(length > 0 && length <= UNICHAR_LEN);
|
||||
return ids.contains(unichar_repr, length) ?
|
||||
ids.unichar_to_id(unichar_repr, length) : INVALID_UNICHAR_ID;
|
||||
@ -263,7 +263,7 @@ bool UNICHARSET::encode_string(const char* str, bool give_up_on_failure,
|
||||
return perfect;
|
||||
}
|
||||
|
||||
const char* const UNICHARSET::id_to_unichar(UNICHAR_ID id) const {
|
||||
const char* UNICHARSET::id_to_unichar(UNICHAR_ID id) const {
|
||||
if (id == INVALID_UNICHAR_ID) {
|
||||
return INVALID_UNICHAR;
|
||||
}
|
||||
@ -271,7 +271,7 @@ const char* const UNICHARSET::id_to_unichar(UNICHAR_ID id) const {
|
||||
return unichars[id].representation;
|
||||
}
|
||||
|
||||
const char* const UNICHARSET::id_to_unichar_ext(UNICHAR_ID id) const {
|
||||
const char* UNICHARSET::id_to_unichar_ext(UNICHAR_ID id) const {
|
||||
if (id == INVALID_UNICHAR_ID) {
|
||||
return INVALID_UNICHAR;
|
||||
}
|
||||
|
@ -177,11 +177,11 @@ class UNICHARSET {
|
||||
|
||||
// Return the UNICHAR_ID of a given unichar representation within the
|
||||
// UNICHARSET.
|
||||
const UNICHAR_ID unichar_to_id(const char* const unichar_repr) const;
|
||||
UNICHAR_ID unichar_to_id(const char* const unichar_repr) const;
|
||||
|
||||
// Return the UNICHAR_ID of a given unichar representation within the
|
||||
// UNICHARSET. Only the first length characters from unichar_repr are used.
|
||||
const UNICHAR_ID unichar_to_id(const char* const unichar_repr,
|
||||
UNICHAR_ID unichar_to_id(const char* const unichar_repr,
|
||||
int length) const;
|
||||
|
||||
// Return the minimum number of bytes that matches a legal UNICHAR_ID,
|
||||
@ -215,13 +215,13 @@ class UNICHARSET {
|
||||
|
||||
// Return the unichar representation corresponding to the given UNICHAR_ID
|
||||
// within the UNICHARSET.
|
||||
const char* const id_to_unichar(UNICHAR_ID id) const;
|
||||
const char* id_to_unichar(UNICHAR_ID id) const;
|
||||
|
||||
// Return the UTF8 representation corresponding to the given UNICHAR_ID after
|
||||
// resolving any private encodings internal to Tesseract. This method is
|
||||
// preferable to id_to_unichar for outputting text that will be visible to
|
||||
// external applications.
|
||||
const char* const id_to_unichar_ext(UNICHAR_ID id) const;
|
||||
const char* id_to_unichar_ext(UNICHAR_ID id) const;
|
||||
|
||||
// Return a STRING that reformats the utf8 str into the str followed
|
||||
// by its hex unicodes.
|
||||
|
@ -397,11 +397,11 @@ class Dict {
|
||||
}
|
||||
|
||||
inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; }
|
||||
inline const UNICHAR_ID WildcardID() const {
|
||||
inline UNICHAR_ID WildcardID() const {
|
||||
return wildcard_unichar_id_;
|
||||
}
|
||||
/// Return the number of dawgs in the dawgs_ vector.
|
||||
inline const int NumDawgs() const { return dawgs_.size(); }
|
||||
inline int NumDawgs() const { return dawgs_.size(); }
|
||||
/// Return i-th dawg pointer recorded in the dawgs_ vector.
|
||||
inline const Dawg *GetDawg(int index) const { return dawgs_[index]; }
|
||||
/// Return the points to the punctuation dawg.
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
|
||||
#else
|
||||
|
@ -41,7 +41,7 @@ class PixelHistogram {
|
||||
length_ = 0;
|
||||
}
|
||||
|
||||
int* const hist() const {
|
||||
int* hist() const {
|
||||
return hist_;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user