mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-11 20:53:24 +08:00
Started TFile conversion to remove fmemopen
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1139 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
d52231cff3
commit
c86fe22a62
@ -28,6 +28,7 @@
|
||||
#include "errcode.h"
|
||||
#include "helpers.h"
|
||||
#include "ndminx.h"
|
||||
#include "serialis.h"
|
||||
#include "strngs.h"
|
||||
|
||||
// Use PointerVector<T> below in preference to GenericVector<T*>, as that
|
||||
@ -61,6 +62,11 @@ class GenericVector {
|
||||
|
||||
// Resizes to size and sets all values to t.
|
||||
void init_to_size(int size, T t);
|
||||
// Resizes to size without any initialization.
|
||||
void resize_no_init(int size) {
|
||||
reserve(size);
|
||||
size_used_ = size;
|
||||
}
|
||||
|
||||
// Return the size used.
|
||||
int size() const {
|
||||
@ -159,22 +165,27 @@ class GenericVector {
|
||||
bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
|
||||
// Writes a vector of simple types to the given file. Assumes that bitwise
|
||||
// read/write of T will work. Returns false in case of error.
|
||||
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
|
||||
bool Serialize(FILE* fp) const;
|
||||
bool Serialize(tesseract::TFile* fp) const;
|
||||
// Reads a vector of simple types from the given file. Assumes that bitwise
|
||||
// read/write will work with ReverseN according to sizeof(T).
|
||||
// Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp);
|
||||
// Writes a vector of classes to the given file. Assumes the existence of
|
||||
// bool T::Serialize(FILE* fp) const that returns false in case of error.
|
||||
// Returns false in case of error.
|
||||
bool SerializeClasses(FILE* fp) const;
|
||||
bool SerializeClasses(tesseract::TFile* fp) const;
|
||||
// Reads a vector of classes from the given file. Assumes the existence of
|
||||
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
|
||||
// error. Also needs T::T() and T::T(constT&), as init_to_size is used in
|
||||
// this function. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeClasses(bool swap, FILE* fp);
|
||||
bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
|
||||
|
||||
// Allocates a new array of double the current_size, copies over the
|
||||
// information from data to the new location, deletes data and returns
|
||||
@ -188,6 +199,12 @@ class GenericVector {
|
||||
return data_new;
|
||||
}
|
||||
|
||||
// Reverses the elements of the vector.
|
||||
void reverse() {
|
||||
for (int i = 0; i < size_used_ / 2; ++i)
|
||||
Swap(&data_[i], &data_[size_used_ - 1 - i]);
|
||||
}
|
||||
|
||||
// Sorts the members of this vector using the less than comparator (cmp_lt),
|
||||
// which compares the values. Useful for GenericVectors to primitive types.
|
||||
// Will not work so great for pointers (unless you just want to sort some
|
||||
@ -296,6 +313,15 @@ class GenericVector {
|
||||
data_[index2] = tmp;
|
||||
}
|
||||
}
|
||||
// Returns true if all elements of *this are within the given range.
|
||||
// Only uses operator<
|
||||
bool WithinBounds(const T& rangemin, const T& rangemax) const {
|
||||
for (int i = 0; i < size_used_; ++i) {
|
||||
if (data_[i] < rangemin || rangemax < data_[i])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Internal recursive version of choose_nth_item.
|
||||
@ -343,7 +369,7 @@ inline bool LoadDataFromFile(const STRING& filename,
|
||||
// The default FileWriter writes the vector of char to the filename file,
|
||||
// returning false on error.
|
||||
inline bool SaveDataToFile(const GenericVector<char>& data,
|
||||
const STRING& filename) {
|
||||
const STRING& filename) {
|
||||
FILE* fp = fopen(filename.string(), "wb");
|
||||
if (fp == NULL) return false;
|
||||
bool result =
|
||||
@ -470,8 +496,11 @@ class PointerVector : public GenericVector<T*> {
|
||||
GenericVector<T*>::clear();
|
||||
}
|
||||
|
||||
// Writes a vector of simple types to the given file. Assumes that bitwise
|
||||
// read/write of T will work. Returns false in case of error.
|
||||
// Writes a vector of (pointers to) classes to the given file. Assumes the
|
||||
// existence of bool T::Serialize(FILE*) const that returns false in case of
|
||||
// error. There is no Serialize for simple types, as you would have a
|
||||
// normal GenericVector of those.
|
||||
// Returns false in case of error.
|
||||
bool Serialize(FILE* fp) const {
|
||||
inT32 used = GenericVector<T*>::size_used_;
|
||||
if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
|
||||
@ -482,16 +511,29 @@ class PointerVector : public GenericVector<T*> {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Reads a vector of simple types from the given file. Assumes that bitwise
|
||||
// read/write will work with ReverseN according to sizeof(T).
|
||||
// Writes a vector of (pointers to) classes to the given TFile. The element
// count is written first, then for each element a one-byte flag saying
// whether the pointer is non-NULL, followed by the element itself (via
// bool T::Serialize(TFile*) const) when it is.
// Returns false in case of error.
bool Serialize(TFile* fp) const {
  const inT32 num_items = GenericVector<T*>::size_used_;
  if (fp->FWrite(&num_items, sizeof(num_items), 1) != 1) return false;
  for (int index = 0; index < num_items; ++index) {
    T* item = GenericVector<T*>::data_[index];
    const inT8 is_present = item != NULL;
    if (fp->FWrite(&is_present, sizeof(is_present), 1) != 1) return false;
    // NULL entries are recorded by their flag alone.
    if (!is_present) continue;
    if (!item->Serialize(fp)) return false;
  }
  return true;
}
|
||||
// Reads a vector of (pointers to) classes from the given file. Assumes the
|
||||
// existence of bool T::DeSerialize(bool, TFile*) that returns false in
|
||||
// case of error. There is no DeSerialize for simple types, as you would have a
|
||||
// normal GenericVector of those.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Also needs T::T(), as new T is used in this function.
|
||||
// Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp) {
|
||||
inT32 reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
inT8 non_null;
|
||||
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
|
||||
@ -510,6 +552,30 @@ class PointerVector : public GenericVector<T*> {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool DeSerialize(bool swap, TFile* fp) {
|
||||
inT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
inT8 non_null;
|
||||
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
T* item = NULL;
|
||||
if (non_null) {
|
||||
item = new T;
|
||||
if (!item->DeSerialize(swap, fp)) {
|
||||
delete item;
|
||||
return false;
|
||||
}
|
||||
this->push_back(item);
|
||||
} else {
|
||||
// Null elements should keep their place in the vector.
|
||||
this->push_back(NULL);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sorts the items pointed to by the members of this vector using
|
||||
// t::operator<().
|
||||
@ -817,6 +883,12 @@ bool GenericVector<T>::Serialize(FILE* fp) const {
|
||||
if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false;
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
|
||||
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
|
||||
if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads a vector of simple types from the given file. Assumes that bitwise
|
||||
// read/write will work with ReverseN according to sizeof(T).
|
||||
@ -836,6 +908,20 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
reserve(reserved);
|
||||
size_used_ = reserved;
|
||||
if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
|
||||
if (swap) {
|
||||
for (int i = 0; i < size_used_; ++i)
|
||||
ReverseN(&data_[i], sizeof(data_[i]));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes a vector of classes to the given file. Assumes the existence of
|
||||
// bool T::Serialize(FILE* fp) const that returns false in case of error.
|
||||
@ -848,6 +934,14 @@ bool GenericVector<T>::SerializeClasses(FILE* fp) const {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {
|
||||
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
|
||||
for (int i = 0; i < size_used_; ++i) {
|
||||
if (!data_[i].Serialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads a vector of classes from the given file. Assumes the existence of
|
||||
// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
|
||||
@ -866,6 +960,18 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
|
||||
uinT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
T empty;
|
||||
init_to_size(reserved, empty);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!data_[i].DeSerialize(swap, fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// This method clears the current object, then does a shallow copy of
|
||||
// its argument, and finally invalidates its argument.
|
||||
|
@ -19,24 +19,41 @@
|
||||
|
||||
#include "serialis.h"
|
||||
#include <stdio.h>
|
||||
#include "genericvector.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
TFile::TFile() : offset_(0) {
|
||||
TFile::TFile()
|
||||
: offset_(0), data_(NULL), data_is_owned_(false), is_writing_(false) {
|
||||
}
|
||||
|
||||
// Frees the data buffer, but only if this TFile allocated it. A buffer
// handed in by the caller (data_is_owned_ false) is left untouched.
TFile::~TFile() {
  if (!data_is_owned_) return;
  delete data_;
}
|
||||
|
||||
bool TFile::Open(const STRING& filename, FileReader reader) {
|
||||
if (!data_is_owned_) {
|
||||
data_ = new GenericVector<char>;
|
||||
data_is_owned_ = true;
|
||||
}
|
||||
offset_ = 0;
|
||||
is_writing_ = false;
|
||||
if (reader == NULL)
|
||||
return LoadDataFromFile(filename, &data_);
|
||||
return LoadDataFromFile(filename, data_);
|
||||
else
|
||||
return (*reader)(filename, &data_);
|
||||
return (*reader)(filename, data_);
|
||||
}
|
||||
|
||||
bool TFile::Open(const char* data, int size) {
|
||||
offset_ = 0;
|
||||
data_.init_to_size(size, 0);
|
||||
memcpy(&data_[0], data, size);
|
||||
if (!data_is_owned_) {
|
||||
data_ = new GenericVector<char>;
|
||||
data_is_owned_ = true;
|
||||
}
|
||||
is_writing_ = false;
|
||||
data_->init_to_size(size, 0);
|
||||
memcpy(&(*data_)[0], data, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -49,30 +66,78 @@ bool TFile::Open(FILE* fp, inT64 end_offset) {
|
||||
fseek(fp, current_pos, SEEK_SET);
|
||||
}
|
||||
int size = end_offset - current_pos;
|
||||
data_.init_to_size(size, 0);
|
||||
return static_cast<int>(fread(&data_[0], 1, size, fp)) == size;
|
||||
is_writing_ = false;
|
||||
if (!data_is_owned_) {
|
||||
data_ = new GenericVector<char>;
|
||||
data_is_owned_ = true;
|
||||
}
|
||||
data_->init_to_size(size, 0);
|
||||
return static_cast<int>(fread(&(*data_)[0], 1, size, fp)) == size;
|
||||
}
|
||||
|
||||
char* TFile::FGets(char* buffer, int buffer_size) {
|
||||
ASSERT_HOST(!is_writing_);
|
||||
int size = 0;
|
||||
while (size + 1 < buffer_size && offset_ < data_.size()) {
|
||||
buffer[size++] = data_[offset_++];
|
||||
if (data_[offset_ - 1] == '\n') break;
|
||||
while (size + 1 < buffer_size && offset_ < data_->size()) {
|
||||
buffer[size++] = (*data_)[offset_++];
|
||||
if ((*data_)[offset_ - 1] == '\n') break;
|
||||
}
|
||||
if (size < buffer_size) buffer[size] = '\0';
|
||||
return size > 0 ? buffer : NULL;
|
||||
}
|
||||
|
||||
int TFile::FRead(void* buffer, int size, int count) {
|
||||
char* char_buffer = reinterpret_cast<char*>(buffer);
|
||||
ASSERT_HOST(!is_writing_);
|
||||
int required_size = size * count;
|
||||
if (data_.size() - offset_ < required_size)
|
||||
required_size = data_.size() - offset_;
|
||||
memcpy(char_buffer, &data_[offset_], required_size);
|
||||
if (required_size <= 0) return 0;
|
||||
char* char_buffer = reinterpret_cast<char*>(buffer);
|
||||
if (data_->size() - offset_ < required_size)
|
||||
required_size = data_->size() - offset_;
|
||||
if (required_size > 0)
|
||||
memcpy(char_buffer, &(*data_)[offset_], required_size);
|
||||
offset_ += required_size;
|
||||
return required_size / size;
|
||||
}
|
||||
|
||||
void TFile::Rewind() {
|
||||
ASSERT_HOST(!is_writing_);
|
||||
offset_ = 0;
|
||||
}
|
||||
|
||||
// Opens the TFile for writing and empties the target buffer.
// With a non-NULL data, subsequent FWrite calls append to the caller's
// vector, which stays owned by the caller; any buffer this TFile owned is
// deleted first. With a NULL data, an internally owned buffer is used,
// allocated here if the TFile does not already own one.
void TFile::OpenWrite(GenericVector<char>* data) {
  offset_ = 0;
  if (data == NULL) {
    // No external target: make sure there is an internal buffer to fill.
    if (!data_is_owned_) {
      data_ = new GenericVector<char>;
      data_is_owned_ = true;
    }
  } else {
    // Switch to the caller's buffer, releasing our own if we had one.
    if (data_is_owned_) delete data_;
    data_ = data;
    data_is_owned_ = false;
  }
  is_writing_ = true;
  data_->truncate(0);
}
|
||||
|
||||
bool TFile::CloseWrite(const STRING& filename, FileWriter writer) {
|
||||
ASSERT_HOST(is_writing_);
|
||||
if (writer == NULL)
|
||||
return SaveDataToFile(*data_, filename);
|
||||
else
|
||||
return (*writer)(*data_, filename);
|
||||
}
|
||||
|
||||
int TFile::FWrite(const void* buffer, int size, int count) {
|
||||
ASSERT_HOST(is_writing_);
|
||||
int total = size * count;
|
||||
if (total <= 0) return 0;
|
||||
const char* buf = reinterpret_cast<const char*>(buffer);
|
||||
// This isn't very efficient, but memory is so fast compared to disk
|
||||
// that it is relatively unimportant, and very simple.
|
||||
for (int i = 0; i < total; ++i)
|
||||
data_->push_back(buf[i]);
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -20,11 +20,13 @@
|
||||
#ifndef SERIALIS_H
|
||||
#define SERIALIS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "host.h"
|
||||
|
||||
#include "genericvector.h"
|
||||
template <typename T> class GenericVector;
|
||||
class STRING;
|
||||
|
||||
/***********************************************************************
|
||||
QUOTE_IT MACRO DEFINITION
|
||||
@ -36,14 +38,24 @@ Replace <parm> with "<parm>". <parm> may be an arbitrary number of tokens
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Simple file class. Only does input for now.
|
||||
// Allows for portable file input from memory.
|
||||
// Function to read a GenericVector<char> from a whole file.
|
||||
// Returns false on failure.
|
||||
typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
|
||||
// Function to write a GenericVector<char> to a whole file.
|
||||
// Returns false on failure.
|
||||
typedef bool (*FileWriter)(const GenericVector<char>& data,
|
||||
const STRING& filename);
|
||||
|
||||
// Simple file class.
|
||||
// Allows for portable file input from memory and from foreign file systems.
|
||||
class TFile {
|
||||
public:
|
||||
TFile();
|
||||
~TFile();
|
||||
|
||||
// All the Open methods load the whole file into memory.
|
||||
// All the Open methods load the whole file into memory for reading.
|
||||
// Opens a file with a supplied reader, or NULL to use the default.
|
||||
// Note that mixed read/write is not supported.
|
||||
bool Open(const STRING& filename, FileReader reader);
|
||||
// From an existing memory buffer.
|
||||
bool Open(const char* data, int size);
|
||||
@ -53,21 +65,33 @@ class TFile {
|
||||
// Reads a line like fgets. Returns NULL on EOF, otherwise buffer.
|
||||
// Reads at most buffer_size bytes, including '\0' terminator, even if
|
||||
// the line is longer. Does nothing if buffer_size <= 0.
|
||||
// To use fscanf use FGets and sscanf.
|
||||
char* FGets(char* buffer, int buffer_size);
|
||||
// Replicates fread, returning the number of items read.
|
||||
int FRead(void* buffer, int size, int count);
|
||||
// To use fscanf use FGets and sscanf.
|
||||
|
||||
// Resets the TFile as if it has been Opened, but nothing read.
|
||||
void Rewind() {
|
||||
offset_ = 0;
|
||||
}
|
||||
// Only allowed while reading!
|
||||
void Rewind();
|
||||
|
||||
// Open for writing. Either supply a non-NULL data with OpenWrite before
|
||||
// calling FWrite, (no close required), or supply a NULL data to OpenWrite
|
||||
// and call CloseWrite to write to a file after the FWrites.
|
||||
void OpenWrite(GenericVector<char>* data);
|
||||
bool CloseWrite(const STRING& filename, FileWriter writer);
|
||||
|
||||
// Replicates fwrite, returning the number of items written.
|
||||
// To use fprintf, use snprintf and FWrite.
|
||||
int FWrite(const void* buffer, int size, int count);
|
||||
|
||||
private:
|
||||
// The number of bytes used so far.
|
||||
int offset_;
|
||||
// The buffered data from the file.
|
||||
GenericVector<char> data_;
|
||||
GenericVector<char>* data_;
|
||||
// True if the data_ pointer is owned by *this.
|
||||
bool data_is_owned_;
|
||||
// True if the TFile is open for writing.
|
||||
bool is_writing_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -17,12 +17,17 @@
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "helpers.h"
|
||||
#include "tprintf.h"
|
||||
#include "strngs.h"
|
||||
#include "genericvector.h"
|
||||
#include "strngs.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "helpers.h"
|
||||
#include "serialis.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
using tesseract::TFile;
|
||||
|
||||
// Size of buffer needed to host the decimal representation of the maximum
|
||||
// possible length of an int (in 64 bits), being -<20 digits>.
|
||||
const int kMaxIntSize = 22;
|
||||
@ -123,10 +128,22 @@ STRING::STRING(const char* cstr) {
|
||||
assert(InvariantOk());
|
||||
}
|
||||
|
||||
// Constructs a STRING from the first length bytes of data and appends a
// terminating '\0'. A NULL data yields an empty STRING.
STRING::STRING(const char *data, int length) {
  if (data != NULL) {
    // Room for the payload plus the terminator.
    char* dest = AllocData(length + 1, length + 1);
    memcpy(dest, data, length);
    dest[length] = '\0';
    return;
  }
  // Empty STRINGs contain just the "\0".
  memcpy(AllocData(1, kMinCapacity), "", 1);
}
|
||||
|
||||
STRING::~STRING() {
|
||||
DiscardData();
|
||||
}
|
||||
|
||||
// TODO(rays) Change all callers to use TFile and remove the old functions.
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool STRING::Serialize(FILE* fp) const {
|
||||
inT32 len = length();
|
||||
@ -134,6 +151,13 @@ bool STRING::Serialize(FILE* fp) const {
|
||||
if (static_cast<int>(fwrite(GetCStr(), 1, len, fp)) != len) return false;
|
||||
return true;
|
||||
}
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool STRING::Serialize(TFile* fp) const {
|
||||
inT32 len = length();
|
||||
if (fp->FWrite(&len, sizeof(len), 1) != 1) return false;
|
||||
if (fp->FWrite(GetCStr(), 1, len) != len) return false;
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
@ -145,6 +169,17 @@ bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false;
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(bool swap, TFile* fp) {
|
||||
inT32 len;
|
||||
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&len, sizeof(len));
|
||||
truncate_at(len);
|
||||
if (fp->FRead(GetCStr(), 1, len) != len) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
BOOL8 STRING::contains(const char c) const {
|
||||
return (c != '\0') && (strchr (GetCStr(), c) != NULL);
|
||||
@ -245,21 +280,20 @@ char& STRING::operator[](inT32 index) const {
|
||||
|
||||
void STRING::split(const char c, GenericVector<STRING> *splited) {
|
||||
int start_index = 0;
|
||||
for (int i = 0; i < length(); i++) {
|
||||
int len = length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
if ((*this)[i] == c) {
|
||||
if (i != start_index) {
|
||||
(*this)[i] = '\0';
|
||||
STRING tmp = GetCStr() + start_index;
|
||||
splited->push_back(tmp);
|
||||
splited->push_back(STRING(GetCStr() + start_index, i - start_index));
|
||||
(*this)[i] = c;
|
||||
}
|
||||
start_index = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (length() != start_index) {
|
||||
STRING tmp = GetCStr() + start_index;
|
||||
splited->push_back(tmp);
|
||||
if (len != start_index) {
|
||||
splited->push_back(STRING(GetCStr() + start_index, len - start_index));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,10 @@
|
||||
#include "platform.h"
|
||||
#include "memry.h"
|
||||
|
||||
namespace tesseract {
|
||||
class TFile;
|
||||
} // namespace tesseract.
|
||||
|
||||
// STRING_IS_PROTECTED means that string[index] = X is invalid
|
||||
// because you have to go through strings interface to modify it.
|
||||
// This allows the string to ensure internal integrity and maintain
|
||||
@ -43,6 +47,7 @@ class TESS_API STRING
|
||||
STRING();
|
||||
STRING(const STRING &string);
|
||||
STRING(const char *string);
|
||||
STRING(const char *data, int length);
|
||||
~STRING ();
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
@ -50,6 +55,11 @@ class TESS_API STRING
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(tesseract::TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp);
|
||||
|
||||
BOOL8 contains(const char c) const;
|
||||
inT32 length() const;
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "unichar.h"
|
||||
#include "errcode.h"
|
||||
#include "genericvector.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#define UNI_MAX_LEGAL_UTF32 0x0010FFFF
|
||||
@ -203,3 +204,14 @@ UNICHAR::const_iterator UNICHAR::begin(const char* utf8_str, const int len) {
|
||||
UNICHAR::const_iterator UNICHAR::end(const char* utf8_str, const int len) {
|
||||
return UNICHAR::const_iterator(utf8_str + len);
|
||||
}
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
void UNICHAR::UTF8ToUnicode(const char* utf8_str,
|
||||
GenericVector<int>* unicodes) {
|
||||
const int utf8_length = strlen(utf8_str);
|
||||
const_iterator end_it(end(utf8_str, utf8_length));
|
||||
for (const_iterator it(begin(utf8_str, utf8_length)); it != end_it; ++it) {
|
||||
unicodes->push_back(*it);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
template <typename T> class GenericVector;
|
||||
|
||||
// Maximum number of characters that can be stored in a UNICHAR. Must be
|
||||
// at least 4. Must not exceed 31 without changing the coding of length.
|
||||
#define UNICHAR_LEN 30
|
||||
@ -148,6 +150,9 @@ class UNICHAR {
|
||||
static const_iterator begin(const char* utf8_str, const int byte_length);
|
||||
static const_iterator end(const char* utf8_str, const int byte_length);
|
||||
|
||||
// Converts a utf-8 string to a vector of unicodes.
|
||||
static void UTF8ToUnicode(const char* utf8_str, GenericVector<int>* unicodes);
|
||||
|
||||
private:
|
||||
// A UTF-8 representation of 1 or more Unicode characters.
|
||||
// The last element (chars[UNICHAR_LEN - 1]) is a length if
|
||||
|
Loading…
Reference in New Issue
Block a user