From ba9f73f04b7932e7760b26e0ba44526178c3d200 Mon Sep 17 00:00:00 2001 From: theraysmith Date: Mon, 21 Mar 2011 21:45:12 +0000 Subject: [PATCH] Various fixes, including memory leak in fixspace, font labels on output, removed some annoying debug output, fixes to initialization of parameters, general cleanup, and added Hindi git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@569 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- ccutil/genericvector.h | 241 ++++++++++++++++++++++++++++++++++++++++- ccutil/memry.h | 2 +- ccutil/tesscallback.h | 230 +++++++++++++++++++++++++++++++++++++++ ccutil/unicity_table.h | 13 ++- 4 files changed, 477 insertions(+), 9 deletions(-) diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index 7ee7275bd..b1dcfc435 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -26,12 +26,16 @@ #include "tesscallback.h" #include "errcode.h" #include "helpers.h" +#include "ndminx.h" +// Use PointerVector below in preference to GenericVector, as that +// provides automatic deletion of pointers, [De]Serialize that works, and +// sort that works. template class GenericVector { public: GenericVector() { this->init(kDefaultVectorSize); } - GenericVector(int size) { this->init(size); } + explicit GenericVector(int size) { this->init(size); } // Copy GenericVector(const GenericVector& other) { @@ -96,11 +100,11 @@ class GenericVector { // Removes an element at the given index and // shifts the remaining elements to the left. - void remove(int index); + virtual void remove(int index); // Truncates the array to the given size by removing the end. // If the current size is less, the array is not expanded. - void truncate(int size) { + virtual void truncate(int size) { if (size < size_used_) size_used_ = size; } @@ -133,8 +137,27 @@ class GenericVector { // If the callbacks are NULL, then the data is simply read/written using // fread (and swapping)/fwrite. // Returns false on error or if the callback returns false. + // DEPRECATED. 
Use [De]Serialize[Classes] instead. bool write(FILE* f, TessResultCallback2* cb) const; bool read(FILE* f, TessResultCallback3* cb, bool swap); + // Writes a vector of simple types to the given file. Assumes that bitwise + // read/write of T will work. Returns false in case of error. + virtual bool Serialize(FILE* fp) const; + // Reads a vector of simple types from the given file. Assumes that bitwise + // read/write will work with ReverseN according to sizeof(T). + // Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + virtual bool DeSerialize(bool swap, FILE* fp); + // Writes a vector of classes to the given file. Assumes the existence of + // bool T::Serialize(FILE* fp) const that returns false in case of error. + // Returns false in case of error. + bool SerializeClasses(FILE* fp) const; + // Reads a vector of classes from the given file. Assumes the existence of + // bool T::DeSerialize(bool swap, FILE* fp) that returns false in case of + // error. Also needs T::T() and T::T(constT&), as init_to_size is used in + // this function. Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + bool DeSerializeClasses(bool swap, FILE* fp); // Allocates a new array of double the current_size, copies over the // information from data to the new location, deletes data and returns @@ -163,6 +186,33 @@ class GenericVector { qsort(data_, size_used_, sizeof(*data_), comparator); } + // Searches the array (assuming sorted in ascending order, using sort()) for + // an element equal to target and returns true if it is present. + // Use binary_search to get the index of target, or its nearest candidate. 
+ bool bool_binary_search(const T& target) const { + int index = binary_search(target); + if (index >= size_used_) + return false; + return data_[index] == target; + } + // Searches the array (assuming sorted in ascending order, using sort()) for + // an element equal to target and returns the index of the best candidate. + // The return value is the largest index i such that data_[i] > target is + // false. + int binary_search(const T& target) const { + int bottom = 0; + int top = size_used_; + do { + int middle = (bottom + top) / 2; + if (data_[middle] > target) + top = middle; + else + bottom = middle; + } + while (top - bottom > 1); + return bottom; + } + // Compact the vector by deleting elements using operator!= on basic types. // The vector must be sorted. void compact_sorted() { @@ -198,6 +248,13 @@ class GenericVector { delete delete_cb; } + T dot_product(const GenericVector& other) const { + T result = static_cast(0); + for (int i = MIN(size_used_, other.size_used_) - 1; i >= 0; --i) + result += data_[i] * other.data_[i]; + return result; + } + protected: // Init the object, allocating size memory. @@ -239,6 +296,122 @@ int sort_cmp(const void* t1, const void* t2) { } } +// Used by PointerVector::sort() +// return < 0 if t1 < t2 +// return 0 if t1 == t2 +// return > 0 if t1 > t2 +template +int sort_ptr_cmp(const void* t1, const void* t2) { + const T* a = *reinterpret_cast(t1); + const T* b = *reinterpret_cast(t2); + if (*a < *b) { + return -1; + } else if (*b < *a) { + return 1; + } else { + return 0; + } +} + +// Subclass for a vector of pointers. Use in preference to GenericVector +// as it provides automatic deletion and correct serialization, with the +// corollary that all copy operations are deep copies of the pointed-to objects. 
+template +class PointerVector : public GenericVector { + public: + PointerVector() : GenericVector() { } + explicit PointerVector(int size) : GenericVector(size) { } + virtual ~PointerVector() { + // Clear must be called here, even though it is called again by the base, + // as the base will call the wrong clear. + clear(); + } + // Copy must be deep, as the pointers will be automatically deleted on + // destruction. + PointerVector(const PointerVector& other) { + init(other.size()); + this->operator+=(other); + } + PointerVector& operator+=(const PointerVector& other) { + reserve(this->size_used_ + other.size_used_); + for (int i = 0; i < other.size(); ++i) { + push_back(new T(*other.data_[i])); + } + return *this; + } + + PointerVector& operator=(const PointerVector& other) { + this->truncate(0); + this->operator+=(other); + return *this; + } + + // Removes an element at the given index and + // shifts the remaining elements to the left. + virtual void remove(int index) { + delete GenericVector::data_[index]; + GenericVector::remove(index); + } + + // Truncates the array to the given size by removing the end. + // If the current size is less, the array is not expanded. + virtual void truncate(int size) { + for (int i = size; i < GenericVector::size_used_; ++i) + delete GenericVector::data_[i]; + GenericVector::truncate(size); + } + + // Clear the array, calling the clear callback function if any. + // All the owned callbacks are also deleted. + // If you don't want the callbacks to be deleted, before calling clear, set + // the callback to NULL. + virtual void clear() { + GenericVector::delete_data_pointers(); + GenericVector::clear(); + } + + // Writes a vector of simple types to the given file. Assumes that bitwise + // read/write of T will work. Returns false in case of error. 
+ virtual bool Serialize(FILE* fp) const { + inT32 used = GenericVector::size_used_; + if (fwrite(&used, sizeof(used), 1, fp) != 1) return false; + for (int i = 0; i < used; ++i) { + inT8 non_null = GenericVector::data_[i] != NULL; + if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false; + if (non_null && !GenericVector::data_[i]->Serialize(fp)) return false; + } + return true; + } + // Reads a vector of simple types from the given file. Assumes that bitwise + // read/write will work with ReverseN according to sizeof(T). + // Also needs T::T(), as new T is used in this function. + // Returns false in case of error. + // If swap is true, assumes a big/little-endian swap is needed. + virtual bool DeSerialize(bool swap, FILE* fp) { + inT32 reserved; + if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; + if (swap) Reverse32(&reserved); + GenericVector::reserve(reserved); + for (int i = 0; i < reserved; ++i) { + inT8 non_null; + if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; + T* item = NULL; + if (non_null) { + item = new T; + if (!item->DeSerialize(swap, fp)) return false; + } + push_back(item); + } + return true; + } + + // Sorts the items pointed to by the members of this vector using + // t::operator<(). + void sort() { + sort(&sort_ptr_cmp); + } +}; + } // namespace tesseract // A useful vector that uses operator== to do comparisons. 
@@ -411,7 +584,7 @@ GenericVector &GenericVector::operator+=(const GenericVector& other) { template GenericVector &GenericVector::operator=(const GenericVector& other) { - this->clear(); + this->truncate(0); this->operator+=(other); return *this; } @@ -484,7 +657,7 @@ template bool GenericVector::read(FILE* f, TessResultCallback3* cb, bool swap) { - uinT32 reserved; + inT32 reserved; if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false; if (swap) Reverse32(&reserved); reserve(reserved); @@ -508,6 +681,64 @@ bool GenericVector::read(FILE* f, return true; } +// Writes a vector of simple types to the given file. Assumes that bitwise +// read/write of T will work. Returns false in case of error. +template +bool GenericVector::Serialize(FILE* fp) const { + if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; + if (fwrite(data_, sizeof(*data_), size_used_, fp) != size_used_) return false; + return true; +} + +// Reads a vector of simple types from the given file. Assumes that bitwise +// read/write will work with ReverseN according to sizeof(T). +// Returns false in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +template +bool GenericVector::DeSerialize(bool swap, FILE* fp) { + inT32 reserved; + if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; + if (swap) Reverse32(&reserved); + reserve(reserved); + size_used_ = reserved; + if (fread(data_, sizeof(T), size_used_, fp) != size_used_) return false; + if (swap) { + for (int i = 0; i < size_used_; ++i) + ReverseN(&data_[i], sizeof(data_[i])); + } + return true; +} + +// Writes a vector of classes to the given file. Assumes the existence of +// bool T::Serialize(FILE* fp) const that returns false in case of error. +// Returns false in case of error. 
+template +bool GenericVector::SerializeClasses(FILE* fp) const { + if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false; + for (int i = 0; i < size_used_; ++i) { + if (!data_[i].Serialize(fp)) return false; + } + return true; +} + +// Reads a vector of classes from the given file. Assumes the existence of +// bool T::DeSerialize(bool swap, FILE* fp) that returns false in case of +// error. Also needs T::T() and T::T(constT&), as init_to_size is used in +// this function. Returns false in case of error. +// If swap is true, assumes a big/little-endian swap is needed. +template +bool GenericVector::DeSerializeClasses(bool swap, FILE* fp) { + uinT32 reserved; + if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; + if (swap) Reverse32(&reserved); + T empty; + init_to_size(reserved, empty); + for (int i = 0; i < reserved; ++i) { + if (!data_[i].DeSerialize(swap, fp)) return false; + } + return true; +} + // This method clear the current object, then, does a shallow copy of // its argument, and finally invalindate its argument. template diff --git a/ccutil/memry.h b/ccutil/memry.h index 8700a13f0..0a80b5423 100644 --- a/ccutil/memry.h +++ b/ccutil/memry.h @@ -29,7 +29,7 @@ /********************************************************************** * ALLOC_2D_ARRAY - * + * DEPRECATED! Use GENERIC_2D_ARRAY instead. * Create a dynamic 2D array. **********************************************************************/ diff --git a/ccutil/tesscallback.h b/ccutil/tesscallback.h index c0e02aec3..e8fbb716a 100644 --- a/ccutil/tesscallback.h +++ b/ccutil/tesscallback.h @@ -1005,4 +1005,234 @@ NewPermanentTessCallback(R (*function)(A1,A2,A3)) { return new _TessFunctionResultCallback_0_3(function); } +// Specified by TR1 [4.7.2] Reference modifications. +template struct remove_reference; +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; + +// Identity::type is a typedef of T. 
Useful for preventing the +// compiler from inferring the type of an argument in templates. +template +struct Identity { + typedef T type; +}; + +template +class _ConstTessMemberResultCallback_1_2 + : public TessResultCallback2 { + public: + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1,A1,A2) const; + + private: + T* object_; + MemberSignature member_; + typename remove_reference::type p1_; + + public: + inline _ConstTessMemberResultCallback_1_2(T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) { } + + virtual R Run(A1 a1, A2 a2) { + if (!del) { + R result = (object_->*member_)(p1_,a1,a2); + return result; + } else { + R result = (object_->*member_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + member_ = NULL; + delete this; + return result; + } + } +}; + +template +class _ConstTessMemberResultCallback_1_2 + : public TessCallback2 { + public: + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1,A1,A2) const; + + private: + T* object_; + MemberSignature member_; + typename remove_reference::type p1_; + + public: + inline _ConstTessMemberResultCallback_1_2(T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) { } + + virtual void Run(A1 a1, A2 a2) { + if (!del) { + (object_->*member_)(p1_,a1,a2); + } else { + (object_->*member_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + member_ = NULL; + delete this; + } + } +}; + +#ifndef SWIG +template +inline typename _ConstTessMemberResultCallback_1_2::base* +NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_2(obj, member, p1); +} +#endif + +#ifndef SWIG +template +inline typename _ConstTessMemberResultCallback_1_2::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { + return new _ConstTessMemberResultCallback_1_2(obj, member, 
p1); +} +#endif + +template +class _TessMemberResultCallback_1_2 : public TessResultCallback2 { + public: + typedef TessResultCallback2 base; + typedef R (T::*MemberSignature)(P1,A1,A2) ; + + private: + T* object_; + MemberSignature member_; + typename remove_reference::type p1_; + + public: + inline _TessMemberResultCallback_1_2(T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) { } + + virtual R Run(A1 a1, A2 a2) { + if (!del) { + R result = (object_->*member_)(p1_,a1,a2); + return result; + } else { + R result = (object_->*member_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + member_ = NULL; + delete this; + return result; + } + } +}; + +template +class _TessMemberResultCallback_1_2 + : public TessCallback2 { + public: + typedef TessCallback2 base; + typedef void (T::*MemberSignature)(P1,A1,A2) ; + + private: + T* object_; + MemberSignature member_; + typename remove_reference::type p1_; + + public: + inline _TessMemberResultCallback_1_2(T* object, + MemberSignature member, P1 p1) + : object_(object), member_(member), p1_(p1) { } + + virtual void Run(A1 a1, A2 a2) { + if (!del) { + (object_->*member_)(p1_,a1,a2); + } else { + (object_->*member_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + member_ = NULL; + delete this; + } + } +}; + +#ifndef SWIG +template +inline typename _TessMemberResultCallback_1_2::base* +NewTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { + return new _TessMemberResultCallback_1_2(obj, member, p1); +} +#endif + +#ifndef SWIG +template +inline typename _TessMemberResultCallback_1_2::base* +NewPermanentTessCallback( T1* obj, R (T2::*member)(P1,A1,A2) , typename Identity::type p1) { + return new _TessMemberResultCallback_1_2(obj, member, p1); +} +#endif + +template +class _TessFunctionResultCallback_1_2 : public TessCallback2 { + public: + typedef TessCallback2 base; + typedef R (*FunctionSignature)(P1,A1,A2); + + 
private: + FunctionSignature function_; + typename remove_reference::type p1_; + + public: + inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) + : function_(function), p1_(p1) { } + + virtual R Run(A1 a1, A2 a2) { + if (!del) { + R result = (*function_)(p1_,a1,a2); + return result; + } else { + R result = (*function_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + function_ = NULL; + delete this; + return result; + } + } +}; + +template +class _TessFunctionResultCallback_1_2 + : public TessCallback2 { + public: + typedef TessCallback2 base; + typedef void (*FunctionSignature)(P1,A1,A2); + + private: + FunctionSignature function_; + typename remove_reference::type p1_; + + public: + inline _TessFunctionResultCallback_1_2(FunctionSignature function, P1 p1) + : function_(function), p1_(p1) { } + + virtual void Run(A1 a1, A2 a2) { + if (!del) { + (*function_)(p1_,a1,a2); + } else { + (*function_)(p1_,a1,a2); + // zero out the pointer to ensure segfault if used again + function_ = NULL; + delete this; + } + } +}; + +template +inline typename _TessFunctionResultCallback_1_2::base* +NewTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { + return new _TessFunctionResultCallback_1_2(function, p1); +} + +template +inline typename _TessFunctionResultCallback_1_2::base* +NewPermanentTessCallback(R (*function)(P1,A1,A2), typename Identity::type p1) { + return new _TessFunctionResultCallback_1_2(function, p1); +} + #endif /* _TESS_CALLBACK_SPECIALIZATIONS_H */ diff --git a/ccutil/unicity_table.h b/ccutil/unicity_table.h index c6273b5d7..47807786a 100644 --- a/ccutil/unicity_table.h +++ b/ccutil/unicity_table.h @@ -44,7 +44,10 @@ class UnicityTable { int size() const; /// Return the object from an id. - T get(int id) const; + const T &get(int id) const; + + // Return the pointer to an object with the given id. + T *get_mutable(int id); /// Return the id of the T object. 
/// This method NEEDS a compare_callback to be passed to @@ -126,10 +129,14 @@ void UnicityTable::reserve(int size) { // Return the object from an id. template -T UnicityTable::get(int id) const { +const T &UnicityTable::get(int id) const { return table_.get(id); } - +// Returns the pointer to the object with the given id. +template +T *UnicityTable::get_mutable(int id) { + return &(table_.get(id)); +} // Return true if the id is valid template T UnicityTable::contains_id(int id) const {