2012-02-02 11:14:43 +08:00
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
// File: indexmapbidi.h
|
|
|
|
// Description: Bi-directional mapping between a sparse and compact space.
|
|
|
|
// Author: rays@google.com (Ray Smith)
|
|
|
|
// Created: Tue Apr 06 11:33:59 PDT 2010
|
|
|
|
//
|
|
|
|
// (C) Copyright 2010, Google Inc.
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
//
|
|
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
#ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
|
|
|
|
#define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
|
|
|
|
|
2018-05-20 05:52:04 +08:00
|
|
|
#include <cstdio>
|
2012-02-02 11:14:43 +08:00
|
|
|
#include "genericvector.h"
|
|
|
|
|
|
|
|
namespace tesseract {
|
|
|
|
|
|
|
|
class IndexMapBiDi;
|
|
|
|
|
|
|
|
// Bidirectional one-to-one mapping between a sparse and a compact discrete
|
|
|
|
// space. Many entries in the sparse space are unmapped, but those that are
|
|
|
|
// mapped have a 1-1 mapping to (and from) the compact space, where all
|
|
|
|
// values are used. This is useful for forming subsets of larger collections,
|
|
|
|
// such as subsets of character sets, or subsets of binary feature spaces.
|
|
|
|
//
|
|
|
|
// This base class provides basic functionality with binary search for the
|
|
|
|
// SparseToCompact mapping to save memory.
|
|
|
|
// For a faster inverse mapping, or to allow a many-to-one mapping, use
|
|
|
|
// IndexMapBiDi below.
|
|
|
|
// NOTE: there are currently no methods to setup an IndexMap on its own!
|
|
|
|
// It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
|
|
|
|
class IndexMap {
|
|
|
|
public:
|
|
|
|
virtual ~IndexMap() {}
|
|
|
|
|
|
|
|
// SparseToCompact takes a sparse index to an index in the compact space.
|
|
|
|
// Uses a binary search to find the result. For faster speed use
|
|
|
|
// IndexMapBiDi, but that takes more memory.
|
|
|
|
virtual int SparseToCompact(int sparse_index) const;
|
|
|
|
|
|
|
|
// CompactToSparse takes a compact index to the corresponding index in the
|
|
|
|
// sparse space.
|
|
|
|
int CompactToSparse(int compact_index) const {
|
|
|
|
return compact_map_[compact_index];
|
|
|
|
}
|
|
|
|
// The size of the sparse space.
|
|
|
|
virtual int SparseSize() const {
|
|
|
|
return sparse_size_;
|
|
|
|
}
|
|
|
|
// The size of the compact space.
|
|
|
|
int CompactSize() const {
|
|
|
|
return compact_map_.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy from the input.
|
|
|
|
void CopyFrom(const IndexMap& src);
|
|
|
|
void CopyFrom(const IndexMapBiDi& src);
|
|
|
|
|
|
|
|
// Writes to the given file. Returns false in case of error.
|
|
|
|
bool Serialize(FILE* fp) const;
|
|
|
|
// Reads from the given file. Returns false in case of error.
|
|
|
|
// If swap is true, assumes a big/little-endian swap is needed.
|
|
|
|
bool DeSerialize(bool swap, FILE* fp);
|
|
|
|
|
|
|
|
protected:
|
|
|
|
// The sparse space covers integers in the range [0, sparse_size_-1].
|
|
|
|
int sparse_size_;
|
|
|
|
// The compact space covers integers in the range [0, compact_map_.size()-1].
|
|
|
|
// Each element contains the corresponding sparse index.
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
GenericVector<int32_t> compact_map_;
|
2012-02-02 11:14:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Bidirectional many-to-one mapping between a sparse and a compact discrete
|
|
|
|
// space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
|
|
|
|
// of those that are, many may be mapped to the same compact index.
|
|
|
|
// If the map is many-to-one, it is not possible to directly obtain all the
|
|
|
|
// sparse indices that map to a single compact index.
|
|
|
|
// This map is time- rather than space-efficient. It stores the entire sparse
|
|
|
|
// space.
|
|
|
|
// IndexMapBiDi may be initialized in one of 3 ways:
|
|
|
|
// 1. Init(size, true);
|
|
|
|
// Setup();
|
|
|
|
// Sets a complete 1:1 mapping with no unmapped elements.
|
|
|
|
// 2. Init(size, false);
|
|
|
|
// for ... SetMap(index, true);
|
|
|
|
// Setup();
|
|
|
|
// Specifies precisely which sparse indices are mapped. The mapping is 1:1.
|
|
|
|
// 3. Either of the above, followed by:
|
|
|
|
// for ... Merge(index1, index2);
|
|
|
|
// CompleteMerges();
|
|
|
|
// Allows a many-to-one mapping by merging compact space indices.
|
|
|
|
class IndexMapBiDi : public IndexMap {
|
|
|
|
public:
|
|
|
|
virtual ~IndexMapBiDi() {}
|
|
|
|
|
|
|
|
// Top-level init function in a single call to initialize a map to select
|
|
|
|
// a single contiguous subrange [start, end) of the sparse space to be mapped
|
|
|
|
// 1 to 1 to the compact space, with all other elements of the sparse space
|
|
|
|
// left unmapped.
|
|
|
|
// No need to call Setup after this.
|
|
|
|
void InitAndSetupRange(int sparse_size, int start, int end);
|
|
|
|
|
|
|
|
// Initializes just the sparse_map_ to the given size with either all
|
|
|
|
// forward indices mapped (all_mapped = true) or none (all_mapped = false).
|
|
|
|
// Call Setup immediately after, or make calls to SetMap first to adjust the
|
|
|
|
// mapping and then call Setup before using the map.
|
|
|
|
void Init(int size, bool all_mapped);
|
|
|
|
// Sets a given index in the sparse_map_ to be mapped or not.
|
|
|
|
void SetMap(int sparse_index, bool mapped);
|
|
|
|
// Sets up the sparse_map_ and compact_map_ properly after Init and
|
|
|
|
// some calls to SetMap. Assumes an ordered 1-1 map from set indices
|
|
|
|
// in the sparse space to the compact space.
|
|
|
|
void Setup();
|
|
|
|
|
|
|
|
// Merges the two compact space indices. May be called many times, but
|
|
|
|
// the merges must be concluded by a call to CompleteMerges.
|
|
|
|
// Returns true if a merge was actually performed.
|
|
|
|
bool Merge(int compact_index1, int compact_index2);
|
|
|
|
// Returns true if the given compact index has been deleted.
|
|
|
|
bool IsCompactDeleted(int index) const {
|
|
|
|
return MasterCompactIndex(index) < 0;
|
|
|
|
}
|
|
|
|
// Completes one or more Merge operations by further compacting the
|
|
|
|
// compact space.
|
|
|
|
void CompleteMerges();
|
|
|
|
|
|
|
|
// SparseToCompact takes a sparse index to an index in the compact space.
|
|
|
|
virtual int SparseToCompact(int sparse_index) const {
|
|
|
|
return sparse_map_[sparse_index];
|
|
|
|
}
|
|
|
|
// The size of the sparse space.
|
|
|
|
virtual int SparseSize() const {
|
|
|
|
return sparse_map_.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy from the input.
|
|
|
|
void CopyFrom(const IndexMapBiDi& src);
|
|
|
|
|
|
|
|
// Writes to the given file. Returns false in case of error.
|
|
|
|
bool Serialize(FILE* fp) const;
|
|
|
|
// Reads from the given file. Returns false in case of error.
|
|
|
|
// If swap is true, assumes a big/little-endian swap is needed.
|
|
|
|
bool DeSerialize(bool swap, FILE* fp);
|
|
|
|
|
|
|
|
// Bulk calls to SparseToCompact.
|
|
|
|
// Maps the given array of sparse indices to an array of compact indices.
|
|
|
|
// Assumes the input is sorted. The output indices are sorted and uniqued.
|
|
|
|
// Return value is the number of "missed" features, being features that
|
|
|
|
// don't map to the compact feature space.
|
|
|
|
int MapFeatures(const GenericVector<int>& sparse,
|
|
|
|
GenericVector<int>* compact) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
// Returns the master compact index for a given compact index.
|
|
|
|
// During a multiple merge operation, several compact indices may be
|
|
|
|
// combined, so we need to be able to find the master of all.
|
|
|
|
int MasterCompactIndex(int compact_index) const {
|
|
|
|
while (compact_index >= 0 &&
|
|
|
|
sparse_map_[compact_map_[compact_index]] != compact_index)
|
|
|
|
compact_index = sparse_map_[compact_map_[compact_index]];
|
|
|
|
return compact_index;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Direct look-up of the compact index for each element in sparse space.
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
GenericVector<int32_t> sparse_map_;
|
2012-02-02 11:14:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace tesseract.
|
|
|
|
|
|
|
|
#endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_
|