mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-03 23:27:50 +08:00
0a9ad20d1c
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@301 d0cd1f9f-072b-0410-8dd7-cf729c803f20
92 lines
3.6 KiB
C++
92 lines
3.6 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: workingpartset.h
|
|
// Description: Class to hold a working set of partitions of the page
|
|
// during construction of text/image regions.
|
|
// Author: Ray Smith
|
|
// Created: Tue Oct 28 17:21:01 PDT 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H__
|
|
#define TESSERACT_TEXTORD_WORKINGPARSET_H__
|
|
|
|
#include "blobbox.h" // For TO_BLOCK_LIST and BLOCK_LIST.
|
|
#include "colpartition.h" // For ColPartition_LIST.
|
|
|
|
namespace tesseract {
|
|
|
|
// WorkingPartSet holds a working set of ColPartitions during transformation
|
|
// from the grid-based storage to regions in logical reading order, and is
|
|
// therefore only used during construction of the regions.
|
|
class WorkingPartSet : public ELIST_LINK {
|
|
public:
|
|
WorkingPartSet() {
|
|
}
|
|
explicit WorkingPartSet(ColPartition* column)
|
|
: column_(column), latest_part_(NULL), part_it_(&part_set_) {
|
|
}
|
|
|
|
// Simple accessors.
|
|
ColPartition* column() const {
|
|
return column_;
|
|
}
|
|
void set_column(ColPartition* col) {
|
|
column_ = col;
|
|
}
|
|
|
|
// Add the partition to this WorkingPartSet. Partitions are generally
|
|
// stored in the order in which they are received, but if the partition
|
|
// has a SingletonPartner, make sure that it stays with its partner.
|
|
void AddPartition(ColPartition* part);
|
|
|
|
// Make blocks out of any partitions in this WorkingPartSet, and append
|
|
// them to the end of the blocks list. bleft, tright and resolution give
|
|
// the bounds and resolution of the source image, so that blocks can be
|
|
// made to fit in the bounds.
|
|
// All ColPartitions go in the used_parts list, as they need to be kept
|
|
// around, but are no longer needed.
|
|
void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright,
|
|
int resolution, ColPartition_LIST* used_parts,
|
|
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
|
|
|
// Insert the given blocks at the front of the completed_blocks_ list so
|
|
// they can be kept in the correct reading order.
|
|
void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
|
|
|
private:
|
|
// Convert the part_set_ into blocks, starting a new block at a break
|
|
// in partnerships, or a change in linespacing (for text).
|
|
void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution,
|
|
ColPartition_LIST* used_parts);
|
|
|
|
// The column that this working set applies to. Used by the caller.
|
|
ColPartition* column_;
|
|
// The most recently added partition.
|
|
ColPartition* latest_part_;
|
|
// All the partitions in the block that is currently under construction.
|
|
ColPartition_LIST part_set_;
|
|
// Iteratorn on part_set_ pointing to the most recent addition.
|
|
ColPartition_IT part_it_;
|
|
// The blocks that have been made so far and belong before the current block.
|
|
BLOCK_LIST completed_blocks_;
|
|
TO_BLOCK_LIST to_blocks_;
|
|
};
|
|
|
|
// NOTE(review): presumably declares the list/iterator types for
// WorkingPartSet (cf. the ColPartition_LIST / ColPartition_IT types used by
// the class) -- confirm against the ELISTIZEH macro definition.
ELISTIZEH(WorkingPartSet)
|
|
|
|
} // namespace tesseract.
|
|
|
|
#endif // TESSERACT_TEXTORD_WORKINGPARSET_H__
|
|
|