tesseract/api/pageiterator.h

///////////////////////////////////////////////////////////////////////
// File:        pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
//              tesseract internal data structures.
// Author:      Ray Smith
// Created:     Fri Feb 26 11:01:06 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_API_PAGEITERATOR_H__
#define TESSERACT_API_PAGEITERATOR_H__

#include "apitypes.h"

// Forward declarations of types defined outside this header. The ones used
// below appear only as pointers, so their full definitions are not needed
// here.
class C_BLOB_IT;
// NOTE(review): PBLOB_IT is not referenced anywhere in this header - confirm
// whether this forward declaration is still needed.
class PBLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
// Image type returned by GetBinaryImage and GetImage.
struct Pix;

namespace tesseract {

// Forward declaration: this header only stores a Tesseract* (see tesseract_).
class Tesseract;

// Class to iterate over tesseract page structure, providing access to all
// levels of the page hierarchy, without including any tesseract headers or
// having to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
// DetectOS, or anything else that changes the internal PAGE_RES.
// See apitypes.h for the definition of PageIteratorLevel.
// See also ResultIterator, derived from PageIterator, which adds in the
// ability to access OCR output with text-specific methods.

class PageIterator {
 public:
  // page_res and tesseract come directly from the BaseAPI.
  // The rectangle parameters are copied indirectly from the Thresholder,
  // via the BaseAPI. They represent the coordinates of some rectangle in an
  // original image (in top-left-origin coordinates) and therefore the top-left
  // needs to be added to any output boxes in order to specify coordinates
  // in the original image. See TessBaseAPI::SetRectangle.
  // The scale and scaled_yres are in case the Thresholder scaled the image
  // rectangle prior to thresholding. Any coordinates in tesseract's image
  // must be divided by scale before adding (rect_left, rect_top).
  // The scaled_yres indicates the effective resolution of the binary image
  // that tesseract has been given by the Thresholder.
  // After the constructor, Begin has already been called.
  PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
               int scale, int scaled_yres,
               int rect_left, int rect_top,
               int rect_width, int rect_height);
  // Virtual so that derived iterators (e.g. ResultIterator) can be destroyed
  // through a PageIterator pointer.
  virtual ~PageIterator();

  // Page/ResultIterators may be copied! This makes it possible to iterate over
  // all the objects at a lower level, while maintaining an iterator to
  // objects at a higher level. The copy constructor and the assignment
  // operator DO NOT CALL Begin, so iterations will continue from the location
  // of src.
  PageIterator(const PageIterator& src);
  // Note that assignment returns a const reference.
  const PageIterator& operator=(const PageIterator& src);

  // ============= Moving around within the page ============.

  // Moves the iterator to point to the start of the page to begin an iteration.
  void Begin();

  // Moves to the start of the next object at the given level in the
  // page hierarchy, and returns false if the end of the page was reached.
  // NOTE that RIL_SYMBOL will skip non-text blocks, but all other
  // PageIteratorLevel level values will visit each non-text block once.
  // Think of non text blocks as containing a single para, with a single line,
  // with a single imaginary word.
  // Calls to Next with different levels may be freely intermixed.
  // This function iterates words in right-to-left scripts correctly, if
  // the appropriate language has been loaded into Tesseract.
  bool Next(PageIteratorLevel level);

  // Returns true if the iterator is at the start of an object at the given
  // level. Possible uses include determining if a call to Next(RIL_WORD)
  // moved to the start of a RIL_PARA.
  bool IsAtBeginningOf(PageIteratorLevel level) const;

  // Returns whether the iterator is positioned at the last object of type
  // element within the enclosing object of type level (e.g. the last word
  // in a line, the last line in a block).
  bool IsAtFinalElement(PageIteratorLevel level,
                        PageIteratorLevel element) const;

  // ============= Accessing data ==============.
  // Coordinate system:
  // Integer coordinates are at the cracks between the pixels.
  // The top-left corner of the top-left pixel in the image is at (0,0).
  // The bottom-right corner of the bottom-right pixel in the image is at
  // (width, height).
  // Every bounding box goes from the top-left of the top-left contained
  // pixel to the bottom-right of the bottom-right contained pixel, so
  // the bounding box of the single top-left pixel in the image is:
  // (0,0)->(1,1).
  // If an image rectangle has been set in the API, then returned coordinates
  // relate to the original (full) image, rather than the rectangle.

  // Returns the bounding rectangle of the current object at the given level
  // through the left, top, right and bottom output pointers.
  // See comment on coordinate system above.
  // Returns false if there is no such object at the current position.
  // The returned bounding box is guaranteed to match the size and position
  // of the image returned by GetBinaryImage, but may clip foreground pixels
  // from a grey image. The padding argument to GetImage can be used to expand
  // the image to include more foreground pixels. See GetImage below.
  bool BoundingBox(PageIteratorLevel level,
                   int* left, int* top, int* right, int* bottom) const;

  // Returns the type of the current block. See apitypes.h for PolyBlockType.
  PolyBlockType BlockType() const;

  // Returns a binary image of the current object at the given level.
  // The position and size match the return from BoundingBox.
  // Use pixDestroy to delete the image after use.
  Pix* GetBinaryImage(PageIteratorLevel level) const;

  // Returns an image of the current object at the given level in greyscale
  // if available in the input. To guarantee a binary image use
  // GetBinaryImage.
  // NOTE that in order to give the best possible image, the bounds are
  // expanded slightly over the binary connected component, by the supplied
  // padding, so the top-left position of the returned image is returned
  // in (left,top). These will most likely not match the coordinates
  // returned by BoundingBox.
  // Use pixDestroy to delete the image after use.
  Pix* GetImage(PageIteratorLevel level, int padding,
                int* left, int* top) const;

  // Returns the baseline of the current object at the given level.
  // The baseline is the line that passes through (x1, y1) and (x2, y2),
  // which are written through the output pointers.
  // WARNING: with vertical text, baselines may be vertical!
  // Returns false if there is no baseline at the current position.
  bool Baseline(PageIteratorLevel level,
                int* x1, int* y1, int* x2, int* y2) const;

  // Returns orientation for the block the iterator points to, through the
  // four output pointers.
  //   orientation, writing_direction, textline_order: see publictypes.h
  //   deskew_angle: after rotating the block so the text orientation is
  //                 upright, how many radians does one have to rotate the
  //                 block anti-clockwise for it to be level?
  //                   -Pi/4 <= deskew_angle <= Pi/4
  // NOTE(review): unlike the other accessors in this section, this method is
  // not declared const - confirm whether that is intentional.
  void Orientation(tesseract::Orientation *orientation,
                   tesseract::WritingDirection *writing_direction,
                   tesseract::TextlineOrder *textline_order,
                   float *deskew_angle);

 protected:
  // Sets up the internal data for iterating the blobs of a new word, then
  // moves the iterator to the given offset.
  void BeginWord(int offset);

  // Pointer to the page_res owned by the API.
  PAGE_RES* page_res_;
  // Pointer to the Tesseract object owned by the API.
  Tesseract* tesseract_;
  // The iterator to the page_res_. Owned by this PageIterator.
  // A pointer just to avoid dragging in Tesseract includes.
  PAGE_RES_IT* it_;
  // The current input WERD being iterated. If there is an output from OCR,
  // then word_ is NULL. Owned by the API.
  WERD* word_;
  // The length of the current word_.
  int word_length_;
  // The current blob index within the word.
  int blob_index_;
  // Iterator to the blobs within the word. If NULL, then we are iterating
  // OCR results in the box_word.
  // Owned by this PageIterator.
  C_BLOB_IT* cblob_it_;
  // Parameters saved from the Thresholder. Needed to rebuild coordinates.
  // See the constructor comment for the meaning of each value.
  int scale_;
  int scaled_yres_;
  int rect_left_;
  int rect_top_;
  int rect_width_;
  int rect_height_;
};

}  // namespace tesseract.

#endif  // TESSERACT_API_PAGEITERATOR_H__
3.01 code from http://github.com/jimregan/tesseract-ocr with addaptions related to Linux and Windows (VC2008) compile process git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2010-11-24 02:34:14 +08:00			`///////////////////////////////////////////////////////////////////////`
			`// File: pageiterator.h`
			`// Description: Iterator for tesseract page structure that avoids using`
			`// tesseract internal data structures.`
			`// Author: Ray Smith`
			`// Created: Fri Feb 26 11:01:06 PST 2010`
			`//`
			`// (C) Copyright 2010, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`//`
			`///////////////////////////////////////////////////////////////////////`

			`#ifndef TESSERACT_API_PAGEITERATOR_H__`
			`#define TESSERACT_API_PAGEITERATOR_H__`

			`#include "apitypes.h"`

			`class C_BLOB_IT;`
			`class PBLOB_IT;`
			`class PAGE_RES;`
			`class PAGE_RES_IT;`
			`class WERD;`
			`struct Pix;`

			`namespace tesseract {`

			`class Tesseract;`

			`// Class to iterate over tesseract page structure, providing access to all`
			`// levels of the page hierarchy, without including any tesseract headers or`
			`// having to handle any tesseract structures.`
			`// WARNING! This class points to data held within the TessBaseAPI class, and`
			`// therefore can only be used while the TessBaseAPI class still exists and`
			`// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,`
			`// DetectOS, or anything else that changes the internal PAGE_RES.`
			`// See apitypes.h for the definition of PageIteratorLevel.`
			`// See also ResultIterator, derived from PageIterator, which adds in the`
			`// ability to access OCR output with text-specific methods.`

			`class PageIterator {`
			`public:`
			`// page_res and tesseract come directly from the BaseAPI.`
			`// The rectangle parameters are copied indirectly from the Thresholder,`
			`// via the BaseAPI. They represent the coordinates of some rectangle in an`
			`// original image (in top-left-origin coordinates) and therefore the top-left`
			`// needs to be added to any output boxes in order to specify coordinates`
			`// in the original image. See TessBaseAPI::SetRectangle.`
			`// The scale and scaled_yres are in case the Thresholder scaled the image`
			`// rectangle prior to thresholding. Any coordinates in tesseract's image`
			`// must be divided by scale before adding (rect_left, rect_top).`
			`// The scaled_yres indicates the effective resolution of the binary image`
			`// that tesseract has been given by the Thresholder.`
			`// After the constructor, Begin has already been called.`
			`PageIterator(PAGE_RES* page_res, Tesseract* tesseract,`
			`int scale, int scaled_yres,`
			`int rect_left, int rect_top,`
			`int rect_width, int rect_height);`
			`virtual ~PageIterator();`

			`// Page/ResultIterators may be copied! This makes it possible to iterate over`
			`// all the objects at a lower level, while maintaining an iterator to`
			`// objects at a higher level. These constructors DO NOT CALL Begin, so`
			`// iterations will continue from the location of src.`
			`PageIterator(const PageIterator& src);`
			`const PageIterator& operator=(const PageIterator& src);`

			`// ============= Moving around within the page ============.`

			`// Moves the iterator to point to the start of the page to begin an iteration.`
			`void Begin();`

			`// Moves to the start of the next object at the given level in the`
			`// page hierarchy, and returns false if the end of the page was reached.`
			`// NOTE that RIL_SYMBOL will skip non-text blocks, but all other`
			`// PageIteratorLevel level values will visit each non-text block once.`
			`// Think of non text blocks as containing a single para, with a single line,`
			`// with a single imaginary word.`
			`// Calls to Next with different levels may be freely intermixed.`
			`// This function iterates words in right-to-left scripts correctly, if`
			`// the appropriate language has been loaded into Tesseract.`
			`bool Next(PageIteratorLevel level);`

			`// Returns true if the iterator is at the start of an object at the given`
			`// level. Possible uses include determining if a call to Next(RIL_WORD)`
			`// moved to the start of a RIL_PARA.`
			`bool IsAtBeginningOf(PageIteratorLevel level) const;`

			`// Returns whether the iterator is positioned at the last element in a`
			`// given level. (e.g. the last word in a line, the last line in a block)`
			`bool IsAtFinalElement(PageIteratorLevel level,`
			`PageIteratorLevel element) const;`

			`// ============= Accessing data ==============.`
			`// Coordinate system:`
			`// Integer coordinates are at the cracks between the pixels.`
			`// The top-left corner of the top-left pixel in the image is at (0,0).`
			`// The bottom-right corner of the bottom-right pixel in the image is at`
			`// (width, height).`
			`// Every bounding box goes from the top-left of the top-left contained`
			`// pixel to the bottom-right of the bottom-right contained pixel, so`
			`// the bounding box of the single top-left pixel in the image is:`
			`// (0,0)->(1,1).`
			`// If an image rectangle has been set in the API, then returned coordinates`
			`// relate to the original (full) image, rather than the rectangle.`

			`// Returns the bounding rectangle of the current object at the given level.`
			`// See comment on coordinate system above.`
			`// Returns false if there is no such object at the current position.`
			`// The returned bounding box is guaranteed to match the size and position`
			`// of the image returned by GetBinaryImage, but may clip foreground pixels`
			`// from a grey image. The padding argument to GetImage can be used to expand`
			`// the image to include more foreground pixels. See GetImage below.`
			`bool BoundingBox(PageIteratorLevel level,`
			`int* left, int* top, int* right, int* bottom) const;`

			`// Returns the type of the current block. See apitypes.h for PolyBlockType.`
			`PolyBlockType BlockType() const;`

			`// Returns a binary image of the current object at the given level.`
			`// The position and size match the return from BoundingBox.`
			`// Use pixDestroy to delete the image after use.`
			`Pix* GetBinaryImage(PageIteratorLevel level) const;`

			`// Returns an image of the current object at the given level in greyscale`
			`// if available in the input. To guarantee a binary image use GetBinaryImage.`
			`// NOTE that in order to give the best possible image, the bounds are`
			`// expanded slightly over the binary connected component, by the supplied`
			`// padding, so the top-left position of the returned image is returned`
			`// in (left,top). These will most likely not match the coordinates`
			`// returned by BoundingBox.`
			`// Use pixDestroy to delete the image after use.`
			`Pix* GetImage(PageIteratorLevel level, int padding,`
			`int* left, int* top) const;`

			`// Returns the baseline of the current object at the given level.`
			`// The baseline is the line that passes through (x1, y1) and (x2, y2).`
			`// WARNING: with vertical text, baselines may be vertical!`
			`// Returns false if there is no baseline at the current position.`
			`bool Baseline(PageIteratorLevel level,`
			`int* x1, int* y1, int* x2, int* y2) const;`

Various fixes, including memory leak in fixspace, font labels on output, removed some annoying debug output, fixes to initialization of parameters, general cleanup, and added Hindi git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@566 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2011-03-22 05:43:04 +08:00			`// Returns orientation for the block the iterator points to.`
			`// orientation, writing_direction, textline_order: see publictypes.h`
			`// deskew_angle: after rotating the block so the text orientation is`
			`// upright, how many radians does one have to rotate the`
			`// block anti-clockwise for it to be level?`
			`// -Pi/4 <= deskew_angle <= Pi/4`
			`void Orientation(tesseract::Orientation *orientation,`
			`tesseract::WritingDirection *writing_direction,`
			`tesseract::TextlineOrder *textline_order,`
			`float *deskew_angle);`

3.01 code from http://github.com/jimregan/tesseract-ocr with addaptions related to Linux and Windows (VC2008) compile process git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2010-11-24 02:34:14 +08:00			`protected:`
			`// Sets up the internal data for iterating the blobs of a new word, then`
			`// moves the iterator to the given offset.`
			`void BeginWord(int offset);`

			`// Pointer to the page_res owned by the API.`
			`PAGE_RES* page_res_;`
			`// Pointer to the Tesseract object owned by the API.`
			`Tesseract* tesseract_;`
			`// The iterator to the page_res_. Owned by this PageIterator.`
			`// A pointer just to avoid dragging in Tesseract includes.`
			`PAGE_RES_IT* it_;`
			`// The current input WERD being iterated. If there is an output from OCR,`
			`// then word_ is NULL. Owned by the API.`
			`WERD* word_;`
			`// The length of the current word_.`
			`int word_length_;`
			`// The current blob index within the word.`
			`int blob_index_;`
			`// Iterator to the blobs within the word. If NULL, then we are iterating`
			`// OCR results in the box_word.`
			`// Owned by this PageIterator.`
			`C_BLOB_IT* cblob_it_;`
			`// Parameters saved from the Thresholder. Needed to rebuild coordinates.`
			`int scale_;`
			`int scaled_yres_;`
			`int rect_left_;`
			`int rect_top_;`
			`int rect_width_;`
			`int rect_height_;`
			`};`

			`} // namespace tesseract.`

			`#endif // TESSERACT_API_PAGEITERATOR_H__`