tesseract/training/degradeimage.cpp

/**********************************************************************
 * File:        degradeimage.cpp
 * Description: Function to degrade an image (usually of text) as if it
 *              has been printed and then scanned.
 * Authors:     Ray Smith
 * Created:     Tue Nov 19 2013
 *
 * (C) Copyright 2013, Google Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **********************************************************************/

#include "degradeimage.h"

#include <stdlib.h>
#include "allheaders.h"   // from leptonica
#include "genericvector.h"
#include "helpers.h"  // For TRand.
#include "rect.h"

namespace tesseract {

// A randomized perspective distortion can be applied to synthetic input.
// The perspective distortion comes from leptonica, which uses 2 sets of 4
// corners to determine the distortion. There are random values for each of
// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
// defined in terms of a single shear value. This reduces the degrees of
// freedom enough to make the distortion more realistic than it would otherwise
// be if all 8 coordinates could move independently.
// One additional factor is used for the color of the pixels that don't exist
// in the source image.
// Name for each of the randomizing factors.
enum FactorNames {
  FN_INCOLOR,
  FN_Y0,
  FN_Y1,
  FN_Y2,
  FN_Y3,
  FN_X0,
  FN_X1,
  FN_SHEAR,
  // x2 = x1 - shear
  // x3 = x0 + shear
  FN_NUM_FACTORS
};

// Rotation is +/- kRotationRange radians.
const float kRotationRange = 0.02f;
// Number of grey levels to shift by for each exposure step.
const int kExposureFactor = 16;
// Salt and pepper noise is +/- kSaltnPepper.
const int kSaltnPepper = 5;
// Min sum of width + height on which to operate the ramp.
const int kMinRampSize = 1000;

// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
// corresponding to darkening on the copier and <0 lighter and 0 not copied.
// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
// If rotation is NULL, rotation is skipped. If *rotation is non-zero, the pix
// is rotated by *rotation else it is randomly rotated and *rotation is
// modified.
//
// HOW IT WORKS:
// Most of the process is really dictated by the fact that the minimum
// available convolution is 3X3, which is too big really to simulate a
// good quality print/scan process. (2X2 would be better.)
// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
// images generally biased to being too light, so most of the work is to make
// them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
// (using a greyscale erosion) one heavy (by being before convolution) and one
// light (after convolution).
// With no dilation, after covolution, the images are so light that a heavy
// constant offset is required to make the 0 image look reasonable. A simple
// constant offset multiple of exposure to undo this value is enough to achieve
// all the required lightening. This gives the advantage that exposure level 1
// with a single dilation gives a good impression of the broken-yet-too-dark
// problem that is often seen in scans.
// A small random rotation gives some varying greyscale values on the edges,
// and some random salt and pepper noise on top helps to realistically jaggy-up
// the edges.
// Finally a greyscale ramp provides a continuum of effects between exposure
// levels.
Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
                  float* rotation) {
  Pix* pix = pixConvertTo8(input, false);
  pixDestroy(&input);
  input = pix;
  int width = pixGetWidth(input);
  int height = pixGetHeight(input);
  if (exposure >= 2) {
    // An erosion simulates the spreading darkening of a dark copy.
    // This is backwards to binary morphology,
    // see http://www.leptonica.com/grayscale-morphology.html
    pix = input;
    input = pixErodeGray(pix, 3, 3);
    pixDestroy(&pix);
  }
  // A convolution is essential to any mode as no scanner produces an
  // image as sharp as the electronic image.
  pix = pixBlockconv(input, 1, 1);
  pixDestroy(&input);
  // A small random rotation helps to make the edges jaggy in a realistic way.
  if (rotation != NULL) {
    float radians_clockwise = 0.0f;
    if (*rotation) {
      radians_clockwise = *rotation;
    } else if (randomizer != NULL) {
      radians_clockwise = randomizer->SignedRand(kRotationRange);
    }

    input = pixRotate(pix, radians_clockwise,
                      L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
                      0, 0);
    // Rotate the boxes to match.
    *rotation = radians_clockwise;
    pixDestroy(&pix);
  } else {
    input = pix;
  }

  if (exposure >= 3 || exposure == 1) {
    // Erosion after the convolution is not as heavy as before, so it is
    // good for level 1 and in addition as a level 3.
    // This is backwards to binary morphology,
    // see http://www.leptonica.com/grayscale-morphology.html
    pix = input;
    input = pixErodeGray(pix, 3, 3);
    pixDestroy(&pix);
  }
  // The convolution really needed to be 2x2 to be realistic enough, but
  // we only have 3x3, so we have to bias the image darker or lose thin
  // strokes.
  int erosion_offset = 0;
  // For light and 0 exposure, there is no dilation, so compensate for the
  // convolution with a big darkening bias which is undone for lighter
  // exposures.
  if (exposure <= 0)
    erosion_offset = -3 * kExposureFactor;
  // Add in a general offset of the greyscales for the exposure level so
  // a threshold of 128 gives a reasonable binary result.
  erosion_offset -= exposure * kExposureFactor;
  // Add a gradual fade over the page and a small amount of salt and pepper
  // noise to simulate noise in the sensor/paper fibres and varying
  // illumination.
  l_uint32* data = pixGetData(input);
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      int pixel = GET_DATA_BYTE(data, x);
      if (randomizer != NULL)
        pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
      if (height + width > kMinRampSize)
        pixel -= (2*x + y) * 32 / (height + width);
      pixel += erosion_offset;
      if (pixel < 0)
        pixel = 0;
      if (pixel > 255)
        pixel = 255;
      SET_DATA_BYTE(data, x, pixel);
    }
    data += input->wpl;
  }
  return input;
}

// Creates and returns a Pix distorted by various means according to the bool
// flags. If boxes is not NULL, the boxes are resized/positioned according to
// any spatial distortion and also by the integer reduction factor box_scale
// so they will match what the network will output.
// Returns NULL on error. The returned Pix must be pixDestroyed.
Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
                         bool white_noise, bool smooth_noise, bool blur,
                         int box_reduction, TRand* randomizer,
                         GenericVector<TBOX>* boxes) {
  Pix* distorted = pixCopy(NULL, const_cast<Pix*>(pix));
  // Things to do to synthetic training data.
  if (invert && randomizer->SignedRand(1.0) < 0)
    pixInvert(distorted, distorted);
  if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
    // TODO(rays) Cook noise in a more thread-safe manner than rand().
    // Attempt to make the sequences reproducible.
    srand(randomizer->IntRand());
    Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
    pixDestroy(&distorted);
    if (smooth_noise) {
      distorted = pixBlockconv(pixn, 1, 1);
      pixDestroy(&pixn);
    } else {
      distorted = pixn;
    }
  }
  if (blur && randomizer->SignedRand(1.0) > 0.0) {
    Pix* blurred = pixBlockconv(distorted, 1, 1);
    pixDestroy(&distorted);
    distorted = blurred;
  }
  if (perspective)
    GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
  if (boxes != NULL) {
    for (int b = 0; b < boxes->size(); ++b) {
      (*boxes)[b].scale(1.0f / box_reduction);
      if ((*boxes)[b].width() <= 0)
        (*boxes)[b].set_right((*boxes)[b].left() + 1);
    }
  }
  return distorted;
}

// Distorts anything that has a non-null pointer with the same pseudo-random
// perspective distortion. Width and height only need to be set if there
// is no pix. If there is a pix, then they will be taken from there.
void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
                                   Pix** pix, GenericVector<TBOX>* boxes) {
  if (pix != NULL && *pix != NULL) {
    width = pixGetWidth(*pix);
    height = pixGetHeight(*pix);
  }
  float* im_coeffs = NULL;
  float* box_coeffs = NULL;
  l_int32 incolor =
      ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
  if (pix != NULL && *pix != NULL) {
    // Transform the image.
    Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
    if (transformed == NULL) {
      tprintf("Projective transformation failed!!\n");
      return;
    }
    pixDestroy(pix);
    *pix = transformed;
  }
  if (boxes != NULL) {
    // Transform the boxes.
    for (int b = 0; b < boxes->size(); ++b) {
      int x1, y1, x2, y2;
      const TBOX& box = (*boxes)[b];
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
                               &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
                               &x2, &y2);
      TBOX new_box1(x1, height - y2, x2, height - y1);
      projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
                               &x1, &y1);
      projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
                               &y2);
      TBOX new_box2(x1, height - y1, x2, height - y2);
      (*boxes)[b] = new_box1.bounding_union(new_box2);
    }
  }
  free(im_coeffs);
  free(box_coeffs);
}

// Computes the coefficients of a randomized projective transformation.
// The image transform requires backward transformation coefficient, and the
// box transform the forward coefficients.
// Returns the incolor arg to pixProjective.
int ProjectiveCoeffs(int width, int height, TRand* randomizer,
                     float** im_coeffs, float** box_coeffs) {
  // Setup "from" points.
  Pta* src_pts = ptaCreate(4);
  ptaAddPt(src_pts, 0.0f, 0.0f);
  ptaAddPt(src_pts, width, 0.0f);
  ptaAddPt(src_pts, width, height);
  ptaAddPt(src_pts, 0.0f, height);
  // Extract factors from pseudo-random sequence.
  float factors[FN_NUM_FACTORS];
  float shear = 0.0f;  // Shear is signed.
  for (int i = 0; i < FN_NUM_FACTORS; ++i) {
    // Everything is squared to make wild values rarer.
    if (i == FN_SHEAR) {
      // Shear is signed.
      shear = randomizer->SignedRand(0.5 / 3.0);
      shear = shear >= 0.0 ? shear * shear : -shear * shear;
      // Keep the sheared points within the original rectangle.
      if (shear < -factors[FN_X0]) shear = -factors[FN_X0];
      if (shear > factors[FN_X1]) shear = factors[FN_X1];
      factors[i] = shear;
    } else if (i != FN_INCOLOR) {
      factors[i] = fabs(randomizer->SignedRand(1.0));
      if (i <= FN_Y3)
        factors[i] *= 5.0 / 8.0;
      else
        factors[i] *= 0.5;
      factors[i] *= factors[i];
    }
  }
  // Setup "to" points.
  Pta* dest_pts = ptaCreate(4);
  ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
  ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
           (1 - factors[FN_Y2]) * height);
  ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
           (1 - factors[FN_Y3]) * height);
  getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
  getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
  ptaDestroy(&src_pts);
  ptaDestroy(&dest_pts);
  return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
}

}  // namespace tesseract
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`/**********************************************************************`
			`* File: degradeimage.cpp`
			`* Description: Function to degrade an image (usually of text) as if it`
			`* has been printed and then scanned.`
			`* Authors: Ray Smith`
			`* Created: Tue Nov 19 2013`
			`*`
			`* (C) Copyright 2013, Google Inc.`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*`
			`**********************************************************************/`

			`#include "degradeimage.h"`

			`#include <stdlib.h>`
			`#include "allheaders.h" // from leptonica`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`#include "genericvector.h"`
Fixed issue 1134 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1082 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:07:26 +08:00			`#include "helpers.h" // For TRand.`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`#include "rect.h"`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00
			`namespace tesseract {`

Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`// A randomized perspective distortion can be applied to synthetic input.`
			`// The perspective distortion comes from leptonica, which uses 2 sets of 4`
			`// corners to determine the distortion. There are random values for each of`
			`// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead`
			`// defined in terms of a single shear value. This reduces the degrees of`
			`// freedom enough to make the distortion more realistic than it would otherwise`
			`// be if all 8 coordinates could move independently.`
			`// One additional factor is used for the color of the pixels that don't exist`
			`// in the source image.`
			`// Name for each of the randomizing factors.`
			`enum FactorNames {`
			`FN_INCOLOR,`
			`FN_Y0,`
			`FN_Y1,`
			`FN_Y2,`
			`FN_Y3,`
			`FN_X0,`
			`FN_X1,`
			`FN_SHEAR,`
			`// x2 = x1 - shear`
			`// x3 = x0 + shear`
			`FN_NUM_FACTORS`
			`};`

New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`// Rotation is +/- kRotationRange radians.`
			`const float kRotationRange = 0.02f;`
			`// Number of grey levels to shift by for each exposure step.`
			`const int kExposureFactor = 16;`
			`// Salt and pepper noise is +/- kSaltnPepper.`
			`const int kSaltnPepper = 5;`
			`// Min sum of width + height on which to operate the ramp.`
			`const int kMinRampSize = 1000;`

			`// Degrade the pix as if by a print/copy/scan cycle with exposure > 0`
			`// corresponding to darkening on the copier and <0 lighter and 0 not copied.`
			`// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.`
			`// If rotation is NULL, rotation is skipped. If *rotation is non-zero, the pix`
			`// is rotated by rotation else it is randomly rotated and rotation is`
			`// modified.`
Fixed some formatting issues git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1083 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:25:42 +08:00			`//`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`// HOW IT WORKS:`
			`// Most of the process is really dictated by the fact that the minimum`
			`// available convolution is 3X3, which is too big really to simulate a`
			`// good quality print/scan process. (2X2 would be better.)`
			`// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the`
			`// images generally biased to being too light, so most of the work is to make`
			`// them darker. 3 levels of thickening/darkening are achieved with 2 dilations,`
			`// (using a greyscale erosion) one heavy (by being before convolution) and one`
			`// light (after convolution).`
			`// With no dilation, after covolution, the images are so light that a heavy`
			`// constant offset is required to make the 0 image look reasonable. A simple`
			`// constant offset multiple of exposure to undo this value is enough to achieve`
			`// all the required lightening. This gives the advantage that exposure level 1`
			`// with a single dilation gives a good impression of the broken-yet-too-dark`
			`// problem that is often seen in scans.`
			`// A small random rotation gives some varying greyscale values on the edges,`
			`// and some random salt and pepper noise on top helps to realistically jaggy-up`
			`// the edges.`
			`// Finally a greyscale ramp provides a continuum of effects between exposure`
			`// levels.`
Fixed issue 1134 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1082 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:07:26 +08:00			`Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,`
			`float* rotation) {`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`Pix* pix = pixConvertTo8(input, false);`
			`pixDestroy(&input);`
			`input = pix;`
			`int width = pixGetWidth(input);`
			`int height = pixGetHeight(input);`
			`if (exposure >= 2) {`
			`// An erosion simulates the spreading darkening of a dark copy.`
			`// This is backwards to binary morphology,`
			`// see http://www.leptonica.com/grayscale-morphology.html`
			`pix = input;`
			`input = pixErodeGray(pix, 3, 3);`
			`pixDestroy(&pix);`
			`}`
			`// A convolution is essential to any mode as no scanner produces an`
			`// image as sharp as the electronic image.`
			`pix = pixBlockconv(input, 1, 1);`
			`pixDestroy(&input);`
			`// A small random rotation helps to make the edges jaggy in a realistic way.`
			`if (rotation != NULL) {`
Fixed issue 1134 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1082 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:07:26 +08:00			`float radians_clockwise = 0.0f;`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`if (*rotation) {`
			`radians_clockwise = *rotation;`
Fixed issue 1134 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1082 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:07:26 +08:00			`} else if (randomizer != NULL) {`
			`radians_clockwise = randomizer->SignedRand(kRotationRange);`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`}`

			`input = pixRotate(pix, radians_clockwise,`
			`L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,`
			`0, 0);`
			`// Rotate the boxes to match.`
			`*rotation = radians_clockwise;`
			`pixDestroy(&pix);`
			`} else {`
			`input = pix;`
			`}`

			`if (exposure >= 3 \|\| exposure == 1) {`
			`// Erosion after the convolution is not as heavy as before, so it is`
			`// good for level 1 and in addition as a level 3.`
			`// This is backwards to binary morphology,`
			`// see http://www.leptonica.com/grayscale-morphology.html`
			`pix = input;`
			`input = pixErodeGray(pix, 3, 3);`
			`pixDestroy(&pix);`
			`}`
			`// The convolution really needed to be 2x2 to be realistic enough, but`
			`// we only have 3x3, so we have to bias the image darker or lose thin`
			`// strokes.`
			`int erosion_offset = 0;`
			`// For light and 0 exposure, there is no dilation, so compensate for the`
			`// convolution with a big darkening bias which is undone for lighter`
			`// exposures.`
			`if (exposure <= 0)`
			`erosion_offset = -3 * kExposureFactor;`
			`// Add in a general offset of the greyscales for the exposure level so`
			`// a threshold of 128 gives a reasonable binary result.`
			`erosion_offset -= exposure * kExposureFactor;`
			`// Add a gradual fade over the page and a small amount of salt and pepper`
			`// noise to simulate noise in the sensor/paper fibres and varying`
			`// illumination.`
			`l_uint32* data = pixGetData(input);`
			`for (int y = 0; y < height; ++y) {`
			`for (int x = 0; x < width; ++x) {`
			`int pixel = GET_DATA_BYTE(data, x);`
Fixed issue 1134 git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1082 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-04-25 09:07:26 +08:00			`if (randomizer != NULL)`
			`pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;`
New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`if (height + width > kMinRampSize)`
			`pixel -= (2x + y) 32 / (height + width);`
			`pixel += erosion_offset;`
			`if (pixel < 0)`
			`pixel = 0;`
			`if (pixel > 255)`
			`pixel = 255;`
			`SET_DATA_BYTE(data, x, pixel);`
			`}`
			`data += input->wpl;`
			`}`
			`return input;`
			`}`

Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`// Creates and returns a Pix distorted by various means according to the bool`
			`// flags. If boxes is not NULL, the boxes are resized/positioned according to`
			`// any spatial distortion and also by the integer reduction factor box_scale`
			`// so they will match what the network will output.`
			`// Returns NULL on error. The returned Pix must be pixDestroyed.`
			`Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,`
			`bool white_noise, bool smooth_noise, bool blur,`
			`int box_reduction, TRand* randomizer,`
			`GenericVector<TBOX>* boxes) {`
			`Pix* distorted = pixCopy(NULL, const_cast<Pix*>(pix));`
			`// Things to do to synthetic training data.`
			`if (invert && randomizer->SignedRand(1.0) < 0)`
			`pixInvert(distorted, distorted);`
			`if ((white_noise \|\| smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {`
			`// TODO(rays) Cook noise in a more thread-safe manner than rand().`
			`// Attempt to make the sequences reproducible.`
			`srand(randomizer->IntRand());`
			`Pix* pixn = pixAddGaussianNoise(distorted, 8.0);`
			`pixDestroy(&distorted);`
			`if (smooth_noise) {`
			`distorted = pixBlockconv(pixn, 1, 1);`
			`pixDestroy(&pixn);`
			`} else {`
			`distorted = pixn;`
			`}`
			`}`
			`if (blur && randomizer->SignedRand(1.0) > 0.0) {`
			`Pix* blurred = pixBlockconv(distorted, 1, 1);`
			`pixDestroy(&distorted);`
			`distorted = blurred;`
			`}`
			`if (perspective)`
			`GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);`
			`if (boxes != NULL) {`
			`for (int b = 0; b < boxes->size(); ++b) {`
			`(*boxes)[b].scale(1.0f / box_reduction);`
			`if ((*boxes)[b].width() <= 0)`
			`(boxes)[b].set_right((boxes)[b].left() + 1);`
			`}`
			`}`
			`return distorted;`
			`}`

			`// Distorts anything that has a non-null pointer with the same pseudo-random`
			`// perspective distortion. Width and height only need to be set if there`
			`// is no pix. If there is a pix, then they will be taken from there.`
			`void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,`
			`Pix** pix, GenericVector<TBOX>* boxes) {`
			`if (pix != NULL && *pix != NULL) {`
			`width = pixGetWidth(*pix);`
			`height = pixGetHeight(*pix);`
			`}`
			`float* im_coeffs = NULL;`
			`float* box_coeffs = NULL;`
			`l_int32 incolor =`
			`ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);`
			`if (pix != NULL && *pix != NULL) {`
			`// Transform the image.`
			`Pix* transformed = pixProjective(*pix, im_coeffs, incolor);`
			`if (transformed == NULL) {`
			`tprintf("Projective transformation failed!!\n");`
			`return;`
			`}`
			`pixDestroy(pix);`
			`*pix = transformed;`
			`}`
			`if (boxes != NULL) {`
			`// Transform the boxes.`
			`for (int b = 0; b < boxes->size(); ++b) {`
			`int x1, y1, x2, y2;`
			`const TBOX& box = (*boxes)[b];`
			`projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,`
			`&y1);`
			`projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),`
			`&x2, &y2);`
			`TBOX new_box1(x1, height - y2, x2, height - y1);`
			`projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),`
			`&x1, &y1);`
			`projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,`
			`&y2);`
			`TBOX new_box2(x1, height - y1, x2, height - y2);`
			`(*boxes)[b] = new_box1.bounding_union(new_box2);`
			`}`
			`}`
			`free(im_coeffs);`
			`free(box_coeffs);`
			`}`

			`// Computes the coefficients of a randomized projective transformation.`
			`// The image transform requires backward transformation coefficient, and the`
			`// box transform the forward coefficients.`
			`// Returns the incolor arg to pixProjective.`
			`int ProjectiveCoeffs(int width, int height, TRand* randomizer,`
			`float im_coeffs, float box_coeffs) {`
			`// Setup "from" points.`
			`Pta* src_pts = ptaCreate(4);`
			`ptaAddPt(src_pts, 0.0f, 0.0f);`
			`ptaAddPt(src_pts, width, 0.0f);`
			`ptaAddPt(src_pts, width, height);`
			`ptaAddPt(src_pts, 0.0f, height);`
			`// Extract factors from pseudo-random sequence.`
			`float factors[FN_NUM_FACTORS];`
			`float shear = 0.0f; // Shear is signed.`
			`for (int i = 0; i < FN_NUM_FACTORS; ++i) {`
			`// Everything is squared to make wild values rarer.`
			`if (i == FN_SHEAR) {`
			`// Shear is signed.`
			`shear = randomizer->SignedRand(0.5 / 3.0);`
			`shear = shear >= 0.0 ? shear * shear : -shear * shear;`
			`// Keep the sheared points within the original rectangle.`
			`if (shear < -factors[FN_X0]) shear = -factors[FN_X0];`
			`if (shear > factors[FN_X1]) shear = factors[FN_X1];`
			`factors[i] = shear;`
			`} else if (i != FN_INCOLOR) {`
			`factors[i] = fabs(randomizer->SignedRand(1.0));`
			`if (i <= FN_Y3)`
			`factors[i] *= 5.0 / 8.0;`
			`else`
			`factors[i] *= 0.5;`
			`factors[i] *= factors[i];`
			`}`
			`}`
			`// Setup "to" points.`
			`Pta* dest_pts = ptaCreate(4);`
			`ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);`
			`ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);`
			`ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,`
			`(1 - factors[FN_Y2]) * height);`
			`ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,`
			`(1 - factors[FN_Y3]) * height);`
			`getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);`
			`getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);`
			`ptaDestroy(&src_pts);`
			`ptaDestroy(&dest_pts);`
			`return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;`
			`}`

New training tool text2image git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@965 d0cd1f9f-072b-0410-8dd7-cf729c803f20 2014-01-10 02:04:20 +08:00			`} // namespace tesseract`