mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Merge branch '4.1' of https://github.com/tesseract-ocr/tesseract into 4.1
This commit is contained in:
commit
82a7a87b3b
@ -45,10 +45,6 @@ const double kMaxRectangularFraction = 0.75;
|
||||
constexpr double kMaxRectangularGradient = 0.1;  // About 6 degrees.
// Images below this size are not worth searching for image regions.
constexpr int kMinImageFindSize = 100;
// Multiplier applied to the rms error of the color fit.
constexpr double kRMSFitScaling = 8.0;
// Smallest color difference at which a region counts as having two colors.
constexpr int kMinColorDifference = 16;
|
||||
// Pixel padding for noise blobs and partitions when rendering on the image
|
||||
// mask to encourage them to join together. Make it too big and images
|
||||
// will fatten out too much and have to be clipped to text.
|
||||
@ -384,153 +380,6 @@ double ImageFind::ColorDistanceFromLine(const uint8_t* line1,
|
||||
return cross_sq / line_sq; // This is the squared distance.
|
||||
}
|
||||
|
||||
|
||||
// Returns the leptonica combined code for the given RGB triplet.
|
||||
uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) {
|
||||
l_uint32 result;
|
||||
composeRGBPixel(r, g, b, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the input value clipped to a uint8_t.
|
||||
uint8_t ImageFind::ClipToByte(double pixel) {
|
||||
if (pixel < 0.0)
|
||||
return 0;
|
||||
else if (pixel >= 255.0)
|
||||
return 255;
|
||||
return static_cast<uint8_t>(pixel);
|
||||
}
|
||||
|
||||
// Computes the light and dark extremes of color in the given rectangle of
|
||||
// the given pix, which is factor smaller than the coordinate system in rect.
|
||||
// The light and dark points are taken to be the upper and lower 8th-ile of
|
||||
// the most deviant of R, G and B. The value of the other 2 channels are
|
||||
// computed by linear fit against the most deviant.
|
||||
// The colors of the two points are returned in color1 and color2, with the
|
||||
// alpha channel set to a scaled mean rms of the fits.
|
||||
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
||||
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
||||
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
||||
void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
|
||||
Pix* color_map1, Pix* color_map2,
|
||||
Pix* rms_map,
|
||||
uint8_t* color1, uint8_t* color2) {
|
||||
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
|
||||
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
|
||||
// background.
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
int left_pad = std::max(rect.left() - 2 * factor, 0) / factor;
|
||||
int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
|
||||
top_pad = std::min(height, top_pad);
|
||||
int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
|
||||
right_pad = std::min(width, right_pad);
|
||||
int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor;
|
||||
int width_pad = right_pad - left_pad;
|
||||
int height_pad = top_pad - bottom_pad;
|
||||
if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
|
||||
return;
|
||||
// Now crop the pix to the rectangle.
|
||||
Box* scaled_box = boxCreate(left_pad, height - top_pad,
|
||||
width_pad, height_pad);
|
||||
Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr);
|
||||
|
||||
// Compute stats over the whole image.
|
||||
STATS red_stats(0, 256);
|
||||
STATS green_stats(0, 256);
|
||||
STATS blue_stats(0, 256);
|
||||
uint32_t* data = pixGetData(scaled);
|
||||
ASSERT_HOST(pixGetWpl(scaled) == width_pad);
|
||||
for (int y = 0; y < height_pad; ++y) {
|
||||
for (int x = 0; x < width_pad; ++x, ++data) {
|
||||
int r = GET_DATA_BYTE(data, COLOR_RED);
|
||||
int g = GET_DATA_BYTE(data, COLOR_GREEN);
|
||||
int b = GET_DATA_BYTE(data, COLOR_BLUE);
|
||||
red_stats.add(r, 1);
|
||||
green_stats.add(g, 1);
|
||||
blue_stats.add(b, 1);
|
||||
}
|
||||
}
|
||||
// Find the RGB component with the greatest 8th-ile-range.
|
||||
// 8th-iles are used instead of quartiles to get closer to the true
|
||||
// foreground color, which is going to be faint at best because of the
|
||||
// pre-scaling of the input image.
|
||||
int best_l8 = static_cast<int>(red_stats.ile(0.125f));
|
||||
int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
|
||||
int best_i8r = best_u8 - best_l8;
|
||||
int x_color = COLOR_RED;
|
||||
int y1_color = COLOR_GREEN;
|
||||
int y2_color = COLOR_BLUE;
|
||||
int l8 = static_cast<int>(green_stats.ile(0.125f));
|
||||
int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
|
||||
if (u8 - l8 > best_i8r) {
|
||||
best_i8r = u8 - l8;
|
||||
best_l8 = l8;
|
||||
best_u8 = u8;
|
||||
x_color = COLOR_GREEN;
|
||||
y1_color = COLOR_RED;
|
||||
}
|
||||
l8 = static_cast<int>(blue_stats.ile(0.125f));
|
||||
u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
|
||||
if (u8 - l8 > best_i8r) {
|
||||
best_i8r = u8 - l8;
|
||||
best_l8 = l8;
|
||||
best_u8 = u8;
|
||||
x_color = COLOR_BLUE;
|
||||
y1_color = COLOR_GREEN;
|
||||
y2_color = COLOR_RED;
|
||||
}
|
||||
if (best_i8r >= kMinColorDifference) {
|
||||
LLSQ line1;
|
||||
LLSQ line2;
|
||||
uint32_t* data = pixGetData(scaled);
|
||||
for (int im_y = 0; im_y < height_pad; ++im_y) {
|
||||
for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
|
||||
int x = GET_DATA_BYTE(data, x_color);
|
||||
int y1 = GET_DATA_BYTE(data, y1_color);
|
||||
int y2 = GET_DATA_BYTE(data, y2_color);
|
||||
line1.add(x, y1);
|
||||
line2.add(x, y2);
|
||||
}
|
||||
}
|
||||
double m1 = line1.m();
|
||||
double c1 = line1.c(m1);
|
||||
double m2 = line2.m();
|
||||
double c2 = line2.c(m2);
|
||||
double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
|
||||
rms *= kRMSFitScaling;
|
||||
// Save the results.
|
||||
color1[x_color] = ClipToByte(best_l8);
|
||||
color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
|
||||
color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
|
||||
color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
|
||||
color2[x_color] = ClipToByte(best_u8);
|
||||
color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
|
||||
color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
|
||||
color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
|
||||
} else {
|
||||
// There is only one color.
|
||||
color1[COLOR_RED] = ClipToByte(red_stats.median());
|
||||
color1[COLOR_GREEN] = ClipToByte(green_stats.median());
|
||||
color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
|
||||
color1[L_ALPHA_CHANNEL] = 0;
|
||||
memcpy(color2, color1, 4);
|
||||
}
|
||||
if (color_map1 != nullptr) {
|
||||
pixSetInRectArbitrary(color_map1, scaled_box,
|
||||
ComposeRGB(color1[COLOR_RED],
|
||||
color1[COLOR_GREEN],
|
||||
color1[COLOR_BLUE]));
|
||||
pixSetInRectArbitrary(color_map2, scaled_box,
|
||||
ComposeRGB(color2[COLOR_RED],
|
||||
color2[COLOR_GREEN],
|
||||
color2[COLOR_BLUE]));
|
||||
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
|
||||
}
|
||||
pixDestroy(&scaled);
|
||||
boxDestroy(&scaled_box);
|
||||
}
|
||||
|
||||
// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
|
||||
// The following functions are responsible for cutting a polygonal image from
|
||||
// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
|
||||
|
@ -87,27 +87,6 @@ class ImageFind {
|
||||
static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2,
|
||||
const uint8_t* point);
|
||||
|
||||
// Returns the leptonica combined code for the given RGB triplet.
|
||||
static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b);
|
||||
|
||||
// Returns the input value clipped to a uint8_t.
|
||||
static uint8_t ClipToByte(double pixel);
|
||||
|
||||
// Computes the light and dark extremes of color in the given rectangle of
|
||||
// the given pix, which is factor smaller than the coordinate system in rect.
|
||||
// The light and dark points are taken to be the upper and lower 8th-ile of
|
||||
// the most deviant of R, G and B. The value of the other 2 channels are
|
||||
// computed by linear fit against the most deviant.
|
||||
// The colors of the two points are returned in color1 and color2, with the
|
||||
// alpha channel set to a scaled mean rms of the fits.
|
||||
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
||||
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
||||
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
||||
static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
|
||||
Pix* color_map1, Pix* color_map2,
|
||||
Pix* rms_map,
|
||||
uint8_t* color1, uint8_t* color2);
|
||||
|
||||
// Returns true if there are no black pixels in between the boxes.
|
||||
// The im_box must represent the bounding box of the pix in tesseract
|
||||
// coordinates, which may be negative, due to rotations to make the textlines
|
||||
|
Loading…
Reference in New Issue
Block a user