This commit is contained in:
zdenop 2022-02-06 12:56:54 +01:00
commit 82a7a87b3b
2 changed files with 0 additions and 172 deletions

View File

@ -45,10 +45,6 @@ const double kMaxRectangularFraction = 0.75;
constexpr double kMaxRectangularGradient = 0.1;  // Roughly 6 degrees.
// Images smaller than this are not worth searching for image regions.
constexpr int kMinImageFindSize = 100;
// Multiplier applied to the rms error of the color fit.
constexpr double kRMSFitScaling = 8.0;
// Smallest color spread that still counts as two distinct colors.
constexpr int kMinColorDifference = 16;
// Pixel padding for noise blobs and partitions when rendering on the image
// mask to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
@ -384,153 +380,6 @@ double ImageFind::ColorDistanceFromLine(const uint8_t* line1,
return cross_sq / line_sq; // This is the squared distance.
}
// Returns the leptonica combined code for the given RGB triplet.
uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) {
l_uint32 result;
composeRGBPixel(r, g, b, &result);
return result;
}
// Returns the input value clipped to a uint8_t.
// Values at or below 0 give 0, values of 255 or more give 255, and anything
// in between is converted with a static_cast, which discards the fractional
// part. A NaN input gives 0.
uint8_t ImageFind::ClipToByte(double pixel) {
  // Written as a negated comparison on purpose: every ordered comparison
  // with NaN is false, so this guard catches NaN as well as non-positive
  // values. Letting NaN reach the cast below would be undefined behavior.
  if (!(pixel > 0.0)) {
    return 0;
  }
  if (pixel >= 255.0) {
    return 255;
  }
  return static_cast<uint8_t>(pixel);
}
// Computes the light and dark extremes of color in the given rectangle of
// the given pix, which is factor smaller than the coordinate system in rect.
// The light and dark points are taken to be the upper and lower 8th-ile of
// the most deviant of R, G and B. The value of the other 2 channels are
// computed by linear fit against the most deviant.
// The colors of the two points are returned in color1 and color2, with the
// alpha channel set to a scaled mean rms of the fits.
// If color_map1 is not null then it and color_map2 get rect pasted in them
// with the two calculated colors, and rms map gets a pasted rect of the rms.
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
Pix* color_map1, Pix* color_map2,
Pix* rms_map,
uint8_t* color1, uint8_t* color2) {
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
// background.
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
int left_pad = std::max(rect.left() - 2 * factor, 0) / factor;
int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
top_pad = std::min(height, top_pad);
int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
right_pad = std::min(width, right_pad);
int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor;
int width_pad = right_pad - left_pad;
int height_pad = top_pad - bottom_pad;
if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
return;
// Now crop the pix to the rectangle.
Box* scaled_box = boxCreate(left_pad, height - top_pad,
width_pad, height_pad);
Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr);
// Compute stats over the whole image.
STATS red_stats(0, 256);
STATS green_stats(0, 256);
STATS blue_stats(0, 256);
uint32_t* data = pixGetData(scaled);
ASSERT_HOST(pixGetWpl(scaled) == width_pad);
for (int y = 0; y < height_pad; ++y) {
for (int x = 0; x < width_pad; ++x, ++data) {
int r = GET_DATA_BYTE(data, COLOR_RED);
int g = GET_DATA_BYTE(data, COLOR_GREEN);
int b = GET_DATA_BYTE(data, COLOR_BLUE);
red_stats.add(r, 1);
green_stats.add(g, 1);
blue_stats.add(b, 1);
}
}
// Find the RGB component with the greatest 8th-ile-range.
// 8th-iles are used instead of quartiles to get closer to the true
// foreground color, which is going to be faint at best because of the
// pre-scaling of the input image.
int best_l8 = static_cast<int>(red_stats.ile(0.125f));
int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
int best_i8r = best_u8 - best_l8;
int x_color = COLOR_RED;
int y1_color = COLOR_GREEN;
int y2_color = COLOR_BLUE;
int l8 = static_cast<int>(green_stats.ile(0.125f));
int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
if (u8 - l8 > best_i8r) {
best_i8r = u8 - l8;
best_l8 = l8;
best_u8 = u8;
x_color = COLOR_GREEN;
y1_color = COLOR_RED;
}
l8 = static_cast<int>(blue_stats.ile(0.125f));
u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
if (u8 - l8 > best_i8r) {
best_i8r = u8 - l8;
best_l8 = l8;
best_u8 = u8;
x_color = COLOR_BLUE;
y1_color = COLOR_GREEN;
y2_color = COLOR_RED;
}
if (best_i8r >= kMinColorDifference) {
LLSQ line1;
LLSQ line2;
uint32_t* data = pixGetData(scaled);
for (int im_y = 0; im_y < height_pad; ++im_y) {
for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
int x = GET_DATA_BYTE(data, x_color);
int y1 = GET_DATA_BYTE(data, y1_color);
int y2 = GET_DATA_BYTE(data, y2_color);
line1.add(x, y1);
line2.add(x, y2);
}
}
double m1 = line1.m();
double c1 = line1.c(m1);
double m2 = line2.m();
double c2 = line2.c(m2);
double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
rms *= kRMSFitScaling;
// Save the results.
color1[x_color] = ClipToByte(best_l8);
color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
color2[x_color] = ClipToByte(best_u8);
color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
} else {
// There is only one color.
color1[COLOR_RED] = ClipToByte(red_stats.median());
color1[COLOR_GREEN] = ClipToByte(green_stats.median());
color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
color1[L_ALPHA_CHANNEL] = 0;
memcpy(color2, color1, 4);
}
if (color_map1 != nullptr) {
pixSetInRectArbitrary(color_map1, scaled_box,
ComposeRGB(color1[COLOR_RED],
color1[COLOR_GREEN],
color1[COLOR_BLUE]));
pixSetInRectArbitrary(color_map2, scaled_box,
ComposeRGB(color2[COLOR_RED],
color2[COLOR_GREEN],
color2[COLOR_BLUE]));
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
}
pixDestroy(&scaled);
boxDestroy(&scaled_box);
}
// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
// The following functions are responsible for cutting a polygonal image from
// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts

View File

@ -87,27 +87,6 @@ class ImageFind {
static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2,
const uint8_t* point);
// Returns the leptonica combined code for the given RGB triplet.
// r, g and b are handed to leptonica's composeRGBPixel in that order and the
// value it produces is returned.
static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b);
// Returns the input value clipped to a uint8_t.
// Values below 0 give 0, values of 255 or more give 255, and anything in
// between is converted with a static_cast, which discards the fractional
// part (callers that want rounding add 0.5 first).
static uint8_t ClipToByte(double pixel);
// Computes the light and dark extremes of color in the given rectangle of
// the given pix, which is factor smaller than the coordinate system in rect.
// pix must be non-null and 32 bits deep; this is checked with ASSERT_HOST.
// The rectangle is padded outwards by 2 (scaled) pixels where the image
// allows. If the padded rectangle is too small, the function returns
// without writing to color1, color2 or any of the maps.
// The light and dark points are taken to be the upper and lower 8th-ile of
// the most deviant of R, G and B. The value of the other 2 channels are
// computed by linear fit against the most deviant.
// The colors of the two points are returned in color1 (dark) and color2
// (light), with the alpha channel set to a scaled sum of the rms errors of
// the two fits. color1 and color2 must each have room for 4 bytes.
// If the 8th-ile range of the most deviant channel is below
// kMinColorDifference, both colors are set to the per-channel medians and
// the alpha channel is 0.
// If color_map1 is not null then it and color_map2 get rect pasted in them
// with the two calculated colors, and rms map gets a pasted rect of the rms.
// color_map2 and rms_map are not checked separately, so they must be
// non-null whenever color_map1 is.
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
Pix* color_map1, Pix* color_map2,
Pix* rms_map,
uint8_t* color1, uint8_t* color2);
// Returns true if there are no black pixels in between the boxes.
// The im_box must represent the bounding box of the pix in tesseract
// coordinates, which may be negative, due to rotations to make the textlines