mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
Basic usage of new Image class. Only pixDestroy is wrapped at the moment.
Add new methods to Image class and replace them in non-public code.
This commit is contained in:
parent
ce6e2f1821
commit
a792b67983
@ -631,7 +631,7 @@ Pix *TessBaseAPI::GetThresholdedImage() {
|
|||||||
if (tesseract_ == nullptr || thresholder_ == nullptr) {
|
if (tesseract_ == nullptr || thresholder_ == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
|
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return pixClone(tesseract_->pix_binary());
|
return pixClone(tesseract_->pix_binary());
|
||||||
@ -2098,9 +2098,11 @@ bool TessBaseAPI::Threshold(Pix **pix) {
|
|||||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||||
}
|
}
|
||||||
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
|
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
|
||||||
if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) {
|
Image im(*pix);
|
||||||
|
if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
*pix = im;
|
||||||
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
|
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
|
||||||
&image_height_);
|
&image_height_);
|
||||||
if (!thresholder_->IsBinary()) {
|
if (!thresholder_->IsBinary()) {
|
||||||
@ -2144,7 +2146,7 @@ int TessBaseAPI::FindLines() {
|
|||||||
tesseract_->InitAdaptiveClassifier(nullptr);
|
tesseract_->InitAdaptiveClassifier(nullptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
|
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2270,7 +2272,7 @@ bool TessBaseAPI::DetectOS(OSResults *osr) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ClearResults();
|
ClearResults();
|
||||||
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
|
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,13 +583,13 @@ void EquationDetect::IdentifySeedParts() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
float EquationDetect::ComputeForegroundDensity(const TBOX &tbox) {
|
float EquationDetect::ComputeForegroundDensity(const TBOX &tbox) {
|
||||||
Pix *pix_bi = lang_tesseract_->pix_binary();
|
Image pix_bi = lang_tesseract_->pix_binary();
|
||||||
const int pix_height = pixGetHeight(pix_bi);
|
const int pix_height = pixGetHeight(pix_bi);
|
||||||
Box *box = boxCreate(tbox.left(), pix_height - tbox.top(), tbox.width(), tbox.height());
|
Box *box = boxCreate(tbox.left(), pix_height - tbox.top(), tbox.width(), tbox.height());
|
||||||
Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr);
|
Image pix_sub = pixClipRectangle(pix_bi, box, nullptr);
|
||||||
l_float32 fract;
|
l_float32 fract;
|
||||||
pixForegroundFraction(pix_sub, &fract);
|
pixForegroundFraction(pix_sub, &fract);
|
||||||
pixDestroy(&pix_sub);
|
pix_sub.destroy();
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
|
|
||||||
return fract;
|
return fract;
|
||||||
@ -1395,7 +1395,7 @@ void EquationDetect::GetOutputTiffName(const char *name, std::string &image_name
|
|||||||
}
|
}
|
||||||
|
|
||||||
void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
|
void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
|
||||||
Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary();
|
Image pix = nullptr, pixBi = lang_tesseract_->pix_binary();
|
||||||
pix = pixConvertTo32(pixBi);
|
pix = pixConvertTo32(pixBi);
|
||||||
ColPartitionGridSearch gsearch(part_grid_);
|
ColPartitionGridSearch gsearch(part_grid_);
|
||||||
ColPartition *part = nullptr;
|
ColPartition *part = nullptr;
|
||||||
@ -1408,11 +1408,11 @@ void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
|
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
void EquationDetect::PaintColParts(const std::string &outfile) const {
|
void EquationDetect::PaintColParts(const std::string &outfile) const {
|
||||||
Pix *pix = pixConvertTo32(lang_tesseract_->BestPix());
|
Image pix = pixConvertTo32(lang_tesseract_->BestPix());
|
||||||
ColPartitionGridSearch gsearch(part_grid_);
|
ColPartitionGridSearch gsearch(part_grid_);
|
||||||
gsearch.StartFullSearch();
|
gsearch.StartFullSearch();
|
||||||
ColPartition *part = nullptr;
|
ColPartition *part = nullptr;
|
||||||
@ -1430,7 +1430,7 @@ void EquationDetect::PaintColParts(const std::string &outfile) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
|
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
void EquationDetect::PrintSpecialBlobsDensity(const ColPartition *part) const {
|
void EquationDetect::PrintSpecialBlobsDensity(const ColPartition *part) const {
|
||||||
|
@ -185,7 +185,7 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
|
|||||||
}
|
}
|
||||||
// Now revised_box always refers to the image.
|
// Now revised_box always refers to the image.
|
||||||
// BestPix is never colormapped, but may be of any depth.
|
// BestPix is never colormapped, but may be of any depth.
|
||||||
Pix *pix = BestPix();
|
Image pix = BestPix();
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
TBOX image_box(0, 0, width, height);
|
TBOX image_box(0, 0, width, height);
|
||||||
@ -196,22 +196,22 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
|
|||||||
}
|
}
|
||||||
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
|
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
|
||||||
revised_box->height());
|
revised_box->height());
|
||||||
Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr);
|
Image box_pix = pixClipRectangle(pix, clip_box, nullptr);
|
||||||
boxDestroy(&clip_box);
|
boxDestroy(&clip_box);
|
||||||
if (box_pix == nullptr) {
|
if (box_pix == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (num_rotations > 0) {
|
if (num_rotations > 0) {
|
||||||
Pix *rot_pix = pixRotateOrth(box_pix, num_rotations);
|
Image rot_pix = pixRotateOrth(box_pix, num_rotations);
|
||||||
pixDestroy(&box_pix);
|
box_pix.destroy();
|
||||||
box_pix = rot_pix;
|
box_pix = rot_pix;
|
||||||
}
|
}
|
||||||
// Convert sub-8-bit images to 8 bit.
|
// Convert sub-8-bit images to 8 bit.
|
||||||
int depth = pixGetDepth(box_pix);
|
int depth = pixGetDepth(box_pix);
|
||||||
if (depth < 8) {
|
if (depth < 8) {
|
||||||
Pix *grey;
|
Image grey;
|
||||||
grey = pixConvertTo8(box_pix, false);
|
grey = pixConvertTo8(box_pix, false);
|
||||||
pixDestroy(&box_pix);
|
box_pix.destroy();
|
||||||
box_pix = grey;
|
box_pix = grey;
|
||||||
}
|
}
|
||||||
bool vertical_text = false;
|
bool vertical_text = false;
|
||||||
|
@ -158,7 +158,7 @@ void OSResults::accumulate(const OSResults &osr) {
|
|||||||
// image, so that non-text blobs are removed from consideration.
|
// image, so that non-text blobs are removed from consideration.
|
||||||
static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
|
static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
|
||||||
TO_BLOCK_LIST *to_blocks) {
|
TO_BLOCK_LIST *to_blocks) {
|
||||||
Pix *pix = tess->pix_binary();
|
Image pix = tess->pix_binary();
|
||||||
ASSERT_HOST(pix != nullptr);
|
ASSERT_HOST(pix != nullptr);
|
||||||
int vertical_x = 0;
|
int vertical_x = 0;
|
||||||
int vertical_y = 1;
|
int vertical_y = 1;
|
||||||
@ -174,10 +174,10 @@ static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *block
|
|||||||
|
|
||||||
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y,
|
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y,
|
||||||
nullptr, &v_lines, &h_lines);
|
nullptr, &v_lines, &h_lines);
|
||||||
Pix *im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
|
Image im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
|
||||||
if (im_pix != nullptr) {
|
if (im_pix != nullptr) {
|
||||||
pixSubtract(pix, pix, im_pix);
|
pixSubtract(pix, pix, im_pix);
|
||||||
pixDestroy(&im_pix);
|
im_pix.destroy();
|
||||||
}
|
}
|
||||||
tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks);
|
tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks);
|
||||||
}
|
}
|
||||||
|
@ -444,19 +444,19 @@ Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
|
|||||||
return cblob_it_->data()->render();
|
return cblob_it_->data()->render();
|
||||||
}
|
}
|
||||||
Box *box = boxCreate(left, top, right - left, bottom - top);
|
Box *box = boxCreate(left, top, right - left, bottom - top);
|
||||||
Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
|
Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
if (level == RIL_BLOCK || level == RIL_PARA) {
|
if (level == RIL_BLOCK || level == RIL_PARA) {
|
||||||
// Clip to the block polygon as well.
|
// Clip to the block polygon as well.
|
||||||
TBOX mask_box;
|
TBOX mask_box;
|
||||||
Pix *mask = it_->block()->block->render_mask(&mask_box);
|
Image mask = it_->block()->block->render_mask(&mask_box);
|
||||||
int mask_x = left - mask_box.left();
|
int mask_x = left - mask_box.left();
|
||||||
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
|
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
|
||||||
// AND the mask and pix, putting the result in pix.
|
// AND the mask and pix, putting the result in pix.
|
||||||
pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
|
pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
|
||||||
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
|
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
|
||||||
std::max(0, mask_y));
|
std::max(0, mask_y));
|
||||||
pixDestroy(&mask);
|
mask.destroy();
|
||||||
}
|
}
|
||||||
return pix;
|
return pix;
|
||||||
}
|
}
|
||||||
@ -488,25 +488,25 @@ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_
|
|||||||
right = std::min(right + padding, rect_width_);
|
right = std::min(right + padding, rect_width_);
|
||||||
bottom = std::min(bottom + padding, rect_height_);
|
bottom = std::min(bottom + padding, rect_height_);
|
||||||
Box *box = boxCreate(*left, *top, right - *left, bottom - *top);
|
Box *box = boxCreate(*left, *top, right - *left, bottom - *top);
|
||||||
Pix *grey_pix = pixClipRectangle(original_img, box, nullptr);
|
Image grey_pix = pixClipRectangle(original_img, box, nullptr);
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
if (level == RIL_BLOCK || level == RIL_PARA) {
|
if (level == RIL_BLOCK || level == RIL_PARA) {
|
||||||
// Clip to the block polygon as well.
|
// Clip to the block polygon as well.
|
||||||
TBOX mask_box;
|
TBOX mask_box;
|
||||||
Pix *mask = it_->block()->block->render_mask(&mask_box);
|
Image mask = it_->block()->block->render_mask(&mask_box);
|
||||||
// Copy the mask registered correctly into an image the size of grey_pix.
|
// Copy the mask registered correctly into an image the size of grey_pix.
|
||||||
int mask_x = *left - mask_box.left();
|
int mask_x = *left - mask_box.left();
|
||||||
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
|
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
|
||||||
int width = pixGetWidth(grey_pix);
|
int width = pixGetWidth(grey_pix);
|
||||||
int height = pixGetHeight(grey_pix);
|
int height = pixGetHeight(grey_pix);
|
||||||
Pix *resized_mask = pixCreate(width, height, 1);
|
Image resized_mask = pixCreate(width, height, 1);
|
||||||
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, PIX_SRC,
|
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, PIX_SRC,
|
||||||
mask, std::max(0, mask_x), std::max(0, mask_y));
|
mask, std::max(0, mask_x), std::max(0, mask_y));
|
||||||
pixDestroy(&mask);
|
mask.destroy();
|
||||||
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1);
|
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1);
|
||||||
pixInvert(resized_mask, resized_mask);
|
pixInvert(resized_mask, resized_mask);
|
||||||
pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
|
pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
|
||||||
pixDestroy(&resized_mask);
|
resized_mask.destroy();
|
||||||
}
|
}
|
||||||
return grey_pix;
|
return grey_pix;
|
||||||
}
|
}
|
||||||
|
@ -58,21 +58,21 @@ const int kMaxCircleErosions = 8;
|
|||||||
// The returned pix must be pixDestroyed after use. nullptr may be returned
|
// The returned pix must be pixDestroyed after use. nullptr may be returned
|
||||||
// if the image doesn't meet the trivial conditions that it uses to determine
|
// if the image doesn't meet the trivial conditions that it uses to determine
|
||||||
// success.
|
// success.
|
||||||
static Pix *RemoveEnclosingCircle(Pix *pixs) {
|
static Image RemoveEnclosingCircle(Image pixs) {
|
||||||
Pix *pixsi = pixInvert(nullptr, pixs);
|
Image pixsi = pixInvert(nullptr, pixs);
|
||||||
Pix *pixc = pixCreateTemplate(pixs);
|
Image pixc = pixCreateTemplate(pixs);
|
||||||
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
|
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
|
||||||
pixSeedfillBinary(pixc, pixc, pixsi, 4);
|
pixSeedfillBinary(pixc, pixc, pixsi, 4);
|
||||||
pixInvert(pixc, pixc);
|
pixInvert(pixc, pixc);
|
||||||
pixDestroy(&pixsi);
|
pixsi.destroy();
|
||||||
Pix *pixt = pixAnd(nullptr, pixs, pixc);
|
Image pixt = pixAnd(nullptr, pixs, pixc);
|
||||||
l_int32 max_count;
|
l_int32 max_count;
|
||||||
pixCountConnComp(pixt, 8, &max_count);
|
pixCountConnComp(pixt, 8, &max_count);
|
||||||
// The count has to go up before we start looking for the minimum.
|
// The count has to go up before we start looking for the minimum.
|
||||||
l_int32 min_count = INT32_MAX;
|
l_int32 min_count = INT32_MAX;
|
||||||
Pix *pixout = nullptr;
|
Image pixout = nullptr;
|
||||||
for (int i = 1; i < kMaxCircleErosions; i++) {
|
for (int i = 1; i < kMaxCircleErosions; i++) {
|
||||||
pixDestroy(&pixt);
|
pixt.destroy();
|
||||||
pixErodeBrick(pixc, pixc, 3, 3);
|
pixErodeBrick(pixc, pixc, 3, 3);
|
||||||
pixt = pixAnd(nullptr, pixs, pixc);
|
pixt = pixAnd(nullptr, pixs, pixc);
|
||||||
l_int32 count;
|
l_int32 count;
|
||||||
@ -82,14 +82,14 @@ static Pix *RemoveEnclosingCircle(Pix *pixs) {
|
|||||||
min_count = count;
|
min_count = count;
|
||||||
} else if (count < min_count) {
|
} else if (count < min_count) {
|
||||||
min_count = count;
|
min_count = count;
|
||||||
pixDestroy(&pixout);
|
pixout.destroy();
|
||||||
pixout = pixCopy(nullptr, pixt); // Save the best.
|
pixout = pixCopy(nullptr, pixt); // Save the best.
|
||||||
} else if (count >= min_count) {
|
} else if (count >= min_count) {
|
||||||
break; // We have passed by the best.
|
break; // We have passed by the best.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&pixt);
|
pixt.destroy();
|
||||||
pixDestroy(&pixc);
|
pixc.destroy();
|
||||||
return pixout;
|
return pixout;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,9 +148,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
|||||||
deskew_ = FCOORD(1.0f, 0.0f);
|
deskew_ = FCOORD(1.0f, 0.0f);
|
||||||
reskew_ = FCOORD(1.0f, 0.0f);
|
reskew_ = FCOORD(1.0f, 0.0f);
|
||||||
if (pageseg_mode == PSM_CIRCLE_WORD) {
|
if (pageseg_mode == PSM_CIRCLE_WORD) {
|
||||||
Pix *pixcleaned = RemoveEnclosingCircle(pix_binary_);
|
Image pixcleaned = RemoveEnclosingCircle(pix_binary_);
|
||||||
if (pixcleaned != nullptr) {
|
if (pixcleaned != nullptr) {
|
||||||
pixDestroy(&pix_binary_);
|
pix_binary_.destroy();
|
||||||
pix_binary_ = pixcleaned;
|
pix_binary_ = pixcleaned;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -200,8 +200,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
|||||||
*/
|
*/
|
||||||
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
|
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
|
||||||
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) {
|
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) {
|
||||||
Pix *photomask_pix = nullptr;
|
Image photomask_pix = nullptr;
|
||||||
Pix *musicmask_pix = nullptr;
|
Image musicmask_pix = nullptr;
|
||||||
// The blocks made by the ColumnFinder. Moved to blocks before return.
|
// The blocks made by the ColumnFinder. Moved to blocks before return.
|
||||||
BLOCK_LIST found_blocks;
|
BLOCK_LIST found_blocks;
|
||||||
TO_BLOCK_LIST temp_blocks;
|
TO_BLOCK_LIST temp_blocks;
|
||||||
@ -231,8 +231,8 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC
|
|||||||
}
|
}
|
||||||
delete finder;
|
delete finder;
|
||||||
}
|
}
|
||||||
pixDestroy(&photomask_pix);
|
photomask_pix.destroy();
|
||||||
pixDestroy(&musicmask_pix);
|
musicmask_pix.destroy();
|
||||||
if (result < 0) {
|
if (result < 0) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -272,8 +272,8 @@ static void AddAllScriptsConverted(const UNICHARSET &sid_set, const UNICHARSET &
|
|||||||
ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode,
|
ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode,
|
||||||
BLOCK_LIST *blocks, Tesseract *osd_tess,
|
BLOCK_LIST *blocks, Tesseract *osd_tess,
|
||||||
OSResults *osr, TO_BLOCK_LIST *to_blocks,
|
OSResults *osr, TO_BLOCK_LIST *to_blocks,
|
||||||
Pix **photo_mask_pix,
|
Image *photo_mask_pix,
|
||||||
Pix **music_mask_pix) {
|
Image *music_mask_pix) {
|
||||||
int vertical_x = 0;
|
int vertical_x = 0;
|
||||||
int vertical_y = 1;
|
int vertical_y = 1;
|
||||||
TabVector_LIST v_lines;
|
TabVector_LIST v_lines;
|
||||||
@ -293,14 +293,14 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
|
|||||||
// Leptonica is used to find a mask of the photo regions in the input.
|
// Leptonica is used to find a mask of the photo regions in the input.
|
||||||
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
|
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
|
||||||
if (tessedit_dump_pageseg_images) {
|
if (tessedit_dump_pageseg_images) {
|
||||||
Pix *pix_no_image_ = nullptr;
|
Image pix_no_image_ = nullptr;
|
||||||
if (*photo_mask_pix != nullptr) {
|
if (*photo_mask_pix != nullptr) {
|
||||||
pix_no_image_ = pixSubtract(nullptr, pix_binary_, *photo_mask_pix);
|
pix_no_image_ = pixSubtract(nullptr, pix_binary_, *photo_mask_pix);
|
||||||
} else {
|
} else {
|
||||||
pix_no_image_ = pixClone(pix_binary_);
|
pix_no_image_ = pixClone(pix_binary_);
|
||||||
}
|
}
|
||||||
pixa_debug_.AddPix(pix_no_image_, "NoImages");
|
pixa_debug_.AddPix(pix_no_image_, "NoImages");
|
||||||
pixDestroy(&pix_no_image_);
|
pix_no_image_.destroy();
|
||||||
}
|
}
|
||||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
|
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
|
||||||
v_lines.clear();
|
v_lines.clear();
|
||||||
|
@ -421,7 +421,7 @@ Tesseract::Tesseract()
|
|||||||
|
|
||||||
Tesseract::~Tesseract() {
|
Tesseract::~Tesseract() {
|
||||||
Clear();
|
Clear();
|
||||||
pixDestroy(&pix_original_);
|
pix_original_.destroy();
|
||||||
end_tesseract();
|
end_tesseract();
|
||||||
for (auto *lang : sub_langs_) {
|
for (auto *lang : sub_langs_) {
|
||||||
delete lang;
|
delete lang;
|
||||||
@ -442,10 +442,10 @@ Dict &Tesseract::getDict() {
|
|||||||
void Tesseract::Clear() {
|
void Tesseract::Clear() {
|
||||||
std::string debug_name = imagebasename + "_debug.pdf";
|
std::string debug_name = imagebasename + "_debug.pdf";
|
||||||
pixa_debug_.WritePDF(debug_name.c_str());
|
pixa_debug_.WritePDF(debug_name.c_str());
|
||||||
pixDestroy(&pix_binary_);
|
pix_binary_.destroy();
|
||||||
pixDestroy(&pix_grey_);
|
pix_grey_.destroy();
|
||||||
pixDestroy(&pix_thresholds_);
|
pix_thresholds_.destroy();
|
||||||
pixDestroy(&scaled_color_);
|
scaled_color_.destroy();
|
||||||
deskew_ = FCOORD(1.0f, 0.0f);
|
deskew_ = FCOORD(1.0f, 0.0f);
|
||||||
reskew_ = FCOORD(1.0f, 0.0f);
|
reskew_ = FCOORD(1.0f, 0.0f);
|
||||||
splitter_.Clear();
|
splitter_.Clear();
|
||||||
@ -518,7 +518,7 @@ void Tesseract::PrepareForPageseg() {
|
|||||||
if (pageseg_strategy > max_pageseg_strategy) {
|
if (pageseg_strategy > max_pageseg_strategy) {
|
||||||
max_pageseg_strategy = pageseg_strategy;
|
max_pageseg_strategy = pageseg_strategy;
|
||||||
}
|
}
|
||||||
pixDestroy(&sub_lang->pix_binary_);
|
sub_lang->pix_binary_.destroy();
|
||||||
sub_lang->pix_binary_ = pixClone(pix_binary());
|
sub_lang->pix_binary_ = pixClone(pix_binary());
|
||||||
}
|
}
|
||||||
// Perform shiro-rekha (top-line) splitting and replace the current image by
|
// Perform shiro-rekha (top-line) splitting and replace the current image by
|
||||||
@ -527,7 +527,7 @@ void Tesseract::PrepareForPageseg() {
|
|||||||
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
|
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
|
||||||
if (splitter_.Split(true, &pixa_debug_)) {
|
if (splitter_.Split(true, &pixa_debug_)) {
|
||||||
ASSERT_HOST(splitter_.splitted_image());
|
ASSERT_HOST(splitter_.splitted_image());
|
||||||
pixDestroy(&pix_binary_);
|
pix_binary_.destroy();
|
||||||
pix_binary_ = pixClone(splitter_.splitted_image());
|
pix_binary_ = pixClone(splitter_.splitted_image());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -555,14 +555,14 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O
|
|||||||
bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
|
bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
|
||||||
// Restore pix_binary to the binarized original pix for future reference.
|
// Restore pix_binary to the binarized original pix for future reference.
|
||||||
ASSERT_HOST(splitter_.orig_pix());
|
ASSERT_HOST(splitter_.orig_pix());
|
||||||
pixDestroy(&pix_binary_);
|
pix_binary_.destroy();
|
||||||
pix_binary_ = pixClone(splitter_.orig_pix());
|
pix_binary_ = pixClone(splitter_.orig_pix());
|
||||||
// If the pageseg and ocr strategies are different, refresh the block list
|
// If the pageseg and ocr strategies are different, refresh the block list
|
||||||
// (from the last SegmentImage call) with blobs from the real image to be used
|
// (from the last SegmentImage call) with blobs from the real image to be used
|
||||||
// for OCR.
|
// for OCR.
|
||||||
if (splitter_.HasDifferentSplitStrategies()) {
|
if (splitter_.HasDifferentSplitStrategies()) {
|
||||||
BLOCK block("", true, 0, 0, 0, 0, pixGetWidth(pix_binary_), pixGetHeight(pix_binary_));
|
BLOCK block("", true, 0, 0, 0, 0, pixGetWidth(pix_binary_), pixGetHeight(pix_binary_));
|
||||||
Pix *pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix();
|
Image pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix();
|
||||||
extract_edges(pix_for_ocr, &block);
|
extract_edges(pix_for_ocr, &block);
|
||||||
splitter_.RefreshSegmentationWithNewBlobs(block.blob_list());
|
splitter_.RefreshSegmentationWithNewBlobs(block.blob_list());
|
||||||
}
|
}
|
||||||
|
@ -197,26 +197,26 @@ public:
|
|||||||
return reskew_;
|
return reskew_;
|
||||||
}
|
}
|
||||||
// Destroy any existing pix and return a pointer to the pointer.
|
// Destroy any existing pix and return a pointer to the pointer.
|
||||||
Pix **mutable_pix_binary() {
|
Image *mutable_pix_binary() {
|
||||||
pixDestroy(&pix_binary_);
|
pix_binary_.destroy();
|
||||||
return &pix_binary_;
|
return &pix_binary_;
|
||||||
}
|
}
|
||||||
Pix *pix_binary() const {
|
Image pix_binary() const {
|
||||||
return pix_binary_;
|
return pix_binary_;
|
||||||
}
|
}
|
||||||
Pix *pix_grey() const {
|
Image pix_grey() const {
|
||||||
return pix_grey_;
|
return pix_grey_;
|
||||||
}
|
}
|
||||||
void set_pix_grey(Pix *grey_pix) {
|
void set_pix_grey(Image grey_pix) {
|
||||||
pixDestroy(&pix_grey_);
|
pix_grey_.destroy();
|
||||||
pix_grey_ = grey_pix;
|
pix_grey_ = grey_pix;
|
||||||
}
|
}
|
||||||
Pix *pix_original() const {
|
Image pix_original() const {
|
||||||
return pix_original_;
|
return pix_original_;
|
||||||
}
|
}
|
||||||
// Takes ownership of the given original_pix.
|
// Takes ownership of the given original_pix.
|
||||||
void set_pix_original(Pix *original_pix) {
|
void set_pix_original(Image original_pix) {
|
||||||
pixDestroy(&pix_original_);
|
pix_original_.destroy();
|
||||||
pix_original_ = original_pix;
|
pix_original_ = original_pix;
|
||||||
// Clone to sublangs as well.
|
// Clone to sublangs as well.
|
||||||
for (auto &lang : sub_langs_) {
|
for (auto &lang : sub_langs_) {
|
||||||
@ -231,7 +231,7 @@ public:
|
|||||||
// To tell the difference pixGetDepth() will return 32, 8 or 1.
|
// To tell the difference pixGetDepth() will return 32, 8 or 1.
|
||||||
// In any case, the return value is a borrowed Pix, and should not be
|
// In any case, the return value is a borrowed Pix, and should not be
|
||||||
// deleted or pixDestroyed.
|
// deleted or pixDestroyed.
|
||||||
Pix *BestPix() const {
|
Image BestPix() const {
|
||||||
if (pixGetWidth(pix_original_) == ImageWidth()) {
|
if (pixGetWidth(pix_original_) == ImageWidth()) {
|
||||||
return pix_original_;
|
return pix_original_;
|
||||||
} else if (pix_grey_ != nullptr) {
|
} else if (pix_grey_ != nullptr) {
|
||||||
@ -240,8 +240,8 @@ public:
|
|||||||
return pix_binary_;
|
return pix_binary_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void set_pix_thresholds(Pix *thresholds) {
|
void set_pix_thresholds(Image thresholds) {
|
||||||
pixDestroy(&pix_thresholds_);
|
pix_thresholds_.destroy();
|
||||||
pix_thresholds_ = thresholds;
|
pix_thresholds_ = thresholds;
|
||||||
}
|
}
|
||||||
int source_resolution() const {
|
int source_resolution() const {
|
||||||
@ -256,13 +256,13 @@ public:
|
|||||||
int ImageHeight() const {
|
int ImageHeight() const {
|
||||||
return pixGetHeight(pix_binary_);
|
return pixGetHeight(pix_binary_);
|
||||||
}
|
}
|
||||||
Pix *scaled_color() const {
|
Image scaled_color() const {
|
||||||
return scaled_color_;
|
return scaled_color_;
|
||||||
}
|
}
|
||||||
int scaled_factor() const {
|
int scaled_factor() const {
|
||||||
return scaled_factor_;
|
return scaled_factor_;
|
||||||
}
|
}
|
||||||
void SetScaledColor(int factor, Pix *color) {
|
void SetScaledColor(int factor, Image color) {
|
||||||
scaled_factor_ = factor;
|
scaled_factor_ = factor;
|
||||||
scaled_color_ = color;
|
scaled_color_ = color;
|
||||||
}
|
}
|
||||||
@ -328,8 +328,8 @@ public:
|
|||||||
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr);
|
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr);
|
||||||
ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks,
|
ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks,
|
||||||
Tesseract *osd_tess, OSResults *osr,
|
Tesseract *osd_tess, OSResults *osr,
|
||||||
TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix,
|
TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix,
|
||||||
Pix **music_mask_pix);
|
Image *music_mask_pix);
|
||||||
// par_control.cpp
|
// par_control.cpp
|
||||||
void PrerecAllWordsPar(const std::vector<WordData> &words);
|
void PrerecAllWordsPar(const std::vector<WordData> &words);
|
||||||
|
|
||||||
@ -1034,13 +1034,13 @@ private:
|
|||||||
std::string word_config_;
|
std::string word_config_;
|
||||||
// Image used for input to layout analysis and tesseract recognition.
|
// Image used for input to layout analysis and tesseract recognition.
|
||||||
// May be modified by the ShiroRekhaSplitter to eliminate the top-line.
|
// May be modified by the ShiroRekhaSplitter to eliminate the top-line.
|
||||||
Pix *pix_binary_;
|
Image pix_binary_;
|
||||||
// Grey-level input image if the input was not binary, otherwise nullptr.
|
// Grey-level input image if the input was not binary, otherwise nullptr.
|
||||||
Pix *pix_grey_;
|
Image pix_grey_;
|
||||||
// Original input image. Color if the input was color.
|
// Original input image. Color if the input was color.
|
||||||
Pix *pix_original_;
|
Image pix_original_;
|
||||||
// Thresholds that were used to generate the thresholded image from grey.
|
// Thresholds that were used to generate the thresholded image from grey.
|
||||||
Pix *pix_thresholds_;
|
Image pix_thresholds_;
|
||||||
// Debug images. If non-empty, will be written on destruction.
|
// Debug images. If non-empty, will be written on destruction.
|
||||||
DebugPixa pixa_debug_;
|
DebugPixa pixa_debug_;
|
||||||
// Input image resolution after any scaling. The resolution is not well
|
// Input image resolution after any scaling. The resolution is not well
|
||||||
@ -1053,7 +1053,7 @@ private:
|
|||||||
Textord textord_;
|
Textord textord_;
|
||||||
// True if the primary language uses right_to_left reading order.
|
// True if the primary language uses right_to_left reading order.
|
||||||
bool right_to_left_;
|
bool right_to_left_;
|
||||||
Pix *scaled_color_;
|
Image scaled_color_;
|
||||||
int scaled_factor_;
|
int scaled_factor_;
|
||||||
FCOORD deskew_;
|
FCOORD deskew_;
|
||||||
FCOORD reskew_;
|
FCOORD reskew_;
|
||||||
|
@ -49,7 +49,7 @@ ImageThresholder::~ImageThresholder() {
|
|||||||
|
|
||||||
// Destroy the Pix if there is one, freeing memory.
|
// Destroy the Pix if there is one, freeing memory.
|
||||||
void ImageThresholder::Clear() {
|
void ImageThresholder::Clear() {
|
||||||
pixDestroy(&pix_);
|
pix_.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return true if no image has been set.
|
// Return true if no image has been set.
|
||||||
@ -71,7 +71,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
|
|||||||
if (bpp == 0) {
|
if (bpp == 0) {
|
||||||
bpp = 1;
|
bpp = 1;
|
||||||
}
|
}
|
||||||
Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
|
Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
|
||||||
l_uint32 *data = pixGetData(pix);
|
l_uint32 *data = pixGetData(pix);
|
||||||
int wpl = pixGetWpl(pix);
|
int wpl = pixGetWpl(pix);
|
||||||
switch (bpp) {
|
switch (bpp) {
|
||||||
@ -121,7 +121,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
|
|||||||
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
|
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
|
||||||
}
|
}
|
||||||
SetImage(pix);
|
SetImage(pix);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the coordinates of the rectangle to process for later use.
|
// Store the coordinates of the rectangle to process for later use.
|
||||||
@ -152,22 +152,22 @@ void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *heigh
|
|||||||
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
|
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
|
||||||
// immediately after, but may not go away until after the Thresholder has
|
// immediately after, but may not go away until after the Thresholder has
|
||||||
// finished with it.
|
// finished with it.
|
||||||
void ImageThresholder::SetImage(const Pix *pix) {
|
void ImageThresholder::SetImage(const Image pix) {
|
||||||
if (pix_ != nullptr) {
|
if (pix_ != nullptr) {
|
||||||
pixDestroy(&pix_);
|
pix_.destroy();
|
||||||
}
|
}
|
||||||
Pix *src = const_cast<Pix *>(pix);
|
Image src = pix;
|
||||||
int depth;
|
int depth;
|
||||||
pixGetDimensions(src, &image_width_, &image_height_, &depth);
|
pixGetDimensions(src, &image_width_, &image_height_, &depth);
|
||||||
// Convert the image as necessary so it is one of binary, plain RGB, or
|
// Convert the image as necessary so it is one of binary, plain RGB, or
|
||||||
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
|
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
|
||||||
// not just a clone of the input.
|
// not just a clone of the input.
|
||||||
if (pixGetColormap(src)) {
|
if (pixGetColormap(src)) {
|
||||||
Pix *tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
|
Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
|
||||||
depth = pixGetDepth(tmp);
|
depth = pixGetDepth(tmp);
|
||||||
if (depth > 1 && depth < 8) {
|
if (depth > 1 && depth < 8) {
|
||||||
pix_ = pixConvertTo8(tmp, false);
|
pix_ = pixConvertTo8(tmp, false);
|
||||||
pixDestroy(&tmp);
|
tmp.destroy();
|
||||||
} else {
|
} else {
|
||||||
pix_ = tmp;
|
pix_ = tmp;
|
||||||
}
|
}
|
||||||
@ -188,7 +188,7 @@ void ImageThresholder::SetImage(const Pix *pix) {
|
|||||||
// Creates a Pix and sets pix to point to the resulting pointer.
|
// Creates a Pix and sets pix to point to the resulting pointer.
|
||||||
// Caller must use pixDestroy to free the created Pix.
|
// Caller must use pixDestroy to free the created Pix.
|
||||||
/// Returns false on error.
|
/// Returns false on error.
|
||||||
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
|
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Image *pix) {
|
||||||
if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
|
if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
|
||||||
tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
|
tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
|
||||||
return false;
|
return false;
|
||||||
@ -196,9 +196,9 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
|
|||||||
if (pix_channels_ == 0) {
|
if (pix_channels_ == 0) {
|
||||||
// We have a binary image, but it still has to be copied, as this API
|
// We have a binary image, but it still has to be copied, as this API
|
||||||
// allows the caller to modify the output.
|
// allows the caller to modify the output.
|
||||||
Pix *original = GetPixRect();
|
Image original = GetPixRect();
|
||||||
*pix = pixCopy(nullptr, original);
|
*pix = pixCopy(nullptr, original);
|
||||||
pixDestroy(&original);
|
original.destroy();
|
||||||
} else {
|
} else {
|
||||||
OtsuThresholdRectToPix(pix_, pix);
|
OtsuThresholdRectToPix(pix_, pix);
|
||||||
}
|
}
|
||||||
@ -212,18 +212,18 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
|
|||||||
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
||||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||||
// Returns nullptr if the input is binary. PixDestroy after use.
|
// Returns nullptr if the input is binary. PixDestroy after use.
|
||||||
Pix *ImageThresholder::GetPixRectThresholds() {
|
Image ImageThresholder::GetPixRectThresholds() {
|
||||||
if (IsBinary()) {
|
if (IsBinary()) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
Pix *pix_grey = GetPixRectGrey();
|
Image pix_grey = GetPixRectGrey();
|
||||||
int width = pixGetWidth(pix_grey);
|
int width = pixGetWidth(pix_grey);
|
||||||
int height = pixGetHeight(pix_grey);
|
int height = pixGetHeight(pix_grey);
|
||||||
std::vector<int> thresholds;
|
std::vector<int> thresholds;
|
||||||
std::vector<int> hi_values;
|
std::vector<int> hi_values;
|
||||||
OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
|
OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
|
||||||
pixDestroy(&pix_grey);
|
pix_grey.destroy();
|
||||||
Pix *pix_thresholds = pixCreate(width, height, 8);
|
Image pix_thresholds = pixCreate(width, height, 8);
|
||||||
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
|
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
|
||||||
pixSetAllArbitrary(pix_thresholds, threshold);
|
pixSetAllArbitrary(pix_thresholds, threshold);
|
||||||
return pix_thresholds;
|
return pix_thresholds;
|
||||||
@ -239,14 +239,14 @@ void ImageThresholder::Init() {
|
|||||||
// This function will be used in the future by the page layout analysis, and
|
// This function will be used in the future by the page layout analysis, and
|
||||||
// the layout analysis that uses it will only be available with Leptonica,
|
// the layout analysis that uses it will only be available with Leptonica,
|
||||||
// so there is no raw equivalent.
|
// so there is no raw equivalent.
|
||||||
Pix *ImageThresholder::GetPixRect() {
|
Image ImageThresholder::GetPixRect() {
|
||||||
if (IsFullImage()) {
|
if (IsFullImage()) {
|
||||||
// Just clone the whole thing.
|
// Just clone the whole thing.
|
||||||
return pixClone(pix_);
|
return pixClone(pix_);
|
||||||
} else {
|
} else {
|
||||||
// Crop to the given rectangle.
|
// Crop to the given rectangle.
|
||||||
Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
|
Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
|
||||||
Pix *cropped = pixClipRectangle(pix_, box, nullptr);
|
Image cropped = pixClipRectangle(pix_, box, nullptr);
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
return cropped;
|
return cropped;
|
||||||
}
|
}
|
||||||
@ -256,24 +256,24 @@ Pix *ImageThresholder::GetPixRect() {
|
|||||||
// and at the same resolution as the output binary.
|
// and at the same resolution as the output binary.
|
||||||
// The returned Pix must be pixDestroyed.
|
// The returned Pix must be pixDestroyed.
|
||||||
// Provided to the classifier to extract features from the greyscale image.
|
// Provided to the classifier to extract features from the greyscale image.
|
||||||
Pix *ImageThresholder::GetPixRectGrey() {
|
Image ImageThresholder::GetPixRectGrey() {
|
||||||
auto pix = GetPixRect(); // May have to be reduced to grey.
|
auto pix = GetPixRect(); // May have to be reduced to grey.
|
||||||
int depth = pixGetDepth(pix);
|
int depth = pixGetDepth(pix);
|
||||||
if (depth != 8) {
|
if (depth != 8) {
|
||||||
if (depth == 24) {
|
if (depth == 24) {
|
||||||
auto tmp = pixConvert24To32(pix);
|
auto tmp = pixConvert24To32(pix);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
pix = tmp;
|
pix = tmp;
|
||||||
}
|
}
|
||||||
auto result = pixConvertTo8(pix, false);
|
auto result = pixConvertTo8(pix, false);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
return pix;
|
return pix;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
||||||
void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const {
|
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
|
||||||
std::vector<int> thresholds;
|
std::vector<int> thresholds;
|
||||||
std::vector<int> hi_values;
|
std::vector<int> hi_values;
|
||||||
|
|
||||||
@ -298,8 +298,8 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
|
|||||||
/// from the class, using thresholds/hi_values to the output pix.
|
/// from the class, using thresholds/hi_values to the output pix.
|
||||||
/// NOTE that num_channels is the size of the thresholds and hi_values
|
/// NOTE that num_channels is the size of the thresholds and hi_values
|
||||||
// arrays and also the bytes per pixel in src_pix.
|
// arrays and also the bytes per pixel in src_pix.
|
||||||
void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector<int> &thresholds,
|
void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
|
||||||
const std::vector<int> &hi_values, Pix **pix) const {
|
const std::vector<int> &hi_values, Image *pix) const {
|
||||||
*pix = pixCreate(rect_width_, rect_height_, 1);
|
*pix = pixCreate(rect_width_, rect_height_, 1);
|
||||||
uint32_t *pixdata = pixGetData(*pix);
|
uint32_t *pixdata = pixGetData(*pix);
|
||||||
int wpl = pixGetWpl(*pix);
|
int wpl = pixGetWpl(*pix);
|
||||||
|
@ -113,13 +113,13 @@ public:
|
|||||||
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
|
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
|
||||||
/// immediately after, but may not go away until after the Thresholder has
|
/// immediately after, but may not go away until after the Thresholder has
|
||||||
/// finished with it.
|
/// finished with it.
|
||||||
void SetImage(const Pix *pix);
|
void SetImage(const Image pix);
|
||||||
|
|
||||||
/// Threshold the source image as efficiently as possible to the output Pix.
|
/// Threshold the source image as efficiently as possible to the output Pix.
|
||||||
/// Creates a Pix and sets pix to point to the resulting pointer.
|
/// Creates a Pix and sets pix to point to the resulting pointer.
|
||||||
/// Caller must use pixDestroy to free the created Pix.
|
/// Caller must use pixDestroy to free the created Pix.
|
||||||
/// Returns false on error.
|
/// Returns false on error.
|
||||||
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix);
|
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);
|
||||||
|
|
||||||
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
||||||
// returned pix may be an integer reduction of the binary image such that
|
// returned pix may be an integer reduction of the binary image such that
|
||||||
@ -128,20 +128,20 @@ public:
|
|||||||
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
// Ideally the 8 bit threshold should be the exact threshold used to generate
|
||||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||||
// Returns nullptr if the input is binary. PixDestroy after use.
|
// Returns nullptr if the input is binary. PixDestroy after use.
|
||||||
virtual Pix *GetPixRectThresholds();
|
virtual Image GetPixRectThresholds();
|
||||||
|
|
||||||
/// Get a clone/copy of the source image rectangle.
|
/// Get a clone/copy of the source image rectangle.
|
||||||
/// The returned Pix must be pixDestroyed.
|
/// The returned Pix must be pixDestroyed.
|
||||||
/// This function will be used in the future by the page layout analysis, and
|
/// This function will be used in the future by the page layout analysis, and
|
||||||
/// the layout analysis that uses it will only be available with Leptonica,
|
/// the layout analysis that uses it will only be available with Leptonica,
|
||||||
/// so there is no raw equivalent.
|
/// so there is no raw equivalent.
|
||||||
Pix *GetPixRect();
|
Image GetPixRect();
|
||||||
|
|
||||||
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
// Get a clone/copy of the source image rectangle, reduced to greyscale,
|
||||||
// and at the same resolution as the output binary.
|
// and at the same resolution as the output binary.
|
||||||
// The returned Pix must be pixDestroyed.
|
// The returned Pix must be pixDestroyed.
|
||||||
// Provided to the classifier to extract features from the greyscale image.
|
// Provided to the classifier to extract features from the greyscale image.
|
||||||
virtual Pix *GetPixRectGrey();
|
virtual Image GetPixRectGrey();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
@ -157,19 +157,19 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
// Otsu thresholds the rectangle, taking the rectangle from *this.
|
||||||
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const;
|
void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
|
||||||
|
|
||||||
/// Threshold the rectangle, taking everything except the src_pix
|
/// Threshold the rectangle, taking everything except the src_pix
|
||||||
/// from the class, using thresholds/hi_values to the output pix.
|
/// from the class, using thresholds/hi_values to the output pix.
|
||||||
/// NOTE that num_channels is the size of the thresholds and hi_values
|
/// NOTE that num_channels is the size of the thresholds and hi_values
|
||||||
// arrays and also the bytes per pixel in src_pix.
|
// arrays and also the bytes per pixel in src_pix.
|
||||||
void ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector<int> &thresholds,
|
void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
|
||||||
const std::vector <int> &hi_values, Pix **pix) const;
|
const std::vector <int> &hi_values, Image *pix) const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// Clone or other copy of the source Pix.
|
/// Clone or other copy of the source Pix.
|
||||||
/// The pix will always be PixDestroy()ed on destruction of the class.
|
/// The pix will always be PixDestroy()ed on destruction of the class.
|
||||||
Pix *pix_;
|
Image pix_;
|
||||||
|
|
||||||
int image_width_; ///< Width of source pix_.
|
int image_width_; ///< Width of source pix_.
|
||||||
int image_height_; ///< Height of source pix_.
|
int image_height_; ///< Height of source pix_.
|
||||||
|
@ -390,7 +390,7 @@ void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) {
|
|||||||
|
|
||||||
// Helper to compute edge offsets for all the blobs on the list.
|
// Helper to compute edge offsets for all the blobs on the list.
|
||||||
// See coutln.h for an explanation of edge offsets.
|
// See coutln.h for an explanation of edge offsets.
|
||||||
void BLOBNBOX::ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs) {
|
void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) {
|
||||||
int grey_height = 0;
|
int grey_height = 0;
|
||||||
int thr_height = 0;
|
int thr_height = 0;
|
||||||
int scale_factor = 1;
|
int scale_factor = 1;
|
||||||
@ -1052,7 +1052,7 @@ void TO_BLOCK::DeleteUnownedNoise() {
|
|||||||
// Thresholds must either be the same size as grey or an integer down-scale
|
// Thresholds must either be the same size as grey or an integer down-scale
|
||||||
// of grey.
|
// of grey.
|
||||||
// See coutln.h for an explanation of edge offsets.
|
// See coutln.h for an explanation of edge offsets.
|
||||||
void TO_BLOCK::ComputeEdgeOffsets(Pix *thresholds, Pix *grey) {
|
void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) {
|
||||||
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
|
||||||
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
|
||||||
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
|
||||||
|
@ -435,7 +435,7 @@ public:
|
|||||||
static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
|
static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
|
||||||
// Helper to compute edge offsets for all the blobs on the list.
|
// Helper to compute edge offsets for all the blobs on the list.
|
||||||
// See coutln.h for an explanation of edge offsets.
|
// See coutln.h for an explanation of edge offsets.
|
||||||
static void ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs);
|
static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs);
|
||||||
|
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
// Helper to draw all the blobs on the list in the given body_colour,
|
// Helper to draw all the blobs on the list in the given body_colour,
|
||||||
@ -745,7 +745,7 @@ public:
|
|||||||
// Thresholds must either be the same size as grey or an integer down-scale
|
// Thresholds must either be the same size as grey or an integer down-scale
|
||||||
// of grey.
|
// of grey.
|
||||||
// See coutln.h for an explanation of edge offsets.
|
// See coutln.h for an explanation of edge offsets.
|
||||||
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey);
|
void ComputeEdgeOffsets(Image thresholds, Image grey);
|
||||||
|
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
// Draw the noise blobs from all lists in red.
|
// Draw the noise blobs from all lists in red.
|
||||||
|
@ -401,7 +401,7 @@ void TBLOB::Clear() {
|
|||||||
// this blob and the Pix for the full image.
|
// this blob and the Pix for the full image.
|
||||||
void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
|
void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
|
||||||
float x_origin, float y_origin, float x_scale, float y_scale,
|
float x_origin, float y_origin, float x_scale, float y_scale,
|
||||||
float final_xshift, float final_yshift, bool inverse, Pix *pix) {
|
float final_xshift, float final_yshift, bool inverse, Image pix) {
|
||||||
denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale,
|
denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale,
|
||||||
final_xshift, final_yshift);
|
final_xshift, final_yshift);
|
||||||
denorm_.set_inverse(inverse);
|
denorm_.set_inverse(inverse);
|
||||||
@ -789,7 +789,7 @@ TWERD *TWERD::PolygonalCopy(bool allow_detailed_fx, WERD *src) {
|
|||||||
|
|
||||||
// Baseline normalizes the blobs in-place, recording the normalization in the
|
// Baseline normalizes the blobs in-place, recording the normalization in the
|
||||||
// DENORMs in the blobs.
|
// DENORMs in the blobs.
|
||||||
void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height,
|
void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
|
||||||
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
|
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
|
||||||
const TBOX *norm_box, DENORM *word_denorm) {
|
const TBOX *norm_box, DENORM *word_denorm) {
|
||||||
TBOX word_box = bounding_box();
|
TBOX word_box = bounding_box();
|
||||||
|
@ -324,7 +324,7 @@ struct TBLOB {
|
|||||||
// this blob and the Pix for the full image.
|
// this blob and the Pix for the full image.
|
||||||
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
|
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
|
||||||
float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift,
|
float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift,
|
||||||
float final_yshift, bool inverse, Pix *pix);
|
float final_yshift, bool inverse, Image pix);
|
||||||
// Rotates by the given rotation in place.
|
// Rotates by the given rotation in place.
|
||||||
void Rotate(const FCOORD rotation);
|
void Rotate(const FCOORD rotation);
|
||||||
// Moves by the given vec in place.
|
// Moves by the given vec in place.
|
||||||
@ -436,7 +436,7 @@ struct TWERD {
|
|||||||
static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src);
|
static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src);
|
||||||
// Baseline normalizes the blobs in-place, recording the normalization in the
|
// Baseline normalizes the blobs in-place, recording the normalization in the
|
||||||
// DENORMs in the blobs.
|
// DENORMs in the blobs.
|
||||||
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height,
|
void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
|
||||||
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
|
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
|
||||||
const TBOX *norm_box, DENORM *word_denorm);
|
const TBOX *norm_box, DENORM *word_denorm);
|
||||||
// Copies the data and the blobs, but leaves next untouched.
|
// Copies the data and the blobs, but leaves next untouched.
|
||||||
|
@ -736,7 +736,7 @@ static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, i
|
|||||||
* for each horizontal step, and the conflict in step direction and gradient
|
* for each horizontal step, and the conflict in step direction and gradient
|
||||||
* direction can be used to ignore the vertical steps.
|
* direction can be used to ignore the vertical steps.
|
||||||
*/
|
*/
|
||||||
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
|
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Image pix) {
|
||||||
if (pixGetDepth(pix) != 8) {
|
if (pixGetDepth(pix) != 8) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -904,7 +904,7 @@ void C_OUTLINE::ComputeBinaryOffsets() {
|
|||||||
* Renders the outline to the given pix, with left and top being
|
* Renders the outline to the given pix, with left and top being
|
||||||
* the coords of the upper-left corner of the pix.
|
* the coords of the upper-left corner of the pix.
|
||||||
*/
|
*/
|
||||||
void C_OUTLINE::render(int left, int top, Pix *pix) const {
|
void C_OUTLINE::render(int left, int top, Image pix) const {
|
||||||
ICOORD pos = start;
|
ICOORD pos = start;
|
||||||
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
||||||
ICOORD next_step = step(stepindex);
|
ICOORD next_step = step(stepindex);
|
||||||
@ -924,7 +924,7 @@ void C_OUTLINE::render(int left, int top, Pix *pix) const {
|
|||||||
* @param top coord
|
* @param top coord
|
||||||
* @param pix the pix to outline
|
* @param pix the pix to outline
|
||||||
*/
|
*/
|
||||||
void C_OUTLINE::render_outline(int left, int top, Pix *pix) const {
|
void C_OUTLINE::render_outline(int left, int top, Image pix) const {
|
||||||
ICOORD pos = start;
|
ICOORD pos = start;
|
||||||
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
|
||||||
ICOORD next_step = step(stepindex);
|
ICOORD next_step = step(stepindex);
|
||||||
|
@ -234,18 +234,18 @@ public:
|
|||||||
|
|
||||||
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
|
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
|
||||||
// pix is 8-bit. Does nothing otherwise.
|
// pix is 8-bit. Does nothing otherwise.
|
||||||
void ComputeEdgeOffsets(int threshold, Pix *pix);
|
void ComputeEdgeOffsets(int threshold, Image pix);
|
||||||
// Adds sub-pixel resolution EdgeOffsets for the outline using only
|
// Adds sub-pixel resolution EdgeOffsets for the outline using only
|
||||||
// a binary image source.
|
// a binary image source.
|
||||||
void ComputeBinaryOffsets();
|
void ComputeBinaryOffsets();
|
||||||
|
|
||||||
// Renders the outline to the given pix, with left and top being
|
// Renders the outline to the given pix, with left and top being
|
||||||
// the coords of the upper-left corner of the pix.
|
// the coords of the upper-left corner of the pix.
|
||||||
void render(int left, int top, Pix *pix) const;
|
void render(int left, int top, Image pix) const;
|
||||||
|
|
||||||
// Renders just the outline to the given pix (no fill), with left and top
|
// Renders just the outline to the given pix (no fill), with left and top
|
||||||
// being the coords of the upper-left corner of the pix.
|
// being the coords of the upper-left corner of the pix.
|
||||||
void render_outline(int left, int top, Pix *pix) const;
|
void render_outline(int left, int top, Image pix) const;
|
||||||
|
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
void plot( // draw one
|
void plot( // draw one
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||||
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <allheaders.h>
|
#include <allheaders.h>
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
@ -27,11 +29,11 @@ public:
|
|||||||
|
|
||||||
// Adds the given pix to the set of pages in the PDF file, with the given
|
// Adds the given pix to the set of pages in the PDF file, with the given
|
||||||
// caption added to the top.
|
// caption added to the top.
|
||||||
void AddPix(const Pix *pix, const char *caption) {
|
void AddPix(const Image pix, const char *caption) {
|
||||||
int depth = pixGetDepth(const_cast<Pix *>(pix));
|
int depth = pixGetDepth(pix);
|
||||||
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
|
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
|
||||||
Pix *pix_debug =
|
Image pix_debug =
|
||||||
pixAddSingleTextblock(const_cast<Pix *>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
|
pixAddSingleTextblock(pix, fonts_, caption, color, L_ADD_BELOW, nullptr);
|
||||||
pixaAddPix(pixa_, pix_debug, L_INSERT);
|
pixaAddPix(pixa_, pix_debug, L_INSERT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ const int kMaxReadAhead = 8;
|
|||||||
|
|
||||||
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
|
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
|
||||||
// Takes ownership of the pix and destroys it.
|
// Takes ownership of the pix and destroys it.
|
||||||
ImageData::ImageData(bool vertical, Pix *pix) : page_number_(0), vertical_text_(vertical) {
|
ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) {
|
||||||
SetPix(pix);
|
SetPix(pix);
|
||||||
}
|
}
|
||||||
ImageData::~ImageData() {
|
ImageData::~ImageData() {
|
||||||
@ -176,12 +176,12 @@ bool ImageData::SkipDeSerialize(TFile *fp) {
|
|||||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||||
// In case of missing PNG support in Leptonica use PNM format,
|
// In case of missing PNG support in Leptonica use PNM format,
|
||||||
// which requires more memory.
|
// which requires more memory.
|
||||||
void ImageData::SetPix(Pix *pix) {
|
void ImageData::SetPix(Image pix) {
|
||||||
SetPixInternal(pix, &image_data_);
|
SetPixInternal(pix, &image_data_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the Pix image for *this. Must be pixDestroyed after use.
|
// Returns the Pix image for *this. Must be pixDestroyed after use.
|
||||||
Pix *ImageData::GetPix() const {
|
Image ImageData::GetPix() const {
|
||||||
return GetPixInternal(image_data_);
|
return GetPixInternal(image_data_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,11 +191,11 @@ Pix *ImageData::GetPix() const {
|
|||||||
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
||||||
// and scale_factor (if not nullptr) is set to the scale factor that was applied
|
// and scale_factor (if not nullptr) is set to the scale factor that was applied
|
||||||
// to the image to achieve the target_height.
|
// to the image to achieve the target_height.
|
||||||
Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
|
Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
|
||||||
int *scaled_height, std::vector<TBOX> *boxes) const {
|
int *scaled_height, std::vector<TBOX> *boxes) const {
|
||||||
int input_width = 0;
|
int input_width = 0;
|
||||||
int input_height = 0;
|
int input_height = 0;
|
||||||
Pix *src_pix = GetPix();
|
Image src_pix = GetPix();
|
||||||
ASSERT_HOST(src_pix != nullptr);
|
ASSERT_HOST(src_pix != nullptr);
|
||||||
input_width = pixGetWidth(src_pix);
|
input_width = pixGetWidth(src_pix);
|
||||||
input_height = pixGetHeight(src_pix);
|
input_height = pixGetHeight(src_pix);
|
||||||
@ -210,11 +210,11 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
|
|||||||
*scaled_height = target_height;
|
*scaled_height = target_height;
|
||||||
}
|
}
|
||||||
// Get the scaled image.
|
// Get the scaled image.
|
||||||
Pix *pix = pixScale(src_pix, im_factor, im_factor);
|
Image pix = pixScale(src_pix, im_factor, im_factor);
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
|
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
|
||||||
im_factor);
|
im_factor);
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
if (scaled_width != nullptr) {
|
if (scaled_width != nullptr) {
|
||||||
@ -223,7 +223,7 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
|
|||||||
if (scaled_height != nullptr) {
|
if (scaled_height != nullptr) {
|
||||||
*scaled_height = pixGetHeight(pix);
|
*scaled_height = pixGetHeight(pix);
|
||||||
}
|
}
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
if (boxes != nullptr) {
|
if (boxes != nullptr) {
|
||||||
// Get the boxes.
|
// Get the boxes.
|
||||||
boxes->clear();
|
boxes->clear();
|
||||||
@ -253,7 +253,7 @@ int ImageData::MemoryUsed() const {
|
|||||||
void ImageData::Display() const {
|
void ImageData::Display() const {
|
||||||
const int kTextSize = 64;
|
const int kTextSize = 64;
|
||||||
// Draw the image.
|
// Draw the image.
|
||||||
Pix *pix = GetPix();
|
Image pix = GetPix();
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -263,7 +263,7 @@ void ImageData::Display() const {
|
|||||||
new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
|
new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
|
||||||
2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
|
2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
|
||||||
win->Image(pix, 0, height - 1);
|
win->Image(pix, 0, height - 1);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
// Draw the boxes.
|
// Draw the boxes.
|
||||||
win->Pen(ScrollView::RED);
|
win->Pen(ScrollView::RED);
|
||||||
win->Brush(ScrollView::NONE);
|
win->Brush(ScrollView::NONE);
|
||||||
@ -306,7 +306,7 @@ void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::
|
|||||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||||
// In case of missing PNG support in Leptonica use PNM format,
|
// In case of missing PNG support in Leptonica use PNM format,
|
||||||
// which requires more memory.
|
// which requires more memory.
|
||||||
void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
|
void ImageData::SetPixInternal(Image pix, std::vector<char> *image_data) {
|
||||||
l_uint8 *data;
|
l_uint8 *data;
|
||||||
size_t size;
|
size_t size;
|
||||||
l_int32 ret;
|
l_int32 ret;
|
||||||
@ -314,7 +314,7 @@ void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
|
|||||||
if (ret) {
|
if (ret) {
|
||||||
ret = pixWriteMem(&data, &size, pix, IFF_PNM);
|
ret = pixWriteMem(&data, &size, pix, IFF_PNM);
|
||||||
}
|
}
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
// TODO: optimize resize (no init).
|
// TODO: optimize resize (no init).
|
||||||
image_data->resize(size);
|
image_data->resize(size);
|
||||||
memcpy(&(*image_data)[0], data, size);
|
memcpy(&(*image_data)[0], data, size);
|
||||||
@ -322,8 +322,8 @@ void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
|
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
|
||||||
Pix *ImageData::GetPixInternal(const std::vector<char> &image_data) {
|
Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
|
||||||
Pix *pix = nullptr;
|
Image pix = nullptr;
|
||||||
if (!image_data.empty()) {
|
if (!image_data.empty()) {
|
||||||
// Convert the array to an image.
|
// Convert the array to an image.
|
||||||
const auto *u_data = reinterpret_cast<const unsigned char *>(&image_data[0]);
|
const auto *u_data = reinterpret_cast<const unsigned char *>(&image_data[0]);
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
|
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
|
||||||
#define TESSERACT_IMAGE_IMAGEDATA_H_
|
#define TESSERACT_IMAGE_IMAGEDATA_H_
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
#include "points.h" // for FCOORD
|
#include "points.h" // for FCOORD
|
||||||
|
|
||||||
#include <mutex> // for std::mutex
|
#include <mutex> // for std::mutex
|
||||||
@ -62,7 +63,7 @@ class TESS_API ImageData {
|
|||||||
public:
|
public:
|
||||||
ImageData();
|
ImageData();
|
||||||
// Takes ownership of the pix.
|
// Takes ownership of the pix.
|
||||||
ImageData(bool vertical, Pix *pix);
|
ImageData(bool vertical, Image pix);
|
||||||
~ImageData();
|
~ImageData();
|
||||||
|
|
||||||
// Builds and returns an ImageData from the basic data. Note that imagedata,
|
// Builds and returns an ImageData from the basic data. Note that imagedata,
|
||||||
@ -115,16 +116,16 @@ public:
|
|||||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||||
// In case of missing PNG support in Leptonica use PNM format,
|
// In case of missing PNG support in Leptonica use PNM format,
|
||||||
// which requires more memory.
|
// which requires more memory.
|
||||||
void SetPix(Pix *pix);
|
void SetPix(Image pix);
|
||||||
// Returns the Pix image for *this. Must be pixDestroyed after use.
|
// Returns the Pix image for *this. Must be pixDestroyed after use.
|
||||||
Pix *GetPix() const;
|
Image GetPix() const;
|
||||||
// Gets anything and everything with a non-nullptr pointer, prescaled to a
|
// Gets anything and everything with a non-nullptr pointer, prescaled to a
|
||||||
// given target_height (if 0, then the original image height), and aligned.
|
// given target_height (if 0, then the original image height), and aligned.
|
||||||
// Also returns (if not nullptr) the width and height of the scaled image.
|
// Also returns (if not nullptr) the width and height of the scaled image.
|
||||||
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
||||||
// and scale_factor (if not nullptr) is set to the scale factor that was
|
// and scale_factor (if not nullptr) is set to the scale factor that was
|
||||||
// applied to the image to achieve the target_height.
|
// applied to the image to achieve the target_height.
|
||||||
Pix *PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
|
Image PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
|
||||||
int *scaled_height, std::vector<TBOX> *boxes) const;
|
int *scaled_height, std::vector<TBOX> *boxes) const;
|
||||||
|
|
||||||
int MemoryUsed() const;
|
int MemoryUsed() const;
|
||||||
@ -141,9 +142,9 @@ private:
|
|||||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||||
// In case of missing PNG support in Leptonica use PNM format,
|
// In case of missing PNG support in Leptonica use PNM format,
|
||||||
// which requires more memory.
|
// which requires more memory.
|
||||||
static void SetPixInternal(Pix *pix, std::vector<char> *image_data);
|
static void SetPixInternal(Image pix, std::vector<char> *image_data);
|
||||||
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
|
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
|
||||||
static Pix *GetPixInternal(const std::vector<char> &image_data);
|
static Image GetPixInternal(const std::vector<char> &image_data);
|
||||||
// Parses the text string as a box file and adds any discovered boxes that
|
// Parses the text string as a box file and adds any discovered boxes that
|
||||||
// match the page number. Returns false on error.
|
// match the page number. Returns false on error.
|
||||||
bool AddBoxes(const char *box_text);
|
bool AddBoxes(const char *box_text);
|
||||||
|
@ -19,8 +19,10 @@
|
|||||||
#ifndef NORMALIS_H
|
#ifndef NORMALIS_H
|
||||||
#define NORMALIS_H
|
#define NORMALIS_H
|
||||||
|
|
||||||
#include <vector>
|
#include "image.h"
|
||||||
|
|
||||||
#include <tesseract/export.h>
|
#include <tesseract/export.h>
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct Pix;
|
struct Pix;
|
||||||
@ -232,10 +234,10 @@ public:
|
|||||||
// Prints the content of the DENORM for debug purposes.
|
// Prints the content of the DENORM for debug purposes.
|
||||||
void Print() const;
|
void Print() const;
|
||||||
|
|
||||||
Pix *pix() const {
|
Image pix() const {
|
||||||
return pix_;
|
return pix_;
|
||||||
}
|
}
|
||||||
void set_pix(Pix *pix) {
|
void set_pix(Image pix) {
|
||||||
pix_ = pix;
|
pix_ = pix;
|
||||||
}
|
}
|
||||||
bool inverse() const {
|
bool inverse() const {
|
||||||
@ -274,7 +276,7 @@ private:
|
|||||||
void Init();
|
void Init();
|
||||||
|
|
||||||
// Best available image.
|
// Best available image.
|
||||||
Pix *pix_;
|
Image pix_;
|
||||||
// True if the source image is white-on-black.
|
// True if the source image is white-on-black.
|
||||||
bool inverse_;
|
bool inverse_;
|
||||||
// Block the word came from. If not null, block->re_rotation() takes the
|
// Block the word came from. If not null, block->re_rotation() takes the
|
||||||
|
@ -152,7 +152,7 @@ public:
|
|||||||
median_size_.set_y(y);
|
median_size_.set_y(y);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pix *render_mask(TBOX *mask_box) {
|
Image render_mask(TBOX *mask_box) {
|
||||||
return pdblk.render_mask(re_rotation_, mask_box);
|
return pdblk.render_mask(re_rotation_, mask_box);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ namespace tesseract {
|
|||||||
// that there is no apparent foreground. At least one hi_value will not be -1.
|
// that there is no apparent foreground. At least one hi_value will not be -1.
|
||||||
// The return value is the number of channels in the input image, being
|
// The return value is the number of channels in the input image, being
|
||||||
// the size of the output thresholds and hi_values arrays.
|
// the size of the output thresholds and hi_values arrays.
|
||||||
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
|
int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
|
||||||
std::vector<int> &hi_values) {
|
std::vector<int> &hi_values) {
|
||||||
int num_channels = pixGetDepth(src_pix) / 8;
|
int num_channels = pixGetDepth(src_pix) / 8;
|
||||||
// Of all channels with no good hi_value, keep the best so we can always
|
// Of all channels with no good hi_value, keep the best so we can always
|
||||||
@ -143,7 +143,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::v
|
|||||||
// single channel. Each channel is always one byte per pixel.
|
// single channel. Each channel is always one byte per pixel.
|
||||||
// Histogram is always a kHistogramSize(256) element array to count
|
// Histogram is always a kHistogramSize(256) element array to count
|
||||||
// occurrences of each pixel value.
|
// occurrences of each pixel value.
|
||||||
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height,
|
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
|
||||||
int *histogram) {
|
int *histogram) {
|
||||||
int num_channels = pixGetDepth(src_pix) / 8;
|
int num_channels = pixGetDepth(src_pix) / 8;
|
||||||
channel = ClipToRange(channel, 0, num_channels - 1);
|
channel = ClipToRange(channel, 0, num_channels - 1);
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
|
#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
|
||||||
#define TESSERACT_CCMAIN_OTSUTHR_H_
|
#define TESSERACT_CCMAIN_OTSUTHR_H_
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <vector> // for std::vector
|
#include <vector> // for std::vector
|
||||||
|
|
||||||
struct Pix;
|
struct Pix;
|
||||||
@ -35,7 +37,7 @@ const int kHistogramSize = 256; // The size of a histogram of pixel values.
|
|||||||
// that there is no apparent foreground. At least one hi_value will not be -1.
|
// that there is no apparent foreground. At least one hi_value will not be -1.
|
||||||
// The return value is the number of channels in the input image, being
|
// The return value is the number of channels in the input image, being
|
||||||
// the size of the output thresholds and hi_values arrays.
|
// the size of the output thresholds and hi_values arrays.
|
||||||
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height,
|
int OtsuThreshold(Image src_pix, int left, int top, int width, int height,
|
||||||
std::vector<int> &thresholds,
|
std::vector<int> &thresholds,
|
||||||
std::vector<int> &hi_values);
|
std::vector<int> &hi_values);
|
||||||
|
|
||||||
@ -43,7 +45,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height,
|
|||||||
// single channel. Each channel is always one byte per pixel.
|
// single channel. Each channel is always one byte per pixel.
|
||||||
// Histogram is always a kHistogramSize(256) element array to count
|
// Histogram is always a kHistogramSize(256) element array to count
|
||||||
// occurrences of each pixel value.
|
// occurrences of each pixel value.
|
||||||
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height,
|
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
|
||||||
int *histogram);
|
int *histogram);
|
||||||
|
|
||||||
// Computes the Otsu threshold(s) for the given histogram.
|
// Computes the Otsu threshold(s) for the given histogram.
|
||||||
|
@ -304,7 +304,7 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES &source) {
|
|||||||
// normalization scale and offset.
|
// normalization scale and offset.
|
||||||
// Returns false if the word is empty and sets up fake results.
|
// Returns false if the word is empty and sets up fake results.
|
||||||
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess,
|
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess,
|
||||||
Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
||||||
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
||||||
const BLOCK *block) {
|
const BLOCK *block) {
|
||||||
auto norm_mode_hint = static_cast<tesseract::OcrEngineMode>(norm_mode);
|
auto norm_mode_hint = static_cast<tesseract::OcrEngineMode>(norm_mode);
|
||||||
|
@ -462,7 +462,7 @@ public:
|
|||||||
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
|
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
|
||||||
// Returns false if the word is empty and sets up fake results.
|
// Returns false if the word is empty and sets up fake results.
|
||||||
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
|
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
|
||||||
Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
||||||
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
||||||
const BLOCK *block);
|
const BLOCK *block);
|
||||||
|
|
||||||
|
@ -134,10 +134,10 @@ void PDBLK::move( // reposition block
|
|||||||
|
|
||||||
// Returns a binary Pix mask with a 1 pixel for every pixel within the
|
// Returns a binary Pix mask with a 1 pixel for every pixel within the
|
||||||
// block. Rotates the coordinate system by rerotation prior to rendering.
|
// block. Rotates the coordinate system by rerotation prior to rendering.
|
||||||
Pix *PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
|
Image PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
|
||||||
TBOX rotated_box(box);
|
TBOX rotated_box(box);
|
||||||
rotated_box.rotate(rerotation);
|
rotated_box.rotate(rerotation);
|
||||||
Pix *pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
|
Image pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
|
||||||
if (hand_poly != nullptr) {
|
if (hand_poly != nullptr) {
|
||||||
// We are going to rotate, so get a deep copy of the points and
|
// We are going to rotate, so get a deep copy of the points and
|
||||||
// make a new POLY_BLOCK with it.
|
// make a new POLY_BLOCK with it.
|
||||||
|
@ -91,7 +91,7 @@ public:
|
|||||||
// block. Rotates the coordinate system by rerotation prior to rendering.
|
// block. Rotates the coordinate system by rerotation prior to rendering.
|
||||||
// If not nullptr, mask_box is filled with the position box of the returned
|
// If not nullptr, mask_box is filled with the position box of the returned
|
||||||
// mask image.
|
// mask image.
|
||||||
Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
|
Image render_mask(const FCOORD &rerotation, TBOX *mask_box);
|
||||||
|
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
/// draw histogram
|
/// draw histogram
|
||||||
|
@ -362,7 +362,7 @@ void QSPLINE::plot( // draw it
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void QSPLINE::plot(Pix *pix) const {
|
void QSPLINE::plot(Image pix) const {
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,7 @@ public:
|
|||||||
|
|
||||||
// Paint the baseline over pix. If pix has depth of 32, then the line will
|
// Paint the baseline over pix. If pix has depth of 32, then the line will
|
||||||
// be painted in red. Otherwise it will be painted in black.
|
// be painted in red. Otherwise it will be painted in black.
|
||||||
void plot(Pix *pix) const;
|
void plot(Image pix) const;
|
||||||
|
|
||||||
QSPLINE &operator=(const QSPLINE &source); // from this
|
QSPLINE &operator=(const QSPLINE &source); // from this
|
||||||
|
|
||||||
|
@ -388,7 +388,7 @@ void C_BLOB::rotate(const FCOORD &rotation) {
|
|||||||
|
|
||||||
// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
|
// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
|
||||||
// outline list and its children.
|
// outline list and its children.
|
||||||
static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIST *list) {
|
static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
|
||||||
C_OUTLINE_IT it(list);
|
C_OUTLINE_IT it(list);
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
C_OUTLINE *outline = it.data();
|
C_OUTLINE *outline = it.data();
|
||||||
@ -405,7 +405,7 @@ static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIS
|
|||||||
|
|
||||||
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
|
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
|
||||||
// if the supplied pix is 8-bit or the binary edges if nullptr.
|
// if the supplied pix is 8-bit or the binary edges if nullptr.
|
||||||
void C_BLOB::ComputeEdgeOffsets(int threshold, Pix *pix) {
|
void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
|
||||||
ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
|
ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -491,7 +491,7 @@ int16_t C_BLOB::EstimateBaselinePosition() {
|
|||||||
return best_min == box.top() ? bottom : best_min;
|
return best_min == box.top() ? bottom : best_min;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pix) {
|
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
|
||||||
C_OUTLINE_IT it(list);
|
C_OUTLINE_IT it(list);
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
C_OUTLINE *outline = it.data();
|
C_OUTLINE *outline = it.data();
|
||||||
@ -502,7 +502,7 @@ static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Pix *pix) {
|
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
|
||||||
C_OUTLINE_IT it(list);
|
C_OUTLINE_IT it(list);
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
C_OUTLINE *outline = it.data();
|
C_OUTLINE *outline = it.data();
|
||||||
@ -511,18 +511,18 @@ static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns a Pix rendering of the blob. pixDestroy after use.
|
// Returns a Pix rendering of the blob. pixDestroy after use.
|
||||||
Pix *C_BLOB::render() {
|
Image C_BLOB::render() {
|
||||||
TBOX box = bounding_box();
|
TBOX box = bounding_box();
|
||||||
Pix *pix = pixCreate(box.width(), box.height(), 1);
|
Image pix = pixCreate(box.width(), box.height(), 1);
|
||||||
render_outline_list(&outlines, box.left(), box.top(), pix);
|
render_outline_list(&outlines, box.left(), box.top(), pix);
|
||||||
return pix;
|
return pix;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a Pix rendering of the outline of the blob. (no fill).
|
// Returns a Pix rendering of the outline of the blob. (no fill).
|
||||||
// pixDestroy after use.
|
// pixDestroy after use.
|
||||||
Pix *C_BLOB::render_outline() {
|
Image C_BLOB::render_outline() {
|
||||||
TBOX box = bounding_box();
|
TBOX box = bounding_box();
|
||||||
Pix *pix = pixCreate(box.width(), box.height(), 1);
|
Image pix = pixCreate(box.width(), box.height(), 1);
|
||||||
render_outline_list_outline(&outlines, box.left(), box.top(), pix);
|
render_outline_list_outline(&outlines, box.left(), box.top(), pix);
|
||||||
return pix;
|
return pix;
|
||||||
}
|
}
|
||||||
|
@ -83,17 +83,17 @@ public:
|
|||||||
|
|
||||||
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
|
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
|
||||||
// if the supplied pix is 8-bit or the binary edges if nullptr.
|
// if the supplied pix is 8-bit or the binary edges if nullptr.
|
||||||
void ComputeEdgeOffsets(int threshold, Pix *pix);
|
void ComputeEdgeOffsets(int threshold, Image pix);
|
||||||
|
|
||||||
// Estimates and returns the baseline position based on the shape of the
|
// Estimates and returns the baseline position based on the shape of the
|
||||||
// outlines.
|
// outlines.
|
||||||
int16_t EstimateBaselinePosition();
|
int16_t EstimateBaselinePosition();
|
||||||
|
|
||||||
// Returns a Pix rendering of the blob. pixDestroy after use.
|
// Returns a Pix rendering of the blob. pixDestroy after use.
|
||||||
Pix *render();
|
Image render();
|
||||||
// Returns a Pix rendering of the outline of the blob. (no fill).
|
// Returns a Pix rendering of the outline of the blob. (no fill).
|
||||||
// pixDestroy after use.
|
// pixDestroy after use.
|
||||||
Pix *render_outline();
|
Image render_outline();
|
||||||
|
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
void plot( // draw one
|
void plot( // draw one
|
||||||
|
@ -36,7 +36,7 @@ namespace tesseract {
|
|||||||
// Classifies the given [training] sample, writing to results.
|
// Classifies the given [training] sample, writing to results.
|
||||||
// See shapeclassifier.h for a full description.
|
// See shapeclassifier.h for a full description.
|
||||||
// Default implementation calls the ShapeRating version.
|
// Default implementation calls the ShapeRating version.
|
||||||
int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
UNICHAR_ID keep_this,
|
UNICHAR_ID keep_this,
|
||||||
std::vector<UnicharRating> *results) {
|
std::vector<UnicharRating> *results) {
|
||||||
results->clear();
|
results->clear();
|
||||||
@ -54,7 +54,7 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *pa
|
|||||||
// Classifies the given [training] sample, writing to results.
|
// Classifies the given [training] sample, writing to results.
|
||||||
// See shapeclassifier.h for a full description.
|
// See shapeclassifier.h for a full description.
|
||||||
// Default implementation aborts.
|
// Default implementation aborts.
|
||||||
int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
int ShapeClassifier::ClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
int keep_this, std::vector<ShapeRating> *results) {
|
int keep_this, std::vector<ShapeRating> *results) {
|
||||||
ASSERT_HOST("Must implement ClassifySample!" == nullptr);
|
ASSERT_HOST("Must implement ClassifySample!" == nullptr);
|
||||||
return 0;
|
return 0;
|
||||||
@ -64,7 +64,7 @@ int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix,
|
|||||||
// If result is not nullptr, it is set with the shape_id and rating.
|
// If result is not nullptr, it is set with the shape_id and rating.
|
||||||
// Does not need to be overridden if ClassifySample respects the keep_this
|
// Does not need to be overridden if ClassifySample respects the keep_this
|
||||||
// rule.
|
// rule.
|
||||||
int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix,
|
int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Image page_pix,
|
||||||
UNICHAR_ID unichar_id, ShapeRating *result) {
|
UNICHAR_ID unichar_id, ShapeRating *result) {
|
||||||
std::vector<ShapeRating> results;
|
std::vector<ShapeRating> results;
|
||||||
const ShapeTable *shapes = GetShapeTable();
|
const ShapeTable *shapes = GetShapeTable();
|
||||||
@ -93,7 +93,7 @@ const UNICHARSET &ShapeClassifier::GetUnicharset() const {
|
|||||||
// the user has finished with debugging the sample.
|
// the user has finished with debugging the sample.
|
||||||
// Probably doesn't need to be overridden if the subclass provides
|
// Probably doesn't need to be overridden if the subclass provides
|
||||||
// DisplayClassifyAs.
|
// DisplayClassifyAs.
|
||||||
void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix,
|
void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Image page_pix,
|
||||||
UNICHAR_ID unichar_id) {
|
UNICHAR_ID unichar_id) {
|
||||||
static ScrollView *terminator = nullptr;
|
static ScrollView *terminator = nullptr;
|
||||||
if (terminator == nullptr) {
|
if (terminator == nullptr) {
|
||||||
@ -159,7 +159,7 @@ void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix,
|
|||||||
// windows to the windows output and returns a new index that may be used
|
// windows to the windows output and returns a new index that may be used
|
||||||
// by any subsequent classifiers. Caller waits for the user to view and
|
// by any subsequent classifiers. Caller waits for the user to view and
|
||||||
// then destroys the windows by clearing the vector.
|
// then destroys the windows by clearing the vector.
|
||||||
int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix,
|
int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix,
|
||||||
UNICHAR_ID unichar_id, int index,
|
UNICHAR_ID unichar_id, int index,
|
||||||
std::vector<ScrollView *> &windows) {
|
std::vector<ScrollView *> &windows) {
|
||||||
// Does nothing in the default implementation.
|
// Does nothing in the default implementation.
|
||||||
|
@ -20,7 +20,10 @@
|
|||||||
#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
|
#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
|
||||||
#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
|
#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <tesseract/unichar.h>
|
#include <tesseract/unichar.h>
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
struct Pix;
|
struct Pix;
|
||||||
@ -61,11 +64,11 @@ public:
|
|||||||
// classifiers.
|
// classifiers.
|
||||||
// NOTE: Neither overload of ClassifySample is pure, but at least one must
|
// NOTE: Neither overload of ClassifySample is pure, but at least one must
|
||||||
// be overridden by a classifier in order for it to do anything.
|
// be overridden by a classifier in order for it to do anything.
|
||||||
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
UNICHAR_ID keep_this, std::vector<UnicharRating> *results);
|
UNICHAR_ID keep_this, std::vector<UnicharRating> *results);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
virtual int ClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
UNICHAR_ID keep_this, std::vector<ShapeRating> *results);
|
UNICHAR_ID keep_this, std::vector<ShapeRating> *results);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -74,7 +77,7 @@ public:
|
|||||||
// Returns -1 if ClassifySample fails to provide any result containing
|
// Returns -1 if ClassifySample fails to provide any result containing
|
||||||
// unichar_id. BestShapeForUnichar does not need to be overridden if
|
// unichar_id. BestShapeForUnichar does not need to be overridden if
|
||||||
// ClassifySample respects the keep_this rule.
|
// ClassifySample respects the keep_this rule.
|
||||||
virtual int BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix,
|
virtual int BestShapeForUnichar(const TrainingSample &sample, Image page_pix,
|
||||||
UNICHAR_ID unichar_id, ShapeRating *result);
|
UNICHAR_ID unichar_id, ShapeRating *result);
|
||||||
|
|
||||||
// Provides access to the ShapeTable that this classifier works with.
|
// Provides access to the ShapeTable that this classifier works with.
|
||||||
@ -88,14 +91,14 @@ public:
|
|||||||
// the user has finished with debugging the sample.
|
// the user has finished with debugging the sample.
|
||||||
// Probably doesn't need to be overridden if the subclass provides
|
// Probably doesn't need to be overridden if the subclass provides
|
||||||
// DisplayClassifyAs.
|
// DisplayClassifyAs.
|
||||||
void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id);
|
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id);
|
||||||
|
|
||||||
// Displays classification as the given unichar_id. Creates as many windows
|
// Displays classification as the given unichar_id. Creates as many windows
|
||||||
// as it feels fit, using index as a guide for placement. Adds any created
|
// as it feels fit, using index as a guide for placement. Adds any created
|
||||||
// windows to the windows output and returns a new index that may be used
|
// windows to the windows output and returns a new index that may be used
|
||||||
// by any subsequent classifiers. Caller waits for the user to view and
|
// by any subsequent classifiers. Caller waits for the user to view and
|
||||||
// then destroys the windows by clearing the vector.
|
// then destroys the windows by clearing the vector.
|
||||||
virtual int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id,
|
virtual int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id,
|
||||||
int index, std::vector<ScrollView *> &windows);
|
int index, std::vector<ScrollView *> &windows);
|
||||||
|
|
||||||
// Prints debug information on the results. context is some introductory/title
|
// Prints debug information on the results. context is some introductory/title
|
||||||
|
@ -25,7 +25,7 @@ namespace tesseract {
|
|||||||
|
|
||||||
// Classifies the given [training] sample, writing to results.
|
// Classifies the given [training] sample, writing to results.
|
||||||
// See ShapeClassifier for a full description.
|
// See ShapeClassifier for a full description.
|
||||||
int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
UNICHAR_ID keep_this,
|
UNICHAR_ID keep_this,
|
||||||
std::vector<UnicharRating> *results) {
|
std::vector<UnicharRating> *results) {
|
||||||
const int old_matcher_level = classify_->matcher_debug_level;
|
const int old_matcher_level = classify_->matcher_debug_level;
|
||||||
@ -62,7 +62,7 @@ const UNICHARSET &TessClassifier::GetUnicharset() const {
|
|||||||
// windows to the windows output and returns a new index that may be used
|
// windows to the windows output and returns a new index that may be used
|
||||||
// by any subsequent classifiers. Caller waits for the user to view and
|
// by any subsequent classifiers. Caller waits for the user to view and
|
||||||
// then destroys the windows by clearing the vector.
|
// then destroys the windows by clearing the vector.
|
||||||
int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id,
|
int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id,
|
||||||
int index, std::vector<ScrollView *> &windows) {
|
int index, std::vector<ScrollView *> &windows) {
|
||||||
int shape_id = unichar_id;
|
int shape_id = unichar_id;
|
||||||
// TODO(rays) Fix this so it works with both flat and real shapetables.
|
// TODO(rays) Fix this so it works with both flat and real shapetables.
|
||||||
|
@ -40,7 +40,7 @@ public:
|
|||||||
|
|
||||||
// Classifies the given [training] sample, writing to results.
|
// Classifies the given [training] sample, writing to results.
|
||||||
// See ShapeClassifier for a full description.
|
// See ShapeClassifier for a full description.
|
||||||
int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
|
int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
|
||||||
UNICHAR_ID keep_this, std::vector<UnicharRating> *results) override;
|
UNICHAR_ID keep_this, std::vector<UnicharRating> *results) override;
|
||||||
// Provides access to the ShapeTable that this classifier works with.
|
// Provides access to the ShapeTable that this classifier works with.
|
||||||
const ShapeTable *GetShapeTable() const override;
|
const ShapeTable *GetShapeTable() const override;
|
||||||
@ -53,7 +53,7 @@ public:
|
|||||||
// windows to the windows output and returns a new index that may be used
|
// windows to the windows output and returns a new index that may be used
|
||||||
// by any subsequent classifiers. Caller waits for the user to view and
|
// by any subsequent classifiers. Caller waits for the user to view and
|
||||||
// then destroys the windows by clearing the vector.
|
// then destroys the windows by clearing the vector.
|
||||||
int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id, int index,
|
int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id, int index,
|
||||||
std::vector<ScrollView *> &windows) override;
|
std::vector<ScrollView *> &windows) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -305,8 +305,8 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns a pix representing the sample. (Int features only.)
|
// Returns a pix representing the sample. (Int features only.)
|
||||||
Pix *TrainingSample::RenderToPix(const UNICHARSET *unicharset) const {
|
Image TrainingSample::RenderToPix(const UNICHARSET *unicharset) const {
|
||||||
Pix *pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
|
Image pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
|
||||||
for (uint32_t f = 0; f < num_features_; ++f) {
|
for (uint32_t f = 0; f < num_features_; ++f) {
|
||||||
int start_x = features_[f].X;
|
int start_x = features_[f].X;
|
||||||
int start_y = kIntFeatureExtent - features_[f].Y;
|
int start_y = kIntFeatureExtent - features_[f].Y;
|
||||||
@ -341,7 +341,7 @@ void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView *window
|
|||||||
// by padding wherever possible.
|
// by padding wherever possible.
|
||||||
// The returned Pix must be pixDestroyed after use.
|
// The returned Pix must be pixDestroyed after use.
|
||||||
// If the input page_pix is nullptr, nullptr is returned.
|
// If the input page_pix is nullptr, nullptr is returned.
|
||||||
Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const {
|
Image TrainingSample::GetSamplePix(int padding, Image page_pix) const {
|
||||||
if (page_pix == nullptr) {
|
if (page_pix == nullptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -354,7 +354,7 @@ Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const {
|
|||||||
padded_box &= page_box;
|
padded_box &= page_box;
|
||||||
Box *box =
|
Box *box =
|
||||||
boxCreate(page_box.left(), page_height - page_box.top(), page_box.width(), page_box.height());
|
boxCreate(page_box.left(), page_height - page_box.top(), page_box.width(), page_box.height());
|
||||||
Pix *sample_pix = pixClipRectangle(page_pix, box, nullptr);
|
Image sample_pix = pixClipRectangle(page_pix, box, nullptr);
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
return sample_pix;
|
return sample_pix;
|
||||||
}
|
}
|
||||||
|
@ -104,7 +104,7 @@ public:
|
|||||||
void IndexFeatures(const IntFeatureSpace &feature_space);
|
void IndexFeatures(const IntFeatureSpace &feature_space);
|
||||||
|
|
||||||
// Returns a pix representing the sample. (Int features only.)
|
// Returns a pix representing the sample. (Int features only.)
|
||||||
Pix *RenderToPix(const UNICHARSET *unicharset) const;
|
Image RenderToPix(const UNICHARSET *unicharset) const;
|
||||||
// Displays the features in the given window with the given color.
|
// Displays the features in the given window with the given color.
|
||||||
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
|
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
|
||||||
|
|
||||||
@ -112,7 +112,7 @@ public:
|
|||||||
// by padding wherever possible.
|
// by padding wherever possible.
|
||||||
// The returned Pix must be pixDestroyed after use.
|
// The returned Pix must be pixDestroyed after use.
|
||||||
// If the input page_pix is nullptr, nullptr is returned.
|
// If the input page_pix is nullptr, nullptr is returned.
|
||||||
Pix *GetSamplePix(int padding, Pix *page_pix) const;
|
Image GetSamplePix(int padding, Image page_pix) const;
|
||||||
|
|
||||||
// Accessors.
|
// Accessors.
|
||||||
UNICHAR_ID class_id() const {
|
UNICHAR_ID class_id() const {
|
||||||
|
@ -78,12 +78,12 @@ bool Input::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *sc
|
|||||||
// image_data. If non-null, *image_scale returns the image scale factor used.
|
// image_data. If non-null, *image_scale returns the image scale factor used.
|
||||||
// Returns nullptr on error.
|
// Returns nullptr on error.
|
||||||
/* static */
|
/* static */
|
||||||
Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
|
Image Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
|
||||||
TRand *randomizer, float *image_scale) {
|
TRand *randomizer, float *image_scale) {
|
||||||
// Note that NumInputs() is defined as input image height.
|
// Note that NumInputs() is defined as input image height.
|
||||||
int target_height = network->NumInputs();
|
int target_height = network->NumInputs();
|
||||||
int width, height;
|
int width, height;
|
||||||
Pix *pix =
|
Image pix =
|
||||||
image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr);
|
image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr);
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
tprintf("Bad pix from ImageData!\n");
|
tprintf("Bad pix from ImageData!\n");
|
||||||
@ -91,7 +91,7 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ
|
|||||||
}
|
}
|
||||||
if (width < min_width || height < min_width) {
|
if (width < min_width || height < min_width) {
|
||||||
tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width);
|
tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return pix;
|
return pix;
|
||||||
@ -104,12 +104,12 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ
|
|||||||
// height == 1. If height == 0 then no scaling.
|
// height == 1. If height == 0 then no scaling.
|
||||||
// NOTE: It isn't safe for multiple threads to call this on the same pix.
|
// NOTE: It isn't safe for multiple threads to call this on the same pix.
|
||||||
/* static */
|
/* static */
|
||||||
void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer,
|
void Input::PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
|
||||||
NetworkIO *input) {
|
NetworkIO *input) {
|
||||||
bool color = shape.depth() == 3;
|
bool color = shape.depth() == 3;
|
||||||
Pix *var_pix = const_cast<Pix *>(pix);
|
Image var_pix = pix;
|
||||||
int depth = pixGetDepth(var_pix);
|
int depth = pixGetDepth(var_pix);
|
||||||
Pix *normed_pix = nullptr;
|
Image normed_pix = nullptr;
|
||||||
// On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
|
// On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
|
||||||
// colormap, so we just have to deal with depth conversion here.
|
// colormap, so we just have to deal with depth conversion here.
|
||||||
if (color) {
|
if (color) {
|
||||||
@ -135,12 +135,12 @@ void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *ran
|
|||||||
if (target_height != 0 && target_height != height) {
|
if (target_height != 0 && target_height != height) {
|
||||||
// Get the scaled image.
|
// Get the scaled image.
|
||||||
float im_factor = static_cast<float>(target_height) / height;
|
float im_factor = static_cast<float>(target_height) / height;
|
||||||
Pix *scaled_pix = pixScale(normed_pix, im_factor, im_factor);
|
Image scaled_pix = pixScale(normed_pix, im_factor, im_factor);
|
||||||
pixDestroy(&normed_pix);
|
normed_pix.destroy();
|
||||||
normed_pix = scaled_pix;
|
normed_pix = scaled_pix;
|
||||||
}
|
}
|
||||||
input->FromPix(shape, normed_pix, randomizer);
|
input->FromPix(shape, normed_pix, randomizer);
|
||||||
pixDestroy(&normed_pix);
|
normed_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
@ -77,7 +77,7 @@ public:
|
|||||||
// image_data. If non-null, *image_scale returns the image scale factor used.
|
// image_data. If non-null, *image_scale returns the image scale factor used.
|
||||||
// Returns nullptr on error.
|
// Returns nullptr on error.
|
||||||
/* static */
|
/* static */
|
||||||
static Pix *PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
|
static Image PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
|
||||||
TRand *randomizer, float *image_scale);
|
TRand *randomizer, float *image_scale);
|
||||||
// Converts the given pix to a NetworkIO of height and depth appropriate to
|
// Converts the given pix to a NetworkIO of height and depth appropriate to
|
||||||
// the given StaticShape:
|
// the given StaticShape:
|
||||||
@ -85,7 +85,7 @@ public:
|
|||||||
// Scale to target height, if the shape's height is > 1, or its depth if the
|
// Scale to target height, if the shape's height is > 1, or its depth if the
|
||||||
// height == 1. If height == 0 then no scaling.
|
// height == 1. If height == 0 then no scaling.
|
||||||
// NOTE: It isn't safe for multiple threads to call this on the same pix.
|
// NOTE: It isn't safe for multiple threads to call this on the same pix.
|
||||||
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer,
|
static void PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
|
||||||
NetworkIO *input);
|
NetworkIO *input);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -321,7 +321,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
|||||||
// This ensures consistent recognition results.
|
// This ensures consistent recognition results.
|
||||||
SetRandomSeed();
|
SetRandomSeed();
|
||||||
int min_width = network_->XScaleFactor();
|
int min_width = network_->XScaleFactor();
|
||||||
Pix *pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor);
|
Image pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor);
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
tprintf("Line cannot be recognized!!\n");
|
tprintf("Line cannot be recognized!!\n");
|
||||||
return false;
|
return false;
|
||||||
@ -330,7 +330,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
|||||||
const int kMaxImageWidth = 128 * pixGetHeight(pix);
|
const int kMaxImageWidth = 128 * pixGetHeight(pix);
|
||||||
if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
|
if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
|
||||||
tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), pixGetHeight(pix));
|
tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), pixGetHeight(pix));
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (upside_down) {
|
if (upside_down) {
|
||||||
@ -370,7 +370,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
|||||||
network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
|
network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
if (debug) {
|
if (debug) {
|
||||||
std::vector<int> labels, coords;
|
std::vector<int> labels, coords;
|
||||||
LabelsFromOutputs(*outputs, &labels, &coords);
|
LabelsFromOutputs(*outputs, &labels, &coords);
|
||||||
@ -404,7 +404,7 @@ std::string LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
|
|||||||
void LSTMRecognizer::DisplayForward(const NetworkIO &inputs, const std::vector<int> &labels,
|
void LSTMRecognizer::DisplayForward(const NetworkIO &inputs, const std::vector<int> &labels,
|
||||||
const std::vector<int> &label_coords, const char *window_name,
|
const std::vector<int> &label_coords, const char *window_name,
|
||||||
ScrollView **window) {
|
ScrollView **window) {
|
||||||
Pix *input_pix = inputs.ToPix();
|
Image input_pix = inputs.ToPix();
|
||||||
Network::ClearWindow(false, window_name, pixGetWidth(input_pix), pixGetHeight(input_pix), window);
|
Network::ClearWindow(false, window_name, pixGetWidth(input_pix), pixGetHeight(input_pix), window);
|
||||||
int line_height = Network::DisplayImage(input_pix, *window);
|
int line_height = Network::DisplayImage(input_pix, *window);
|
||||||
DisplayLSTMOutput(labels, label_coords, line_height, *window);
|
DisplayLSTMOutput(labels, label_coords, line_height, *window);
|
||||||
|
@ -327,7 +327,7 @@ double Network::Random(double range) {
|
|||||||
// === Debug image display methods. ===
|
// === Debug image display methods. ===
|
||||||
// Displays the image of the matrix to the forward window.
|
// Displays the image of the matrix to the forward window.
|
||||||
void Network::DisplayForward(const NetworkIO &matrix) {
|
void Network::DisplayForward(const NetworkIO &matrix) {
|
||||||
Pix *image = matrix.ToPix();
|
Image image = matrix.ToPix();
|
||||||
ClearWindow(false, name_.c_str(), pixGetWidth(image), pixGetHeight(image), &forward_win_);
|
ClearWindow(false, name_.c_str(), pixGetWidth(image), pixGetHeight(image), &forward_win_);
|
||||||
DisplayImage(image, forward_win_);
|
DisplayImage(image, forward_win_);
|
||||||
forward_win_->Update();
|
forward_win_->Update();
|
||||||
@ -335,7 +335,7 @@ void Network::DisplayForward(const NetworkIO &matrix) {
|
|||||||
|
|
||||||
// Displays the image of the matrix to the backward window.
|
// Displays the image of the matrix to the backward window.
|
||||||
void Network::DisplayBackward(const NetworkIO &matrix) {
|
void Network::DisplayBackward(const NetworkIO &matrix) {
|
||||||
Pix *image = matrix.ToPix();
|
Image image = matrix.ToPix();
|
||||||
std::string window_name = name_ + "-back";
|
std::string window_name = name_ + "-back";
|
||||||
ClearWindow(false, window_name.c_str(), pixGetWidth(image), pixGetHeight(image), &backward_win_);
|
ClearWindow(false, window_name.c_str(), pixGetWidth(image), pixGetHeight(image), &backward_win_);
|
||||||
DisplayImage(image, backward_win_);
|
DisplayImage(image, backward_win_);
|
||||||
@ -371,10 +371,10 @@ void Network::ClearWindow(bool tess_coords, const char *window_name, int width,
|
|||||||
|
|
||||||
// Displays the pix in the given window. and returns the height of the pix.
|
// Displays the pix in the given window. and returns the height of the pix.
|
||||||
// The pix is pixDestroyed.
|
// The pix is pixDestroyed.
|
||||||
int Network::DisplayImage(Pix *pix, ScrollView *window) {
|
int Network::DisplayImage(Image pix, ScrollView *window) {
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
window->Image(pix, 0, 0);
|
window->Image(pix, 0, 0);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
return height;
|
return height;
|
||||||
}
|
}
|
||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
|
@ -283,7 +283,7 @@ public:
|
|||||||
|
|
||||||
// Displays the pix in the given window. and returns the height of the pix.
|
// Displays the pix in the given window. and returns the height of the pix.
|
||||||
// The pix is pixDestroyed.
|
// The pix is pixDestroyed.
|
||||||
static int DisplayImage(Pix *pix, ScrollView *window);
|
static int DisplayImage(Image pix, ScrollView *window);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// Returns a random number in [-range, range].
|
// Returns a random number in [-range, range].
|
||||||
|
@ -123,7 +123,7 @@ void NetworkIO::ZeroInvalidElements() {
|
|||||||
// of text, so a horizontal line through the middle of the image passes through
|
// of text, so a horizontal line through the middle of the image passes through
|
||||||
// at least some of it, so local minima and maxima are a good proxy for black
|
// at least some of it, so local minima and maxima are a good proxy for black
|
||||||
// and white pixel samples.
|
// and white pixel samples.
|
||||||
static void ComputeBlackWhite(Pix *pix, float *black, float *white) {
|
static void ComputeBlackWhite(Image pix, float *black, float *white) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
STATS mins(0, 256), maxes(0, 256);
|
STATS mins(0, 256), maxes(0, 256);
|
||||||
@ -159,21 +159,21 @@ static void ComputeBlackWhite(Pix *pix, float *black, float *white) {
|
|||||||
// Sets up the array from the given image, using the currently set int_mode_.
|
// Sets up the array from the given image, using the currently set int_mode_.
|
||||||
// If the image width doesn't match the shape, the image is truncated or padded
|
// If the image width doesn't match the shape, the image is truncated or padded
|
||||||
// with noise to match.
|
// with noise to match.
|
||||||
void NetworkIO::FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer) {
|
void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) {
|
||||||
std::vector<const Pix *> pixes(1, pix);
|
std::vector<Image> pixes(1, pix);
|
||||||
FromPixes(shape, pixes, randomizer);
|
FromPixes(shape, pixes, randomizer);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets up the array from the given set of images, using the currently set
|
// Sets up the array from the given set of images, using the currently set
|
||||||
// int_mode_. If the image width doesn't match the shape, the images are
|
// int_mode_. If the image width doesn't match the shape, the images are
|
||||||
// truncated or padded with noise to match.
|
// truncated or padded with noise to match.
|
||||||
void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix *> &pixes,
|
void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
|
||||||
TRand *randomizer) {
|
TRand *randomizer) {
|
||||||
int target_height = shape.height();
|
int target_height = shape.height();
|
||||||
int target_width = shape.width();
|
int target_width = shape.width();
|
||||||
std::vector<std::pair<int, int>> h_w_pairs;
|
std::vector<std::pair<int, int>> h_w_pairs;
|
||||||
for (auto pix : pixes) {
|
for (auto pix : pixes) {
|
||||||
Pix *var_pix = const_cast<Pix *>(pix);
|
Image var_pix = pix;
|
||||||
int width = pixGetWidth(var_pix);
|
int width = pixGetWidth(var_pix);
|
||||||
if (target_width != 0) {
|
if (target_width != 0) {
|
||||||
width = target_width;
|
width = target_width;
|
||||||
@ -188,7 +188,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix
|
|||||||
ResizeToMap(int_mode(), stride_map_, shape.depth());
|
ResizeToMap(int_mode(), stride_map_, shape.depth());
|
||||||
// Iterate over the images again to copy the data.
|
// Iterate over the images again to copy the data.
|
||||||
for (size_t b = 0; b < pixes.size(); ++b) {
|
for (size_t b = 0; b < pixes.size(); ++b) {
|
||||||
Pix *pix = const_cast<Pix *>(pixes[b]);
|
Image pix = pixes[b];
|
||||||
float black = 0.0f, white = 255.0f;
|
float black = 0.0f, white = 255.0f;
|
||||||
if (shape.depth() != 3) {
|
if (shape.depth() != 3) {
|
||||||
ComputeBlackWhite(pix, &black, &white);
|
ComputeBlackWhite(pix, &black, &white);
|
||||||
@ -212,7 +212,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix
|
|||||||
// of input channels, the height is the height of the image, and the width
|
// of input channels, the height is the height of the image, and the width
|
||||||
// is the width of the image, or truncated/padded with noise if the width
|
// is the width of the image, or truncated/padded with noise if the width
|
||||||
// is a fixed size.
|
// is a fixed size.
|
||||||
void NetworkIO::Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer) {
|
void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
int wpl = pixGetWpl(pix);
|
int wpl = pixGetWpl(pix);
|
||||||
@ -253,7 +253,7 @@ void NetworkIO::Copy2DImage(int batch, Pix *pix, float black, float contrast, TR
|
|||||||
// above, except that the output depth is the height of the input image, the
|
// above, except that the output depth is the height of the input image, the
|
||||||
// output height is 1, and the output width as for Copy2DImage.
|
// output height is 1, and the output width as for Copy2DImage.
|
||||||
// The image is thus treated as a 1-d set of vertical pixel strips.
|
// The image is thus treated as a 1-d set of vertical pixel strips.
|
||||||
void NetworkIO::Copy1DGreyImage(int batch, Pix *pix, float black, float contrast,
|
void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast,
|
||||||
TRand *randomizer) {
|
TRand *randomizer) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
@ -296,7 +296,7 @@ void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Converts the array to a Pix. Must be pixDestroyed after use.
|
// Converts the array to a Pix. Must be pixDestroyed after use.
|
||||||
Pix *NetworkIO::ToPix() const {
|
Image NetworkIO::ToPix() const {
|
||||||
// Count the width of the image, and find the max multiplication factor.
|
// Count the width of the image, and find the max multiplication factor.
|
||||||
int im_width = stride_map_.Size(FD_WIDTH);
|
int im_width = stride_map_.Size(FD_WIDTH);
|
||||||
int im_height = stride_map_.Size(FD_HEIGHT);
|
int im_height = stride_map_.Size(FD_HEIGHT);
|
||||||
@ -307,7 +307,7 @@ Pix *NetworkIO::ToPix() const {
|
|||||||
num_features = 1;
|
num_features = 1;
|
||||||
feature_factor = 3;
|
feature_factor = 3;
|
||||||
}
|
}
|
||||||
Pix *pix = pixCreate(im_width, im_height * num_features, 32);
|
Image pix = pixCreate(im_width, im_height * num_features, 32);
|
||||||
StrideMap::Index index(stride_map_);
|
StrideMap::Index index(stride_map_);
|
||||||
do {
|
do {
|
||||||
int im_x = index.index(FD_WIDTH);
|
int im_x = index.index(FD_WIDTH);
|
||||||
|
@ -19,15 +19,16 @@
|
|||||||
#ifndef TESSERACT_LSTM_NETWORKIO_H_
|
#ifndef TESSERACT_LSTM_NETWORKIO_H_
|
||||||
#define TESSERACT_LSTM_NETWORKIO_H_
|
#define TESSERACT_LSTM_NETWORKIO_H_
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "helpers.h"
|
#include "helpers.h"
|
||||||
|
#include "image.h"
|
||||||
#include "static_shape.h"
|
#include "static_shape.h"
|
||||||
#include "stridemap.h"
|
#include "stridemap.h"
|
||||||
#include "weightmatrix.h"
|
#include "weightmatrix.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
struct Pix;
|
struct Pix;
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
@ -66,11 +67,11 @@ public:
|
|||||||
// Sets up the array from the given image, using the currently set int_mode_.
|
// Sets up the array from the given image, using the currently set int_mode_.
|
||||||
// If the image width doesn't match the shape, the image is truncated or
|
// If the image width doesn't match the shape, the image is truncated or
|
||||||
// padded with noise to match.
|
// padded with noise to match.
|
||||||
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer);
|
void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer);
|
||||||
// Sets up the array from the given set of images, using the currently set
|
// Sets up the array from the given set of images, using the currently set
|
||||||
// int_mode_. If the image width doesn't match the shape, the images are
|
// int_mode_. If the image width doesn't match the shape, the images are
|
||||||
// truncated or padded with noise to match.
|
// truncated or padded with noise to match.
|
||||||
void FromPixes(const StaticShape &shape, const std::vector<const Pix *> &pixes,
|
void FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
|
||||||
TRand *randomizer);
|
TRand *randomizer);
|
||||||
// Copies the given pix to *this at the given batch index, stretching and
|
// Copies the given pix to *this at the given batch index, stretching and
|
||||||
// clipping the pixel values so that [black, black + 2*contrast] maps to the
|
// clipping the pixel values so that [black, black + 2*contrast] maps to the
|
||||||
@ -79,12 +80,12 @@ public:
|
|||||||
// of input channels, the height is the height of the image, and the width
|
// of input channels, the height is the height of the image, and the width
|
||||||
// is the width of the image, or truncated/padded with noise if the width
|
// is the width of the image, or truncated/padded with noise if the width
|
||||||
// is a fixed size.
|
// is a fixed size.
|
||||||
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer);
|
void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
|
||||||
// Copies the given pix to *this at the given batch index, as Copy2DImage
|
// Copies the given pix to *this at the given batch index, as Copy2DImage
|
||||||
// above, except that the output depth is the height of the input image, the
|
// above, except that the output depth is the height of the input image, the
|
||||||
// output height is 1, and the output width as for Copy2DImage.
|
// output height is 1, and the output width as for Copy2DImage.
|
||||||
// The image is thus treated as a 1-d set of vertical pixel strips.
|
// The image is thus treated as a 1-d set of vertical pixel strips.
|
||||||
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer);
|
void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
|
||||||
// Helper stores the pixel value in i_ or f_ according to int_mode_.
|
// Helper stores the pixel value in i_ or f_ according to int_mode_.
|
||||||
// t: is the index from the StrideMap corresponding to the current
|
// t: is the index from the StrideMap corresponding to the current
|
||||||
// [batch,y,x] position
|
// [batch,y,x] position
|
||||||
@ -94,7 +95,7 @@ public:
|
|||||||
// contrast: the range of pixel values to stretch to half the range of *this.
|
// contrast: the range of pixel values to stretch to half the range of *this.
|
||||||
void SetPixel(int t, int f, int pixel, float black, float contrast);
|
void SetPixel(int t, int f, int pixel, float black, float contrast);
|
||||||
// Converts the array to a Pix. Must be pixDestroyed after use.
|
// Converts the array to a Pix. Must be pixDestroyed after use.
|
||||||
Pix *ToPix() const;
|
Image ToPix() const;
|
||||||
// Prints the first and last num timesteps of the array for each feature.
|
// Prints the first and last num timesteps of the array for each feature.
|
||||||
void Print(int num) const;
|
void Print(int num) const;
|
||||||
|
|
||||||
|
@ -629,7 +629,7 @@ static cl_mem allocateZeroCopyBuffer(const KernelEnv &rEnv, l_uint32 *hostbuffer
|
|||||||
return membuffer;
|
return membuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Pix *mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Pix *pixd, Pix *pixs,
|
static Image mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Image pixd, Image pixs,
|
||||||
int elements, cl_mem_flags flags, bool memcopy = false,
|
int elements, cl_mem_flags flags, bool memcopy = false,
|
||||||
bool sync = true) {
|
bool sync = true) {
|
||||||
if (!pixd) {
|
if (!pixd) {
|
||||||
@ -673,7 +673,7 @@ void OpenclDevice::releaseMorphCLBuffers() {
|
|||||||
pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr;
|
pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) {
|
int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs) {
|
||||||
SetKernelEnv(&rEnv);
|
SetKernelEnv(&rEnv);
|
||||||
|
|
||||||
if (pixThBuffer != nullptr) {
|
if (pixThBuffer != nullptr) {
|
||||||
@ -1455,8 +1455,8 @@ static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_me
|
|||||||
// OpenCL implementation of Get Lines from pix function
|
// OpenCL implementation of Get Lines from pix function
|
||||||
// Note: Assumes the source and dest opencl buffer are initialized. No check
|
// Note: Assumes the source and dest opencl buffer are initialized. No check
|
||||||
// done
|
// done
|
||||||
void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline,
|
void OpenclDevice::pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline,
|
||||||
Pix **pixClosed, bool getpixClosed, l_int32 close_hsize,
|
Image *pixClosed, bool getpixClosed, l_int32 close_hsize,
|
||||||
l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
|
l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
|
||||||
l_int32 line_hsize, l_int32 line_vsize) {
|
l_int32 line_hsize, l_int32 line_vsize) {
|
||||||
l_uint32 wpl, h;
|
l_uint32 wpl, h;
|
||||||
@ -1678,7 +1678,7 @@ int OpenclDevice::HistogramRectOCL(void *imageData, int bytes_per_pixel, int byt
|
|||||||
************************************************************************/
|
************************************************************************/
|
||||||
int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel,
|
int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel,
|
||||||
int bytes_per_line, int *thresholds, int *hi_values,
|
int bytes_per_line, int *thresholds, int *hi_values,
|
||||||
Pix **pix, int height, int width, int top, int left) {
|
Image *pix, int height, int width, int top, int left) {
|
||||||
int retVal = 0;
|
int retVal = 0;
|
||||||
/* create pix result buffer */
|
/* create pix result buffer */
|
||||||
*pix = pixCreate(width, height, 1);
|
*pix = pixCreate(width, height, 1);
|
||||||
@ -1783,7 +1783,7 @@ struct TessScoreEvaluationInputData {
|
|||||||
int width;
|
int width;
|
||||||
int numChannels;
|
int numChannels;
|
||||||
unsigned char *imageData;
|
unsigned char *imageData;
|
||||||
Pix *pix;
|
Image pix;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) {
|
static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) {
|
||||||
@ -1928,7 +1928,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat
|
|||||||
# else
|
# else
|
||||||
clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
|
clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
|
||||||
# endif
|
# endif
|
||||||
Pix *pix = pixCreate(input.width, input.height, 32);
|
Image pix = pixCreate(input.width, input.height, 32);
|
||||||
l_uint32 *pixData = pixGetData(pix);
|
l_uint32 *pixData = pixGetData(pix);
|
||||||
int i, j;
|
int i, j;
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
@ -1954,7 +1954,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat
|
|||||||
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
|
||||||
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
|
||||||
# endif
|
# endif
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
return time;
|
return time;
|
||||||
@ -2044,7 +2044,7 @@ static double histogramRectMicroBench(GPUEnv *env, TessScoreEvaluationInputData
|
|||||||
// Reproducing the ThresholdRectToPix native version
|
// Reproducing the ThresholdRectToPix native version
|
||||||
static void ThresholdRectToPix_Native(const unsigned char *imagedata, int bytes_per_pixel,
|
static void ThresholdRectToPix_Native(const unsigned char *imagedata, int bytes_per_pixel,
|
||||||
int bytes_per_line, const int *thresholds,
|
int bytes_per_line, const int *thresholds,
|
||||||
const int *hi_values, Pix **pix) {
|
const int *hi_values, Image *pix) {
|
||||||
int top = 0;
|
int top = 0;
|
||||||
int left = 0;
|
int left = 0;
|
||||||
int width = pixGetWidth(*pix);
|
int width = pixGetWidth(*pix);
|
||||||
@ -2193,7 +2193,7 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD
|
|||||||
# endif
|
# endif
|
||||||
OpenclDevice::gpuEnv = *env;
|
OpenclDevice::gpuEnv = *env;
|
||||||
OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
|
OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
|
||||||
Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr;
|
Image pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr;
|
||||||
OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true,
|
OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true,
|
||||||
closing_brick, closing_brick, max_line_width, max_line_width,
|
closing_brick, closing_brick, max_line_width, max_line_width,
|
||||||
min_line_length, min_line_length);
|
min_line_length, min_line_length);
|
||||||
@ -2221,16 +2221,16 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD
|
|||||||
# endif
|
# endif
|
||||||
|
|
||||||
// native serial code
|
// native serial code
|
||||||
Pix *src_pix = input.pix;
|
Image src_pix = input.pix;
|
||||||
Pix *pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
|
Image pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
|
||||||
Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
||||||
Pix *pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
Image pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
||||||
pixDestroy(&pix_solid);
|
pix_solid.destroy();
|
||||||
Pix *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
Image pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
||||||
Pix *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
Image pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
||||||
pixDestroy(&pix_hline);
|
pix_hline.destroy();
|
||||||
pixDestroy(&pix_vline);
|
pix_vline.destroy();
|
||||||
pixDestroy(&pix_hollow);
|
pix_hollow.destroy();
|
||||||
|
|
||||||
# if ON_WINDOWS
|
# if ON_WINDOWS
|
||||||
QueryPerformanceCounter(&time_funct_end);
|
QueryPerformanceCounter(&time_funct_end);
|
||||||
|
@ -127,10 +127,10 @@ public:
|
|||||||
/* OpenCL implementations of Morphological operations*/
|
/* OpenCL implementations of Morphological operations*/
|
||||||
|
|
||||||
// Initialization of OCL buffers used in Morph operations
|
// Initialization of OCL buffers used in Morph operations
|
||||||
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs);
|
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs);
|
||||||
static void releaseMorphCLBuffers();
|
static void releaseMorphCLBuffers();
|
||||||
|
|
||||||
static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline, Pix **pixClosed,
|
static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed,
|
||||||
bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize,
|
bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize,
|
||||||
l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize,
|
l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize,
|
||||||
l_int32 line_vsize);
|
l_int32 line_vsize);
|
||||||
@ -161,7 +161,7 @@ public:
|
|||||||
int *histogramAllChannels);
|
int *histogramAllChannels);
|
||||||
|
|
||||||
static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel,
|
static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel,
|
||||||
int bytes_per_line, int *thresholds, int *hi_values, Pix **pix,
|
int bytes_per_line, int *thresholds, int *hi_values, Image *pix,
|
||||||
int rect_height, int rect_width, int rect_top, int rect_left);
|
int rect_height, int rect_width, int rect_top, int rect_left);
|
||||||
|
|
||||||
static ds_device getDeviceSelection();
|
static ds_device getDeviceSelection();
|
||||||
|
@ -608,7 +608,7 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
|
|||||||
|
|
||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
|
|
||||||
void BaselineBlock::DrawPixSpline(Pix *pix_in) {
|
void BaselineBlock::DrawPixSpline(Image pix_in) {
|
||||||
if (non_text_block_) {
|
if (non_text_block_) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -178,7 +178,7 @@ public:
|
|||||||
void DrawFinalRows(const ICOORD &page_tr);
|
void DrawFinalRows(const ICOORD &page_tr);
|
||||||
|
|
||||||
// Render the generated spline baselines for this block on pix_in.
|
// Render the generated spline baselines for this block on pix_in.
|
||||||
void DrawPixSpline(Pix *pix_in);
|
void DrawPixSpline(Image pix_in);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Top-level line-spacing calculation. Computes an estimate of the line-
|
// Top-level line-spacing calculation. Computes an estimate of the line-
|
||||||
|
@ -187,8 +187,8 @@ bool IntGrid::AnyZeroInRect(const TBOX &rect) const {
|
|||||||
// Returns a full-resolution binary pix in which each cell over the given
|
// Returns a full-resolution binary pix in which each cell over the given
|
||||||
// threshold is filled as a black square. pixDestroy after use.
|
// threshold is filled as a black square. pixDestroy after use.
|
||||||
// Edge cells, which have a zero 4-neighbour, are not marked.
|
// Edge cells, which have a zero 4-neighbour, are not marked.
|
||||||
Pix *IntGrid::ThresholdToPix(int threshold) const {
|
Image IntGrid::ThresholdToPix(int threshold) const {
|
||||||
Pix *pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1);
|
Image pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1);
|
||||||
int cellsize = gridsize();
|
int cellsize = gridsize();
|
||||||
for (int y = 0; y < gridheight(); ++y) {
|
for (int y = 0; y < gridheight(); ++y) {
|
||||||
for (int x = 0; x < gridwidth(); ++x) {
|
for (int x = 0; x < gridwidth(); ++x) {
|
||||||
@ -204,7 +204,7 @@ Pix *IntGrid::ThresholdToPix(int threshold) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Make a Pix of the correct scaled size for the TraceOutline functions.
|
// Make a Pix of the correct scaled size for the TraceOutline functions.
|
||||||
static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) {
|
static Image GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) {
|
||||||
// Compute grid bounds of the outline and pad all round by 1.
|
// Compute grid bounds of the outline and pad all round by 1.
|
||||||
int grid_left = (box.left() - bleft.x()) / gridsize - 1;
|
int grid_left = (box.left() - bleft.x()) / gridsize - 1;
|
||||||
int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1;
|
int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1;
|
||||||
@ -221,10 +221,10 @@ static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *lef
|
|||||||
// Also returns the grid coords of the bottom-left of the Pix, in *left
|
// Also returns the grid coords of the bottom-left of the Pix, in *left
|
||||||
// and *bottom, which corresponds to (0, 0) on the Pix.
|
// and *bottom, which corresponds to (0, 0) on the Pix.
|
||||||
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
|
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
|
||||||
Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
|
Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
|
||||||
int *bottom) {
|
int *bottom) {
|
||||||
const TBOX &box = outline->bounding_box();
|
const TBOX &box = outline->bounding_box();
|
||||||
Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom);
|
Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
|
||||||
int wpl = pixGetWpl(pix);
|
int wpl = pixGetWpl(pix);
|
||||||
l_uint32 *data = pixGetData(pix);
|
l_uint32 *data = pixGetData(pix);
|
||||||
int length = outline->pathlength();
|
int length = outline->pathlength();
|
||||||
@ -243,13 +243,13 @@ Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, in
|
|||||||
Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_,
|
Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_,
|
||||||
&grid_left, &grid_bottom);
|
&grid_left, &grid_bottom);
|
||||||
grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob);
|
grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
|
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
|
||||||
Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) {
|
Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) {
|
||||||
const TBOX &box = block->pdblk.bounding_box();
|
const TBOX &box = block->pdblk.bounding_box();
|
||||||
Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom);
|
Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
|
||||||
int wpl = pixGetWpl(pix);
|
int wpl = pixGetWpl(pix);
|
||||||
l_uint32 *data = pixGetData(pix);
|
l_uint32 *data = pixGetData(pix);
|
||||||
ICOORDELT_IT it(block->pdblk.poly_block()->points());
|
ICOORDELT_IT it(block->pdblk.poly_block()->points());
|
||||||
|
@ -39,10 +39,10 @@ namespace tesseract {
|
|||||||
// Also returns the grid coords of the bottom-left of the Pix, in *left
|
// Also returns the grid coords of the bottom-left of the Pix, in *left
|
||||||
// and *bottom, which corresponds to (0, 0) on the Pix.
|
// and *bottom, which corresponds to (0, 0) on the Pix.
|
||||||
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
|
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
|
||||||
Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
|
Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
|
||||||
int *bottom);
|
int *bottom);
|
||||||
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
|
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
|
||||||
Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom);
|
Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom);
|
||||||
|
|
||||||
template <class BBC, class BBC_CLIST, class BBC_C_IT>
|
template <class BBC, class BBC_CLIST, class BBC_C_IT>
|
||||||
class GridSearch;
|
class GridSearch;
|
||||||
@ -135,7 +135,7 @@ public:
|
|||||||
|
|
||||||
// Returns a full-resolution binary pix in which each cell over the given
|
// Returns a full-resolution binary pix in which each cell over the given
|
||||||
// threshold is filled as a black square. pixDestroy after use.
|
// threshold is filled as a black square. pixDestroy after use.
|
||||||
Pix *ThresholdToPix(int threshold) const;
|
Image ThresholdToPix(int threshold) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int *grid_; // 2-d array of ints.
|
int *grid_; // 2-d array of ints.
|
||||||
@ -190,7 +190,7 @@ public:
|
|||||||
// grid (in grid coords), and the pix works up the grid from there.
|
// grid (in grid coords), and the pix works up the grid from there.
|
||||||
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
|
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
|
||||||
// RepositionIterator() on any GridSearches that are active on this grid.
|
// RepositionIterator() on any GridSearches that are active on this grid.
|
||||||
void InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox);
|
void InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox);
|
||||||
|
|
||||||
// Remove the bbox from the grid.
|
// Remove the bbox from the grid.
|
||||||
// WARNING: Any GridSearch operating on this grid could be invalidated!
|
// WARNING: Any GridSearch operating on this grid could be invalidated!
|
||||||
@ -559,7 +559,7 @@ void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertBBox(bool h_spread, bool v_spread,
|
|||||||
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
|
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
|
||||||
// RepositionIterator() on any GridSearches that are active on this grid.
|
// RepositionIterator() on any GridSearches that are active on this grid.
|
||||||
template <class BBC, class BBC_CLIST, class BBC_C_IT>
|
template <class BBC, class BBC_CLIST, class BBC_C_IT>
|
||||||
void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox) {
|
void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
|
@ -81,7 +81,7 @@ CCNonTextDetect::~CCNonTextDetect() {
|
|||||||
// The blob_block is the usual result of connected component analysis,
|
// The blob_block is the usual result of connected component analysis,
|
||||||
// holding the detected blobs.
|
// holding the detected blobs.
|
||||||
// The returned Pix should be PixDestroyed after use.
|
// The returned Pix should be PixDestroyed after use.
|
||||||
Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block) {
|
Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) {
|
||||||
// Insert the smallest blobs into the grid.
|
// Insert the smallest blobs into the grid.
|
||||||
InsertBlobList(&blob_block->small_blobs);
|
InsertBlobList(&blob_block->small_blobs);
|
||||||
InsertBlobList(&blob_block->noise_blobs);
|
InsertBlobList(&blob_block->noise_blobs);
|
||||||
@ -102,7 +102,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b
|
|||||||
}
|
}
|
||||||
noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
|
noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
|
||||||
good_grid.Clear(); // Not needed any more.
|
good_grid.Clear(); // Not needed any more.
|
||||||
Pix *pix = noise_density_->ThresholdToPix(max_noise_count_);
|
Image pix = noise_density_->ThresholdToPix(max_noise_count_);
|
||||||
if (debug) {
|
if (debug) {
|
||||||
pixWrite("junknoisemask.png", pix, IFF_PNG);
|
pixWrite("junknoisemask.png", pix, IFF_PNG);
|
||||||
}
|
}
|
||||||
@ -148,7 +148,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b
|
|||||||
// more likely non-text.
|
// more likely non-text.
|
||||||
// The photo_map is used to bias the decision towards non-text, rather than
|
// The photo_map is used to bias the decision towards non-text, rather than
|
||||||
// supplying definite decision.
|
// supplying definite decision.
|
||||||
IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid) {
|
IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) {
|
||||||
IntGrid *noise_counts = CountCellElements();
|
IntGrid *noise_counts = CountCellElements();
|
||||||
IntGrid *noise_density = noise_counts->NeighbourhoodSum();
|
IntGrid *noise_density = noise_counts->NeighbourhoodSum();
|
||||||
IntGrid *good_counts = good_grid->CountCellElements();
|
IntGrid *good_counts = good_grid->CountCellElements();
|
||||||
@ -235,7 +235,7 @@ static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, i
|
|||||||
// blobs are drawn on it in ok_color.
|
// blobs are drawn on it in ok_color.
|
||||||
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps,
|
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps,
|
||||||
ScrollView *win, ScrollView::Color ok_color,
|
ScrollView *win, ScrollView::Color ok_color,
|
||||||
Pix *nontext_mask) {
|
Image nontext_mask) {
|
||||||
int imageheight = tright().y() - bleft().x();
|
int imageheight = tright().y() - bleft().x();
|
||||||
BLOBNBOX_IT blob_it(blobs);
|
BLOBNBOX_IT blob_it(blobs);
|
||||||
BLOBNBOX_LIST dead_blobs;
|
BLOBNBOX_LIST dead_blobs;
|
||||||
@ -255,10 +255,10 @@ void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_bl
|
|||||||
if (noise_density_->AnyZeroInRect(box)) {
|
if (noise_density_->AnyZeroInRect(box)) {
|
||||||
// There is a danger that the bounding box may overlap real text, so
|
// There is a danger that the bounding box may overlap real text, so
|
||||||
// we need to render the outline.
|
// we need to render the outline.
|
||||||
Pix *blob_pix = blob->cblob()->render_outline();
|
Image blob_pix = blob->cblob()->render_outline();
|
||||||
pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
|
pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
|
||||||
PIX_SRC | PIX_DST, blob_pix, 0, 0);
|
PIX_SRC | PIX_DST, blob_pix, 0, 0);
|
||||||
pixDestroy(&blob_pix);
|
blob_pix.destroy();
|
||||||
} else {
|
} else {
|
||||||
if (box.area() < gridsize() * gridsize()) {
|
if (box.area() < gridsize() * gridsize()) {
|
||||||
// It is a really bad idea to make lots of small components in the
|
// It is a really bad idea to make lots of small components in the
|
||||||
|
@ -42,7 +42,7 @@ public:
|
|||||||
// The blob_block is the usual result of connected component analysis,
|
// The blob_block is the usual result of connected component analysis,
|
||||||
// holding the detected blobs.
|
// holding the detected blobs.
|
||||||
// The returned Pix should be PixDestroyed after use.
|
// The returned Pix should be PixDestroyed after use.
|
||||||
Pix *ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block);
|
Image ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Computes and returns the noise_density IntGrid, at the same gridsize as
|
// Computes and returns the noise_density IntGrid, at the same gridsize as
|
||||||
@ -52,7 +52,7 @@ private:
|
|||||||
// more likely non-text.
|
// more likely non-text.
|
||||||
// The photo_map is used to bias the decision towards non-text, rather than
|
// The photo_map is used to bias the decision towards non-text, rather than
|
||||||
// supplying definite decision.
|
// supplying definite decision.
|
||||||
IntGrid *ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid);
|
IntGrid *ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid);
|
||||||
|
|
||||||
// Tests each blob in the list to see if it is certain non-text using 2
|
// Tests each blob in the list to see if it is certain non-text using 2
|
||||||
// conditions:
|
// conditions:
|
||||||
@ -68,7 +68,7 @@ private:
|
|||||||
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
|
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
|
||||||
// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
|
// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
|
||||||
void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, ScrollView *win,
|
void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, ScrollView *win,
|
||||||
ScrollView::Color ok_color, Pix *nontext_mask);
|
ScrollView::Color ok_color, Image nontext_mask);
|
||||||
// Returns true if the given blob overlaps more than max_overlaps blobs
|
// Returns true if the given blob overlaps more than max_overlaps blobs
|
||||||
// in the current grid.
|
// in the current grid.
|
||||||
bool BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps);
|
bool BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps);
|
||||||
|
@ -108,7 +108,7 @@ ColumnFinder::~ColumnFinder() {
|
|||||||
delete[] best_columns_;
|
delete[] best_columns_;
|
||||||
delete stroke_width_;
|
delete stroke_width_;
|
||||||
delete input_blobs_win_;
|
delete input_blobs_win_;
|
||||||
pixDestroy(&nontext_map_);
|
nontext_map_.destroy();
|
||||||
while (denorm_ != nullptr) {
|
while (denorm_ != nullptr) {
|
||||||
DENORM *dead_denorm = denorm_;
|
DENORM *dead_denorm = denorm_;
|
||||||
denorm_ = const_cast<DENORM *>(denorm_->predecessor());
|
denorm_ = const_cast<DENORM *>(denorm_->predecessor());
|
||||||
@ -148,7 +148,7 @@ ColumnFinder::~ColumnFinder() {
|
|||||||
// direction, so the textline projection_ map can be setup.
|
// direction, so the textline projection_ map can be setup.
|
||||||
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
||||||
// determine the gross textline alignment of the page.
|
// determine the gross textline alignment of the page.
|
||||||
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix,
|
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix,
|
||||||
TO_BLOCK *input_block) {
|
TO_BLOCK *input_block) {
|
||||||
part_grid_.Init(gridsize(), bleft(), tright());
|
part_grid_.Init(gridsize(), bleft(), tright());
|
||||||
delete stroke_width_;
|
delete stroke_width_;
|
||||||
@ -162,7 +162,7 @@ void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask
|
|||||||
}
|
}
|
||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
SetBlockRuleEdges(input_block);
|
SetBlockRuleEdges(input_block);
|
||||||
pixDestroy(&nontext_map_);
|
nontext_map_.destroy();
|
||||||
// Run a preliminary strokewidth neighbour detection on the medium blobs.
|
// Run a preliminary strokewidth neighbour detection on the medium blobs.
|
||||||
stroke_width_->SetNeighboursOnMediumBlobs(input_block);
|
stroke_width_->SetNeighboursOnMediumBlobs(input_block);
|
||||||
CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
|
CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
|
||||||
@ -283,9 +283,9 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines,
|
|||||||
// noise/diacriticness determined via classification.
|
// noise/diacriticness determined via classification.
|
||||||
// Returns -1 if the user hits the 'd' key in the blocks window while running
|
// Returns -1 if the user hits the 'd' key in the blocks window while running
|
||||||
// in debug mode, which requests a retry with more debug info.
|
// in debug mode, which requests a retry with more debug info.
|
||||||
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor,
|
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor,
|
||||||
TO_BLOCK *input_block, Pix *photo_mask_pix, Pix *thresholds_pix,
|
TO_BLOCK *input_block, Image photo_mask_pix, Image thresholds_pix,
|
||||||
Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks,
|
Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks,
|
||||||
BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) {
|
BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) {
|
||||||
pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
|
pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
|
||||||
stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
|
stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
|
||||||
|
@ -107,7 +107,7 @@ public:
|
|||||||
// direction, so the textline projection_ map can be setup.
|
// direction, so the textline projection_ map can be setup.
|
||||||
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
||||||
// determine the gross textline alignment of the page.
|
// determine the gross textline alignment of the page.
|
||||||
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block);
|
void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block);
|
||||||
|
|
||||||
// Tests for vertical alignment of text (returning true if so), and generates
|
// Tests for vertical alignment of text (returning true if so), and generates
|
||||||
// a list of blobs (in osd_blobs) for orientation and script detection.
|
// a list of blobs (in osd_blobs) for orientation and script detection.
|
||||||
@ -156,8 +156,8 @@ public:
|
|||||||
// appropriate word after the rest of layout analysis.
|
// appropriate word after the rest of layout analysis.
|
||||||
// Returns -1 if the user hits the 'd' key in the blocks window while running
|
// Returns -1 if the user hits the 'd' key in the blocks window while running
|
||||||
// in debug mode, which requests a retry with more debug info.
|
// in debug mode, which requests a retry with more debug info.
|
||||||
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block,
|
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block,
|
||||||
Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug,
|
Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug,
|
||||||
BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks);
|
BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks);
|
||||||
|
|
||||||
// Get the rotation required to deskew, and its inverse rotation.
|
// Get the rotation required to deskew, and its inverse rotation.
|
||||||
@ -330,7 +330,7 @@ private:
|
|||||||
// Horizontal line separators.
|
// Horizontal line separators.
|
||||||
TabVector_LIST horizontal_lines_;
|
TabVector_LIST horizontal_lines_;
|
||||||
// Image map of photo/noise areas on the page.
|
// Image map of photo/noise areas on the page.
|
||||||
Pix *nontext_map_;
|
Image nontext_map_;
|
||||||
// Textline projection map.
|
// Textline projection map.
|
||||||
TextlineProjection projection_;
|
TextlineProjection projection_;
|
||||||
// Sequence of DENORMS that indicate how to get back to the original image
|
// Sequence of DENORMS that indicate how to get back to the original image
|
||||||
|
@ -608,7 +608,7 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
|
|||||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||||
// into images.
|
// into images.
|
||||||
// Returns true if anything was changed.
|
// Returns true if anything was changed.
|
||||||
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map,
|
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
|
||||||
const TBOX &im_box, const FCOORD &rotation) {
|
const TBOX &im_box, const FCOORD &rotation) {
|
||||||
// Iterate the ColPartitions in the grid.
|
// Iterate the ColPartitions in the grid.
|
||||||
ColPartitionGridSearch gsearch(this);
|
ColPartitionGridSearch gsearch(this);
|
||||||
@ -1392,7 +1392,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX
|
|||||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||||
// into images.
|
// into images.
|
||||||
// Returns true if the partition was changed.
|
// Returns true if the partition was changed.
|
||||||
bool ColPartitionGrid::SmoothRegionType(Pix *nontext_map, const TBOX &im_box,
|
bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
|
||||||
const FCOORD &rerotation, bool debug, ColPartition *part) {
|
const FCOORD &rerotation, bool debug, ColPartition *part) {
|
||||||
const TBOX &part_box = part->bounding_box();
|
const TBOX &part_box = part->bounding_box();
|
||||||
if (debug) {
|
if (debug) {
|
||||||
@ -1511,7 +1511,7 @@ enum NeighbourPartitionType {
|
|||||||
// partitions that makes a decisive result (if any) and returns the type
|
// partitions that makes a decisive result (if any) and returns the type
|
||||||
// and the distance of the collection. If there are any pixels in the
|
// and the distance of the collection. If there are any pixels in the
|
||||||
// nontext_map, then the decision is biased towards image.
|
// nontext_map, then the decision is biased towards image.
|
||||||
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map,
|
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
|
||||||
const TBOX &im_box, const FCOORD &rerotation,
|
const TBOX &im_box, const FCOORD &rerotation,
|
||||||
bool debug, const ColPartition &part,
|
bool debug, const ColPartition &part,
|
||||||
int *best_distance) {
|
int *best_distance) {
|
||||||
@ -1594,7 +1594,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
|
|||||||
// dists must be an array of vectors of size NPT_COUNT.
|
// dists must be an array of vectors of size NPT_COUNT.
|
||||||
void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
|
void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
|
||||||
const ICOORD &dist_scaling, const TBOX &search_box,
|
const ICOORD &dist_scaling, const TBOX &search_box,
|
||||||
Pix *nontext_map, const TBOX &im_box,
|
Image nontext_map, const TBOX &im_box,
|
||||||
const FCOORD &rerotation, bool debug,
|
const FCOORD &rerotation, bool debug,
|
||||||
std::vector<int> *dists) {
|
std::vector<int> *dists) {
|
||||||
const TBOX &part_box = base_part.bounding_box();
|
const TBOX &part_box = base_part.bounding_box();
|
||||||
|
@ -98,7 +98,7 @@ public:
|
|||||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||||
// into images.
|
// into images.
|
||||||
// Returns true if anything was changed.
|
// Returns true if anything was changed.
|
||||||
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box,
|
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box,
|
||||||
const FCOORD &rerotation);
|
const FCOORD &rerotation);
|
||||||
|
|
||||||
// Reflects the grid and its colpartitions in the y-axis, assuming that
|
// Reflects the grid and its colpartitions in the y-axis, assuming that
|
||||||
@ -199,7 +199,7 @@ private:
|
|||||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||||
// into images.
|
// into images.
|
||||||
// Returns true if the partition was changed.
|
// Returns true if the partition was changed.
|
||||||
bool SmoothRegionType(Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
||||||
ColPartition *part);
|
ColPartition *part);
|
||||||
// Executes the search for SmoothRegionType in a single direction.
|
// Executes the search for SmoothRegionType in a single direction.
|
||||||
// Creates a bounding box that is padded in all directions except direction,
|
// Creates a bounding box that is padded in all directions except direction,
|
||||||
@ -207,7 +207,7 @@ private:
|
|||||||
// partitions that makes a decisive result (if any) and returns the type
|
// partitions that makes a decisive result (if any) and returns the type
|
||||||
// and the distance of the collection. If there are any pixels in the
|
// and the distance of the collection. If there are any pixels in the
|
||||||
// nontext_map, then the decision is biased towards image.
|
// nontext_map, then the decision is biased towards image.
|
||||||
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map,
|
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
|
||||||
const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
||||||
const ColPartition &part, int *best_distance);
|
const ColPartition &part, int *best_distance);
|
||||||
// Counts the partitions in the given search_box by appending the gap
|
// Counts the partitions in the given search_box by appending the gap
|
||||||
@ -216,7 +216,7 @@ private:
|
|||||||
// vectors in the dists array are sorted in increasing order.
|
// vectors in the dists array are sorted in increasing order.
|
||||||
// dists must be an array of vectors of size NPT_COUNT.
|
// dists must be an array of vectors of size NPT_COUNT.
|
||||||
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling,
|
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling,
|
||||||
const TBOX &search_box, Pix *nontext_map, const TBOX &im_box,
|
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
|
||||||
const FCOORD &rerotation, bool debug, std::vector<int> *dists);
|
const FCOORD &rerotation, bool debug, std::vector<int> *dists);
|
||||||
|
|
||||||
// Improves the margins of the ColPartition by searching for
|
// Improves the margins of the ColPartition by searching for
|
||||||
|
@ -55,20 +55,20 @@ ShiroRekhaSplitter::~ShiroRekhaSplitter() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ShiroRekhaSplitter::Clear() {
|
void ShiroRekhaSplitter::Clear() {
|
||||||
pixDestroy(&orig_pix_);
|
orig_pix_.destroy();
|
||||||
pixDestroy(&splitted_image_);
|
splitted_image_.destroy();
|
||||||
pageseg_split_strategy_ = NO_SPLIT;
|
pageseg_split_strategy_ = NO_SPLIT;
|
||||||
ocr_split_strategy_ = NO_SPLIT;
|
ocr_split_strategy_ = NO_SPLIT;
|
||||||
pixDestroy(&debug_image_);
|
debug_image_.destroy();
|
||||||
segmentation_block_list_ = nullptr;
|
segmentation_block_list_ = nullptr;
|
||||||
global_xheight_ = kUnspecifiedXheight;
|
global_xheight_ = kUnspecifiedXheight;
|
||||||
perform_close_ = false;
|
perform_close_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// On setting the input image, a clone of it is owned by this class.
|
// On setting the input image, a clone of it is owned by this class.
|
||||||
void ShiroRekhaSplitter::set_orig_pix(Pix *pix) {
|
void ShiroRekhaSplitter::set_orig_pix(Image pix) {
|
||||||
if (orig_pix_) {
|
if (orig_pix_) {
|
||||||
pixDestroy(&orig_pix_);
|
orig_pix_.destroy();
|
||||||
}
|
}
|
||||||
orig_pix_ = pixClone(pix);
|
orig_pix_ = pixClone(pix);
|
||||||
}
|
}
|
||||||
@ -91,32 +91,32 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
|
|||||||
tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
|
tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
|
||||||
}
|
}
|
||||||
// Create a copy of original image to store the splitting output.
|
// Create a copy of original image to store the splitting output.
|
||||||
pixDestroy(&splitted_image_);
|
splitted_image_.destroy();
|
||||||
splitted_image_ = pixCopy(nullptr, orig_pix_);
|
splitted_image_ = pixCopy(nullptr, orig_pix_);
|
||||||
|
|
||||||
// Initialize debug image if required.
|
// Initialize debug image if required.
|
||||||
if (devanagari_split_debugimage) {
|
if (devanagari_split_debugimage) {
|
||||||
pixDestroy(&debug_image_);
|
debug_image_.destroy();
|
||||||
debug_image_ = pixConvertTo32(orig_pix_);
|
debug_image_ = pixConvertTo32(orig_pix_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine all connected components in the input image. A close operation
|
// Determine all connected components in the input image. A close operation
|
||||||
// may be required prior to this, depending on the current settings.
|
// may be required prior to this, depending on the current settings.
|
||||||
Pix *pix_for_ccs = pixClone(orig_pix_);
|
Image pix_for_ccs = pixClone(orig_pix_);
|
||||||
if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
|
if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
|
||||||
if (devanagari_split_debuglevel > 0) {
|
if (devanagari_split_debuglevel > 0) {
|
||||||
tprintf("Performing a global close operation..\n");
|
tprintf("Performing a global close operation..\n");
|
||||||
}
|
}
|
||||||
// A global measure is available for xheight, but no local information
|
// A global measure is available for xheight, but no local information
|
||||||
// exists.
|
// exists.
|
||||||
pixDestroy(&pix_for_ccs);
|
pix_for_ccs.destroy();
|
||||||
pix_for_ccs = pixCopy(nullptr, orig_pix_);
|
pix_for_ccs = pixCopy(nullptr, orig_pix_);
|
||||||
PerformClose(pix_for_ccs, global_xheight_);
|
PerformClose(pix_for_ccs, global_xheight_);
|
||||||
}
|
}
|
||||||
Pixa *ccs;
|
Pixa *ccs;
|
||||||
Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
|
Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
|
||||||
boxaDestroy(&tmp_boxa);
|
boxaDestroy(&tmp_boxa);
|
||||||
pixDestroy(&pix_for_ccs);
|
pix_for_ccs.destroy();
|
||||||
|
|
||||||
// Iterate over all connected components. Get their bounding boxes and clip
|
// Iterate over all connected components. Get their bounding boxes and clip
|
||||||
// out the image regions corresponding to these boxes from the original image.
|
// out the image regions corresponding to these boxes from the original image.
|
||||||
@ -128,7 +128,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
|
|||||||
}
|
}
|
||||||
for (int i = 0; i < num_ccs; ++i) {
|
for (int i = 0; i < num_ccs; ++i) {
|
||||||
Box *box = ccs->boxa->box[i];
|
Box *box = ccs->boxa->box[i];
|
||||||
Pix *word_pix = pixClipRectangle(orig_pix_, box, nullptr);
|
Image word_pix = pixClipRectangle(orig_pix_, box, nullptr);
|
||||||
ASSERT_HOST(word_pix);
|
ASSERT_HOST(word_pix);
|
||||||
int xheight = GetXheightForCC(box);
|
int xheight = GetXheightForCC(box);
|
||||||
if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
|
if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
|
||||||
@ -143,7 +143,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
|
|||||||
} else if (devanagari_split_debuglevel > 0) {
|
} else if (devanagari_split_debuglevel > 0) {
|
||||||
tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h);
|
tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h);
|
||||||
}
|
}
|
||||||
pixDestroy(&word_pix);
|
word_pix.destroy();
|
||||||
}
|
}
|
||||||
// Actually clear the boxes now.
|
// Actually clear the boxes now.
|
||||||
for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
|
for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
|
||||||
@ -161,7 +161,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
|
|||||||
|
|
||||||
// Method to perform a close operation on the input image. The xheight
|
// Method to perform a close operation on the input image. The xheight
|
||||||
// estimate decides the size of sel used.
|
// estimate decides the size of sel used.
|
||||||
void ShiroRekhaSplitter::PerformClose(Pix *pix, int xheight_estimate) {
|
void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) {
|
||||||
pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
|
pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -221,7 +221,7 @@ int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) {
|
|||||||
// leeway. The leeway depends on the input xheight, if provided, else a
|
// leeway. The leeway depends on the input xheight, if provided, else a
|
||||||
// conservative multiplier on approximate stroke width is used (which may lead
|
// conservative multiplier on approximate stroke width is used (which may lead
|
||||||
// to over-splitting).
|
// to over-splitting).
|
||||||
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight,
|
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight,
|
||||||
int word_left, int word_top, Boxa *regions_to_clear) {
|
int word_left, int word_top, Boxa *regions_to_clear) {
|
||||||
if (split_strategy == NO_SPLIT) {
|
if (split_strategy == NO_SPLIT) {
|
||||||
return;
|
return;
|
||||||
@ -257,7 +257,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *
|
|||||||
// Clear the ascender and descender regions of the word.
|
// Clear the ascender and descender regions of the word.
|
||||||
// Obtain a vertical projection histogram for the resulting image.
|
// Obtain a vertical projection histogram for the resulting image.
|
||||||
Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3);
|
Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3);
|
||||||
Pix *word_in_xheight = pixCopy(nullptr, pix);
|
Image word_in_xheight = pixCopy(nullptr, pix);
|
||||||
pixClearInRect(word_in_xheight, box_to_clear);
|
pixClearInRect(word_in_xheight, box_to_clear);
|
||||||
// Also clear any pixels which are below shirorekha_bottom + some leeway.
|
// Also clear any pixels which are below shirorekha_bottom + some leeway.
|
||||||
// The leeway is set to xheight if the information is available, else it is a
|
// The leeway is set to xheight if the information is available, else it is a
|
||||||
@ -276,7 +276,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *
|
|||||||
|
|
||||||
PixelHistogram vert_hist;
|
PixelHistogram vert_hist;
|
||||||
vert_hist.ConstructVerticalCountHist(word_in_xheight);
|
vert_hist.ConstructVerticalCountHist(word_in_xheight);
|
||||||
pixDestroy(&word_in_xheight);
|
word_in_xheight.destroy();
|
||||||
|
|
||||||
// If the number of black pixel in any column of the image is less than a
|
// If the number of black pixel in any column of the image is less than a
|
||||||
// fraction of the stroke width, treat it as noise / a stray mark. Perform
|
// fraction of the stroke width, treat it as noise / a stray mark. Perform
|
||||||
@ -385,7 +385,7 @@ Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
|
|||||||
|
|
||||||
// This method returns the computed mode-height of blobs in the pix.
|
// This method returns the computed mode-height of blobs in the pix.
|
||||||
// It also prunes very small blobs from calculation.
|
// It also prunes very small blobs from calculation.
|
||||||
int ShiroRekhaSplitter::GetModeHeight(Pix *pix) {
|
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
|
||||||
Boxa *boxa = pixConnComp(pix, nullptr, 8);
|
Boxa *boxa = pixConnComp(pix, nullptr, 8);
|
||||||
STATS heights(0, pixGetHeight(pix));
|
STATS heights(0, pixGetHeight(pix));
|
||||||
heights.clear();
|
heights.clear();
|
||||||
@ -402,7 +402,7 @@ int ShiroRekhaSplitter::GetModeHeight(Pix *pix) {
|
|||||||
|
|
||||||
// This method returns y-extents of the shiro-rekha computed from the input
|
// This method returns y-extents of the shiro-rekha computed from the input
|
||||||
// word image.
|
// word image.
|
||||||
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top,
|
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top,
|
||||||
int *shirorekha_bottom, int *shirorekha_ylevel) {
|
int *shirorekha_bottom, int *shirorekha_ylevel) {
|
||||||
// Compute a histogram from projecting the word on a vertical line.
|
// Compute a histogram from projecting the word on a vertical line.
|
||||||
PixelHistogram hist_horiz;
|
PixelHistogram hist_horiz;
|
||||||
@ -450,7 +450,7 @@ int PixelHistogram::GetHistogramMaximum(int *count) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Methods to construct histograms from images.
|
// Methods to construct histograms from images.
|
||||||
void PixelHistogram::ConstructVerticalCountHist(Pix *pix) {
|
void PixelHistogram::ConstructVerticalCountHist(Image pix) {
|
||||||
Clear();
|
Clear();
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
@ -471,7 +471,7 @@ void PixelHistogram::ConstructVerticalCountHist(Pix *pix) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PixelHistogram::ConstructHorizontalCountHist(Pix *pix) {
|
void PixelHistogram::ConstructHorizontalCountHist(Image pix) {
|
||||||
Clear();
|
Clear();
|
||||||
Numa *counts = pixCountPixelsByRow(pix, nullptr);
|
Numa *counts = pixCountPixelsByRow(pix, nullptr);
|
||||||
length_ = numaGetCount(counts);
|
length_ = numaGetCount(counts);
|
||||||
|
@ -56,8 +56,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Methods to construct histograms from images. These clear any existing data.
|
// Methods to construct histograms from images. These clear any existing data.
|
||||||
void ConstructVerticalCountHist(Pix *pix);
|
void ConstructVerticalCountHist(Image pix);
|
||||||
void ConstructHorizontalCountHist(Pix *pix);
|
void ConstructHorizontalCountHist(Image pix);
|
||||||
|
|
||||||
// This method returns the global-maxima for the histogram. The frequency of
|
// This method returns the global-maxima for the histogram. The frequency of
|
||||||
// the global maxima is returned in count, if specified.
|
// the global maxima is returned in count, if specified.
|
||||||
@ -118,16 +118,16 @@ public:
|
|||||||
// Returns the image obtained from shiro-rekha splitting. The returned object
|
// Returns the image obtained from shiro-rekha splitting. The returned object
|
||||||
// is owned by this class. Callers may want to clone the returned pix to keep
|
// is owned by this class. Callers may want to clone the returned pix to keep
|
||||||
// it alive beyond the life of ShiroRekhaSplitter object.
|
// it alive beyond the life of ShiroRekhaSplitter object.
|
||||||
Pix *splitted_image() {
|
Image splitted_image() {
|
||||||
return splitted_image_;
|
return splitted_image_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// On setting the input image, a clone of it is owned by this class.
|
// On setting the input image, a clone of it is owned by this class.
|
||||||
void set_orig_pix(Pix *pix);
|
void set_orig_pix(Image pix);
|
||||||
|
|
||||||
// Returns the input image provided to the object. This object is owned by
|
// Returns the input image provided to the object. This object is owned by
|
||||||
// this class. Callers may want to clone the returned pix to work with it.
|
// this class. Callers may want to clone the returned pix to work with it.
|
||||||
Pix *orig_pix() {
|
Image orig_pix() {
|
||||||
return orig_pix_;
|
return orig_pix_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,12 +154,12 @@ public:
|
|||||||
// This method returns the computed mode-height of blobs in the pix.
|
// This method returns the computed mode-height of blobs in the pix.
|
||||||
// It also prunes very small blobs from calculation. Could be used to provide
|
// It also prunes very small blobs from calculation. Could be used to provide
|
||||||
// a global xheight estimate for images which have the same point-size text.
|
// a global xheight estimate for images which have the same point-size text.
|
||||||
static int GetModeHeight(Pix *pix);
|
static int GetModeHeight(Image pix);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Method to perform a close operation on the input image. The xheight
|
// Method to perform a close operation on the input image. The xheight
|
||||||
// estimate decides the size of sel used.
|
// estimate decides the size of sel used.
|
||||||
static void PerformClose(Pix *pix, int xheight_estimate);
|
static void PerformClose(Image pix, int xheight_estimate);
|
||||||
|
|
||||||
// This method resolves the cc bbox to a particular row and returns the row's
|
// This method resolves the cc bbox to a particular row and returns the row's
|
||||||
// xheight. This uses block_list_ if available, else just returns the
|
// xheight. This uses block_list_ if available, else just returns the
|
||||||
@ -173,7 +173,7 @@ private:
|
|||||||
// conservative estimate of stroke width along with an associated multiplier
|
// conservative estimate of stroke width along with an associated multiplier
|
||||||
// is used in its place. It is advisable to have a specified xheight when
|
// is used in its place. It is advisable to have a specified xheight when
|
||||||
// splitting for classification/training.
|
// splitting for classification/training.
|
||||||
void SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight, int word_left,
|
void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
|
||||||
int word_top, Boxa *regions_to_clear);
|
int word_top, Boxa *regions_to_clear);
|
||||||
|
|
||||||
// Returns a new box object for the corresponding TBOX, based on the original
|
// Returns a new box object for the corresponding TBOX, based on the original
|
||||||
@ -182,15 +182,15 @@ private:
|
|||||||
|
|
||||||
// This method returns y-extents of the shiro-rekha computed from the input
|
// This method returns y-extents of the shiro-rekha computed from the input
|
||||||
// word image.
|
// word image.
|
||||||
static void GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top, int *shirorekha_bottom,
|
static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
|
||||||
int *shirorekha_ylevel);
|
int *shirorekha_ylevel);
|
||||||
|
|
||||||
Pix *orig_pix_; // Just a clone of the input image passed.
|
Image orig_pix_; // Just a clone of the input image passed.
|
||||||
Pix *splitted_image_; // Image produced after the last splitting round. The
|
Image splitted_image_; // Image produced after the last splitting round. The
|
||||||
// object is owned by this class.
|
// object is owned by this class.
|
||||||
SplitStrategy pageseg_split_strategy_;
|
SplitStrategy pageseg_split_strategy_;
|
||||||
SplitStrategy ocr_split_strategy_;
|
SplitStrategy ocr_split_strategy_;
|
||||||
Pix *debug_image_;
|
Image debug_image_;
|
||||||
// This block list is used as a golden segmentation when performing splitting.
|
// This block list is used as a golden segmentation when performing splitting.
|
||||||
BLOCK_LIST *segmentation_block_list_;
|
BLOCK_LIST *segmentation_block_list_;
|
||||||
int global_xheight_;
|
int global_xheight_;
|
||||||
|
@ -322,7 +322,7 @@ void OL_BUCKETS::extract_children( // recursive count
|
|||||||
* Run the edge detector over the block and return a list of blobs.
|
* Run the edge detector over the block and return a list of blobs.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void extract_edges(Pix *pix, // thresholded image
|
void extract_edges(Image pix, // thresholded image
|
||||||
BLOCK *block) { // block to scan
|
BLOCK *block) { // block to scan
|
||||||
C_OUTLINE_LIST outlines; // outlines in block
|
C_OUTLINE_LIST outlines; // outlines in block
|
||||||
C_OUTLINE_IT out_it = &outlines;
|
C_OUTLINE_IT out_it = &outlines;
|
||||||
|
@ -76,7 +76,7 @@ private:
|
|||||||
int32_t index; // for extraction scan
|
int32_t index; // for extraction scan
|
||||||
};
|
};
|
||||||
|
|
||||||
void extract_edges(Pix *pix, // thresholded image
|
void extract_edges(Image pix, // thresholded image
|
||||||
BLOCK *block); // block to scan
|
BLOCK *block); // block to scan
|
||||||
void outlines_to_blobs( // find blobs
|
void outlines_to_blobs( // find blobs
|
||||||
BLOCK *block, // block to scan
|
BLOCK *block, // block to scan
|
||||||
|
@ -34,7 +34,7 @@ namespace tesseract {
|
|||||||
// instead of weak vtables in every compilation unit.
|
// instead of weak vtables in every compilation unit.
|
||||||
EquationDetectBase::~EquationDetectBase() = default;
|
EquationDetectBase::~EquationDetectBase() = default;
|
||||||
|
|
||||||
void EquationDetectBase::RenderSpecialText(Pix *pix, BLOBNBOX *blob) {
|
void EquationDetectBase::RenderSpecialText(Image pix, BLOBNBOX *blob) {
|
||||||
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
|
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
|
||||||
const TBOX &tbox = blob->bounding_box();
|
const TBOX &tbox = blob->bounding_box();
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
|
#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
|
||||||
#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
|
#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
|
||||||
|
|
||||||
#include <tesseract/export.h>
|
#include "image.h"
|
||||||
|
|
||||||
class BLOBNBOX_LIST;
|
class BLOBNBOX_LIST;
|
||||||
class TO_BLOCK;
|
class TO_BLOCK;
|
||||||
@ -53,7 +53,7 @@ public:
|
|||||||
// BSTT_ITALIC: green box
|
// BSTT_ITALIC: green box
|
||||||
// BSTT_UNCLEAR: blue box
|
// BSTT_UNCLEAR: blue box
|
||||||
// All others: yellow box
|
// All others: yellow box
|
||||||
static void RenderSpecialText(Pix *pix, BLOBNBOX *blob);
|
static void RenderSpecialText(Image pix, BLOBNBOX *blob);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace tesseract
|
} // namespace tesseract
|
||||||
|
@ -60,14 +60,14 @@ const int kNoisePadding = 4;
|
|||||||
// The returned pix may be nullptr, meaning no images found.
|
// The returned pix may be nullptr, meaning no images found.
|
||||||
// If not nullptr, it must be PixDestroyed by the caller.
|
// If not nullptr, it must be PixDestroyed by the caller.
|
||||||
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
|
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
|
||||||
Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
|
Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
|
||||||
// Not worth looking at small images.
|
// Not worth looking at small images.
|
||||||
if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
|
if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
|
||||||
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reduce by factor 2.
|
// Reduce by factor 2.
|
||||||
Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
|
Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixr, "CascadeReduced");
|
pixa_debug->AddPix(pixr, "CascadeReduced");
|
||||||
}
|
}
|
||||||
@ -78,76 +78,76 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
|
|||||||
// pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
|
// pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
|
||||||
// want to bypass that.
|
// want to bypass that.
|
||||||
if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
|
if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
|
||||||
pixDestroy(&pixr);
|
pixr.destroy();
|
||||||
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
||||||
}
|
}
|
||||||
// Get the halftone mask.
|
// Get the halftone mask.
|
||||||
l_int32 ht_found = 0;
|
l_int32 ht_found = 0;
|
||||||
Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
|
Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
|
||||||
Pix *pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
|
Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
|
||||||
if (pixadb) {
|
if (pixadb) {
|
||||||
Pix *pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
|
Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixdb, "HalftoneMask");
|
pixa_debug->AddPix(pixdb, "HalftoneMask");
|
||||||
}
|
}
|
||||||
pixDestroy(&pixdb);
|
pixdb.destroy();
|
||||||
pixaDestroy(&pixadb);
|
pixaDestroy(&pixadb);
|
||||||
}
|
}
|
||||||
pixDestroy(&pixr);
|
pixr.destroy();
|
||||||
if (!ht_found && pixht2 != nullptr) {
|
if (!ht_found && pixht2 != nullptr) {
|
||||||
pixDestroy(&pixht2);
|
pixht2.destroy();
|
||||||
}
|
}
|
||||||
if (pixht2 == nullptr) {
|
if (pixht2 == nullptr) {
|
||||||
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expand back up again.
|
// Expand back up again.
|
||||||
Pix *pixht = pixExpandReplicate(pixht2, 2);
|
Image pixht = pixExpandReplicate(pixht2, 2);
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixht, "HalftoneReplicated");
|
pixa_debug->AddPix(pixht, "HalftoneReplicated");
|
||||||
}
|
}
|
||||||
pixDestroy(&pixht2);
|
pixht2.destroy();
|
||||||
|
|
||||||
// Fill to capture pixels near the mask edges that were missed
|
// Fill to capture pixels near the mask edges that were missed
|
||||||
Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
|
Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
|
||||||
pixOr(pixht, pixht, pixt);
|
pixOr(pixht, pixht, pixt);
|
||||||
pixDestroy(&pixt);
|
pixt.destroy();
|
||||||
|
|
||||||
// Eliminate lines and bars that may be joined to images.
|
// Eliminate lines and bars that may be joined to images.
|
||||||
Pix *pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
|
Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
|
||||||
pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
|
pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixfinemask, "FineMask");
|
pixa_debug->AddPix(pixfinemask, "FineMask");
|
||||||
}
|
}
|
||||||
Pix *pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
|
Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
|
||||||
Pix *pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
|
Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
|
||||||
pixDestroy(&pixreduced);
|
pixreduced.destroy();
|
||||||
pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
|
pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
|
||||||
Pix *pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
|
Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
|
||||||
pixDestroy(&pixreduced2);
|
pixreduced2.destroy();
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
|
pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
|
||||||
}
|
}
|
||||||
// Combine the coarse and fine image masks.
|
// Combine the coarse and fine image masks.
|
||||||
pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
|
pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
|
||||||
pixDestroy(&pixfinemask);
|
pixfinemask.destroy();
|
||||||
// Dilate a bit to make sure we get everything.
|
// Dilate a bit to make sure we get everything.
|
||||||
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
|
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
|
||||||
Pix *pixmask = pixExpandReplicate(pixcoarsemask, 16);
|
Image pixmask = pixExpandReplicate(pixcoarsemask, 16);
|
||||||
pixDestroy(&pixcoarsemask);
|
pixcoarsemask.destroy();
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixmask, "MaskDilated");
|
pixa_debug->AddPix(pixmask, "MaskDilated");
|
||||||
}
|
}
|
||||||
// And the image mask with the line and bar remover.
|
// And the image mask with the line and bar remover.
|
||||||
pixAnd(pixht, pixht, pixmask);
|
pixAnd(pixht, pixht, pixmask);
|
||||||
pixDestroy(&pixmask);
|
pixmask.destroy();
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(pixht, "FinalMask");
|
pixa_debug->AddPix(pixht, "FinalMask");
|
||||||
}
|
}
|
||||||
// Make the result image the same size as the input.
|
// Make the result image the same size as the input.
|
||||||
Pix *result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
|
||||||
pixOr(result, result, pixht);
|
pixOr(result, result, pixht);
|
||||||
pixDestroy(&pixht);
|
pixht.destroy();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,7 +158,7 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
|
|||||||
// If not nullptr, they must be destroyed by the caller.
|
// If not nullptr, they must be destroyed by the caller.
|
||||||
// Resolution of pix should match the source image (Tesseract::pix_binary_)
|
// Resolution of pix should match the source image (Tesseract::pix_binary_)
|
||||||
// so the output coordinate systems match.
|
// so the output coordinate systems match.
|
||||||
void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa,
|
void ImageFind::ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa,
|
||||||
Pixa **pixa) {
|
Pixa **pixa) {
|
||||||
*boxa = nullptr;
|
*boxa = nullptr;
|
||||||
*pixa = nullptr;
|
*pixa = nullptr;
|
||||||
@ -177,15 +177,15 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa
|
|||||||
}
|
}
|
||||||
for (int i = 0; i < npixes; ++i) {
|
for (int i = 0; i < npixes; ++i) {
|
||||||
int x_start, x_end, y_start, y_end;
|
int x_start, x_end, y_start, y_end;
|
||||||
Pix *img_pix = pixaGetPix(*pixa, i, L_CLONE);
|
Image img_pix = pixaGetPix(*pixa, i, L_CLONE);
|
||||||
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
if (textord_tabfind_show_images && pixa_debug != nullptr) {
|
||||||
pixa_debug->AddPix(img_pix, "A component");
|
pixa_debug->AddPix(img_pix, "A component");
|
||||||
}
|
}
|
||||||
if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction,
|
if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction,
|
||||||
kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) {
|
kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) {
|
||||||
Pix *simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
|
Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
|
||||||
pixSetAll(simple_pix);
|
pixSetAll(simple_pix);
|
||||||
pixDestroy(&img_pix);
|
img_pix.destroy();
|
||||||
// pixaReplacePix takes ownership of the simple_pix.
|
// pixaReplacePix takes ownership of the simple_pix.
|
||||||
pixaReplacePix(*pixa, i, simple_pix, nullptr);
|
pixaReplacePix(*pixa, i, simple_pix, nullptr);
|
||||||
img_pix = pixaGetPix(*pixa, i, L_CLONE);
|
img_pix = pixaGetPix(*pixa, i, L_CLONE);
|
||||||
@ -195,7 +195,7 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa
|
|||||||
Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start);
|
Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start);
|
||||||
boxaReplaceBox(*boxa, i, simple_box);
|
boxaReplaceBox(*boxa, i, simple_box);
|
||||||
}
|
}
|
||||||
pixDestroy(&img_pix);
|
img_pix.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,7 +280,7 @@ static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int mi
|
|||||||
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
|
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
|
||||||
// Note: the algorithm is iterative, allowing it to slice off pixels from
|
// Note: the algorithm is iterative, allowing it to slice off pixels from
|
||||||
// one edge, allowing it to then slice off more pixels from another edge.
|
// one edge, allowing it to then slice off more pixels from another edge.
|
||||||
bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction,
|
bool ImageFind::pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
|
||||||
double max_skew_gradient, int *x_start, int *y_start,
|
double max_skew_gradient, int *x_start, int *y_start,
|
||||||
int *x_end, int *y_end) {
|
int *x_end, int *y_end) {
|
||||||
ASSERT_HOST(pix != nullptr);
|
ASSERT_HOST(pix != nullptr);
|
||||||
@ -348,7 +348,7 @@ bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_f
|
|||||||
// are shrunk inwards until they bound any black pixels found within the
|
// are shrunk inwards until they bound any black pixels found within the
|
||||||
// original rectangle. Returns false if the rectangle contains no black
|
// original rectangle. Returns false if the rectangle contains no black
|
||||||
// pixels at all.
|
// pixels at all.
|
||||||
bool ImageFind::BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end) {
|
bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) {
|
||||||
Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
|
Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
|
||||||
Box *output_box = nullptr;
|
Box *output_box = nullptr;
|
||||||
pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
|
pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
|
||||||
@ -427,8 +427,8 @@ uint8_t ImageFind::ClipToByte(double pixel) {
|
|||||||
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
||||||
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
||||||
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
||||||
void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1,
|
void ImageFind::ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1,
|
||||||
Pix *color_map2, Pix *rms_map, uint8_t *color1,
|
Image color_map2, Image rms_map, uint8_t *color1,
|
||||||
uint8_t *color2) {
|
uint8_t *color2) {
|
||||||
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
|
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
|
||||||
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
|
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
|
||||||
@ -448,7 +448,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
|
|||||||
}
|
}
|
||||||
// Now crop the pix to the rectangle.
|
// Now crop the pix to the rectangle.
|
||||||
Box *scaled_box = boxCreate(left_pad, height - top_pad, width_pad, height_pad);
|
Box *scaled_box = boxCreate(left_pad, height - top_pad, width_pad, height_pad);
|
||||||
Pix *scaled = pixClipRectangle(pix, scaled_box, nullptr);
|
Image scaled = pixClipRectangle(pix, scaled_box, nullptr);
|
||||||
|
|
||||||
// Compute stats over the whole image.
|
// Compute stats over the whole image.
|
||||||
STATS red_stats(0, 256);
|
STATS red_stats(0, 256);
|
||||||
@ -538,7 +538,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
|
|||||||
ComposeRGB(color2[COLOR_RED], color2[COLOR_GREEN], color2[COLOR_BLUE]));
|
ComposeRGB(color2[COLOR_RED], color2[COLOR_GREEN], color2[COLOR_BLUE]));
|
||||||
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
|
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
|
||||||
}
|
}
|
||||||
pixDestroy(&scaled);
|
scaled.destroy();
|
||||||
boxDestroy(&scaled_box);
|
boxDestroy(&scaled_box);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -585,7 +585,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
|
|||||||
// horizontal. The boxes are rotated by rotation, which should undo such
|
// horizontal. The boxes are rotated by rotation, which should undo such
|
||||||
// rotations, before mapping them onto the pix.
|
// rotations, before mapping them onto the pix.
|
||||||
bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
|
bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
|
||||||
const FCOORD &rotation, Pix *pix) {
|
const FCOORD &rotation, Image pix) {
|
||||||
TBOX search_box(box1);
|
TBOX search_box(box1);
|
||||||
search_box += box2;
|
search_box += box2;
|
||||||
if (box1.x_gap(box2) >= box1.y_gap(box2)) {
|
if (box1.x_gap(box2) >= box1.y_gap(box2)) {
|
||||||
@ -607,7 +607,7 @@ bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TB
|
|||||||
// Returns the number of pixels in box in the pix.
|
// Returns the number of pixels in box in the pix.
|
||||||
// rotation, pix and im_box are defined in the large comment above.
|
// rotation, pix and im_box are defined in the large comment above.
|
||||||
int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
|
int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
|
||||||
Pix *pix) {
|
Image pix) {
|
||||||
// Intersect it with the image box.
|
// Intersect it with the image box.
|
||||||
box &= im_box; // This is in-place box intersection.
|
box &= im_box; // This is in-place box intersection.
|
||||||
if (box.null_box()) {
|
if (box.null_box()) {
|
||||||
@ -616,12 +616,12 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR
|
|||||||
box.rotate(rotation);
|
box.rotate(rotation);
|
||||||
TBOX rotated_im_box(im_box);
|
TBOX rotated_im_box(im_box);
|
||||||
rotated_im_box.rotate(rotation);
|
rotated_im_box.rotate(rotation);
|
||||||
Pix *rect_pix = pixCreate(box.width(), box.height(), 1);
|
Image rect_pix = pixCreate(box.width(), box.height(), 1);
|
||||||
pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
|
pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
|
||||||
box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
|
box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
|
||||||
l_int32 result;
|
l_int32 result;
|
||||||
pixCountPixels(rect_pix, &result, nullptr);
|
pixCountPixels(rect_pix, &result, nullptr);
|
||||||
pixDestroy(&rect_pix);
|
rect_pix.destroy();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -630,7 +630,7 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR
|
|||||||
// until there is at least one black pixel in the outermost columns.
|
// until there is at least one black pixel in the outermost columns.
|
||||||
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
||||||
static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box,
|
static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box,
|
||||||
Pix *pix, TBOX *slice) {
|
Image pix, TBOX *slice) {
|
||||||
TBOX rotated_box(*slice);
|
TBOX rotated_box(*slice);
|
||||||
rotated_box.rotate(rerotation);
|
rotated_box.rotate(rerotation);
|
||||||
TBOX rotated_im_box(im_box);
|
TBOX rotated_im_box(im_box);
|
||||||
@ -675,7 +675,7 @@ static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation,
|
|||||||
// In such cases, the output order may cause strange block polygons.
|
// In such cases, the output order may cause strange block polygons.
|
||||||
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
||||||
static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation,
|
static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation,
|
||||||
const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list) {
|
const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) {
|
||||||
ASSERT_HOST(!part_list->empty());
|
ASSERT_HOST(!part_list->empty());
|
||||||
ColPartition_IT part_it(part_list);
|
ColPartition_IT part_it(part_list);
|
||||||
do {
|
do {
|
||||||
@ -753,7 +753,7 @@ static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD
|
|||||||
// from a rectangle.
|
// from a rectangle.
|
||||||
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
// rotation, rerotation, pix and im_box are defined in the large comment above.
|
||||||
static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation,
|
static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation,
|
||||||
const FCOORD &rerotation, Pix *pix,
|
const FCOORD &rerotation, Image pix,
|
||||||
ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) {
|
ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) {
|
||||||
// Add the full im_box partition to the list to begin with.
|
// Add the full im_box partition to the list to begin with.
|
||||||
ColPartition *pix_part =
|
ColPartition *pix_part =
|
||||||
@ -1204,7 +1204,7 @@ static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) {
|
|||||||
// and then deletes them.
|
// and then deletes them.
|
||||||
// Box coordinates are rotated by rerotate to match the image.
|
// Box coordinates are rotated by rerotate to match the image.
|
||||||
static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid,
|
static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid,
|
||||||
ColPartition_LIST *image_parts, Pix *image_pix) {
|
ColPartition_LIST *image_parts, Image image_pix) {
|
||||||
if (image_pix == nullptr) {
|
if (image_pix == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1236,7 +1236,7 @@ static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *pa
|
|||||||
// rerotation specifies how to rotate the partition coords to match
|
// rerotation specifies how to rotate the partition coords to match
|
||||||
// the image_mask, since this function is used after orientation correction.
|
// the image_mask, since this function is used after orientation correction.
|
||||||
void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
|
void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
|
||||||
Pix *image_mask) {
|
Image image_mask) {
|
||||||
// Extract the noise parts from the grid and put them on a temporary list.
|
// Extract the noise parts from the grid and put them on a temporary list.
|
||||||
ColPartition_LIST parts_list;
|
ColPartition_LIST parts_list;
|
||||||
ColPartition_IT part_it(&parts_list);
|
ColPartition_IT part_it(&parts_list);
|
||||||
@ -1288,7 +1288,7 @@ static void DeleteSmallImages(ColPartitionGrid *part_grid) {
|
|||||||
// Since the other blobs in the other partitions will be owned by the block,
|
// Since the other blobs in the other partitions will be owned by the block,
|
||||||
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
|
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
|
||||||
// situation and collect the image blobs.
|
// situation and collect the image blobs.
|
||||||
void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
|
void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation,
|
||||||
const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid,
|
const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid,
|
||||||
DebugPixa *pixa_debug, ColPartitionGrid *part_grid,
|
DebugPixa *pixa_debug, ColPartitionGrid *part_grid,
|
||||||
ColPartition_LIST *big_parts) {
|
ColPartition_LIST *big_parts) {
|
||||||
@ -1304,7 +1304,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
|
|||||||
for (int i = 0; i < nboxes; ++i) {
|
for (int i = 0; i < nboxes; ++i) {
|
||||||
l_int32 x, y, width, height;
|
l_int32 x, y, width, height;
|
||||||
boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
|
boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
|
||||||
Pix *pix = pixaGetPix(pixa, i, L_CLONE);
|
Image pix = pixaGetPix(pixa, i, L_CLONE);
|
||||||
TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
|
TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
|
||||||
im_box.rotate(rotation); // Now matches all partitions and blobs.
|
im_box.rotate(rotation); // Now matches all partitions and blobs.
|
||||||
ColPartitionGridSearch rectsearch(part_grid);
|
ColPartitionGridSearch rectsearch(part_grid);
|
||||||
@ -1315,7 +1315,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
|
|||||||
pixa_debug->AddPix(pix, "ImageComponent");
|
pixa_debug->AddPix(pix, "ImageComponent");
|
||||||
tprintf("Component has %d parts\n", part_list.length());
|
tprintf("Component has %d parts\n", part_list.length());
|
||||||
}
|
}
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
if (!part_list.empty()) {
|
if (!part_list.empty()) {
|
||||||
ColPartition_IT part_it(&part_list);
|
ColPartition_IT part_it(&part_list);
|
||||||
if (part_list.singleton()) {
|
if (part_list.singleton()) {
|
||||||
|
@ -47,7 +47,7 @@ public:
|
|||||||
// The returned pix may be nullptr, meaning no images found.
|
// The returned pix may be nullptr, meaning no images found.
|
||||||
// If not nullptr, it must be PixDestroyed by the caller.
|
// If not nullptr, it must be PixDestroyed by the caller.
|
||||||
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
|
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
|
||||||
static Pix *FindImages(Pix *pix, DebugPixa *pixa_debug);
|
static Image FindImages(Image pix, DebugPixa *pixa_debug);
|
||||||
|
|
||||||
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
|
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
|
||||||
// analogous to pixConnComp, except that connected components which are nearly
|
// analogous to pixConnComp, except that connected components which are nearly
|
||||||
@ -56,7 +56,7 @@ public:
|
|||||||
// If not nullptr, they must be destroyed by the caller.
|
// If not nullptr, they must be destroyed by the caller.
|
||||||
// Resolution of pix should match the source image (Tesseract::pix_binary_)
|
// Resolution of pix should match the source image (Tesseract::pix_binary_)
|
||||||
// so the output coordinate systems match.
|
// so the output coordinate systems match.
|
||||||
static void ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa);
|
static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa);
|
||||||
|
|
||||||
// Returns true if there is a rectangle in the source pix, such that all
|
// Returns true if there is a rectangle in the source pix, such that all
|
||||||
// pixel rows and column slices outside of it have less than
|
// pixel rows and column slices outside of it have less than
|
||||||
@ -67,7 +67,7 @@ public:
|
|||||||
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
|
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
|
||||||
// Note: the algorithm is iterative, allowing it to slice off pixels from
|
// Note: the algorithm is iterative, allowing it to slice off pixels from
|
||||||
// one edge, allowing it to then slice off more pixels from another edge.
|
// one edge, allowing it to then slice off more pixels from another edge.
|
||||||
static bool pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction,
|
static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
|
||||||
double max_skew_gradient, int *x_start, int *y_start, int *x_end,
|
double max_skew_gradient, int *x_start, int *y_start, int *x_end,
|
||||||
int *y_end);
|
int *y_end);
|
||||||
|
|
||||||
@ -75,7 +75,7 @@ public:
|
|||||||
// are shrunk inwards until they bound any black pixels found within the
|
// are shrunk inwards until they bound any black pixels found within the
|
||||||
// original rectangle. Returns false if the rectangle contains no black
|
// original rectangle. Returns false if the rectangle contains no black
|
||||||
// pixels at all.
|
// pixels at all.
|
||||||
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end);
|
static bool BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end);
|
||||||
|
|
||||||
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
|
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
|
||||||
// of the point from the given line, defined by a pair of points in the 3-D
|
// of the point from the given line, defined by a pair of points in the 3-D
|
||||||
@ -99,8 +99,8 @@ public:
|
|||||||
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
||||||
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
||||||
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
||||||
static void ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1,
|
static void ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1,
|
||||||
Pix *color_map2, Pix *rms_map, uint8_t *color1,
|
Image color_map2, Image rms_map, uint8_t *color1,
|
||||||
uint8_t *color2);
|
uint8_t *color2);
|
||||||
|
|
||||||
// Returns true if there are no black pixels in between the boxes.
|
// Returns true if there are no black pixels in between the boxes.
|
||||||
@ -109,7 +109,7 @@ public:
|
|||||||
// horizontal. The boxes are rotated by rotation, which should undo such
|
// horizontal. The boxes are rotated by rotation, which should undo such
|
||||||
// rotations, before mapping them onto the pix.
|
// rotations, before mapping them onto the pix.
|
||||||
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
|
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
|
||||||
const FCOORD &rotation, Pix *pix);
|
const FCOORD &rotation, Image pix);
|
||||||
|
|
||||||
// Returns the number of pixels in box in the pix.
|
// Returns the number of pixels in box in the pix.
|
||||||
// The im_box must represent the bounding box of the pix in tesseract
|
// The im_box must represent the bounding box of the pix in tesseract
|
||||||
@ -117,7 +117,7 @@ public:
|
|||||||
// horizontal. The boxes are rotated by rotation, which should undo such
|
// horizontal. The boxes are rotated by rotation, which should undo such
|
||||||
// rotations, before mapping them onto the pix.
|
// rotations, before mapping them onto the pix.
|
||||||
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
|
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
|
||||||
Pix *pix);
|
Image pix);
|
||||||
|
|
||||||
// Locates all the image partitions in the part_grid, that were found by a
|
// Locates all the image partitions in the part_grid, that were found by a
|
||||||
// previous call to FindImagePartitions, marks them in the image_mask,
|
// previous call to FindImagePartitions, marks them in the image_mask,
|
||||||
@ -127,7 +127,7 @@ public:
|
|||||||
// rerotation specifies how to rotate the partition coords to match
|
// rerotation specifies how to rotate the partition coords to match
|
||||||
// the image_mask, since this function is used after orientation correction.
|
// the image_mask, since this function is used after orientation correction.
|
||||||
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
|
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
|
||||||
Pix *image_mask);
|
Image image_mask);
|
||||||
|
|
||||||
// Runs a CC analysis on the image_pix mask image, and creates
|
// Runs a CC analysis on the image_pix mask image, and creates
|
||||||
// image partitions from them, cutting out strong text, and merging with
|
// image partitions from them, cutting out strong text, and merging with
|
||||||
@ -139,7 +139,7 @@ public:
|
|||||||
// Since the other blobs in the other partitions will be owned by the block,
|
// Since the other blobs in the other partitions will be owned by the block,
|
||||||
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
|
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
|
||||||
// situation and collect the image blobs.
|
// situation and collect the image blobs.
|
||||||
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation,
|
static void FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation,
|
||||||
TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug,
|
TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug,
|
||||||
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
|
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
|
||||||
};
|
};
|
||||||
|
@ -64,7 +64,7 @@ const double kMinMusicPixelFraction = 0.75;
|
|||||||
// Erases the unused blobs from the line_pix image, taking into account
|
// Erases the unused blobs from the line_pix image, taking into account
|
||||||
// whether this was a horizontal or vertical line set.
|
// whether this was a horizontal or vertical line set.
|
||||||
static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs,
|
static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs,
|
||||||
Pix *line_pix) {
|
Image line_pix) {
|
||||||
int height = pixGetHeight(line_pix);
|
int height = pixGetHeight(line_pix);
|
||||||
BLOBNBOX_IT bbox_it(line_bblobs);
|
BLOBNBOX_IT bbox_it(line_bblobs);
|
||||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||||
@ -94,26 +94,26 @@ static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_
|
|||||||
// as well by removing components that touch the line, but are not in the
|
// as well by removing components that touch the line, but are not in the
|
||||||
// non_line_pix mask. It is assumed that the non_line_pix mask has already
|
// non_line_pix mask. It is assumed that the non_line_pix mask has already
|
||||||
// been prepared to required accuracy.
|
// been prepared to required accuracy.
|
||||||
static void SubtractLinesAndResidue(Pix *line_pix, Pix *non_line_pix, int resolution,
|
static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix, int resolution,
|
||||||
Pix *src_pix) {
|
Image src_pix) {
|
||||||
// First remove the lines themselves.
|
// First remove the lines themselves.
|
||||||
pixSubtract(src_pix, src_pix, line_pix);
|
pixSubtract(src_pix, src_pix, line_pix);
|
||||||
// Subtract the non-lines from the image to get the residue.
|
// Subtract the non-lines from the image to get the residue.
|
||||||
Pix *residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
|
Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
|
||||||
// Dilate the lines so they touch the residue.
|
// Dilate the lines so they touch the residue.
|
||||||
Pix *fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
|
Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
|
||||||
// Seed fill the fat lines to get all the residue.
|
// Seed fill the fat lines to get all the residue.
|
||||||
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
|
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
|
||||||
// Subtract the residue from the original image.
|
// Subtract the residue from the original image.
|
||||||
pixSubtract(src_pix, src_pix, fat_line_pix);
|
pixSubtract(src_pix, src_pix, fat_line_pix);
|
||||||
pixDestroy(&fat_line_pix);
|
fat_line_pix.destroy();
|
||||||
pixDestroy(&residue_pix);
|
residue_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the maximum strokewidth in the given binary image by doubling
|
// Returns the maximum strokewidth in the given binary image by doubling
|
||||||
// the maximum of the distance function.
|
// the maximum of the distance function.
|
||||||
static int MaxStrokeWidth(Pix *pix) {
|
static int MaxStrokeWidth(Image pix) {
|
||||||
Pix *dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
|
Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
|
||||||
int width = pixGetWidth(dist_pix);
|
int width = pixGetWidth(dist_pix);
|
||||||
int height = pixGetHeight(dist_pix);
|
int height = pixGetHeight(dist_pix);
|
||||||
int wpl = pixGetWpl(dist_pix);
|
int wpl = pixGetWpl(dist_pix);
|
||||||
@ -129,18 +129,18 @@ static int MaxStrokeWidth(Pix *pix) {
|
|||||||
}
|
}
|
||||||
data += wpl;
|
data += wpl;
|
||||||
}
|
}
|
||||||
pixDestroy(&dist_pix);
|
dist_pix.destroy();
|
||||||
return max_dist * 2;
|
return max_dist * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the number of components in the intersection_pix touched by line_box.
|
// Returns the number of components in the intersection_pix touched by line_box.
|
||||||
static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) {
|
static int NumTouchingIntersections(Box *line_box, Image intersection_pix) {
|
||||||
if (intersection_pix == nullptr) {
|
if (intersection_pix == nullptr) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Pix *rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
|
Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
|
||||||
Boxa *boxa = pixConnComp(rect_pix, nullptr, 8);
|
Boxa *boxa = pixConnComp(rect_pix, nullptr, 8);
|
||||||
pixDestroy(&rect_pix);
|
rect_pix.destroy();
|
||||||
if (boxa == nullptr) {
|
if (boxa == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -152,7 +152,7 @@ static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) {
|
|||||||
// Returns the number of black pixels found in the box made by adding the line
|
// Returns the number of black pixels found in the box made by adding the line
|
||||||
// width to both sides of the line bounding box. (Increasing the smallest
|
// width to both sides of the line bounding box. (Increasing the smallest
|
||||||
// dimension of the bounding box.)
|
// dimension of the bounding box.)
|
||||||
static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline_pix) {
|
static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) {
|
||||||
l_int32 x, y, box_width, box_height;
|
l_int32 x, y, box_width, box_height;
|
||||||
boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
|
boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
|
||||||
if (box_width > box_height) {
|
if (box_width > box_height) {
|
||||||
@ -167,11 +167,11 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline
|
|||||||
box_width = right - x;
|
box_width = right - x;
|
||||||
}
|
}
|
||||||
Box *box = boxCreate(x, y, box_width, box_height);
|
Box *box = boxCreate(x, y, box_width, box_height);
|
||||||
Pix *rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
|
Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
|
||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
l_int32 result;
|
l_int32 result;
|
||||||
pixCountPixels(rect_pix, &result, nullptr);
|
pixCountPixels(rect_pix, &result, nullptr);
|
||||||
pixDestroy(&rect_pix);
|
rect_pix.destroy();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,8 +184,8 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline
|
|||||||
// or Hindi words, or underlines.)
|
// or Hindi words, or underlines.)
|
||||||
// Bad line components are erased from line_pix.
|
// Bad line components are erased from line_pix.
|
||||||
// Returns the number of remaining connected components.
|
// Returns the number of remaining connected components.
|
||||||
static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersection_pix,
|
static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix,
|
||||||
Pix *line_pix) {
|
Image line_pix) {
|
||||||
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
|
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
|
||||||
Pixa *pixa = nullptr;
|
Pixa *pixa = nullptr;
|
||||||
Boxa *boxa = pixConnComp(line_pix, &pixa, 8);
|
Boxa *boxa = pixConnComp(line_pix, &pixa, 8);
|
||||||
@ -196,9 +196,9 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect
|
|||||||
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
||||||
l_int32 x, y, box_width, box_height;
|
l_int32 x, y, box_width, box_height;
|
||||||
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
||||||
Pix *comp_pix = pixaGetPix(pixa, i, L_CLONE);
|
Image comp_pix = pixaGetPix(pixa, i, L_CLONE);
|
||||||
int max_width = MaxStrokeWidth(comp_pix);
|
int max_width = MaxStrokeWidth(comp_pix);
|
||||||
pixDestroy(&comp_pix);
|
comp_pix.destroy();
|
||||||
bool bad_line = false;
|
bool bad_line = false;
|
||||||
// If the length is too short to stand-alone as a line, and the box width
|
// If the length is too short to stand-alone as a line, and the box width
|
||||||
// is thick enough, and the stroke width is thick enough it is bad.
|
// is thick enough, and the stroke width is thick enough it is bad.
|
||||||
@ -240,18 +240,18 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect
|
|||||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||||
// having no boxes, as there is no need to refit or merge separator lines.
|
// having no boxes, as there is no need to refit or merge separator lines.
|
||||||
// The detected lines are removed from the pix.
|
// The detected lines are removed from the pix.
|
||||||
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x,
|
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
|
||||||
int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines,
|
int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
|
||||||
TabVector_LIST *h_lines) {
|
TabVector_LIST *h_lines) {
|
||||||
if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
|
if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
|
||||||
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
|
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Pix *pix_vline = nullptr;
|
Image pix_vline = nullptr;
|
||||||
Pix *pix_non_vline = nullptr;
|
Image pix_non_vline = nullptr;
|
||||||
Pix *pix_hline = nullptr;
|
Image pix_hline = nullptr;
|
||||||
Pix *pix_non_hline = nullptr;
|
Image pix_non_hline = nullptr;
|
||||||
Pix *pix_intersections = nullptr;
|
Image pix_intersections = nullptr;
|
||||||
Pixa *pixa_display = debug ? pixaCreate(0) : nullptr;
|
Pixa *pixa_display = debug ? pixaCreate(0) : nullptr;
|
||||||
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline,
|
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline,
|
||||||
&pix_intersections, pix_music_mask, pixa_display);
|
&pix_intersections, pix_music_mask, pixa_display);
|
||||||
@ -263,10 +263,10 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
|
|||||||
if (pix_vline != nullptr) {
|
if (pix_vline != nullptr) {
|
||||||
pixAnd(pix_intersections, pix_vline, pix_hline);
|
pixAnd(pix_intersections, pix_vline, pix_hline);
|
||||||
} else {
|
} else {
|
||||||
pixDestroy(&pix_intersections);
|
pix_intersections.destroy();
|
||||||
}
|
}
|
||||||
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) {
|
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) {
|
||||||
pixDestroy(&pix_hline);
|
pix_hline.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline,
|
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline,
|
||||||
@ -283,11 +283,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
|
|||||||
pixAnd(pix_intersections, pix_vline, pix_hline);
|
pixAnd(pix_intersections, pix_vline, pix_hline);
|
||||||
// Fatten up the intersections and seed-fill to get the intersection
|
// Fatten up the intersections and seed-fill to get the intersection
|
||||||
// residue.
|
// residue.
|
||||||
Pix *pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
|
Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
|
||||||
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
|
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
|
||||||
// Now remove the intersection residue.
|
// Now remove the intersection residue.
|
||||||
pixSubtract(pix, pix, pix_join_residue);
|
pixSubtract(pix, pix, pix_join_residue);
|
||||||
pixDestroy(&pix_join_residue);
|
pix_join_residue.destroy();
|
||||||
}
|
}
|
||||||
// Remove any detected music.
|
// Remove any detected music.
|
||||||
if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
|
if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
|
||||||
@ -300,11 +300,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
|
|||||||
pixaAddPix(pixa_display, pix, L_CLONE);
|
pixaAddPix(pixa_display, pix, L_CLONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
pixDestroy(&pix_vline);
|
pix_vline.destroy();
|
||||||
pixDestroy(&pix_non_vline);
|
pix_non_vline.destroy();
|
||||||
pixDestroy(&pix_hline);
|
pix_hline.destroy();
|
||||||
pixDestroy(&pix_non_hline);
|
pix_non_hline.destroy();
|
||||||
pixDestroy(&pix_intersections);
|
pix_intersections.destroy();
|
||||||
if (pixa_display != nullptr) {
|
if (pixa_display != nullptr) {
|
||||||
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf");
|
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf");
|
||||||
pixaDestroy(&pixa_display);
|
pixaDestroy(&pixa_display);
|
||||||
@ -359,9 +359,9 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa **bo
|
|||||||
// If no good lines are found, pix_vline is destroyed.
|
// If no good lines are found, pix_vline is destroyed.
|
||||||
// None of the input pointers may be nullptr, and if *pix_vline is nullptr then
|
// None of the input pointers may be nullptr, and if *pix_vline is nullptr then
|
||||||
// the function does nothing.
|
// the function does nothing.
|
||||||
void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x,
|
void LineFinder::FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x,
|
||||||
int *vertical_y, Pix **pix_vline, Pix *pix_non_vline,
|
int *vertical_y, Image *pix_vline, Image pix_non_vline,
|
||||||
Pix *src_pix, TabVector_LIST *vectors) {
|
Image src_pix, TabVector_LIST *vectors) {
|
||||||
if (pix_vline == nullptr || *pix_vline == nullptr) {
|
if (pix_vline == nullptr || *pix_vline == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -380,7 +380,7 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int
|
|||||||
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
||||||
TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
|
TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
|
||||||
} else {
|
} else {
|
||||||
pixDestroy(pix_vline);
|
pix_vline->destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -394,9 +394,9 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int
|
|||||||
// If no good lines are found, pix_hline is destroyed.
|
// If no good lines are found, pix_hline is destroyed.
|
||||||
// None of the input pointers may be nullptr, and if *pix_hline is nullptr then
|
// None of the input pointers may be nullptr, and if *pix_hline is nullptr then
|
||||||
// the function does nothing.
|
// the function does nothing.
|
||||||
void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x,
|
void LineFinder::FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x,
|
||||||
int vertical_y, Pix **pix_hline, Pix *pix_non_hline,
|
int vertical_y, Image *pix_hline, Image pix_non_hline,
|
||||||
Pix *src_pix, TabVector_LIST *vectors) {
|
Image src_pix, TabVector_LIST *vectors) {
|
||||||
if (pix_hline == nullptr || *pix_hline == nullptr) {
|
if (pix_hline == nullptr || *pix_hline == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -422,7 +422,7 @@ void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int
|
|||||||
h_it.data()->XYFlip();
|
h_it.data()->XYFlip();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pixDestroy(pix_hline);
|
pix_hline->destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -482,14 +482,14 @@ void LineFinder::FindLineVectors(const ICOORD &bleft, const ICOORD &tright,
|
|||||||
// is taken to be a bar. Bars are used as a seed and the entire touching
|
// is taken to be a bar. Bars are used as a seed and the entire touching
|
||||||
// component is added to the output music mask and subtracted from the lines.
|
// component is added to the output music mask and subtracted from the lines.
|
||||||
// Returns nullptr and does minimal work if no music is found.
|
// Returns nullptr and does minimal work if no music is found.
|
||||||
static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline,
|
static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline,
|
||||||
l_int32 *v_empty, l_int32 *h_empty) {
|
l_int32 *v_empty, l_int32 *h_empty) {
|
||||||
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
|
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
|
||||||
Pix *intersection_pix = pixAnd(nullptr, pix_vline, pix_hline);
|
Image intersection_pix = pixAnd(nullptr, pix_vline, pix_hline);
|
||||||
Boxa *boxa = pixConnComp(pix_vline, nullptr, 8);
|
Boxa *boxa = pixConnComp(pix_vline, nullptr, 8);
|
||||||
// Iterate over the boxes to find music bars.
|
// Iterate over the boxes to find music bars.
|
||||||
int nboxes = boxaGetCount(boxa);
|
int nboxes = boxaGetCount(boxa);
|
||||||
Pix *music_mask = nullptr;
|
Image music_mask = nullptr;
|
||||||
for (int i = 0; i < nboxes; ++i) {
|
for (int i = 0; i < nboxes; ++i) {
|
||||||
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
||||||
l_int32 x, y, box_width, box_height;
|
l_int32 x, y, box_width, box_height;
|
||||||
@ -507,7 +507,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
|
|||||||
boxDestroy(&box);
|
boxDestroy(&box);
|
||||||
}
|
}
|
||||||
boxaDestroy(&boxa);
|
boxaDestroy(&boxa);
|
||||||
pixDestroy(&intersection_pix);
|
intersection_pix.destroy();
|
||||||
if (music_mask != nullptr) {
|
if (music_mask != nullptr) {
|
||||||
// The mask currently contains just the bars. Use the mask as a seed
|
// The mask currently contains just the bars. Use the mask as a seed
|
||||||
// and the pix_closed as the mask for a seedfill to get all the
|
// and the pix_closed as the mask for a seedfill to get all the
|
||||||
@ -521,14 +521,14 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
|
|||||||
int nboxes = boxaGetCount(boxa);
|
int nboxes = boxaGetCount(boxa);
|
||||||
for (int i = 0; i < nboxes; ++i) {
|
for (int i = 0; i < nboxes; ++i) {
|
||||||
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
Box *box = boxaGetBox(boxa, i, L_CLONE);
|
||||||
Pix *rect_pix = pixClipRectangle(music_mask, box, nullptr);
|
Image rect_pix = pixClipRectangle(music_mask, box, nullptr);
|
||||||
l_int32 music_pixels;
|
l_int32 music_pixels;
|
||||||
pixCountPixels(rect_pix, &music_pixels, nullptr);
|
pixCountPixels(rect_pix, &music_pixels, nullptr);
|
||||||
pixDestroy(&rect_pix);
|
rect_pix.destroy();
|
||||||
rect_pix = pixClipRectangle(pix_closed, box, nullptr);
|
rect_pix = pixClipRectangle(pix_closed, box, nullptr);
|
||||||
l_int32 all_pixels;
|
l_int32 all_pixels;
|
||||||
pixCountPixels(rect_pix, &all_pixels, nullptr);
|
pixCountPixels(rect_pix, &all_pixels, nullptr);
|
||||||
pixDestroy(&rect_pix);
|
rect_pix.destroy();
|
||||||
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
|
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
|
||||||
// False positive. Delete from the music mask.
|
// False positive. Delete from the music mask.
|
||||||
pixClearInRect(music_mask, box);
|
pixClearInRect(music_mask, box);
|
||||||
@ -539,7 +539,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
|
|||||||
boxaDestroy(&boxa);
|
boxaDestroy(&boxa);
|
||||||
pixZero(music_mask, &no_remaining_music);
|
pixZero(music_mask, &no_remaining_music);
|
||||||
if (no_remaining_music) {
|
if (no_remaining_music) {
|
||||||
pixDestroy(&music_mask);
|
music_mask.destroy();
|
||||||
} else {
|
} else {
|
||||||
pixSubtract(pix_vline, pix_vline, music_mask);
|
pixSubtract(pix_vline, pix_vline, music_mask);
|
||||||
pixSubtract(pix_hline, pix_hline, music_mask);
|
pixSubtract(pix_hline, pix_hline, music_mask);
|
||||||
@ -563,11 +563,11 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
|
|||||||
// but any of the returns that are empty will be nullptr on output.
|
// but any of the returns that are empty will be nullptr on output.
|
||||||
// None of the input (1st level) pointers may be nullptr except pix_music_mask,
|
// None of the input (1st level) pointers may be nullptr except pix_music_mask,
|
||||||
// which will disable music detection, and pixa_display.
|
// which will disable music detection, and pixa_display.
|
||||||
void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline,
|
void LineFinder::GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
|
||||||
Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections,
|
Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
|
||||||
Pix **pix_music_mask, Pixa *pixa_display) {
|
Image *pix_music_mask, Pixa *pixa_display) {
|
||||||
Pix *pix_closed = nullptr;
|
Image pix_closed = nullptr;
|
||||||
Pix *pix_hollow = nullptr;
|
Image pix_hollow = nullptr;
|
||||||
|
|
||||||
int max_line_width = resolution / kThinLineFraction;
|
int max_line_width = resolution / kThinLineFraction;
|
||||||
int min_line_length = resolution / kMinLineLengthFraction;
|
int min_line_length = resolution / kMinLineLengthFraction;
|
||||||
@ -599,13 +599,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
// Open up with a big box to detect solid areas, which can then be
|
// Open up with a big box to detect solid areas, which can then be
|
||||||
// subtracted. This is very generous and will leave in even quite wide
|
// subtracted. This is very generous and will leave in even quite wide
|
||||||
// lines.
|
// lines.
|
||||||
Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
|
||||||
if (pixa_display != nullptr) {
|
if (pixa_display != nullptr) {
|
||||||
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
||||||
}
|
}
|
||||||
pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
|
||||||
|
|
||||||
pixDestroy(&pix_solid);
|
pix_solid.destroy();
|
||||||
|
|
||||||
// Now open up in both directions independently to find lines of at least
|
// Now open up in both directions independently to find lines of at least
|
||||||
// 1 inch/kMinLineLengthFraction in length.
|
// 1 inch/kMinLineLengthFraction in length.
|
||||||
@ -615,7 +615,7 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
*pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
*pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
|
||||||
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
|
||||||
|
|
||||||
pixDestroy(&pix_hollow);
|
pix_hollow.destroy();
|
||||||
#ifdef USE_OPENCL
|
#ifdef USE_OPENCL
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -633,10 +633,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
*pix_music_mask = nullptr;
|
*pix_music_mask = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&pix_closed);
|
pix_closed.destroy();
|
||||||
Pix *pix_nonlines = nullptr;
|
Image pix_nonlines = nullptr;
|
||||||
*pix_intersections = nullptr;
|
*pix_intersections = nullptr;
|
||||||
Pix *extra_non_hlines = nullptr;
|
Image extra_non_hlines = nullptr;
|
||||||
if (!v_empty) {
|
if (!v_empty) {
|
||||||
// Subtract both line candidates from the source to get definite non-lines.
|
// Subtract both line candidates from the source to get definite non-lines.
|
||||||
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
|
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
|
||||||
@ -656,18 +656,18 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
|
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
|
||||||
}
|
}
|
||||||
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) {
|
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) {
|
||||||
pixDestroy(pix_vline); // No candidates left.
|
pix_vline->destroy(); // No candidates left.
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No vertical lines.
|
// No vertical lines.
|
||||||
pixDestroy(pix_vline);
|
pix_vline->destroy();
|
||||||
*pix_non_vline = nullptr;
|
*pix_non_vline = nullptr;
|
||||||
if (!h_empty) {
|
if (!h_empty) {
|
||||||
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
|
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (h_empty) {
|
if (h_empty) {
|
||||||
pixDestroy(pix_hline);
|
pix_hline->destroy();
|
||||||
*pix_non_hline = nullptr;
|
*pix_non_hline = nullptr;
|
||||||
if (v_empty) {
|
if (v_empty) {
|
||||||
return;
|
return;
|
||||||
@ -677,10 +677,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
|
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
|
||||||
if (extra_non_hlines != nullptr) {
|
if (extra_non_hlines != nullptr) {
|
||||||
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
|
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
|
||||||
pixDestroy(&extra_non_hlines);
|
extra_non_hlines.destroy();
|
||||||
}
|
}
|
||||||
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) {
|
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) {
|
||||||
pixDestroy(pix_hline); // No candidates left.
|
pix_hline->destroy(); // No candidates left.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pixa_display != nullptr) {
|
if (pixa_display != nullptr) {
|
||||||
@ -706,13 +706,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
|
|||||||
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&pix_nonlines);
|
pix_nonlines.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
||||||
// the line_crossings member of the boxes so we can later determine the number
|
// the line_crossings member of the boxes so we can later determine the number
|
||||||
// of intersections touched by a full line.
|
// of intersections touched by a full line.
|
||||||
void LineFinder::GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections,
|
void LineFinder::GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
|
||||||
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) {
|
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) {
|
||||||
// Put a single pixel crack in every line at an arbitrary spacing,
|
// Put a single pixel crack in every line at an arbitrary spacing,
|
||||||
// so they break up and the bounding boxes can be used to get the
|
// so they break up and the bounding boxes can be used to get the
|
||||||
|
@ -58,8 +58,8 @@ public:
|
|||||||
*
|
*
|
||||||
* The detected lines are removed from the pix.
|
* The detected lines are removed from the pix.
|
||||||
*/
|
*/
|
||||||
static void FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x,
|
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
|
||||||
int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines,
|
int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
|
||||||
TabVector_LIST *h_lines);
|
TabVector_LIST *h_lines);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -83,9 +83,9 @@ private:
|
|||||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||||
// having no boxes, as there is no need to refit or merge separator lines.
|
// having no boxes, as there is no need to refit or merge separator lines.
|
||||||
// If no good lines are found, pix_vline is destroyed.
|
// If no good lines are found, pix_vline is destroyed.
|
||||||
static void FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x,
|
static void FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x,
|
||||||
int *vertical_y, Pix **pix_vline, Pix *pix_non_vline,
|
int *vertical_y, Image *pix_vline, Image pix_non_vline,
|
||||||
Pix *src_pix, TabVector_LIST *vectors);
|
Image src_pix, TabVector_LIST *vectors);
|
||||||
|
|
||||||
// Finds horizontal line objects in pix_vline and removes them from src_pix.
|
// Finds horizontal line objects in pix_vline and removes them from src_pix.
|
||||||
// Uses the given resolution to determine size thresholds instead of any
|
// Uses the given resolution to determine size thresholds instead of any
|
||||||
@ -95,8 +95,8 @@ private:
|
|||||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||||
// having no boxes, as there is no need to refit or merge separator lines.
|
// having no boxes, as there is no need to refit or merge separator lines.
|
||||||
// If no good lines are found, pix_hline is destroyed.
|
// If no good lines are found, pix_hline is destroyed.
|
||||||
static void FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x,
|
static void FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x,
|
||||||
int vertical_y, Pix **pix_hline, Pix *pix_non_hline, Pix *src_pix,
|
int vertical_y, Image *pix_hline, Image pix_non_hline, Image src_pix,
|
||||||
TabVector_LIST *vectors);
|
TabVector_LIST *vectors);
|
||||||
|
|
||||||
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
||||||
@ -121,14 +121,14 @@ private:
|
|||||||
// None of the input (1st level) pointers may be nullptr except
|
// None of the input (1st level) pointers may be nullptr except
|
||||||
// pix_music_mask, which will disable music detection, and pixa_display, which
|
// pix_music_mask, which will disable music detection, and pixa_display, which
|
||||||
// is for debug.
|
// is for debug.
|
||||||
static void GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline,
|
static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
|
||||||
Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections,
|
Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
|
||||||
Pix **pix_music_mask, Pixa *pixa_display);
|
Image *pix_music_mask, Pixa *pixa_display);
|
||||||
|
|
||||||
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
||||||
// the line_crossings member of the boxes so we can later determine the number
|
// the line_crossings member of the boxes so we can later determine the number
|
||||||
// of intersections touched by a full line.
|
// of intersections touched by a full line.
|
||||||
static void GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections,
|
static void GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
|
||||||
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs);
|
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos);
|
|||||||
* Extract edges from a PDBLK.
|
* Extract edges from a PDBLK.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
void block_edges(Pix *t_pix, // thresholded image
|
void block_edges(Image t_pix, // thresholded image
|
||||||
PDBLK *block, // block in image
|
PDBLK *block, // block in image
|
||||||
C_OUTLINE_IT *outline_it) {
|
C_OUTLINE_IT *outline_it) {
|
||||||
ICOORD bleft; // bounding box
|
ICOORD bleft; // bounding box
|
||||||
|
@ -29,7 +29,7 @@ namespace tesseract {
|
|||||||
class C_OUTLINE_IT;
|
class C_OUTLINE_IT;
|
||||||
class PDBLK;
|
class PDBLK;
|
||||||
|
|
||||||
void block_edges(Pix *t_image, // thresholded image
|
void block_edges(Image t_image, // thresholded image
|
||||||
PDBLK *block, // block in image
|
PDBLK *block, // block in image
|
||||||
C_OUTLINE_IT *outline_it);
|
C_OUTLINE_IT *outline_it);
|
||||||
|
|
||||||
|
@ -350,7 +350,7 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST *big_part_list) {
|
|||||||
// Large blobs that cause overlap are put in separate partitions and added
|
// Large blobs that cause overlap are put in separate partitions and added
|
||||||
// to the big_parts list.
|
// to the big_parts list.
|
||||||
void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,
|
void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,
|
||||||
TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm,
|
TO_BLOCK *block, Image nontext_pix, const DENORM *denorm,
|
||||||
bool cjk_script, TextlineProjection *projection,
|
bool cjk_script, TextlineProjection *projection,
|
||||||
BLOBNBOX_LIST *diacritic_blobs,
|
BLOBNBOX_LIST *diacritic_blobs,
|
||||||
ColPartitionGrid *part_grid,
|
ColPartitionGrid *part_grid,
|
||||||
|
@ -113,7 +113,7 @@ public:
|
|||||||
// Large blobs that cause overlap are put in separate partitions and added
|
// Large blobs that cause overlap are put in separate partitions and added
|
||||||
// to the big_parts list.
|
// to the big_parts list.
|
||||||
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,
|
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,
|
||||||
Pix *nontext_pix, const DENORM *denorm, bool cjk_script,
|
Image nontext_pix, const DENORM *denorm, bool cjk_script,
|
||||||
TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,
|
TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,
|
||||||
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
|
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
|
||||||
|
|
||||||
@ -306,7 +306,7 @@ private:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
|
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
|
||||||
Pix *nontext_map_;
|
Image nontext_map_;
|
||||||
// Textline projection map. Borrowed pointer.
|
// Textline projection map. Borrowed pointer.
|
||||||
TextlineProjection *projection_;
|
TextlineProjection *projection_;
|
||||||
// DENORM used by projection_ to get back to image coords. Borrowed pointer.
|
// DENORM used by projection_ to get back to image coords. Borrowed pointer.
|
||||||
|
@ -53,7 +53,7 @@ TextlineProjection::TextlineProjection(int resolution) : x_origin_(0), y_origin_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
TextlineProjection::~TextlineProjection() {
|
TextlineProjection::~TextlineProjection() {
|
||||||
pixDestroy(&pix_);
|
pix_.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build the projection profile given the input_block containing lists of
|
// Build the projection profile given the input_block containing lists of
|
||||||
@ -64,8 +64,8 @@ TextlineProjection::~TextlineProjection() {
|
|||||||
// The blobs have had their left and right rules set to also limit
|
// The blobs have had their left and right rules set to also limit
|
||||||
// the range of projection.
|
// the range of projection.
|
||||||
void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation,
|
void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation,
|
||||||
Pix *nontext_map) {
|
Image nontext_map) {
|
||||||
pixDestroy(&pix_);
|
pix_.destroy();
|
||||||
TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
|
TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
|
||||||
x_origin_ = 0;
|
x_origin_ = 0;
|
||||||
y_origin_ = image_box.height();
|
y_origin_ = image_box.height();
|
||||||
@ -75,9 +75,9 @@ void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD
|
|||||||
pix_ = pixCreate(width, height, 8);
|
pix_ = pixCreate(width, height, 8);
|
||||||
ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
|
ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
|
||||||
ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
|
ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
|
||||||
Pix *final_pix = pixBlockconv(pix_, 1, 1);
|
Image final_pix = pixBlockconv(pix_, 1, 1);
|
||||||
// Pix* final_pix = pixBlockconv(pix_, 2, 2);
|
// Pix* final_pix = pixBlockconv(pix_, 2, 2);
|
||||||
pixDestroy(&pix_);
|
pix_.destroy();
|
||||||
pix_ = final_pix;
|
pix_ = final_pix;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,7 +127,7 @@ void TextlineProjection::MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs,
|
|||||||
void TextlineProjection::DisplayProjection() const {
|
void TextlineProjection::DisplayProjection() const {
|
||||||
int width = pixGetWidth(pix_);
|
int width = pixGetWidth(pix_);
|
||||||
int height = pixGetHeight(pix_);
|
int height = pixGetHeight(pix_);
|
||||||
Pix *pixc = pixCreate(width, height, 32);
|
Image pixc = pixCreate(width, height, 32);
|
||||||
int src_wpl = pixGetWpl(pix_);
|
int src_wpl = pixGetWpl(pix_);
|
||||||
int col_wpl = pixGetWpl(pixc);
|
int col_wpl = pixGetWpl(pixc);
|
||||||
uint32_t *src_data = pixGetData(pix_);
|
uint32_t *src_data = pixGetData(pix_);
|
||||||
@ -149,7 +149,7 @@ void TextlineProjection::DisplayProjection() const {
|
|||||||
auto *win = new ScrollView("Projection", 0, 0, width, height, width, height);
|
auto *win = new ScrollView("Projection", 0, 0, width, height, width, height);
|
||||||
win->Image(pixc, 0, 0);
|
win->Image(pixc, 0, 0);
|
||||||
win->Update();
|
win->Update();
|
||||||
pixDestroy(&pixc);
|
pixc.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // !GRAPHICS_DISABLED
|
#endif // !GRAPHICS_DISABLED
|
||||||
@ -570,7 +570,7 @@ int TextlineProjection::MeanPixelsInLineSegment(const DENORM *denorm, int offset
|
|||||||
// The function converts between tesseract coords and the pix coords assuming
|
// The function converts between tesseract coords and the pix coords assuming
|
||||||
// that this pix is full resolution equal in size to the original image.
|
// that this pix is full resolution equal in size to the original image.
|
||||||
// Returns an empty box if there are no black pixels in the source box.
|
// Returns an empty box if there are no black pixels in the source box.
|
||||||
static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) {
|
static TBOX BoundsWithinBox(Image pix, const TBOX &box) {
|
||||||
int im_height = pixGetHeight(pix);
|
int im_height = pixGetHeight(pix);
|
||||||
Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height());
|
Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height());
|
||||||
Box *output_box = nullptr;
|
Box *output_box = nullptr;
|
||||||
@ -593,7 +593,7 @@ static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) {
|
|||||||
// and checks for nontext_map pixels in each half. Reduces the bbox so that it
|
// and checks for nontext_map pixels in each half. Reduces the bbox so that it
|
||||||
// still includes the middle point, but does not touch any fg pixels in
|
// still includes the middle point, but does not touch any fg pixels in
|
||||||
// nontext_map. An empty box may be returned if there is no such box.
|
// nontext_map. An empty box may be returned if there is no such box.
|
||||||
static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Pix *nontext_map,
|
static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Image nontext_map,
|
||||||
TBOX *bbox) {
|
TBOX *bbox) {
|
||||||
TBOX box1(*bbox);
|
TBOX box1(*bbox);
|
||||||
TBOX box2(*bbox);
|
TBOX box2(*bbox);
|
||||||
@ -652,7 +652,7 @@ void TextlineProjection::IncrementRectangle8Bit(const TBOX &box) {
|
|||||||
// flags, but the spreading is truncated by set pixels in the nontext_map
|
// flags, but the spreading is truncated by set pixels in the nontext_map
|
||||||
// and also by the horizontal rule line limits on the blobs.
|
// and also by the horizontal rule line limits on the blobs.
|
||||||
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation,
|
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation,
|
||||||
const TBOX &nontext_map_box, Pix *nontext_map) {
|
const TBOX &nontext_map_box, Image nontext_map) {
|
||||||
BLOBNBOX_IT blob_it(blobs);
|
BLOBNBOX_IT blob_it(blobs);
|
||||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||||
BLOBNBOX *blob = blob_it.data();
|
BLOBNBOX *blob = blob_it.data();
|
||||||
|
@ -44,7 +44,7 @@ public:
|
|||||||
// The rotation is a multiple of 90 degrees, ie no deskew yet.
|
// The rotation is a multiple of 90 degrees, ie no deskew yet.
|
||||||
// The blobs have had their left and right rules set to also limit
|
// The blobs have had their left and right rules set to also limit
|
||||||
// the range of projection.
|
// the range of projection.
|
||||||
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map);
|
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map);
|
||||||
|
|
||||||
// Display the blobs in the window colored according to textline quality.
|
// Display the blobs in the window colored according to textline quality.
|
||||||
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win);
|
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win);
|
||||||
@ -165,7 +165,7 @@ private:
|
|||||||
// flags, but the spreading is truncated by set pixels in the nontext_map
|
// flags, but the spreading is truncated by set pixels in the nontext_map
|
||||||
// and also by the horizontal rule line limits on the blobs.
|
// and also by the horizontal rule line limits on the blobs.
|
||||||
void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box,
|
void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box,
|
||||||
Pix *nontext_map);
|
Image nontext_map);
|
||||||
// Pads the bounding box of the given blob according to whether it is on
|
// Pads the bounding box of the given blob according to whether it is on
|
||||||
// a horizontal or vertical text line, taking into account tab-stops near
|
// a horizontal or vertical text line, taking into account tab-stops near
|
||||||
// the blob. Returns true if padding was in the horizontal direction.
|
// the blob. Returns true if padding was in the horizontal direction.
|
||||||
@ -192,7 +192,7 @@ private:
|
|||||||
// The image of horizontally smeared blob boxes summed to provide a
|
// The image of horizontally smeared blob boxes summed to provide a
|
||||||
// textline density map. As with a horizontal projection, the map has
|
// textline density map. As with a horizontal projection, the map has
|
||||||
// dips in the gaps between textlines.
|
// dips in the gaps between textlines.
|
||||||
Pix *pix_;
|
Image pix_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
@ -175,7 +175,7 @@ Textord::Textord(CCStruct *ccstruct)
|
|||||||
|
|
||||||
// Make the textlines and words inside each block.
|
// Make the textlines and words inside each block.
|
||||||
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
|
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
|
||||||
Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms,
|
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
|
||||||
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
|
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
|
||||||
TO_BLOCK_LIST *to_blocks) {
|
TO_BLOCK_LIST *to_blocks) {
|
||||||
page_tr_.set_x(width);
|
page_tr_.set_x(width);
|
||||||
|
@ -88,7 +88,7 @@ public:
|
|||||||
// diacritic_blobs contain small confusing components that should be added
|
// diacritic_blobs contain small confusing components that should be added
|
||||||
// to the appropriate word(s) in case they are really diacritics.
|
// to the appropriate word(s) in case they are really diacritics.
|
||||||
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
|
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
|
||||||
Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms,
|
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
|
||||||
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
|
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
|
||||||
|
|
||||||
// If we were supposed to return only a single textline, and there is more
|
// If we were supposed to return only a single textline, and there is more
|
||||||
@ -113,7 +113,7 @@ public:
|
|||||||
FCOORD rotation // for drawing
|
FCOORD rotation // for drawing
|
||||||
);
|
);
|
||||||
// tordmain.cpp ///////////////////////////////////////////
|
// tordmain.cpp ///////////////////////////////////////////
|
||||||
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
|
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
|
||||||
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
|
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -66,17 +66,17 @@ CLISTIZE(WordWithBox)
|
|||||||
*
|
*
|
||||||
* Set the horizontal and vertical stroke widths in the blob.
|
* Set the horizontal and vertical stroke widths in the blob.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
|
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) {
|
||||||
// Cut the blob rectangle into a Pix.
|
// Cut the blob rectangle into a Pix.
|
||||||
int pix_height = pixGetHeight(pix);
|
int pix_height = pixGetHeight(pix);
|
||||||
const TBOX &box = blob->bounding_box();
|
const TBOX &box = blob->bounding_box();
|
||||||
int width = box.width();
|
int width = box.width();
|
||||||
int height = box.height();
|
int height = box.height();
|
||||||
Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
|
Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
|
||||||
Pix *pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
|
Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
|
||||||
boxDestroy(&blob_pix_box);
|
boxDestroy(&blob_pix_box);
|
||||||
Pix *dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
|
Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
|
||||||
pixDestroy(&pix_blob);
|
pix_blob.destroy();
|
||||||
// Compute the stroke widths.
|
// Compute the stroke widths.
|
||||||
uint32_t *data = pixGetData(dist_pix);
|
uint32_t *data = pixGetData(dist_pix);
|
||||||
int wpl = pixGetWpl(dist_pix);
|
int wpl = pixGetWpl(dist_pix);
|
||||||
@ -129,7 +129,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
|
|||||||
pixel = next_pixel;
|
pixel = next_pixel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&dist_pix);
|
dist_pix.destroy();
|
||||||
// Store the horizontal and vertical width in the blob, keeping both
|
// Store the horizontal and vertical width in the blob, keeping both
|
||||||
// widths if there is enough information, otherwise only the one with
|
// widths if there is enough information, otherwise only the one with
|
||||||
// the most samples.
|
// the most samples.
|
||||||
@ -160,7 +160,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
|
|||||||
* Make a list of TO_BLOCKs for portrait and landscape orientation.
|
* Make a list of TO_BLOCKs for portrait and landscape orientation.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
void assign_blobs_to_blocks2(Pix *pix,
|
void assign_blobs_to_blocks2(Image pix,
|
||||||
BLOCK_LIST *blocks, // blocks to process
|
BLOCK_LIST *blocks, // blocks to process
|
||||||
TO_BLOCK_LIST *port_blocks) { // output list
|
TO_BLOCK_LIST *port_blocks) { // output list
|
||||||
BLOCK *block; // current block
|
BLOCK *block; // current block
|
||||||
@ -211,7 +211,7 @@ void assign_blobs_to_blocks2(Pix *pix,
|
|||||||
* grades on different lists in the matching TO_BLOCK in to_blocks.
|
* grades on different lists in the matching TO_BLOCK in to_blocks.
|
||||||
**********************************************************************/
|
**********************************************************************/
|
||||||
|
|
||||||
void Textord::find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
|
void Textord::find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
|
||||||
int width = pixGetWidth(pix);
|
int width = pixGetWidth(pix);
|
||||||
int height = pixGetHeight(pix);
|
int height = pixGetHeight(pix);
|
||||||
if (width > INT16_MAX || height > INT16_MAX) {
|
if (width > INT16_MAX || height > INT16_MAX) {
|
||||||
|
@ -32,8 +32,8 @@ namespace tesseract {
|
|||||||
|
|
||||||
class Tesseract;
|
class Tesseract;
|
||||||
|
|
||||||
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob);
|
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob);
|
||||||
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks);
|
void assign_blobs_to_blocks2(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks);
|
||||||
|
|
||||||
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction);
|
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction);
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ const double kRatingEpsilon = 1.0 / 32;
|
|||||||
// with a debug flag and a keep_this argument to find out what is going on.
|
// with a debug flag and a keep_this argument to find out what is going on.
|
||||||
double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,
|
double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,
|
||||||
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
||||||
const std::vector<Pix *> &page_images, SampleIterator *it,
|
const std::vector<Image > &page_images, SampleIterator *it,
|
||||||
double *unichar_error, double *scaled_error,
|
double *unichar_error, double *scaled_error,
|
||||||
std::string *fonts_report) {
|
std::string *fonts_report) {
|
||||||
const int fontsize = it->sample_set()->NumFonts();
|
const int fontsize = it->sample_set()->NumFonts();
|
||||||
@ -59,7 +59,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
|
|||||||
for (it->Begin(); !it->AtEnd(); it->Next()) {
|
for (it->Begin(); !it->AtEnd(); it->Next()) {
|
||||||
TrainingSample *mutable_sample = it->MutableSample();
|
TrainingSample *mutable_sample = it->MutableSample();
|
||||||
int page_index = mutable_sample->page_num();
|
int page_index = mutable_sample->page_num();
|
||||||
Pix *page_pix =
|
Image page_pix =
|
||||||
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
|
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
|
||||||
// No debug, no keep this.
|
// No debug, no keep this.
|
||||||
classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
|
classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
|
||||||
@ -108,7 +108,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
|
|||||||
// and a keep_this argument to find out what is going on.
|
// and a keep_this argument to find out what is going on.
|
||||||
void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
|
void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
|
||||||
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
||||||
const std::vector<Pix *> &page_images, SampleIterator *it) {
|
const std::vector<Image > &page_images, SampleIterator *it) {
|
||||||
int fontsize = it->sample_set()->NumFonts();
|
int fontsize = it->sample_set()->NumFonts();
|
||||||
ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
|
ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
|
||||||
ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
|
ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
|
||||||
@ -121,7 +121,7 @@ void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifi
|
|||||||
for (it->Begin(); !it->AtEnd(); it->Next()) {
|
for (it->Begin(); !it->AtEnd(); it->Next()) {
|
||||||
TrainingSample *mutable_sample = it->MutableSample();
|
TrainingSample *mutable_sample = it->MutableSample();
|
||||||
int page_index = mutable_sample->page_num();
|
int page_index = mutable_sample->page_num();
|
||||||
Pix *page_pix =
|
Image page_pix =
|
||||||
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
|
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
|
||||||
// No debug, no keep this.
|
// No debug, no keep this.
|
||||||
old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
|
old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
|
||||||
|
@ -121,7 +121,7 @@ public:
|
|||||||
// * The return value is the un-weighted version of the scaled_error.
|
// * The return value is the un-weighted version of the scaled_error.
|
||||||
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level,
|
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level,
|
||||||
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
||||||
const std::vector<Pix *> &page_images, SampleIterator *it,
|
const std::vector<Image > &page_images, SampleIterator *it,
|
||||||
double *unichar_error, double *scaled_error, std::string *fonts_report);
|
double *unichar_error, double *scaled_error, std::string *fonts_report);
|
||||||
// Tests a pair of classifiers, debugging errors of the new against the old.
|
// Tests a pair of classifiers, debugging errors of the new against the old.
|
||||||
// See errorcounter.h for description of arguments.
|
// See errorcounter.h for description of arguments.
|
||||||
@ -131,7 +131,7 @@ public:
|
|||||||
// with a debug flag and a keep_this argument to find out what is going on.
|
// with a debug flag and a keep_this argument to find out what is going on.
|
||||||
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
|
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
|
||||||
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
|
||||||
const std::vector<Pix *> &page_images, SampleIterator *it);
|
const std::vector<Image > &page_images, SampleIterator *it);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Simple struct to hold an array of counts.
|
// Simple struct to hold an array of counts.
|
||||||
|
@ -63,7 +63,7 @@ MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
|
|||||||
MasterTrainer::~MasterTrainer() {
|
MasterTrainer::~MasterTrainer() {
|
||||||
delete[] fragments_;
|
delete[] fragments_;
|
||||||
for (auto &page_image : page_images_) {
|
for (auto &page_image : page_images_) {
|
||||||
pixDestroy(&page_image);
|
page_image.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,7 +219,7 @@ void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSa
|
|||||||
void MasterTrainer::LoadPageImages(const char *filename) {
|
void MasterTrainer::LoadPageImages(const char *filename) {
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
int page;
|
int page;
|
||||||
Pix *pix;
|
Image pix;
|
||||||
for (page = 0;; page++) {
|
for (page = 0;; page++) {
|
||||||
pix = pixReadFromMultipageTiff(filename, &offset);
|
pix = pixReadFromMultipageTiff(filename, &offset);
|
||||||
if (!pix) {
|
if (!pix) {
|
||||||
|
@ -284,7 +284,7 @@ private:
|
|||||||
// Vector of Pix pointers used for classifiers that need the image.
|
// Vector of Pix pointers used for classifiers that need the image.
|
||||||
// Indexed by page_num_ in the samples.
|
// Indexed by page_num_ in the samples.
|
||||||
// These images are owned by the trainer and need to be pixDestroyed.
|
// These images are owned by the trainer and need to be pixDestroyed.
|
||||||
std::vector<Pix *> page_images_;
|
std::vector<Image > page_images_;
|
||||||
// Vector of filenames of loaded tr files.
|
// Vector of filenames of loaded tr files.
|
||||||
std::vector<std::string> tr_filenames_;
|
std::vector<std::string> tr_filenames_;
|
||||||
};
|
};
|
||||||
|
@ -86,9 +86,9 @@ const int kMinRampSize = 1000;
|
|||||||
// the edges.
|
// the edges.
|
||||||
// Finally a greyscale ramp provides a continuum of effects between exposure
|
// Finally a greyscale ramp provides a continuum of effects between exposure
|
||||||
// levels.
|
// levels.
|
||||||
Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) {
|
Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) {
|
||||||
Pix *pix = pixConvertTo8(input, false);
|
Image pix = pixConvertTo8(input, false);
|
||||||
pixDestroy(&input);
|
input.destroy();
|
||||||
input = pix;
|
input = pix;
|
||||||
int width = pixGetWidth(input);
|
int width = pixGetWidth(input);
|
||||||
int height = pixGetHeight(input);
|
int height = pixGetHeight(input);
|
||||||
@ -99,12 +99,12 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
|
|||||||
// see http://www.leptonica.com/grayscale-morphology.html
|
// see http://www.leptonica.com/grayscale-morphology.html
|
||||||
pix = input;
|
pix = input;
|
||||||
input = pixErodeGray(pix, 3, 3);
|
input = pixErodeGray(pix, 3, 3);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
// A convolution is essential to any mode as no scanner produces an
|
// A convolution is essential to any mode as no scanner produces an
|
||||||
// image as sharp as the electronic image.
|
// image as sharp as the electronic image.
|
||||||
pix = pixBlockconv(input, 1, 1);
|
pix = pixBlockconv(input, 1, 1);
|
||||||
pixDestroy(&input);
|
input.destroy();
|
||||||
// A small random rotation helps to make the edges jaggy in a realistic way.
|
// A small random rotation helps to make the edges jaggy in a realistic way.
|
||||||
if (rotation != nullptr) {
|
if (rotation != nullptr) {
|
||||||
float radians_clockwise = 0.0f;
|
float radians_clockwise = 0.0f;
|
||||||
@ -117,7 +117,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
|
|||||||
input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
|
input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
|
||||||
// Rotate the boxes to match.
|
// Rotate the boxes to match.
|
||||||
*rotation = radians_clockwise;
|
*rotation = radians_clockwise;
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
} else {
|
} else {
|
||||||
input = pix;
|
input = pix;
|
||||||
}
|
}
|
||||||
@ -129,7 +129,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
|
|||||||
// see http://www.leptonica.com/grayscale-morphology.html
|
// see http://www.leptonica.com/grayscale-morphology.html
|
||||||
pix = input;
|
pix = input;
|
||||||
input = pixErodeGray(pix, 3, 3);
|
input = pixErodeGray(pix, 3, 3);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
// The convolution really needed to be 2x2 to be realistic enough, but
|
// The convolution really needed to be 2x2 to be realistic enough, but
|
||||||
// we only have 3x3, so we have to bias the image darker or lose thin
|
// we only have 3x3, so we have to bias the image darker or lose thin
|
||||||
@ -176,27 +176,27 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
|
|||||||
// any spatial distortion and also by the integer reduction factor box_scale
|
// any spatial distortion and also by the integer reduction factor box_scale
|
||||||
// so they will match what the network will output.
|
// so they will match what the network will output.
|
||||||
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||||
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
|
Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
|
||||||
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
||||||
std::vector<TBOX> *boxes) {
|
std::vector<TBOX> *boxes) {
|
||||||
Pix *distorted = pixCopy(nullptr, const_cast<Pix *>(pix));
|
Image distorted = pixCopy(nullptr, pix);
|
||||||
// Things to do to synthetic training data.
|
// Things to do to synthetic training data.
|
||||||
if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
|
if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
|
||||||
// TODO(rays) Cook noise in a more thread-safe manner than rand().
|
// TODO(rays) Cook noise in a more thread-safe manner than rand().
|
||||||
// Attempt to make the sequences reproducible.
|
// Attempt to make the sequences reproducible.
|
||||||
srand(randomizer->IntRand());
|
srand(randomizer->IntRand());
|
||||||
Pix *pixn = pixAddGaussianNoise(distorted, 8.0);
|
Image pixn = pixAddGaussianNoise(distorted, 8.0);
|
||||||
pixDestroy(&distorted);
|
distorted.destroy();
|
||||||
if (smooth_noise) {
|
if (smooth_noise) {
|
||||||
distorted = pixBlockconv(pixn, 1, 1);
|
distorted = pixBlockconv(pixn, 1, 1);
|
||||||
pixDestroy(&pixn);
|
pixn.destroy();
|
||||||
} else {
|
} else {
|
||||||
distorted = pixn;
|
distorted = pixn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (blur && randomizer->SignedRand(1.0) > 0.0) {
|
if (blur && randomizer->SignedRand(1.0) > 0.0) {
|
||||||
Pix *blurred = pixBlockconv(distorted, 1, 1);
|
Image blurred = pixBlockconv(distorted, 1, 1);
|
||||||
pixDestroy(&distorted);
|
distorted.destroy();
|
||||||
distorted = blurred;
|
distorted = blurred;
|
||||||
}
|
}
|
||||||
if (perspective) {
|
if (perspective) {
|
||||||
@ -219,7 +219,7 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi
|
|||||||
// Distorts anything that has a non-null pointer with the same pseudo-random
|
// Distorts anything that has a non-null pointer with the same pseudo-random
|
||||||
// perspective distortion. Width and height only need to be set if there
|
// perspective distortion. Width and height only need to be set if there
|
||||||
// is no pix. If there is a pix, then they will be taken from there.
|
// is no pix. If there is a pix, then they will be taken from there.
|
||||||
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
|
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
|
||||||
std::vector<TBOX> *boxes) {
|
std::vector<TBOX> *boxes) {
|
||||||
if (pix != nullptr && *pix != nullptr) {
|
if (pix != nullptr && *pix != nullptr) {
|
||||||
width = pixGetWidth(*pix);
|
width = pixGetWidth(*pix);
|
||||||
@ -230,12 +230,12 @@ void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix
|
|||||||
l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
|
l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
|
||||||
if (pix != nullptr && *pix != nullptr) {
|
if (pix != nullptr && *pix != nullptr) {
|
||||||
// Transform the image.
|
// Transform the image.
|
||||||
Pix *transformed = pixProjective(*pix, im_coeffs, incolor);
|
Image transformed = pixProjective(*pix, im_coeffs, incolor);
|
||||||
if (transformed == nullptr) {
|
if (transformed == nullptr) {
|
||||||
tprintf("Projective transformation failed!!\n");
|
tprintf("Projective transformation failed!!\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
pixDestroy(pix);
|
pix->destroy();
|
||||||
*pix = transformed;
|
*pix = transformed;
|
||||||
}
|
}
|
||||||
if (boxes != nullptr) {
|
if (boxes != nullptr) {
|
||||||
|
@ -30,20 +30,20 @@ namespace tesseract {
|
|||||||
// If rotation is not nullptr, the clockwise rotation in radians is saved there.
|
// If rotation is not nullptr, the clockwise rotation in radians is saved there.
|
||||||
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.)
|
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.)
|
||||||
// The input image is destroyed and a different image returned.
|
// The input image is destroyed and a different image returned.
|
||||||
struct Pix *DegradeImage(struct Pix *input, int exposure, TRand *randomizer, float *rotation);
|
struct Image DegradeImage(struct Image input, int exposure, TRand *randomizer, float *rotation);
|
||||||
|
|
||||||
// Creates and returns a Pix distorted by various means according to the bool
|
// Creates and returns a Pix distorted by various means according to the bool
|
||||||
// flags. If boxes is not nullptr, the boxes are resized/positioned according to
|
// flags. If boxes is not nullptr, the boxes are resized/positioned according to
|
||||||
// any spatial distortion and also by the integer reduction factor box_scale
|
// any spatial distortion and also by the integer reduction factor box_scale
|
||||||
// so they will match what the network will output.
|
// so they will match what the network will output.
|
||||||
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||||
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
|
Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
|
||||||
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
||||||
std::vector<TBOX> *boxes);
|
std::vector<TBOX> *boxes);
|
||||||
// Distorts anything that has a non-null pointer with the same pseudo-random
|
// Distorts anything that has a non-null pointer with the same pseudo-random
|
||||||
// perspective distortion. Width and height only need to be set if there
|
// perspective distortion. Width and height only need to be set if there
|
||||||
// is no pix. If there is a pix, then they will be taken from there.
|
// is no pix. If there is a pix, then they will be taken from there.
|
||||||
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
|
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
|
||||||
std::vector<TBOX> *boxes);
|
std::vector<TBOX> *boxes);
|
||||||
// Computes the coefficients of a randomized projective transformation.
|
// Computes the coefficients of a randomized projective transformation.
|
||||||
// The image transform requires backward transformation coefficient, and the
|
// The image transform requires backward transformation coefficient, and the
|
||||||
|
@ -74,14 +74,14 @@ static bool RandBool(const double prob, TRand *rand) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* static */
|
/* static */
|
||||||
static Pix *CairoARGB32ToPixFormat(cairo_surface_t *surface) {
|
static Image CairoARGB32ToPixFormat(cairo_surface_t *surface) {
|
||||||
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
|
||||||
printf("Unexpected surface format %d\n", cairo_image_surface_get_format(surface));
|
printf("Unexpected surface format %d\n", cairo_image_surface_get_format(surface));
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
const int width = cairo_image_surface_get_width(surface);
|
const int width = cairo_image_surface_get_width(surface);
|
||||||
const int height = cairo_image_surface_get_height(surface);
|
const int height = cairo_image_surface_get_height(surface);
|
||||||
Pix *pix = pixCreate(width, height, 32);
|
Image pix = pixCreate(width, height, 32);
|
||||||
int byte_stride = cairo_image_surface_get_stride(surface);
|
int byte_stride = cairo_image_surface_get_stride(surface);
|
||||||
|
|
||||||
for (int i = 0; i < height; ++i) {
|
for (int i = 0; i < height; ++i) {
|
||||||
@ -636,25 +636,25 @@ int StringRenderer::StripUnrenderableWords(std::string *utf8_text) const {
|
|||||||
return num_dropped;
|
return num_dropped;
|
||||||
}
|
}
|
||||||
|
|
||||||
int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Pix **pix) {
|
int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Image *pix) {
|
||||||
Pix *orig_pix = nullptr;
|
Image orig_pix = nullptr;
|
||||||
int offset = RenderToImage(text, text_length, &orig_pix);
|
int offset = RenderToImage(text, text_length, &orig_pix);
|
||||||
if (orig_pix) {
|
if (orig_pix) {
|
||||||
*pix = pixConvertTo8(orig_pix, false);
|
*pix = pixConvertTo8(orig_pix, false);
|
||||||
pixDestroy(&orig_pix);
|
orig_pix.destroy();
|
||||||
}
|
}
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
int StringRenderer::RenderToBinaryImage(const char *text, int text_length, int threshold,
|
int StringRenderer::RenderToBinaryImage(const char *text, int text_length, int threshold,
|
||||||
Pix **pix) {
|
Image *pix) {
|
||||||
Pix *orig_pix = nullptr;
|
Image orig_pix = nullptr;
|
||||||
int offset = RenderToImage(text, text_length, &orig_pix);
|
int offset = RenderToImage(text, text_length, &orig_pix);
|
||||||
if (orig_pix) {
|
if (orig_pix) {
|
||||||
Pix *gray_pix = pixConvertTo8(orig_pix, false);
|
Image gray_pix = pixConvertTo8(orig_pix, false);
|
||||||
pixDestroy(&orig_pix);
|
orig_pix.destroy();
|
||||||
*pix = pixThresholdToBinary(gray_pix, threshold);
|
*pix = pixThresholdToBinary(gray_pix, threshold);
|
||||||
pixDestroy(&gray_pix);
|
gray_pix.destroy();
|
||||||
} else {
|
} else {
|
||||||
*pix = orig_pix;
|
*pix = orig_pix;
|
||||||
}
|
}
|
||||||
@ -719,9 +719,9 @@ std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns offset to end of text substring rendered in this method.
|
// Returns offset to end of text substring rendered in this method.
|
||||||
int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix) {
|
int StringRenderer::RenderToImage(const char *text, int text_length, Image *pix) {
|
||||||
if (pix && *pix) {
|
if (pix && *pix) {
|
||||||
pixDestroy(pix);
|
pix->destroy();
|
||||||
}
|
}
|
||||||
InitPangoCairo();
|
InitPangoCairo();
|
||||||
|
|
||||||
@ -813,7 +813,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix)
|
|||||||
//
|
//
|
||||||
// int offset = 0;
|
// int offset = 0;
|
||||||
// do {
|
// do {
|
||||||
// Pix *pix;
|
// Image pix;
|
||||||
// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset,
|
// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset,
|
||||||
// strlen(txt + offset), nullptr,
|
// strlen(txt + offset), nullptr,
|
||||||
// &pix);
|
// &pix);
|
||||||
@ -821,7 +821,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix)
|
|||||||
// } while (offset < strlen(text));
|
// } while (offset < strlen(text));
|
||||||
//
|
//
|
||||||
int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
|
int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
|
||||||
std::string *font_used, Pix **image) {
|
std::string *font_used, Image *image) {
|
||||||
*image = nullptr;
|
*image = nullptr;
|
||||||
// Select a suitable font to render the title with.
|
// Select a suitable font to render the title with.
|
||||||
const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
|
const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
|
||||||
@ -873,10 +873,10 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text,
|
|||||||
// Add the font to the image.
|
// Add the font to the image.
|
||||||
set_font(title_font);
|
set_font(title_font);
|
||||||
v_margin_ /= 8;
|
v_margin_ /= 8;
|
||||||
Pix *title_image = nullptr;
|
Image title_image = nullptr;
|
||||||
RenderToBinaryImage(title, strlen(title), 128, &title_image);
|
RenderToBinaryImage(title, strlen(title), 128, &title_image);
|
||||||
pixOr(*image, *image, title_image);
|
pixOr(*image, *image, title_image);
|
||||||
pixDestroy(&title_image);
|
title_image.destroy();
|
||||||
|
|
||||||
v_margin_ *= 8;
|
v_margin_ *= 8;
|
||||||
set_font(orig_font);
|
set_font(orig_font);
|
||||||
|
@ -34,6 +34,8 @@
|
|||||||
#include "pango/pangocairo.h"
|
#include "pango/pangocairo.h"
|
||||||
#include "pango_font_info.h"
|
#include "pango_font_info.h"
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -53,14 +55,14 @@ public:
|
|||||||
// Renders the text with the chosen font and returns the byte offset up to
|
// Renders the text with the chosen font and returns the byte offset up to
|
||||||
// which the text could be rendered so as to fit the specified page
|
// which the text could be rendered so as to fit the specified page
|
||||||
// dimensions.
|
// dimensions.
|
||||||
int RenderToImage(const char *text, int text_length, Pix **pix);
|
int RenderToImage(const char *text, int text_length, Image *pix);
|
||||||
int RenderToGrayscaleImage(const char *text, int text_length, Pix **pix);
|
int RenderToGrayscaleImage(const char *text, int text_length, Image *pix);
|
||||||
int RenderToBinaryImage(const char *text, int text_length, int threshold, Pix **pix);
|
int RenderToBinaryImage(const char *text, int text_length, int threshold, Image *pix);
|
||||||
// Renders a line of text with all available fonts that were able to render
|
// Renders a line of text with all available fonts that were able to render
|
||||||
// at least min_coverage fraction of the input text. Use 1.0 to require that
|
// at least min_coverage fraction of the input text. Use 1.0 to require that
|
||||||
// a font be able to render all the text.
|
// a font be able to render all the text.
|
||||||
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
|
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
|
||||||
std::string *font_used, Pix **pix);
|
std::string *font_used, Image *pix);
|
||||||
|
|
||||||
bool set_font(const std::string &desc);
|
bool set_font(const std::string &desc);
|
||||||
// Char spacing is in PIXELS!!!!.
|
// Char spacing is in PIXELS!!!!.
|
||||||
|
@ -331,7 +331,7 @@ static void ExtractFontProperties(const std::string &utf8_text, StringRenderer *
|
|||||||
File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
|
File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
|
static bool MakeIndividualGlyphs(Image pix, const std::vector<BoxChar *> &vbox,
|
||||||
const int input_tiff_page) {
|
const int input_tiff_page) {
|
||||||
// If checks fail, return false without exiting text2image
|
// If checks fail, return false without exiting text2image
|
||||||
if (!pix) {
|
if (!pix) {
|
||||||
@ -383,26 +383,26 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Crop the boxed character
|
// Crop the boxed character
|
||||||
Pix *pix_glyph = pixClipRectangle(pix, b, nullptr);
|
Image pix_glyph = pixClipRectangle(pix, b, nullptr);
|
||||||
if (!pix_glyph) {
|
if (!pix_glyph) {
|
||||||
tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
|
tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Resize to square
|
// Resize to square
|
||||||
Pix *pix_glyph_sq =
|
Image pix_glyph_sq =
|
||||||
pixScaleToSize(pix_glyph, FLAGS_glyph_resized_size, FLAGS_glyph_resized_size);
|
pixScaleToSize(pix_glyph, FLAGS_glyph_resized_size, FLAGS_glyph_resized_size);
|
||||||
if (!pix_glyph_sq) {
|
if (!pix_glyph_sq) {
|
||||||
tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
|
tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Zero-pad
|
// Zero-pad
|
||||||
Pix *pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0);
|
Image pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0);
|
||||||
if (!pix_glyph_sq_pad) {
|
if (!pix_glyph_sq_pad) {
|
||||||
tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", i);
|
tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", i);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Write out
|
// Write out
|
||||||
Pix *pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
|
Image pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
|
||||||
char filename[1024];
|
char filename[1024];
|
||||||
snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), glyph_count++);
|
snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), glyph_count++);
|
||||||
if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
|
if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
|
||||||
@ -413,10 +413,10 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
pixDestroy(&pix_glyph);
|
pix_glyph.destroy();
|
||||||
pixDestroy(&pix_glyph_sq);
|
pix_glyph_sq.destroy();
|
||||||
pixDestroy(&pix_glyph_sq_pad);
|
pix_glyph_sq_pad.destroy();
|
||||||
pixDestroy(&pix_glyph_sq_pad_8);
|
pix_glyph_sq_pad_8.destroy();
|
||||||
n_boxes_saved++;
|
n_boxes_saved++;
|
||||||
y_previous = y;
|
y_previous = y;
|
||||||
}
|
}
|
||||||
@ -625,7 +625,7 @@ static int Main() {
|
|||||||
offset < strlen(to_render_utf8) && (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
|
offset < strlen(to_render_utf8) && (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
|
||||||
++im, ++page_num) {
|
++im, ++page_num) {
|
||||||
tlog(1, "Starting page %d\n", im);
|
tlog(1, "Starting page %d\n", im);
|
||||||
Pix *pix = nullptr;
|
Image pix = nullptr;
|
||||||
if (FLAGS_find_fonts) {
|
if (FLAGS_find_fonts) {
|
||||||
offset += render.RenderAllFontsToImage(FLAGS_min_coverage, to_render_utf8 + offset,
|
offset += render.RenderAllFontsToImage(FLAGS_min_coverage, to_render_utf8 + offset,
|
||||||
strlen(to_render_utf8 + offset), &font_used, &pix);
|
strlen(to_render_utf8 + offset), &font_used, &pix);
|
||||||
@ -655,10 +655,10 @@ static int Main() {
|
|||||||
page_rotation.push_back(rotation);
|
page_rotation.push_back(rotation);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pix *gray_pix = pixConvertTo8(pix, false);
|
Image gray_pix = pixConvertTo8(pix, false);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
Pix *binary = pixThresholdToBinary(gray_pix, 128);
|
Image binary = pixThresholdToBinary(gray_pix, 128);
|
||||||
pixDestroy(&gray_pix);
|
gray_pix.destroy();
|
||||||
char tiff_name[1024];
|
char tiff_name[1024];
|
||||||
if (FLAGS_find_fonts) {
|
if (FLAGS_find_fonts) {
|
||||||
if (FLAGS_render_per_font) {
|
if (FLAGS_render_per_font) {
|
||||||
@ -681,7 +681,7 @@ static int Main() {
|
|||||||
tprintf("ERROR: Individual glyphs not saved\n");
|
tprintf("ERROR: Individual glyphs not saved\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&binary);
|
binary.destroy();
|
||||||
}
|
}
|
||||||
if (FLAGS_find_fonts && offset != 0) {
|
if (FLAGS_find_fonts && offset != 0) {
|
||||||
// We just want a list of names, or some sample images so we don't need
|
// We just want a list of names, or some sample images so we don't need
|
||||||
|
@ -784,7 +784,7 @@ void ScrollView::ZoomToRectangle(int x1, int y1, int x2, int y2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Send an image of type Pix.
|
// Send an image of type Pix.
|
||||||
void ScrollView::Image(struct Pix *image, int x_pos, int y_pos) {
|
void ScrollView::Image(struct Image image, int x_pos, int y_pos) {
|
||||||
l_uint8 *data;
|
l_uint8 *data;
|
||||||
size_t size;
|
size_t size;
|
||||||
pixWriteMem(&data, &size, image, IFF_PNG);
|
pixWriteMem(&data, &size, image, IFF_PNG);
|
||||||
|
@ -31,6 +31,8 @@
|
|||||||
#ifndef TESSERACT_VIEWER_SCROLLVIEW_H_
|
#ifndef TESSERACT_VIEWER_SCROLLVIEW_H_
|
||||||
#define TESSERACT_VIEWER_SCROLLVIEW_H_
|
#define TESSERACT_VIEWER_SCROLLVIEW_H_
|
||||||
|
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <tesseract/export.h>
|
#include <tesseract/export.h>
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
@ -209,7 +211,7 @@ public:
|
|||||||
*******************************************************************************/
|
*******************************************************************************/
|
||||||
|
|
||||||
// Draw a Pix on (x,y).
|
// Draw a Pix on (x,y).
|
||||||
void Image(Pix *image, int x_pos, int y_pos);
|
void Image(Image image, int x_pos, int y_pos);
|
||||||
|
|
||||||
// Flush buffers and update display.
|
// Flush buffers and update display.
|
||||||
static void Update();
|
static void Update();
|
||||||
@ -353,11 +355,11 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// Transfers a binary Image.
|
// Transfers a binary Image.
|
||||||
void TransferBinaryImage(struct Pix *image);
|
void TransferBinaryImage(struct Image image);
|
||||||
// Transfers a gray scale Image.
|
// Transfers a gray scale Image.
|
||||||
void TransferGrayImage(struct Pix *image);
|
void TransferGrayImage(struct Image image);
|
||||||
// Transfers a 32-Bit Image.
|
// Transfers a 32-Bit Image.
|
||||||
void Transfer32bppImage(struct Pix *image);
|
void Transfer32bppImage(struct Image image);
|
||||||
|
|
||||||
// Sets up ScrollView, depending on the variables from the constructor.
|
// Sets up ScrollView, depending on the variables from the constructor.
|
||||||
void Initialize(const char *name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size,
|
void Initialize(const char *name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size,
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include <memory> // std::unique_ptr
|
#include <memory> // std::unique_ptr
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "include_gunit.h"
|
#include "include_gunit.h"
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
@ -66,7 +67,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat
|
|||||||
std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
||||||
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
||||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||||
Pix *image = pixRead(imgname);
|
Image image = pixRead(imgname);
|
||||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||||
api->SetImage(image);
|
api->SetImage(image);
|
||||||
outText = api->GetUTF8Text();
|
outText = api->GetUTF8Text();
|
||||||
@ -74,7 +75,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat
|
|||||||
<< ::testing::PrintToString(lang);
|
<< ::testing::PrintToString(lang);
|
||||||
api->End();
|
api->End();
|
||||||
delete[] outText;
|
delete[] outText;
|
||||||
pixDestroy(&image);
|
image.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface<const char *> {};
|
class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface<const char *> {};
|
||||||
|
@ -37,12 +37,12 @@ protected:
|
|||||||
src_pix_ = nullptr;
|
src_pix_ = nullptr;
|
||||||
}
|
}
|
||||||
~ApplyBoxTest() override {
|
~ApplyBoxTest() override {
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SetImage(const char *filename) {
|
bool SetImage(const char *filename) {
|
||||||
bool found = false;
|
bool found = false;
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
|
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
|
||||||
if (api_.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
if (api_.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
||||||
api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
|
api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
|
||||||
@ -101,7 +101,7 @@ protected:
|
|||||||
delete it;
|
delete it;
|
||||||
}
|
}
|
||||||
|
|
||||||
Pix *src_pix_;
|
Image src_pix_;
|
||||||
std::string ocr_text_;
|
std::string ocr_text_;
|
||||||
tesseract::TessBaseAPI api_;
|
tesseract::TessBaseAPI api_;
|
||||||
};
|
};
|
||||||
|
@ -44,7 +44,7 @@ class FriendlyTessBaseAPI : public tesseract::TessBaseAPI {
|
|||||||
FRIEND_TEST(TesseractTest, LSTMGeometryTest);
|
FRIEND_TEST(TesseractTest, LSTMGeometryTest);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Pix *pix) {
|
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) {
|
||||||
tess->SetImage(pix);
|
tess->SetImage(pix);
|
||||||
char *result = tess->GetUTF8Text();
|
char *result = tess->GetUTF8Text();
|
||||||
std::string ocr_result = result;
|
std::string ocr_result = result;
|
||||||
@ -70,14 +70,14 @@ TEST_F(TesseractTest, BasicTesseractTest) {
|
|||||||
std::string truth_text;
|
std::string truth_text;
|
||||||
std::string ocr_text;
|
std::string ocr_text;
|
||||||
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
Image src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||||
CHECK_OK(
|
CHECK_OK(
|
||||||
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
||||||
absl::StripAsciiWhitespace(&truth_text);
|
absl::StripAsciiWhitespace(&truth_text);
|
||||||
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
} else {
|
} else {
|
||||||
// eng.traineddata not found.
|
// eng.traineddata not found.
|
||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
@ -105,7 +105,7 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
|||||||
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
|
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
|
||||||
boxaDestroy(&block_boxes);
|
boxaDestroy(&block_boxes);
|
||||||
boxaDestroy(&para_boxes);
|
boxaDestroy(&para_boxes);
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
// eng.traineddata not found.
|
// eng.traineddata not found.
|
||||||
@ -122,7 +122,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
|||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
char *result = api.GetHOCRText(0);
|
char *result = api.GetHOCRText(0);
|
||||||
@ -130,7 +130,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
|||||||
EXPECT_THAT(result, HasSubstr("Hello"));
|
EXPECT_THAT(result, HasSubstr("Hello"));
|
||||||
EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
|
EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
|
||||||
delete[] result;
|
delete[] result;
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// hOCR output should contain baseline info for upright textlines.
|
// hOCR output should contain baseline info for upright textlines.
|
||||||
@ -141,7 +141,7 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
|
|||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetInputName("HelloGoogle.tif");
|
api.SetInputName("HelloGoogle.tif");
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
@ -152,7 +152,7 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
|
|||||||
result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
|
result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
|
||||||
|
|
||||||
delete[] result;
|
delete[] result;
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests that Tesseract gets exactly the right answer on some page numbers.
|
// Tests that Tesseract gets exactly the right answer on some page numbers.
|
||||||
@ -182,23 +182,23 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
|
|||||||
// Train on the training text.
|
// Train on the training text.
|
||||||
for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
|
for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
|
||||||
std::string image_file = TestDataNameToPath(kTrainingPages[i]);
|
std::string image_file = TestDataNameToPath(kTrainingPages[i]);
|
||||||
Pix *src_pix = pixRead(image_file.c_str());
|
Image src_pix = pixRead(image_file.c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
|
EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
|
||||||
<< "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
|
<< "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
// Test the test text.
|
// Test the test text.
|
||||||
api.SetVariable("tess_bn_matching", "1");
|
api.SetVariable("tess_bn_matching", "1");
|
||||||
api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
|
api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
|
||||||
for (int i = 0; kTestPages[i] != nullptr; ++i) {
|
for (int i = 0; kTestPages[i] != nullptr; ++i) {
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
|
Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||||
absl::StripAsciiWhitespace(&truth_text);
|
absl::StripAsciiWhitespace(&truth_text);
|
||||||
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
|
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -213,14 +213,14 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
|||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||||
CHECK_OK(
|
CHECK_OK(
|
||||||
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
||||||
absl::StripAsciiWhitespace(&truth_text);
|
absl::StripAsciiWhitespace(&truth_text);
|
||||||
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test that LSTM's character bounding boxes are properly converted to
|
// Test that LSTM's character bounding boxes are properly converted to
|
||||||
@ -230,7 +230,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
|||||||
// errors due to float/int conversions (e.g., see OUTLINE::move() in
|
// errors due to float/int conversions (e.g., see OUTLINE::move() in
|
||||||
// ccstruct/poutline.h) Instead, we do a loose check.
|
// ccstruct/poutline.h) Instead, we do a loose check.
|
||||||
TEST_F(TesseractTest, LSTMGeometryTest) {
|
TEST_F(TesseractTest, LSTMGeometryTest) {
|
||||||
Pix *src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
||||||
FriendlyTessBaseAPI api;
|
FriendlyTessBaseAPI api;
|
||||||
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
|
||||||
// eng.traineddata not found.
|
// eng.traineddata not found.
|
||||||
@ -270,7 +270,7 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
|
|||||||
EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
|
EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(TesseractTest, InitConfigOnlyTest) {
|
TEST_F(TesseractTest, InitConfigOnlyTest) {
|
||||||
@ -315,7 +315,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
|
|||||||
const std::string kTessdataPath = TESSDATA_DIR;
|
const std::string kTessdataPath = TESSDATA_DIR;
|
||||||
|
|
||||||
// Preload images and verify that OCR is correct on them individually.
|
// Preload images and verify that OCR is correct on them individually.
|
||||||
std::vector<Pix *> pix(num_langs);
|
std::vector<Image > pix(num_langs);
|
||||||
for (int i = 0; i < num_langs; ++i) {
|
for (int i = 0; i < num_langs; ++i) {
|
||||||
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
|
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
|
||||||
std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
|
std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
|
||||||
@ -346,7 +346,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < num_langs; ++i) {
|
for (int i = 0; i < num_langs; ++i) {
|
||||||
pixDestroy(&pix[i]);
|
pix[i].destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
#include "commandlineflags.h"
|
#include "commandlineflags.h"
|
||||||
#include "include_gunit.h"
|
#include "include_gunit.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
// Run with Tesseract instances.
|
// Run with Tesseract instances.
|
||||||
BOOL_PARAM_FLAG(test_tesseract, true, "Test tesseract instances");
|
BOOL_PARAM_FLAG(test_tesseract, true, "Test tesseract instances");
|
||||||
@ -97,7 +98,7 @@ protected:
|
|||||||
const int n = num_langs_ * FLAGS_reps;
|
const int n = num_langs_ * FLAGS_reps;
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
std::string path = TESTING_DIR "/" + image_files[i % num_langs_];
|
std::string path = TESTING_DIR "/" + image_files[i % num_langs_];
|
||||||
Pix *new_pix = pixRead(path.c_str());
|
Image new_pix = pixRead(path.c_str());
|
||||||
QCHECK(new_pix != nullptr) << "Could not read " << path;
|
QCHECK(new_pix != nullptr) << "Could not read " << path;
|
||||||
pix_.push_back(new_pix);
|
pix_.push_back(new_pix);
|
||||||
}
|
}
|
||||||
@ -110,7 +111,7 @@ protected:
|
|||||||
|
|
||||||
static void TearDownTestCase() {
|
static void TearDownTestCase() {
|
||||||
for (auto &pix : pix_) {
|
for (auto &pix : pix_) {
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,7 +128,7 @@ protected:
|
|||||||
std::unique_ptr<tensorflow::thread::ThreadPool> pool_;
|
std::unique_ptr<tensorflow::thread::ThreadPool> pool_;
|
||||||
static int pool_size_;
|
static int pool_size_;
|
||||||
#endif
|
#endif
|
||||||
static std::vector<Pix *> pix_;
|
static std::vector<Image > pix_;
|
||||||
static std::vector<std::string> langs_;
|
static std::vector<std::string> langs_;
|
||||||
static std::vector<std::string> gt_text_;
|
static std::vector<std::string> gt_text_;
|
||||||
static int num_langs_;
|
static int num_langs_;
|
||||||
@ -137,7 +138,7 @@ protected:
|
|||||||
#ifdef INCLUDE_TENSORFLOW
|
#ifdef INCLUDE_TENSORFLOW
|
||||||
int BaseapiThreadTest::pool_size_;
|
int BaseapiThreadTest::pool_size_;
|
||||||
#endif
|
#endif
|
||||||
std::vector<Pix *> BaseapiThreadTest::pix_;
|
std::vector<Image > BaseapiThreadTest::pix_;
|
||||||
std::vector<std::string> BaseapiThreadTest::langs_;
|
std::vector<std::string> BaseapiThreadTest::langs_;
|
||||||
std::vector<std::string> BaseapiThreadTest::gt_text_;
|
std::vector<std::string> BaseapiThreadTest::gt_text_;
|
||||||
int BaseapiThreadTest::num_langs_;
|
int BaseapiThreadTest::num_langs_;
|
||||||
@ -147,7 +148,7 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) {
|
|||||||
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
|
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) {
|
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) {
|
||||||
tess->SetImage(pix);
|
tess->SetImage(pix);
|
||||||
char *result = tess->GetUTF8Text();
|
char *result = tess->GetUTF8Text();
|
||||||
*ocr_text = result;
|
*ocr_text = result;
|
||||||
@ -155,7 +156,7 @@ static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) {
|
|||||||
absl::StripAsciiWhitespace(ocr_text);
|
absl::StripAsciiWhitespace(ocr_text);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void VerifyTextResult(TessBaseAPI *tess, Pix *pix, const std::string &lang,
|
static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang,
|
||||||
const std::string &expected_text) {
|
const std::string &expected_text) {
|
||||||
TessBaseAPI *tess_local = nullptr;
|
TessBaseAPI *tess_local = nullptr;
|
||||||
if (tess) {
|
if (tess) {
|
||||||
|
@ -61,7 +61,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set up pix_binary for lang_tesseract_.
|
// Set up pix_binary for lang_tesseract_.
|
||||||
void SetPixBinary(Pix *pix) {
|
void SetPixBinary(Image pix) {
|
||||||
CHECK_EQ(1, pixGetDepth(pix));
|
CHECK_EQ(1, pixGetDepth(pix));
|
||||||
*(lang_tesseract_->mutable_pix_binary()) = pix;
|
*(lang_tesseract_->mutable_pix_binary()) = pix;
|
||||||
}
|
}
|
||||||
@ -137,7 +137,7 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add a BLOCK covering the whole page.
|
// Add a BLOCK covering the whole page.
|
||||||
void AddPageBlock(Pix *pix, BLOCK_LIST *blocks) {
|
void AddPageBlock(Image pix, BLOCK_LIST *blocks) {
|
||||||
CHECK(pix != nullptr);
|
CHECK(pix != nullptr);
|
||||||
CHECK(blocks != nullptr);
|
CHECK(blocks != nullptr);
|
||||||
BLOCK_IT block_it(blocks);
|
BLOCK_IT block_it(blocks);
|
||||||
@ -183,7 +183,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) {
|
|||||||
#else // TODO: missing equ_gt1.tif
|
#else // TODO: missing equ_gt1.tif
|
||||||
// Load Image.
|
// Load Image.
|
||||||
std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
|
std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
|
||||||
Pix *pix_binary = pixRead(imagefile.c_str());
|
Image pix_binary = pixRead(imagefile.c_str());
|
||||||
CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
|
CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
|
||||||
|
|
||||||
// Get components.
|
// Get components.
|
||||||
@ -224,7 +224,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) {
|
|||||||
EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
|
EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
|
||||||
|
|
||||||
// Release memory.
|
// Release memory.
|
||||||
pixDestroy(&pix_binary);
|
pix_binary.destroy();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,7 +364,7 @@ TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
|
|||||||
TEST_F(EquationFinderTest, ComputeForegroundDensity) {
|
TEST_F(EquationFinderTest, ComputeForegroundDensity) {
|
||||||
// Create the pix with top half foreground, bottom half background.
|
// Create the pix with top half foreground, bottom half background.
|
||||||
int width = 1024, height = 768;
|
int width = 1024, height = 768;
|
||||||
Pix *pix = pixCreate(width, height, 1);
|
Image pix = pixCreate(width, height, 1);
|
||||||
pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
|
pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
|
||||||
TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
|
TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
|
||||||
box3(100, height - 40, 140, height);
|
box3(100, height - 40, 140, height);
|
||||||
@ -402,7 +402,7 @@ TEST_F(EquationFinderTest, CountAlignment) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
|
TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
|
||||||
Pix *pix = pixCreate(1001, 1001, 1);
|
Image pix = pixCreate(1001, 1001, 1);
|
||||||
equation_det_->SetPixBinary(pix);
|
equation_det_->SetPixBinary(pix);
|
||||||
ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
|
ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ extern "C" int LLVMFuzzerInitialize(int * /*pArgc*/, char ***pArgv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PIX *createPix(BitReader &BR, const size_t width, const size_t height) {
|
static PIX *createPix(BitReader &BR, const size_t width, const size_t height) {
|
||||||
Pix *pix = pixCreate(width, height, 1);
|
Image pix = pixCreate(width, height, 1);
|
||||||
|
|
||||||
if (pix == nullptr) {
|
if (pix == nullptr) {
|
||||||
printf("pix creation failed\n");
|
printf("pix creation failed\n");
|
||||||
@ -93,7 +93,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|||||||
|
|
||||||
char *outText = api->GetUTF8Text();
|
char *outText = api->GetUTF8Text();
|
||||||
|
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
delete[] outText;
|
delete[] outText;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -65,11 +65,11 @@ protected:
|
|||||||
src_pix_ = nullptr;
|
src_pix_ = nullptr;
|
||||||
}
|
}
|
||||||
~LayoutTest() override {
|
~LayoutTest() override {
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetImage(const char *filename, const char *lang) {
|
void SetImage(const char *filename, const char *lang) {
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
|
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
|
||||||
api_.Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY);
|
api_.Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY);
|
||||||
api_.SetPageSegMode(tesseract::PSM_AUTO);
|
api_.SetPageSegMode(tesseract::PSM_AUTO);
|
||||||
@ -182,7 +182,7 @@ protected:
|
|||||||
} while (it->Next(tesseract::RIL_BLOCK));
|
} while (it->Next(tesseract::RIL_BLOCK));
|
||||||
}
|
}
|
||||||
|
|
||||||
Pix *src_pix_;
|
Image src_pix_;
|
||||||
std::string ocr_text_;
|
std::string ocr_text_;
|
||||||
tesseract::TessBaseAPI api_;
|
tesseract::TessBaseAPI api_;
|
||||||
};
|
};
|
||||||
|
@ -87,7 +87,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
|
|||||||
// baseapi_test.cc).
|
// baseapi_test.cc).
|
||||||
TessBaseAPI api;
|
TessBaseAPI api;
|
||||||
api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
|
api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
|
||||||
Pix *src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
|
Image src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
std::unique_ptr<char[]> result(api.GetUTF8Text());
|
std::unique_ptr<char[]> result(api.GetUTF8Text());
|
||||||
@ -96,7 +96,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
|
|||||||
file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
||||||
|
|
||||||
EXPECT_STREQ(truth_text.c_str(), result.get());
|
EXPECT_STREQ(truth_text.c_str(), result.get());
|
||||||
pixDestroy(&src_pix);
|
src_pix.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace tesseract
|
} // namespace tesseract
|
||||||
|
@ -81,7 +81,7 @@ public:
|
|||||||
// If keep_this (a shape index) is >= 0, then the results should always
|
// If keep_this (a shape index) is >= 0, then the results should always
|
||||||
// contain keep_this, and (if possible) anything of intermediate confidence.
|
// contain keep_this, and (if possible) anything of intermediate confidence.
|
||||||
// The return value is the number of classes saved in results.
|
// The return value is the number of classes saved in results.
|
||||||
int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this,
|
int ClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this,
|
||||||
std::vector<ShapeRating> *results) override {
|
std::vector<ShapeRating> *results) override {
|
||||||
results->clear();
|
results->clear();
|
||||||
// Everything except the first kNumNonReject is a reject.
|
// Everything except the first kNumNonReject is a reject.
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#include <memory> // std::unique_ptr
|
#include <memory> // std::unique_ptr
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "include_gunit.h"
|
#include "include_gunit.h"
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat
|
|||||||
// log.info() << tessdatadir << " for image: " << imgname << std::endl;
|
// log.info() << tessdatadir << " for image: " << imgname << std::endl;
|
||||||
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
||||||
ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract.";
|
ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract.";
|
||||||
Pix *image = pixRead(imgname);
|
Image image = pixRead(imgname);
|
||||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||||
api->SetImage(image);
|
api->SetImage(image);
|
||||||
int orient_deg;
|
int orient_deg;
|
||||||
@ -53,7 +54,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat
|
|||||||
orient_deg, orient_conf, script_name, script_conf);
|
orient_deg, orient_conf, script_name, script_conf);
|
||||||
EXPECT_EQ(expected_deg, orient_deg);
|
EXPECT_EQ(expected_deg, orient_deg);
|
||||||
api->End();
|
api->End();
|
||||||
pixDestroy(&image);
|
image.destroy();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include "helpers.h"
|
#include "helpers.h"
|
||||||
#include "include_gunit.h"
|
#include "include_gunit.h"
|
||||||
|
#include "image.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
@ -37,7 +38,7 @@ class PageSegModeTest : public testing::Test {
|
|||||||
protected:
|
protected:
|
||||||
PageSegModeTest() = default;
|
PageSegModeTest() = default;
|
||||||
~PageSegModeTest() override {
|
~PageSegModeTest() override {
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
@ -46,7 +47,7 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SetImage(const char *filename) {
|
void SetImage(const char *filename) {
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
src_pix_ = pixRead(filename);
|
src_pix_ = pixRead(filename);
|
||||||
api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
|
api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||||
api_.SetImage(src_pix_);
|
api_.SetImage(src_pix_);
|
||||||
@ -76,7 +77,7 @@ protected:
|
|||||||
delete[] result;
|
delete[] result;
|
||||||
}
|
}
|
||||||
|
|
||||||
Pix *src_pix_ = nullptr;
|
Image src_pix_ = nullptr;
|
||||||
std::string ocr_text_;
|
std::string ocr_text_;
|
||||||
tesseract::TessBaseAPI api_;
|
tesseract::TessBaseAPI api_;
|
||||||
};
|
};
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include <tesseract/baseapi.h>
|
#include <tesseract/baseapi.h>
|
||||||
#include <tesseract/ocrclass.h>
|
#include <tesseract/ocrclass.h>
|
||||||
|
#include "image.h"
|
||||||
|
|
||||||
#include <allheaders.h>
|
#include <allheaders.h>
|
||||||
#include "gmock/gmock.h"
|
#include "gmock/gmock.h"
|
||||||
@ -93,7 +94,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c
|
|||||||
|
|
||||||
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
||||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||||
Pix *image = pixRead(imgname);
|
Image image = pixRead(imgname);
|
||||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||||
api->SetImage(image);
|
api->SetImage(image);
|
||||||
|
|
||||||
@ -109,7 +110,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c
|
|||||||
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
||||||
|
|
||||||
api->End();
|
api->End();
|
||||||
pixDestroy(&image);
|
image.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
void NewProgressTester(const char *imgname, const char *tessdatadir, const char *lang) {
|
void NewProgressTester(const char *imgname, const char *tessdatadir, const char *lang) {
|
||||||
@ -124,7 +125,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char
|
|||||||
|
|
||||||
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
auto api = std::make_unique<tesseract::TessBaseAPI>();
|
||||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||||
Pix *image = pixRead(imgname);
|
Image image = pixRead(imgname);
|
||||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||||
api->SetImage(image);
|
api->SetImage(image);
|
||||||
|
|
||||||
@ -141,7 +142,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char
|
|||||||
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
||||||
|
|
||||||
api->End();
|
api->End();
|
||||||
pixDestroy(&image);
|
image.destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(QuickTest, ClassicProgressReporting) {
|
TEST(QuickTest, ClassicProgressReporting) {
|
||||||
|
@ -40,7 +40,7 @@ protected:
|
|||||||
// api_.ReadConfigFile(FLAGS_tess_config.c_str());
|
// api_.ReadConfigFile(FLAGS_tess_config.c_str());
|
||||||
api_.SetPageSegMode(tesseract::PSM_AUTO);
|
api_.SetPageSegMode(tesseract::PSM_AUTO);
|
||||||
api_.SetImage(src_pix_);
|
api_.SetImage(src_pix_);
|
||||||
pixDestroy(&src_pix_);
|
src_pix_.destroy();
|
||||||
src_pix_ = api_.GetInputImage();
|
src_pix_ = api_.GetInputImage();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ protected:
|
|||||||
int width = pixGetWidth(src_pix_);
|
int width = pixGetWidth(src_pix_);
|
||||||
int height = pixGetHeight(src_pix_);
|
int height = pixGetHeight(src_pix_);
|
||||||
int depth = pixGetDepth(src_pix_);
|
int depth = pixGetDepth(src_pix_);
|
||||||
Pix *pix = pixCreate(width, height, depth);
|
Image pix = pixCreate(width, height, depth);
|
||||||
EXPECT_TRUE(depth == 1 || depth == 8);
|
EXPECT_TRUE(depth == 1 || depth == 8);
|
||||||
if (depth == 8) {
|
if (depth == 8) {
|
||||||
pixSetAll(pix);
|
pixSetAll(pix);
|
||||||
@ -68,7 +68,7 @@ protected:
|
|||||||
LOG(INFO) << "BBox: [L:" << left << ", T:" << top << ", R:" << right << ", B:" << bottom
|
LOG(INFO) << "BBox: [L:" << left << ", T:" << top << ", R:" << right << ", B:" << bottom
|
||||||
<< "]"
|
<< "]"
|
||||||
<< "\n";
|
<< "\n";
|
||||||
Pix *block_pix;
|
Image block_pix;
|
||||||
if (depth == 1) {
|
if (depth == 1) {
|
||||||
block_pix = it->GetBinaryImage(im_level);
|
block_pix = it->GetBinaryImage(im_level);
|
||||||
pixRasterop(pix, left, top, right - left, bottom - top, PIX_SRC ^ PIX_DST, block_pix, 0, 0);
|
pixRasterop(pix, left, top, right - left, bottom - top, PIX_SRC ^ PIX_DST, block_pix, 0, 0);
|
||||||
@ -78,14 +78,14 @@ protected:
|
|||||||
PIX_SRC & PIX_DST, block_pix, 0, 0);
|
PIX_SRC & PIX_DST, block_pix, 0, 0);
|
||||||
}
|
}
|
||||||
CHECK(block_pix != nullptr);
|
CHECK(block_pix != nullptr);
|
||||||
pixDestroy(&block_pix);
|
block_pix.destroy();
|
||||||
} while (it->Next(level));
|
} while (it->Next(level));
|
||||||
// if (base::GetFlag(FLAGS_v) >= 1)
|
// if (base::GetFlag(FLAGS_v) >= 1)
|
||||||
// pixWrite(OutputNameToPath("rebuilt.png").c_str(), pix, IFF_PNG);
|
// pixWrite(OutputNameToPath("rebuilt.png").c_str(), pix, IFF_PNG);
|
||||||
pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST, src_pix_, 0, 0);
|
pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST, src_pix_, 0, 0);
|
||||||
if (depth == 8) {
|
if (depth == 8) {
|
||||||
Pix *binary_pix = pixThresholdToBinary(pix, 128);
|
Image binary_pix = pixThresholdToBinary(pix, 128);
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
pixInvert(binary_pix, binary_pix);
|
pixInvert(binary_pix, binary_pix);
|
||||||
pix = binary_pix;
|
pix = binary_pix;
|
||||||
}
|
}
|
||||||
@ -98,7 +98,7 @@ protected:
|
|||||||
LOG(INFO) << "outfile = " << outfile << "\n";
|
LOG(INFO) << "outfile = " << outfile << "\n";
|
||||||
pixWrite(outfile.c_str(), pix, IFF_PNG);
|
pixWrite(outfile.c_str(), pix, IFF_PNG);
|
||||||
}
|
}
|
||||||
pixDestroy(&pix);
|
pix.destroy();
|
||||||
LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount);
|
LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount);
|
||||||
EXPECT_LE(pixcount, max_diff);
|
EXPECT_LE(pixcount, max_diff);
|
||||||
// if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff);
|
// if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff);
|
||||||
@ -206,7 +206,7 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Objects declared here can be used by all tests in the test case for Foo.
|
// Objects declared here can be used by all tests in the test case for Foo.
|
||||||
Pix *src_pix_; // Borrowed from api_. Do not destroy.
|
Image src_pix_; // Borrowed from api_. Do not destroy.
|
||||||
std::string ocr_text_;
|
std::string ocr_text_;
|
||||||
tesseract::TessBaseAPI api_;
|
tesseract::TessBaseAPI api_;
|
||||||
};
|
};
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user