Basic usage of the new Image class. Only pixDestroy is wrapped at the moment.

Add new methods to the Image class and use them to replace the corresponding raw Pix calls in non-public code.
This commit is contained in:
Egor Pugin 2021-03-31 22:39:43 +03:00
parent ce6e2f1821
commit a792b67983
102 changed files with 707 additions and 685 deletions

View File

@ -631,7 +631,7 @@ Pix *TessBaseAPI::GetThresholdedImage() {
if (tesseract_ == nullptr || thresholder_ == nullptr) {
return nullptr;
}
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
return nullptr;
}
return pixClone(tesseract_->pix_binary());
@ -2098,9 +2098,11 @@ bool TessBaseAPI::Threshold(Pix **pix) {
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) {
Image im(*pix);
if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
return false;
}
*pix = im;
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
&image_height_);
if (!thresholder_->IsBinary()) {
@ -2144,7 +2146,7 @@ int TessBaseAPI::FindLines() {
tesseract_->InitAdaptiveClassifier(nullptr);
#endif
}
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
return -1;
}
@ -2270,7 +2272,7 @@ bool TessBaseAPI::DetectOS(OSResults *osr) {
return false;
}
ClearResults();
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) {
return false;
}

View File

@ -583,13 +583,13 @@ void EquationDetect::IdentifySeedParts() {
}
float EquationDetect::ComputeForegroundDensity(const TBOX &tbox) {
Pix *pix_bi = lang_tesseract_->pix_binary();
Image pix_bi = lang_tesseract_->pix_binary();
const int pix_height = pixGetHeight(pix_bi);
Box *box = boxCreate(tbox.left(), pix_height - tbox.top(), tbox.width(), tbox.height());
Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr);
Image pix_sub = pixClipRectangle(pix_bi, box, nullptr);
l_float32 fract;
pixForegroundFraction(pix_sub, &fract);
pixDestroy(&pix_sub);
pix_sub.destroy();
boxDestroy(&box);
return fract;
@ -1395,7 +1395,7 @@ void EquationDetect::GetOutputTiffName(const char *name, std::string &image_name
}
void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary();
Image pix = nullptr, pixBi = lang_tesseract_->pix_binary();
pix = pixConvertTo32(pixBi);
ColPartitionGridSearch gsearch(part_grid_);
ColPartition *part = nullptr;
@ -1408,11 +1408,11 @@ void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
}
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
pixDestroy(&pix);
pix.destroy();
}
void EquationDetect::PaintColParts(const std::string &outfile) const {
Pix *pix = pixConvertTo32(lang_tesseract_->BestPix());
Image pix = pixConvertTo32(lang_tesseract_->BestPix());
ColPartitionGridSearch gsearch(part_grid_);
gsearch.StartFullSearch();
ColPartition *part = nullptr;
@ -1430,7 +1430,7 @@ void EquationDetect::PaintColParts(const std::string &outfile) const {
}
pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW);
pixDestroy(&pix);
pix.destroy();
}
void EquationDetect::PrintSpecialBlobsDensity(const ColPartition *part) const {

View File

@ -185,7 +185,7 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
}
// Now revised_box always refers to the image.
// BestPix is never colormapped, but may be of any depth.
Pix *pix = BestPix();
Image pix = BestPix();
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
TBOX image_box(0, 0, width, height);
@ -196,22 +196,22 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
}
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
revised_box->height());
Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr);
Image box_pix = pixClipRectangle(pix, clip_box, nullptr);
boxDestroy(&clip_box);
if (box_pix == nullptr) {
return nullptr;
}
if (num_rotations > 0) {
Pix *rot_pix = pixRotateOrth(box_pix, num_rotations);
pixDestroy(&box_pix);
Image rot_pix = pixRotateOrth(box_pix, num_rotations);
box_pix.destroy();
box_pix = rot_pix;
}
// Convert sub-8-bit images to 8 bit.
int depth = pixGetDepth(box_pix);
if (depth < 8) {
Pix *grey;
Image grey;
grey = pixConvertTo8(box_pix, false);
pixDestroy(&box_pix);
box_pix.destroy();
box_pix = grey;
}
bool vertical_text = false;

View File

@ -158,7 +158,7 @@ void OSResults::accumulate(const OSResults &osr) {
// image, so that non-text blobs are removed from consideration.
static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
Pix *pix = tess->pix_binary();
Image pix = tess->pix_binary();
ASSERT_HOST(pix != nullptr);
int vertical_x = 0;
int vertical_y = 1;
@ -174,10 +174,10 @@ static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *block
tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y,
nullptr, &v_lines, &h_lines);
Pix *im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
Image im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
if (im_pix != nullptr) {
pixSubtract(pix, pix, im_pix);
pixDestroy(&im_pix);
im_pix.destroy();
}
tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks);
}

View File

@ -444,19 +444,19 @@ Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
return cblob_it_->data()->render();
}
Box *box = boxCreate(left, top, right - left, bottom - top);
Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix *mask = it_->block()->block->render_mask(&mask_box);
Image mask = it_->block()->block->render_mask(&mask_box);
int mask_x = left - mask_box.left();
int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
// AND the mask and pix, putting the result in pix.
pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix),
pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x),
std::max(0, mask_y));
pixDestroy(&mask);
mask.destroy();
}
return pix;
}
@ -488,25 +488,25 @@ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_
right = std::min(right + padding, rect_width_);
bottom = std::min(bottom + padding, rect_height_);
Box *box = boxCreate(*left, *top, right - *left, bottom - *top);
Pix *grey_pix = pixClipRectangle(original_img, box, nullptr);
Image grey_pix = pixClipRectangle(original_img, box, nullptr);
boxDestroy(&box);
if (level == RIL_BLOCK || level == RIL_PARA) {
// Clip to the block polygon as well.
TBOX mask_box;
Pix *mask = it_->block()->block->render_mask(&mask_box);
Image mask = it_->block()->block->render_mask(&mask_box);
// Copy the mask registered correctly into an image the size of grey_pix.
int mask_x = *left - mask_box.left();
int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
int width = pixGetWidth(grey_pix);
int height = pixGetHeight(grey_pix);
Pix *resized_mask = pixCreate(width, height, 1);
Image resized_mask = pixCreate(width, height, 1);
pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, PIX_SRC,
mask, std::max(0, mask_x), std::max(0, mask_y));
pixDestroy(&mask);
mask.destroy();
pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1);
pixInvert(resized_mask, resized_mask);
pixSetMasked(grey_pix, resized_mask, UINT32_MAX);
pixDestroy(&resized_mask);
resized_mask.destroy();
}
return grey_pix;
}

View File

@ -58,21 +58,21 @@ const int kMaxCircleErosions = 8;
// The returned pix must be pixDestroyed after use. nullptr may be returned
// if the image doesn't meet the trivial conditions that it uses to determine
// success.
static Pix *RemoveEnclosingCircle(Pix *pixs) {
Pix *pixsi = pixInvert(nullptr, pixs);
Pix *pixc = pixCreateTemplate(pixs);
static Image RemoveEnclosingCircle(Image pixs) {
Image pixsi = pixInvert(nullptr, pixs);
Image pixc = pixCreateTemplate(pixs);
pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET);
pixSeedfillBinary(pixc, pixc, pixsi, 4);
pixInvert(pixc, pixc);
pixDestroy(&pixsi);
Pix *pixt = pixAnd(nullptr, pixs, pixc);
pixsi.destroy();
Image pixt = pixAnd(nullptr, pixs, pixc);
l_int32 max_count;
pixCountConnComp(pixt, 8, &max_count);
// The count has to go up before we start looking for the minimum.
l_int32 min_count = INT32_MAX;
Pix *pixout = nullptr;
Image pixout = nullptr;
for (int i = 1; i < kMaxCircleErosions; i++) {
pixDestroy(&pixt);
pixt.destroy();
pixErodeBrick(pixc, pixc, 3, 3);
pixt = pixAnd(nullptr, pixs, pixc);
l_int32 count;
@ -82,14 +82,14 @@ static Pix *RemoveEnclosingCircle(Pix *pixs) {
min_count = count;
} else if (count < min_count) {
min_count = count;
pixDestroy(&pixout);
pixout.destroy();
pixout = pixCopy(nullptr, pixt); // Save the best.
} else if (count >= min_count) {
break; // We have passed by the best.
}
}
pixDestroy(&pixt);
pixDestroy(&pixc);
pixt.destroy();
pixc.destroy();
return pixout;
}
@ -148,9 +148,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
if (pageseg_mode == PSM_CIRCLE_WORD) {
Pix *pixcleaned = RemoveEnclosingCircle(pix_binary_);
Image pixcleaned = RemoveEnclosingCircle(pix_binary_);
if (pixcleaned != nullptr) {
pixDestroy(&pix_binary_);
pix_binary_.destroy();
pix_binary_ = pixcleaned;
}
}
@ -200,8 +200,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
*/
int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks,
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) {
Pix *photomask_pix = nullptr;
Pix *musicmask_pix = nullptr;
Image photomask_pix = nullptr;
Image musicmask_pix = nullptr;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
@ -231,8 +231,8 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC
}
delete finder;
}
pixDestroy(&photomask_pix);
pixDestroy(&musicmask_pix);
photomask_pix.destroy();
musicmask_pix.destroy();
if (result < 0) {
return result;
}
@ -272,8 +272,8 @@ static void AddAllScriptsConverted(const UNICHARSET &sid_set, const UNICHARSET &
ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode,
BLOCK_LIST *blocks, Tesseract *osd_tess,
OSResults *osr, TO_BLOCK_LIST *to_blocks,
Pix **photo_mask_pix,
Pix **music_mask_pix) {
Image *photo_mask_pix,
Image *music_mask_pix) {
int vertical_x = 0;
int vertical_y = 1;
TabVector_LIST v_lines;
@ -293,14 +293,14 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
// Leptonica is used to find a mask of the photo regions in the input.
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
if (tessedit_dump_pageseg_images) {
Pix *pix_no_image_ = nullptr;
Image pix_no_image_ = nullptr;
if (*photo_mask_pix != nullptr) {
pix_no_image_ = pixSubtract(nullptr, pix_binary_, *photo_mask_pix);
} else {
pix_no_image_ = pixClone(pix_binary_);
}
pixa_debug_.AddPix(pix_no_image_, "NoImages");
pixDestroy(&pix_no_image_);
pix_no_image_.destroy();
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
v_lines.clear();

View File

@ -421,7 +421,7 @@ Tesseract::Tesseract()
Tesseract::~Tesseract() {
Clear();
pixDestroy(&pix_original_);
pix_original_.destroy();
end_tesseract();
for (auto *lang : sub_langs_) {
delete lang;
@ -442,10 +442,10 @@ Dict &Tesseract::getDict() {
void Tesseract::Clear() {
std::string debug_name = imagebasename + "_debug.pdf";
pixa_debug_.WritePDF(debug_name.c_str());
pixDestroy(&pix_binary_);
pixDestroy(&pix_grey_);
pixDestroy(&pix_thresholds_);
pixDestroy(&scaled_color_);
pix_binary_.destroy();
pix_grey_.destroy();
pix_thresholds_.destroy();
scaled_color_.destroy();
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
splitter_.Clear();
@ -518,7 +518,7 @@ void Tesseract::PrepareForPageseg() {
if (pageseg_strategy > max_pageseg_strategy) {
max_pageseg_strategy = pageseg_strategy;
}
pixDestroy(&sub_lang->pix_binary_);
sub_lang->pix_binary_.destroy();
sub_lang->pix_binary_ = pixClone(pix_binary());
}
// Perform shiro-rekha (top-line) splitting and replace the current image by
@ -527,7 +527,7 @@ void Tesseract::PrepareForPageseg() {
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
if (splitter_.Split(true, &pixa_debug_)) {
ASSERT_HOST(splitter_.splitted_image());
pixDestroy(&pix_binary_);
pix_binary_.destroy();
pix_binary_ = pixClone(splitter_.splitted_image());
}
}
@ -555,14 +555,14 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O
bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
// Restore pix_binary to the binarized original pix for future reference.
ASSERT_HOST(splitter_.orig_pix());
pixDestroy(&pix_binary_);
pix_binary_.destroy();
pix_binary_ = pixClone(splitter_.orig_pix());
// If the pageseg and ocr strategies are different, refresh the block list
// (from the last SegmentImage call) with blobs from the real image to be used
// for OCR.
if (splitter_.HasDifferentSplitStrategies()) {
BLOCK block("", true, 0, 0, 0, 0, pixGetWidth(pix_binary_), pixGetHeight(pix_binary_));
Pix *pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix();
Image pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix();
extract_edges(pix_for_ocr, &block);
splitter_.RefreshSegmentationWithNewBlobs(block.blob_list());
}

View File

@ -197,26 +197,26 @@ public:
return reskew_;
}
// Destroy any existing pix and return a pointer to the pointer.
Pix **mutable_pix_binary() {
pixDestroy(&pix_binary_);
Image *mutable_pix_binary() {
pix_binary_.destroy();
return &pix_binary_;
}
Pix *pix_binary() const {
Image pix_binary() const {
return pix_binary_;
}
Pix *pix_grey() const {
Image pix_grey() const {
return pix_grey_;
}
void set_pix_grey(Pix *grey_pix) {
pixDestroy(&pix_grey_);
void set_pix_grey(Image grey_pix) {
pix_grey_.destroy();
pix_grey_ = grey_pix;
}
Pix *pix_original() const {
Image pix_original() const {
return pix_original_;
}
// Takes ownership of the given original_pix.
void set_pix_original(Pix *original_pix) {
pixDestroy(&pix_original_);
void set_pix_original(Image original_pix) {
pix_original_.destroy();
pix_original_ = original_pix;
// Clone to sublangs as well.
for (auto &lang : sub_langs_) {
@ -231,7 +231,7 @@ public:
// To tell the difference pixGetDepth() will return 32, 8 or 1.
// In any case, the return value is a borrowed Pix, and should not be
// deleted or pixDestroyed.
Pix *BestPix() const {
Image BestPix() const {
if (pixGetWidth(pix_original_) == ImageWidth()) {
return pix_original_;
} else if (pix_grey_ != nullptr) {
@ -240,8 +240,8 @@ public:
return pix_binary_;
}
}
void set_pix_thresholds(Pix *thresholds) {
pixDestroy(&pix_thresholds_);
void set_pix_thresholds(Image thresholds) {
pix_thresholds_.destroy();
pix_thresholds_ = thresholds;
}
int source_resolution() const {
@ -256,13 +256,13 @@ public:
int ImageHeight() const {
return pixGetHeight(pix_binary_);
}
Pix *scaled_color() const {
Image scaled_color() const {
return scaled_color_;
}
int scaled_factor() const {
return scaled_factor_;
}
void SetScaledColor(int factor, Pix *color) {
void SetScaledColor(int factor, Image color) {
scaled_factor_ = factor;
scaled_color_ = color;
}
@ -328,8 +328,8 @@ public:
BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr);
ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks,
Tesseract *osd_tess, OSResults *osr,
TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix,
Pix **music_mask_pix);
TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix,
Image *music_mask_pix);
// par_control.cpp
void PrerecAllWordsPar(const std::vector<WordData> &words);
@ -1034,13 +1034,13 @@ private:
std::string word_config_;
// Image used for input to layout analysis and tesseract recognition.
// May be modified by the ShiroRekhaSplitter to eliminate the top-line.
Pix *pix_binary_;
Image pix_binary_;
// Grey-level input image if the input was not binary, otherwise nullptr.
Pix *pix_grey_;
Image pix_grey_;
// Original input image. Color if the input was color.
Pix *pix_original_;
Image pix_original_;
// Thresholds that were used to generate the thresholded image from grey.
Pix *pix_thresholds_;
Image pix_thresholds_;
// Debug images. If non-empty, will be written on destruction.
DebugPixa pixa_debug_;
// Input image resolution after any scaling. The resolution is not well
@ -1053,7 +1053,7 @@ private:
Textord textord_;
// True if the primary language uses right_to_left reading order.
bool right_to_left_;
Pix *scaled_color_;
Image scaled_color_;
int scaled_factor_;
FCOORD deskew_;
FCOORD reskew_;

View File

@ -49,7 +49,7 @@ ImageThresholder::~ImageThresholder() {
// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
pixDestroy(&pix_);
pix_.destroy();
}
// Return true if no image has been set.
@ -71,7 +71,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
if (bpp == 0) {
bpp = 1;
}
Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
l_uint32 *data = pixGetData(pix);
int wpl = pixGetWpl(pix);
switch (bpp) {
@ -121,7 +121,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
}
SetImage(pix);
pixDestroy(&pix);
pix.destroy();
}
// Store the coordinates of the rectangle to process for later use.
@ -152,22 +152,22 @@ void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *heigh
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Pix *pix) {
void ImageThresholder::SetImage(const Image pix) {
if (pix_ != nullptr) {
pixDestroy(&pix_);
pix_.destroy();
}
Pix *src = const_cast<Pix *>(pix);
Image src = pix;
int depth;
pixGetDimensions(src, &image_width_, &image_height_, &depth);
// Convert the image as necessary so it is one of binary, plain RGB, or
// 8 bit with no colormap. Guarantee that we always end up with our own copy,
// not just a clone of the input.
if (pixGetColormap(src)) {
Pix *tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
depth = pixGetDepth(tmp);
if (depth > 1 && depth < 8) {
pix_ = pixConvertTo8(tmp, false);
pixDestroy(&tmp);
tmp.destroy();
} else {
pix_ = tmp;
}
@ -188,7 +188,7 @@ void ImageThresholder::SetImage(const Pix *pix) {
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
/// Returns false on error.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Image *pix) {
if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
return false;
@ -196,9 +196,9 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
if (pix_channels_ == 0) {
// We have a binary image, but it still has to be copied, as this API
// allows the caller to modify the output.
Pix *original = GetPixRect();
Image original = GetPixRect();
*pix = pixCopy(nullptr, original);
pixDestroy(&original);
original.destroy();
} else {
OtsuThresholdRectToPix(pix_, pix);
}
@ -212,18 +212,18 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns nullptr if the input is binary. PixDestroy after use.
Pix *ImageThresholder::GetPixRectThresholds() {
Image ImageThresholder::GetPixRectThresholds() {
if (IsBinary()) {
return nullptr;
}
Pix *pix_grey = GetPixRectGrey();
Image pix_grey = GetPixRectGrey();
int width = pixGetWidth(pix_grey);
int height = pixGetHeight(pix_grey);
std::vector<int> thresholds;
std::vector<int> hi_values;
OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
pixDestroy(&pix_grey);
Pix *pix_thresholds = pixCreate(width, height, 8);
pix_grey.destroy();
Image pix_thresholds = pixCreate(width, height, 8);
int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
pixSetAllArbitrary(pix_thresholds, threshold);
return pix_thresholds;
@ -239,14 +239,14 @@ void ImageThresholder::Init() {
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
Pix *ImageThresholder::GetPixRect() {
Image ImageThresholder::GetPixRect() {
if (IsFullImage()) {
// Just clone the whole thing.
return pixClone(pix_);
} else {
// Crop to the given rectangle.
Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
Pix *cropped = pixClipRectangle(pix_, box, nullptr);
Image cropped = pixClipRectangle(pix_, box, nullptr);
boxDestroy(&box);
return cropped;
}
@ -256,24 +256,24 @@ Pix *ImageThresholder::GetPixRect() {
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
Pix *ImageThresholder::GetPixRectGrey() {
Image ImageThresholder::GetPixRectGrey() {
auto pix = GetPixRect(); // May have to be reduced to grey.
int depth = pixGetDepth(pix);
if (depth != 8) {
if (depth == 24) {
auto tmp = pixConvert24To32(pix);
pixDestroy(&pix);
pix.destroy();
pix = tmp;
}
auto result = pixConvertTo8(pix, false);
pixDestroy(&pix);
pix.destroy();
return result;
}
return pix;
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const {
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
std::vector<int> thresholds;
std::vector<int> hi_values;
@ -298,8 +298,8 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
// arrays and also the bytes per pixel in src_pix.
void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector<int> &thresholds,
const std::vector<int> &hi_values, Pix **pix) const {
void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
const std::vector<int> &hi_values, Image *pix) const {
*pix = pixCreate(rect_width_, rect_height_, 1);
uint32_t *pixdata = pixGetData(*pix);
int wpl = pixGetWpl(*pix);

View File

@ -113,13 +113,13 @@ public:
/// SetImage for Pix clones its input, so the source pix may be pixDestroyed
/// immediately after, but may not go away until after the Thresholder has
/// finished with it.
void SetImage(const Pix *pix);
void SetImage(const Image pix);
/// Threshold the source image as efficiently as possible to the output Pix.
/// Creates a Pix and sets pix to point to the resulting pointer.
/// Caller must use pixDestroy to free the created Pix.
/// Returns false on error.
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix);
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
@ -128,20 +128,20 @@ public:
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns nullptr if the input is binary. PixDestroy after use.
virtual Pix *GetPixRectThresholds();
virtual Image GetPixRectThresholds();
/// Get a clone/copy of the source image rectangle.
/// The returned Pix must be pixDestroyed.
/// This function will be used in the future by the page layout analysis, and
/// the layout analysis that uses it will only be available with Leptonica,
/// so there is no raw equivalent.
Pix *GetPixRect();
Image GetPixRect();
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
virtual Pix *GetPixRectGrey();
virtual Image GetPixRectGrey();
protected:
// ----------------------------------------------------------------------
@ -157,19 +157,19 @@ protected:
}
// Otsu thresholds the rectangle, taking the rectangle from *this.
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const;
void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
// arrays and also the bytes per pixel in src_pix.
void ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector<int> &thresholds,
const std::vector <int> &hi_values, Pix **pix) const;
void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
const std::vector <int> &hi_values, Image *pix) const;
protected:
/// Clone or other copy of the source Pix.
/// The pix will always be PixDestroy()ed on destruction of the class.
Pix *pix_;
Image pix_;
int image_width_; ///< Width of source pix_.
int image_height_; ///< Height of source pix_.

View File

@ -390,7 +390,7 @@ void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) {
// Helper to compute edge offsets for all the blobs on the list.
// See coutln.h for an explanation of edge offsets.
void BLOBNBOX::ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs) {
void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) {
int grey_height = 0;
int thr_height = 0;
int scale_factor = 1;
@ -1052,7 +1052,7 @@ void TO_BLOCK::DeleteUnownedNoise() {
// Thresholds must either be the same size as grey or an integer down-scale
// of grey.
// See coutln.h for an explanation of edge offsets.
void TO_BLOCK::ComputeEdgeOffsets(Pix *thresholds, Pix *grey) {
void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) {
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);

View File

@ -435,7 +435,7 @@ public:
static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs);
// Helper to compute edge offsets for all the blobs on the list.
// See coutln.h for an explanation of edge offsets.
static void ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs);
static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs);
#ifndef GRAPHICS_DISABLED
// Helper to draw all the blobs on the list in the given body_colour,
@ -745,7 +745,7 @@ public:
// Thresholds must either be the same size as grey or an integer down-scale
// of grey.
// See coutln.h for an explanation of edge offsets.
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey);
void ComputeEdgeOffsets(Image thresholds, Image grey);
#ifndef GRAPHICS_DISABLED
// Draw the noise blobs from all lists in red.

View File

@ -401,7 +401,7 @@ void TBLOB::Clear() {
// this blob and the Pix for the full image.
void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
float x_origin, float y_origin, float x_scale, float y_scale,
float final_xshift, float final_yshift, bool inverse, Pix *pix) {
float final_xshift, float final_yshift, bool inverse, Image pix) {
denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale,
final_xshift, final_yshift);
denorm_.set_inverse(inverse);
@ -789,7 +789,7 @@ TWERD *TWERD::PolygonalCopy(bool allow_detailed_fx, WERD *src) {
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height,
void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
const TBOX *norm_box, DENORM *word_denorm) {
TBOX word_box = bounding_box();

View File

@ -324,7 +324,7 @@ struct TBLOB {
// this blob and the Pix for the full image.
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor,
float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift,
float final_yshift, bool inverse, Pix *pix);
float final_yshift, bool inverse, Image pix);
// Rotates by the given rotation in place.
void Rotate(const FCOORD rotation);
// Moves by the given vec in place.
@ -436,7 +436,7 @@ struct TWERD {
static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src);
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height,
void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height,
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
const TBOX *norm_box, DENORM *word_denorm);
// Copies the data and the blobs, but leaves next untouched.

View File

@ -736,7 +736,7 @@ static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, i
* for each horizontal step, and the conflict in step direction and gradient
* direction can be used to ignore the vertical steps.
*/
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Image pix) {
if (pixGetDepth(pix) != 8) {
return;
}
@ -904,7 +904,7 @@ void C_OUTLINE::ComputeBinaryOffsets() {
* Renders the outline to the given pix, with left and top being
* the coords of the upper-left corner of the pix.
*/
void C_OUTLINE::render(int left, int top, Pix *pix) const {
void C_OUTLINE::render(int left, int top, Image pix) const {
ICOORD pos = start;
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
ICOORD next_step = step(stepindex);
@ -924,7 +924,7 @@ void C_OUTLINE::render(int left, int top, Pix *pix) const {
* @param top coord
* @param pix the pix to outline
*/
void C_OUTLINE::render_outline(int left, int top, Pix *pix) const {
void C_OUTLINE::render_outline(int left, int top, Image pix) const {
ICOORD pos = start;
for (int stepindex = 0; stepindex < stepcount; ++stepindex) {
ICOORD next_step = step(stepindex);

View File

@ -234,18 +234,18 @@ public:
// Adds sub-pixel resolution EdgeOffsets for the outline if the supplied
// pix is 8-bit. Does nothing otherwise.
void ComputeEdgeOffsets(int threshold, Pix *pix);
void ComputeEdgeOffsets(int threshold, Image pix);
// Adds sub-pixel resolution EdgeOffsets for the outline using only
// a binary image source.
void ComputeBinaryOffsets();
// Renders the outline to the given pix, with left and top being
// the coords of the upper-left corner of the pix.
void render(int left, int top, Pix *pix) const;
void render(int left, int top, Image pix) const;
// Renders just the outline to the given pix (no fill), with left and top
// being the coords of the upper-left corner of the pix.
void render_outline(int left, int top, Pix *pix) const;
void render_outline(int left, int top, Image pix) const;
#ifndef GRAPHICS_DISABLED
void plot( // draw one

View File

@ -1,6 +1,8 @@
#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
#include "image.h"
#include <allheaders.h>
namespace tesseract {
@ -27,11 +29,11 @@ public:
// Adds the given pix to the set of pages in the PDF file, with the given
// caption added to the top.
void AddPix(const Pix *pix, const char *caption) {
int depth = pixGetDepth(const_cast<Pix *>(pix));
void AddPix(const Image pix, const char *caption) {
int depth = pixGetDepth(pix);
int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
Pix *pix_debug =
pixAddSingleTextblock(const_cast<Pix *>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
Image pix_debug =
pixAddSingleTextblock(pix, fonts_, caption, color, L_ADD_BELOW, nullptr);
pixaAddPix(pixa_, pix_debug, L_INSERT);
}

View File

@ -43,7 +43,7 @@ const int kMaxReadAhead = 8;
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
// Takes ownership of the pix and destroys it.
ImageData::ImageData(bool vertical, Pix *pix) : page_number_(0), vertical_text_(vertical) {
ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) {
SetPix(pix);
}
ImageData::~ImageData() {
@ -176,12 +176,12 @@ bool ImageData::SkipDeSerialize(TFile *fp) {
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
void ImageData::SetPix(Pix *pix) {
void ImageData::SetPix(Image pix) {
SetPixInternal(pix, &image_data_);
}
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix *ImageData::GetPix() const {
Image ImageData::GetPix() const {
return GetPixInternal(image_data_);
}
@ -191,11 +191,11 @@ Pix *ImageData::GetPix() const {
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not nullptr) is set to the scale factor that was applied
// to the image to achieve the target_height.
Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
int *scaled_height, std::vector<TBOX> *boxes) const {
int input_width = 0;
int input_height = 0;
Pix *src_pix = GetPix();
Image src_pix = GetPix();
ASSERT_HOST(src_pix != nullptr);
input_width = pixGetWidth(src_pix);
input_height = pixGetHeight(src_pix);
@ -210,11 +210,11 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
*scaled_height = target_height;
}
// Get the scaled image.
Pix *pix = pixScale(src_pix, im_factor, im_factor);
Image pix = pixScale(src_pix, im_factor, im_factor);
if (pix == nullptr) {
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
im_factor);
pixDestroy(&src_pix);
src_pix.destroy();
return nullptr;
}
if (scaled_width != nullptr) {
@ -223,7 +223,7 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
if (scaled_height != nullptr) {
*scaled_height = pixGetHeight(pix);
}
pixDestroy(&src_pix);
src_pix.destroy();
if (boxes != nullptr) {
// Get the boxes.
boxes->clear();
@ -253,7 +253,7 @@ int ImageData::MemoryUsed() const {
void ImageData::Display() const {
const int kTextSize = 64;
// Draw the image.
Pix *pix = GetPix();
Image pix = GetPix();
if (pix == nullptr) {
return;
}
@ -263,7 +263,7 @@ void ImageData::Display() const {
new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
win->Image(pix, 0, height - 1);
pixDestroy(&pix);
pix.destroy();
// Draw the boxes.
win->Pen(ScrollView::RED);
win->Brush(ScrollView::NONE);
@ -306,7 +306,7 @@ void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
void ImageData::SetPixInternal(Image pix, std::vector<char> *image_data) {
l_uint8 *data;
size_t size;
l_int32 ret;
@ -314,7 +314,7 @@ void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
if (ret) {
ret = pixWriteMem(&data, &size, pix, IFF_PNM);
}
pixDestroy(&pix);
pix.destroy();
// TODO: optimize resize (no init).
image_data->resize(size);
memcpy(&(*image_data)[0], data, size);
@ -322,8 +322,8 @@ void ImageData::SetPixInternal(Pix *pix, std::vector<char> *image_data) {
}
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
Pix *ImageData::GetPixInternal(const std::vector<char> &image_data) {
Pix *pix = nullptr;
Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
Image pix = nullptr;
if (!image_data.empty()) {
// Convert the array to an image.
const auto *u_data = reinterpret_cast<const unsigned char *>(&image_data[0]);

View File

@ -19,6 +19,7 @@
#ifndef TESSERACT_IMAGE_IMAGEDATA_H_
#define TESSERACT_IMAGE_IMAGEDATA_H_
#include "image.h"
#include "points.h" // for FCOORD
#include <mutex> // for std::mutex
@ -62,7 +63,7 @@ class TESS_API ImageData {
public:
ImageData();
// Takes ownership of the pix.
ImageData(bool vertical, Pix *pix);
ImageData(bool vertical, Image pix);
~ImageData();
// Builds and returns an ImageData from the basic data. Note that imagedata,
@ -115,16 +116,16 @@ public:
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
void SetPix(Pix *pix);
void SetPix(Image pix);
// Returns the Pix image for *this. Must be pixDestroyed after use.
Pix *GetPix() const;
Image GetPix() const;
// Gets anything and everything with a non-nullptr pointer, prescaled to a
// given target_height (if 0, then the original image height), and aligned.
// Also returns (if not nullptr) the width and height of the scaled image.
// The return value is the scaled Pix, which must be pixDestroyed after use,
// and scale_factor (if not nullptr) is set to the scale factor that was
// applied to the image to achieve the target_height.
Pix *PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
Image PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
int *scaled_height, std::vector<TBOX> *boxes) const;
int MemoryUsed() const;
@ -141,9 +142,9 @@ private:
// Saves the given Pix as a PNG-encoded string and destroys it.
// In case of missing PNG support in Leptonica use PNM format,
// which requires more memory.
static void SetPixInternal(Pix *pix, std::vector<char> *image_data);
static void SetPixInternal(Image pix, std::vector<char> *image_data);
// Returns the Pix image for the image_data. Must be pixDestroyed after use.
static Pix *GetPixInternal(const std::vector<char> &image_data);
static Image GetPixInternal(const std::vector<char> &image_data);
// Parses the text string as a box file and adds any discovered boxes that
// match the page number. Returns false on error.
bool AddBoxes(const char *box_text);

View File

@ -19,8 +19,10 @@
#ifndef NORMALIS_H
#define NORMALIS_H
#include <vector>
#include "image.h"
#include <tesseract/export.h>
#include <vector>
struct Pix;
@ -232,10 +234,10 @@ public:
// Prints the content of the DENORM for debug purposes.
void Print() const;
Pix *pix() const {
Image pix() const {
return pix_;
}
void set_pix(Pix *pix) {
void set_pix(Image pix) {
pix_ = pix;
}
bool inverse() const {
@ -274,7 +276,7 @@ private:
void Init();
// Best available image.
Pix *pix_;
Image pix_;
// True if the source image is white-on-black.
bool inverse_;
// Block the word came from. If not null, block->re_rotation() takes the

View File

@ -152,7 +152,7 @@ public:
median_size_.set_y(y);
}
Pix *render_mask(TBOX *mask_box) {
Image render_mask(TBOX *mask_box) {
return pdblk.render_mask(re_rotation_, mask_box);
}

View File

@ -35,7 +35,7 @@ namespace tesseract {
// that there is no apparent foreground. At least one hi_value will not be -1.
// The return value is the number of channels in the input image, being
// the size of the output thresholds and hi_values arrays.
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector<int> &thresholds,
std::vector<int> &hi_values) {
int num_channels = pixGetDepth(src_pix) / 8;
// Of all channels with no good hi_value, keep the best so we can always
@ -143,7 +143,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::v
// single channel. Each channel is always one byte per pixel.
// Histogram is always a kHistogramSize(256) element array to count
// occurrences of each pixel value.
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height,
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
int *histogram) {
int num_channels = pixGetDepth(src_pix) / 8;
channel = ClipToRange(channel, 0, num_channels - 1);

View File

@ -19,6 +19,8 @@
#ifndef TESSERACT_CCMAIN_OTSUTHR_H_
#define TESSERACT_CCMAIN_OTSUTHR_H_
#include "image.h"
#include <vector> // for std::vector
struct Pix;
@ -35,7 +37,7 @@ const int kHistogramSize = 256; // The size of a histogram of pixel values.
// that there is no apparent foreground. At least one hi_value will not be -1.
// The return value is the number of channels in the input image, being
// the size of the output thresholds and hi_values arrays.
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height,
int OtsuThreshold(Image src_pix, int left, int top, int width, int height,
std::vector<int> &thresholds,
std::vector<int> &hi_values);
@ -43,7 +45,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height,
// single channel. Each channel is always one byte per pixel.
// Histogram is always a kHistogramSize(256) element array to count
// occurrences of each pixel value.
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height,
void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height,
int *histogram);
// Computes the Otsu threshold(s) for the given histogram.

View File

@ -304,7 +304,7 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES &source) {
// normalization scale and offset.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess,
Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx, ROW *row,
const BLOCK *block) {
auto norm_mode_hint = static_cast<tesseract::OcrEngineMode>(norm_mode);

View File

@ -462,7 +462,7 @@ public:
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
// Returns false if the word is empty and sets up fake results.
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx, ROW *row,
const BLOCK *block);

View File

@ -134,10 +134,10 @@ void PDBLK::move( // reposition block
// Returns a binary Pix mask with a 1 pixel for every pixel within the
// block. Rotates the coordinate system by rerotation prior to rendering.
Pix *PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
Image PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
TBOX rotated_box(box);
rotated_box.rotate(rerotation);
Pix *pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
Image pix = pixCreate(rotated_box.width(), rotated_box.height(), 1);
if (hand_poly != nullptr) {
// We are going to rotate, so get a deep copy of the points and
// make a new POLY_BLOCK with it.

View File

@ -91,7 +91,7 @@ public:
// block. Rotates the coordinate system by rerotation prior to rendering.
// If not nullptr, mask_box is filled with the position box of the returned
// mask image.
Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box);
Image render_mask(const FCOORD &rerotation, TBOX *mask_box);
#ifndef GRAPHICS_DISABLED
/// draw histogram

View File

@ -362,7 +362,7 @@ void QSPLINE::plot( // draw it
}
#endif
void QSPLINE::plot(Pix *pix) const {
void QSPLINE::plot(Image pix) const {
if (pix == nullptr) {
return;
}

View File

@ -82,7 +82,7 @@ public:
// Paint the baseline over pix. If pix has depth of 32, then the line will
// be painted in red. Otherwise it will be painted in black.
void plot(Pix *pix) const;
void plot(Image pix) const;
QSPLINE &operator=(const QSPLINE &source); // from this

View File

@ -388,7 +388,7 @@ void C_BLOB::rotate(const FCOORD &rotation) {
// Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
// outline list and its children.
static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIST *list) {
static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) {
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
@ -405,7 +405,7 @@ static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIS
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
// if the supplied pix is 8-bit or the binary edges if nullptr.
void C_BLOB::ComputeEdgeOffsets(int threshold, Pix *pix) {
void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) {
ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
}
@ -491,7 +491,7 @@ int16_t C_BLOB::EstimateBaselinePosition() {
return best_min == box.top() ? bottom : best_min;
}
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pix) {
static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) {
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
@ -502,7 +502,7 @@ static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pi
}
}
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Pix *pix) {
static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) {
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
@ -511,18 +511,18 @@ static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top,
}
// Returns a Pix rendering of the blob. pixDestroy after use.
Pix *C_BLOB::render() {
Image C_BLOB::render() {
TBOX box = bounding_box();
Pix *pix = pixCreate(box.width(), box.height(), 1);
Image pix = pixCreate(box.width(), box.height(), 1);
render_outline_list(&outlines, box.left(), box.top(), pix);
return pix;
}
// Returns a Pix rendering of the outline of the blob. (no fill).
// pixDestroy after use.
Pix *C_BLOB::render_outline() {
Image C_BLOB::render_outline() {
TBOX box = bounding_box();
Pix *pix = pixCreate(box.width(), box.height(), 1);
Image pix = pixCreate(box.width(), box.height(), 1);
render_outline_list_outline(&outlines, box.left(), box.top(), pix);
return pix;
}

View File

@ -83,17 +83,17 @@ public:
// Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
// if the supplied pix is 8-bit or the binary edges if nullptr.
void ComputeEdgeOffsets(int threshold, Pix *pix);
void ComputeEdgeOffsets(int threshold, Image pix);
// Estimates and returns the baseline position based on the shape of the
// outlines.
int16_t EstimateBaselinePosition();
// Returns a Pix rendering of the blob. pixDestroy after use.
Pix *render();
Image render();
// Returns a Pix rendering of the outline of the blob. (no fill).
// pixDestroy after use.
Pix *render_outline();
Image render_outline();
#ifndef GRAPHICS_DISABLED
void plot( // draw one

View File

@ -36,7 +36,7 @@ namespace tesseract {
// Classifies the given [training] sample, writing to results.
// See shapeclassifier.h for a full description.
// Default implementation calls the ShapeRating version.
int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
UNICHAR_ID keep_this,
std::vector<UnicharRating> *results) {
results->clear();
@ -54,7 +54,7 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *pa
// Classifies the given [training] sample, writing to results.
// See shapeclassifier.h for a full description.
// Default implementation aborts.
int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
int ShapeClassifier::ClassifySample(const TrainingSample &sample, Image page_pix, int debug,
int keep_this, std::vector<ShapeRating> *results) {
ASSERT_HOST("Must implement ClassifySample!" == nullptr);
return 0;
@ -64,7 +64,7 @@ int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix,
// If result is not nullptr, it is set with the shape_id and rating.
// Does not need to be overridden if ClassifySample respects the keep_this
// rule.
int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix,
int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Image page_pix,
UNICHAR_ID unichar_id, ShapeRating *result) {
std::vector<ShapeRating> results;
const ShapeTable *shapes = GetShapeTable();
@ -93,7 +93,7 @@ const UNICHARSET &ShapeClassifier::GetUnicharset() const {
// the user has finished with debugging the sample.
// Probably doesn't need to be overridden if the subclass provides
// DisplayClassifyAs.
void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix,
void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Image page_pix,
UNICHAR_ID unichar_id) {
static ScrollView *terminator = nullptr;
if (terminator == nullptr) {
@ -159,7 +159,7 @@ void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix,
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix,
int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix,
UNICHAR_ID unichar_id, int index,
std::vector<ScrollView *> &windows) {
// Does nothing in the default implementation.

View File

@ -20,7 +20,10 @@
#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
#include "image.h"
#include <tesseract/unichar.h>
#include <vector>
struct Pix;
@ -61,11 +64,11 @@ public:
// classifiers.
// NOTE: Neither overload of ClassifySample is pure, but at least one must
// be overridden by a classifier in order for it to do anything.
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
UNICHAR_ID keep_this, std::vector<UnicharRating> *results);
protected:
virtual int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
virtual int ClassifySample(const TrainingSample &sample, Image page_pix, int debug,
UNICHAR_ID keep_this, std::vector<ShapeRating> *results);
public:
@ -74,7 +77,7 @@ public:
// Returns -1 if ClassifySample fails to provide any result containing
// unichar_id. BestShapeForUnichar does not need to be overridden if
// ClassifySample respects the keep_this rule.
virtual int BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix,
virtual int BestShapeForUnichar(const TrainingSample &sample, Image page_pix,
UNICHAR_ID unichar_id, ShapeRating *result);
// Provides access to the ShapeTable that this classifier works with.
@ -88,14 +91,14 @@ public:
// the user has finished with debugging the sample.
// Probably doesn't need to be overridden if the subclass provides
// DisplayClassifyAs.
void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id);
void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id);
// Displays classification as the given unichar_id. Creates as many windows
// as it feels fit, using index as a guide for placement. Adds any created
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
virtual int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id,
virtual int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id,
int index, std::vector<ScrollView *> &windows);
// Prints debug information on the results. context is some introductory/title

View File

@ -25,7 +25,7 @@ namespace tesseract {
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
UNICHAR_ID keep_this,
std::vector<UnicharRating> *results) {
const int old_matcher_level = classify_->matcher_debug_level;
@ -62,7 +62,7 @@ const UNICHARSET &TessClassifier::GetUnicharset() const {
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id,
int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id,
int index, std::vector<ScrollView *> &windows) {
int shape_id = unichar_id;
// TODO(rays) Fix this so it works with both flat and real shapetables.

View File

@ -40,7 +40,7 @@ public:
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug,
int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug,
UNICHAR_ID keep_this, std::vector<UnicharRating> *results) override;
// Provides access to the ShapeTable that this classifier works with.
const ShapeTable *GetShapeTable() const override;
@ -53,7 +53,7 @@ public:
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id, int index,
int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id, int index,
std::vector<ScrollView *> &windows) override;
private:

View File

@ -305,8 +305,8 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) {
}
// Returns a pix representing the sample. (Int features only.)
Pix *TrainingSample::RenderToPix(const UNICHARSET *unicharset) const {
Pix *pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
Image TrainingSample::RenderToPix(const UNICHARSET *unicharset) const {
Image pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
for (uint32_t f = 0; f < num_features_; ++f) {
int start_x = features_[f].X;
int start_y = kIntFeatureExtent - features_[f].Y;
@ -341,7 +341,7 @@ void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView *window
// by padding wherever possible.
// The returned Pix must be pixDestroyed after use.
// If the input page_pix is nullptr, nullptr is returned.
Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const {
Image TrainingSample::GetSamplePix(int padding, Image page_pix) const {
if (page_pix == nullptr) {
return nullptr;
}
@ -354,7 +354,7 @@ Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const {
padded_box &= page_box;
Box *box =
boxCreate(page_box.left(), page_height - page_box.top(), page_box.width(), page_box.height());
Pix *sample_pix = pixClipRectangle(page_pix, box, nullptr);
Image sample_pix = pixClipRectangle(page_pix, box, nullptr);
boxDestroy(&box);
return sample_pix;
}

View File

@ -104,7 +104,7 @@ public:
void IndexFeatures(const IntFeatureSpace &feature_space);
// Returns a pix representing the sample. (Int features only.)
Pix *RenderToPix(const UNICHARSET *unicharset) const;
Image RenderToPix(const UNICHARSET *unicharset) const;
// Displays the features in the given window with the given color.
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
@ -112,7 +112,7 @@ public:
// by padding wherever possible.
// The returned Pix must be pixDestroyed after use.
// If the input page_pix is nullptr, nullptr is returned.
Pix *GetSamplePix(int padding, Pix *page_pix) const;
Image GetSamplePix(int padding, Image page_pix) const;
// Accessors.
UNICHAR_ID class_id() const {

View File

@ -78,12 +78,12 @@ bool Input::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *sc
// image_data. If non-null, *image_scale returns the image scale factor used.
// Returns nullptr on error.
/* static */
Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
Image Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
TRand *randomizer, float *image_scale) {
// Note that NumInputs() is defined as input image height.
int target_height = network->NumInputs();
int width, height;
Pix *pix =
Image pix =
image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr);
if (pix == nullptr) {
tprintf("Bad pix from ImageData!\n");
@ -91,7 +91,7 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ
}
if (width < min_width || height < min_width) {
tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width);
pixDestroy(&pix);
pix.destroy();
return nullptr;
}
return pix;
@ -104,12 +104,12 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ
// height == 1. If height == 0 then no scaling.
// NOTE: It isn't safe for multiple threads to call this on the same pix.
/* static */
void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer,
void Input::PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
NetworkIO *input) {
bool color = shape.depth() == 3;
Pix *var_pix = const_cast<Pix *>(pix);
Image var_pix = pix;
int depth = pixGetDepth(var_pix);
Pix *normed_pix = nullptr;
Image normed_pix = nullptr;
// On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without
// colormap, so we just have to deal with depth conversion here.
if (color) {
@ -135,12 +135,12 @@ void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *ran
if (target_height != 0 && target_height != height) {
// Get the scaled image.
float im_factor = static_cast<float>(target_height) / height;
Pix *scaled_pix = pixScale(normed_pix, im_factor, im_factor);
pixDestroy(&normed_pix);
Image scaled_pix = pixScale(normed_pix, im_factor, im_factor);
normed_pix.destroy();
normed_pix = scaled_pix;
}
input->FromPix(shape, normed_pix, randomizer);
pixDestroy(&normed_pix);
normed_pix.destroy();
}
} // namespace tesseract.

View File

@ -77,7 +77,7 @@ public:
// image_data. If non-null, *image_scale returns the image scale factor used.
// Returns nullptr on error.
/* static */
static Pix *PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
static Image PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width,
TRand *randomizer, float *image_scale);
// Converts the given pix to a NetworkIO of height and depth appropriate to
// the given StaticShape:
@ -85,7 +85,7 @@ public:
// Scale to target height, if the shape's height is > 1, or its depth if the
// height == 1. If height == 0 then no scaling.
// NOTE: It isn't safe for multiple threads to call this on the same pix.
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer,
static void PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer,
NetworkIO *input);
private:

View File

@ -321,7 +321,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
// This ensures consistent recognition results.
SetRandomSeed();
int min_width = network_->XScaleFactor();
Pix *pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor);
Image pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor);
if (pix == nullptr) {
tprintf("Line cannot be recognized!!\n");
return false;
@ -330,7 +330,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
const int kMaxImageWidth = 128 * pixGetHeight(pix);
if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), pixGetHeight(pix));
pixDestroy(&pix);
pix.destroy();
return false;
}
if (upside_down) {
@ -370,7 +370,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
}
}
pixDestroy(&pix);
pix.destroy();
if (debug) {
std::vector<int> labels, coords;
LabelsFromOutputs(*outputs, &labels, &coords);
@ -404,7 +404,7 @@ std::string LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
void LSTMRecognizer::DisplayForward(const NetworkIO &inputs, const std::vector<int> &labels,
const std::vector<int> &label_coords, const char *window_name,
ScrollView **window) {
Pix *input_pix = inputs.ToPix();
Image input_pix = inputs.ToPix();
Network::ClearWindow(false, window_name, pixGetWidth(input_pix), pixGetHeight(input_pix), window);
int line_height = Network::DisplayImage(input_pix, *window);
DisplayLSTMOutput(labels, label_coords, line_height, *window);

View File

@ -327,7 +327,7 @@ double Network::Random(double range) {
// === Debug image display methods. ===
// Displays the image of the matrix to the forward window.
void Network::DisplayForward(const NetworkIO &matrix) {
Pix *image = matrix.ToPix();
Image image = matrix.ToPix();
ClearWindow(false, name_.c_str(), pixGetWidth(image), pixGetHeight(image), &forward_win_);
DisplayImage(image, forward_win_);
forward_win_->Update();
@ -335,7 +335,7 @@ void Network::DisplayForward(const NetworkIO &matrix) {
// Displays the image of the matrix to the backward window.
void Network::DisplayBackward(const NetworkIO &matrix) {
Pix *image = matrix.ToPix();
Image image = matrix.ToPix();
std::string window_name = name_ + "-back";
ClearWindow(false, window_name.c_str(), pixGetWidth(image), pixGetHeight(image), &backward_win_);
DisplayImage(image, backward_win_);
@ -371,10 +371,10 @@ void Network::ClearWindow(bool tess_coords, const char *window_name, int width,
// Displays the pix in the given window and returns the height of the pix.
// The pix is pixDestroyed.
int Network::DisplayImage(Pix *pix, ScrollView *window) {
int Network::DisplayImage(Image pix, ScrollView *window) {
int height = pixGetHeight(pix);
window->Image(pix, 0, 0);
pixDestroy(&pix);
pix.destroy();
return height;
}
#endif // !GRAPHICS_DISABLED

View File

@ -283,7 +283,7 @@ public:
// Displays the pix in the given window and returns the height of the pix.
// The pix is pixDestroyed.
static int DisplayImage(Pix *pix, ScrollView *window);
static int DisplayImage(Image pix, ScrollView *window);
protected:
// Returns a random number in [-range, range].

View File

@ -123,7 +123,7 @@ void NetworkIO::ZeroInvalidElements() {
// of text, so a horizontal line through the middle of the image passes through
// at least some of it, so local minima and maxima are a good proxy for black
// and white pixel samples.
static void ComputeBlackWhite(Pix *pix, float *black, float *white) {
static void ComputeBlackWhite(Image pix, float *black, float *white) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
STATS mins(0, 256), maxes(0, 256);
@ -159,21 +159,21 @@ static void ComputeBlackWhite(Pix *pix, float *black, float *white) {
// Sets up the array from the given image, using the currently set int_mode_.
// If the image width doesn't match the shape, the image is truncated or padded
// with noise to match.
void NetworkIO::FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer) {
std::vector<const Pix *> pixes(1, pix);
void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) {
std::vector<Image> pixes(1, pix);
FromPixes(shape, pixes, randomizer);
}
// Sets up the array from the given set of images, using the currently set
// int_mode_. If the image width doesn't match the shape, the images are
// truncated or padded with noise to match.
void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix *> &pixes,
void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
TRand *randomizer) {
int target_height = shape.height();
int target_width = shape.width();
std::vector<std::pair<int, int>> h_w_pairs;
for (auto pix : pixes) {
Pix *var_pix = const_cast<Pix *>(pix);
Image var_pix = pix;
int width = pixGetWidth(var_pix);
if (target_width != 0) {
width = target_width;
@ -188,7 +188,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix
ResizeToMap(int_mode(), stride_map_, shape.depth());
// Iterate over the images again to copy the data.
for (size_t b = 0; b < pixes.size(); ++b) {
Pix *pix = const_cast<Pix *>(pixes[b]);
Image pix = pixes[b];
float black = 0.0f, white = 255.0f;
if (shape.depth() != 3) {
ComputeBlackWhite(pix, &black, &white);
@ -212,7 +212,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<const Pix
// of input channels, the height is the height of the image, and the width
// is the width of the image, or truncated/padded with noise if the width
// is a fixed size.
void NetworkIO::Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer) {
void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
int wpl = pixGetWpl(pix);
@ -253,7 +253,7 @@ void NetworkIO::Copy2DImage(int batch, Pix *pix, float black, float contrast, TR
// above, except that the output depth is the height of the input image, the
// output height is 1, and the output width as for Copy2DImage.
// The image is thus treated as a 1-d set of vertical pixel strips.
void NetworkIO::Copy1DGreyImage(int batch, Pix *pix, float black, float contrast,
void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast,
TRand *randomizer) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
@ -296,7 +296,7 @@ void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
}
// Converts the array to a Pix. Must be pixDestroyed after use.
Pix *NetworkIO::ToPix() const {
Image NetworkIO::ToPix() const {
// Count the width of the image, and find the max multiplication factor.
int im_width = stride_map_.Size(FD_WIDTH);
int im_height = stride_map_.Size(FD_HEIGHT);
@ -307,7 +307,7 @@ Pix *NetworkIO::ToPix() const {
num_features = 1;
feature_factor = 3;
}
Pix *pix = pixCreate(im_width, im_height * num_features, 32);
Image pix = pixCreate(im_width, im_height * num_features, 32);
StrideMap::Index index(stride_map_);
do {
int im_x = index.index(FD_WIDTH);

View File

@ -19,15 +19,16 @@
#ifndef TESSERACT_LSTM_NETWORKIO_H_
#define TESSERACT_LSTM_NETWORKIO_H_
#include <cmath>
#include <cstdio>
#include <vector>
#include "helpers.h"
#include "image.h"
#include "static_shape.h"
#include "stridemap.h"
#include "weightmatrix.h"
#include <cmath>
#include <cstdio>
#include <vector>
struct Pix;
namespace tesseract {
@ -66,11 +67,11 @@ public:
// Sets up the array from the given image, using the currently set int_mode_.
// If the image width doesn't match the shape, the image is truncated or
// padded with noise to match.
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer);
void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer);
// Sets up the array from the given set of images, using the currently set
// int_mode_. If the image width doesn't match the shape, the images are
// truncated or padded with noise to match.
void FromPixes(const StaticShape &shape, const std::vector<const Pix *> &pixes,
void FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,
TRand *randomizer);
// Copies the given pix to *this at the given batch index, stretching and
// clipping the pixel values so that [black, black + 2*contrast] maps to the
@ -79,12 +80,12 @@ public:
// of input channels, the height is the height of the image, and the width
// is the width of the image, or truncated/padded with noise if the width
// is a fixed size.
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer);
void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
// Copies the given pix to *this at the given batch index, as Copy2DImage
// above, except that the output depth is the height of the input image, the
// output height is 1, and the output width as for Copy2DImage.
// The image is thus treated as a 1-d set of vertical pixel strips.
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer);
void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer);
// Helper stores the pixel value in i_ or f_ according to int_mode_.
// t: is the index from the StrideMap corresponding to the current
// [batch,y,x] position
@ -94,7 +95,7 @@ public:
// contrast: the range of pixel values to stretch to half the range of *this.
void SetPixel(int t, int f, int pixel, float black, float contrast);
// Converts the array to a Pix. Must be pixDestroyed after use.
Pix *ToPix() const;
Image ToPix() const;
// Prints the first and last num timesteps of the array for each feature.
void Print(int num) const;

View File

@ -629,7 +629,7 @@ static cl_mem allocateZeroCopyBuffer(const KernelEnv &rEnv, l_uint32 *hostbuffer
return membuffer;
}
static Pix *mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Pix *pixd, Pix *pixs,
static Image mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Image pixd, Image pixs,
int elements, cl_mem_flags flags, bool memcopy = false,
bool sync = true) {
if (!pixd) {
@ -673,7 +673,7 @@ void OpenclDevice::releaseMorphCLBuffers() {
pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr;
}
int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) {
int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs) {
SetKernelEnv(&rEnv);
if (pixThBuffer != nullptr) {
@ -1455,8 +1455,8 @@ static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_me
// OpenCL implementation of Get Lines from pix function
// Note: Assumes the source and dest opencl buffer are initialized. No check
// done
void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline,
Pix **pixClosed, bool getpixClosed, l_int32 close_hsize,
void OpenclDevice::pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline,
Image *pixClosed, bool getpixClosed, l_int32 close_hsize,
l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
l_int32 line_hsize, l_int32 line_vsize) {
l_uint32 wpl, h;
@ -1678,7 +1678,7 @@ int OpenclDevice::HistogramRectOCL(void *imageData, int bytes_per_pixel, int byt
************************************************************************/
int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel,
int bytes_per_line, int *thresholds, int *hi_values,
Pix **pix, int height, int width, int top, int left) {
Image *pix, int height, int width, int top, int left) {
int retVal = 0;
/* create pix result buffer */
*pix = pixCreate(width, height, 1);
@ -1783,7 +1783,7 @@ struct TessScoreEvaluationInputData {
int width;
int numChannels;
unsigned char *imageData;
Pix *pix;
Image pix;
};
static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) {
@ -1928,7 +1928,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat
# else
clock_gettime(CLOCK_MONOTONIC, &time_funct_start);
# endif
Pix *pix = pixCreate(input.width, input.height, 32);
Image pix = pixCreate(input.width, input.height, 32);
l_uint32 *pixData = pixGetData(pix);
int i, j;
int idx = 0;
@ -1954,7 +1954,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat
time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 +
(time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0;
# endif
pixDestroy(&pix);
pix.destroy();
}
return time;
@ -2044,7 +2044,7 @@ static double histogramRectMicroBench(GPUEnv *env, TessScoreEvaluationInputData
// Reproducing the ThresholdRectToPix native version
static void ThresholdRectToPix_Native(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, const int *thresholds,
const int *hi_values, Pix **pix) {
const int *hi_values, Image *pix) {
int top = 0;
int left = 0;
int width = pixGetWidth(*pix);
@ -2193,7 +2193,7 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD
# endif
OpenclDevice::gpuEnv = *env;
OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr;
// Outputs of pixGetLinesCL, which takes each as an Image* out-parameter.
// Declared one per line: in a multi-declarator statement a leading '*' binds
// to the individual declarator and would silently produce an Image* instead.
Image pix_vline = nullptr;
Image pix_hline = nullptr;
Image pix_closed = nullptr;
OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true,
closing_brick, closing_brick, max_line_width, max_line_width,
min_line_length, min_line_length);
@ -2221,16 +2221,16 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD
# endif
// native serial code
Pix *src_pix = input.pix;
Pix *pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
Pix *pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
pixDestroy(&pix_solid);
Pix *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
Pix *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
pixDestroy(&pix_hline);
pixDestroy(&pix_vline);
pixDestroy(&pix_hollow);
Image src_pix = input.pix;
Image pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
Image pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
pix_solid.destroy();
Image pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
Image pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
pix_hline.destroy();
pix_vline.destroy();
pix_hollow.destroy();
# if ON_WINDOWS
QueryPerformanceCounter(&time_funct_end);

View File

@ -127,10 +127,10 @@ public:
/* OpenCL implementations of Morphological operations*/
// Initialization of OCL buffers used in Morph operations
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs);
static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs);
static void releaseMorphCLBuffers();
static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline, Pix **pixClosed,
static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed,
bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize,
l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize,
l_int32 line_vsize);
@ -161,7 +161,7 @@ public:
int *histogramAllChannels);
static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int *thresholds, int *hi_values, Pix **pix,
int bytes_per_line, int *thresholds, int *hi_values, Image *pix,
int rect_height, int rect_width, int rect_top, int rect_left);
static ds_device getDeviceSelection();

View File

@ -608,7 +608,7 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
#endif // !GRAPHICS_DISABLED
void BaselineBlock::DrawPixSpline(Pix *pix_in) {
void BaselineBlock::DrawPixSpline(Image pix_in) {
if (non_text_block_) {
return;
}

View File

@ -178,7 +178,7 @@ public:
void DrawFinalRows(const ICOORD &page_tr);
// Render the generated spline baselines for this block on pix_in.
void DrawPixSpline(Pix *pix_in);
void DrawPixSpline(Image pix_in);
private:
// Top-level line-spacing calculation. Computes an estimate of the line-

View File

@ -187,8 +187,8 @@ bool IntGrid::AnyZeroInRect(const TBOX &rect) const {
// Returns a full-resolution binary pix in which each cell over the given
// threshold is filled as a black square. pixDestroy after use.
// Edge cells, which have a zero 4-neighbour, are not marked.
Pix *IntGrid::ThresholdToPix(int threshold) const {
Pix *pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1);
Image IntGrid::ThresholdToPix(int threshold) const {
Image pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1);
int cellsize = gridsize();
for (int y = 0; y < gridheight(); ++y) {
for (int x = 0; x < gridwidth(); ++x) {
@ -204,7 +204,7 @@ Pix *IntGrid::ThresholdToPix(int threshold) const {
}
// Make a Pix of the correct scaled size for the TraceOutline functions.
static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) {
static Image GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) {
// Compute grid bounds of the outline and pad all round by 1.
int grid_left = (box.left() - bleft.x()) / gridsize - 1;
int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1;
@ -221,10 +221,10 @@ static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *lef
// Also returns the grid coords of the bottom-left of the Pix, in *left
// and *bottom, which corresponds to (0, 0) on the Pix.
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
int *bottom) {
const TBOX &box = outline->bounding_box();
Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom);
Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
int wpl = pixGetWpl(pix);
l_uint32 *data = pixGetData(pix);
int length = outline->pathlength();
@ -243,13 +243,13 @@ Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, in
Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_,
&grid_left, &grid_bottom);
grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob);
pixDestroy(&pix);
pix.destroy();
#endif
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) {
Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) {
const TBOX &box = block->pdblk.bounding_box();
Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom);
Image pix = GridReducedPix(box, gridsize, bleft, left, bottom);
int wpl = pixGetWpl(pix);
l_uint32 *data = pixGetData(pix);
ICOORDELT_IT it(block->pdblk.poly_block()->points());

View File

@ -39,10 +39,10 @@ namespace tesseract {
// Also returns the grid coords of the bottom-left of the Pix, in *left
// and *bottom, which corresponds to (0, 0) on the Pix.
// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left,
int *bottom);
// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom);
Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom);
template <class BBC, class BBC_CLIST, class BBC_C_IT>
class GridSearch;
@ -135,7 +135,7 @@ public:
// Returns a full-resolution binary pix in which each cell over the given
// threshold is filled as a black square. pixDestroy after use.
Pix *ThresholdToPix(int threshold) const;
Image ThresholdToPix(int threshold) const;
private:
int *grid_; // 2-d array of ints.
@ -190,7 +190,7 @@ public:
// grid (in grid coords), and the pix works up the grid from there.
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
// RepositionIterator() on any GridSearches that are active on this grid.
void InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox);
void InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox);
// Remove the bbox from the grid.
// WARNING: Any GridSearch operating on this grid could be invalidated!
@ -559,7 +559,7 @@ void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertBBox(bool h_spread, bool v_spread,
// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
// RepositionIterator() on any GridSearches that are active on this grid.
template <class BBC, class BBC_CLIST, class BBC_C_IT>
void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox) {
void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
for (int y = 0; y < height; ++y) {

View File

@ -81,7 +81,7 @@ CCNonTextDetect::~CCNonTextDetect() {
// The blob_block is the usual result of connected component analysis,
// holding the detected blobs.
// The returned Pix should be PixDestroyed after use.
Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block) {
Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) {
// Insert the smallest blobs into the grid.
InsertBlobList(&blob_block->small_blobs);
InsertBlobList(&blob_block->noise_blobs);
@ -102,7 +102,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b
}
noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
good_grid.Clear(); // Not needed any more.
Pix *pix = noise_density_->ThresholdToPix(max_noise_count_);
Image pix = noise_density_->ThresholdToPix(max_noise_count_);
if (debug) {
pixWrite("junknoisemask.png", pix, IFF_PNG);
}
@ -148,7 +148,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b
// more likely non-text.
// The photo_map is used to bias the decision towards non-text, rather than
// supplying definite decision.
IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid) {
IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) {
IntGrid *noise_counts = CountCellElements();
IntGrid *noise_density = noise_counts->NeighbourhoodSum();
IntGrid *good_counts = good_grid->CountCellElements();
@ -235,7 +235,7 @@ static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, i
// blobs are drawn on it in ok_color.
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps,
ScrollView *win, ScrollView::Color ok_color,
Pix *nontext_mask) {
Image nontext_mask) {
// Height of the grid area: top y minus bottom y (both on the y axis). Used
// below to flip a box's top coordinate into the mask's top-down row index.
int imageheight = tright().y() - bleft().y();
BLOBNBOX_IT blob_it(blobs);
BLOBNBOX_LIST dead_blobs;
@ -255,10 +255,10 @@ void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_bl
if (noise_density_->AnyZeroInRect(box)) {
// There is a danger that the bounding box may overlap real text, so
// we need to render the outline.
Pix *blob_pix = blob->cblob()->render_outline();
Image blob_pix = blob->cblob()->render_outline();
pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(),
PIX_SRC | PIX_DST, blob_pix, 0, 0);
pixDestroy(&blob_pix);
blob_pix.destroy();
} else {
if (box.area() < gridsize() * gridsize()) {
// It is a really bad idea to make lots of small components in the

View File

@ -42,7 +42,7 @@ public:
// The blob_block is the usual result of connected component analysis,
// holding the detected blobs.
// The returned Pix should be PixDestroyed after use.
Pix *ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block);
Image ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block);
private:
// Computes and returns the noise_density IntGrid, at the same gridsize as
@ -52,7 +52,7 @@ private:
// more likely non-text.
// The photo_map is used to bias the decision towards non-text, rather than
// supplying definite decision.
IntGrid *ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid);
IntGrid *ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid);
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
@ -68,7 +68,7 @@ private:
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, ScrollView *win,
ScrollView::Color ok_color, Pix *nontext_mask);
ScrollView::Color ok_color, Image nontext_mask);
// Returns true if the given blob overlaps more than max_overlaps blobs
// in the current grid.
bool BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps);

View File

@ -108,7 +108,7 @@ ColumnFinder::~ColumnFinder() {
delete[] best_columns_;
delete stroke_width_;
delete input_blobs_win_;
pixDestroy(&nontext_map_);
nontext_map_.destroy();
while (denorm_ != nullptr) {
DENORM *dead_denorm = denorm_;
denorm_ = const_cast<DENORM *>(denorm_->predecessor());
@ -148,7 +148,7 @@ ColumnFinder::~ColumnFinder() {
// direction, so the textline projection_ map can be setup.
// On return, IsVerticallyAlignedText may be called (now optionally) to
// determine the gross textline alignment of the page.
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix,
void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix,
TO_BLOCK *input_block) {
part_grid_.Init(gridsize(), bleft(), tright());
delete stroke_width_;
@ -162,7 +162,7 @@ void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask
}
#endif // !GRAPHICS_DISABLED
SetBlockRuleEdges(input_block);
pixDestroy(&nontext_map_);
nontext_map_.destroy();
// Run a preliminary strokewidth neighbour detection on the medium blobs.
stroke_width_->SetNeighboursOnMediumBlobs(input_block);
CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
@ -283,9 +283,9 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines,
// noise/diacriticness determined via classification.
// Returns -1 if the user hits the 'd' key in the blocks window while running
// in debug mode, which requests a retry with more debug info.
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor,
TO_BLOCK *input_block, Pix *photo_mask_pix, Pix *thresholds_pix,
Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks,
int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor,
TO_BLOCK *input_block, Image photo_mask_pix, Image thresholds_pix,
Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks,
BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) {
pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
stroke_width_->FindLeaderPartitions(input_block, &part_grid_);

View File

@ -107,7 +107,7 @@ public:
// direction, so the textline projection_ map can be setup.
// On return, IsVerticallyAlignedText may be called (now optionally) to
// determine the gross textline alignment of the page.
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block);
void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block);
// Tests for vertical alignment of text (returning true if so), and generates
// a list of blobs (in osd_blobs) for orientation and script detection.
@ -156,8 +156,8 @@ public:
// appropriate word after the rest of layout analysis.
// Returns -1 if the user hits the 'd' key in the blocks window while running
// in debug mode, which requests a retry with more debug info.
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block,
Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug,
int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block,
Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug,
BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks);
// Get the rotation required to deskew, and its inverse rotation.
@ -330,7 +330,7 @@ private:
// Horizontal line separators.
TabVector_LIST horizontal_lines_;
// Image map of photo/noise areas on the page.
Pix *nontext_map_;
Image nontext_map_;
// Textline projection map.
TextlineProjection projection_;
// Sequence of DENORMS that indicate how to get back to the original image

View File

@ -608,7 +608,7 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map,
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
const TBOX &im_box, const FCOORD &rotation) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
@ -1392,7 +1392,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool ColPartitionGrid::SmoothRegionType(Pix *nontext_map, const TBOX &im_box,
bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, ColPartition *part) {
const TBOX &part_box = part->bounding_box();
if (debug) {
@ -1511,7 +1511,7 @@ enum NeighbourPartitionType {
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map,
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation,
bool debug, const ColPartition &part,
int *best_distance) {
@ -1594,7 +1594,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
// dists must be an array of vectors of size NPT_COUNT.
void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
const ICOORD &dist_scaling, const TBOX &search_box,
Pix *nontext_map, const TBOX &im_box,
Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug,
std::vector<int> *dists) {
const TBOX &part_box = base_part.bounding_box();

View File

@ -98,7 +98,7 @@ public:
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box,
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation);
// Reflects the grid and its colpartitions in the y-axis, assuming that
@ -199,7 +199,7 @@ private:
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool SmoothRegionType(Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
ColPartition *part);
// Executes the search for SmoothRegionType in a single direction.
// Creates a bounding box that is padded in all directions except direction,
@ -207,7 +207,7 @@ private:
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map,
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
const TBOX &im_box, const FCOORD &rerotation, bool debug,
const ColPartition &part, int *best_distance);
// Counts the partitions in the given search_box by appending the gap
@ -216,7 +216,7 @@ private:
// vectors in the dists array are sorted in increasing order.
// dists must be an array of vectors of size NPT_COUNT.
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling,
const TBOX &search_box, Pix *nontext_map, const TBOX &im_box,
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
const FCOORD &rerotation, bool debug, std::vector<int> *dists);
// Improves the margins of the ColPartition by searching for

View File

@ -55,20 +55,20 @@ ShiroRekhaSplitter::~ShiroRekhaSplitter() {
}
void ShiroRekhaSplitter::Clear() {
pixDestroy(&orig_pix_);
pixDestroy(&splitted_image_);
orig_pix_.destroy();
splitted_image_.destroy();
pageseg_split_strategy_ = NO_SPLIT;
ocr_split_strategy_ = NO_SPLIT;
pixDestroy(&debug_image_);
debug_image_.destroy();
segmentation_block_list_ = nullptr;
global_xheight_ = kUnspecifiedXheight;
perform_close_ = false;
}
// On setting the input image, a clone of it is owned by this class.
void ShiroRekhaSplitter::set_orig_pix(Pix *pix) {
void ShiroRekhaSplitter::set_orig_pix(Image pix) {
if (orig_pix_) {
pixDestroy(&orig_pix_);
orig_pix_.destroy();
}
orig_pix_ = pixClone(pix);
}
@ -91,32 +91,32 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
}
// Create a copy of original image to store the splitting output.
pixDestroy(&splitted_image_);
splitted_image_.destroy();
splitted_image_ = pixCopy(nullptr, orig_pix_);
// Initialize debug image if required.
if (devanagari_split_debugimage) {
pixDestroy(&debug_image_);
debug_image_.destroy();
debug_image_ = pixConvertTo32(orig_pix_);
}
// Determine all connected components in the input image. A close operation
// may be required prior to this, depending on the current settings.
Pix *pix_for_ccs = pixClone(orig_pix_);
Image pix_for_ccs = pixClone(orig_pix_);
if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
if (devanagari_split_debuglevel > 0) {
tprintf("Performing a global close operation..\n");
}
// A global measure is available for xheight, but no local information
// exists.
pixDestroy(&pix_for_ccs);
pix_for_ccs.destroy();
pix_for_ccs = pixCopy(nullptr, orig_pix_);
PerformClose(pix_for_ccs, global_xheight_);
}
Pixa *ccs;
Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
boxaDestroy(&tmp_boxa);
pixDestroy(&pix_for_ccs);
pix_for_ccs.destroy();
// Iterate over all connected components. Get their bounding boxes and clip
// out the image regions corresponding to these boxes from the original image.
@ -128,7 +128,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
}
for (int i = 0; i < num_ccs; ++i) {
Box *box = ccs->boxa->box[i];
Pix *word_pix = pixClipRectangle(orig_pix_, box, nullptr);
Image word_pix = pixClipRectangle(orig_pix_, box, nullptr);
ASSERT_HOST(word_pix);
int xheight = GetXheightForCC(box);
if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
@ -143,7 +143,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
} else if (devanagari_split_debuglevel > 0) {
tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h);
}
pixDestroy(&word_pix);
word_pix.destroy();
}
// Actually clear the boxes now.
for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
@ -161,7 +161,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
// Method to perform a close operation on the input image. The xheight
// estimate decides the size of sel used.
// Calls pixCloseBrick with pix passed as both the first and second argument
// and with the two size arguments derived from xheight_estimate.
// NOTE(review): this span comes from a rendered commit diff; the two
// signature lines are the pre-change (Pix *) and post-change (Image) forms.
void ShiroRekhaSplitter::PerformClose(Pix *pix, int xheight_estimate) {
void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) {
// Both sizes use integer division, so xheight_estimate / 8 is 0 for
// estimates in the range 0..7 and xheight_estimate / 3 is 0 for 0..2.
// NOTE(review): confirm how pixCloseBrick treats a size below 1.
pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
}
@ -221,7 +221,7 @@ int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) {
// leeway. The leeway depends on the input xheight, if provided, else a
// conservative multiplier on approximate stroke width is used (which may lead
// to over-splitting).
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight,
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight,
int word_left, int word_top, Boxa *regions_to_clear) {
if (split_strategy == NO_SPLIT) {
return;
@ -257,7 +257,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *
// Clear the ascender and descender regions of the word.
// Obtain a vertical projection histogram for the resulting image.
Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3);
Pix *word_in_xheight = pixCopy(nullptr, pix);
Image word_in_xheight = pixCopy(nullptr, pix);
pixClearInRect(word_in_xheight, box_to_clear);
// Also clear any pixels which are below shirorekha_bottom + some leeway.
// The leeway is set to xheight if the information is available, else it is a
@ -276,7 +276,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *
PixelHistogram vert_hist;
vert_hist.ConstructVerticalCountHist(word_in_xheight);
pixDestroy(&word_in_xheight);
word_in_xheight.destroy();
// If the number of black pixel in any column of the image is less than a
// fraction of the stroke width, treat it as noise / a stray mark. Perform
@ -385,7 +385,7 @@ Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
// This method returns the computed mode-height of blobs in the pix.
// It also prunes very small blobs from calculation.
int ShiroRekhaSplitter::GetModeHeight(Pix *pix) {
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
Boxa *boxa = pixConnComp(pix, nullptr, 8);
STATS heights(0, pixGetHeight(pix));
heights.clear();
@ -402,7 +402,7 @@ int ShiroRekhaSplitter::GetModeHeight(Pix *pix) {
// This method returns y-extents of the shiro-rekha computed from the input
// word image.
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top,
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top,
int *shirorekha_bottom, int *shirorekha_ylevel) {
// Compute a histogram from projecting the word on a vertical line.
PixelHistogram hist_horiz;
@ -450,7 +450,7 @@ int PixelHistogram::GetHistogramMaximum(int *count) const {
}
// Methods to construct histograms from images.
void PixelHistogram::ConstructVerticalCountHist(Pix *pix) {
void PixelHistogram::ConstructVerticalCountHist(Image pix) {
Clear();
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
@ -471,7 +471,7 @@ void PixelHistogram::ConstructVerticalCountHist(Pix *pix) {
}
}
void PixelHistogram::ConstructHorizontalCountHist(Pix *pix) {
void PixelHistogram::ConstructHorizontalCountHist(Image pix) {
Clear();
Numa *counts = pixCountPixelsByRow(pix, nullptr);
length_ = numaGetCount(counts);

View File

@ -56,8 +56,8 @@ public:
}
// Methods to construct histograms from images. These clear any existing data.
void ConstructVerticalCountHist(Pix *pix);
void ConstructHorizontalCountHist(Pix *pix);
void ConstructVerticalCountHist(Image pix);
void ConstructHorizontalCountHist(Image pix);
// This method returns the global-maxima for the histogram. The frequency of
// the global maxima is returned in count, if specified.
@ -118,16 +118,16 @@ public:
// Returns the image obtained from shiro-rekha splitting. The returned object
// is owned by this class. Callers may want to clone the returned pix to keep
// it alive beyond the life of ShiroRekhaSplitter object.
// Accessor: hands back the splitted_image_ member as-is; no clone or copy
// call is made here.
// NOTE(review): rendered diff -- the first signature line is the pre-change
// form (Pix *), the second is its post-change replacement (Image).
Pix *splitted_image() {
Image splitted_image() {
return splitted_image_;
}
// On setting the input image, a clone of it is owned by this class.
void set_orig_pix(Pix *pix);
void set_orig_pix(Image pix);
// Returns the input image provided to the object. This object is owned by
// this class. Callers may want to clone the returned pix to work with it.
// Accessor: hands back the orig_pix_ member as-is; no clone or copy call is
// made here.
// NOTE(review): rendered diff -- the first signature line is the pre-change
// form (Pix *), the second is its post-change replacement (Image).
Pix *orig_pix() {
Image orig_pix() {
return orig_pix_;
}
@ -154,12 +154,12 @@ public:
// This method returns the computed mode-height of blobs in the pix.
// It also prunes very small blobs from calculation. Could be used to provide
// a global xheight estimate for images which have the same point-size text.
static int GetModeHeight(Pix *pix);
static int GetModeHeight(Image pix);
private:
// Method to perform a close operation on the input image. The xheight
// estimate decides the size of sel used.
static void PerformClose(Pix *pix, int xheight_estimate);
static void PerformClose(Image pix, int xheight_estimate);
// This method resolves the cc bbox to a particular row and returns the row's
// xheight. This uses block_list_ if available, else just returns the
@ -173,7 +173,7 @@ private:
// conservative estimate of stroke width along with an associated multiplier
// is used in its place. It is advisable to have a specified xheight when
// splitting for classification/training.
void SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight, int word_left,
void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
int word_top, Boxa *regions_to_clear);
// Returns a new box object for the corresponding TBOX, based on the original
@ -182,15 +182,15 @@ private:
// This method returns y-extents of the shiro-rekha computed from the input
// word image.
static void GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top, int *shirorekha_bottom,
static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
int *shirorekha_ylevel);
Pix *orig_pix_; // Just a clone of the input image passed.
Pix *splitted_image_; // Image produced after the last splitting round. The
Image orig_pix_; // Just a clone of the input image passed.
Image splitted_image_; // Image produced after the last splitting round. The
// object is owned by this class.
SplitStrategy pageseg_split_strategy_;
SplitStrategy ocr_split_strategy_;
Pix *debug_image_;
Image debug_image_;
// This block list is used as a golden segmentation when performing splitting.
BLOCK_LIST *segmentation_block_list_;
int global_xheight_;

View File

@ -322,7 +322,7 @@ void OL_BUCKETS::extract_children( // recursive count
* Run the edge detector over the block and return a list of blobs.
*/
void extract_edges(Pix *pix, // thresholded image
void extract_edges(Image pix, // thresholded image
BLOCK *block) { // block to scan
C_OUTLINE_LIST outlines; // outlines in block
C_OUTLINE_IT out_it = &outlines;

View File

@ -76,7 +76,7 @@ private:
int32_t index; // for extraction scan
};
void extract_edges(Pix *pix, // thresholded image
void extract_edges(Image pix, // thresholded image
BLOCK *block); // block to scan
void outlines_to_blobs( // find blobs
BLOCK *block, // block to scan

View File

@ -34,7 +34,7 @@ namespace tesseract {
// instead of weak vtables in every compilation unit.
EquationDetectBase::~EquationDetectBase() = default;
void EquationDetectBase::RenderSpecialText(Pix *pix, BLOBNBOX *blob) {
void EquationDetectBase::RenderSpecialText(Image pix, BLOBNBOX *blob) {
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
const TBOX &tbox = blob->bounding_box();
int height = pixGetHeight(pix);

View File

@ -20,7 +20,7 @@
#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
#include <tesseract/export.h>
#include "image.h"
class BLOBNBOX_LIST;
class TO_BLOCK;
@ -53,7 +53,7 @@ public:
// BSTT_ITALIC: green box
// BSTT_UNCLEAR: blue box
// All others: yellow box
static void RenderSpecialText(Pix *pix, BLOBNBOX *blob);
static void RenderSpecialText(Image pix, BLOBNBOX *blob);
};
} // namespace tesseract

View File

@ -60,14 +60,14 @@ const int kNoisePadding = 4;
// The returned pix may be nullptr, meaning no images found.
// If not nullptr, it must be PixDestroyed by the caller.
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) {
// Not worth looking at small images.
if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) {
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
}
// Reduce by factor 2.
Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixr, "CascadeReduced");
}
@ -78,76 +78,76 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
// pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
// want to bypass that.
if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) {
pixDestroy(&pixr);
pixr.destroy();
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
}
// Get the halftone mask.
l_int32 ht_found = 0;
Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? pixaCreate(0) : nullptr;
Pix *pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
if (pixadb) {
Pix *pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixdb, "HalftoneMask");
}
pixDestroy(&pixdb);
pixdb.destroy();
pixaDestroy(&pixadb);
}
pixDestroy(&pixr);
pixr.destroy();
if (!ht_found && pixht2 != nullptr) {
pixDestroy(&pixht2);
pixht2.destroy();
}
if (pixht2 == nullptr) {
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
}
// Expand back up again.
Pix *pixht = pixExpandReplicate(pixht2, 2);
Image pixht = pixExpandReplicate(pixht2, 2);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixht, "HalftoneReplicated");
}
pixDestroy(&pixht2);
pixht2.destroy();
// Fill to capture pixels near the mask edges that were missed
Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
pixOr(pixht, pixht, pixt);
pixDestroy(&pixt);
pixt.destroy();
// Eliminate lines and bars that may be joined to images.
Pix *pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixfinemask, "FineMask");
}
Pix *pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
Pix *pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
pixDestroy(&pixreduced);
Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
pixreduced.destroy();
pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
Pix *pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
pixDestroy(&pixreduced2);
Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
pixreduced2.destroy();
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
}
// Combine the coarse and fine image masks.
pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
pixDestroy(&pixfinemask);
pixfinemask.destroy();
// Dilate a bit to make sure we get everything.
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
Pix *pixmask = pixExpandReplicate(pixcoarsemask, 16);
pixDestroy(&pixcoarsemask);
Image pixmask = pixExpandReplicate(pixcoarsemask, 16);
pixcoarsemask.destroy();
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixmask, "MaskDilated");
}
// And the image mask with the line and bar remover.
pixAnd(pixht, pixht, pixmask);
pixDestroy(&pixmask);
pixmask.destroy();
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(pixht, "FinalMask");
}
// Make the result image the same size as the input.
Pix *result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
pixOr(result, result, pixht);
pixDestroy(&pixht);
pixht.destroy();
return result;
}
@ -158,7 +158,7 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) {
// If not nullptr, they must be destroyed by the caller.
// Resolution of pix should match the source image (Tesseract::pix_binary_)
// so the output coordinate systems match.
void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa,
void ImageFind::ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa,
Pixa **pixa) {
*boxa = nullptr;
*pixa = nullptr;
@ -177,15 +177,15 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa
}
for (int i = 0; i < npixes; ++i) {
int x_start, x_end, y_start, y_end;
Pix *img_pix = pixaGetPix(*pixa, i, L_CLONE);
Image img_pix = pixaGetPix(*pixa, i, L_CLONE);
if (textord_tabfind_show_images && pixa_debug != nullptr) {
pixa_debug->AddPix(img_pix, "A component");
}
if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction,
kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) {
Pix *simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
pixSetAll(simple_pix);
pixDestroy(&img_pix);
img_pix.destroy();
// pixaReplacePix takes ownership of the simple_pix.
pixaReplacePix(*pixa, i, simple_pix, nullptr);
img_pix = pixaGetPix(*pixa, i, L_CLONE);
@ -195,7 +195,7 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa
Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start);
boxaReplaceBox(*boxa, i, simple_box);
}
pixDestroy(&img_pix);
img_pix.destroy();
}
}
@ -280,7 +280,7 @@ static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int mi
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
// Note: the algorithm is iterative, allowing it to slice off pixels from
// one edge, allowing it to then slice off more pixels from another edge.
bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction,
bool ImageFind::pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
double max_skew_gradient, int *x_start, int *y_start,
int *x_end, int *y_end) {
ASSERT_HOST(pix != nullptr);
@ -348,7 +348,7 @@ bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_f
// are shrunk inwards until they bound any black pixels found within the
// original rectangle. Returns false if the rectangle contains no black
// pixels at all.
bool ImageFind::BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end) {
bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) {
Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start);
Box *output_box = nullptr;
pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
@ -427,8 +427,8 @@ uint8_t ImageFind::ClipToByte(double pixel) {
// If color_map1 is not null then it and color_map2 get rect pasted in them
// with the two calculated colors, and rms map gets a pasted rect of the rms.
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1,
Pix *color_map2, Pix *rms_map, uint8_t *color1,
void ImageFind::ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1,
Image color_map2, Image rms_map, uint8_t *color1,
uint8_t *color2) {
ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
@ -448,7 +448,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
}
// Now crop the pix to the rectangle.
Box *scaled_box = boxCreate(left_pad, height - top_pad, width_pad, height_pad);
Pix *scaled = pixClipRectangle(pix, scaled_box, nullptr);
Image scaled = pixClipRectangle(pix, scaled_box, nullptr);
// Compute stats over the whole image.
STATS red_stats(0, 256);
@ -538,7 +538,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
ComposeRGB(color2[COLOR_RED], color2[COLOR_GREEN], color2[COLOR_BLUE]));
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
}
pixDestroy(&scaled);
scaled.destroy();
boxDestroy(&scaled_box);
}
@ -585,7 +585,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
const FCOORD &rotation, Pix *pix) {
const FCOORD &rotation, Image pix) {
TBOX search_box(box1);
search_box += box2;
if (box1.x_gap(box2) >= box1.y_gap(box2)) {
@ -607,7 +607,7 @@ bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TB
// Returns the number of pixels in box in the pix.
// rotation, pix and im_box are defined in the large comment above.
int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
Pix *pix) {
Image pix) {
// Intersect it with the image box.
box &= im_box; // This is in-place box intersection.
if (box.null_box()) {
@ -616,12 +616,12 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR
box.rotate(rotation);
TBOX rotated_im_box(im_box);
rotated_im_box.rotate(rotation);
Pix *rect_pix = pixCreate(box.width(), box.height(), 1);
Image rect_pix = pixCreate(box.width(), box.height(), 1);
pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix,
box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top());
l_int32 result;
pixCountPixels(rect_pix, &result, nullptr);
pixDestroy(&rect_pix);
rect_pix.destroy();
return result;
}
@ -630,7 +630,7 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR
// until there is at least one black pixel in the outermost columns.
// rotation, rerotation, pix and im_box are defined in the large comment above.
static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box,
Pix *pix, TBOX *slice) {
Image pix, TBOX *slice) {
TBOX rotated_box(*slice);
rotated_box.rotate(rerotation);
TBOX rotated_im_box(im_box);
@ -675,7 +675,7 @@ static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation,
// In such cases, the output order may cause strange block polygons.
// rotation, rerotation, pix and im_box are defined in the large comment above.
static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation,
const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list) {
const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) {
ASSERT_HOST(!part_list->empty());
ColPartition_IT part_it(part_list);
do {
@ -753,7 +753,7 @@ static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD
// from a rectangle.
// rotation, rerotation, pix and im_box are defined in the large comment above.
static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation,
const FCOORD &rerotation, Pix *pix,
const FCOORD &rerotation, Image pix,
ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) {
// Add the full im_box partition to the list to begin with.
ColPartition *pix_part =
@ -1204,7 +1204,7 @@ static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) {
// and then deletes them.
// Box coordinates are rotated by rerotate to match the image.
static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid,
ColPartition_LIST *image_parts, Pix *image_pix) {
ColPartition_LIST *image_parts, Image image_pix) {
if (image_pix == nullptr) {
return;
}
@ -1236,7 +1236,7 @@ static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *pa
// rerotation specifies how to rotate the partition coords to match
// the image_mask, since this function is used after orientation correction.
void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
Pix *image_mask) {
Image image_mask) {
// Extract the noise parts from the grid and put them on a temporary list.
ColPartition_LIST parts_list;
ColPartition_IT part_it(&parts_list);
@ -1288,7 +1288,7 @@ static void DeleteSmallImages(ColPartitionGrid *part_grid) {
// Since the other blobs in the other partitions will be owned by the block,
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
// situation and collect the image blobs.
void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation,
const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid,
DebugPixa *pixa_debug, ColPartitionGrid *part_grid,
ColPartition_LIST *big_parts) {
@ -1304,7 +1304,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
for (int i = 0; i < nboxes; ++i) {
l_int32 x, y, width, height;
boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
Pix *pix = pixaGetPix(pixa, i, L_CLONE);
Image pix = pixaGetPix(pixa, i, L_CLONE);
TBOX im_box(x, imageheight - y - height, x + width, imageheight - y);
im_box.rotate(rotation); // Now matches all partitions and blobs.
ColPartitionGridSearch rectsearch(part_grid);
@ -1315,7 +1315,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation,
pixa_debug->AddPix(pix, "ImageComponent");
tprintf("Component has %d parts\n", part_list.length());
}
pixDestroy(&pix);
pix.destroy();
if (!part_list.empty()) {
ColPartition_IT part_it(&part_list);
if (part_list.singleton()) {

View File

@ -47,7 +47,7 @@ public:
// The returned pix may be nullptr, meaning no images found.
// If not nullptr, it must be PixDestroyed by the caller.
// If textord_tabfind_show_images, debug images are appended to pixa_debug.
static Pix *FindImages(Pix *pix, DebugPixa *pixa_debug);
static Image FindImages(Image pix, DebugPixa *pixa_debug);
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
// analogous to pixConnComp, except that connected components which are nearly
@ -56,7 +56,7 @@ public:
// If not nullptr, they must be destroyed by the caller.
// Resolution of pix should match the source image (Tesseract::pix_binary_)
// so the output coordinate systems match.
static void ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa);
static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa);
// Returns true if there is a rectangle in the source pix, such that all
// pixel rows and column slices outside of it have less than
@ -67,7 +67,7 @@ public:
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
// Note: the algorithm is iterative, allowing it to slice off pixels from
// one edge, allowing it to then slice off more pixels from another edge.
static bool pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction,
static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction,
double max_skew_gradient, int *x_start, int *y_start, int *x_end,
int *y_end);
@ -75,7 +75,7 @@ public:
// are shrunk inwards until they bound any black pixels found within the
// original rectangle. Returns false if the rectangle contains no black
// pixels at all.
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end);
static bool BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end);
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
// of the point from the given line, defined by a pair of points in the 3-D
@ -99,8 +99,8 @@ public:
// If color_map1 is not null then it and color_map2 get rect pasted in them
// with the two calculated colors, and rms map gets a pasted rect of the rms.
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
static void ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1,
Pix *color_map2, Pix *rms_map, uint8_t *color1,
static void ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1,
Image color_map2, Image rms_map, uint8_t *color1,
uint8_t *color2);
// Returns true if there are no black pixels in between the boxes.
@ -109,7 +109,7 @@ public:
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box,
const FCOORD &rotation, Pix *pix);
const FCOORD &rotation, Image pix);
// Returns the number of pixels in box in the pix.
// The im_box must represent the bounding box of the pix in tesseract
@ -117,7 +117,7 @@ public:
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation,
Pix *pix);
Image pix);
// Locates all the image partitions in the part_grid, that were found by a
// previous call to FindImagePartitions, marks them in the image_mask,
@ -127,7 +127,7 @@ public:
// rerotation specifies how to rotate the partition coords to match
// the image_mask, since this function is used after orientation correction.
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid,
Pix *image_mask);
Image image_mask);
// Runs a CC analysis on the image_pix mask image, and creates
// image partitions from them, cutting out strong text, and merging with
@ -139,7 +139,7 @@ public:
// Since the other blobs in the other partitions will be owned by the block,
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
// situation and collect the image blobs.
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation,
static void FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation,
TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug,
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
};

View File

@ -64,7 +64,7 @@ const double kMinMusicPixelFraction = 0.75;
// Erases the unused blobs from the line_pix image, taking into account
// whether this was a horizontal or vertical line set.
static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs,
Pix *line_pix) {
Image line_pix) {
int height = pixGetHeight(line_pix);
BLOBNBOX_IT bbox_it(line_bblobs);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
@ -94,26 +94,26 @@ static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_
// as well by removing components that touch the line, but are not in the
// non_line_pix mask. It is assumed that the non_line_pix mask has already
// been prepared to required accuracy.
// Removes line_pix from src_pix, then removes the leftover "residue" that
// touches the lines: pixels of src_pix that are not in non_line_pix and are
// reached by seed-filling from a 3x3-dilated copy of line_pix.
// src_pix is the argument passed in the first position of the pixSubtract
// calls that update it; the two temporaries created here are released before
// returning.
// The resolution parameter is not referenced anywhere in this body.
// NOTE(review): this span comes from a rendered commit diff, so each changed
// statement appears twice: the pre-change form (Pix *, pixDestroy) followed
// by its post-change replacement (Image, .destroy()).
static void SubtractLinesAndResidue(Pix *line_pix, Pix *non_line_pix, int resolution,
Pix *src_pix) {
static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix, int resolution,
Image src_pix) {
// First remove the lines themselves.
pixSubtract(src_pix, src_pix, line_pix);
// Subtract the non-lines from the image to get the residue.
// A nullptr first argument is used here, and the call's return value is kept
// as a new temporary.
Pix *residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix);
// Dilate the lines so they touch the residue.
Pix *fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
// Seed fill the fat lines to get all the residue.
// fat_line_pix is passed as both the first and second argument; the final
// argument 8 is presumably the connectivity -- TODO confirm.
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
// Subtract the residue from the original image.
pixSubtract(src_pix, src_pix, fat_line_pix);
// Release both temporaries created above.
pixDestroy(&fat_line_pix);
pixDestroy(&residue_pix);
fat_line_pix.destroy();
residue_pix.destroy();
}
// Returns the maximum strokewidth in the given binary image by doubling
// the maximum of the distance function.
static int MaxStrokeWidth(Pix *pix) {
Pix *dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
static int MaxStrokeWidth(Image pix) {
Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
int width = pixGetWidth(dist_pix);
int height = pixGetHeight(dist_pix);
int wpl = pixGetWpl(dist_pix);
@ -129,18 +129,18 @@ static int MaxStrokeWidth(Pix *pix) {
}
data += wpl;
}
pixDestroy(&dist_pix);
dist_pix.destroy();
return max_dist * 2;
}
// Returns the number of components in the intersection_pix touched by line_box.
static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) {
static int NumTouchingIntersections(Box *line_box, Image intersection_pix) {
if (intersection_pix == nullptr) {
return 0;
}
Pix *rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
Boxa *boxa = pixConnComp(rect_pix, nullptr, 8);
pixDestroy(&rect_pix);
rect_pix.destroy();
if (boxa == nullptr) {
return false;
}
@ -152,7 +152,7 @@ static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) {
// Returns the number of black pixels found in the box made by adding the line
// width to both sides of the line bounding box. (Increasing the smallest
// dimension of the bounding box.)
static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline_pix) {
static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) {
l_int32 x, y, box_width, box_height;
boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
if (box_width > box_height) {
@ -167,11 +167,11 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline
box_width = right - x;
}
Box *box = boxCreate(x, y, box_width, box_height);
Pix *rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
boxDestroy(&box);
l_int32 result;
pixCountPixels(rect_pix, &result, nullptr);
pixDestroy(&rect_pix);
rect_pix.destroy();
return result;
}
@ -184,8 +184,8 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline
// or Hindi words, or underlines.)
// Bad line components are erased from line_pix.
// Returns the number of remaining connected components.
static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersection_pix,
Pix *line_pix) {
static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix,
Image line_pix) {
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
Pixa *pixa = nullptr;
Boxa *boxa = pixConnComp(line_pix, &pixa, 8);
@ -196,9 +196,9 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect
Box *box = boxaGetBox(boxa, i, L_CLONE);
l_int32 x, y, box_width, box_height;
boxGetGeometry(box, &x, &y, &box_width, &box_height);
Pix *comp_pix = pixaGetPix(pixa, i, L_CLONE);
Image comp_pix = pixaGetPix(pixa, i, L_CLONE);
int max_width = MaxStrokeWidth(comp_pix);
pixDestroy(&comp_pix);
comp_pix.destroy();
bool bad_line = false;
// If the length is too short to stand-alone as a line, and the box width
// is thick enough, and the stroke width is thick enough it is bad.
@ -240,18 +240,18 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// The detected lines are removed from the pix.
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x,
int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines,
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
TabVector_LIST *h_lines) {
if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
return;
}
Pix *pix_vline = nullptr;
Pix *pix_non_vline = nullptr;
Pix *pix_hline = nullptr;
Pix *pix_non_hline = nullptr;
Pix *pix_intersections = nullptr;
Image pix_vline = nullptr;
Image pix_non_vline = nullptr;
Image pix_hline = nullptr;
Image pix_non_hline = nullptr;
Image pix_intersections = nullptr;
Pixa *pixa_display = debug ? pixaCreate(0) : nullptr;
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline,
&pix_intersections, pix_music_mask, pixa_display);
@ -263,10 +263,10 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
if (pix_vline != nullptr) {
pixAnd(pix_intersections, pix_vline, pix_hline);
} else {
pixDestroy(&pix_intersections);
pix_intersections.destroy();
}
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) {
pixDestroy(&pix_hline);
pix_hline.destroy();
}
}
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline,
@ -283,11 +283,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
pixAnd(pix_intersections, pix_vline, pix_hline);
// Fatten up the intersections and seed-fill to get the intersection
// residue.
Pix *pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
// Now remove the intersection residue.
pixSubtract(pix, pix, pix_join_residue);
pixDestroy(&pix_join_residue);
pix_join_residue.destroy();
}
// Remove any detected music.
if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
@ -300,11 +300,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v
pixaAddPix(pixa_display, pix, L_CLONE);
}
pixDestroy(&pix_vline);
pixDestroy(&pix_non_vline);
pixDestroy(&pix_hline);
pixDestroy(&pix_non_hline);
pixDestroy(&pix_intersections);
pix_vline.destroy();
pix_non_vline.destroy();
pix_hline.destroy();
pix_non_hline.destroy();
pix_intersections.destroy();
if (pixa_display != nullptr) {
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf");
pixaDestroy(&pixa_display);
@ -359,9 +359,9 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa **bo
// If no good lines are found, pix_vline is destroyed.
// None of the input pointers may be nullptr, and if *pix_vline is nullptr then
// the function does nothing.
void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x,
int *vertical_y, Pix **pix_vline, Pix *pix_non_vline,
Pix *src_pix, TabVector_LIST *vectors) {
void LineFinder::FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x,
int *vertical_y, Image *pix_vline, Image pix_non_vline,
Image src_pix, TabVector_LIST *vectors) {
if (pix_vline == nullptr || *pix_vline == nullptr) {
return;
}
@ -380,7 +380,7 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int
vertical.set_with_shrink(*vertical_x, *vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
} else {
pixDestroy(pix_vline);
pix_vline->destroy();
}
}
@ -394,9 +394,9 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int
// If no good lines are found, pix_hline is destroyed.
// None of the input pointers may be nullptr, and if *pix_hline is nullptr then
// the function does nothing.
void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x,
int vertical_y, Pix **pix_hline, Pix *pix_non_hline,
Pix *src_pix, TabVector_LIST *vectors) {
void LineFinder::FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x,
int vertical_y, Image *pix_hline, Image pix_non_hline,
Image src_pix, TabVector_LIST *vectors) {
if (pix_hline == nullptr || *pix_hline == nullptr) {
return;
}
@ -422,7 +422,7 @@ void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int
h_it.data()->XYFlip();
}
} else {
pixDestroy(pix_hline);
pix_hline->destroy();
}
}
@ -482,14 +482,14 @@ void LineFinder::FindLineVectors(const ICOORD &bleft, const ICOORD &tright,
// is taken to be a bar. Bars are used as a seed and the entire touching
// component is added to the output music mask and subtracted from the lines.
// Returns nullptr and does minimal work if no music is found.
static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline,
static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline,
l_int32 *v_empty, l_int32 *h_empty) {
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
Pix *intersection_pix = pixAnd(nullptr, pix_vline, pix_hline);
Image intersection_pix = pixAnd(nullptr, pix_vline, pix_hline);
Boxa *boxa = pixConnComp(pix_vline, nullptr, 8);
// Iterate over the boxes to find music bars.
int nboxes = boxaGetCount(boxa);
Pix *music_mask = nullptr;
Image music_mask = nullptr;
for (int i = 0; i < nboxes; ++i) {
Box *box = boxaGetBox(boxa, i, L_CLONE);
l_int32 x, y, box_width, box_height;
@ -507,7 +507,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
boxDestroy(&box);
}
boxaDestroy(&boxa);
pixDestroy(&intersection_pix);
intersection_pix.destroy();
if (music_mask != nullptr) {
// The mask currently contains just the bars. Use the mask as a seed
// and the pix_closed as the mask for a seedfill to get all the
@ -521,14 +521,14 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
int nboxes = boxaGetCount(boxa);
for (int i = 0; i < nboxes; ++i) {
Box *box = boxaGetBox(boxa, i, L_CLONE);
Pix *rect_pix = pixClipRectangle(music_mask, box, nullptr);
Image rect_pix = pixClipRectangle(music_mask, box, nullptr);
l_int32 music_pixels;
pixCountPixels(rect_pix, &music_pixels, nullptr);
pixDestroy(&rect_pix);
rect_pix.destroy();
rect_pix = pixClipRectangle(pix_closed, box, nullptr);
l_int32 all_pixels;
pixCountPixels(rect_pix, &all_pixels, nullptr);
pixDestroy(&rect_pix);
rect_pix.destroy();
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
// False positive. Delete from the music mask.
pixClearInRect(music_mask, box);
@ -539,7 +539,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
boxaDestroy(&boxa);
pixZero(music_mask, &no_remaining_music);
if (no_remaining_music) {
pixDestroy(&music_mask);
music_mask.destroy();
} else {
pixSubtract(pix_vline, pix_vline, music_mask);
pixSubtract(pix_hline, pix_hline, music_mask);
@ -563,11 +563,11 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi
// but any of the returns that are empty will be nullptr on output.
// None of the input (1st level) pointers may be nullptr except pix_music_mask
// (passing nullptr disables music detection) and pixa_display (debug only).
void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline,
Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections,
Pix **pix_music_mask, Pixa *pixa_display) {
Pix *pix_closed = nullptr;
Pix *pix_hollow = nullptr;
void LineFinder::GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
Image *pix_music_mask, Pixa *pixa_display) {
Image pix_closed = nullptr;
Image pix_hollow = nullptr;
int max_line_width = resolution / kThinLineFraction;
int min_line_length = resolution / kMinLineLengthFraction;
@ -599,13 +599,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
// Open up with a big box to detect solid areas, which can then be
// subtracted. This is very generous and will leave in even quite wide
// lines.
Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width);
if (pixa_display != nullptr) {
pixaAddPix(pixa_display, pix_solid, L_CLONE);
}
pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
pixDestroy(&pix_solid);
pix_solid.destroy();
// Now open up in both directions independently to find lines of at least
// 1 inch/kMinLineLengthFraction in length.
@ -615,7 +615,7 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
*pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
*pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
pixDestroy(&pix_hollow);
pix_hollow.destroy();
#ifdef USE_OPENCL
}
#endif
@ -633,10 +633,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
*pix_music_mask = nullptr;
}
}
pixDestroy(&pix_closed);
Pix *pix_nonlines = nullptr;
pix_closed.destroy();
Image pix_nonlines = nullptr;
*pix_intersections = nullptr;
Pix *extra_non_hlines = nullptr;
Image extra_non_hlines = nullptr;
if (!v_empty) {
// Subtract both line candidates from the source to get definite non-lines.
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
@ -656,18 +656,18 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
}
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) {
pixDestroy(pix_vline); // No candidates left.
pix_vline->destroy(); // No candidates left.
}
} else {
// No vertical lines.
pixDestroy(pix_vline);
pix_vline->destroy();
*pix_non_vline = nullptr;
if (!h_empty) {
pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
}
}
if (h_empty) {
pixDestroy(pix_hline);
pix_hline->destroy();
*pix_non_hline = nullptr;
if (v_empty) {
return;
@ -677,10 +677,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
if (extra_non_hlines != nullptr) {
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
pixDestroy(&extra_non_hlines);
extra_non_hlines.destroy();
}
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) {
pixDestroy(pix_hline); // No candidates left.
pix_hline->destroy(); // No candidates left.
}
}
if (pixa_display != nullptr) {
@ -706,13 +706,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
}
}
pixDestroy(&pix_nonlines);
pix_nonlines.destroy();
}
// Returns a list of boxes corresponding to the candidate line segments. Sets
// the line_crossings member of the boxes so we can later determine the number
// of intersections touched by a full line.
void LineFinder::GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections,
void LineFinder::GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) {
// Put a single pixel crack in every line at an arbitrary spacing,
// so they break up and the bounding boxes can be used to get the

View File

@ -58,8 +58,8 @@ public:
*
* The detected lines are removed from the pix.
*/
static void FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x,
int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines,
static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x,
int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines,
TabVector_LIST *h_lines);
/**
@ -83,9 +83,9 @@ private:
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_vline is destroyed.
static void FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x,
int *vertical_y, Pix **pix_vline, Pix *pix_non_vline,
Pix *src_pix, TabVector_LIST *vectors);
static void FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x,
int *vertical_y, Image *pix_vline, Image pix_non_vline,
Image src_pix, TabVector_LIST *vectors);
// Finds horizontal line objects in pix_hline and removes them from src_pix.
// Uses the given resolution to determine size thresholds instead of any
@ -95,8 +95,8 @@ private:
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_hline is destroyed.
static void FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x,
int vertical_y, Pix **pix_hline, Pix *pix_non_hline, Pix *src_pix,
static void FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x,
int vertical_y, Image *pix_hline, Image pix_non_hline, Image src_pix,
TabVector_LIST *vectors);
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
@ -121,14 +121,14 @@ private:
// None of the input (1st level) pointers may be nullptr except
// pix_music_mask, which will disable music detection, and pixa_display, which
// is for debug.
static void GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline,
Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections,
Pix **pix_music_mask, Pixa *pixa_display);
static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline,
Image *pix_hline, Image *pix_non_hline, Image *pix_intersections,
Image *pix_music_mask, Pixa *pixa_display);
// Returns a list of boxes corresponding to the candidate line segments. Sets
// the line_crossings member of the boxes so we can later determine the number
// of intersections touched by a full line.
static void GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections,
static void GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections,
C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs);
};

View File

@ -59,7 +59,7 @@ static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos);
* Extract edges from a PDBLK.
**********************************************************************/
void block_edges(Pix *t_pix, // thresholded image
void block_edges(Image t_pix, // thresholded image
PDBLK *block, // block in image
C_OUTLINE_IT *outline_it) {
ICOORD bleft; // bounding box

View File

@ -29,7 +29,7 @@ namespace tesseract {
class C_OUTLINE_IT;
class PDBLK;
void block_edges(Pix *t_image, // thresholded image
void block_edges(Image t_image, // thresholded image
PDBLK *block, // block in image
C_OUTLINE_IT *outline_it);

View File

@ -350,7 +350,7 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST *big_part_list) {
// Large blobs that cause overlap are put in separate partitions and added
// to the big_parts list.
void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,
TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm,
TO_BLOCK *block, Image nontext_pix, const DENORM *denorm,
bool cjk_script, TextlineProjection *projection,
BLOBNBOX_LIST *diacritic_blobs,
ColPartitionGrid *part_grid,

View File

@ -113,7 +113,7 @@ public:
// Large blobs that cause overlap are put in separate partitions and added
// to the big_parts list.
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,
Pix *nontext_pix, const DENORM *denorm, bool cjk_script,
Image nontext_pix, const DENORM *denorm, bool cjk_script,
TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,
ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);
@ -306,7 +306,7 @@ private:
private:
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
Pix *nontext_map_;
Image nontext_map_;
// Textline projection map. Borrowed pointer.
TextlineProjection *projection_;
// DENORM used by projection_ to get back to image coords. Borrowed pointer.

View File

@ -53,7 +53,7 @@ TextlineProjection::TextlineProjection(int resolution) : x_origin_(0), y_origin_
}
}
TextlineProjection::~TextlineProjection() {
pixDestroy(&pix_);
pix_.destroy();
}
// Build the projection profile given the input_block containing lists of
@ -64,8 +64,8 @@ TextlineProjection::~TextlineProjection() {
// The blobs have had their left and right rules set to also limit
// the range of projection.
void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation,
Pix *nontext_map) {
pixDestroy(&pix_);
Image nontext_map) {
pix_.destroy();
TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
x_origin_ = 0;
y_origin_ = image_box.height();
@ -75,9 +75,9 @@ void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD
pix_ = pixCreate(width, height, 8);
ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
Pix *final_pix = pixBlockconv(pix_, 1, 1);
Image final_pix = pixBlockconv(pix_, 1, 1);
// Pix* final_pix = pixBlockconv(pix_, 2, 2);
pixDestroy(&pix_);
pix_.destroy();
pix_ = final_pix;
}
@ -127,7 +127,7 @@ void TextlineProjection::MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs,
void TextlineProjection::DisplayProjection() const {
int width = pixGetWidth(pix_);
int height = pixGetHeight(pix_);
Pix *pixc = pixCreate(width, height, 32);
Image pixc = pixCreate(width, height, 32);
int src_wpl = pixGetWpl(pix_);
int col_wpl = pixGetWpl(pixc);
uint32_t *src_data = pixGetData(pix_);
@ -149,7 +149,7 @@ void TextlineProjection::DisplayProjection() const {
auto *win = new ScrollView("Projection", 0, 0, width, height, width, height);
win->Image(pixc, 0, 0);
win->Update();
pixDestroy(&pixc);
pixc.destroy();
}
#endif // !GRAPHICS_DISABLED
@ -570,7 +570,7 @@ int TextlineProjection::MeanPixelsInLineSegment(const DENORM *denorm, int offset
// The function converts between tesseract coords and the pix coords assuming
// that this pix is full resolution equal in size to the original image.
// Returns an empty box if there are no black pixels in the source box.
static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) {
static TBOX BoundsWithinBox(Image pix, const TBOX &box) {
int im_height = pixGetHeight(pix);
Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height());
Box *output_box = nullptr;
@ -593,7 +593,7 @@ static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) {
// and checks for nontext_map pixels in each half. Reduces the bbox so that it
// still includes the middle point, but does not touch any fg pixels in
// nontext_map. An empty box may be returned if there is no such box.
static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Pix *nontext_map,
static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Image nontext_map,
TBOX *bbox) {
TBOX box1(*bbox);
TBOX box2(*bbox);
@ -652,7 +652,7 @@ void TextlineProjection::IncrementRectangle8Bit(const TBOX &box) {
// flags, but the spreading is truncated by set pixels in the nontext_map
// and also by the horizontal rule line limits on the blobs.
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation,
const TBOX &nontext_map_box, Pix *nontext_map) {
const TBOX &nontext_map_box, Image nontext_map) {
BLOBNBOX_IT blob_it(blobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX *blob = blob_it.data();

View File

@ -44,7 +44,7 @@ public:
// The rotation is a multiple of 90 degrees, ie no deskew yet.
// The blobs have had their left and right rules set to also limit
// the range of projection.
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map);
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map);
// Display the blobs in the window colored according to textline quality.
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win);
@ -165,7 +165,7 @@ private:
// flags, but the spreading is truncated by set pixels in the nontext_map
// and also by the horizontal rule line limits on the blobs.
void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box,
Pix *nontext_map);
Image nontext_map);
// Pads the bounding box of the given blob according to whether it is on
// a horizontal or vertical text line, taking into account tab-stops near
// the blob. Returns true if padding was in the horizontal direction.
@ -192,7 +192,7 @@ private:
// The image of horizontally smeared blob boxes summed to provide a
// textline density map. As with a horizontal projection, the map has
// dips in the gaps between textlines.
Pix *pix_;
Image pix_;
};
} // namespace tesseract.

View File

@ -175,7 +175,7 @@ Textord::Textord(CCStruct *ccstruct)
// Make the textlines and words inside each block.
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
TO_BLOCK_LIST *to_blocks) {
page_tr_.set_x(width);

View File

@ -88,7 +88,7 @@ public:
// diacritic_blobs contain small confusing components that should be added
// to the appropriate word(s) in case they are really diacritics.
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height,
Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms,
Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms,
BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
// If we were supposed to return only a single textline, and there is more
@ -113,7 +113,7 @@ public:
FCOORD rotation // for drawing
);
// tordmain.cpp ///////////////////////////////////////////
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on);
private:

View File

@ -66,17 +66,17 @@ CLISTIZE(WordWithBox)
*
* Set the horizontal and vertical stroke widths in the blob.
**********************************************************************/
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) {
// Cut the blob rectangle into a Pix.
int pix_height = pixGetHeight(pix);
const TBOX &box = blob->bounding_box();
int width = box.width();
int height = box.height();
Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
Pix *pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
boxDestroy(&blob_pix_box);
Pix *dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
pixDestroy(&pix_blob);
Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
pix_blob.destroy();
// Compute the stroke widths.
uint32_t *data = pixGetData(dist_pix);
int wpl = pixGetWpl(dist_pix);
@ -129,7 +129,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
pixel = next_pixel;
}
}
pixDestroy(&dist_pix);
dist_pix.destroy();
// Store the horizontal and vertical width in the blob, keeping both
// widths if there is enough information, otherwise only the one with
// the most samples.
@ -160,7 +160,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) {
* Make a list of TO_BLOCKs for portrait and landscape orientation.
**********************************************************************/
void assign_blobs_to_blocks2(Pix *pix,
void assign_blobs_to_blocks2(Image pix,
BLOCK_LIST *blocks, // blocks to process
TO_BLOCK_LIST *port_blocks) { // output list
BLOCK *block; // current block
@ -211,7 +211,7 @@ void assign_blobs_to_blocks2(Pix *pix,
* grades on different lists in the matching TO_BLOCK in to_blocks.
**********************************************************************/
void Textord::find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
void Textord::find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
if (width > INT16_MAX || height > INT16_MAX) {

View File

@ -32,8 +32,8 @@ namespace tesseract {
class Tesseract;
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob);
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks);
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob);
void assign_blobs_to_blocks2(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks);
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction);

View File

@ -43,7 +43,7 @@ const double kRatingEpsilon = 1.0 / 32;
// with a debug flag and a keep_this argument to find out what is going on.
double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level,
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
const std::vector<Pix *> &page_images, SampleIterator *it,
const std::vector<Image > &page_images, SampleIterator *it,
double *unichar_error, double *scaled_error,
std::string *fonts_report) {
const int fontsize = it->sample_set()->NumFonts();
@ -59,7 +59,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
for (it->Begin(); !it->AtEnd(); it->Next()) {
TrainingSample *mutable_sample = it->MutableSample();
int page_index = mutable_sample->page_num();
Pix *page_pix =
Image page_pix =
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
// No debug, no keep this.
classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results);
@ -108,7 +108,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
// and a keep_this argument to find out what is going on.
void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
const std::vector<Pix *> &page_images, SampleIterator *it) {
const std::vector<Image > &page_images, SampleIterator *it) {
int fontsize = it->sample_set()->NumFonts();
ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
@ -121,7 +121,7 @@ void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifi
for (it->Begin(); !it->AtEnd(); it->Next()) {
TrainingSample *mutable_sample = it->MutableSample();
int page_index = mutable_sample->page_num();
Pix *page_pix =
Image page_pix =
0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr;
// No debug, no keep this.
old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,

View File

@ -121,7 +121,7 @@ public:
// * The return value is the un-weighted version of the scaled_error.
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level,
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
const std::vector<Pix *> &page_images, SampleIterator *it,
const std::vector<Image > &page_images, SampleIterator *it,
double *unichar_error, double *scaled_error, std::string *fonts_report);
// Tests a pair of classifiers, debugging errors of the new against the old.
// See errorcounter.h for description of arguments.
@ -131,7 +131,7 @@ public:
// with a debug flag and a keep_this argument to find out what is going on.
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier,
CountTypes boosting_mode, const FontInfoTable &fontinfo_table,
const std::vector<Pix *> &page_images, SampleIterator *it);
const std::vector<Image > &page_images, SampleIterator *it);
private:
// Simple struct to hold an array of counts.

View File

@ -63,7 +63,7 @@ MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
MasterTrainer::~MasterTrainer() {
delete[] fragments_;
for (auto &page_image : page_images_) {
pixDestroy(&page_image);
page_image.destroy();
}
}
@ -219,7 +219,7 @@ void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSa
void MasterTrainer::LoadPageImages(const char *filename) {
size_t offset = 0;
int page;
Pix *pix;
Image pix;
for (page = 0;; page++) {
pix = pixReadFromMultipageTiff(filename, &offset);
if (!pix) {

View File

@ -284,7 +284,7 @@ private:
// Vector of Pix pointers used for classifiers that need the image.
// Indexed by page_num_ in the samples.
// These images are owned by the trainer and need to be pixDestroyed.
std::vector<Pix *> page_images_;
std::vector<Image > page_images_;
// Vector of filenames of loaded tr files.
std::vector<std::string> tr_filenames_;
};

View File

@ -86,9 +86,9 @@ const int kMinRampSize = 1000;
// the edges.
// Finally a greyscale ramp provides a continuum of effects between exposure
// levels.
Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) {
Pix *pix = pixConvertTo8(input, false);
pixDestroy(&input);
Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) {
Image pix = pixConvertTo8(input, false);
input.destroy();
input = pix;
int width = pixGetWidth(input);
int height = pixGetHeight(input);
@ -99,12 +99,12 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
// see http://www.leptonica.com/grayscale-morphology.html
pix = input;
input = pixErodeGray(pix, 3, 3);
pixDestroy(&pix);
pix.destroy();
}
// A convolution is essential to any mode as no scanner produces an
// image as sharp as the electronic image.
pix = pixBlockconv(input, 1, 1);
pixDestroy(&input);
input.destroy();
// A small random rotation helps to make the edges jaggy in a realistic way.
if (rotation != nullptr) {
float radians_clockwise = 0.0f;
@ -117,7 +117,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0);
// Rotate the boxes to match.
*rotation = radians_clockwise;
pixDestroy(&pix);
pix.destroy();
} else {
input = pix;
}
@ -129,7 +129,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
// see http://www.leptonica.com/grayscale-morphology.html
pix = input;
input = pixErodeGray(pix, 3, 3);
pixDestroy(&pix);
pix.destroy();
}
// The convolution really needed to be 2x2 to be realistic enough, but
// we only have 3x3, so we have to bias the image darker or lose thin
@ -176,27 +176,27 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
// any spatial distortion and also by the integer reduction factor
// box_reduction so they will match what the network will output.
// Returns nullptr on error. The caller owns the returned image and must
// destroy it.
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
std::vector<TBOX> *boxes) {
Pix *distorted = pixCopy(nullptr, const_cast<Pix *>(pix));
Image distorted = pixCopy(nullptr, pix);
// Things to do to synthetic training data.
if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
// TODO(rays) Cook noise in a more thread-safe manner than rand().
// Attempt to make the sequences reproducible.
srand(randomizer->IntRand());
Pix *pixn = pixAddGaussianNoise(distorted, 8.0);
pixDestroy(&distorted);
Image pixn = pixAddGaussianNoise(distorted, 8.0);
distorted.destroy();
if (smooth_noise) {
distorted = pixBlockconv(pixn, 1, 1);
pixDestroy(&pixn);
pixn.destroy();
} else {
distorted = pixn;
}
}
if (blur && randomizer->SignedRand(1.0) > 0.0) {
Pix *blurred = pixBlockconv(distorted, 1, 1);
pixDestroy(&distorted);
Image blurred = pixBlockconv(distorted, 1, 1);
distorted.destroy();
distorted = blurred;
}
if (perspective) {
@ -219,7 +219,7 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi
// Distorts anything that has a non-null pointer with the same pseudo-random
// perspective distortion. Width and height only need to be set if there
// is no pix. If there is a pix, then they will be taken from there.
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
std::vector<TBOX> *boxes) {
if (pix != nullptr && *pix != nullptr) {
width = pixGetWidth(*pix);
@ -230,12 +230,12 @@ void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix
l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
if (pix != nullptr && *pix != nullptr) {
// Transform the image.
Pix *transformed = pixProjective(*pix, im_coeffs, incolor);
Image transformed = pixProjective(*pix, im_coeffs, incolor);
if (transformed == nullptr) {
tprintf("Projective transformation failed!!\n");
return;
}
pixDestroy(pix);
pix->destroy();
*pix = transformed;
}
if (boxes != nullptr) {

View File

@ -30,20 +30,20 @@ namespace tesseract {
// If rotation is not nullptr, the clockwise rotation in radians is saved there.
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.)
// The input image is destroyed and a different image returned.
struct Pix *DegradeImage(struct Pix *input, int exposure, TRand *randomizer, float *rotation);
struct Image DegradeImage(struct Image input, int exposure, TRand *randomizer, float *rotation);
// Creates and returns an image distorted by various means according to the
// bool flags. If boxes is not nullptr, the boxes are resized/positioned
// according to any spatial distortion and also by the integer reduction factor
// box_reduction so they will match what the network will output.
// Returns nullptr on error. The caller owns the returned image and must
// destroy it.
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise,
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
std::vector<TBOX> *boxes);
// Distorts anything that has a non-null pointer with the same pseudo-random
// perspective distortion. Width and height only need to be set if there
// is no pix. If there is a pix, then they will be taken from there.
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix,
std::vector<TBOX> *boxes);
// Computes the coefficients of a randomized projective transformation.
// The image transform requires backward transformation coefficient, and the

View File

@ -74,14 +74,14 @@ static bool RandBool(const double prob, TRand *rand) {
}
/* static */
static Pix *CairoARGB32ToPixFormat(cairo_surface_t *surface) {
static Image CairoARGB32ToPixFormat(cairo_surface_t *surface) {
if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) {
printf("Unexpected surface format %d\n", cairo_image_surface_get_format(surface));
return nullptr;
}
const int width = cairo_image_surface_get_width(surface);
const int height = cairo_image_surface_get_height(surface);
Pix *pix = pixCreate(width, height, 32);
Image pix = pixCreate(width, height, 32);
int byte_stride = cairo_image_surface_get_stride(surface);
for (int i = 0; i < height; ++i) {
@ -636,25 +636,25 @@ int StringRenderer::StripUnrenderableWords(std::string *utf8_text) const {
return num_dropped;
}
int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Pix **pix) {
Pix *orig_pix = nullptr;
int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Image *pix) {
Image orig_pix = nullptr;
int offset = RenderToImage(text, text_length, &orig_pix);
if (orig_pix) {
*pix = pixConvertTo8(orig_pix, false);
pixDestroy(&orig_pix);
orig_pix.destroy();
}
return offset;
}
int StringRenderer::RenderToBinaryImage(const char *text, int text_length, int threshold,
Pix **pix) {
Pix *orig_pix = nullptr;
Image *pix) {
Image orig_pix = nullptr;
int offset = RenderToImage(text, text_length, &orig_pix);
if (orig_pix) {
Pix *gray_pix = pixConvertTo8(orig_pix, false);
pixDestroy(&orig_pix);
Image gray_pix = pixConvertTo8(orig_pix, false);
orig_pix.destroy();
*pix = pixThresholdToBinary(gray_pix, threshold);
pixDestroy(&gray_pix);
gray_pix.destroy();
} else {
*pix = orig_pix;
}
@ -719,9 +719,9 @@ std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string
}
// Returns offset to end of text substring rendered in this method.
int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix) {
int StringRenderer::RenderToImage(const char *text, int text_length, Image *pix) {
if (pix && *pix) {
pixDestroy(pix);
pix->destroy();
}
InitPangoCairo();
@ -813,7 +813,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix)
//
// int offset = 0;
// do {
// Pix *pix;
// Image pix;
// offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset,
// strlen(txt + offset), nullptr,
// &pix);
@ -821,7 +821,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix)
// } while (offset < strlen(text));
//
int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
std::string *font_used, Pix **image) {
std::string *font_used, Image *image) {
*image = nullptr;
// Select a suitable font to render the title with.
const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
@ -873,10 +873,10 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text,
// Add the font to the image.
set_font(title_font);
v_margin_ /= 8;
Pix *title_image = nullptr;
Image title_image = nullptr;
RenderToBinaryImage(title, strlen(title), 128, &title_image);
pixOr(*image, *image, title_image);
pixDestroy(&title_image);
title_image.destroy();
v_margin_ *= 8;
set_font(orig_font);

View File

@ -34,6 +34,8 @@
#include "pango/pangocairo.h"
#include "pango_font_info.h"
#include "image.h"
#include <string>
#include <unordered_map>
#include <vector>
@ -53,14 +55,14 @@ public:
// Renders the text with the chosen font and returns the byte offset up to
// which the text could be rendered so as to fit the specified page
// dimensions.
int RenderToImage(const char *text, int text_length, Pix **pix);
int RenderToGrayscaleImage(const char *text, int text_length, Pix **pix);
int RenderToBinaryImage(const char *text, int text_length, int threshold, Pix **pix);
int RenderToImage(const char *text, int text_length, Image *pix);
int RenderToGrayscaleImage(const char *text, int text_length, Image *pix);
int RenderToBinaryImage(const char *text, int text_length, int threshold, Image *pix);
// Renders a line of text with all available fonts that were able to render
// at least min_coverage fraction of the input text. Use 1.0 to require that
// a font be able to render all the text.
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length,
std::string *font_used, Pix **pix);
std::string *font_used, Image *pix);
bool set_font(const std::string &desc);
// Char spacing is in PIXELS!!!!.

View File

@ -331,7 +331,7 @@ static void ExtractFontProperties(const std::string &utf8_text, StringRenderer *
File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
}
static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
static bool MakeIndividualGlyphs(Image pix, const std::vector<BoxChar *> &vbox,
const int input_tiff_page) {
// If checks fail, return false without exiting text2image
if (!pix) {
@ -383,26 +383,26 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
continue;
}
// Crop the boxed character
Pix *pix_glyph = pixClipRectangle(pix, b, nullptr);
Image pix_glyph = pixClipRectangle(pix, b, nullptr);
if (!pix_glyph) {
tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
continue;
}
// Resize to square
Pix *pix_glyph_sq =
Image pix_glyph_sq =
pixScaleToSize(pix_glyph, FLAGS_glyph_resized_size, FLAGS_glyph_resized_size);
if (!pix_glyph_sq) {
tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
continue;
}
// Zero-pad
Pix *pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0);
Image pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0);
if (!pix_glyph_sq_pad) {
tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", i);
continue;
}
// Write out
Pix *pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
Image pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
char filename[1024];
snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), glyph_count++);
if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
@ -413,10 +413,10 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector<BoxChar *> &vbox,
continue;
}
pixDestroy(&pix_glyph);
pixDestroy(&pix_glyph_sq);
pixDestroy(&pix_glyph_sq_pad);
pixDestroy(&pix_glyph_sq_pad_8);
pix_glyph.destroy();
pix_glyph_sq.destroy();
pix_glyph_sq_pad.destroy();
pix_glyph_sq_pad_8.destroy();
n_boxes_saved++;
y_previous = y;
}
@ -625,7 +625,7 @@ static int Main() {
offset < strlen(to_render_utf8) && (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
++im, ++page_num) {
tlog(1, "Starting page %d\n", im);
Pix *pix = nullptr;
Image pix = nullptr;
if (FLAGS_find_fonts) {
offset += render.RenderAllFontsToImage(FLAGS_min_coverage, to_render_utf8 + offset,
strlen(to_render_utf8 + offset), &font_used, &pix);
@ -655,10 +655,10 @@ static int Main() {
page_rotation.push_back(rotation);
}
Pix *gray_pix = pixConvertTo8(pix, false);
pixDestroy(&pix);
Pix *binary = pixThresholdToBinary(gray_pix, 128);
pixDestroy(&gray_pix);
Image gray_pix = pixConvertTo8(pix, false);
pix.destroy();
Image binary = pixThresholdToBinary(gray_pix, 128);
gray_pix.destroy();
char tiff_name[1024];
if (FLAGS_find_fonts) {
if (FLAGS_render_per_font) {
@ -681,7 +681,7 @@ static int Main() {
tprintf("ERROR: Individual glyphs not saved\n");
}
}
pixDestroy(&binary);
binary.destroy();
}
if (FLAGS_find_fonts && offset != 0) {
// We just want a list of names, or some sample images so we don't need

View File

@ -784,7 +784,7 @@ void ScrollView::ZoomToRectangle(int x1, int y1, int x2, int y2) {
}
// Send an image of type Pix.
void ScrollView::Image(struct Pix *image, int x_pos, int y_pos) {
void ScrollView::Image(struct Image image, int x_pos, int y_pos) {
l_uint8 *data;
size_t size;
pixWriteMem(&data, &size, image, IFF_PNG);

View File

@ -31,6 +31,8 @@
#ifndef TESSERACT_VIEWER_SCROLLVIEW_H_
#define TESSERACT_VIEWER_SCROLLVIEW_H_
#include "image.h"
#include <tesseract/export.h>
#include <cstdio>
@ -209,7 +211,7 @@ public:
*******************************************************************************/
// Draw the given image at position (x_pos, y_pos).
void Image(Pix *image, int x_pos, int y_pos);
void Image(Image image, int x_pos, int y_pos);
// Flush buffers and update display.
static void Update();
@ -353,11 +355,11 @@ public:
private:
// Transfers a binary Image.
void TransferBinaryImage(struct Pix *image);
void TransferBinaryImage(struct Image image);
// Transfers a gray scale Image.
void TransferGrayImage(struct Pix *image);
void TransferGrayImage(struct Image image);
// Transfers a 32-Bit Image.
void Transfer32bppImage(struct Pix *image);
void Transfer32bppImage(struct Image image);
// Sets up ScrollView, depending on the variables from the constructor.
void Initialize(const char *name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size,

View File

@ -31,6 +31,7 @@
#include <memory> // std::unique_ptr
#include <string>
#include "include_gunit.h"
#include "image.h"
namespace tesseract {
@ -66,7 +67,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat
std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
auto api = std::make_unique<tesseract::TessBaseAPI>();
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
Pix *image = pixRead(imgname);
Image image = pixRead(imgname);
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
api->SetImage(image);
outText = api->GetUTF8Text();
@ -74,7 +75,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat
<< ::testing::PrintToString(lang);
api->End();
delete[] outText;
pixDestroy(&image);
image.destroy();
}
class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface<const char *> {};

View File

@ -37,12 +37,12 @@ protected:
src_pix_ = nullptr;
}
~ApplyBoxTest() override {
pixDestroy(&src_pix_);
src_pix_.destroy();
}
bool SetImage(const char *filename) {
bool found = false;
pixDestroy(&src_pix_);
src_pix_.destroy();
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
if (api_.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
@ -101,7 +101,7 @@ protected:
delete it;
}
Pix *src_pix_;
Image src_pix_;
std::string ocr_text_;
tesseract::TessBaseAPI api_;
};

View File

@ -44,7 +44,7 @@ class FriendlyTessBaseAPI : public tesseract::TessBaseAPI {
FRIEND_TEST(TesseractTest, LSTMGeometryTest);
};
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Pix *pix) {
std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) {
tess->SetImage(pix);
char *result = tess->GetUTF8Text();
std::string ocr_result = result;
@ -70,14 +70,14 @@ TEST_F(TesseractTest, BasicTesseractTest) {
std::string truth_text;
std::string ocr_text;
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
Image src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
CHECK(src_pix);
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
pixDestroy(&src_pix);
src_pix.destroy();
} else {
// eng.traineddata not found.
GTEST_SKIP();
@ -105,7 +105,7 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
boxaDestroy(&block_boxes);
boxaDestroy(&para_boxes);
pixDestroy(&src_pix);
src_pix.destroy();
#endif
} else {
// eng.traineddata not found.
@ -122,7 +122,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
GTEST_SKIP();
return;
}
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
CHECK(src_pix);
api.SetImage(src_pix);
char *result = api.GetHOCRText(0);
@ -130,7 +130,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
EXPECT_THAT(result, HasSubstr("Hello"));
EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
delete[] result;
pixDestroy(&src_pix);
src_pix.destroy();
}
// hOCR output should contain baseline info for upright textlines.
@ -141,7 +141,7 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
GTEST_SKIP();
return;
}
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
CHECK(src_pix);
api.SetInputName("HelloGoogle.tif");
api.SetImage(src_pix);
@ -152,7 +152,7 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
result, std::regex{"<span class='ocr_line'[^>]* baseline [-.0-9]+ [-.0-9]+"}));
delete[] result;
pixDestroy(&src_pix);
src_pix.destroy();
}
// Tests that Tesseract gets exactly the right answer on some page numbers.
@ -182,23 +182,23 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
// Train on the training text.
for (int i = 0; kTrainingPages[i] != nullptr; ++i) {
std::string image_file = TestDataNameToPath(kTrainingPages[i]);
Pix *src_pix = pixRead(image_file.c_str());
Image src_pix = pixRead(image_file.c_str());
CHECK(src_pix);
api.SetImage(src_pix);
EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
<< "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file;
pixDestroy(&src_pix);
src_pix.destroy();
}
// Test the test text.
api.SetVariable("tess_bn_matching", "1");
api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
for (int i = 0; kTestPages[i] != nullptr; ++i) {
Pix *src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
CHECK(src_pix);
ocr_text = GetCleanedTextResult(&api, src_pix);
absl::StripAsciiWhitespace(&truth_text);
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
pixDestroy(&src_pix);
src_pix.destroy();
}
#endif
}
@ -213,14 +213,14 @@ TEST_F(TesseractTest, BasicLSTMTest) {
GTEST_SKIP();
return;
}
Pix *src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
CHECK(src_pix);
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
pixDestroy(&src_pix);
src_pix.destroy();
}
// Test that LSTM's character bounding boxes are properly converted to
@ -230,7 +230,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
// errors due to float/int conversions (e.g., see OUTLINE::move() in
// ccstruct/poutline.h). Instead, we do a loose check.
TEST_F(TesseractTest, LSTMGeometryTest) {
Pix *src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
FriendlyTessBaseAPI api;
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
// eng.traineddata not found.
@ -270,7 +270,7 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5);
}
}
pixDestroy(&src_pix);
src_pix.destroy();
}
TEST_F(TesseractTest, InitConfigOnlyTest) {
@ -315,7 +315,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
const std::string kTessdataPath = TESSDATA_DIR;
// Preload images and verify that OCR is correct on them individually.
std::vector<Pix *> pix(num_langs);
std::vector<Image > pix(num_langs);
for (int i = 0; i < num_langs; ++i) {
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
@ -346,7 +346,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
}
for (int i = 0; i < num_langs; ++i) {
pixDestroy(&pix[i]);
pix[i].destroy();
}
}

View File

@ -32,6 +32,7 @@
#include "commandlineflags.h"
#include "include_gunit.h"
#include "log.h"
#include "image.h"
// Run with Tesseract instances.
BOOL_PARAM_FLAG(test_tesseract, true, "Test tesseract instances");
@ -97,7 +98,7 @@ protected:
const int n = num_langs_ * FLAGS_reps;
for (int i = 0; i < n; ++i) {
std::string path = TESTING_DIR "/" + image_files[i % num_langs_];
Pix *new_pix = pixRead(path.c_str());
Image new_pix = pixRead(path.c_str());
QCHECK(new_pix != nullptr) << "Could not read " << path;
pix_.push_back(new_pix);
}
@ -110,7 +111,7 @@ protected:
static void TearDownTestCase() {
for (auto &pix : pix_) {
pixDestroy(&pix);
pix.destroy();
}
}
@ -127,7 +128,7 @@ protected:
std::unique_ptr<tensorflow::thread::ThreadPool> pool_;
static int pool_size_;
#endif
static std::vector<Pix *> pix_;
static std::vector<Image > pix_;
static std::vector<std::string> langs_;
static std::vector<std::string> gt_text_;
static int num_langs_;
@ -137,7 +138,7 @@ protected:
#ifdef INCLUDE_TENSORFLOW
int BaseapiThreadTest::pool_size_;
#endif
std::vector<Pix *> BaseapiThreadTest::pix_;
std::vector<Image > BaseapiThreadTest::pix_;
std::vector<std::string> BaseapiThreadTest::langs_;
std::vector<std::string> BaseapiThreadTest::gt_text_;
int BaseapiThreadTest::num_langs_;
@ -147,7 +148,7 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) {
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
}
static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) {
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) {
tess->SetImage(pix);
char *result = tess->GetUTF8Text();
*ocr_text = result;
@ -155,7 +156,7 @@ static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) {
absl::StripAsciiWhitespace(ocr_text);
}
static void VerifyTextResult(TessBaseAPI *tess, Pix *pix, const std::string &lang,
static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang,
const std::string &expected_text) {
TessBaseAPI *tess_local = nullptr;
if (tess) {

View File

@ -61,7 +61,7 @@ public:
}
// Set up pix_binary for lang_tesseract_.
void SetPixBinary(Pix *pix) {
void SetPixBinary(Image pix) {
CHECK_EQ(1, pixGetDepth(pix));
*(lang_tesseract_->mutable_pix_binary()) = pix;
}
@ -137,7 +137,7 @@ protected:
}
// Add a BLOCK covering the whole page.
void AddPageBlock(Pix *pix, BLOCK_LIST *blocks) {
void AddPageBlock(Image pix, BLOCK_LIST *blocks) {
CHECK(pix != nullptr);
CHECK(blocks != nullptr);
BLOCK_IT block_it(blocks);
@ -183,7 +183,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) {
#else // TODO: missing equ_gt1.tif
// Load Image.
std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
Pix *pix_binary = pixRead(imagefile.c_str());
Image pix_binary = pixRead(imagefile.c_str());
CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1);
// Get components.
@ -224,7 +224,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) {
EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]);
// Release memory.
pixDestroy(&pix_binary);
pix_binary.destroy();
#endif
}
@ -364,7 +364,7 @@ TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
TEST_F(EquationFinderTest, ComputeForegroundDensity) {
// Create the pix with top half foreground, bottom half background.
int width = 1024, height = 768;
Pix *pix = pixCreate(width, height, 1);
Image pix = pixCreate(width, height, 1);
pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0);
TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
box3(100, height - 40, 140, height);
@ -402,7 +402,7 @@ TEST_F(EquationFinderTest, CountAlignment) {
}
TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
Pix *pix = pixCreate(1001, 1001, 1);
Image pix = pixCreate(1001, 1001, 1);
equation_det_->SetPixBinary(pix);
ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));

View File

@ -68,7 +68,7 @@ extern "C" int LLVMFuzzerInitialize(int * /*pArgc*/, char ***pArgv) {
}
static PIX *createPix(BitReader &BR, const size_t width, const size_t height) {
Pix *pix = pixCreate(width, height, 1);
Image pix = pixCreate(width, height, 1);
if (pix == nullptr) {
printf("pix creation failed\n");
@ -93,7 +93,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
char *outText = api->GetUTF8Text();
pixDestroy(&pix);
pix.destroy();
delete[] outText;
return 0;

View File

@ -65,11 +65,11 @@ protected:
src_pix_ = nullptr;
}
~LayoutTest() override {
pixDestroy(&src_pix_);
src_pix_.destroy();
}
void SetImage(const char *filename, const char *lang) {
pixDestroy(&src_pix_);
src_pix_.destroy();
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
api_.Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY);
api_.SetPageSegMode(tesseract::PSM_AUTO);
@ -182,7 +182,7 @@ protected:
} while (it->Next(tesseract::RIL_BLOCK));
}
Pix *src_pix_;
Image src_pix_;
std::string ocr_text_;
tesseract::TessBaseAPI api_;
};

View File

@ -87,7 +87,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
// baseapi_test.cc).
TessBaseAPI api;
api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY);
Pix *src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
Image src_pix = pixRead(TestingNameToPath("phototest.tif").c_str());
CHECK(src_pix);
api.SetImage(src_pix);
std::unique_ptr<char[]> result(api.GetUTF8Text());
@ -96,7 +96,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
EXPECT_STREQ(truth_text.c_str(), result.get());
pixDestroy(&src_pix);
src_pix.destroy();
}
} // namespace tesseract

View File

@ -81,7 +81,7 @@ public:
// If keep_this (a shape index) is >= 0, then the results should always
// contain keep_this, and (if possible) anything of intermediate confidence.
// The return value is the number of classes saved in results.
int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this,
int ClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this,
std::vector<ShapeRating> *results) override {
results->clear();
// Everything except the first kNumNonReject is a reject.

View File

@ -25,6 +25,7 @@
#include <memory> // std::unique_ptr
#include <string>
#include "include_gunit.h"
#include "image.h"
namespace tesseract {
@ -37,7 +38,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat
// log.info() << tessdatadir << " for image: " << imgname << std::endl;
auto api = std::make_unique<tesseract::TessBaseAPI>();
ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract.";
Pix *image = pixRead(imgname);
Image image = pixRead(imgname);
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
api->SetImage(image);
int orient_deg;
@ -53,7 +54,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat
orient_deg, orient_conf, script_name, script_conf);
EXPECT_EQ(expected_deg, orient_deg);
api->End();
pixDestroy(&image);
image.destroy();
}
#endif

View File

@ -19,6 +19,7 @@
#include <string>
#include "helpers.h"
#include "include_gunit.h"
#include "image.h"
#include "log.h"
namespace tesseract {
@ -37,7 +38,7 @@ class PageSegModeTest : public testing::Test {
protected:
PageSegModeTest() = default;
~PageSegModeTest() override {
pixDestroy(&src_pix_);
src_pix_.destroy();
}
void SetUp() override {
@ -46,7 +47,7 @@ protected:
}
void SetImage(const char *filename) {
pixDestroy(&src_pix_);
src_pix_.destroy();
src_pix_ = pixRead(filename);
api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
api_.SetImage(src_pix_);
@ -76,7 +77,7 @@ protected:
delete[] result;
}
Pix *src_pix_ = nullptr;
Image src_pix_ = nullptr;
std::string ocr_text_;
tesseract::TessBaseAPI api_;
};

View File

@ -20,6 +20,7 @@
#include <tesseract/baseapi.h>
#include <tesseract/ocrclass.h>
#include "image.h"
#include <allheaders.h>
#include "gmock/gmock.h"
@ -93,7 +94,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c
auto api = std::make_unique<tesseract::TessBaseAPI>();
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
Pix *image = pixRead(imgname);
Image image = pixRead(imgname);
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
api->SetImage(image);
@ -109,7 +110,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
api->End();
pixDestroy(&image);
image.destroy();
}
void NewProgressTester(const char *imgname, const char *tessdatadir, const char *lang) {
@ -124,7 +125,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char
auto api = std::make_unique<tesseract::TessBaseAPI>();
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
Pix *image = pixRead(imgname);
Image image = pixRead(imgname);
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
api->SetImage(image);
@ -141,7 +142,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
api->End();
pixDestroy(&image);
image.destroy();
}
TEST(QuickTest, ClassicProgressReporting) {

View File

@ -40,7 +40,7 @@ protected:
// api_.ReadConfigFile(FLAGS_tess_config.c_str());
api_.SetPageSegMode(tesseract::PSM_AUTO);
api_.SetImage(src_pix_);
pixDestroy(&src_pix_);
src_pix_.destroy();
src_pix_ = api_.GetInputImage();
}
@ -52,7 +52,7 @@ protected:
int width = pixGetWidth(src_pix_);
int height = pixGetHeight(src_pix_);
int depth = pixGetDepth(src_pix_);
Pix *pix = pixCreate(width, height, depth);
Image pix = pixCreate(width, height, depth);
EXPECT_TRUE(depth == 1 || depth == 8);
if (depth == 8) {
pixSetAll(pix);
@ -68,7 +68,7 @@ protected:
LOG(INFO) << "BBox: [L:" << left << ", T:" << top << ", R:" << right << ", B:" << bottom
<< "]"
<< "\n";
Pix *block_pix;
Image block_pix;
if (depth == 1) {
block_pix = it->GetBinaryImage(im_level);
pixRasterop(pix, left, top, right - left, bottom - top, PIX_SRC ^ PIX_DST, block_pix, 0, 0);
@ -78,14 +78,14 @@ protected:
PIX_SRC & PIX_DST, block_pix, 0, 0);
}
CHECK(block_pix != nullptr);
pixDestroy(&block_pix);
block_pix.destroy();
} while (it->Next(level));
// if (base::GetFlag(FLAGS_v) >= 1)
// pixWrite(OutputNameToPath("rebuilt.png").c_str(), pix, IFF_PNG);
pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST, src_pix_, 0, 0);
if (depth == 8) {
Pix *binary_pix = pixThresholdToBinary(pix, 128);
pixDestroy(&pix);
Image binary_pix = pixThresholdToBinary(pix, 128);
pix.destroy();
pixInvert(binary_pix, binary_pix);
pix = binary_pix;
}
@ -98,7 +98,7 @@ protected:
LOG(INFO) << "outfile = " << outfile << "\n";
pixWrite(outfile.c_str(), pix, IFF_PNG);
}
pixDestroy(&pix);
pix.destroy();
LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount);
EXPECT_LE(pixcount, max_diff);
// if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff);
@ -206,7 +206,7 @@ protected:
}
// Objects declared here can be used by all tests in the test case for Foo.
Pix *src_pix_; // Borrowed from api_. Do not destroy.
Image src_pix_; // Borrowed from api_. Do not destroy.
std::string ocr_text_;
tesseract::TessBaseAPI api_;
};

Some files were not shown because too many files have changed in this diff Show More