diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index a96e7dcd..7e43b6df 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -631,7 +631,7 @@ Pix *TessBaseAPI::GetThresholdedImage() { if (tesseract_ == nullptr || thresholder_ == nullptr) { return nullptr; } - if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) { + if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) { return nullptr; } return pixClone(tesseract_->pix_binary()); @@ -2098,9 +2098,11 @@ bool TessBaseAPI::Threshold(Pix **pix) { thresholder_->SetSourceYResolution(kMinCredibleResolution); } auto pageseg_mode = static_cast(static_cast(tesseract_->tessedit_pageseg_mode)); - if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) { + Image im(*pix); + if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) { return false; } + *pix = im; thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_, &image_height_); if (!thresholder_->IsBinary()) { @@ -2144,7 +2146,7 @@ int TessBaseAPI::FindLines() { tesseract_->InitAdaptiveClassifier(nullptr); #endif } - if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) { + if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) { return -1; } @@ -2270,7 +2272,7 @@ bool TessBaseAPI::DetectOS(OSResults *osr) { return false; } ClearResults(); - if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) { + if (tesseract_->pix_binary() == nullptr && !Threshold(&tesseract_->mutable_pix_binary()->pix_)) { return false; } diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp index 3b47548b..a5058eab 100644 --- a/src/ccmain/equationdetect.cpp +++ b/src/ccmain/equationdetect.cpp @@ -583,13 +583,13 @@ void EquationDetect::IdentifySeedParts() { } float EquationDetect::ComputeForegroundDensity(const TBOX &tbox) { - Pix *pix_bi = 
lang_tesseract_->pix_binary(); + Image pix_bi = lang_tesseract_->pix_binary(); const int pix_height = pixGetHeight(pix_bi); Box *box = boxCreate(tbox.left(), pix_height - tbox.top(), tbox.width(), tbox.height()); - Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr); + Image pix_sub = pixClipRectangle(pix_bi, box, nullptr); l_float32 fract; pixForegroundFraction(pix_sub, &fract); - pixDestroy(&pix_sub); + pix_sub.destroy(); boxDestroy(&box); return fract; @@ -1395,7 +1395,7 @@ void EquationDetect::GetOutputTiffName(const char *name, std::string &image_name } void EquationDetect::PaintSpecialTexts(const std::string &outfile) const { - Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary(); + Image pix = nullptr, pixBi = lang_tesseract_->pix_binary(); pix = pixConvertTo32(pixBi); ColPartitionGridSearch gsearch(part_grid_); ColPartition *part = nullptr; @@ -1408,11 +1408,11 @@ void EquationDetect::PaintSpecialTexts(const std::string &outfile) const { } pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW); - pixDestroy(&pix); + pix.destroy(); } void EquationDetect::PaintColParts(const std::string &outfile) const { - Pix *pix = pixConvertTo32(lang_tesseract_->BestPix()); + Image pix = pixConvertTo32(lang_tesseract_->BestPix()); ColPartitionGridSearch gsearch(part_grid_); gsearch.StartFullSearch(); ColPartition *part = nullptr; @@ -1430,7 +1430,7 @@ void EquationDetect::PaintColParts(const std::string &outfile) const { } pixWrite(outfile.c_str(), pix, IFF_TIFF_LZW); - pixDestroy(&pix); + pix.destroy(); } void EquationDetect::PrintSpecialBlobsDensity(const ColPartition *part) const { diff --git a/src/ccmain/linerec.cpp b/src/ccmain/linerec.cpp index c055c644..4f85fd67 100644 --- a/src/ccmain/linerec.cpp +++ b/src/ccmain/linerec.cpp @@ -185,7 +185,7 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd } // Now revised_box always refers to the image. // BestPix is never colormapped, but may be of any depth. 
- Pix *pix = BestPix(); + Image pix = BestPix(); int width = pixGetWidth(pix); int height = pixGetHeight(pix); TBOX image_box(0, 0, width, height); @@ -196,22 +196,22 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd } Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(), revised_box->height()); - Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr); + Image box_pix = pixClipRectangle(pix, clip_box, nullptr); boxDestroy(&clip_box); if (box_pix == nullptr) { return nullptr; } if (num_rotations > 0) { - Pix *rot_pix = pixRotateOrth(box_pix, num_rotations); - pixDestroy(&box_pix); + Image rot_pix = pixRotateOrth(box_pix, num_rotations); + box_pix.destroy(); box_pix = rot_pix; } // Convert sub-8-bit images to 8 bit. int depth = pixGetDepth(box_pix); if (depth < 8) { - Pix *grey; + Image grey; grey = pixConvertTo8(box_pix, false); - pixDestroy(&box_pix); + box_pix.destroy(); box_pix = grey; } bool vertical_text = false; diff --git a/src/ccmain/osdetect.cpp b/src/ccmain/osdetect.cpp index 911b3df3..56cc564b 100644 --- a/src/ccmain/osdetect.cpp +++ b/src/ccmain/osdetect.cpp @@ -158,7 +158,7 @@ void OSResults::accumulate(const OSResults &osr) { // image, so that non-text blobs are removed from consideration. 
static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { - Pix *pix = tess->pix_binary(); + Image pix = tess->pix_binary(); ASSERT_HOST(pix != nullptr); int vertical_x = 0; int vertical_y = 1; @@ -174,10 +174,10 @@ static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *block tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y, nullptr, &v_lines, &h_lines); - Pix *im_pix = tesseract::ImageFind::FindImages(pix, nullptr); + Image im_pix = tesseract::ImageFind::FindImages(pix, nullptr); if (im_pix != nullptr) { pixSubtract(pix, pix, im_pix); - pixDestroy(&im_pix); + im_pix.destroy(); } tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks); } diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp index 4b3aa208..fd893ac8 100644 --- a/src/ccmain/pageiterator.cpp +++ b/src/ccmain/pageiterator.cpp @@ -444,19 +444,19 @@ Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const { return cblob_it_->data()->render(); } Box *box = boxCreate(left, top, right - left, bottom - top); - Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); + Image pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr); boxDestroy(&box); if (level == RIL_BLOCK || level == RIL_PARA) { // Clip to the block polygon as well. TBOX mask_box; - Pix *mask = it_->block()->block->render_mask(&mask_box); + Image mask = it_->block()->block->render_mask(&mask_box); int mask_x = left - mask_box.left(); int mask_y = top - (tesseract_->ImageHeight() - mask_box.top()); // AND the mask and pix, putting the result in pix. 
pixRasterop(pix, std::max(0, -mask_x), std::max(0, -mask_y), pixGetWidth(pix), pixGetHeight(pix), PIX_SRC & PIX_DST, mask, std::max(0, mask_x), std::max(0, mask_y)); - pixDestroy(&mask); + mask.destroy(); } return pix; } @@ -488,25 +488,25 @@ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_ right = std::min(right + padding, rect_width_); bottom = std::min(bottom + padding, rect_height_); Box *box = boxCreate(*left, *top, right - *left, bottom - *top); - Pix *grey_pix = pixClipRectangle(original_img, box, nullptr); + Image grey_pix = pixClipRectangle(original_img, box, nullptr); boxDestroy(&box); if (level == RIL_BLOCK || level == RIL_PARA) { // Clip to the block polygon as well. TBOX mask_box; - Pix *mask = it_->block()->block->render_mask(&mask_box); + Image mask = it_->block()->block->render_mask(&mask_box); // Copy the mask registered correctly into an image the size of grey_pix. int mask_x = *left - mask_box.left(); int mask_y = *top - (pixGetHeight(original_img) - mask_box.top()); int width = pixGetWidth(grey_pix); int height = pixGetHeight(grey_pix); - Pix *resized_mask = pixCreate(width, height, 1); + Image resized_mask = pixCreate(width, height, 1); pixRasterop(resized_mask, std::max(0, -mask_x), std::max(0, -mask_y), width, height, PIX_SRC, mask, std::max(0, mask_x), std::max(0, mask_y)); - pixDestroy(&mask); + mask.destroy(); pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1, 2 * padding + 1); pixInvert(resized_mask, resized_mask); pixSetMasked(grey_pix, resized_mask, UINT32_MAX); - pixDestroy(&resized_mask); + resized_mask.destroy(); } return grey_pix; } diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp index da29b599..89ac689f 100644 --- a/src/ccmain/pagesegmain.cpp +++ b/src/ccmain/pagesegmain.cpp @@ -58,21 +58,21 @@ const int kMaxCircleErosions = 8; // The returned pix must be pixDestroyed after use. 
nullptr may be returned // if the image doesn't meet the trivial conditions that it uses to determine // success. -static Pix *RemoveEnclosingCircle(Pix *pixs) { - Pix *pixsi = pixInvert(nullptr, pixs); - Pix *pixc = pixCreateTemplate(pixs); +static Image RemoveEnclosingCircle(Image pixs) { + Image pixsi = pixInvert(nullptr, pixs); + Image pixc = pixCreateTemplate(pixs); pixSetOrClearBorder(pixc, 1, 1, 1, 1, PIX_SET); pixSeedfillBinary(pixc, pixc, pixsi, 4); pixInvert(pixc, pixc); - pixDestroy(&pixsi); - Pix *pixt = pixAnd(nullptr, pixs, pixc); + pixsi.destroy(); + Image pixt = pixAnd(nullptr, pixs, pixc); l_int32 max_count; pixCountConnComp(pixt, 8, &max_count); // The count has to go up before we start looking for the minimum. l_int32 min_count = INT32_MAX; - Pix *pixout = nullptr; + Image pixout = nullptr; for (int i = 1; i < kMaxCircleErosions; i++) { - pixDestroy(&pixt); + pixt.destroy(); pixErodeBrick(pixc, pixc, 3, 3); pixt = pixAnd(nullptr, pixs, pixc); l_int32 count; @@ -82,14 +82,14 @@ static Pix *RemoveEnclosingCircle(Pix *pixs) { min_count = count; } else if (count < min_count) { min_count = count; - pixDestroy(&pixout); + pixout.destroy(); pixout = pixCopy(nullptr, pixt); // Save the best. } else if (count >= min_count) { break; // We have passed by the best. 
} } - pixDestroy(&pixt); - pixDestroy(&pixc); + pixt.destroy(); + pixc.destroy(); return pixout; } @@ -148,9 +148,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); if (pageseg_mode == PSM_CIRCLE_WORD) { - Pix *pixcleaned = RemoveEnclosingCircle(pix_binary_); + Image pixcleaned = RemoveEnclosingCircle(pix_binary_); if (pixcleaned != nullptr) { - pixDestroy(&pix_binary_); + pix_binary_.destroy(); pix_binary_ = pixcleaned; } } @@ -200,8 +200,8 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract */ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr) { - Pix *photomask_pix = nullptr; - Pix *musicmask_pix = nullptr; + Image photomask_pix = nullptr; + Image musicmask_pix = nullptr; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; TO_BLOCK_LIST temp_blocks; @@ -231,8 +231,8 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC } delete finder; } - pixDestroy(&photomask_pix); - pixDestroy(&musicmask_pix); + photomask_pix.destroy(); + musicmask_pix.destroy(); if (result < 0) { return result; } @@ -272,8 +272,8 @@ static void AddAllScriptsConverted(const UNICHARSET &sid_set, const UNICHARSET & ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, - Pix **photo_mask_pix, - Pix **music_mask_pix) { + Image *photo_mask_pix, + Image *music_mask_pix) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; @@ -293,14 +293,14 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo // Leptonica is used to find a mask of the photo regions in the input. 
*photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_); if (tessedit_dump_pageseg_images) { - Pix *pix_no_image_ = nullptr; + Image pix_no_image_ = nullptr; if (*photo_mask_pix != nullptr) { pix_no_image_ = pixSubtract(nullptr, pix_binary_, *photo_mask_pix); } else { pix_no_image_ = pixClone(pix_binary_); } pixa_debug_.AddPix(pix_no_image_, "NoImages"); - pixDestroy(&pix_no_image_); + pix_no_image_.destroy(); } if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { v_lines.clear(); diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index df408b0a..df68199f 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -421,7 +421,7 @@ Tesseract::Tesseract() Tesseract::~Tesseract() { Clear(); - pixDestroy(&pix_original_); + pix_original_.destroy(); end_tesseract(); for (auto *lang : sub_langs_) { delete lang; @@ -442,10 +442,10 @@ Dict &Tesseract::getDict() { void Tesseract::Clear() { std::string debug_name = imagebasename + "_debug.pdf"; pixa_debug_.WritePDF(debug_name.c_str()); - pixDestroy(&pix_binary_); - pixDestroy(&pix_grey_); - pixDestroy(&pix_thresholds_); - pixDestroy(&scaled_color_); + pix_binary_.destroy(); + pix_grey_.destroy(); + pix_thresholds_.destroy(); + scaled_color_.destroy(); deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); splitter_.Clear(); @@ -518,7 +518,7 @@ void Tesseract::PrepareForPageseg() { if (pageseg_strategy > max_pageseg_strategy) { max_pageseg_strategy = pageseg_strategy; } - pixDestroy(&sub_lang->pix_binary_); + sub_lang->pix_binary_.destroy(); sub_lang->pix_binary_ = pixClone(pix_binary()); } // Perform shiro-rekha (top-line) splitting and replace the current image by @@ -527,7 +527,7 @@ void Tesseract::PrepareForPageseg() { splitter_.set_pageseg_split_strategy(max_pageseg_strategy); if (splitter_.Split(true, &pixa_debug_)) { ASSERT_HOST(splitter_.splitted_image()); - pixDestroy(&pix_binary_); + pix_binary_.destroy(); pix_binary_ = pixClone(splitter_.splitted_image()); } } @@ 
-555,14 +555,14 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O bool split_for_ocr = splitter_.Split(false, &pixa_debug_); // Restore pix_binary to the binarized original pix for future reference. ASSERT_HOST(splitter_.orig_pix()); - pixDestroy(&pix_binary_); + pix_binary_.destroy(); pix_binary_ = pixClone(splitter_.orig_pix()); // If the pageseg and ocr strategies are different, refresh the block list // (from the last SegmentImage call) with blobs from the real image to be used // for OCR. if (splitter_.HasDifferentSplitStrategies()) { BLOCK block("", true, 0, 0, 0, 0, pixGetWidth(pix_binary_), pixGetHeight(pix_binary_)); - Pix *pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix(); + Image pix_for_ocr = split_for_ocr ? splitter_.splitted_image() : splitter_.orig_pix(); extract_edges(pix_for_ocr, &block); splitter_.RefreshSegmentationWithNewBlobs(block.blob_list()); } diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 0668d109..5f8ef83e 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -197,26 +197,26 @@ public: return reskew_; } // Destroy any existing pix and return a pointer to the pointer. - Pix **mutable_pix_binary() { - pixDestroy(&pix_binary_); + Image *mutable_pix_binary() { + pix_binary_.destroy(); return &pix_binary_; } - Pix *pix_binary() const { + Image pix_binary() const { return pix_binary_; } - Pix *pix_grey() const { + Image pix_grey() const { return pix_grey_; } - void set_pix_grey(Pix *grey_pix) { - pixDestroy(&pix_grey_); + void set_pix_grey(Image grey_pix) { + pix_grey_.destroy(); pix_grey_ = grey_pix; } - Pix *pix_original() const { + Image pix_original() const { return pix_original_; } // Takes ownership of the given original_pix. 
- void set_pix_original(Pix *original_pix) { - pixDestroy(&pix_original_); + void set_pix_original(Image original_pix) { + pix_original_.destroy(); pix_original_ = original_pix; // Clone to sublangs as well. for (auto &lang : sub_langs_) { @@ -231,7 +231,7 @@ public: // To tell the difference pixGetDepth() will return 32, 8 or 1. // In any case, the return value is a borrowed Pix, and should not be // deleted or pixDestroyed. - Pix *BestPix() const { + Image BestPix() const { if (pixGetWidth(pix_original_) == ImageWidth()) { return pix_original_; } else if (pix_grey_ != nullptr) { @@ -240,8 +240,8 @@ public: return pix_binary_; } } - void set_pix_thresholds(Pix *thresholds) { - pixDestroy(&pix_thresholds_); + void set_pix_thresholds(Image thresholds) { + pix_thresholds_.destroy(); pix_thresholds_ = thresholds; } int source_resolution() const { @@ -256,13 +256,13 @@ public: int ImageHeight() const { return pixGetHeight(pix_binary_); } - Pix *scaled_color() const { + Image scaled_color() const { return scaled_color_; } int scaled_factor() const { return scaled_factor_; } - void SetScaledColor(int factor, Pix *color) { + void SetScaledColor(int factor, Image color) { scaled_factor_ = factor; scaled_color_ = color; } @@ -328,8 +328,8 @@ public: BLOBNBOX_LIST *diacritic_blobs, Tesseract *osd_tess, OSResults *osr); ColumnFinder *SetupPageSegAndDetectOrientation(PageSegMode pageseg_mode, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, - TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, - Pix **music_mask_pix); + TO_BLOCK_LIST *to_blocks, Image *photo_mask_pix, + Image *music_mask_pix); // par_control.cpp void PrerecAllWordsPar(const std::vector &words); @@ -1034,13 +1034,13 @@ private: std::string word_config_; // Image used for input to layout analysis and tesseract recognition. // May be modified by the ShiroRekhaSplitter to eliminate the top-line. - Pix *pix_binary_; + Image pix_binary_; // Grey-level input image if the input was not binary, otherwise nullptr. 
- Pix *pix_grey_; + Image pix_grey_; // Original input image. Color if the input was color. - Pix *pix_original_; + Image pix_original_; // Thresholds that were used to generate the thresholded image from grey. - Pix *pix_thresholds_; + Image pix_thresholds_; // Debug images. If non-empty, will be written on destruction. DebugPixa pixa_debug_; // Input image resolution after any scaling. The resolution is not well @@ -1053,7 +1053,7 @@ private: Textord textord_; // True if the primary language uses right_to_left reading order. bool right_to_left_; - Pix *scaled_color_; + Image scaled_color_; int scaled_factor_; FCOORD deskew_; FCOORD reskew_; diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 1e1960ce..de98b259 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -49,7 +49,7 @@ ImageThresholder::~ImageThresholder() { // Destroy the Pix if there is one, freeing memory. void ImageThresholder::Clear() { - pixDestroy(&pix_); + pix_.destroy(); } // Return true if no image has been set. @@ -71,7 +71,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h if (bpp == 0) { bpp = 1; } - Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); + Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); l_uint32 *data = pixGetData(pix); int wpl = pixGetWpl(pix); switch (bpp) { @@ -121,7 +121,7 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); } SetImage(pix); - pixDestroy(&pix); + pix.destroy(); } // Store the coordinates of the rectangle to process for later use. @@ -152,22 +152,22 @@ void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *heigh // SetImage for Pix clones its input, so the source pix may be pixDestroyed // immediately after, but may not go away until after the Thresholder has // finished with it. 
-void ImageThresholder::SetImage(const Pix *pix) { +void ImageThresholder::SetImage(const Image pix) { if (pix_ != nullptr) { - pixDestroy(&pix_); + pix_.destroy(); } - Pix *src = const_cast(pix); + Image src = pix; int depth; pixGetDimensions(src, &image_width_, &image_height_, &depth); // Convert the image as necessary so it is one of binary, plain RGB, or // 8 bit with no colormap. Guarantee that we always end up with our own copy, // not just a clone of the input. if (pixGetColormap(src)) { - Pix *tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); + Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); depth = pixGetDepth(tmp); if (depth > 1 && depth < 8) { pix_ = pixConvertTo8(tmp, false); - pixDestroy(&tmp); + tmp.destroy(); } else { pix_ = tmp; } @@ -188,7 +188,7 @@ void ImageThresholder::SetImage(const Pix *pix) { // Creates a Pix and sets pix to point to the resulting pointer. // Caller must use pixDestroy to free the created Pix. /// Returns false on error. -bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) { +bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Image *pix) { if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) { tprintf("Image too large: (%d, %d)\n", image_width_, image_height_); return false; @@ -196,9 +196,9 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) { if (pix_channels_ == 0) { // We have a binary image, but it still has to be copied, as this API // allows the caller to modify the output. - Pix *original = GetPixRect(); + Image original = GetPixRect(); *pix = pixCopy(nullptr, original); - pixDestroy(&original); + original.destroy(); } else { OtsuThresholdRectToPix(pix_, pix); } @@ -212,18 +212,18 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) { // Ideally the 8 bit threshold should be the exact threshold used to generate // the binary image in ThresholdToPix, but this is not a hard constraint. 
// Returns nullptr if the input is binary. PixDestroy after use. -Pix *ImageThresholder::GetPixRectThresholds() { +Image ImageThresholder::GetPixRectThresholds() { if (IsBinary()) { return nullptr; } - Pix *pix_grey = GetPixRectGrey(); + Image pix_grey = GetPixRectGrey(); int width = pixGetWidth(pix_grey); int height = pixGetHeight(pix_grey); std::vector thresholds; std::vector hi_values; OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values); - pixDestroy(&pix_grey); - Pix *pix_thresholds = pixCreate(width, height, 8); + pix_grey.destroy(); + Image pix_thresholds = pixCreate(width, height, 8); int threshold = thresholds[0] > 0 ? thresholds[0] : 128; pixSetAllArbitrary(pix_thresholds, threshold); return pix_thresholds; @@ -239,14 +239,14 @@ void ImageThresholder::Init() { // This function will be used in the future by the page layout analysis, and // the layout analysis that uses it will only be available with Leptonica, // so there is no raw equivalent. -Pix *ImageThresholder::GetPixRect() { +Image ImageThresholder::GetPixRect() { if (IsFullImage()) { // Just clone the whole thing. return pixClone(pix_); } else { // Crop to the given rectangle. Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); - Pix *cropped = pixClipRectangle(pix_, box, nullptr); + Image cropped = pixClipRectangle(pix_, box, nullptr); boxDestroy(&box); return cropped; } @@ -256,24 +256,24 @@ Pix *ImageThresholder::GetPixRect() { // and at the same resolution as the output binary. // The returned Pix must be pixDestroyed. // Provided to the classifier to extract features from the greyscale image. -Pix *ImageThresholder::GetPixRectGrey() { +Image ImageThresholder::GetPixRectGrey() { auto pix = GetPixRect(); // May have to be reduced to grey. 
int depth = pixGetDepth(pix); if (depth != 8) { if (depth == 24) { auto tmp = pixConvert24To32(pix); - pixDestroy(&pix); + pix.destroy(); pix = tmp; } auto result = pixConvertTo8(pix, false); - pixDestroy(&pix); + pix.destroy(); return result; } return pix; } // Otsu thresholds the rectangle, taking the rectangle from *this. -void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const { +void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const { std::vector thresholds; std::vector hi_values; @@ -298,8 +298,8 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values // arrays and also the bytes per pixel in src_pix. -void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector &thresholds, - const std::vector &hi_values, Pix **pix) const { +void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector &thresholds, + const std::vector &hi_values, Image *pix) const { *pix = pixCreate(rect_width_, rect_height_, 1); uint32_t *pixdata = pixGetData(*pix); int wpl = pixGetWpl(*pix); diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h index 5a4b5145..4d3a426e 100644 --- a/src/ccmain/thresholder.h +++ b/src/ccmain/thresholder.h @@ -113,13 +113,13 @@ public: /// SetImage for Pix clones its input, so the source pix may be pixDestroyed /// immediately after, but may not go away until after the Thresholder has /// finished with it. - void SetImage(const Pix *pix); + void SetImage(const Image pix); /// Threshold the source image as efficiently as possible to the output Pix. /// Creates a Pix and sets pix to point to the resulting pointer. /// Caller must use pixDestroy to free the created Pix. /// Returns false on error. 
- virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix); + virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix); // Gets a pix that contains an 8 bit threshold value at each pixel. The // returned pix may be an integer reduction of the binary image such that @@ -128,20 +128,20 @@ public: // Ideally the 8 bit threshold should be the exact threshold used to generate // the binary image in ThresholdToPix, but this is not a hard constraint. // Returns nullptr if the input is binary. PixDestroy after use. - virtual Pix *GetPixRectThresholds(); + virtual Image GetPixRectThresholds(); /// Get a clone/copy of the source image rectangle. /// The returned Pix must be pixDestroyed. /// This function will be used in the future by the page layout analysis, and /// the layout analysis that uses it will only be available with Leptonica, /// so there is no raw equivalent. - Pix *GetPixRect(); + Image GetPixRect(); // Get a clone/copy of the source image rectangle, reduced to greyscale, // and at the same resolution as the output binary. // The returned Pix must be pixDestroyed. // Provided to the classifier to extract features from the greyscale image. - virtual Pix *GetPixRectGrey(); + virtual Image GetPixRectGrey(); protected: // ---------------------------------------------------------------------- @@ -157,19 +157,19 @@ protected: } // Otsu thresholds the rectangle, taking the rectangle from *this. - void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const; + void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const; /// Threshold the rectangle, taking everything except the src_pix /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values // arrays and also the bytes per pixel in src_pix. 
- void ThresholdRectToPix(Pix *src_pix, int num_channels, const std::vector &thresholds, - const std::vector &hi_values, Pix **pix) const; + void ThresholdRectToPix(Image src_pix, int num_channels, const std::vector &thresholds, + const std::vector &hi_values, Image *pix) const; protected: /// Clone or other copy of the source Pix. /// The pix will always be PixDestroy()ed on destruction of the class. - Pix *pix_; + Image pix_; int image_width_; ///< Width of source pix_. int image_height_; ///< Height of source pix_. diff --git a/src/ccstruct/blobbox.cpp b/src/ccstruct/blobbox.cpp index 6c7422e1..d66b7bea 100644 --- a/src/ccstruct/blobbox.cpp +++ b/src/ccstruct/blobbox.cpp @@ -390,7 +390,7 @@ void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST *blobs) { // Helper to compute edge offsets for all the blobs on the list. // See coutln.h for an explanation of edge offsets. -void BLOBNBOX::ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs) { +void BLOBNBOX::ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs) { int grey_height = 0; int thr_height = 0; int scale_factor = 1; @@ -1052,7 +1052,7 @@ void TO_BLOCK::DeleteUnownedNoise() { // Thresholds must either be the same size as grey or an integer down-scale // of grey. // See coutln.h for an explanation of edge offsets. -void TO_BLOCK::ComputeEdgeOffsets(Pix *thresholds, Pix *grey) { +void TO_BLOCK::ComputeEdgeOffsets(Image thresholds, Image grey) { BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs); BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs); BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs); diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 77ba941c..b5ba7557 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -435,7 +435,7 @@ public: static void DeleteNoiseBlobs(BLOBNBOX_LIST *blobs); // Helper to compute edge offsets for all the blobs on the list. // See coutln.h for an explanation of edge offsets. 
- static void ComputeEdgeOffsets(Pix *thresholds, Pix *grey, BLOBNBOX_LIST *blobs); + static void ComputeEdgeOffsets(Image thresholds, Image grey, BLOBNBOX_LIST *blobs); #ifndef GRAPHICS_DISABLED // Helper to draw all the blobs on the list in the given body_colour, @@ -745,7 +745,7 @@ public: // Thresholds must either be the same size as grey or an integer down-scale // of grey. // See coutln.h for an explanation of edge offsets. - void ComputeEdgeOffsets(Pix *thresholds, Pix *grey); + void ComputeEdgeOffsets(Image thresholds, Image grey); #ifndef GRAPHICS_DISABLED // Draw the noise blobs from all lists in red. diff --git a/src/ccstruct/blobs.cpp b/src/ccstruct/blobs.cpp index 3fad090a..74ce962e 100644 --- a/src/ccstruct/blobs.cpp +++ b/src/ccstruct/blobs.cpp @@ -401,7 +401,7 @@ void TBLOB::Clear() { // this blob and the Pix for the full image. void TBLOB::Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, - float final_xshift, float final_yshift, bool inverse, Pix *pix) { + float final_xshift, float final_yshift, bool inverse, Image pix) { denorm_.SetupNormalization(block, rotation, predecessor, x_origin, y_origin, x_scale, y_scale, final_xshift, final_yshift); denorm_.set_inverse(inverse); @@ -789,7 +789,7 @@ TWERD *TWERD::PolygonalCopy(bool allow_detailed_fx, WERD *src) { // Baseline normalizes the blobs in-place, recording the normalization in the // DENORMs in the blobs. 
-void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, +void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm) { TBOX word_box = bounding_box(); diff --git a/src/ccstruct/blobs.h b/src/ccstruct/blobs.h index a06ab54c..00fee8c5 100644 --- a/src/ccstruct/blobs.h +++ b/src/ccstruct/blobs.h @@ -324,7 +324,7 @@ struct TBLOB { // this blob and the Pix for the full image. void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, - float final_yshift, bool inverse, Pix *pix); + float final_yshift, bool inverse, Image pix); // Rotates by the given rotation in place. void Rotate(const FCOORD rotation); // Moves by the given vec in place. @@ -436,7 +436,7 @@ struct TWERD { static TWERD *PolygonalCopy(bool allow_detailed_fx, WERD *src); // Baseline normalizes the blobs in-place, recording the normalization in the // DENORMs in the blobs. - void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, + void BLNormalize(const BLOCK *block, const ROW *row, Image pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm); // Copies the data and the blobs, but leaves next untouched. diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp index 1925936d..e9d26ffe 100644 --- a/src/ccstruct/coutln.cpp +++ b/src/ccstruct/coutln.cpp @@ -736,7 +736,7 @@ static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, i * for each horizontal step, and the conflict in step direction and gradient * direction can be used to ignore the vertical steps. 
*/ -void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) { +void C_OUTLINE::ComputeEdgeOffsets(int threshold, Image pix) { if (pixGetDepth(pix) != 8) { return; } @@ -904,7 +904,7 @@ void C_OUTLINE::ComputeBinaryOffsets() { * Renders the outline to the given pix, with left and top being * the coords of the upper-left corner of the pix. */ -void C_OUTLINE::render(int left, int top, Pix *pix) const { +void C_OUTLINE::render(int left, int top, Image pix) const { ICOORD pos = start; for (int stepindex = 0; stepindex < stepcount; ++stepindex) { ICOORD next_step = step(stepindex); @@ -924,7 +924,7 @@ void C_OUTLINE::render(int left, int top, Pix *pix) const { * @param top coord * @param pix the pix to outline */ -void C_OUTLINE::render_outline(int left, int top, Pix *pix) const { +void C_OUTLINE::render_outline(int left, int top, Image pix) const { ICOORD pos = start; for (int stepindex = 0; stepindex < stepcount; ++stepindex) { ICOORD next_step = step(stepindex); diff --git a/src/ccstruct/coutln.h b/src/ccstruct/coutln.h index 274852fa..7477e90d 100644 --- a/src/ccstruct/coutln.h +++ b/src/ccstruct/coutln.h @@ -234,18 +234,18 @@ public: // Adds sub-pixel resolution EdgeOffsets for the outline if the supplied // pix is 8-bit. Does nothing otherwise. - void ComputeEdgeOffsets(int threshold, Pix *pix); + void ComputeEdgeOffsets(int threshold, Image pix); // Adds sub-pixel resolution EdgeOffsets for the outline using only // a binary image source. void ComputeBinaryOffsets(); // Renders the outline to the given pix, with left and top being // the coords of the upper-left corner of the pix. - void render(int left, int top, Pix *pix) const; + void render(int left, int top, Image pix) const; // Renders just the outline to the given pix (no fill), with left and top // being the coords of the upper-left corner of the pix. 
- void render_outline(int left, int top, Pix *pix) const; + void render_outline(int left, int top, Image pix) const; #ifndef GRAPHICS_DISABLED void plot( // draw one diff --git a/src/ccstruct/debugpixa.h b/src/ccstruct/debugpixa.h index f423cc99..1cd3405c 100644 --- a/src/ccstruct/debugpixa.h +++ b/src/ccstruct/debugpixa.h @@ -1,6 +1,8 @@ #ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_ #define TESSERACT_CCSTRUCT_DEBUGPIXA_H_ +#include "image.h" + #include namespace tesseract { @@ -27,11 +29,11 @@ public: // Adds the given pix to the set of pages in the PDF file, with the given // caption added to the top. - void AddPix(const Pix *pix, const char *caption) { - int depth = pixGetDepth(const_cast(pix)); + void AddPix(const Image pix, const char *caption) { + int depth = pixGetDepth(pix); int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80); - Pix *pix_debug = - pixAddSingleTextblock(const_cast(pix), fonts_, caption, color, L_ADD_BELOW, nullptr); + Image pix_debug = + pixAddSingleTextblock(pix, fonts_, caption, color, L_ADD_BELOW, nullptr); pixaAddPix(pixa_, pix_debug, L_INSERT); } diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index d41d0a5c..48ef81d3 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -43,7 +43,7 @@ const int kMaxReadAhead = 8; ImageData::ImageData() : page_number_(-1), vertical_text_(false) {} // Takes ownership of the pix and destroys it. -ImageData::ImageData(bool vertical, Pix *pix) : page_number_(0), vertical_text_(vertical) { +ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) { SetPix(pix); } ImageData::~ImageData() { @@ -176,12 +176,12 @@ bool ImageData::SkipDeSerialize(TFile *fp) { // Saves the given Pix as a PNG-encoded string and destroys it. // In case of missing PNG support in Leptonica use PNM format, // which requires more memory. 
-void ImageData::SetPix(Pix *pix) { +void ImageData::SetPix(Image pix) { SetPixInternal(pix, &image_data_); } // Returns the Pix image for *this. Must be pixDestroyed after use. -Pix *ImageData::GetPix() const { +Image ImageData::GetPix() const { return GetPixInternal(image_data_); } @@ -191,11 +191,11 @@ Pix *ImageData::GetPix() const { // The return value is the scaled Pix, which must be pixDestroyed after use, // and scale_factor (if not nullptr) is set to the scale factor that was applied // to the image to achieve the target_height. -Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, +Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, int *scaled_height, std::vector *boxes) const { int input_width = 0; int input_height = 0; - Pix *src_pix = GetPix(); + Image src_pix = GetPix(); ASSERT_HOST(src_pix != nullptr); input_width = pixGetWidth(src_pix); input_height = pixGetHeight(src_pix); @@ -210,11 +210,11 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor, *scaled_height = target_height; } // Get the scaled image. - Pix *pix = pixScale(src_pix, im_factor, im_factor); + Image pix = pixScale(src_pix, im_factor, im_factor); if (pix == nullptr) { tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height, im_factor); - pixDestroy(&src_pix); + src_pix.destroy(); return nullptr; } if (scaled_width != nullptr) { @@ -223,7 +223,7 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor, if (scaled_height != nullptr) { *scaled_height = pixGetHeight(pix); } - pixDestroy(&src_pix); + src_pix.destroy(); if (boxes != nullptr) { // Get the boxes. boxes->clear(); @@ -253,7 +253,7 @@ int ImageData::MemoryUsed() const { void ImageData::Display() const { const int kTextSize = 64; // Draw the image. 
- Pix *pix = GetPix(); + Image pix = GetPix(); if (pix == nullptr) { return; } @@ -263,7 +263,7 @@ void ImageData::Display() const { new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize), 2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true); win->Image(pix, 0, height - 1); - pixDestroy(&pix); + pix.destroy(); // Draw the boxes. win->Pen(ScrollView::RED); win->Brush(ScrollView::NONE); @@ -306,7 +306,7 @@ void ImageData::AddBoxes(const std::vector &boxes, const std::vector *image_data) { +void ImageData::SetPixInternal(Image pix, std::vector *image_data) { l_uint8 *data; size_t size; l_int32 ret; @@ -314,7 +314,7 @@ void ImageData::SetPixInternal(Pix *pix, std::vector *image_data) { if (ret) { ret = pixWriteMem(&data, &size, pix, IFF_PNM); } - pixDestroy(&pix); + pix.destroy(); // TODO: optimize resize (no init). image_data->resize(size); memcpy(&(*image_data)[0], data, size); @@ -322,8 +322,8 @@ void ImageData::SetPixInternal(Pix *pix, std::vector *image_data) { } // Returns the Pix image for the image_data. Must be pixDestroyed after use. -Pix *ImageData::GetPixInternal(const std::vector &image_data) { - Pix *pix = nullptr; +Image ImageData::GetPixInternal(const std::vector &image_data) { + Image pix = nullptr; if (!image_data.empty()) { // Convert the array to an image. const auto *u_data = reinterpret_cast(&image_data[0]); diff --git a/src/ccstruct/imagedata.h b/src/ccstruct/imagedata.h index f78d34ee..ebd11f1b 100644 --- a/src/ccstruct/imagedata.h +++ b/src/ccstruct/imagedata.h @@ -19,6 +19,7 @@ #ifndef TESSERACT_IMAGE_IMAGEDATA_H_ #define TESSERACT_IMAGE_IMAGEDATA_H_ +#include "image.h" #include "points.h" // for FCOORD #include // for std::mutex @@ -62,7 +63,7 @@ class TESS_API ImageData { public: ImageData(); // Takes ownership of the pix. - ImageData(bool vertical, Pix *pix); + ImageData(bool vertical, Image pix); ~ImageData(); // Builds and returns an ImageData from the basic data. 
Note that imagedata, @@ -115,16 +116,16 @@ public: // Saves the given Pix as a PNG-encoded string and destroys it. // In case of missing PNG support in Leptonica use PNM format, // which requires more memory. - void SetPix(Pix *pix); + void SetPix(Image pix); // Returns the Pix image for *this. Must be pixDestroyed after use. - Pix *GetPix() const; + Image GetPix() const; // Gets anything and everything with a non-nullptr pointer, prescaled to a // given target_height (if 0, then the original image height), and aligned. // Also returns (if not nullptr) the width and height of the scaled image. // The return value is the scaled Pix, which must be pixDestroyed after use, // and scale_factor (if not nullptr) is set to the scale factor that was // applied to the image to achieve the target_height. - Pix *PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, + Image PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width, int *scaled_height, std::vector *boxes) const; int MemoryUsed() const; @@ -141,9 +142,9 @@ private: // Saves the given Pix as a PNG-encoded string and destroys it. // In case of missing PNG support in Leptonica use PNM format, // which requires more memory. - static void SetPixInternal(Pix *pix, std::vector *image_data); + static void SetPixInternal(Image pix, std::vector *image_data); // Returns the Pix image for the image_data. Must be pixDestroyed after use. - static Pix *GetPixInternal(const std::vector &image_data); + static Image GetPixInternal(const std::vector &image_data); // Parses the text string as a box file and adds any discovered boxes that // match the page number. Returns false on error. 
bool AddBoxes(const char *box_text); diff --git a/src/ccstruct/normalis.h b/src/ccstruct/normalis.h index b6379b57..5f8bc642 100644 --- a/src/ccstruct/normalis.h +++ b/src/ccstruct/normalis.h @@ -19,8 +19,10 @@ #ifndef NORMALIS_H #define NORMALIS_H -#include +#include "image.h" + #include + #include struct Pix; @@ -232,10 +234,10 @@ public: // Prints the content of the DENORM for debug purposes. void Print() const; - Pix *pix() const { + Image pix() const { return pix_; } - void set_pix(Pix *pix) { + void set_pix(Image pix) { pix_ = pix; } bool inverse() const { @@ -274,7 +276,7 @@ private: void Init(); // Best available image. - Pix *pix_; + Image pix_; // True if the source image is white-on-black. bool inverse_; // Block the word came from. If not null, block->re_rotation() takes the diff --git a/src/ccstruct/ocrblock.h b/src/ccstruct/ocrblock.h index 83bbe911..bde794d9 100644 --- a/src/ccstruct/ocrblock.h +++ b/src/ccstruct/ocrblock.h @@ -152,7 +152,7 @@ public: median_size_.set_y(y); } - Pix *render_mask(TBOX *mask_box) { + Image render_mask(TBOX *mask_box) { return pdblk.render_mask(re_rotation_, mask_box); } diff --git a/src/ccstruct/otsuthr.cpp b/src/ccstruct/otsuthr.cpp index ee453a68..68973941 100644 --- a/src/ccstruct/otsuthr.cpp +++ b/src/ccstruct/otsuthr.cpp @@ -35,7 +35,7 @@ namespace tesseract { // that there is no apparent foreground. At least one hi_value will not be -1. // The return value is the number of channels in the input image, being // the size of the output thresholds and hi_values arrays. 
-int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::vector &thresholds, +int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector &thresholds, std::vector &hi_values) { int num_channels = pixGetDepth(src_pix) / 8; // Of all channels with no good hi_value, keep the best so we can always @@ -143,7 +143,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, std::v // single channel. Each channel is always one byte per pixel. // Histogram is always a kHistogramSize(256) element array to count // occurrences of each pixel value. -void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, +void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height, int *histogram) { int num_channels = pixGetDepth(src_pix) / 8; channel = ClipToRange(channel, 0, num_channels - 1); diff --git a/src/ccstruct/otsuthr.h b/src/ccstruct/otsuthr.h index 35f7c919..e0d91103 100644 --- a/src/ccstruct/otsuthr.h +++ b/src/ccstruct/otsuthr.h @@ -19,6 +19,8 @@ #ifndef TESSERACT_CCMAIN_OTSUTHR_H_ #define TESSERACT_CCMAIN_OTSUTHR_H_ +#include "image.h" + #include // for std::vector struct Pix; @@ -35,7 +37,7 @@ const int kHistogramSize = 256; // The size of a histogram of pixel values. // that there is no apparent foreground. At least one hi_value will not be -1. // The return value is the number of channels in the input image, being // the size of the output thresholds and hi_values arrays. -int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, +int OtsuThreshold(Image src_pix, int left, int top, int width, int height, std::vector &thresholds, std::vector &hi_values); @@ -43,7 +45,7 @@ int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, // single channel. Each channel is always one byte per pixel. // Histogram is always a kHistogramSize(256) element array to count // occurrences of each pixel value. 
-void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, +void HistogramRect(Image src_pix, int channel, int left, int top, int width, int height, int *histogram); // Computes the Otsu threshold(s) for the given histogram. diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp index 6e4c5d97..db8e6d60 100644 --- a/src/ccstruct/pageres.cpp +++ b/src/ccstruct/pageres.cpp @@ -304,7 +304,7 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES &source) { // normalization scale and offset. // Returns false if the word is empty and sets up fake results. bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess, - Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, + Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block) { auto norm_mode_hint = static_cast(norm_mode); diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 3a5c898e..3658dcb1 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -462,7 +462,7 @@ public: // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, - Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, + Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block); diff --git a/src/ccstruct/pdblock.cpp b/src/ccstruct/pdblock.cpp index 490ad634..fa4b9870 100644 --- a/src/ccstruct/pdblock.cpp +++ b/src/ccstruct/pdblock.cpp @@ -134,10 +134,10 @@ void PDBLK::move( // reposition block // Returns a binary Pix mask with a 1 pixel for every pixel within the // block. Rotates the coordinate system by rerotation prior to rendering. 
-Pix *PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) { +Image PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) { TBOX rotated_box(box); rotated_box.rotate(rerotation); - Pix *pix = pixCreate(rotated_box.width(), rotated_box.height(), 1); + Image pix = pixCreate(rotated_box.width(), rotated_box.height(), 1); if (hand_poly != nullptr) { // We are going to rotate, so get a deep copy of the points and // make a new POLY_BLOCK with it. diff --git a/src/ccstruct/pdblock.h b/src/ccstruct/pdblock.h index e33e0f89..b27fa1f9 100644 --- a/src/ccstruct/pdblock.h +++ b/src/ccstruct/pdblock.h @@ -91,7 +91,7 @@ public: // block. Rotates the coordinate system by rerotation prior to rendering. // If not nullptr, mask_box is filled with the position box of the returned // mask image. - Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); + Image render_mask(const FCOORD &rerotation, TBOX *mask_box); #ifndef GRAPHICS_DISABLED /// draw histogram diff --git a/src/ccstruct/quspline.cpp b/src/ccstruct/quspline.cpp index bd507729..71698bc0 100644 --- a/src/ccstruct/quspline.cpp +++ b/src/ccstruct/quspline.cpp @@ -362,7 +362,7 @@ void QSPLINE::plot( // draw it } #endif -void QSPLINE::plot(Pix *pix) const { +void QSPLINE::plot(Image pix) const { if (pix == nullptr) { return; } diff --git a/src/ccstruct/quspline.h b/src/ccstruct/quspline.h index 7f7a5761..7ccab2cc 100644 --- a/src/ccstruct/quspline.h +++ b/src/ccstruct/quspline.h @@ -82,7 +82,7 @@ public: // Paint the baseline over pix. If pix has depth of 32, then the line will // be painted in red. Otherwise it will be painted in black. 
- void plot(Pix *pix) const; + void plot(Image pix) const; QSPLINE &operator=(const QSPLINE &source); // from this diff --git a/src/ccstruct/stepblob.cpp b/src/ccstruct/stepblob.cpp index 6ceaacd8..13051337 100644 --- a/src/ccstruct/stepblob.cpp +++ b/src/ccstruct/stepblob.cpp @@ -388,7 +388,7 @@ void C_BLOB::rotate(const FCOORD &rotation) { // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the // outline list and its children. -static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIST *list) { +static void ComputeEdgeOffsetsOutlineList(int threshold, Image pix, C_OUTLINE_LIST *list) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE *outline = it.data(); @@ -405,7 +405,7 @@ static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIS // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale // if the supplied pix is 8-bit or the binary edges if nullptr. -void C_BLOB::ComputeEdgeOffsets(int threshold, Pix *pix) { +void C_BLOB::ComputeEdgeOffsets(int threshold, Image pix) { ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines); } @@ -491,7 +491,7 @@ int16_t C_BLOB::EstimateBaselinePosition() { return best_min == box.top() ? 
bottom : best_min; } -static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pix) { +static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Image pix) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE *outline = it.data(); @@ -502,7 +502,7 @@ static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pi } } -static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Pix *pix) { +static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, Image pix) { C_OUTLINE_IT it(list); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { C_OUTLINE *outline = it.data(); @@ -511,18 +511,18 @@ static void render_outline_list_outline(C_OUTLINE_LIST *list, int left, int top, } // Returns a Pix rendering of the blob. pixDestroy after use. -Pix *C_BLOB::render() { +Image C_BLOB::render() { TBOX box = bounding_box(); - Pix *pix = pixCreate(box.width(), box.height(), 1); + Image pix = pixCreate(box.width(), box.height(), 1); render_outline_list(&outlines, box.left(), box.top(), pix); return pix; } // Returns a Pix rendering of the outline of the blob. (no fill). // pixDestroy after use. -Pix *C_BLOB::render_outline() { +Image C_BLOB::render_outline() { TBOX box = bounding_box(); - Pix *pix = pixCreate(box.width(), box.height(), 1); + Image pix = pixCreate(box.width(), box.height(), 1); render_outline_list_outline(&outlines, box.left(), box.top(), pix); return pix; } diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h index 5fe4f35b..08aa8a4a 100644 --- a/src/ccstruct/stepblob.h +++ b/src/ccstruct/stepblob.h @@ -83,17 +83,17 @@ public: // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale // if the supplied pix is 8-bit or the binary edges if nullptr. 
- void ComputeEdgeOffsets(int threshold, Pix *pix); + void ComputeEdgeOffsets(int threshold, Image pix); // Estimates and returns the baseline position based on the shape of the // outlines. int16_t EstimateBaselinePosition(); // Returns a Pix rendering of the blob. pixDestroy after use. - Pix *render(); + Image render(); // Returns a Pix rendering of the outline of the blob. (no fill). // pixDestroy after use. - Pix *render_outline(); + Image render_outline(); #ifndef GRAPHICS_DISABLED void plot( // draw one diff --git a/src/classify/shapeclassifier.cpp b/src/classify/shapeclassifier.cpp index b180013c..0683de35 100644 --- a/src/classify/shapeclassifier.cpp +++ b/src/classify/shapeclassifier.cpp @@ -36,7 +36,7 @@ namespace tesseract { // Classifies the given [training] sample, writing to results. // See shapeclassifier.h for a full description. // Default implementation calls the ShapeRating version. -int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, +int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results) { results->clear(); @@ -54,7 +54,7 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *pa // Classifies the given [training] sample, writing to results. // See shapeclassifier.h for a full description. // Default implementation aborts. -int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, +int ShapeClassifier::ClassifySample(const TrainingSample &sample, Image page_pix, int debug, int keep_this, std::vector *results) { ASSERT_HOST("Must implement ClassifySample!" == nullptr); return 0; @@ -64,7 +64,7 @@ int ShapeClassifier::ClassifySample(const TrainingSample &sample, Pix *page_pix, // If result is not nullptr, it is set with the shape_id and rating. // Does not need to be overridden if ClassifySample respects the keep_this // rule. 
-int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix, +int ShapeClassifier::BestShapeForUnichar(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id, ShapeRating *result) { std::vector results; const ShapeTable *shapes = GetShapeTable(); @@ -93,7 +93,7 @@ const UNICHARSET &ShapeClassifier::GetUnicharset() const { // the user has finished with debugging the sample. // Probably doesn't need to be overridden if the subclass provides // DisplayClassifyAs. -void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix, +void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id) { static ScrollView *terminator = nullptr; if (terminator == nullptr) { @@ -159,7 +159,7 @@ void ShapeClassifier::DebugDisplay(const TrainingSample &sample, Pix *page_pix, // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. -int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, +int ShapeClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id, int index, std::vector &windows) { // Does nothing in the default implementation. diff --git a/src/classify/shapeclassifier.h b/src/classify/shapeclassifier.h index 8728e45b..63988b2d 100644 --- a/src/classify/shapeclassifier.h +++ b/src/classify/shapeclassifier.h @@ -20,7 +20,10 @@ #ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ #define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ +#include "image.h" + #include + #include struct Pix; @@ -61,11 +64,11 @@ public: // classifiers. // NOTE: Neither overload of ClassifySample is pure, but at least one must // be overridden by a classifier in order for it to do anything. 
- virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, + virtual int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results); protected: - virtual int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, + virtual int ClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results); public: @@ -74,7 +77,7 @@ public: // Returns -1 if ClassifySample fails to provide any result containing // unichar_id. BestShapeForUnichar does not need to be overridden if // ClassifySample respects the keep_this rule. - virtual int BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix, + virtual int BestShapeForUnichar(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id, ShapeRating *result); // Provides access to the ShapeTable that this classifier works with. @@ -88,14 +91,14 @@ public: // the user has finished with debugging the sample. // Probably doesn't need to be overridden if the subclass provides // DisplayClassifyAs. - void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id); + void DebugDisplay(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id); // Displays classification as the given unichar_id. Creates as many windows // as it feels fit, using index as a guide for placement. Adds any created // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. - virtual int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id, + virtual int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, UNICHAR_ID unichar_id, int index, std::vector &windows); // Prints debug information on the results. 
context is some introductory/title diff --git a/src/classify/tessclassifier.cpp b/src/classify/tessclassifier.cpp index 2aafb5ef..6bdd8e70 100644 --- a/src/classify/tessclassifier.cpp +++ b/src/classify/tessclassifier.cpp @@ -25,7 +25,7 @@ namespace tesseract { // Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. -int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, +int TessClassifier::UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results) { const int old_matcher_level = classify_->matcher_debug_level; @@ -62,7 +62,7 @@ const UNICHARSET &TessClassifier::GetUnicharset() const { // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. -int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id, +int TessClassifier::DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id, int index, std::vector &windows) { int shape_id = unichar_id; // TODO(rays) Fix this so it works with both flat and real shapetables. diff --git a/src/classify/tessclassifier.h b/src/classify/tessclassifier.h index bf3dfeda..e72d1841 100644 --- a/src/classify/tessclassifier.h +++ b/src/classify/tessclassifier.h @@ -40,7 +40,7 @@ public: // Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. - int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, + int UnicharClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results) override; // Provides access to the ShapeTable that this classifier works with. 
const ShapeTable *GetShapeTable() const override; @@ -53,7 +53,7 @@ public: // windows to the windows output and returns a new index that may be used // by any subsequent classifiers. Caller waits for the user to view and // then destroys the windows by clearing the vector. - int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, int unichar_id, int index, + int DisplayClassifyAs(const TrainingSample &sample, Image page_pix, int unichar_id, int index, std::vector &windows) override; private: diff --git a/src/classify/trainingsample.cpp b/src/classify/trainingsample.cpp index 2e5cadd8..d4307a2e 100644 --- a/src/classify/trainingsample.cpp +++ b/src/classify/trainingsample.cpp @@ -305,8 +305,8 @@ void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) { } // Returns a pix representing the sample. (Int features only.) -Pix *TrainingSample::RenderToPix(const UNICHARSET *unicharset) const { - Pix *pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); +Image TrainingSample::RenderToPix(const UNICHARSET *unicharset) const { + Image pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); for (uint32_t f = 0; f < num_features_; ++f) { int start_x = features_[f].X; int start_y = kIntFeatureExtent - features_[f].Y; @@ -341,7 +341,7 @@ void TrainingSample::DisplayFeatures(ScrollView::Color color, ScrollView *window // by padding wherever possible. // The returned Pix must be pixDestroyed after use. // If the input page_pix is nullptr, nullptr is returned. 
-Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const { +Image TrainingSample::GetSamplePix(int padding, Image page_pix) const { if (page_pix == nullptr) { return nullptr; } @@ -354,7 +354,7 @@ Pix *TrainingSample::GetSamplePix(int padding, Pix *page_pix) const { padded_box &= page_box; Box *box = boxCreate(page_box.left(), page_height - page_box.top(), page_box.width(), page_box.height()); - Pix *sample_pix = pixClipRectangle(page_pix, box, nullptr); + Image sample_pix = pixClipRectangle(page_pix, box, nullptr); boxDestroy(&box); return sample_pix; } diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h index c823da20..a833617d 100644 --- a/src/classify/trainingsample.h +++ b/src/classify/trainingsample.h @@ -104,7 +104,7 @@ public: void IndexFeatures(const IntFeatureSpace &feature_space); // Returns a pix representing the sample. (Int features only.) - Pix *RenderToPix(const UNICHARSET *unicharset) const; + Image RenderToPix(const UNICHARSET *unicharset) const; // Displays the features in the given window with the given color. void DisplayFeatures(ScrollView::Color color, ScrollView *window) const; @@ -112,7 +112,7 @@ public: // by padding wherever possible. // The returned Pix must be pixDestroyed after use. // If the input page_pix is nullptr, nullptr is returned. - Pix *GetSamplePix(int padding, Pix *page_pix) const; + Image GetSamplePix(int padding, Image page_pix) const; // Accessors. UNICHAR_ID class_id() const { diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp index 54bab4c4..cc69c366 100644 --- a/src/lstm/input.cpp +++ b/src/lstm/input.cpp @@ -78,12 +78,12 @@ bool Input::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *sc // image_data. If non-null, *image_scale returns the image scale factor used. // Returns nullptr on error. 
/* static */ -Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, +Image Input::PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale) { // Note that NumInputs() is defined as input image height. int target_height = network->NumInputs(); int width, height; - Pix *pix = + Image pix = image_data.PreScale(target_height, kMaxInputHeight, image_scale, &width, &height, nullptr); if (pix == nullptr) { tprintf("Bad pix from ImageData!\n"); @@ -91,7 +91,7 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ } if (width < min_width || height < min_width) { tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, height, min_width); - pixDestroy(&pix); + pix.destroy(); return nullptr; } return pix; @@ -104,12 +104,12 @@ Pix *Input::PrepareLSTMInputs(const ImageData &image_data, const Network *networ // height == 1. If height == 0 then no scaling. // NOTE: It isn't safe for multiple threads to call this on the same pix. /* static */ -void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, +void Input::PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer, NetworkIO *input) { bool color = shape.depth() == 3; - Pix *var_pix = const_cast(pix); + Image var_pix = pix; int depth = pixGetDepth(var_pix); - Pix *normed_pix = nullptr; + Image normed_pix = nullptr; // On input to BaseAPI, an image is forced to be 1, 8 or 24 bit, without // colormap, so we just have to deal with depth conversion here. if (color) { @@ -135,12 +135,12 @@ void Input::PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *ran if (target_height != 0 && target_height != height) { // Get the scaled image. 
float im_factor = static_cast(target_height) / height; - Pix *scaled_pix = pixScale(normed_pix, im_factor, im_factor); - pixDestroy(&normed_pix); + Image scaled_pix = pixScale(normed_pix, im_factor, im_factor); + normed_pix.destroy(); normed_pix = scaled_pix; } input->FromPix(shape, normed_pix, randomizer); - pixDestroy(&normed_pix); + normed_pix.destroy(); } } // namespace tesseract. diff --git a/src/lstm/input.h b/src/lstm/input.h index 2efd4861..fb6131fb 100644 --- a/src/lstm/input.h +++ b/src/lstm/input.h @@ -77,7 +77,7 @@ public: // image_data. If non-null, *image_scale returns the image scale factor used. // Returns nullptr on error. /* static */ - static Pix *PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, + static Image PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale); // Converts the given pix to a NetworkIO of height and depth appropriate to // the given StaticShape: @@ -85,7 +85,7 @@ public: // Scale to target height, if the shape's height is > 1, or its depth if the // height == 1. If height == 0 then no scaling. // NOTE: It isn't safe for multiple threads to call this on the same pix. - static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, + static void PreparePixInput(const StaticShape &shape, const Image pix, TRand *randomizer, NetworkIO *input); private: diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp index a60b6920..0de0af4b 100644 --- a/src/lstm/lstmrecognizer.cpp +++ b/src/lstm/lstmrecognizer.cpp @@ -321,7 +321,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo // This ensures consistent recognition results. 
SetRandomSeed(); int min_width = network_->XScaleFactor(); - Pix *pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor); + Image pix = Input::PrepareLSTMInputs(image_data, network_, min_width, &randomizer_, scale_factor); if (pix == nullptr) { tprintf("Line cannot be recognized!!\n"); return false; @@ -330,7 +330,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo const int kMaxImageWidth = 128 * pixGetHeight(pix); if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) { tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix), pixGetHeight(pix)); - pixDestroy(&pix); + pix.destroy(); return false; } if (upside_down) { @@ -370,7 +370,7 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs); } } - pixDestroy(&pix); + pix.destroy(); if (debug) { std::vector labels, coords; LabelsFromOutputs(*outputs, &labels, &coords); @@ -404,7 +404,7 @@ std::string LSTMRecognizer::DecodeLabels(const std::vector &labels) { void LSTMRecognizer::DisplayForward(const NetworkIO &inputs, const std::vector &labels, const std::vector &label_coords, const char *window_name, ScrollView **window) { - Pix *input_pix = inputs.ToPix(); + Image input_pix = inputs.ToPix(); Network::ClearWindow(false, window_name, pixGetWidth(input_pix), pixGetHeight(input_pix), window); int line_height = Network::DisplayImage(input_pix, *window); DisplayLSTMOutput(labels, label_coords, line_height, *window); diff --git a/src/lstm/network.cpp b/src/lstm/network.cpp index 780f0cb5..a3552d21 100644 --- a/src/lstm/network.cpp +++ b/src/lstm/network.cpp @@ -327,7 +327,7 @@ double Network::Random(double range) { // === Debug image display methods. === // Displays the image of the matrix to the forward window. 
void Network::DisplayForward(const NetworkIO &matrix) { - Pix *image = matrix.ToPix(); + Image image = matrix.ToPix(); ClearWindow(false, name_.c_str(), pixGetWidth(image), pixGetHeight(image), &forward_win_); DisplayImage(image, forward_win_); forward_win_->Update(); @@ -335,7 +335,7 @@ void Network::DisplayForward(const NetworkIO &matrix) { // Displays the image of the matrix to the backward window. void Network::DisplayBackward(const NetworkIO &matrix) { - Pix *image = matrix.ToPix(); + Image image = matrix.ToPix(); std::string window_name = name_ + "-back"; ClearWindow(false, window_name.c_str(), pixGetWidth(image), pixGetHeight(image), &backward_win_); DisplayImage(image, backward_win_); @@ -371,10 +371,10 @@ void Network::ClearWindow(bool tess_coords, const char *window_name, int width, // Displays the pix in the given window. and returns the height of the pix. // The pix is pixDestroyed. -int Network::DisplayImage(Pix *pix, ScrollView *window) { +int Network::DisplayImage(Image pix, ScrollView *window) { int height = pixGetHeight(pix); window->Image(pix, 0, 0); - pixDestroy(&pix); + pix.destroy(); return height; } #endif // !GRAPHICS_DISABLED diff --git a/src/lstm/network.h b/src/lstm/network.h index d218bebd..4faac88d 100644 --- a/src/lstm/network.h +++ b/src/lstm/network.h @@ -283,7 +283,7 @@ public: // Displays the pix in the given window. and returns the height of the pix. // The pix is pixDestroyed. - static int DisplayImage(Pix *pix, ScrollView *window); + static int DisplayImage(Image pix, ScrollView *window); protected: // Returns a random number in [-range, range]. 
diff --git a/src/lstm/networkio.cpp b/src/lstm/networkio.cpp index 34d3dc6e..72f33efc 100644 --- a/src/lstm/networkio.cpp +++ b/src/lstm/networkio.cpp @@ -123,7 +123,7 @@ void NetworkIO::ZeroInvalidElements() { // of text, so a horizontal line through the middle of the image passes through // at least some of it, so local minima and maxima are a good proxy for black // and white pixel samples. -static void ComputeBlackWhite(Pix *pix, float *black, float *white) { +static void ComputeBlackWhite(Image pix, float *black, float *white) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); STATS mins(0, 256), maxes(0, 256); @@ -159,21 +159,21 @@ static void ComputeBlackWhite(Pix *pix, float *black, float *white) { // Sets up the array from the given image, using the currently set int_mode_. // If the image width doesn't match the shape, the image is truncated or padded // with noise to match. -void NetworkIO::FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer) { - std::vector pixes(1, pix); +void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) { + std::vector pixes(1, pix); FromPixes(shape, pixes, randomizer); } // Sets up the array from the given set of images, using the currently set // int_mode_. If the image width doesn't match the shape, the images are // truncated or padded with noise to match. 
-void NetworkIO::FromPixes(const StaticShape &shape, const std::vector &pixes, +void NetworkIO::FromPixes(const StaticShape &shape, const std::vector &pixes, TRand *randomizer) { int target_height = shape.height(); int target_width = shape.width(); std::vector> h_w_pairs; for (auto pix : pixes) { - Pix *var_pix = const_cast(pix); + Image var_pix = pix; int width = pixGetWidth(var_pix); if (target_width != 0) { width = target_width; @@ -188,7 +188,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector(pixes[b]); + Image pix = pixes[b]; float black = 0.0f, white = 255.0f; if (shape.depth() != 3) { ComputeBlackWhite(pix, &black, &white); @@ -212,7 +212,7 @@ void NetworkIO::FromPixes(const StaticShape &shape, const std::vector -#include -#include - #include "helpers.h" +#include "image.h" #include "static_shape.h" #include "stridemap.h" #include "weightmatrix.h" +#include +#include +#include + struct Pix; namespace tesseract { @@ -66,11 +67,11 @@ public: // Sets up the array from the given image, using the currently set int_mode_. // If the image width doesn't match the shape, the image is truncated or // padded with noise to match. - void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer); + void FromPix(const StaticShape &shape, const Image pix, TRand *randomizer); // Sets up the array from the given set of images, using the currently set // int_mode_. If the image width doesn't match the shape, the images are // truncated or padded with noise to match. 
- void FromPixes(const StaticShape &shape, const std::vector &pixes, + void FromPixes(const StaticShape &shape, const std::vector &pixes, TRand *randomizer); // Copies the given pix to *this at the given batch index, stretching and // clipping the pixel values so that [black, black + 2*contrast] maps to the @@ -79,12 +80,12 @@ public: // of input channels, the height is the height of the image, and the width // is the width of the image, or truncated/padded with noise if the width // is a fixed size. - void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer); + void Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer); // Copies the given pix to *this at the given batch index, as Copy2DImage // above, except that the output depth is the height of the input image, the // output height is 1, and the output width as for Copy2DImage. // The image is thus treated as a 1-d set of vertical pixel strips. - void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer); + void Copy1DGreyImage(int batch, Image pix, float black, float contrast, TRand *randomizer); // Helper stores the pixel value in i_ or f_ according to int_mode_. // t: is the index from the StrideMap corresponding to the current // [batch,y,x] position @@ -94,7 +95,7 @@ public: // contrast: the range of pixel values to stretch to half the range of *this. void SetPixel(int t, int f, int pixel, float black, float contrast); // Converts the array to a Pix. Must be pixDestroyed after use. - Pix *ToPix() const; + Image ToPix() const; // Prints the first and last num timesteps of the array for each feature. 
void Print(int num) const; diff --git a/src/opencl/openclwrapper.cpp b/src/opencl/openclwrapper.cpp index 2311b23c..578f8f50 100644 --- a/src/opencl/openclwrapper.cpp +++ b/src/opencl/openclwrapper.cpp @@ -629,7 +629,7 @@ static cl_mem allocateZeroCopyBuffer(const KernelEnv &rEnv, l_uint32 *hostbuffer return membuffer; } -static Pix *mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Pix *pixd, Pix *pixs, +static Image mapOutputCLBuffer(const KernelEnv &rEnv, cl_mem clbuffer, Image pixd, Image pixs, int elements, cl_mem_flags flags, bool memcopy = false, bool sync = true) { if (!pixd) { @@ -673,7 +673,7 @@ void OpenclDevice::releaseMorphCLBuffers() { pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = nullptr; } -int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs) { +int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs) { SetKernelEnv(&rEnv); if (pixThBuffer != nullptr) { @@ -1455,8 +1455,8 @@ static cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_me // OpenCL implementation of Get Lines from pix function // Note: Assumes the source and dest opencl buffer are initialized. 
No check // done -void OpenclDevice::pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline, - Pix **pixClosed, bool getpixClosed, l_int32 close_hsize, +void OpenclDevice::pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, + Image *pixClosed, bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize, l_int32 line_vsize) { l_uint32 wpl, h; @@ -1678,7 +1678,7 @@ int OpenclDevice::HistogramRectOCL(void *imageData, int bytes_per_pixel, int byt ************************************************************************/ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel, int bytes_per_line, int *thresholds, int *hi_values, - Pix **pix, int height, int width, int top, int left) { + Image *pix, int height, int width, int top, int left) { int retVal = 0; /* create pix result buffer */ *pix = pixCreate(width, height, 1); @@ -1783,7 +1783,7 @@ struct TessScoreEvaluationInputData { int width; int numChannels; unsigned char *imageData; - Pix *pix; + Image pix; }; static void populateTessScoreEvaluationInputData(TessScoreEvaluationInputData *input) { @@ -1928,7 +1928,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat # else clock_gettime(CLOCK_MONOTONIC, &time_funct_start); # endif - Pix *pix = pixCreate(input.width, input.height, 32); + Image pix = pixCreate(input.width, input.height, 32); l_uint32 *pixData = pixGetData(pix); int i, j; int idx = 0; @@ -1954,7 +1954,7 @@ static double composeRGBPixelMicroBench(GPUEnv *env, TessScoreEvaluationInputDat time = (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; # endif - pixDestroy(&pix); + pix.destroy(); } return time; @@ -2044,7 +2044,7 @@ static double histogramRectMicroBench(GPUEnv *env, TessScoreEvaluationInputData // Reproducing the ThresholdRectToPix native version static void 
ThresholdRectToPix_Native(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, const int *thresholds, - const int *hi_values, Pix **pix) { + const int *hi_values, Image *pix) { int top = 0; int left = 0; int width = pixGetWidth(*pix); @@ -2193,7 +2193,7 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD # endif OpenclDevice::gpuEnv = *env; OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); - Pix *pix_vline = nullptr, *pix_hline = nullptr, *pix_closed = nullptr; + Image pix_vline = nullptr, pix_hline = nullptr, pix_closed = nullptr; OpenclDevice::pixGetLinesCL(nullptr, input.pix, &pix_vline, &pix_hline, &pix_closed, true, closing_brick, closing_brick, max_line_width, max_line_width, min_line_length, min_line_length); @@ -2221,16 +2221,16 @@ static double getLineMasksMorphMicroBench(GPUEnv *env, TessScoreEvaluationInputD # endif // native serial code - Pix *src_pix = input.pix; - Pix *pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); - Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); - Pix *pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pixDestroy(&pix_solid); - Pix *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); - Pix *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pixDestroy(&pix_hline); - pixDestroy(&pix_vline); - pixDestroy(&pix_hollow); + Image src_pix = input.pix; + Image pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); + Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); + Image pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); + pix_solid.destroy(); + Image pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); + Image pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + pix_hline.destroy(); + pix_vline.destroy(); + pix_hollow.destroy(); # if ON_WINDOWS
QueryPerformanceCounter(&time_funct_end); diff --git a/src/opencl/openclwrapper.h b/src/opencl/openclwrapper.h index 53bfe8ac..9360b0d5 100644 --- a/src/opencl/openclwrapper.h +++ b/src/opencl/openclwrapper.h @@ -127,10 +127,10 @@ public: /* OpenCL implementations of Morphological operations*/ // Initialization of OCL buffers used in Morph operations - static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Pix *pixs); + static int initMorphCLAllocations(l_int32 wpl, l_int32 h, Image pixs); static void releaseMorphCLBuffers(); - static void pixGetLinesCL(Pix *pixd, Pix *pixs, Pix **pix_vline, Pix **pix_hline, Pix **pixClosed, + static void pixGetLinesCL(Image pixd, Image pixs, Image *pix_vline, Image *pix_hline, Image *pixClosed, bool getpixClosed, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, l_int32 line_hsize, l_int32 line_vsize); @@ -161,7 +161,7 @@ public: int *histogramAllChannels); static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel, - int bytes_per_line, int *thresholds, int *hi_values, Pix **pix, + int bytes_per_line, int *thresholds, int *hi_values, Image *pix, int rect_height, int rect_width, int rect_top, int rect_left); static ds_device getDeviceSelection(); diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index 217151e6..97bbb4e2 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -608,7 +608,7 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) { #endif // !GRAPHICS_DISABLED -void BaselineBlock::DrawPixSpline(Pix *pix_in) { +void BaselineBlock::DrawPixSpline(Image pix_in) { if (non_text_block_) { return; } diff --git a/src/textord/baselinedetect.h b/src/textord/baselinedetect.h index 1d905946..38721a62 100644 --- a/src/textord/baselinedetect.h +++ b/src/textord/baselinedetect.h @@ -178,7 +178,7 @@ public: void DrawFinalRows(const ICOORD &page_tr); // Render the generated spline baselines for this block on pix_in. 
- void DrawPixSpline(Pix *pix_in); + void DrawPixSpline(Image pix_in); private: // Top-level line-spacing calculation. Computes an estimate of the line- diff --git a/src/textord/bbgrid.cpp b/src/textord/bbgrid.cpp index c425281c..e186cc15 100644 --- a/src/textord/bbgrid.cpp +++ b/src/textord/bbgrid.cpp @@ -187,8 +187,8 @@ bool IntGrid::AnyZeroInRect(const TBOX &rect) const { // Returns a full-resolution binary pix in which each cell over the given // threshold is filled as a black square. pixDestroy after use. // Edge cells, which have a zero 4-neighbour, are not marked. -Pix *IntGrid::ThresholdToPix(int threshold) const { - Pix *pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); +Image IntGrid::ThresholdToPix(int threshold) const { + Image pix = pixCreate(tright().x() - bleft().x(), tright().y() - bleft().y(), 1); int cellsize = gridsize(); for (int y = 0; y < gridheight(); ++y) { for (int x = 0; x < gridwidth(); ++x) { @@ -204,7 +204,7 @@ Pix *IntGrid::ThresholdToPix(int threshold) const { } // Make a Pix of the correct scaled size for the TraceOutline functions. -static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) { +static Image GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom) { // Compute grid bounds of the outline and pad all round by 1. int grid_left = (box.left() - bleft.x()) / gridsize - 1; int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; @@ -221,10 +221,10 @@ static Pix *GridReducedPix(const TBOX &box, int gridsize, ICOORD bleft, int *lef // Also returns the grid coords of the bottom-left of the Pix, in *left // and *bottom, which corresponds to (0, 0) on the Pix. // Note that the Pix is used upside-down, with (0, 0) being the bottom-left. 
-Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, +Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom) { const TBOX &box = outline->bounding_box(); - Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom); + Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); l_uint32 *data = pixGetData(pix); int length = outline->pathlength(); @@ -243,13 +243,13 @@ Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, in - Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, + Image pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, &grid_left, &grid_bottom); grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob); - pixDestroy(&pix); + pix.destroy(); #endif // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. -Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) { +Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom) { const TBOX &box = block->pdblk.bounding_box(); - Pix *pix = GridReducedPix(box, gridsize, bleft, left, bottom); + Image pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); l_uint32 *data = pixGetData(pix); ICOORDELT_IT it(block->pdblk.poly_block()->points()); diff --git a/src/textord/bbgrid.h b/src/textord/bbgrid.h index d389cc0c..391d0db3 100644 --- a/src/textord/bbgrid.h +++ b/src/textord/bbgrid.h @@ -39,10 +39,10 @@ namespace tesseract { // Also returns the grid coords of the bottom-left of the Pix, in *left // and *bottom, which corresponds to (0, 0) on the Pix. // Note that the Pix is used upside-down, with (0, 0) being the bottom-left. -Pix *TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, +Image TraceOutlineOnReducedPix(C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom); // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
-Pix *TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom); +Image TraceBlockOnReducedPix(BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom); template class GridSearch; @@ -135,7 +135,7 @@ public: // Returns a full-resolution binary pix in which each cell over the given // threshold is filled as a black square. pixDestroy after use. - Pix *ThresholdToPix(int threshold) const; + Image ThresholdToPix(int threshold) const; private: int *grid_; // 2-d array of ints. @@ -190,7 +190,7 @@ public: // grid (in grid coords), and the pix works up the grid from there. // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call // RepositionIterator() on any GridSearches that are active on this grid. - void InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox); + void InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox); // Remove the bbox from the grid. // WARNING: Any GridSearch operating on this grid could be invalidated! @@ -559,7 +559,7 @@ void BBGrid::InsertBBox(bool h_spread, bool v_spread, // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call // RepositionIterator() on any GridSearches that are active on this grid. template -void BBGrid::InsertPixPtBBox(int left, int bottom, Pix *pix, BBC *bbox) { +void BBGrid::InsertPixPtBBox(int left, int bottom, Image pix, BBC *bbox) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); for (int y = 0; y < height; ++y) { diff --git a/src/textord/ccnontextdetect.cpp b/src/textord/ccnontextdetect.cpp index 4f5a31bd..244a38a1 100644 --- a/src/textord/ccnontextdetect.cpp +++ b/src/textord/ccnontextdetect.cpp @@ -81,7 +81,7 @@ CCNonTextDetect::~CCNonTextDetect() { // The blob_block is the usual result of connected component analysis, // holding the detected blobs. // The returned Pix should be PixDestroyed after use. 
-Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block) { +Image CCNonTextDetect::ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block) { // Insert the smallest blobs into the grid. InsertBlobList(&blob_block->small_blobs); InsertBlobList(&blob_block->noise_blobs); @@ -102,7 +102,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b } noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); good_grid.Clear(); // Not needed any more. - Pix *pix = noise_density_->ThresholdToPix(max_noise_count_); + Image pix = noise_density_->ThresholdToPix(max_noise_count_); if (debug) { pixWrite("junknoisemask.png", pix, IFF_PNG); } @@ -148,7 +148,7 @@ Pix *CCNonTextDetect::ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *b // more likely non-text. // The photo_map is used to bias the decision towards non-text, rather than // supplying definite decision. -IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid) { +IntGrid *CCNonTextDetect::ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid) { IntGrid *noise_counts = CountCellElements(); IntGrid *noise_density = noise_counts->NeighbourhoodSum(); IntGrid *good_counts = good_grid->CountCellElements(); @@ -235,7 +235,7 @@ static TBOX AttemptBoxExpansion(const TBOX &box, const IntGrid &noise_density, i // blobs are drawn on it in ok_color. void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, ScrollView *win, ScrollView::Color ok_color, - Pix *nontext_mask) { + Image nontext_mask) { int imageheight = tright().y() - bleft().x(); BLOBNBOX_IT blob_it(blobs); BLOBNBOX_LIST dead_blobs; @@ -255,10 +255,10 @@ void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_bl if (noise_density_->AnyZeroInRect(box)) { // There is a danger that the bounding box may overlap real text, so // we need to render the outline. 
- Pix *blob_pix = blob->cblob()->render_outline(); + Image blob_pix = blob->cblob()->render_outline(); pixRasterop(nontext_mask, box.left(), imageheight - box.top(), box.width(), box.height(), PIX_SRC | PIX_DST, blob_pix, 0, 0); - pixDestroy(&blob_pix); + blob_pix.destroy(); } else { if (box.area() < gridsize() * gridsize()) { // It is a really bad idea to make lots of small components in the diff --git a/src/textord/ccnontextdetect.h b/src/textord/ccnontextdetect.h index f71e22fd..ecdb5788 100644 --- a/src/textord/ccnontextdetect.h +++ b/src/textord/ccnontextdetect.h @@ -42,7 +42,7 @@ public: // The blob_block is the usual result of connected component analysis, // holding the detected blobs. // The returned Pix should be PixDestroyed after use. - Pix *ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block); + Image ComputeNonTextMask(bool debug, Image photo_map, TO_BLOCK *blob_block); private: // Computes and returns the noise_density IntGrid, at the same gridsize as @@ -52,7 +52,7 @@ private: // more likely non-text. // The photo_map is used to bias the decision towards non-text, rather than // supplying definite decision. - IntGrid *ComputeNoiseDensity(bool debug, Pix *photo_map, BlobGrid *good_grid); + IntGrid *ComputeNoiseDensity(bool debug, Image photo_map, BlobGrid *good_grid); // Tests each blob in the list to see if it is certain non-text using 2 // conditions: @@ -68,7 +68,7 @@ private: // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. // If the win is not nullptr, deleted blobs are drawn on it in red, and kept void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST *blobs, int max_blob_overlaps, ScrollView *win, - ScrollView::Color ok_color, Pix *nontext_mask); + ScrollView::Color ok_color, Image nontext_mask); // Returns true if the given blob overlaps more than max_overlaps blobs // in the current grid. 
bool BlobOverlapsTooMuch(BLOBNBOX *blob, int max_overlaps); diff --git a/src/textord/colfind.cpp b/src/textord/colfind.cpp index 34ee7fb2..e984bf02 100644 --- a/src/textord/colfind.cpp +++ b/src/textord/colfind.cpp @@ -108,7 +108,7 @@ ColumnFinder::~ColumnFinder() { delete[] best_columns_; delete stroke_width_; delete input_blobs_win_; - pixDestroy(&nontext_map_); + nontext_map_.destroy(); while (denorm_ != nullptr) { DENORM *dead_denorm = denorm_; denorm_ = const_cast(denorm_->predecessor()); @@ -148,7 +148,7 @@ ColumnFinder::~ColumnFinder() { // direction, so the textline projection_ map can be setup. // On return, IsVerticallyAlignedText may be called (now optionally) to // determine the gross textline alignment of the page. -void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, +void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block) { part_grid_.Init(gridsize(), bleft(), tright()); delete stroke_width_; @@ -162,7 +162,7 @@ void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask } #endif // !GRAPHICS_DISABLED SetBlockRuleEdges(input_block); - pixDestroy(&nontext_map_); + nontext_map_.destroy(); // Run a preliminary strokewidth neighbour detection on the medium blobs. stroke_width_->SetNeighboursOnMediumBlobs(input_block); CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); @@ -283,9 +283,9 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, // noise/diacriticness determined via classification. // Returns -1 if the user hits the 'd' key in the blocks window while running // in debug mode, which requests a retry with more debug info. 
-int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, - TO_BLOCK *input_block, Pix *photo_mask_pix, Pix *thresholds_pix, - Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, +int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, + TO_BLOCK *input_block, Image photo_mask_pix, Image thresholds_pix, + Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) { pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); stroke_width_->FindLeaderPartitions(input_block, &part_grid_); diff --git a/src/textord/colfind.h b/src/textord/colfind.h index 2aa14cd5..044b5216 100644 --- a/src/textord/colfind.h +++ b/src/textord/colfind.h @@ -107,7 +107,7 @@ public: // direction, so the textline projection_ map can be setup. // On return, IsVerticallyAlignedText may be called (now optionally) to // determine the gross textline alignment of the page. - void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block); + void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block); // Tests for vertical alignment of text (returning true if so), and generates // a list of blobs (in osd_blobs) for orientation and script detection. @@ -156,8 +156,8 @@ public: // appropriate word after the rest of layout analysis. // Returns -1 if the user hits the 'd' key in the blocks window while running // in debug mode, which requests a retry with more debug info. 
- int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, - Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, + int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, + Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks); // Get the rotation required to deskew, and its inverse rotation. @@ -330,7 +330,7 @@ private: // Horizontal line separators. TabVector_LIST horizontal_lines_; // Image map of photo/noise areas on the page. - Pix *nontext_map_; + Image nontext_map_; // Textline projection map. TextlineProjection projection_; // Sequence of DENORMS that indicate how to get back to the original image diff --git a/src/textord/colpartitiongrid.cpp b/src/textord/colpartitiongrid.cpp index 7697c88e..a2b62239 100644 --- a/src/textord/colpartitiongrid.cpp +++ b/src/textord/colpartitiongrid.cpp @@ -608,7 +608,7 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if anything was changed. -bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map, +bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box, const FCOORD &rotation) { // Iterate the ColPartitions in the grid. ColPartitionGridSearch gsearch(this); @@ -1392,7 +1392,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if the partition was changed. 
-bool ColPartitionGrid::SmoothRegionType(Pix *nontext_map, const TBOX &im_box, +bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, ColPartition *part) { const TBOX &part_box = part->bounding_box(); if (debug) { @@ -1511,7 +1511,7 @@ enum NeighbourPartitionType { // partitions that makes a decisive result (if any) and returns the type // and the distance of the collection. If there are any pixels in the // nontext_map, then the decision is biased towards image. -BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map, +BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, const ColPartition &part, int *best_distance) { @@ -1594,7 +1594,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction // dists must be an array of vectors of size NPT_COUNT. void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling, const TBOX &search_box, - Pix *nontext_map, const TBOX &im_box, + Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, std::vector *dists) { const TBOX &part_box = base_part.bounding_box(); diff --git a/src/textord/colpartitiongrid.h b/src/textord/colpartitiongrid.h index f147827d..e2b71c7b 100644 --- a/src/textord/colpartitiongrid.h +++ b/src/textord/colpartitiongrid.h @@ -98,7 +98,7 @@ public: // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if anything was changed. 
- bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, + bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation); // Reflects the grid and its colpartitions in the y-axis, assuming that @@ -199,7 +199,7 @@ private: // nontext_map, which is used to prevent the spread of text neighbourhoods // into images. // Returns true if the partition was changed. - bool SmoothRegionType(Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, + bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, ColPartition *part); // Executes the search for SmoothRegionType in a single direction. // Creates a bounding box that is padded in all directions except direction, @@ -207,7 +207,7 @@ private: // partitions that makes a decisive result (if any) and returns the type // and the distance of the collection. If there are any pixels in the // nontext_map, then the decision is biased towards image. - BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Pix *nontext_map, + BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, const ColPartition &part, int *best_distance); // Counts the partitions in the given search_box by appending the gap @@ -216,7 +216,7 @@ private: // vectors in the dists array are sorted in increasing order. // dists must be an array of vectors of size NPT_COUNT. 
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling, - const TBOX &search_box, Pix *nontext_map, const TBOX &im_box, + const TBOX &search_box, Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug, std::vector *dists); // Improves the margins of the ColPartition by searching for diff --git a/src/textord/devanagari_processing.cpp b/src/textord/devanagari_processing.cpp index ccac8370..c4edd9c1 100644 --- a/src/textord/devanagari_processing.cpp +++ b/src/textord/devanagari_processing.cpp @@ -55,20 +55,20 @@ ShiroRekhaSplitter::~ShiroRekhaSplitter() { } void ShiroRekhaSplitter::Clear() { - pixDestroy(&orig_pix_); - pixDestroy(&splitted_image_); + orig_pix_.destroy(); + splitted_image_.destroy(); pageseg_split_strategy_ = NO_SPLIT; ocr_split_strategy_ = NO_SPLIT; - pixDestroy(&debug_image_); + debug_image_.destroy(); segmentation_block_list_ = nullptr; global_xheight_ = kUnspecifiedXheight; perform_close_ = false; } // On setting the input image, a clone of it is owned by this class. -void ShiroRekhaSplitter::set_orig_pix(Pix *pix) { +void ShiroRekhaSplitter::set_orig_pix(Image pix) { if (orig_pix_) { - pixDestroy(&orig_pix_); + orig_pix_.destroy(); } orig_pix_ = pixClone(pix); } @@ -91,32 +91,32 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) { tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no"); } // Create a copy of original image to store the splitting output. - pixDestroy(&splitted_image_); + splitted_image_.destroy(); splitted_image_ = pixCopy(nullptr, orig_pix_); // Initialize debug image if required. if (devanagari_split_debugimage) { - pixDestroy(&debug_image_); + debug_image_.destroy(); debug_image_ = pixConvertTo32(orig_pix_); } // Determine all connected components in the input image. A close operation // may be required prior to this, depending on the current settings. 
- Pix *pix_for_ccs = pixClone(orig_pix_); + Image pix_for_ccs = pixClone(orig_pix_); if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) { if (devanagari_split_debuglevel > 0) { tprintf("Performing a global close operation..\n"); } // A global measure is available for xheight, but no local information // exists. - pixDestroy(&pix_for_ccs); + pix_for_ccs.destroy(); pix_for_ccs = pixCopy(nullptr, orig_pix_); PerformClose(pix_for_ccs, global_xheight_); } Pixa *ccs; Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); boxaDestroy(&tmp_boxa); - pixDestroy(&pix_for_ccs); + pix_for_ccs.destroy(); // Iterate over all connected components. Get their bounding boxes and clip // out the image regions corresponding to these boxes from the original image. @@ -128,7 +128,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) { } for (int i = 0; i < num_ccs; ++i) { Box *box = ccs->boxa->box[i]; - Pix *word_pix = pixClipRectangle(orig_pix_, box, nullptr); + Image word_pix = pixClipRectangle(orig_pix_, box, nullptr); ASSERT_HOST(word_pix); int xheight = GetXheightForCC(box); if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) { @@ -143,7 +143,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) { } else if (devanagari_split_debuglevel > 0) { tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", box->x, box->y, box->w, box->h); } - pixDestroy(&word_pix); + word_pix.destroy(); } // Actually clear the boxes now. for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { @@ -161,7 +161,7 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) { // Method to perform a close operation on the input image. The xheight // estimate decides the size of sel used. 
-void ShiroRekhaSplitter::PerformClose(Pix *pix, int xheight_estimate) { +void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) { pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3); } @@ -221,7 +221,7 @@ int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) { // leeway. The leeway depends on the input xheight, if provided, else a // conservative multiplier on approximate stroke width is used (which may lead // to over-splitting). -void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight, +void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left, int word_top, Boxa *regions_to_clear) { if (split_strategy == NO_SPLIT) { return; @@ -257,7 +257,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix * // Clear the ascender and descender regions of the word. // Obtain a vertical projection histogram for the resulting image. Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3); - Pix *word_in_xheight = pixCopy(nullptr, pix); + Image word_in_xheight = pixCopy(nullptr, pix); pixClearInRect(word_in_xheight, box_to_clear); // Also clear any pixels which are below shirorekha_bottom + some leeway. // The leeway is set to xheight if the information is available, else it is a @@ -276,7 +276,7 @@ void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Pix * PixelHistogram vert_hist; vert_hist.ConstructVerticalCountHist(word_in_xheight); - pixDestroy(&word_in_xheight); + word_in_xheight.destroy(); // If the number of black pixel in any column of the image is less than a // fraction of the stroke width, treat it as noise / a stray mark. Perform @@ -385,7 +385,7 @@ Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const { // This method returns the computed mode-height of blobs in the pix. // It also prunes very small blobs from calculation. 
-int ShiroRekhaSplitter::GetModeHeight(Pix *pix) { +int ShiroRekhaSplitter::GetModeHeight(Image pix) { Boxa *boxa = pixConnComp(pix, nullptr, 8); STATS heights(0, pixGetHeight(pix)); heights.clear(); @@ -402,7 +402,7 @@ int ShiroRekhaSplitter::GetModeHeight(Pix *pix) { // This method returns y-extents of the shiro-rekha computed from the input // word image. -void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top, +void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom, int *shirorekha_ylevel) { // Compute a histogram from projecting the word on a vertical line. PixelHistogram hist_horiz; @@ -450,7 +450,7 @@ int PixelHistogram::GetHistogramMaximum(int *count) const { } // Methods to construct histograms from images. -void PixelHistogram::ConstructVerticalCountHist(Pix *pix) { +void PixelHistogram::ConstructVerticalCountHist(Image pix) { Clear(); int width = pixGetWidth(pix); int height = pixGetHeight(pix); @@ -471,7 +471,7 @@ void PixelHistogram::ConstructVerticalCountHist(Pix *pix) { } } -void PixelHistogram::ConstructHorizontalCountHist(Pix *pix) { +void PixelHistogram::ConstructHorizontalCountHist(Image pix) { Clear(); Numa *counts = pixCountPixelsByRow(pix, nullptr); length_ = numaGetCount(counts); diff --git a/src/textord/devanagari_processing.h b/src/textord/devanagari_processing.h index d2e6f14a..bb424826 100644 --- a/src/textord/devanagari_processing.h +++ b/src/textord/devanagari_processing.h @@ -56,8 +56,8 @@ public: } // Methods to construct histograms from images. These clear any existing data. - void ConstructVerticalCountHist(Pix *pix); - void ConstructHorizontalCountHist(Pix *pix); + void ConstructVerticalCountHist(Image pix); + void ConstructHorizontalCountHist(Image pix); // This method returns the global-maxima for the histogram. The frequency of // the global maxima is returned in count, if specified. 
@@ -118,16 +118,16 @@ public: // Returns the image obtained from shiro-rekha splitting. The returned object // is owned by this class. Callers may want to clone the returned pix to keep // it alive beyond the life of ShiroRekhaSplitter object. - Pix *splitted_image() { + Image splitted_image() { return splitted_image_; } // On setting the input image, a clone of it is owned by this class. - void set_orig_pix(Pix *pix); + void set_orig_pix(Image pix); // Returns the input image provided to the object. This object is owned by // this class. Callers may want to clone the returned pix to work with it. - Pix *orig_pix() { + Image orig_pix() { return orig_pix_; } @@ -154,12 +154,12 @@ public: // This method returns the computed mode-height of blobs in the pix. // It also prunes very small blobs from calculation. Could be used to provide // a global xheight estimate for images which have the same point-size text. - static int GetModeHeight(Pix *pix); + static int GetModeHeight(Image pix); private: // Method to perform a close operation on the input image. The xheight // estimate decides the size of sel used. - static void PerformClose(Pix *pix, int xheight_estimate); + static void PerformClose(Image pix, int xheight_estimate); // This method resolves the cc bbox to a particular row and returns the row's // xheight. This uses block_list_ if available, else just returns the @@ -173,7 +173,7 @@ private: // conservative estimate of stroke width along with an associated multiplier // is used in its place. It is advisable to have a specified xheight when // splitting for classification/training. 
- void SplitWordShiroRekha(SplitStrategy split_strategy, Pix *pix, int xheight, int word_left, + void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left, int word_top, Boxa *regions_to_clear); // Returns a new box object for the corresponding TBOX, based on the original @@ -182,15 +182,15 @@ private: // This method returns y-extents of the shiro-rekha computed from the input // word image. - static void GetShiroRekhaYExtents(Pix *word_pix, int *shirorekha_top, int *shirorekha_bottom, + static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom, int *shirorekha_ylevel); - Pix *orig_pix_; // Just a clone of the input image passed. - Pix *splitted_image_; // Image produced after the last splitting round. The + Image orig_pix_; // Just a clone of the input image passed. + Image splitted_image_; // Image produced after the last splitting round. The // object is owned by this class. SplitStrategy pageseg_split_strategy_; SplitStrategy ocr_split_strategy_; - Pix *debug_image_; + Image debug_image_; // This block list is used as a golden segmentation when performing splitting. BLOCK_LIST *segmentation_block_list_; int global_xheight_; diff --git a/src/textord/edgblob.cpp b/src/textord/edgblob.cpp index d668c6ee..e53ae3c9 100644 --- a/src/textord/edgblob.cpp +++ b/src/textord/edgblob.cpp @@ -322,7 +322,7 @@ void OL_BUCKETS::extract_children( // recursive count * Run the edge detector over the block and return a list of blobs. 
*/ -void extract_edges(Pix *pix, // thresholded image +void extract_edges(Image pix, // thresholded image BLOCK *block) { // block to scan C_OUTLINE_LIST outlines; // outlines in block C_OUTLINE_IT out_it = &outlines; diff --git a/src/textord/edgblob.h b/src/textord/edgblob.h index 11393a56..c8e561ba 100644 --- a/src/textord/edgblob.h +++ b/src/textord/edgblob.h @@ -76,7 +76,7 @@ private: int32_t index; // for extraction scan }; -void extract_edges(Pix *pix, // thresholded image +void extract_edges(Image pix, // thresholded image BLOCK *block); // block to scan void outlines_to_blobs( // find blobs BLOCK *block, // block to scan diff --git a/src/textord/equationdetectbase.cpp b/src/textord/equationdetectbase.cpp index 37d06472..ed406dbc 100644 --- a/src/textord/equationdetectbase.cpp +++ b/src/textord/equationdetectbase.cpp @@ -34,7 +34,7 @@ namespace tesseract { // instead of weak vtables in every compilation unit. EquationDetectBase::~EquationDetectBase() = default; -void EquationDetectBase::RenderSpecialText(Pix *pix, BLOBNBOX *blob) { +void EquationDetectBase::RenderSpecialText(Image pix, BLOBNBOX *blob) { ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr); const TBOX &tbox = blob->bounding_box(); int height = pixGetHeight(pix); diff --git a/src/textord/equationdetectbase.h b/src/textord/equationdetectbase.h index 9bfeb7b6..4f31bdaa 100644 --- a/src/textord/equationdetectbase.h +++ b/src/textord/equationdetectbase.h @@ -20,7 +20,7 @@ #ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ #define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ -#include +#include "image.h" class BLOBNBOX_LIST; class TO_BLOCK; @@ -53,7 +53,7 @@ public: // BSTT_ITALIC: green box // BSTT_UNCLEAR: blue box // All others: yellow box - static void RenderSpecialText(Pix *pix, BLOBNBOX *blob); + static void RenderSpecialText(Image pix, BLOBNBOX *blob); }; } // namespace tesseract diff --git a/src/textord/imagefind.cpp b/src/textord/imagefind.cpp index 69a911e2..67ff1a92 100644 
--- a/src/textord/imagefind.cpp +++ b/src/textord/imagefind.cpp @@ -60,14 +60,14 @@ const int kNoisePadding = 4; // The returned pix may be nullptr, meaning no images found. // If not nullptr, it must be PixDestroyed by the caller. // If textord_tabfind_show_images, debug images are appended to pixa_debug. -Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) { +Image ImageFind::FindImages(Image pix, DebugPixa *pixa_debug) { // Not worth looking at small images. if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) { return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); } // Reduce by factor 2. - Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); + Image pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixr, "CascadeReduced"); } @@ -78,76 +78,76 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) { // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we // want to bypass that. if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) { - pixDestroy(&pixr); + pixr.destroy(); return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); } // Get the halftone mask. l_int32 ht_found = 0; Pixa *pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) ? 
pixaCreate(0) : nullptr; - Pix *pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb); + Image pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb); if (pixadb) { - Pix *pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2); + Image pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixdb, "HalftoneMask"); } - pixDestroy(&pixdb); + pixdb.destroy(); pixaDestroy(&pixadb); } - pixDestroy(&pixr); + pixr.destroy(); if (!ht_found && pixht2 != nullptr) { - pixDestroy(&pixht2); + pixht2.destroy(); } if (pixht2 == nullptr) { return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); } // Expand back up again. - Pix *pixht = pixExpandReplicate(pixht2, 2); + Image pixht = pixExpandReplicate(pixht2, 2); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixht, "HalftoneReplicated"); } - pixDestroy(&pixht2); + pixht2.destroy(); // Fill to capture pixels near the mask edges that were missed - Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); + Image pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); pixOr(pixht, pixht, pixt); - pixDestroy(&pixt); + pixt.destroy(); // Eliminate lines and bars that may be joined to images. 
- Pix *pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); + Image pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); pixDilateBrick(pixfinemask, pixfinemask, 5, 5); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixfinemask, "FineMask"); } - Pix *pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); - Pix *pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); - pixDestroy(&pixreduced); + Image pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); + Image pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); + pixreduced.destroy(); pixDilateBrick(pixreduced2, pixreduced2, 5, 5); - Pix *pixcoarsemask = pixExpandReplicate(pixreduced2, 8); - pixDestroy(&pixreduced2); + Image pixcoarsemask = pixExpandReplicate(pixreduced2, 8); + pixreduced2.destroy(); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixcoarsemask, "CoarseMask"); } // Combine the coarse and fine image masks. pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); - pixDestroy(&pixfinemask); + pixfinemask.destroy(); // Dilate a bit to make sure we get everything. pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); - Pix *pixmask = pixExpandReplicate(pixcoarsemask, 16); - pixDestroy(&pixcoarsemask); + Image pixmask = pixExpandReplicate(pixcoarsemask, 16); + pixcoarsemask.destroy(); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixmask, "MaskDilated"); } // And the image mask with the line and bar remover. pixAnd(pixht, pixht, pixmask); - pixDestroy(&pixmask); + pixmask.destroy(); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(pixht, "FinalMask"); } // Make the result image the same size as the input. 
- Pix *result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + Image result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); pixOr(result, result, pixht); - pixDestroy(&pixht); + pixht.destroy(); return result; } @@ -158,7 +158,7 @@ Pix *ImageFind::FindImages(Pix *pix, DebugPixa *pixa_debug) { // If not nullptr, they must be destroyed by the caller. // Resolution of pix should match the source image (Tesseract::pix_binary_) // so the output coordinate systems match. -void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, +void ImageFind::ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa) { *boxa = nullptr; *pixa = nullptr; @@ -177,15 +177,15 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa } for (int i = 0; i < npixes; ++i) { int x_start, x_end, y_start, y_end; - Pix *img_pix = pixaGetPix(*pixa, i, L_CLONE); + Image img_pix = pixaGetPix(*pixa, i, L_CLONE); if (textord_tabfind_show_images && pixa_debug != nullptr) { pixa_debug->AddPix(img_pix, "A component"); } if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end)) { - Pix *simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); + Image simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); pixSetAll(simple_pix); - pixDestroy(&img_pix); + img_pix.destroy(); // pixaReplacePix takes ownership of the simple_pix. 
pixaReplacePix(*pixa, i, simple_pix, nullptr); img_pix = pixaGetPix(*pixa, i, L_CLONE); @@ -195,7 +195,7 @@ void ImageFind::ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa Box *simple_box = boxCreate(x + x_start, y + y_start, x_end - x_start, y_end - y_start); boxaReplaceBox(*boxa, i, simple_box); } - pixDestroy(&img_pix); + img_pix.destroy(); } } @@ -280,7 +280,7 @@ static bool VScanForEdge(uint32_t *data, int wpl, int y_start, int y_end, int mi // On return, the rectangle is defined by x_start, y_start, x_end and y_end. // Note: the algorithm is iterative, allowing it to slice off pixels from // one edge, allowing it to then slice off more pixels from another edge. -bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction, +bool ImageFind::pixNearlyRectangular(Image pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end) { ASSERT_HOST(pix != nullptr); @@ -348,7 +348,7 @@ bool ImageFind::pixNearlyRectangular(Pix *pix, double min_fraction, double max_f // are shrunk inwards until they bound any black pixels found within the // original rectangle. Returns false if the rectangle contains no black // pixels at all. -bool ImageFind::BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end) { +bool ImageFind::BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end) { Box *input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, *y_end - *y_start); Box *output_box = nullptr; pixClipBoxToForeground(pix, input_box, nullptr, &output_box); @@ -427,8 +427,8 @@ uint8_t ImageFind::ClipToByte(double pixel) { // If color_map1 is not null then it and color_map2 get rect pasted in them // with the two calculated colors, and rms map gets a pasted rect of the rms. // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. 
-void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1, - Pix *color_map2, Pix *rms_map, uint8_t *color1, +void ImageFind::ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1, + Image color_map2, Image rms_map, uint8_t *color1, uint8_t *color2) { ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32); // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more @@ -448,7 +448,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P } // Now crop the pix to the rectangle. Box *scaled_box = boxCreate(left_pad, height - top_pad, width_pad, height_pad); - Pix *scaled = pixClipRectangle(pix, scaled_box, nullptr); + Image scaled = pixClipRectangle(pix, scaled_box, nullptr); // Compute stats over the whole image. STATS red_stats(0, 256); @@ -538,7 +538,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P ComposeRGB(color2[COLOR_RED], color2[COLOR_GREEN], color2[COLOR_BLUE])); pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]); } - pixDestroy(&scaled); + scaled.destroy(); boxDestroy(&scaled_box); } @@ -585,7 +585,7 @@ void ImageFind::ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, P // horizontal. The boxes are rotated by rotation, which should undo such // rotations, before mapping them onto the pix. bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, - const FCOORD &rotation, Pix *pix) { + const FCOORD &rotation, Image pix) { TBOX search_box(box1); search_box += box2; if (box1.x_gap(box2) >= box1.y_gap(box2)) { @@ -607,7 +607,7 @@ bool ImageFind::BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TB // Returns the number of pixels in box in the pix. // rotation, pix and im_box are defined in the large comment above. 
int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, - Pix *pix) { + Image pix) { // Intersect it with the image box. box &= im_box; // This is in-place box intersection. if (box.null_box()) { @@ -616,12 +616,12 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR box.rotate(rotation); TBOX rotated_im_box(im_box); rotated_im_box.rotate(rotation); - Pix *rect_pix = pixCreate(box.width(), box.height(), 1); + Image rect_pix = pixCreate(box.width(), box.height(), 1); pixRasterop(rect_pix, 0, 0, box.width(), box.height(), PIX_SRC, pix, box.left() - rotated_im_box.left(), rotated_im_box.top() - box.top()); l_int32 result; pixCountPixels(rect_pix, &result, nullptr); - pixDestroy(&rect_pix); + rect_pix.destroy(); return result; } @@ -630,7 +630,7 @@ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOOR // until there is at least one black pixel in the outermost columns. // rotation, rerotation, pix and im_box are defined in the large comment above. static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box, - Pix *pix, TBOX *slice) { + Image pix, TBOX *slice) { TBOX rotated_box(*slice); rotated_box.rotate(rerotation); TBOX rotated_im_box(im_box); @@ -675,7 +675,7 @@ static void AttemptToShrinkBox(const FCOORD &rotation, const FCOORD &rerotation, // In such cases, the output order may cause strange block polygons. // rotation, rerotation, pix and im_box are defined in the large comment above. static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD &rotation, - const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list) { + const FCOORD &rerotation, Image pix, ColPartition_LIST *part_list) { ASSERT_HOST(!part_list->empty()); ColPartition_IT part_it(part_list); do { @@ -753,7 +753,7 @@ static void CutChunkFromParts(const TBOX &box, const TBOX &im_box, const FCOORD // from a rectangle. 
// rotation, rerotation, pix and im_box are defined in the large comment above. static void DivideImageIntoParts(const TBOX &im_box, const FCOORD &rotation, - const FCOORD &rerotation, Pix *pix, + const FCOORD &rerotation, Image pix, ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list) { // Add the full im_box partition to the list to begin with. ColPartition *pix_part = @@ -1204,7 +1204,7 @@ static bool ScanForOverlappingText(ColPartitionGrid *part_grid, TBOX *box) { // and then deletes them. // Box coordinates are rotated by rerotate to match the image. static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *part_grid, - ColPartition_LIST *image_parts, Pix *image_pix) { + ColPartition_LIST *image_parts, Image image_pix) { if (image_pix == nullptr) { return; } @@ -1236,7 +1236,7 @@ static void MarkAndDeleteImageParts(const FCOORD &rerotate, ColPartitionGrid *pa // rerotation specifies how to rotate the partition coords to match // the image_mask, since this function is used after orientation correction. void ImageFind::TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, - Pix *image_mask) { + Image image_mask) { // Extract the noise parts from the grid and put them on a temporary list. ColPartition_LIST parts_list; ColPartition_IT part_it(&parts_list); @@ -1288,7 +1288,7 @@ static void DeleteSmallImages(ColPartitionGrid *part_grid) { // Since the other blobs in the other partitions will be owned by the block, // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this // situation and collect the image blobs. 
-void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation, +void ImageFind::FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts) { @@ -1304,7 +1304,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation, for (int i = 0; i < nboxes; ++i) { l_int32 x, y, width, height; boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height); - Pix *pix = pixaGetPix(pixa, i, L_CLONE); + Image pix = pixaGetPix(pixa, i, L_CLONE); TBOX im_box(x, imageheight - y - height, x + width, imageheight - y); im_box.rotate(rotation); // Now matches all partitions and blobs. ColPartitionGridSearch rectsearch(part_grid); @@ -1315,7 +1315,7 @@ void ImageFind::FindImagePartitions(Pix *image_pix, const FCOORD &rotation, pixa_debug->AddPix(pix, "ImageComponent"); tprintf("Component has %d parts\n", part_list.length()); } - pixDestroy(&pix); + pix.destroy(); if (!part_list.empty()) { ColPartition_IT part_it(&part_list); if (part_list.singleton()) { diff --git a/src/textord/imagefind.h b/src/textord/imagefind.h index 8c7bb5a3..21de7f0d 100644 --- a/src/textord/imagefind.h +++ b/src/textord/imagefind.h @@ -47,7 +47,7 @@ public: // The returned pix may be nullptr, meaning no images found. // If not nullptr, it must be PixDestroyed by the caller. // If textord_tabfind_show_images, debug images are appended to pixa_debug. - static Pix *FindImages(Pix *pix, DebugPixa *pixa_debug); + static Image FindImages(Image pix, DebugPixa *pixa_debug); // Generates a Boxa, Pixa pair from the input binary (image mask) pix, // analogous to pixConnComp, except that connected components which are nearly @@ -56,7 +56,7 @@ public: // If not nullptr, they must be destroyed by the caller. // Resolution of pix should match the source image (Tesseract::pix_binary_) // so the output coordinate systems match. 
- static void ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa); + static void ConnCompAndRectangularize(Image pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa); // Returns true if there is a rectangle in the source pix, such that all // pixel rows and column slices outside of it have less than @@ -67,7 +67,7 @@ public: // On return, the rectangle is defined by x_start, y_start, x_end and y_end. // Note: the algorithm is iterative, allowing it to slice off pixels from // one edge, allowing it to then slice off more pixels from another edge. - static bool pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction, + static bool pixNearlyRectangular(Image pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end); @@ -75,7 +75,7 @@ public: // are shrunk inwards until they bound any black pixels found within the // original rectangle. Returns false if the rectangle contains no black // pixels at all. - static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end); + static bool BoundsWithinRect(Image pix, int *x_start, int *y_start, int *x_end, int *y_end); // Given a point in 3-D (RGB) space, returns the squared Euclidean distance // of the point from the given line, defined by a pair of points in the 3-D @@ -99,8 +99,8 @@ public: // If color_map1 is not null then it and color_map2 get rect pasted in them // with the two calculated colors, and rms map gets a pasted rect of the rms. // color_map1, color_map2 and rms_map are assumed to be the same scale as pix. 
- static void ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1, - Pix *color_map2, Pix *rms_map, uint8_t *color1, + static void ComputeRectangleColors(const TBOX &rect, Image pix, int factor, Image color_map1, + Image color_map2, Image rms_map, uint8_t *color1, uint8_t *color2); // Returns true if there are no black pixels in between the boxes. @@ -109,7 +109,7 @@ public: // horizontal. The boxes are rotated by rotation, which should undo such // rotations, before mapping them onto the pix. static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, - const FCOORD &rotation, Pix *pix); + const FCOORD &rotation, Image pix); // Returns the number of pixels in box in the pix. // The im_box must represent the bounding box of the pix in tesseract @@ -117,7 +117,7 @@ public: // horizontal. The boxes are rotated by rotation, which should undo such // rotations, before mapping them onto the pix. static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, - Pix *pix); + Image pix); // Locates all the image partitions in the part_grid, that were found by a // previous call to FindImagePartitions, marks them in the image_mask, @@ -127,7 +127,7 @@ public: // rerotation specifies how to rotate the partition coords to match // the image_mask, since this function is used after orientation correction. static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, - Pix *image_mask); + Image image_mask); // Runs a CC analysis on the image_pix mask image, and creates // image partitions from them, cutting out strong text, and merging with @@ -139,7 +139,7 @@ public: // Since the other blobs in the other partitions will be owned by the block, // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this // situation and collect the image blobs. 
- static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, + static void FindImagePartitions(Image image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts); }; diff --git a/src/textord/linefind.cpp b/src/textord/linefind.cpp index 713a2c54..795ac1f8 100644 --- a/src/textord/linefind.cpp +++ b/src/textord/linefind.cpp @@ -64,7 +64,7 @@ const double kMinMusicPixelFraction = 0.75; // Erases the unused blobs from the line_pix image, taking into account // whether this was a horizontal or vertical line set. static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs, - Pix *line_pix) { + Image line_pix) { int height = pixGetHeight(line_pix); BLOBNBOX_IT bbox_it(line_bblobs); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { @@ -94,26 +94,26 @@ static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_ // as well by removing components that touch the line, but are not in the // non_line_pix mask. It is assumed that the non_line_pix mask has already // been prepared to required accuracy. -static void SubtractLinesAndResidue(Pix *line_pix, Pix *non_line_pix, int resolution, - Pix *src_pix) { +static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix, int resolution, + Image src_pix) { // First remove the lines themselves. pixSubtract(src_pix, src_pix, line_pix); // Subtract the non-lines from the image to get the residue. - Pix *residue_pix = pixSubtract(nullptr, src_pix, non_line_pix); + Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix); // Dilate the lines so they touch the residue. - Pix *fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3); + Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3); // Seed fill the fat lines to get all the residue. 
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); // Subtract the residue from the original image. pixSubtract(src_pix, src_pix, fat_line_pix); - pixDestroy(&fat_line_pix); - pixDestroy(&residue_pix); + fat_line_pix.destroy(); + residue_pix.destroy(); } // Returns the maximum strokewidth in the given binary image by doubling // the maximum of the distance function. -static int MaxStrokeWidth(Pix *pix) { - Pix *dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); +static int MaxStrokeWidth(Image pix) { + Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); int width = pixGetWidth(dist_pix); int height = pixGetHeight(dist_pix); int wpl = pixGetWpl(dist_pix); @@ -129,18 +129,18 @@ static int MaxStrokeWidth(Pix *pix) { } data += wpl; } - pixDestroy(&dist_pix); + dist_pix.destroy(); return max_dist * 2; } // Returns the number of components in the intersection_pix touched by line_box. -static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) { +static int NumTouchingIntersections(Box *line_box, Image intersection_pix) { if (intersection_pix == nullptr) { return 0; } - Pix *rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr); + Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr); Boxa *boxa = pixConnComp(rect_pix, nullptr, 8); - pixDestroy(&rect_pix); + rect_pix.destroy(); if (boxa == nullptr) { return false; } @@ -152,7 +152,7 @@ static int NumTouchingIntersections(Box *line_box, Pix *intersection_pix) { // Returns the number of black pixels found in the box made by adding the line // width to both sides of the line bounding box. (Increasing the smallest // dimension of the bounding box.) 
-static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline_pix) { +static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) { l_int32 x, y, box_width, box_height; boxGetGeometry(line_box, &x, &y, &box_width, &box_height); if (box_width > box_height) { @@ -167,11 +167,11 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline box_width = right - x; } Box *box = boxCreate(x, y, box_width, box_height); - Pix *rect_pix = pixClipRectangle(nonline_pix, box, nullptr); + Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr); boxDestroy(&box); l_int32 result; pixCountPixels(rect_pix, &result, nullptr); - pixDestroy(&rect_pix); + rect_pix.destroy(); return result; } @@ -184,8 +184,8 @@ static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Pix *nonline // or Hindi words, or underlines.) // Bad line components are erased from line_pix. // Returns the number of remaining connected components. -static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersection_pix, - Pix *line_pix) { +static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix, + Image line_pix) { int min_thick_length = static_cast(resolution * kThickLengthMultiple); Pixa *pixa = nullptr; Boxa *boxa = pixConnComp(line_pix, &pixa, 8); @@ -196,9 +196,9 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect Box *box = boxaGetBox(boxa, i, L_CLONE); l_int32 x, y, box_width, box_height; boxGetGeometry(box, &x, &y, &box_width, &box_height); - Pix *comp_pix = pixaGetPix(pixa, i, L_CLONE); + Image comp_pix = pixaGetPix(pixa, i, L_CLONE); int max_width = MaxStrokeWidth(comp_pix); - pixDestroy(&comp_pix); + comp_pix.destroy(); bool bad_line = false; // If the length is too short to stand-alone as a line, and the box width // is thick enough, and the stroke width is thick enough it is bad. 
@@ -240,18 +240,18 @@ static int FilterFalsePositives(int resolution, Pix *nonline_pix, Pix *intersect // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // The detected lines are removed from the pix. -void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x, - int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines, +void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, + int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines) { if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) { tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); return; } - Pix *pix_vline = nullptr; - Pix *pix_non_vline = nullptr; - Pix *pix_hline = nullptr; - Pix *pix_non_hline = nullptr; - Pix *pix_intersections = nullptr; + Image pix_vline = nullptr; + Image pix_non_vline = nullptr; + Image pix_hline = nullptr; + Image pix_non_hline = nullptr; + Image pix_intersections = nullptr; Pixa *pixa_display = debug ? 
pixaCreate(0) : nullptr; GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline, &pix_intersections, pix_music_mask, pixa_display); @@ -263,10 +263,10 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v if (pix_vline != nullptr) { pixAnd(pix_intersections, pix_vline, pix_hline); } else { - pixDestroy(&pix_intersections); + pix_intersections.destroy(); } if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) { - pixDestroy(&pix_hline); + pix_hline.destroy(); } } FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, &pix_hline, @@ -283,11 +283,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v pixAnd(pix_intersections, pix_vline, pix_hline); // Fatten up the intersections and seed-fill to get the intersection // residue. - Pix *pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5); + Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5); pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); // Now remove the intersection residue. pixSubtract(pix, pix, pix_join_residue); - pixDestroy(&pix_join_residue); + pix_join_residue.destroy(); } // Remove any detected music. 
if (pix_music_mask != nullptr && *pix_music_mask != nullptr) { @@ -300,11 +300,11 @@ void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *v pixaAddPix(pixa_display, pix, L_CLONE); } - pixDestroy(&pix_vline); - pixDestroy(&pix_non_vline); - pixDestroy(&pix_hline); - pixDestroy(&pix_non_hline); - pixDestroy(&pix_intersections); + pix_vline.destroy(); + pix_non_vline.destroy(); + pix_hline.destroy(); + pix_non_hline.destroy(); + pix_intersections.destroy(); if (pixa_display != nullptr) { pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf"); pixaDestroy(&pixa_display); @@ -359,9 +359,9 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa **bo // If no good lines are found, pix_vline is destroyed. // None of the input pointers may be nullptr, and if *pix_vline is nullptr then // the function does nothing. -void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x, - int *vertical_y, Pix **pix_vline, Pix *pix_non_vline, - Pix *src_pix, TabVector_LIST *vectors) { +void LineFinder::FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x, + int *vertical_y, Image *pix_vline, Image pix_non_vline, + Image src_pix, TabVector_LIST *vectors) { if (pix_vline == nullptr || *pix_vline == nullptr) { return; } @@ -380,7 +380,7 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int vertical.set_with_shrink(*vertical_x, *vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); } else { - pixDestroy(pix_vline); + pix_vline->destroy(); } } @@ -394,9 +394,9 @@ void LineFinder::FindAndRemoveVLines(int resolution, Pix *pix_intersections, int // If no good lines are found, pix_hline is destroyed. // None of the input pointers may be nullptr, and if *pix_hline is nullptr then // the function does nothing. 
-void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x, - int vertical_y, Pix **pix_hline, Pix *pix_non_hline, - Pix *src_pix, TabVector_LIST *vectors) { +void LineFinder::FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x, + int vertical_y, Image *pix_hline, Image pix_non_hline, + Image src_pix, TabVector_LIST *vectors) { if (pix_hline == nullptr || *pix_hline == nullptr) { return; } @@ -422,7 +422,7 @@ void LineFinder::FindAndRemoveHLines(int resolution, Pix *pix_intersections, int h_it.data()->XYFlip(); } } else { - pixDestroy(pix_hline); + pix_hline->destroy(); } } @@ -482,14 +482,14 @@ void LineFinder::FindLineVectors(const ICOORD &bleft, const ICOORD &tright, // is taken to be a bar. Bars are used as a seed and the entire touching // component is added to the output music mask and subtracted from the lines. // Returns nullptr and does minimal work if no music is found. -static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline, +static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline, l_int32 *v_empty, l_int32 *h_empty) { int max_stave_height = static_cast(resolution * kMaxStaveHeight); - Pix *intersection_pix = pixAnd(nullptr, pix_vline, pix_hline); + Image intersection_pix = pixAnd(nullptr, pix_vline, pix_hline); Boxa *boxa = pixConnComp(pix_vline, nullptr, 8); // Iterate over the boxes to find music bars. int nboxes = boxaGetCount(boxa); - Pix *music_mask = nullptr; + Image music_mask = nullptr; for (int i = 0; i < nboxes; ++i) { Box *box = boxaGetBox(boxa, i, L_CLONE); l_int32 x, y, box_width, box_height; @@ -507,7 +507,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi boxDestroy(&box); } boxaDestroy(&boxa); - pixDestroy(&intersection_pix); + intersection_pix.destroy(); if (music_mask != nullptr) { // The mask currently contains just the bars. 
Use the mask as a seed // and the pix_closed as the mask for a seedfill to get all the @@ -521,14 +521,14 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi int nboxes = boxaGetCount(boxa); for (int i = 0; i < nboxes; ++i) { Box *box = boxaGetBox(boxa, i, L_CLONE); - Pix *rect_pix = pixClipRectangle(music_mask, box, nullptr); + Image rect_pix = pixClipRectangle(music_mask, box, nullptr); l_int32 music_pixels; pixCountPixels(rect_pix, &music_pixels, nullptr); - pixDestroy(&rect_pix); + rect_pix.destroy(); rect_pix = pixClipRectangle(pix_closed, box, nullptr); l_int32 all_pixels; pixCountPixels(rect_pix, &all_pixels, nullptr); - pixDestroy(&rect_pix); + rect_pix.destroy(); if (music_pixels < kMinMusicPixelFraction * all_pixels) { // False positive. Delete from the music mask. pixClearInRect(music_mask, box); @@ -539,7 +539,7 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi boxaDestroy(&boxa); pixZero(music_mask, &no_remaining_music); if (no_remaining_music) { - pixDestroy(&music_mask); + music_mask.destroy(); } else { pixSubtract(pix_vline, pix_vline, music_mask); pixSubtract(pix_hline, pix_hline, music_mask); @@ -563,11 +563,11 @@ static Pix *FilterMusic(int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pi // but any of the returns that are empty will be nullptr on output. // None of the input (1st level) pointers may be nullptr except pix_music_mask, // which will disable music detection, and pixa_display. 
-void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline, - Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections, - Pix **pix_music_mask, Pixa *pixa_display) { - Pix *pix_closed = nullptr; - Pix *pix_hollow = nullptr; +void LineFinder::GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline, + Image *pix_hline, Image *pix_non_hline, Image *pix_intersections, + Image *pix_music_mask, Pixa *pixa_display) { + Image pix_closed = nullptr; + Image pix_hollow = nullptr; int max_line_width = resolution / kThinLineFraction; int min_line_length = resolution / kMinLineLengthFraction; @@ -599,13 +599,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix // Open up with a big box to detect solid areas, which can then be // subtracted. This is very generous and will leave in even quite wide // lines. - Pix *pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); + Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); if (pixa_display != nullptr) { pixaAddPix(pixa_display, pix_solid, L_CLONE); } pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); - pixDestroy(&pix_solid); + pix_solid.destroy(); // Now open up in both directions independently to find lines of at least // 1 inch/kMinLineLengthFraction in length. 
@@ -615,7 +615,7 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); - pixDestroy(&pix_hollow); + pix_hollow.destroy(); #ifdef USE_OPENCL } #endif @@ -633,10 +633,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix *pix_music_mask = nullptr; } } - pixDestroy(&pix_closed); - Pix *pix_nonlines = nullptr; + pix_closed.destroy(); + Image pix_nonlines = nullptr; *pix_intersections = nullptr; - Pix *extra_non_hlines = nullptr; + Image extra_non_hlines = nullptr; if (!v_empty) { // Subtract both line candidates from the source to get definite non-lines. pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline); @@ -656,18 +656,18 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); } if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) { - pixDestroy(pix_vline); // No candidates left. + pix_vline->destroy(); // No candidates left. } } else { // No vertical lines. - pixDestroy(pix_vline); + pix_vline->destroy(); *pix_non_vline = nullptr; if (!h_empty) { pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline); } } if (h_empty) { - pixDestroy(pix_hline); + pix_hline->destroy(); *pix_non_hline = nullptr; if (v_empty) { return; @@ -677,10 +677,10 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); if (extra_non_hlines != nullptr) { pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines); - pixDestroy(&extra_non_hlines); + extra_non_hlines.destroy(); } if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) { - pixDestroy(pix_hline); // No candidates left. + pix_hline->destroy(); // No candidates left. 
} } if (pixa_display != nullptr) { @@ -706,13 +706,13 @@ void LineFinder::GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); } } - pixDestroy(&pix_nonlines); + pix_nonlines.destroy(); } // Returns a list of boxes corresponding to the candidate line segments. Sets // the line_crossings member of the boxes so we can later determine the number // of intersections touched by a full line. -void LineFinder::GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections, +void LineFinder::GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections, C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) { // Put a single pixel crack in every line at an arbitrary spacing, // so they break up and the bounding boxes can be used to get the diff --git a/src/textord/linefind.h b/src/textord/linefind.h index aedc40c1..d82d33ae 100644 --- a/src/textord/linefind.h +++ b/src/textord/linefind.h @@ -58,8 +58,8 @@ public: * * The detected lines are removed from the pix. */ - static void FindAndRemoveLines(int resolution, bool debug, Pix *pix, int *vertical_x, - int *vertical_y, Pix **pix_music_mask, TabVector_LIST *v_lines, + static void FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, + int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, TabVector_LIST *h_lines); /** @@ -83,9 +83,9 @@ private: // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // If no good lines are found, pix_vline is destroyed. 
- static void FindAndRemoveVLines(int resolution, Pix *pix_intersections, int *vertical_x, - int *vertical_y, Pix **pix_vline, Pix *pix_non_vline, - Pix *src_pix, TabVector_LIST *vectors); + static void FindAndRemoveVLines(int resolution, Image pix_intersections, int *vertical_x, + int *vertical_y, Image *pix_vline, Image pix_non_vline, + Image src_pix, TabVector_LIST *vectors); // Finds horizontal line objects in pix_vline and removes them from src_pix. // Uses the given resolution to determine size thresholds instead of any @@ -95,8 +95,8 @@ private: // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. // If no good lines are found, pix_hline is destroyed. - static void FindAndRemoveHLines(int resolution, Pix *pix_intersections, int vertical_x, - int vertical_y, Pix **pix_hline, Pix *pix_non_hline, Pix *src_pix, + static void FindAndRemoveHLines(int resolution, Image pix_intersections, int vertical_x, + int vertical_y, Image *pix_hline, Image pix_non_hline, Image src_pix, TabVector_LIST *vectors); // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright @@ -121,14 +121,14 @@ private: // None of the input (1st level) pointers may be nullptr except // pix_music_mask, which will disable music detection, and pixa_display, which // is for debug. - static void GetLineMasks(int resolution, Pix *src_pix, Pix **pix_vline, Pix **pix_non_vline, - Pix **pix_hline, Pix **pix_non_hline, Pix **pix_intersections, - Pix **pix_music_mask, Pixa *pixa_display); + static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline, + Image *pix_hline, Image *pix_non_hline, Image *pix_intersections, + Image *pix_music_mask, Pixa *pixa_display); // Returns a list of boxes corresponding to the candidate line segments. Sets // the line_crossings member of the boxes so we can later determine the number // of intersections touched by a full line. 
- static void GetLineBoxes(bool horizontal_lines, Pix *pix_lines, Pix *pix_intersections, + static void GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections, C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs); }; diff --git a/src/textord/scanedg.cpp b/src/textord/scanedg.cpp index b056698d..6c18bdd7 100644 --- a/src/textord/scanedg.cpp +++ b/src/textord/scanedg.cpp @@ -59,7 +59,7 @@ static CRACKEDGE *v_edge(int sign, CRACKEDGE *join, CrackPos *pos); * Extract edges from a PDBLK. **********************************************************************/ -void block_edges(Pix *t_pix, // thresholded image +void block_edges(Image t_pix, // thresholded image PDBLK *block, // block in image C_OUTLINE_IT *outline_it) { ICOORD bleft; // bounding box diff --git a/src/textord/scanedg.h b/src/textord/scanedg.h index b32c6e42..d566d491 100644 --- a/src/textord/scanedg.h +++ b/src/textord/scanedg.h @@ -29,7 +29,7 @@ namespace tesseract { class C_OUTLINE_IT; class PDBLK; -void block_edges(Pix *t_image, // thresholded image +void block_edges(Image t_image, // thresholded image PDBLK *block, // block in image C_OUTLINE_IT *outline_it); diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp index 0022bb86..58eab1de 100644 --- a/src/textord/strokewidth.cpp +++ b/src/textord/strokewidth.cpp @@ -350,7 +350,7 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST *big_part_list) { // Large blobs that cause overlap are put in separate partitions and added // to the big_parts list. 
void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, - TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, + TO_BLOCK *block, Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, diff --git a/src/textord/strokewidth.h b/src/textord/strokewidth.h index 343b8870..a69ce1ef 100644 --- a/src/textord/strokewidth.h +++ b/src/textord/strokewidth.h @@ -113,7 +113,7 @@ public: // Large blobs that cause overlap are put in separate partitions and added // to the big_parts list. void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, - Pix *nontext_pix, const DENORM *denorm, bool cjk_script, + Image nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts); @@ -306,7 +306,7 @@ private: private: // Image map of photo/noise areas on the page. Borrowed pointer (not owned.) - Pix *nontext_map_; + Image nontext_map_; // Textline projection map. Borrowed pointer. TextlineProjection *projection_; // DENORM used by projection_ to get back to image coords. Borrowed pointer. diff --git a/src/textord/textlineprojection.cpp b/src/textord/textlineprojection.cpp index 73b3cc88..761be2a4 100644 --- a/src/textord/textlineprojection.cpp +++ b/src/textord/textlineprojection.cpp @@ -53,7 +53,7 @@ TextlineProjection::TextlineProjection(int resolution) : x_origin_(0), y_origin_ } } TextlineProjection::~TextlineProjection() { - pixDestroy(&pix_); + pix_.destroy(); } // Build the projection profile given the input_block containing lists of @@ -64,8 +64,8 @@ TextlineProjection::~TextlineProjection() { // The blobs have had their left and right rules set to also limit // the range of projection. 
void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, - Pix *nontext_map) { - pixDestroy(&pix_); + Image nontext_map) { + pix_.destroy(); TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map)); x_origin_ = 0; y_origin_ = image_box.height(); @@ -75,9 +75,9 @@ void TextlineProjection::ConstructProjection(TO_BLOCK *input_block, const FCOORD pix_ = pixCreate(width, height, 8); ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map); ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map); - Pix *final_pix = pixBlockconv(pix_, 1, 1); + Image final_pix = pixBlockconv(pix_, 1, 1); // Pix* final_pix = pixBlockconv(pix_, 2, 2); - pixDestroy(&pix_); + pix_.destroy(); pix_ = final_pix; } @@ -127,7 +127,7 @@ void TextlineProjection::MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, void TextlineProjection::DisplayProjection() const { int width = pixGetWidth(pix_); int height = pixGetHeight(pix_); - Pix *pixc = pixCreate(width, height, 32); + Image pixc = pixCreate(width, height, 32); int src_wpl = pixGetWpl(pix_); int col_wpl = pixGetWpl(pixc); uint32_t *src_data = pixGetData(pix_); @@ -149,7 +149,7 @@ void TextlineProjection::DisplayProjection() const { auto *win = new ScrollView("Projection", 0, 0, width, height, width, height); win->Image(pixc, 0, 0); win->Update(); - pixDestroy(&pixc); + pixc.destroy(); } #endif // !GRAPHICS_DISABLED @@ -570,7 +570,7 @@ int TextlineProjection::MeanPixelsInLineSegment(const DENORM *denorm, int offset // The function converts between tesseract coords and the pix coords assuming // that this pix is full resolution equal in size to the original image. // Returns an empty box if there are no black pixels in the source box. 
-static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) { +static TBOX BoundsWithinBox(Image pix, const TBOX &box) { int im_height = pixGetHeight(pix); Box *input_box = boxCreate(box.left(), im_height - box.top(), box.width(), box.height()); Box *output_box = nullptr; @@ -593,7 +593,7 @@ static TBOX BoundsWithinBox(Pix *pix, const TBOX &box) { // and checks for nontext_map pixels in each half. Reduces the bbox so that it // still includes the middle point, but does not touch any fg pixels in // nontext_map. An empty box may be returned if there is no such box. -static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Pix *nontext_map, +static void TruncateBoxToMissNonText(int x_middle, int y_middle, bool split_on_x, Image nontext_map, TBOX *bbox) { TBOX box1(*bbox); TBOX box2(*bbox); @@ -652,7 +652,7 @@ void TextlineProjection::IncrementRectangle8Bit(const TBOX &box) { // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, - const TBOX &nontext_map_box, Pix *nontext_map) { + const TBOX &nontext_map_box, Image nontext_map) { BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX *blob = blob_it.data(); diff --git a/src/textord/textlineprojection.h b/src/textord/textlineprojection.h index e7fd4050..2bbb7520 100644 --- a/src/textord/textlineprojection.h +++ b/src/textord/textlineprojection.h @@ -44,7 +44,7 @@ public: // The rotation is a multiple of 90 degrees, ie no deskew yet. // The blobs have had their left and right rules set to also limit // the range of projection. 
- void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map); + void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Image nontext_map); // Display the blobs in the window colored according to textline quality. void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win); @@ -165,7 +165,7 @@ private: // flags, but the spreading is truncated by set pixels in the nontext_map // and also by the horizontal rule line limits on the blobs. void ProjectBlobs(BLOBNBOX_LIST *blobs, const FCOORD &rotation, const TBOX &image_box, - Pix *nontext_map); + Image nontext_map); // Pads the bounding box of the given blob according to whether it is on // a horizontal or vertical text line, taking into account tab-stops near // the blob. Returns true if padding was in the horizontal direction. @@ -192,7 +192,7 @@ private: // The image of horizontally smeared blob boxes summed to provide a // textline density map. As with a horizontal projection, the map has // dips in the gaps between textlines. - Pix *pix_; + Image pix_; }; } // namespace tesseract. diff --git a/src/textord/textord.cpp b/src/textord/textord.cpp index cc922721..3abbb7a8 100644 --- a/src/textord/textord.cpp +++ b/src/textord/textord.cpp @@ -175,7 +175,7 @@ Textord::Textord(CCStruct *ccstruct) // Make the textlines and words inside each block. 
void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, - Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, + Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { page_tr_.set_x(width); diff --git a/src/textord/textord.h b/src/textord/textord.h index 62186bc3..88aca1df 100644 --- a/src/textord/textord.h +++ b/src/textord/textord.h @@ -88,7 +88,7 @@ public: // diacritic_blobs contain small confusing components that should be added // to the appropriate word(s) in case they are really diacritics. void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, - Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, + Image binary_pix, Image thresholds_pix, Image grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); // If we were supposed to return only a single textline, and there is more @@ -113,7 +113,7 @@ public: FCOORD rotation // for drawing ); // tordmain.cpp /////////////////////////////////////////// - void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); + void find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on); private: diff --git a/src/textord/tordmain.cpp b/src/textord/tordmain.cpp index 3dd4903c..8ade06d6 100644 --- a/src/textord/tordmain.cpp +++ b/src/textord/tordmain.cpp @@ -66,17 +66,17 @@ CLISTIZE(WordWithBox) * * Set the horizontal and vertical stroke widths in the blob. **********************************************************************/ -void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) { +void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) { // Cut the blob rectangle into a Pix. 
int pix_height = pixGetHeight(pix); const TBOX &box = blob->bounding_box(); int width = box.width(); int height = box.height(); Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height); - Pix *pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); + Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); boxDestroy(&blob_pix_box); - Pix *dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); - pixDestroy(&pix_blob); + Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); + pix_blob.destroy(); // Compute the stroke widths. uint32_t *data = pixGetData(dist_pix); int wpl = pixGetWpl(dist_pix); @@ -129,7 +129,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) { pixel = next_pixel; } } - pixDestroy(&dist_pix); + dist_pix.destroy(); // Store the horizontal and vertical width in the blob, keeping both // widths if there is enough information, otherwise only the one with // the most samples. @@ -160,7 +160,7 @@ void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob) { * Make a list of TO_BLOCKs for portrait and landscape orientation. **********************************************************************/ -void assign_blobs_to_blocks2(Pix *pix, +void assign_blobs_to_blocks2(Image pix, BLOCK_LIST *blocks, // blocks to process TO_BLOCK_LIST *port_blocks) { // output list BLOCK *block; // current block @@ -211,7 +211,7 @@ void assign_blobs_to_blocks2(Pix *pix, * grades on different lists in the matching TO_BLOCK in to_blocks. 
**********************************************************************/ -void Textord::find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { +void Textord::find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { int width = pixGetWidth(pix); int height = pixGetHeight(pix); if (width > INT16_MAX || height > INT16_MAX) { diff --git a/src/textord/tordmain.h b/src/textord/tordmain.h index 9e2ea2d4..6e68e86c 100644 --- a/src/textord/tordmain.h +++ b/src/textord/tordmain.h @@ -32,8 +32,8 @@ namespace tesseract { class Tesseract; -void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob); -void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks); +void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob); +void assign_blobs_to_blocks2(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks); void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction); diff --git a/src/training/common/errorcounter.cpp b/src/training/common/errorcounter.cpp index 3fa732e1..d43fe585 100644 --- a/src/training/common/errorcounter.cpp +++ b/src/training/common/errorcounter.cpp @@ -43,7 +43,7 @@ const double kRatingEpsilon = 1.0 / 32; // with a debug flag and a keep_this argument to find out what is going on. double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, - const std::vector &page_images, SampleIterator *it, + const std::vector &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, std::string *fonts_report) { const int fontsize = it->sample_set()->NumFonts(); @@ -59,7 +59,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le for (it->Begin(); !it->AtEnd(); it->Next()) { TrainingSample *mutable_sample = it->MutableSample(); int page_index = mutable_sample->page_num(); - Pix *page_pix = + Image page_pix = 0 <= page_index && page_index < page_images.size() ? 
page_images[page_index] : nullptr; // No debug, no keep this. classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, &results); @@ -108,7 +108,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le // and a keep_this argument to find out what is going on. void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, - const std::vector &page_images, SampleIterator *it) { + const std::vector &page_images, SampleIterator *it) { int fontsize = it->sample_set()->NumFonts(); ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize); ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize); @@ -121,7 +121,7 @@ void ErrorCounter::DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifi for (it->Begin(); !it->AtEnd(); it->Next()) { TrainingSample *mutable_sample = it->MutableSample(); int page_index = mutable_sample->page_num(); - Pix *page_pix = + Image page_pix = 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : nullptr; // No debug, no keep this. old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, diff --git a/src/training/common/errorcounter.h b/src/training/common/errorcounter.h index 59a2570b..c20c222c 100644 --- a/src/training/common/errorcounter.h +++ b/src/training/common/errorcounter.h @@ -121,7 +121,7 @@ public: // * The return value is the un-weighted version of the scaled_error. static double ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, - const std::vector &page_images, SampleIterator *it, + const std::vector &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, std::string *fonts_report); // Tests a pair of classifiers, debugging errors of the new against the old. 
// See errorcounter.h for description of arguments. @@ -131,7 +131,7 @@ public: // with a debug flag and a keep_this argument to find out what is going on. static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, - const std::vector &page_images, SampleIterator *it); + const std::vector &page_images, SampleIterator *it); private: // Simple struct to hold an array of counts. diff --git a/src/training/common/mastertrainer.cpp b/src/training/common/mastertrainer.cpp index 579c8574..3e6efc3a 100644 --- a/src/training/common/mastertrainer.cpp +++ b/src/training/common/mastertrainer.cpp @@ -63,7 +63,7 @@ MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, MasterTrainer::~MasterTrainer() { delete[] fragments_; for (auto &page_image : page_images_) { - pixDestroy(&page_image); + page_image.destroy(); } } @@ -219,7 +219,7 @@ void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSa void MasterTrainer::LoadPageImages(const char *filename) { size_t offset = 0; int page; - Pix *pix; + Image pix; for (page = 0;; page++) { pix = pixReadFromMultipageTiff(filename, &offset); if (!pix) { diff --git a/src/training/common/mastertrainer.h b/src/training/common/mastertrainer.h index f6b0104a..1eeea94f 100644 --- a/src/training/common/mastertrainer.h +++ b/src/training/common/mastertrainer.h @@ -284,7 +284,7 @@ private: // Vector of Pix pointers used for classifiers that need the image. // Indexed by page_num_ in the samples. // These images are owned by the trainer and need to be pixDestroyed. - std::vector page_images_; + std::vector page_images_; // Vector of filenames of loaded tr files. 
std::vector tr_filenames_; }; diff --git a/src/training/degradeimage.cpp b/src/training/degradeimage.cpp index aad34496..cc5947f6 100644 --- a/src/training/degradeimage.cpp +++ b/src/training/degradeimage.cpp @@ -86,9 +86,9 @@ const int kMinRampSize = 1000; // the edges. // Finally a greyscale ramp provides a continuum of effects between exposure // levels. -Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) { - Pix *pix = pixConvertTo8(input, false); - pixDestroy(&input); +Image DegradeImage(Image input, int exposure, TRand *randomizer, float *rotation) { + Image pix = pixConvertTo8(input, false); + input.destroy(); input = pix; int width = pixGetWidth(input); int height = pixGetHeight(input); @@ -99,12 +99,12 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) // see http://www.leptonica.com/grayscale-morphology.html pix = input; input = pixErodeGray(pix, 3, 3); - pixDestroy(&pix); + pix.destroy(); } // A convolution is essential to any mode as no scanner produces an // image as sharp as the electronic image. pix = pixBlockconv(input, 1, 1); - pixDestroy(&input); + input.destroy(); // A small random rotation helps to make the edges jaggy in a realistic way. if (rotation != nullptr) { float radians_clockwise = 0.0f; @@ -117,7 +117,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) input = pixRotate(pix, radians_clockwise, L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 0, 0); // Rotate the boxes to match. 
*rotation = radians_clockwise; - pixDestroy(&pix); + pix.destroy(); } else { input = pix; } @@ -129,7 +129,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) // see http://www.leptonica.com/grayscale-morphology.html pix = input; input = pixErodeGray(pix, 3, 3); - pixDestroy(&pix); + pix.destroy(); } // The convolution really needed to be 2x2 to be realistic enough, but // we only have 3x3, so we have to bias the image darker or lose thin @@ -176,27 +176,27 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) // any spatial distortion and also by the integer reduction factor box_scale // so they will match what the network will output. // Returns nullptr on error. The returned Pix must be pixDestroyed. -Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, +Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, std::vector *boxes) { - Pix *distorted = pixCopy(nullptr, const_cast(pix)); + Image distorted = pixCopy(nullptr, pix); // Things to do to synthetic training data. if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { // TODO(rays) Cook noise in a more thread-safe manner than rand(). // Attempt to make the sequences reproducible. 
srand(randomizer->IntRand()); - Pix *pixn = pixAddGaussianNoise(distorted, 8.0); - pixDestroy(&distorted); + Image pixn = pixAddGaussianNoise(distorted, 8.0); + distorted.destroy(); if (smooth_noise) { distorted = pixBlockconv(pixn, 1, 1); - pixDestroy(&pixn); + pixn.destroy(); } else { distorted = pixn; } } if (blur && randomizer->SignedRand(1.0) > 0.0) { - Pix *blurred = pixBlockconv(distorted, 1, 1); - pixDestroy(&distorted); + Image blurred = pixBlockconv(distorted, 1, 1); + distorted.destroy(); distorted = blurred; } if (perspective) { @@ -219,7 +219,7 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi // Distorts anything that has a non-null pointer with the same pseudo-random // perspective distortion. Width and height only need to be set if there // is no pix. If there is a pix, then they will be taken from there. -void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, +void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, std::vector *boxes) { if (pix != nullptr && *pix != nullptr) { width = pixGetWidth(*pix); @@ -230,12 +230,12 @@ void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix l_int32 incolor = ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs); if (pix != nullptr && *pix != nullptr) { // Transform the image. - Pix *transformed = pixProjective(*pix, im_coeffs, incolor); + Image transformed = pixProjective(*pix, im_coeffs, incolor); if (transformed == nullptr) { tprintf("Projective transformation failed!!\n"); return; } - pixDestroy(pix); + pix->destroy(); *pix = transformed; } if (boxes != nullptr) { diff --git a/src/training/degradeimage.h b/src/training/degradeimage.h index 1d499e31..cb865a87 100644 --- a/src/training/degradeimage.h +++ b/src/training/degradeimage.h @@ -30,20 +30,20 @@ namespace tesseract { // If rotation is not nullptr, the clockwise rotation in radians is saved there. 
// The input pix must be 8 bit grey. (Binary with values 0 and 255 is OK.) // The input image is destroyed and a different image returned. -struct Pix *DegradeImage(struct Pix *input, int exposure, TRand *randomizer, float *rotation); +struct Image DegradeImage(struct Image input, int exposure, TRand *randomizer, float *rotation); // Creates and returns a Pix distorted by various means according to the bool // flags. If boxes is not nullptr, the boxes are resized/positioned according to // any spatial distortion and also by the integer reduction factor box_scale // so they will match what the network will output. // Returns nullptr on error. The returned Pix must be pixDestroyed. -Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, +Image PrepareDistortedPix(const Image pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, std::vector *boxes); // Distorts anything that has a non-null pointer with the same pseudo-random // perspective distortion. Width and height only need to be set if there // is no pix. If there is a pix, then they will be taken from there. -void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, +void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Image *pix, std::vector *boxes); // Computes the coefficients of a randomized projective transformation. 
// The image transform requires backward transformation coefficient, and the diff --git a/src/training/pango/stringrenderer.cpp b/src/training/pango/stringrenderer.cpp index efe333d7..ad883577 100644 --- a/src/training/pango/stringrenderer.cpp +++ b/src/training/pango/stringrenderer.cpp @@ -74,14 +74,14 @@ static bool RandBool(const double prob, TRand *rand) { } /* static */ -static Pix *CairoARGB32ToPixFormat(cairo_surface_t *surface) { +static Image CairoARGB32ToPixFormat(cairo_surface_t *surface) { if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) { printf("Unexpected surface format %d\n", cairo_image_surface_get_format(surface)); return nullptr; } const int width = cairo_image_surface_get_width(surface); const int height = cairo_image_surface_get_height(surface); - Pix *pix = pixCreate(width, height, 32); + Image pix = pixCreate(width, height, 32); int byte_stride = cairo_image_surface_get_stride(surface); for (int i = 0; i < height; ++i) { @@ -636,25 +636,25 @@ int StringRenderer::StripUnrenderableWords(std::string *utf8_text) const { return num_dropped; } -int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Pix **pix) { - Pix *orig_pix = nullptr; +int StringRenderer::RenderToGrayscaleImage(const char *text, int text_length, Image *pix) { + Image orig_pix = nullptr; int offset = RenderToImage(text, text_length, &orig_pix); if (orig_pix) { *pix = pixConvertTo8(orig_pix, false); - pixDestroy(&orig_pix); + orig_pix.destroy(); } return offset; } int StringRenderer::RenderToBinaryImage(const char *text, int text_length, int threshold, - Pix **pix) { - Pix *orig_pix = nullptr; + Image *pix) { + Image orig_pix = nullptr; int offset = RenderToImage(text, text_length, &orig_pix); if (orig_pix) { - Pix *gray_pix = pixConvertTo8(orig_pix, false); - pixDestroy(&orig_pix); + Image gray_pix = pixConvertTo8(orig_pix, false); + orig_pix.destroy(); *pix = pixThresholdToBinary(gray_pix, threshold); - pixDestroy(&gray_pix); + 
gray_pix.destroy(); } else { *pix = orig_pix; } @@ -719,9 +719,9 @@ std::string StringRenderer::ConvertFullwidthLatinToBasicLatin(const std::string } // Returns offset to end of text substring rendered in this method. -int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix) { +int StringRenderer::RenderToImage(const char *text, int text_length, Image *pix) { if (pix && *pix) { - pixDestroy(pix); + pix->destroy(); } InitPangoCairo(); @@ -813,7 +813,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix) // // int offset = 0; // do { -// Pix *pix; +// Image pix; // offset += renderer.RenderAllFontsToImage(min_proportion, txt + offset, // strlen(txt + offset), nullptr, // &pix); @@ -821,7 +821,7 @@ int StringRenderer::RenderToImage(const char *text, int text_length, Pix **pix) // } while (offset < strlen(text)); // int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text, int text_length, - std::string *font_used, Pix **image) { + std::string *font_used, Image *image) { *image = nullptr; // Select a suitable font to render the title with. const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%"; @@ -873,10 +873,10 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage, const char *text, // Add the font to the image. 
set_font(title_font); v_margin_ /= 8; - Pix *title_image = nullptr; + Image title_image = nullptr; RenderToBinaryImage(title, strlen(title), 128, &title_image); pixOr(*image, *image, title_image); - pixDestroy(&title_image); + title_image.destroy(); v_margin_ *= 8; set_font(orig_font); diff --git a/src/training/pango/stringrenderer.h b/src/training/pango/stringrenderer.h index 79c925a1..0ea43a01 100644 --- a/src/training/pango/stringrenderer.h +++ b/src/training/pango/stringrenderer.h @@ -34,6 +34,8 @@ #include "pango/pangocairo.h" #include "pango_font_info.h" +#include "image.h" + #include #include #include @@ -53,14 +55,14 @@ public: // Renders the text with the chosen font and returns the byte offset up to // which the text could be rendered so as to fit the specified page // dimensions. - int RenderToImage(const char *text, int text_length, Pix **pix); - int RenderToGrayscaleImage(const char *text, int text_length, Pix **pix); - int RenderToBinaryImage(const char *text, int text_length, int threshold, Pix **pix); + int RenderToImage(const char *text, int text_length, Image *pix); + int RenderToGrayscaleImage(const char *text, int text_length, Image *pix); + int RenderToBinaryImage(const char *text, int text_length, int threshold, Image *pix); // Renders a line of text with all available fonts that were able to render // at least min_coverage fraction of the input text. Use 1.0 to require that // a font be able to render all the text. int RenderAllFontsToImage(double min_coverage, const char *text, int text_length, - std::string *font_used, Pix **pix); + std::string *font_used, Image *pix); bool set_font(const std::string &desc); // Char spacing is in PIXELS!!!!. 
diff --git a/src/training/text2image.cpp b/src/training/text2image.cpp index 940278c9..c30ca33d 100644 --- a/src/training/text2image.cpp +++ b/src/training/text2image.cpp @@ -331,7 +331,7 @@ static void ExtractFontProperties(const std::string &utf8_text, StringRenderer * File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo"); } -static bool MakeIndividualGlyphs(Pix *pix, const std::vector &vbox, +static bool MakeIndividualGlyphs(Image pix, const std::vector &vbox, const int input_tiff_page) { // If checks fail, return false without exiting text2image if (!pix) { @@ -383,26 +383,26 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector &vbox, continue; } // Crop the boxed character - Pix *pix_glyph = pixClipRectangle(pix, b, nullptr); + Image pix_glyph = pixClipRectangle(pix, b, nullptr); if (!pix_glyph) { tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i); continue; } // Resize to square - Pix *pix_glyph_sq = + Image pix_glyph_sq = pixScaleToSize(pix_glyph, FLAGS_glyph_resized_size, FLAGS_glyph_resized_size); if (!pix_glyph_sq) { tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i); continue; } // Zero-pad - Pix *pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0); + Image pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq, FLAGS_glyph_num_border_pixels_to_pad, 0); if (!pix_glyph_sq_pad) { tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n", i); continue; } // Write out - Pix *pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false); + Image pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false); char filename[1024]; snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(), glyph_count++); if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) { @@ -413,10 +413,10 @@ static bool MakeIndividualGlyphs(Pix *pix, const std::vector &vbox, continue; } - pixDestroy(&pix_glyph); - pixDestroy(&pix_glyph_sq); - pixDestroy(&pix_glyph_sq_pad); - 
pixDestroy(&pix_glyph_sq_pad_8); + pix_glyph.destroy(); + pix_glyph_sq.destroy(); + pix_glyph_sq_pad.destroy(); + pix_glyph_sq_pad_8.destroy(); n_boxes_saved++; y_previous = y; } @@ -625,7 +625,7 @@ static int Main() { offset < strlen(to_render_utf8) && (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages); ++im, ++page_num) { tlog(1, "Starting page %d\n", im); - Pix *pix = nullptr; + Image pix = nullptr; if (FLAGS_find_fonts) { offset += render.RenderAllFontsToImage(FLAGS_min_coverage, to_render_utf8 + offset, strlen(to_render_utf8 + offset), &font_used, &pix); @@ -655,10 +655,10 @@ static int Main() { page_rotation.push_back(rotation); } - Pix *gray_pix = pixConvertTo8(pix, false); - pixDestroy(&pix); - Pix *binary = pixThresholdToBinary(gray_pix, 128); - pixDestroy(&gray_pix); + Image gray_pix = pixConvertTo8(pix, false); + pix.destroy(); + Image binary = pixThresholdToBinary(gray_pix, 128); + gray_pix.destroy(); char tiff_name[1024]; if (FLAGS_find_fonts) { if (FLAGS_render_per_font) { @@ -681,7 +681,7 @@ static int Main() { tprintf("ERROR: Individual glyphs not saved\n"); } } - pixDestroy(&binary); + binary.destroy(); } if (FLAGS_find_fonts && offset != 0) { // We just want a list of names, or some sample images so we don't need diff --git a/src/viewer/scrollview.cpp b/src/viewer/scrollview.cpp index d2751137..44b3dfe6 100644 --- a/src/viewer/scrollview.cpp +++ b/src/viewer/scrollview.cpp @@ -784,7 +784,7 @@ void ScrollView::ZoomToRectangle(int x1, int y1, int x2, int y2) { } // Send an image of type Pix. 
-void ScrollView::Image(struct Pix *image, int x_pos, int y_pos) { +void ScrollView::Image(struct Image image, int x_pos, int y_pos) { l_uint8 *data; size_t size; pixWriteMem(&data, &size, image, IFF_PNG); diff --git a/src/viewer/scrollview.h b/src/viewer/scrollview.h index 9e5993f5..bfe7ceec 100644 --- a/src/viewer/scrollview.h +++ b/src/viewer/scrollview.h @@ -31,6 +31,8 @@ #ifndef TESSERACT_VIEWER_SCROLLVIEW_H_ #define TESSERACT_VIEWER_SCROLLVIEW_H_ +#include "image.h" + #include #include @@ -209,7 +211,7 @@ public: *******************************************************************************/ // Draw a Pix on (x,y). - void Image(Pix *image, int x_pos, int y_pos); + void Image(Image image, int x_pos, int y_pos); // Flush buffers and update display. static void Update(); @@ -353,11 +355,11 @@ public: private: // Transfers a binary Image. - void TransferBinaryImage(struct Pix *image); + void TransferBinaryImage(struct Image image); // Transfers a gray scale Image. - void TransferGrayImage(struct Pix *image); + void TransferGrayImage(struct Image image); // Transfers a 32-Bit Image. - void Transfer32bppImage(struct Pix *image); + void Transfer32bppImage(struct Image image); // Sets up ScrollView, depending on the variables from the constructor. 
void Initialize(const char *name, int x_pos, int y_pos, int x_size, int y_size, int x_canvas_size, diff --git a/unittest/apiexample_test.cc b/unittest/apiexample_test.cc index 8ce720b6..b0760d66 100644 --- a/unittest/apiexample_test.cc +++ b/unittest/apiexample_test.cc @@ -31,6 +31,7 @@ #include // std::unique_ptr #include #include "include_gunit.h" +#include "image.h" namespace tesseract { @@ -66,7 +67,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat std::string gtText((std::istreambuf_iterator(file)), std::istreambuf_iterator()); auto api = std::make_unique(); ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); + Image image = pixRead(imgname); ASSERT_TRUE(image != nullptr) << "Failed to read test image."; api->SetImage(image); outText = api->GetUTF8Text(); @@ -74,7 +75,7 @@ void OCRTester(const char *imgname, const char *groundtruth, const char *tessdat << ::testing::PrintToString(lang); api->End(); delete[] outText; - pixDestroy(&image); + image.destroy(); } class MatchGroundTruth : public QuickTest, public ::testing::WithParamInterface {}; diff --git a/unittest/applybox_test.cc b/unittest/applybox_test.cc index cf266c01..cf745667 100644 --- a/unittest/applybox_test.cc +++ b/unittest/applybox_test.cc @@ -37,12 +37,12 @@ protected: src_pix_ = nullptr; } ~ApplyBoxTest() override { - pixDestroy(&src_pix_); + src_pix_.destroy(); } bool SetImage(const char *filename) { bool found = false; - pixDestroy(&src_pix_); + src_pix_.destroy(); src_pix_ = pixRead(TestDataNameToPath(filename).c_str()); if (api_.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) { api_.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK); @@ -101,7 +101,7 @@ protected: delete it; } - Pix *src_pix_; + Image src_pix_; std::string ocr_text_; tesseract::TessBaseAPI api_; }; diff --git a/unittest/baseapi_test.cc b/unittest/baseapi_test.cc index 1357ba21..84cf9297 100644 --- 
a/unittest/baseapi_test.cc +++ b/unittest/baseapi_test.cc @@ -44,7 +44,7 @@ class FriendlyTessBaseAPI : public tesseract::TessBaseAPI { FRIEND_TEST(TesseractTest, LSTMGeometryTest); }; -std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Pix *pix) { +std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) { tess->SetImage(pix); char *result = tess->GetUTF8Text(); std::string ocr_result = result; @@ -70,14 +70,14 @@ TEST_F(TesseractTest, BasicTesseractTest) { std::string truth_text; std::string ocr_text; if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) { - Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); + Image src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK( file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults())); absl::StripAsciiWhitespace(&truth_text); EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str()); - pixDestroy(&src_pix); + src_pix.destroy(); } else { // eng.traineddata not found. GTEST_SKIP(); @@ -105,7 +105,7 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) { EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes)); boxaDestroy(&block_boxes); boxaDestroy(¶_boxes); - pixDestroy(&src_pix); + src_pix.destroy(); #endif } else { // eng.traineddata not found. @@ -122,7 +122,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) { GTEST_SKIP(); return; } - Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); + Image src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); CHECK(src_pix); api.SetImage(src_pix); char *result = api.GetHOCRText(0); @@ -130,7 +130,7 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) { EXPECT_THAT(result, HasSubstr("Hello")); EXPECT_THAT(result, HasSubstr("
]* baseline [-.0-9]+ [-.0-9]+"})); delete[] result; - pixDestroy(&src_pix); + src_pix.destroy(); } // Tests that Tesseract gets exactly the right answer on some page numbers. @@ -182,23 +182,23 @@ TEST_F(TesseractTest, AdaptToWordStrTest) { // Train on the training text. for (int i = 0; kTrainingPages[i] != nullptr; ++i) { std::string image_file = TestDataNameToPath(kTrainingPages[i]); - Pix *src_pix = pixRead(image_file.c_str()); + Image src_pix = pixRead(image_file.c_str()); CHECK(src_pix); api.SetImage(src_pix); EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i])) << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " << image_file; - pixDestroy(&src_pix); + src_pix.destroy(); } // Test the test text. api.SetVariable("tess_bn_matching", "1"); api.SetPageSegMode(tesseract::PSM_SINGLE_WORD); for (int i = 0; kTestPages[i] != nullptr; ++i) { - Pix *src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str()); + Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); absl::StripAsciiWhitespace(&truth_text); EXPECT_STREQ(kTestText[i], ocr_text.c_str()); - pixDestroy(&src_pix); + src_pix.destroy(); } #endif } @@ -213,14 +213,14 @@ TEST_F(TesseractTest, BasicLSTMTest) { GTEST_SKIP(); return; } - Pix *src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str()); + Image src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK( file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults())); absl::StripAsciiWhitespace(&truth_text); EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str()); - pixDestroy(&src_pix); + src_pix.destroy(); } // Test that LSTM's character bounding boxes are properly converted to @@ -230,7 +230,7 @@ TEST_F(TesseractTest, BasicLSTMTest) { // errors due to float/int conversions (e.g., see OUTLINE::move() in // 
ccstruct/poutline.h) Instead, we do a loose check. TEST_F(TesseractTest, LSTMGeometryTest) { - Pix *src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str()); + Image src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str()); FriendlyTessBaseAPI api; if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) { // eng.traineddata not found. @@ -270,7 +270,7 @@ TEST_F(TesseractTest, LSTMGeometryTest) { EXPECT_LT(lstm_blob_box.top() - tess_blob_box.top(), 5); } } - pixDestroy(&src_pix); + src_pix.destroy(); } TEST_F(TesseractTest, InitConfigOnlyTest) { @@ -315,7 +315,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) { const std::string kTessdataPath = TESSDATA_DIR; // Preload images and verify that OCR is correct on them individually. - std::vector pix(num_langs); + std::vector pix(num_langs); for (int i = 0; i < num_langs; ++i) { SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i])); std::string path = file::JoinPath(TESTING_DIR, image_files[i]); @@ -346,7 +346,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) { } for (int i = 0; i < num_langs; ++i) { - pixDestroy(&pix[i]); + pix[i].destroy(); } } diff --git a/unittest/baseapi_thread_test.cc b/unittest/baseapi_thread_test.cc index f2d03905..94557463 100644 --- a/unittest/baseapi_thread_test.cc +++ b/unittest/baseapi_thread_test.cc @@ -32,6 +32,7 @@ #include "commandlineflags.h" #include "include_gunit.h" #include "log.h" +#include "image.h" // Run with Tesseract instances. 
BOOL_PARAM_FLAG(test_tesseract, true, "Test tesseract instances"); @@ -97,7 +98,7 @@ protected: const int n = num_langs_ * FLAGS_reps; for (int i = 0; i < n; ++i) { std::string path = TESTING_DIR "/" + image_files[i % num_langs_]; - Pix *new_pix = pixRead(path.c_str()); + Image new_pix = pixRead(path.c_str()); QCHECK(new_pix != nullptr) << "Could not read " << path; pix_.push_back(new_pix); } @@ -110,7 +111,7 @@ protected: static void TearDownTestCase() { for (auto &pix : pix_) { - pixDestroy(&pix); + pix.destroy(); } } @@ -127,7 +128,7 @@ protected: std::unique_ptr pool_; static int pool_size_; #endif - static std::vector pix_; + static std::vector pix_; static std::vector langs_; static std::vector gt_text_; static int num_langs_; @@ -137,7 +138,7 @@ protected: #ifdef INCLUDE_TENSORFLOW int BaseapiThreadTest::pool_size_; #endif -std::vector BaseapiThreadTest::pix_; +std::vector BaseapiThreadTest::pix_; std::vector BaseapiThreadTest::langs_; std::vector BaseapiThreadTest::gt_text_; int BaseapiThreadTest::num_langs_; @@ -147,7 +148,7 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) { EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str())); } -static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) { +static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) { tess->SetImage(pix); char *result = tess->GetUTF8Text(); *ocr_text = result; @@ -155,7 +156,7 @@ static void GetCleanedText(TessBaseAPI *tess, Pix *pix, std::string *ocr_text) { absl::StripAsciiWhitespace(ocr_text); } -static void VerifyTextResult(TessBaseAPI *tess, Pix *pix, const std::string &lang, +static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang, const std::string &expected_text) { TessBaseAPI *tess_local = nullptr; if (tess) { diff --git a/unittest/equationdetect_test.cc b/unittest/equationdetect_test.cc index b234ce4c..e44b243b 100644 --- a/unittest/equationdetect_test.cc +++ 
b/unittest/equationdetect_test.cc @@ -61,7 +61,7 @@ public: } // Set up pix_binary for lang_tesseract_. - void SetPixBinary(Pix *pix) { + void SetPixBinary(Image pix) { CHECK_EQ(1, pixGetDepth(pix)); *(lang_tesseract_->mutable_pix_binary()) = pix; } @@ -137,7 +137,7 @@ protected: } // Add a BLOCK covering the whole page. - void AddPageBlock(Pix *pix, BLOCK_LIST *blocks) { + void AddPageBlock(Image pix, BLOCK_LIST *blocks) { CHECK(pix != nullptr); CHECK(blocks != nullptr); BLOCK_IT block_it(blocks); @@ -183,7 +183,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) { #else // TODO: missing equ_gt1.tif // Load Image. std::string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif"); - Pix *pix_binary = pixRead(imagefile.c_str()); + Image pix_binary = pixRead(imagefile.c_str()); CHECK(pix_binary != nullptr && pixGetDepth(pix_binary) == 1); // Get components. @@ -224,7 +224,7 @@ TEST_F(EquationFinderTest, IdentifySpecialText) { EXPECT_LE(10 - kCountRange, stt_count[BSTT_UNCLEAR]); // Release memory. - pixDestroy(&pix_binary); + pix_binary.destroy(); #endif } @@ -364,7 +364,7 @@ TEST_F(EquationFinderTest, CheckSeedBlobsCount) { TEST_F(EquationFinderTest, ComputeForegroundDensity) { // Create the pix with top half foreground, bottom half background. 
int width = 1024, height = 768; - Pix *pix = pixCreate(width, height, 1); + Image pix = pixCreate(width, height, 1); pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, nullptr, 0, 0); TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20), box3(100, height - 40, 140, height); @@ -402,7 +402,7 @@ TEST_F(EquationFinderTest, CountAlignment) { } TEST_F(EquationFinderTest, ComputeCPsSuperBBox) { - Pix *pix = pixCreate(1001, 1001, 1); + Image pix = pixCreate(1001, 1001, 1); equation_det_->SetPixBinary(pix); ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000)); diff --git a/unittest/fuzzers/fuzzer-api.cpp b/unittest/fuzzers/fuzzer-api.cpp index 41178240..045690a9 100644 --- a/unittest/fuzzers/fuzzer-api.cpp +++ b/unittest/fuzzers/fuzzer-api.cpp @@ -68,7 +68,7 @@ extern "C" int LLVMFuzzerInitialize(int * /*pArgc*/, char ***pArgv) { } static PIX *createPix(BitReader &BR, const size_t width, const size_t height) { - Pix *pix = pixCreate(width, height, 1); + Image pix = pixCreate(width, height, 1); if (pix == nullptr) { printf("pix creation failed\n"); @@ -93,7 +93,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { char *outText = api->GetUTF8Text(); - pixDestroy(&pix); + pix.destroy(); delete[] outText; return 0; diff --git a/unittest/layout_test.cc b/unittest/layout_test.cc index 648821cb..6b960702 100644 --- a/unittest/layout_test.cc +++ b/unittest/layout_test.cc @@ -65,11 +65,11 @@ protected: src_pix_ = nullptr; } ~LayoutTest() override { - pixDestroy(&src_pix_); + src_pix_.destroy(); } void SetImage(const char *filename, const char *lang) { - pixDestroy(&src_pix_); + src_pix_.destroy(); src_pix_ = pixRead(TestDataNameToPath(filename).c_str()); api_.Init(TessdataPath().c_str(), lang, tesseract::OEM_TESSERACT_ONLY); api_.SetPageSegMode(tesseract::PSM_AUTO); @@ -182,7 +182,7 @@ protected: } while (it->Next(tesseract::RIL_BLOCK)); } - Pix *src_pix_; + Image src_pix_; std::string ocr_text_; tesseract::TessBaseAPI 
api_; }; diff --git a/unittest/lstmtrainer_test.cc b/unittest/lstmtrainer_test.cc index c9709437..6d563e46 100644 --- a/unittest/lstmtrainer_test.cc +++ b/unittest/lstmtrainer_test.cc @@ -87,7 +87,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) { // baseapi_test.cc). TessBaseAPI api; api.Init(FLAGS_test_tmpdir, "deu", tesseract::OEM_LSTM_ONLY); - Pix *src_pix = pixRead(TestingNameToPath("phototest.tif").c_str()); + Image src_pix = pixRead(TestingNameToPath("phototest.tif").c_str()); CHECK(src_pix); api.SetImage(src_pix); std::unique_ptr result(api.GetUTF8Text()); @@ -96,7 +96,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) { file::GetContents(TestingNameToPath("phototest.gold.txt"), &truth_text, file::Defaults())); EXPECT_STREQ(truth_text.c_str(), result.get()); - pixDestroy(&src_pix); + src_pix.destroy(); } } // namespace tesseract diff --git a/unittest/mastertrainer_test.cc b/unittest/mastertrainer_test.cc index 949929d2..7694359e 100644 --- a/unittest/mastertrainer_test.cc +++ b/unittest/mastertrainer_test.cc @@ -81,7 +81,7 @@ public: // If keep_this (a shape index) is >= 0, then the results should always // contain keep_this, and (if possible) anything of intermediate confidence. // The return value is the number of classes saved in results. - int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, + int ClassifySample(const TrainingSample &sample, Image page_pix, int debug, UNICHAR_ID keep_this, std::vector *results) override { results->clear(); // Everything except the first kNumNonReject is a reject. 
diff --git a/unittest/osd_test.cc b/unittest/osd_test.cc index d8e9433e..5677e082 100644 --- a/unittest/osd_test.cc +++ b/unittest/osd_test.cc @@ -25,6 +25,7 @@ #include // std::unique_ptr #include #include "include_gunit.h" +#include "image.h" namespace tesseract { @@ -37,7 +38,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat // log.info() << tessdatadir << " for image: " << imgname << std::endl; auto api = std::make_unique(); ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); + Image image = pixRead(imgname); ASSERT_TRUE(image != nullptr) << "Failed to read test image."; api->SetImage(image); int orient_deg; @@ -53,7 +54,7 @@ static void OSDTester(int expected_deg, const char *imgname, const char *tessdat orient_deg, orient_conf, script_name, script_conf); EXPECT_EQ(expected_deg, orient_deg); api->End(); - pixDestroy(&image); + image.destroy(); } #endif diff --git a/unittest/pagesegmode_test.cc b/unittest/pagesegmode_test.cc index 030c6bdf..87f72d96 100644 --- a/unittest/pagesegmode_test.cc +++ b/unittest/pagesegmode_test.cc @@ -19,6 +19,7 @@ #include #include "helpers.h" #include "include_gunit.h" +#include "image.h" #include "log.h" namespace tesseract { @@ -37,7 +38,7 @@ class PageSegModeTest : public testing::Test { protected: PageSegModeTest() = default; ~PageSegModeTest() override { - pixDestroy(&src_pix_); + src_pix_.destroy(); } void SetUp() override { @@ -46,7 +47,7 @@ protected: } void SetImage(const char *filename) { - pixDestroy(&src_pix_); + src_pix_.destroy(); src_pix_ = pixRead(filename); api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY); api_.SetImage(src_pix_); @@ -76,7 +77,7 @@ protected: delete[] result; } - Pix *src_pix_ = nullptr; + Image src_pix_ = nullptr; std::string ocr_text_; tesseract::TessBaseAPI api_; }; diff --git a/unittest/progress_test.cc b/unittest/progress_test.cc index 9a1db265..9b91ca54 100644 --- 
a/unittest/progress_test.cc +++ b/unittest/progress_test.cc @@ -20,6 +20,7 @@ #include #include +#include "image.h" #include #include "gmock/gmock.h" @@ -93,7 +94,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c auto api = std::make_unique(); ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); + Image image = pixRead(imgname); ASSERT_TRUE(image != nullptr) << "Failed to read test image."; api->SetImage(image); @@ -109,7 +110,7 @@ void ClassicProgressTester(const char *imgname, const char *tessdatadir, const c EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%"; api->End(); - pixDestroy(&image); + image.destroy(); } void NewProgressTester(const char *imgname, const char *tessdatadir, const char *lang) { @@ -124,7 +125,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char auto api = std::make_unique(); ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); + Image image = pixRead(imgname); ASSERT_TRUE(image != nullptr) << "Failed to read test image."; api->SetImage(image); @@ -141,7 +142,7 @@ void NewProgressTester(const char *imgname, const char *tessdatadir, const char EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%"; api->End(); - pixDestroy(&image); + image.destroy(); } TEST(QuickTest, ClassicProgressReporting) { diff --git a/unittest/resultiterator_test.cc b/unittest/resultiterator_test.cc index cc398ded..cf9cd250 100644 --- a/unittest/resultiterator_test.cc +++ b/unittest/resultiterator_test.cc @@ -40,7 +40,7 @@ protected: // api_.ReadConfigFile(FLAGS_tess_config.c_str()); api_.SetPageSegMode(tesseract::PSM_AUTO); api_.SetImage(src_pix_); - pixDestroy(&src_pix_); + src_pix_.destroy(); src_pix_ = api_.GetInputImage(); } @@ -52,7 +52,7 @@ protected: int width = pixGetWidth(src_pix_); int height = pixGetHeight(src_pix_); int 
depth = pixGetDepth(src_pix_); - Pix *pix = pixCreate(width, height, depth); + Image pix = pixCreate(width, height, depth); EXPECT_TRUE(depth == 1 || depth == 8); if (depth == 8) { pixSetAll(pix); @@ -68,7 +68,7 @@ protected: LOG(INFO) << "BBox: [L:" << left << ", T:" << top << ", R:" << right << ", B:" << bottom << "]" << "\n"; - Pix *block_pix; + Image block_pix; if (depth == 1) { block_pix = it->GetBinaryImage(im_level); pixRasterop(pix, left, top, right - left, bottom - top, PIX_SRC ^ PIX_DST, block_pix, 0, 0); @@ -78,14 +78,14 @@ protected: PIX_SRC & PIX_DST, block_pix, 0, 0); } CHECK(block_pix != nullptr); - pixDestroy(&block_pix); + block_pix.destroy(); } while (it->Next(level)); // if (base::GetFlag(FLAGS_v) >= 1) // pixWrite(OutputNameToPath("rebuilt.png").c_str(), pix, IFF_PNG); pixRasterop(pix, 0, 0, width, height, PIX_SRC ^ PIX_DST, src_pix_, 0, 0); if (depth == 8) { - Pix *binary_pix = pixThresholdToBinary(pix, 128); - pixDestroy(&pix); + Image binary_pix = pixThresholdToBinary(pix, 128); + pix.destroy(); pixInvert(binary_pix, binary_pix); pix = binary_pix; } @@ -98,7 +98,7 @@ protected: LOG(INFO) << "outfile = " << outfile << "\n"; pixWrite(outfile.c_str(), pix, IFF_PNG); } - pixDestroy(&pix); + pix.destroy(); LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount); EXPECT_LE(pixcount, max_diff); // if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff); @@ -206,7 +206,7 @@ protected: } // Objects declared here can be used by all tests in the test case for Foo. - Pix *src_pix_; // Borrowed from api_. Do not destroy. + Image src_pix_; // Borrowed from api_. Do not destroy. 
std::string ocr_text_; tesseract::TessBaseAPI api_; }; diff --git a/unittest/stringrenderer_test.cc b/unittest/stringrenderer_test.cc index 72c305fb..59be6338 100644 --- a/unittest/stringrenderer_test.cc +++ b/unittest/stringrenderer_test.cc @@ -61,7 +61,7 @@ protected: PangoFontInfo::SoftInitFontConfig(); // init early } - void DisplayClusterBoxes(Pix *pix) { + void DisplayClusterBoxes(Image pix) { if (!FLAGS_display) { return; } @@ -72,34 +72,34 @@ protected: boxaAddBox(boxes, const_cast(boxchar->box()), L_CLONE); } } - Pix *box_pix = pixDrawBoxaRandom(pix, boxes, 1); + Image box_pix = pixDrawBoxaRandom(pix, boxes, 1); boxaDestroy(&boxes); pixDisplay(box_pix, 0, 0); - pixDestroy(&box_pix); + box_pix.destroy(); } std::unique_ptr renderer_; }; TEST_F(StringRendererTest, DoesRenderToImage) { renderer_ = std::make_unique("Verdana 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); renderer_ = std::make_unique("UnBatang 10", 600, 600); EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix)); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); renderer_ = std::make_unique("Lohit Hindi 10", 600, 600); EXPECT_EQ(strlen(kHinText), renderer_->RenderToImage(kHinText, strlen(kHinText), &pix)); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); // RTL text renderer_ = std::make_unique("Arab 10", 600, 600); @@ -107,7 +107,7 @@ TEST_F(StringRendererTest, DoesRenderToImage) { EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); // Mixed direction Arabic + english text renderer_ = std::make_unique("Arab 10", 600, 600); @@ -115,7 
+115,7 @@ TEST_F(StringRendererTest, DoesRenderToImage) { EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); } TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) { @@ -123,12 +123,12 @@ TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) { // Underline all words but NOT intervening spaces. renderer_->set_underline_start_prob(1.0); renderer_->set_underline_continuation_prob(0); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); renderer_->ClearBoxes(); // Underline all words AND intervening spaces. @@ -138,7 +138,7 @@ TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) { EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); renderer_->ClearBoxes(); // Underline words and intervening spaces with 0.5 prob. 
@@ -148,14 +148,14 @@ TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) { EXPECT_TRUE(pix != nullptr); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); } TEST_F(StringRendererTest, DoesHandleNewlineCharacters) { const char kRawText[] = "\n\n\n A \nB \nC \n\n\n"; const char kStrippedText[] = " A B C "; // text with newline chars removed renderer_ = std::make_unique("Verdana 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kRawText), renderer_->RenderToImage(kRawText, strlen(kRawText), &pix)); EXPECT_TRUE(pix != nullptr); const std::vector &boxchars = renderer_->GetBoxes(); @@ -168,14 +168,14 @@ TEST_F(StringRendererTest, DoesHandleNewlineCharacters) { } } DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); } TEST_F(StringRendererTest, DoesRenderLigatures) { renderer_ = std::make_unique("Arab 12", 600, 250); const char kArabicLigature[] = "لا"; - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kArabicLigature), renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix)); EXPECT_TRUE(pix != nullptr); @@ -185,13 +185,13 @@ TEST_F(StringRendererTest, DoesRenderLigatures) { EXPECT_TRUE(boxes[0]->box() != nullptr); EXPECT_STREQ(kArabicLigature, boxes[0]->ch().c_str()); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); renderer_ = std::make_unique("Arab 12", 600, 250); const char kArabicMixedText[] = "والفكر والصراع 1234,\nوالفكر لا والصراع"; renderer_->RenderToImage(kArabicMixedText, strlen(kArabicMixedText), &pix); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); } static int FindBoxCharXCoord(const std::vector &boxchars, const std::string &ch) { @@ -205,7 +205,7 @@ static int FindBoxCharXCoord(const std::vector &boxchars, const std:: TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) { renderer_ = std::make_unique("Arab 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; // Arabic letters should be in 
decreasing x-coordinates const char kArabicWord[] = "\u0644\u0627\u0641\u0643\u0631"; const std::string kRevWord = "\u0631\u0643\u0641\u0627\u0644"; @@ -225,12 +225,12 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) { // Just to prove there was a ligature, the number of texts is less than the // number of unicodes. EXPECT_LT(texts.size(), 5); - pixDestroy(&pix); + pix.destroy(); } TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { renderer_ = std::make_unique("Arab 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; // Arabic letters should be in decreasing x-coordinates const char kArabicWord[] = "والفكر"; renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix); @@ -239,7 +239,7 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { for (size_t i = 1; i < boxchars.size(); ++i) { EXPECT_GT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch(); } - pixDestroy(&pix); + pix.destroy(); // English letters should be in increasing x-coordinates const char kEnglishWord[] = "Google"; @@ -249,7 +249,7 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { for (size_t i = 1; i < boxchars.size(); ++i) { EXPECT_LT(boxchars[i - 1]->box()->x, boxchars[i]->box()->x) << boxchars[i - 1]->ch(); } - pixDestroy(&pix); + pix.destroy(); // Mixed text should satisfy both. 
renderer_->ClearBoxes(); @@ -257,30 +257,30 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { EXPECT_LT(FindBoxCharXCoord(boxchars, "a"), FindBoxCharXCoord(boxchars, "b")); EXPECT_LT(FindBoxCharXCoord(boxchars, "1"), FindBoxCharXCoord(boxchars, "2")); EXPECT_GT(FindBoxCharXCoord(boxchars, "و"), FindBoxCharXCoord(boxchars, "ر")); - pixDestroy(&pix); + pix.destroy(); } TEST_F(StringRendererTest, DoesRenderVerticalText) { - Pix *pix = nullptr; + Image pix = nullptr; renderer_ = std::make_unique("UnBatang 10", 600, 600); renderer_->set_vertical_text(true); EXPECT_EQ(strlen(kKorText), renderer_->RenderToImage(kKorText, strlen(kKorText), &pix)); EXPECT_GT(renderer_->GetBoxes().size(), 0); DisplayClusterBoxes(pix); - pixDestroy(&pix); + pix.destroy(); } // Checks that we preserve charboxes across RenderToImage calls, with // appropriate page numbers. TEST_F(StringRendererTest, DoesKeepAllImageBoxes) { renderer_ = std::make_unique("Verdana 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; int num_boxes_per_page = 0; const int kNumTrials = 2; for (int i = 0; i < kNumTrials; ++i) { EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); EXPECT_TRUE(pix != nullptr); - pixDestroy(&pix); + pix.destroy(); EXPECT_GT(renderer_->GetBoxes().size(), 0); if (!num_boxes_per_page) { num_boxes_per_page = renderer_->GetBoxes().size(); @@ -295,25 +295,25 @@ TEST_F(StringRendererTest, DoesKeepAllImageBoxes) { TEST_F(StringRendererTest, DoesClearBoxes) { renderer_ = std::make_unique("Verdana 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); - pixDestroy(&pix); + pix.destroy(); EXPECT_GT(renderer_->GetBoxes().size(), 0); const int num_boxes_per_page = renderer_->GetBoxes().size(); renderer_->ClearBoxes(); EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); - pixDestroy(&pix); + pix.destroy(); 
EXPECT_EQ(num_boxes_per_page, renderer_->GetBoxes().size()); } TEST_F(StringRendererTest, DoesLigatureTextForRendering) { renderer_ = std::make_unique("Verdana 10", 600, 600); renderer_->set_add_ligatures(true); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngNonLigatureText), renderer_->RenderToImage(kEngNonLigatureText, strlen(kEngNonLigatureText), &pix)); - pixDestroy(&pix); + pix.destroy(); // There should be one less box than letters due to the 'fi' ligature. EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size()); // The output box text should be ligatured. @@ -322,10 +322,10 @@ TEST_F(StringRendererTest, DoesLigatureTextForRendering) { TEST_F(StringRendererTest, DoesRetainInputLigatureForRendering) { renderer_ = std::make_unique("Verdana 10", 600, 600); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngLigatureText), renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText), &pix)); - pixDestroy(&pix); + pix.destroy(); // There should be one less box than letters due to the 'fi' ligature. EXPECT_EQ(strlen(kEngNonLigatureText) - 1, renderer_->GetBoxes().size()); // The output box text should be ligatured. 
@@ -344,9 +344,9 @@ TEST_F(StringRendererTest, DoesStripUnrenderableWords) { TEST_F(StringRendererTest, DoesRenderWordBoxes) { renderer_ = std::make_unique("Verdana 10", 600, 600); renderer_->set_output_word_boxes(true); - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); - pixDestroy(&pix); + pix.destroy(); // Verify #boxchars = #words + #spaces std::vector words = absl::StrSplit(kEngText, ' ', absl::SkipEmpty()); const int kNumSpaces = words.size() - 1; @@ -366,10 +366,10 @@ TEST_F(StringRendererTest, DoesRenderWordBoxes) { TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) { renderer_ = std::make_unique("Verdana 10", 600, 600); renderer_->set_output_word_boxes(true); - Pix *pix = nullptr; + Image pix = nullptr; const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog"; EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix)); - pixDestroy(&pix); + pix.destroy(); // Verify #boxchars = #words + #spaces + #newlines std::vector words = absl::StrSplit(kMultlineText, absl::ByAnyChar(" \n"), absl::SkipEmpty()); @@ -392,7 +392,7 @@ TEST_F(StringRendererTest, DoesRenderAllFontsToImage) { size_t offset = 0; std::string font_used; do { - Pix *pix = nullptr; + Image pix = nullptr; font_used.clear(); offset += renderer_->RenderAllFontsToImage(1.0, kEngText + offset, strlen(kEngText + offset), &font_used, &pix); @@ -403,7 +403,7 @@ TEST_F(StringRendererTest, DoesRenderAllFontsToImage) { if (FLAGS_display) { pixDisplay(pix, 0, 0); } - pixDestroy(&pix); + pix.destroy(); } while (offset < strlen(kEngText)); } @@ -411,9 +411,9 @@ TEST_F(StringRendererTest, DoesNotRenderWordJoiner) { renderer_ = std::make_unique("Verdana 10", 500, 200); const std::string word = "A- -B C-D A BC"; const std::string joined_word = StringRenderer::InsertWordJoiners(word); - Pix *pix = nullptr; + Image pix = nullptr; 
renderer_->RenderToImage(joined_word.c_str(), joined_word.length(), &pix); - pixDestroy(&pix); + pix.destroy(); const std::vector &boxchars = renderer_->GetBoxes(); const std::string kWordJoinerUTF8 = "\u2060"; ASSERT_EQ(word.length(), boxchars.size()); @@ -428,11 +428,11 @@ TEST_F(StringRendererTest, DISABLED_DoesDropUncoveredChars) { renderer_->set_drop_uncovered_chars(true); const std::string kWord = "office"; const std::string kCleanWord = "oice"; - Pix *pix = nullptr; + Image pix = nullptr; EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length())); EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length())); int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix); - pixDestroy(&pix); + pix.destroy(); const std::vector &boxchars = renderer_->GetBoxes(); EXPECT_EQ(kWord.length(), offset); ASSERT_EQ(kCleanWord.length(), boxchars.size()); diff --git a/unittest/textlineprojection_test.cc b/unittest/textlineprojection_test.cc index 59a1322f..48dbdf1b 100644 --- a/unittest/textlineprojection_test.cc +++ b/unittest/textlineprojection_test.cc @@ -46,13 +46,13 @@ protected: projection_ = nullptr; } ~TextlineProjectionTest() override { - pixDestroy(&src_pix_); - pixDestroy(&bin_pix_); + src_pix_.destroy(); + bin_pix_.destroy(); delete finder_; } void SetImage(const char *filename) { - pixDestroy(&src_pix_); + src_pix_.destroy(); src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str()); api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY); api_.SetPageSegMode(tesseract::PSM_AUTO_OSD); @@ -89,7 +89,7 @@ protected: BLOCK_LIST src_blocks; BLOCK_IT block_it(&src_blocks); block_it.add_to_end(block); - Pix *photomask_pix = nullptr; + Image photomask_pix = nullptr; // The blocks made by the ColumnFinder. Moved to blocks before return. 
BLOCK_LIST found_blocks; TO_BLOCK_LIST temp_blocks; @@ -105,7 +105,7 @@ protected: nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks), 0); projection_ = finder_->projection(); - pixDestroy(&photomask_pix); + photomask_pix.destroy(); } // Helper evaluates the given box, expects the result to be greater_than @@ -232,8 +232,8 @@ protected: delete it; } - Pix *src_pix_; - Pix *bin_pix_; + Image src_pix_; + Image bin_pix_; BLOCK_LIST blocks_; std::string ocr_text_; tesseract::TessBaseAPI api_;