mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:13:49 +08:00
Add more binarization options
Use functions from Leptonica to provide more binarization options. The new options are: 1) Adaptive Otsu and 2) Sauvola (Tiled).
This commit is contained in:
parent
65118b2e3a
commit
11c73c9481
@ -278,6 +278,17 @@ enum OcrEngineMode {
|
||||
OEM_COUNT // Number of OEMs
|
||||
};
|
||||
|
||||
/**
|
||||
* Thresholding methods. Except when OTSU_TRESH is chosen,
|
||||
* Leptonica is used for thresholding.
|
||||
*/
|
||||
enum ThreshMethod {
|
||||
OTSU_TRESH, // Legacy Tesseract's Otsu thresholding
|
||||
ADAPTIVE_OTSU_TRESH, // Adaptive Otsu thresholding (Leptonica)
|
||||
SAUVOLA_TILED_TRESH, // Tiled Sauvola binarization (Leptonica)
|
||||
TRESH_METHODS_COUNT, // Number of Thresholding methods
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
|
||||
|
@ -2108,20 +2108,41 @@ bool TessBaseAPI::Threshold(Pix **pix) {
|
||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||
}
|
||||
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
|
||||
Image im(*pix);
|
||||
if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
|
||||
return false;
|
||||
}
|
||||
*pix = im;
|
||||
|
||||
Image pix_binary(*pix);
|
||||
Image pix_grey;
|
||||
Image pix_thresholds;
|
||||
|
||||
auto thresholding_method = static_cast<ThreshMethod>(static_cast<int>(tesseract_->thresholding_method));
|
||||
|
||||
if (thresholding_method == OTSU_TRESH) {
|
||||
if (!thresholder_->ThresholdToPix(pageseg_mode, &pix_binary)) {
|
||||
return false;
|
||||
}
|
||||
*pix = pix_binary;
|
||||
|
||||
if (!thresholder_->IsBinary()) {
|
||||
tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
|
||||
tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
|
||||
} else {
|
||||
tesseract_->set_pix_thresholds(nullptr);
|
||||
tesseract_->set_pix_grey(nullptr);
|
||||
}
|
||||
} else {
|
||||
auto [ok, pix_grey, pix_binary, pix_thresholds] = thresholder_->Threshold(thresholding_method);
|
||||
|
||||
if (!ok) {
|
||||
return false;
|
||||
}
|
||||
*pix = pix_binary;
|
||||
|
||||
tesseract_->set_pix_thresholds(pix_thresholds);
|
||||
tesseract_->set_pix_grey(pix_grey);
|
||||
}
|
||||
|
||||
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
|
||||
&image_height_);
|
||||
if (!thresholder_->IsBinary()) {
|
||||
tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
|
||||
tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
|
||||
} else {
|
||||
tesseract_->set_pix_thresholds(nullptr);
|
||||
tesseract_->set_pix_grey(nullptr);
|
||||
}
|
||||
|
||||
// Set the internal resolution that is used for layout parameters from the
|
||||
// estimated resolution, rather than the image resolution, which may be
|
||||
// fabricated, but we will use the image resolution, if there is one, to
|
||||
|
@ -74,6 +74,9 @@ Tesseract::Tesseract()
|
||||
"11=sparse_text, 12=sparse_text+osd, 13=raw_line"
|
||||
" (Values from PageSegMode enum in tesseract/publictypes.h)",
|
||||
this->params())
|
||||
, INT_MEMBER(thresholding_method, OTSU_TRESH,
|
||||
"Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola",
|
||||
this->params())
|
||||
, INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
|
||||
" Defaults to loading and running the most accurate"
|
||||
|
@ -762,6 +762,8 @@ public:
|
||||
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
|
||||
" 5=line, 6=word, 7=char"
|
||||
" (Values from PageSegMode enum in tesseract/publictypes.h)");
|
||||
INT_VAR_H(thresholding_method, OTSU_TRESH,
|
||||
"Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola");
|
||||
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
|
||||
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
|
||||
" to loading and running the most accurate available.");
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <cstdint> // for uint32_t
|
||||
#include <cstring>
|
||||
#include <tuple>
|
||||
|
||||
#include "otsuthr.h"
|
||||
#include "thresholder.h"
|
||||
@ -184,6 +185,45 @@ void ImageThresholder::SetImage(const Image pix) {
|
||||
Init();
|
||||
}
|
||||
|
||||
// Thresholds the source image with one of the Leptonica-based methods.
//
// method selects the algorithm. SAUVOLA_TILED_TRESH uses tiled Sauvola
// binarization; every other value (ADAPTIVE_OTSU_TRESH, OTSU_TRESH and any
// out-of-range value) uses adaptive Otsu.
//
// Returns a tuple (ok, pix_grey, pix_binary, pix_thresholds):
//  - ok is false if the image is too large or if the Leptonica call reports
//    an error (non-zero return value).
//  - For an already binary source only pix_binary (a copy of the source
//    rectangle) is set; pix_grey and pix_thresholds are nullptr and ok is
//    true.
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
    ThreshMethod method) {
  Image pix_grey = nullptr;
  Image pix_binary = nullptr;
  Image pix_thresholds = nullptr;

  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return std::make_tuple(false, nullptr, nullptr, nullptr);
  }

  if (pix_channels_ == 0) {
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
    Image original = GetPixRect();
    pix_binary = original.copy();
    original.destroy();
    // Nothing to threshold is not an error: report success so that the caller
    // uses the copy instead of failing and dropping it.
    return std::make_tuple(true, nullptr, pix_binary, nullptr);
  }

  pix_grey = GetPixRectGrey();

  // Exactly one of the two branches always runs, so r is never read
  // uninitialised, whatever integer value was cast to ThreshMethod.
  int r = 1;
  if (method == SAUVOLA_TILED_TRESH) {
    // NOTE(review): Leptonica documents the 4th and 5th arguments of
    // pixSauvolaBinarizeTiled as tile counts (nx, ny), not tile sizes --
    // confirm that 300 is the intended value here.
    r = pixSauvolaBinarizeTiled(pix_grey, 25, 0.40, 300, 300,
                                pix_thresholds.a(), pix_binary.a());
  } else {
    // ADAPTIVE_OTSU_TRESH, and the fallback for OTSU_TRESH or any value
    // outside the enum range.
    r = pixOtsuAdaptiveThreshold(pix_grey, 300, 300, 0, 0, 0.1,
                                 pix_thresholds.a(), pix_binary.a());
  }

  // Leptonica returns 0 on success.
  const bool ok = (r == 0);
  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
}
|
||||
|
||||
// Threshold the source image as efficiently as possible to the output Pix.
|
||||
// Creates a Pix and sets pix to point to the resulting pointer.
|
||||
// Caller must use pixDestroy to free the created Pix.
|
||||
|
@ -121,6 +121,9 @@ public:
|
||||
/// Returns false on error.
|
||||
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);
|
||||
|
||||
/// Thresholds the source image with the given thresholding method.
/// Returns a tuple (ok, pix_grey, pix_binary, pix_thresholds).
virtual std::tuple<bool, Image, Image, Image> Threshold(
|
||||
ThreshMethod method);
|
||||
|
||||
// Gets a pix that contains an 8 bit threshold value at each pixel. The
|
||||
// returned pix may be an integer reduction of the binary image such that
|
||||
// the scale factor may be inferred from the ratio of the sizes, even down
|
||||
|
@ -33,6 +33,8 @@ public:
|
||||
// service
|
||||
// Implicit conversion to the wrapped Pix pointer.
operator Pix *() const { return pix_; }
|
||||
// Member access on the wrapped Pix pointer.
Pix *operator->() const { return pix_; }
|
||||
// Returns the address of the wrapped Pix pointer, for use as an output
// argument of functions that take a Pix **.
Pix **a() { return &pix_; }
|
||||
|
||||
|
||||
// api
|
||||
Image clone() const; // increases refcount
|
||||
|
Loading…
Reference in New Issue
Block a user