mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Allow user to specify dpi for input image
This commit is contained in:
parent
345e5ee1f3
commit
a0564fd4ec
@ -2320,12 +2320,22 @@ bool TessBaseAPI::Threshold(Pix** pix) {
|
||||
if (*pix != nullptr)
|
||||
pixDestroy(pix);
|
||||
// Zero resolution messes up the algorithms, so make sure it is credible.
|
||||
int user_dpi = 0;
|
||||
bool a = GetIntVariable("user_defined_dpi", &user_dpi);
|
||||
int y_res = thresholder_->GetScaledYResolution();
|
||||
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
|
||||
// Use the minimum default resolution, as it is safer to under-estimate
|
||||
// than over-estimate resolution.
|
||||
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
|
||||
kMinCredibleResolution);
|
||||
if (user_dpi && (user_dpi < kMinCredibleResolution ||
|
||||
user_dpi > kMaxCredibleResolution)) {
|
||||
tprintf("Warning: User defined image dpi is outside of expected range "
|
||||
"(%d - %d)!\n",
|
||||
kMinCredibleResolution, kMaxCredibleResolution);
|
||||
}
|
||||
// Always use user defined dpi
|
||||
if (user_dpi) {
|
||||
thresholder_->SetSourceYResolution(user_dpi);
|
||||
} else if (y_res < kMinCredibleResolution ||
|
||||
y_res > kMaxCredibleResolution) {
|
||||
tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
|
||||
y_res, kMinCredibleResolution);
|
||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||
}
|
||||
PageSegMode pageseg_mode =
|
||||
@ -2350,7 +2360,8 @@ bool TessBaseAPI::Threshold(Pix** pix) {
|
||||
kMinCredibleResolution,
|
||||
kMaxCredibleResolution);
|
||||
if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
|
||||
tprintf("Estimated resolution %d out of range! Corrected to %d\n",
|
||||
tprintf("Estimated internal resolution %d out of range! "
|
||||
"Corrected to %d.\n",
|
||||
thresholder_->GetScaledEstimatedResolution(), estimated_res);
|
||||
}
|
||||
tesseract_->set_source_resolution(estimated_res);
|
||||
|
@ -165,6 +165,7 @@ static void PrintHelpExtra(const char* program) {
|
||||
" --tessdata-dir PATH Specify the location of tessdata path.\n"
|
||||
" --user-words PATH Specify the location of user words file.\n"
|
||||
" --user-patterns PATH Specify the location of user patterns file.\n"
|
||||
" --dpi VALUE Specify DPI for input image.\n"
|
||||
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
|
||||
" -c VAR=VALUE Set value for config variables.\n"
|
||||
" Multiple -c arguments are allowed.\n"
|
||||
@ -288,10 +289,9 @@ static void checkArgValues(int arg, const char* mode, int count) {
|
||||
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
|
||||
static void ParseArgs(const int argc, char** argv, const char** lang,
|
||||
const char** image, const char** outputbase,
|
||||
const char** datapath,
|
||||
bool* list_langs, bool* print_parameters,
|
||||
GenericVector<STRING>* vars_vec,
|
||||
GenericVector<STRING>* vars_values, int* arg_i,
|
||||
const char** datapath, l_int32* dpi, bool* list_langs,
|
||||
bool* print_parameters, GenericVector<STRING>* vars_vec,
|
||||
GenericVector<STRING>* vars_values, l_int32* arg_i,
|
||||
tesseract::PageSegMode* pagesegmode,
|
||||
tesseract::OcrEngineMode* enginemode) {
|
||||
bool noocr = false;
|
||||
@ -324,6 +324,9 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
|
||||
} else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
|
||||
*datapath = argv[i + 1];
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
|
||||
*dpi = atoi(argv[i + 1]);
|
||||
++i;
|
||||
} else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
|
||||
vars_vec->push_back("user_words_file");
|
||||
vars_values->push_back(argv[i + 1]);
|
||||
@ -456,6 +459,7 @@ int main(int argc, char** argv) {
|
||||
const char* datapath = nullptr;
|
||||
bool list_langs = false;
|
||||
bool print_parameters = false;
|
||||
l_int32 dpi = 0;
|
||||
int arg_i = 1;
|
||||
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
@ -479,9 +483,9 @@ int main(int argc, char** argv) {
|
||||
TIFFSetWarningHandler(Win32WarningHandler);
|
||||
#endif /* HAVE_TIFFIO_H && _WIN32 */
|
||||
|
||||
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
|
||||
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
|
||||
&enginemode);
|
||||
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
|
||||
&list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
|
||||
&pagesegmode, &enginemode);
|
||||
|
||||
if (lang == nullptr) {
|
||||
// Set default language if none was given.
|
||||
@ -528,6 +532,11 @@ int main(int argc, char** argv) {
|
||||
|
||||
FixPageSegMode(&api, pagesegmode);
|
||||
|
||||
if (dpi) {
|
||||
char dpi_string[255];
|
||||
snprintf(dpi_string, 254, "%d", dpi);
|
||||
api.SetVariable("user_defined_dpi", dpi_string);
|
||||
}
|
||||
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
|
||||
int ret_val = EXIT_SUCCESS;
|
||||
|
||||
|
@ -395,6 +395,8 @@ Tesseract::Tesseract()
|
||||
"Create PDF with only one invisible text layer",
|
||||
this->params()),
|
||||
INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
|
||||
INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image",
|
||||
this->params()),
|
||||
STRING_MEMBER(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs", this->params()),
|
||||
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
|
||||
|
@ -1042,6 +1042,7 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(textonly_pdf, false,
|
||||
"Create PDF with only one invisible text layer");
|
||||
INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
|
||||
INT_VAR_H(user_defined_dpi, 0, "Specify DPI for input image");
|
||||
STRING_VAR_H(unrecognised_char, "|",
|
||||
"Output char for unidentified blobs");
|
||||
INT_VAR_H(suspect_level, 99, "Suspect marker level");
|
||||
|
Loading…
Reference in New Issue
Block a user