mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
Merge pull request #4314 from stweil/optimize
Add C++ stream for log messages and use it in two debug messages
This commit is contained in:
commit
4f43536335
@ -41,6 +41,7 @@
|
||||
#endif
|
||||
#include "sorthelper.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesserrstream.h" // for tesserr
|
||||
#include "tessvars.h"
|
||||
#include "werdit.h"
|
||||
|
||||
@ -1313,9 +1314,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
|
||||
PointerVector<WERD_RES> best_words;
|
||||
// Points to the best result. May be word or in lang_words.
|
||||
const WERD_RES *word = word_data->word;
|
||||
clock_t start_t = 0;
|
||||
if (tessedit_timing_debug) {
|
||||
start_t = clock();
|
||||
clock_t total_time = 0;
|
||||
const bool timing_debug = tessedit_timing_debug;
|
||||
if (timing_debug) {
|
||||
total_time = clock();
|
||||
}
|
||||
const bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
|
||||
if (debug) {
|
||||
@ -1368,10 +1370,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
|
||||
} else {
|
||||
tprintf("no best words!!\n");
|
||||
}
|
||||
if (tessedit_timing_debug) {
|
||||
clock_t ocr_t = clock();
|
||||
tprintf("%s (ocr took %.2f sec)\n", word_data->word->best_choice->unichar_string().c_str(),
|
||||
static_cast<double>(ocr_t - start_t) / CLOCKS_PER_SEC);
|
||||
if (timing_debug) {
|
||||
total_time = clock() - total_time;
|
||||
tesserr << word_data->word->best_choice->unichar_string()
|
||||
<< " (ocr took " << 1000 * total_time / CLOCKS_PER_SEC << " ms)\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
68
src/ccutil/tesserrstream.h
Normal file
68
src/ccutil/tesserrstream.h
Normal file
@ -0,0 +1,68 @@
|
||||
// File: tesserrstream.h
|
||||
// Description: C++ stream which enhances tprintf
|
||||
// Author: Stefan Weil
|
||||
//
|
||||
// (C) Copyright 2024
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_CCUTIL_TESSERRSTREAM_H
|
||||
#define TESSERACT_CCUTIL_TESSERRSTREAM_H
|
||||
|
||||
#include "tprintf.h"

#include <tesseract/export.h> // for TESS_API

#include <cstdio>  // for FILE, fputc, fwrite, EOF
#include <ostream> // for std::ostream
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class TessStreamBuf : public std::streambuf {
|
||||
public:
|
||||
TessStreamBuf() = default;
|
||||
|
||||
protected:
|
||||
virtual int_type overflow(int_type c) override {
|
||||
if (c != EOF) {
|
||||
if (debugfp == nullptr) {
|
||||
debugfp = get_debugfp();
|
||||
}
|
||||
if (fputc(c, debugfp) == EOF) {
|
||||
return EOF;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
virtual std::streamsize xsputn(const char* s, std::streamsize n) override {
|
||||
if (debugfp == nullptr) {
|
||||
debugfp = get_debugfp();
|
||||
}
|
||||
return fwrite(s, 1, n, debugfp);
|
||||
}
|
||||
|
||||
private:
|
||||
FILE *debugfp = nullptr;
|
||||
};
|
||||
|
||||
class TessErrStream : public std::ostream {
|
||||
private:
|
||||
TessStreamBuf buf;
|
||||
|
||||
public:
|
||||
TessErrStream() : std::ostream(nullptr), buf() {
|
||||
rdbuf(&buf);
|
||||
}
|
||||
};
|
||||
|
||||
extern TESS_API TessErrStream tesserr;
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCUTIL_TESSERRSTREAM_H
|
@ -21,6 +21,7 @@
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "tesserrstream.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#include "params.h"
|
||||
@ -36,7 +37,7 @@ INT_VAR(log_level, INT_MAX, "Logging level");
|
||||
static STRING_VAR(debug_file, "", "File to send tprintf output to");
|
||||
|
||||
// File for debug output.
|
||||
static FILE *debugfp;
|
||||
FILE *debugfp;
|
||||
|
||||
// Set output for log messages.
|
||||
// The output is written to stderr if debug_file is empty.
|
||||
@ -49,7 +50,7 @@ static FILE *debugfp;
|
||||
// tprintf("write to /tmp/log\n");
|
||||
// debug_file = "";
|
||||
// tprintf("write to stderr\n");
|
||||
static void set_debugfp() {
|
||||
FILE *get_debugfp() {
|
||||
if (debug_file.empty()) {
|
||||
// Write to stderr.
|
||||
if (debugfp != stderr && debugfp != nullptr) {
|
||||
@ -66,15 +67,18 @@ static void set_debugfp() {
|
||||
#endif
|
||||
debugfp = fopen(debug_file.c_str(), "wb");
|
||||
}
|
||||
return debugfp;
|
||||
}
|
||||
|
||||
// Trace printf.
|
||||
void tprintf(const char *format, ...) {
|
||||
set_debugfp();
|
||||
FILE *f = get_debugfp();
|
||||
va_list args; // variable args
|
||||
va_start(args, format); // variable list
|
||||
vfprintf(debugfp, format, args);
|
||||
vfprintf(f, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
TessErrStream tesserr;
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -36,6 +36,9 @@ extern TESS_API void tprintf( // Trace printf
|
||||
const char *format, ...) // Message
|
||||
__attribute__((format(printf, 1, 2)));
|
||||
|
||||
// Get file for debug output.
|
||||
FILE *get_debugfp();
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#undef __attribute__
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "sampleiterator.h"
|
||||
#include "shapeclassifier.h"
|
||||
#include "shapetable.h"
|
||||
#include "tesserrstream.h"
|
||||
#include "trainingsample.h"
|
||||
#include "trainingsampleset.h"
|
||||
#include "unicity_table.h"
|
||||
@ -50,7 +51,10 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
|
||||
ErrorCounter counter(classifier->GetUnicharset(), fontsize);
|
||||
std::vector<UnicharRating> results;
|
||||
|
||||
clock_t start = clock();
|
||||
clock_t total_time = 0;
|
||||
if (report_level > 1) {
|
||||
total_time = clock();
|
||||
}
|
||||
unsigned total_samples = 0;
|
||||
double unscaled_error = 0.0;
|
||||
// Set a number of samples on which to run the classify debug mode.
|
||||
@ -85,7 +89,6 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
|
||||
}
|
||||
++total_samples;
|
||||
}
|
||||
const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
|
||||
// Create the appropriate error report.
|
||||
unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
|
||||
unichar_error, fonts_report);
|
||||
@ -94,8 +97,9 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
|
||||
}
|
||||
if (report_level > 1 && total_samples > 0) {
|
||||
// It is useful to know the time in microseconds/char.
|
||||
tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,
|
||||
1000000.0 * total_time / total_samples);
|
||||
total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC;
|
||||
tesserr << "Errors computed in " << total_time << " ms at "
|
||||
<< 1000 * total_time / total_samples << " μs/char\n";
|
||||
}
|
||||
return unscaled_error;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user