mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Merge branch 'main' of https://github.com/tesseract-ocr/tesseract
This commit is contained in:
commit
a0708eaff2
2
.github/workflows/cmake-win64.yml
vendored
2
.github/workflows/cmake-win64.yml
vendored
@ -26,7 +26,7 @@ jobs:
|
||||
run: |
|
||||
$git_info=$(git describe --tags HEAD)
|
||||
echo "version=${git_info}" >> $env:GITHUB_OUTPUT
|
||||
- name: Setup Instalation Location
|
||||
- name: Setup Installation Location
|
||||
run: |
|
||||
mkdir ${{env.ILOC}}
|
||||
- name: Uninstall Perl
|
||||
|
@ -61,7 +61,7 @@ struct EANYCODE_CHAR { /*single character */
|
||||
// is UTF8 which means that ASCII characters will come out as one structure
|
||||
// but other characters will be returned in two or more instances of this
|
||||
// structure with a single byte of the UTF8 code in each, but each will have
|
||||
// the same bounding box. Programs which want to handle languagues with
|
||||
// the same bounding box. Programs which want to handle languages with
|
||||
// different character sets will need to handle extended characters
|
||||
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
|
||||
// characters for characters such as bullet and fancy quotes.
|
||||
|
@ -149,7 +149,7 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
for (int i = 0; i < word_length_; i++) {
|
||||
letter_types.push_back(it_->word()->SymbolDirection(i));
|
||||
}
|
||||
// Convert a single separtor sandwiched between two EN's into an EN.
|
||||
// Convert a single separator sandwiched between two ENs into an EN.
|
||||
for (int i = 0; i + 2 < word_length_; i++) {
|
||||
if (letter_types[i] == U_EURO_NUM && letter_types[i + 2] == U_EURO_NUM &&
|
||||
(letter_types[i + 1] == U_EURO_NUM_SEP || letter_types[i + 1] == U_COMMON_NUM_SEP)) {
|
||||
|
@ -314,7 +314,7 @@ int32_t C_BLOB::outer_area() { // area
|
||||
* C_BLOB::count_transitions
|
||||
*
|
||||
* Return the total x and y maxes and mins in the blob.
|
||||
* Chlid outlines are not counted.
|
||||
* Child outlines are not counted.
|
||||
**********************************************************************/
|
||||
|
||||
int32_t C_BLOB::count_transitions( // area
|
||||
|
@ -114,7 +114,7 @@ Dict::Dict(CCUtil *ccutil)
|
||||
" for each dict char above small word size.",
|
||||
getCCUtil()->params())
|
||||
, double_MEMBER(stopper_allowable_character_badness, 3.0,
|
||||
"Max certaintly variation allowed in a word (in sigma)", getCCUtil()->params())
|
||||
"Max certainty variation allowed in a word (in sigma)", getCCUtil()->params())
|
||||
, INT_MEMBER(stopper_debug_level, 0, "Stopper debug level", getCCUtil()->params())
|
||||
, BOOL_MEMBER(stopper_no_acceptable_choices, false,
|
||||
"Make AcceptableChoice() always return false. Useful"
|
||||
@ -171,7 +171,7 @@ Dict::~Dict() {
|
||||
|
||||
DawgCache *Dict::GlobalDawgCache() {
|
||||
// This global cache (a singleton) will outlive every Tesseract instance
|
||||
// (even those that someone else might declare as global statics).
|
||||
// (even those that someone else might declare as global static variables).
|
||||
static DawgCache cache;
|
||||
return &cache;
|
||||
}
|
||||
|
@ -398,9 +398,8 @@ bool try_doc_fixed( // determine pitch
|
||||
int16_t mid_cuts; // no of cheap cuts
|
||||
float pitch_sd; // sync rating
|
||||
|
||||
if (block_it.empty()
|
||||
// || block_it.data()==block_it.data_relative(1)
|
||||
|| !textord_blockndoc_fixed) {
|
||||
if (!textord_blockndoc_fixed ||
|
||||
block_it.empty() || block_it.data()->get_rows()->empty()) {
|
||||
return false;
|
||||
}
|
||||
shift_factor = gradient / (gradient * gradient + 1);
|
||||
|
Loading…
Reference in New Issue
Block a user