2007-03-08 04:03:40 +08:00
|
|
|
/**********************************************************************
 * File:        adaptions.cpp  (Formerly adaptions.c)
 * Description: Functions used to adapt to blobs already confidently
 *              identified
 * Author:      Chris Newton
 * Created:     Thu Oct  7 10:17:28 BST 1993
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
|
|
|
|
|
2010-07-22 02:11:00 +08:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
#pragma warning(disable:4244) // Conversion warnings
|
|
|
|
#pragma warning(disable:4305) // int/float warnings
|
|
|
|
#endif
|
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
#ifdef __UNIX__
|
|
|
|
#include <assert.h>
|
|
|
|
#endif
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include "tessbox.h"
|
|
|
|
#include "tessvars.h"
|
|
|
|
#include "memry.h"
|
|
|
|
#include "reject.h"
|
|
|
|
#include "control.h"
|
|
|
|
#include "stopper.h"
|
2009-07-11 10:03:51 +08:00
|
|
|
#include "tesseractclass.h"
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2010-09-30 23:53:40 +08:00
|
|
|
// Include automatically generated configuration file if running autoconf.
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config_auto.h"
|
|
|
|
#endif
|
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
namespace tesseract {
|
|
|
|
BOOL8 Tesseract::word_adaptable( //should we adapt?
|
|
|
|
WERD_RES *word,
|
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* cutil: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* training: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract data types by POSIX data types
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract data types by POSIX data types
Now all Tesseract data types which are no longer needed can be removed
from ccutil/host.h.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX
Remove the macros which are now unused from ccutil/host.h.
Remove also the obsolete history comments.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* Fix build error caused by ambiguous ClipToRange
Error message vom Appveyor CI:
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj]
C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj]
c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char'
C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int'
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
* arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-03-14 04:36:30 +08:00
|
|
|
uint16_t mode) {
|
2009-07-11 10:03:51 +08:00
|
|
|
if (tessedit_adaption_debug) {
|
|
|
|
tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
|
|
|
|
word->best_choice == NULL ? "" :
|
|
|
|
word->best_choice->unichar_string().string(),
|
|
|
|
word->best_choice->rating(), word->best_choice->certainty());
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
BOOL8 status = FALSE;
|
|
|
|
BITS16 flags(mode);
|
|
|
|
|
|
|
|
enum MODES
|
|
|
|
{
|
|
|
|
ADAPTABLE_WERD,
|
|
|
|
ACCEPTABLE_WERD,
|
|
|
|
CHECK_DAWGS,
|
|
|
|
CHECK_SPACES,
|
|
|
|
CHECK_ONE_ELL_CONFLICT,
|
|
|
|
CHECK_AMBIG_WERD
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
0: NO adaption
|
|
|
|
*/
|
|
|
|
if (mode == 0) {
|
2009-07-11 10:03:51 +08:00
|
|
|
if (tessedit_adaption_debug) tprintf("adaption disabled\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
if (flags.bit (ADAPTABLE_WERD)) {
|
|
|
|
status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
|
|
|
|
if (tessedit_adaption_debug && !status) {
|
|
|
|
tprintf("tess_would_adapt bit is false\n");
|
|
|
|
}
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
if (flags.bit (ACCEPTABLE_WERD)) {
|
2007-03-08 04:03:40 +08:00
|
|
|
status |= word->tess_accepted;
|
2009-07-11 10:03:51 +08:00
|
|
|
if (tessedit_adaption_debug && !status) {
|
|
|
|
tprintf("tess_accepted bit is false\n");
|
|
|
|
}
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
if (!status) { // If not set then
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE; // ignore other checks
|
2009-07-11 10:03:51 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
if (flags.bit (CHECK_DAWGS) &&
|
|
|
|
(word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
|
|
|
|
(word->best_choice->permuter () != FREQ_DAWG_PERM) &&
|
|
|
|
(word->best_choice->permuter () != USER_DAWG_PERM) &&
|
2009-07-11 10:03:51 +08:00
|
|
|
(word->best_choice->permuter () != NUMBER_PERM)) {
|
|
|
|
if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE;
|
2009-07-11 10:03:51 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
|
|
|
|
if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE;
|
2009-07-11 10:03:51 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
if (flags.bit (CHECK_SPACES) &&
|
2009-07-11 10:03:51 +08:00
|
|
|
(strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
|
|
|
|
if (tessedit_adaption_debug) tprintf("word contains spaces\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE;
|
2009-07-11 10:03:51 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
if (flags.bit (CHECK_AMBIG_WERD) &&
|
2013-09-23 23:26:50 +08:00
|
|
|
word->best_choice->dangerous_ambig_found()) {
|
2009-07-11 10:03:51 +08:00
|
|
|
if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
return FALSE;
|
2009-07-11 10:03:51 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
if (tessedit_adaption_debug) {
|
|
|
|
tprintf("returning status %d\n", status);
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2009-07-11 10:03:51 +08:00
|
|
|
} // namespace tesseract
|