mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 09:53:03 +08:00
Merge pull request #3354 from stweil/master
Add braces to single line statements and modernize unittest code using clang-tidy
This commit is contained in:
commit
d72c2b14a5
@ -48,8 +48,9 @@ struct OSBestResult {
|
||||
struct OSResults {
|
||||
OSResults() : unicharset(nullptr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j)
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] = 0;
|
||||
}
|
||||
orientations[i] = 0;
|
||||
}
|
||||
}
|
||||
|
@ -93,8 +93,9 @@ bool TessAltoRenderer::BeginDocumentHandler() {
|
||||
///
|
||||
bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> text(api->GetAltoText(imagenum()));
|
||||
if (text == nullptr)
|
||||
if (text == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(text.get());
|
||||
|
||||
@ -126,8 +127,9 @@ char *TessBaseAPI::GetAltoText(int page_number) {
|
||||
/// data structures.
|
||||
///
|
||||
char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;
|
||||
|
||||
|
@ -155,8 +155,9 @@ static void ExtractFontName(const char* filename, std::string* fontname) {
|
||||
static void addAvailableLanguages(const std::string &datadir, const std::string &base,
|
||||
std::vector<std::string> *langs) {
|
||||
auto base2 = base;
|
||||
if (!base2.empty())
|
||||
if (!base2.empty()) {
|
||||
base2 += "/";
|
||||
}
|
||||
const size_t extlen = sizeof(kTrainedDataSuffix);
|
||||
#ifdef _WIN32
|
||||
WIN32_FIND_DATA data;
|
||||
@ -291,23 +292,26 @@ void TessBaseAPI::SetOutputName(const char *name) {
|
||||
}
|
||||
|
||||
bool TessBaseAPI::SetVariable(const char *name, const char *value) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
tesseract_ = new Tesseract;
|
||||
}
|
||||
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
|
||||
tesseract_->params());
|
||||
}
|
||||
|
||||
bool TessBaseAPI::SetDebugVariable(const char *name, const char *value) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
tesseract_ = new Tesseract;
|
||||
}
|
||||
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tesseract_->params());
|
||||
}
|
||||
|
||||
bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
|
||||
auto *p = ParamUtils::FindParam<IntParam>(name, GlobalParams()->int_params,
|
||||
tesseract_->params()->int_params);
|
||||
if (p == nullptr)
|
||||
if (p == nullptr) {
|
||||
return false;
|
||||
}
|
||||
*value = (int32_t)(*p);
|
||||
return true;
|
||||
}
|
||||
@ -315,8 +319,9 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
|
||||
bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
|
||||
auto *p = ParamUtils::FindParam<BoolParam>(name, GlobalParams()->bool_params,
|
||||
tesseract_->params()->bool_params);
|
||||
if (p == nullptr)
|
||||
if (p == nullptr) {
|
||||
return false;
|
||||
}
|
||||
*value = bool(*p);
|
||||
return true;
|
||||
}
|
||||
@ -330,8 +335,9 @@ const char *TessBaseAPI::GetStringVariable(const char *name) const {
|
||||
bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
|
||||
auto *p = ParamUtils::FindParam<DoubleParam>(name, GlobalParams()->double_params,
|
||||
tesseract_->params()->double_params);
|
||||
if (p == nullptr)
|
||||
if (p == nullptr) {
|
||||
return false;
|
||||
}
|
||||
*value = (double)(*p);
|
||||
return true;
|
||||
}
|
||||
@ -369,8 +375,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
const std::vector<std::string> *vars_values, bool set_only_non_debug_params,
|
||||
FileReader reader) {
|
||||
// Default language is "eng".
|
||||
if (language == nullptr)
|
||||
if (language == nullptr) {
|
||||
language = "eng";
|
||||
}
|
||||
if (data == nullptr) {
|
||||
data = "";
|
||||
}
|
||||
@ -394,8 +401,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
if (tesseract_ == nullptr) {
|
||||
reset_classifier = false;
|
||||
tesseract_ = new Tesseract;
|
||||
if (reader != nullptr)
|
||||
if (reader != nullptr) {
|
||||
reader_ = reader;
|
||||
}
|
||||
TessdataManager mgr(reader_);
|
||||
if (data_size != 0) {
|
||||
mgr.LoadMemBuffer(language, data, data_size);
|
||||
@ -409,8 +417,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
|
||||
|
||||
// Update datapath and language requested for the last valid initialization.
|
||||
datapath_ = datapath;
|
||||
if ((strcmp(datapath_.c_str(), "") == 0) && (strcmp(tesseract_->datadir.c_str(), "") != 0))
|
||||
if ((strcmp(datapath_.c_str(), "") == 0) && (strcmp(tesseract_->datadir.c_str(), "") != 0)) {
|
||||
datapath_ = tesseract_->datadir;
|
||||
}
|
||||
|
||||
language_ = language;
|
||||
last_oem_requested_ = oem;
|
||||
@ -446,8 +455,9 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector<std::string> *langs) co
|
||||
if (tesseract_ != nullptr) {
|
||||
langs->push_back(tesseract_->lang);
|
||||
int num_subs = tesseract_->num_sub_langs();
|
||||
for (int i = 0; i < num_subs; ++i)
|
||||
for (int i = 0; i < num_subs; ++i) {
|
||||
langs->push_back(tesseract_->get_sub_lang(i)->lang);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -471,10 +481,11 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector<std::string> *langs)
|
||||
* in a separate API at some future time.
|
||||
*/
|
||||
int TessBaseAPI::InitLangMod(const char *datapath, const char *language) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
tesseract_ = new Tesseract;
|
||||
else
|
||||
} else {
|
||||
ParamUtils::ResetToDefaults(tesseract_->params());
|
||||
}
|
||||
TessdataManager mgr;
|
||||
return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
|
||||
}
|
||||
@ -513,15 +524,17 @@ void TessBaseAPI::ReadDebugConfigFile(const char *filename) {
|
||||
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
|
||||
*/
|
||||
void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
tesseract_ = new Tesseract;
|
||||
}
|
||||
tesseract_->tessedit_pageseg_mode.set_value(mode);
|
||||
}
|
||||
|
||||
/** Return the current page segmentation mode. */
|
||||
PageSegMode TessBaseAPI::GetPageSegMode() const {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
return PSM_SINGLE_BLOCK;
|
||||
}
|
||||
return static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
|
||||
}
|
||||
|
||||
@ -540,8 +553,9 @@ PageSegMode TessBaseAPI::GetPageSegMode() const {
|
||||
*/
|
||||
char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
|
||||
int bytes_per_line, int left, int top, int width, int height) {
|
||||
if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
|
||||
if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) {
|
||||
return nullptr; // Nothing worth doing.
|
||||
}
|
||||
|
||||
// Since this original api didn't give the exact size of the image,
|
||||
// we have to invent a reasonable value.
|
||||
@ -559,8 +573,9 @@ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_p
|
||||
* adaptive data.
|
||||
*/
|
||||
void TessBaseAPI::ClearAdaptiveClassifier() {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
tesseract_->ResetAdaptiveClassifier();
|
||||
tesseract_->ResetDocumentDictionary();
|
||||
}
|
||||
@ -582,10 +597,11 @@ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height
|
||||
}
|
||||
|
||||
void TessBaseAPI::SetSourceResolution(int ppi) {
|
||||
if (thresholder_)
|
||||
if (thresholder_) {
|
||||
thresholder_->SetSourceYResolution(ppi);
|
||||
else
|
||||
} else {
|
||||
tprintf("Please call SetImage before SetSourceResolution.\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -616,8 +632,9 @@ void TessBaseAPI::SetImage(Pix *pix) {
|
||||
* can be recognized with the same image.
|
||||
*/
|
||||
void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
|
||||
if (thresholder_ == nullptr)
|
||||
if (thresholder_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
thresholder_->SetRectangle(left, top, width, height);
|
||||
ClearResults();
|
||||
}
|
||||
@ -627,8 +644,9 @@ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
|
||||
* Get a copy of the internal thresholded image from Tesseract.
|
||||
*/
|
||||
Pix *TessBaseAPI::GetThresholdedImage() {
|
||||
if (tesseract_ == nullptr || thresholder_ == nullptr)
|
||||
if (tesseract_ == nullptr || thresholder_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -700,10 +718,12 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
|
||||
const int raw_padding, Pixa **pixa, int **blockids,
|
||||
int **paraids) {
|
||||
PageIterator *page_it = GetIterator();
|
||||
if (page_it == nullptr)
|
||||
if (page_it == nullptr) {
|
||||
page_it = AnalyseLayout();
|
||||
if (page_it == nullptr)
|
||||
}
|
||||
if (page_it == nullptr) {
|
||||
return nullptr; // Failed.
|
||||
}
|
||||
|
||||
// Count the components to get a size for the arrays.
|
||||
int component_count = 0;
|
||||
@ -713,26 +733,31 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
|
||||
// Get bounding box in original raw image with padding.
|
||||
do {
|
||||
if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
|
||||
(!text_only || PTIsTextType(page_it->BlockType())))
|
||||
(!text_only || PTIsTextType(page_it->BlockType()))) {
|
||||
++component_count;
|
||||
}
|
||||
} while (page_it->Next(level));
|
||||
} else {
|
||||
// Get bounding box from binarized image. Note that this could be
|
||||
// differently scaled from the original image.
|
||||
do {
|
||||
if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
|
||||
(!text_only || PTIsTextType(page_it->BlockType())))
|
||||
(!text_only || PTIsTextType(page_it->BlockType()))) {
|
||||
++component_count;
|
||||
}
|
||||
} while (page_it->Next(level));
|
||||
}
|
||||
|
||||
Boxa *boxa = boxaCreate(component_count);
|
||||
if (pixa != nullptr)
|
||||
if (pixa != nullptr) {
|
||||
*pixa = pixaCreate(component_count);
|
||||
if (blockids != nullptr)
|
||||
}
|
||||
if (blockids != nullptr) {
|
||||
*blockids = new int[component_count];
|
||||
if (paraids != nullptr)
|
||||
}
|
||||
if (paraids != nullptr) {
|
||||
*paraids = new int[component_count];
|
||||
}
|
||||
|
||||
int blockid = 0;
|
||||
int paraid = 0;
|
||||
@ -760,8 +785,9 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
|
||||
}
|
||||
if (paraids != nullptr) {
|
||||
(*paraids)[component_index] = paraid;
|
||||
if (page_it->IsAtFinalElement(RIL_PARA, level))
|
||||
if (page_it->IsAtFinalElement(RIL_PARA, level)) {
|
||||
++paraid;
|
||||
}
|
||||
}
|
||||
if (blockids != nullptr) {
|
||||
(*blockids)[component_index] = blockid;
|
||||
@ -805,8 +831,9 @@ PageIterator *TessBaseAPI::AnalyseLayout() {
|
||||
|
||||
PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
|
||||
if (FindLines() == 0) {
|
||||
if (block_list_->empty())
|
||||
if (block_list_->empty()) {
|
||||
return nullptr; // The page was empty.
|
||||
}
|
||||
page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
|
||||
DetectParagraphs(false);
|
||||
return new PageIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
|
||||
@ -821,10 +848,12 @@ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
|
||||
* internal structures.
|
||||
*/
|
||||
int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
return -1;
|
||||
if (FindLines() != 0)
|
||||
}
|
||||
if (FindLines() != 0) {
|
||||
return -1;
|
||||
}
|
||||
delete page_res_;
|
||||
if (block_list_->empty()) {
|
||||
page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_);
|
||||
@ -889,11 +918,13 @@ int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
|
||||
// Now run the main recognition.
|
||||
bool wait_for_text = true;
|
||||
GetBoolVariable("paragraph_text_based", &wait_for_text);
|
||||
if (!wait_for_text)
|
||||
if (!wait_for_text) {
|
||||
DetectParagraphs(false);
|
||||
}
|
||||
if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
|
||||
if (wait_for_text)
|
||||
if (wait_for_text) {
|
||||
DetectParagraphs(true);
|
||||
}
|
||||
} else {
|
||||
result = -1;
|
||||
}
|
||||
@ -933,8 +964,9 @@ int TessBaseAPI::GetSourceYResolution() {
|
||||
bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char *retry_config,
|
||||
int timeout_millisec, TessResultRenderer *renderer,
|
||||
int tessedit_page_number) {
|
||||
if (!flist && !buf)
|
||||
if (!flist && !buf) {
|
||||
return false;
|
||||
}
|
||||
unsigned page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
|
||||
char pagename[MAX_PATH];
|
||||
|
||||
@ -953,15 +985,17 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
// Add last line without terminating LF.
|
||||
lines.push_back(line);
|
||||
}
|
||||
if (lines.empty())
|
||||
if (lines.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip to the requested page number.
|
||||
for (unsigned i = 0; i < page; i++) {
|
||||
if (flist) {
|
||||
if (fgets(pagename, sizeof(pagename), flist) == nullptr)
|
||||
if (fgets(pagename, sizeof(pagename), flist) == nullptr) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -973,11 +1007,13 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
// Loop over all pages - or just the requested one
|
||||
while (true) {
|
||||
if (flist) {
|
||||
if (fgets(pagename, sizeof(pagename), flist) == nullptr)
|
||||
if (fgets(pagename, sizeof(pagename), flist) == nullptr) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (page >= lines.size())
|
||||
if (page >= lines.size()) {
|
||||
break;
|
||||
}
|
||||
snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
|
||||
}
|
||||
chomp_string(pagename);
|
||||
@ -989,10 +1025,12 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
tprintf("Page %u : %s\n", page, pagename);
|
||||
bool r = ProcessPage(pix, page, pagename, retry_config, timeout_millisec, renderer);
|
||||
pixDestroy(&pix);
|
||||
if (!r)
|
||||
if (!r) {
|
||||
return false;
|
||||
if (tessedit_page_number >= 0)
|
||||
}
|
||||
if (tessedit_page_number >= 0) {
|
||||
break;
|
||||
}
|
||||
++page;
|
||||
}
|
||||
|
||||
@ -1018,20 +1056,24 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co
|
||||
pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
|
||||
: pixReadFromMultipageTiff(filename, &offset);
|
||||
}
|
||||
if (pix == nullptr)
|
||||
if (pix == nullptr) {
|
||||
break;
|
||||
}
|
||||
tprintf("Page %d\n", page + 1);
|
||||
char page_str[kMaxIntSize];
|
||||
snprintf(page_str, kMaxIntSize - 1, "%d", page);
|
||||
SetVariable("applybox_page", page_str);
|
||||
bool r = ProcessPage(pix, page, filename, retry_config, timeout_millisec, renderer);
|
||||
pixDestroy(&pix);
|
||||
if (!r)
|
||||
if (!r) {
|
||||
return false;
|
||||
if (tessedit_page_number >= 0)
|
||||
}
|
||||
if (tessedit_page_number >= 0) {
|
||||
break;
|
||||
if (!offset)
|
||||
}
|
||||
if (!offset) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1055,7 +1097,7 @@ bool TessBaseAPI::ProcessPages(const char *filename, const char *retry_config, i
|
||||
#ifdef HAVE_LIBCURL
|
||||
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
|
||||
size = size * nmemb;
|
||||
std::string *buf = reinterpret_cast<std::string *>(userp);
|
||||
auto *buf = reinterpret_cast<std::string *>(userp);
|
||||
buf->append(reinterpret_cast<const char *>(contents), size);
|
||||
return size;
|
||||
}
|
||||
@ -1265,8 +1307,9 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
|
||||
* Recognize. The returned iterator must be deleted after use.
|
||||
*/
|
||||
LTRResultIterator *TessBaseAPI::GetLTRIterator() {
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr)
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
|
||||
thresholder_->GetScaledYResolution(), rect_left_, rect_top_,
|
||||
rect_width_, rect_height_);
|
||||
@ -1281,8 +1324,9 @@ LTRResultIterator *TessBaseAPI::GetLTRIterator() {
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
ResultIterator *TessBaseAPI::GetIterator() {
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr)
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return ResultIterator::StartOfParagraph(LTRResultIterator(
|
||||
page_res_, tesseract_, thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
|
||||
rect_left_, rect_top_, rect_width_, rect_height_));
|
||||
@ -1297,8 +1341,9 @@ ResultIterator *TessBaseAPI::GetIterator() {
|
||||
* DetectOS, or anything else that changes the internal PAGE_RES.
|
||||
*/
|
||||
MutableIterator *TessBaseAPI::GetMutableIterator() {
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr)
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
|
||||
thresholder_->GetScaledYResolution(), rect_left_, rect_top_,
|
||||
rect_width_, rect_height_);
|
||||
@ -1306,13 +1351,15 @@ MutableIterator *TessBaseAPI::GetMutableIterator() {
|
||||
|
||||
/** Make a text string from the internal data structures. */
|
||||
char *TessBaseAPI::GetUTF8Text() {
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
std::string text("");
|
||||
ResultIterator *it = GetIterator();
|
||||
do {
|
||||
if (it->Empty(RIL_PARA))
|
||||
if (it->Empty(RIL_PARA)) {
|
||||
continue;
|
||||
}
|
||||
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
|
||||
text += para_text.get();
|
||||
} while (it->Next(RIL_PARA));
|
||||
@ -1331,9 +1378,10 @@ std::tuple<int,int,int,int> TessBaseAPI::GetTableBoundingBox(unsigned i)
|
||||
{
|
||||
const auto &t = constUniqueInstance<std::vector<TessTable>>();
|
||||
|
||||
if(i >= t.size())
|
||||
return std::tuple<int,int,int,int>(0, 0, 0, 0);
|
||||
|
||||
if (i >= t.size()) {
|
||||
return std::tuple<int, int, int, int>(0, 0, 0, 0);
|
||||
}
|
||||
|
||||
const int height = tesseract_->ImageHeight();
|
||||
|
||||
return std::make_tuple<int,int,int,int>(
|
||||
@ -1346,17 +1394,19 @@ std::vector<std::tuple<int,int,int,int>> TessBaseAPI::GetTableRows(unsigned i)
|
||||
{
|
||||
const auto &t = constUniqueInstance<std::vector<TessTable>>();
|
||||
|
||||
if(i >= t.size())
|
||||
return std::vector<std::tuple<int,int,int,int>>();
|
||||
|
||||
if (i >= t.size()) {
|
||||
return std::vector<std::tuple<int, int, int, int>>();
|
||||
}
|
||||
|
||||
std::vector<std::tuple<int,int,int,int>> rows(t[i].rows.size());
|
||||
const int height = tesseract_->ImageHeight();
|
||||
|
||||
for(unsigned j = 0; j < t[i].rows.size(); ++j)
|
||||
rows[j] = std::make_tuple<int,int,int,int>(
|
||||
t[i].rows[j].left(), height - t[i].rows[j].top(),
|
||||
t[i].rows[j].right(), height - t[i].rows[j].bottom());
|
||||
|
||||
|
||||
for (unsigned j = 0; j < t[i].rows.size(); ++j) {
|
||||
rows[j] =
|
||||
std::make_tuple<int, int, int, int>(t[i].rows[j].left(), height - t[i].rows[j].top(),
|
||||
t[i].rows[j].right(), height - t[i].rows[j].bottom());
|
||||
}
|
||||
|
||||
return rows;
|
||||
}
|
||||
|
||||
@ -1364,17 +1414,19 @@ std::vector<std::tuple<int,int,int,int> > TessBaseAPI::GetTableCols(unsigned i)
|
||||
{
|
||||
const auto &t = constUniqueInstance<std::vector<TessTable>>();
|
||||
|
||||
if(i >= t.size())
|
||||
return std::vector<std::tuple<int,int,int,int>>();
|
||||
|
||||
if (i >= t.size()) {
|
||||
return std::vector<std::tuple<int, int, int, int>>();
|
||||
}
|
||||
|
||||
std::vector<std::tuple<int,int,int,int>> cols(t[i].cols.size());
|
||||
const int height = tesseract_->ImageHeight();
|
||||
|
||||
for(unsigned j = 0; j < t[i].cols.size(); ++j)
|
||||
cols[j] = std::make_tuple<int,int,int,int>(
|
||||
t[i].cols[j].left(), height - t[i].cols[j].top(),
|
||||
t[i].cols[j].right(), height - t[i].cols[j].bottom());
|
||||
|
||||
|
||||
for (unsigned j = 0; j < t[i].cols.size(); ++j) {
|
||||
cols[j] =
|
||||
std::make_tuple<int, int, int, int>(t[i].cols[j].left(), height - t[i].cols[j].top(),
|
||||
t[i].cols[j].right(), height - t[i].cols[j].bottom());
|
||||
}
|
||||
|
||||
return cols;
|
||||
}
|
||||
|
||||
@ -1393,8 +1445,9 @@ static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::st
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *TessBaseAPI::GetTSVText(int page_number) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
|
||||
int page_id = page_number + 1; // we use 1-based page numbers.
|
||||
@ -1479,12 +1532,15 @@ char *TessBaseAPI::GetTSVText(int page_number) {
|
||||
tsv_str += "\t";
|
||||
|
||||
// Increment counts if at end of block/paragraph/textline.
|
||||
if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD))
|
||||
if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) {
|
||||
lcnt++;
|
||||
if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD))
|
||||
}
|
||||
if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) {
|
||||
pcnt++;
|
||||
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD))
|
||||
}
|
||||
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) {
|
||||
bcnt++;
|
||||
}
|
||||
|
||||
do {
|
||||
tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
|
||||
@ -1530,8 +1586,9 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *TessBaseAPI::GetBoxText(int page_number) {
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
int blob_count;
|
||||
int utf8_length = TextLength(&blob_count);
|
||||
int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine;
|
||||
@ -1546,15 +1603,17 @@ char *TessBaseAPI::GetBoxText(int page_number) {
|
||||
// Tesseract uses space for recognition failure. Fix to a reject
|
||||
// character, kTesseractReject so we don't create illegal box files.
|
||||
for (int i = 0; text[i] != '\0'; ++i) {
|
||||
if (text[i] == ' ')
|
||||
if (text[i] == ' ') {
|
||||
text[i] = kTesseractReject;
|
||||
}
|
||||
}
|
||||
snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n",
|
||||
text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number);
|
||||
output_length += strlen(result + output_length);
|
||||
// Just in case...
|
||||
if (output_length + kMaxBytesPerLine > total_length)
|
||||
if (output_length + kMaxBytesPerLine > total_length) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (it->Next(RIL_SYMBOL));
|
||||
delete it;
|
||||
@ -1576,8 +1635,9 @@ const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d,
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *TessBaseAPI::GetUNLVText() {
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
bool tilde_crunch_written = false;
|
||||
bool last_char_was_newline = true;
|
||||
bool last_char_was_tilde = false;
|
||||
@ -1625,17 +1685,19 @@ char *TessBaseAPI::GetUNLVText() {
|
||||
offset = lengths[i++];
|
||||
}
|
||||
if (i < length && wordstr[offset] != 0) {
|
||||
if (!last_char_was_newline)
|
||||
if (!last_char_was_newline) {
|
||||
*ptr++ = ' ';
|
||||
else
|
||||
} else {
|
||||
last_char_was_newline = false;
|
||||
}
|
||||
for (; i < length; offset += lengths[i++]) {
|
||||
if (wordstr[offset] == ' ' || wordstr[offset] == kTesseractReject) {
|
||||
*ptr++ = kUNLVReject;
|
||||
last_char_was_tilde = true;
|
||||
} else {
|
||||
if (word->reject_map[i].rejected())
|
||||
if (word->reject_map[i].rejected()) {
|
||||
*ptr++ = kUNLVSuspect;
|
||||
}
|
||||
UNICHAR ch(wordstr + offset, lengths[i]);
|
||||
int uni_ch = ch.first_uni();
|
||||
for (int j = 0; kUniChs[j] != 0; ++j) {
|
||||
@ -1690,10 +1752,12 @@ bool TessBaseAPI::DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
|
||||
int orient_id = osr.best_result.orientation_id;
|
||||
int script_id = osr.get_best_script(orient_id);
|
||||
if (orient_conf)
|
||||
if (orient_conf) {
|
||||
*orient_conf = osr.best_result.oconfidence;
|
||||
if (orient_deg)
|
||||
}
|
||||
if (orient_deg) {
|
||||
*orient_deg = orient_id * 90; // convert quadrant to degrees
|
||||
}
|
||||
|
||||
if (script_name) {
|
||||
const char *script = osr.unicharset->get_script_from_script_id(script_id);
|
||||
@ -1701,8 +1765,9 @@ bool TessBaseAPI::DetectOrientationScript(int *orient_deg, float *orient_conf,
|
||||
*script_name = script;
|
||||
}
|
||||
|
||||
if (script_conf)
|
||||
if (script_conf) {
|
||||
*script_conf = osr.best_result.sconfidence;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1718,8 +1783,9 @@ char *TessBaseAPI::GetOsdText(int page_number) {
|
||||
const char *script_name;
|
||||
float script_conf;
|
||||
|
||||
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf))
|
||||
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// clockwise rotation needed to make the page upright
|
||||
int rotate = OrientationIdToValue(orient_deg / 90);
|
||||
@ -1746,26 +1812,31 @@ char *TessBaseAPI::GetOsdText(int page_number) {
|
||||
/** Returns the average word confidence for Tesseract page result. */
|
||||
int TessBaseAPI::MeanTextConf() {
|
||||
int *conf = AllWordConfidences();
|
||||
if (!conf)
|
||||
if (!conf) {
|
||||
return 0;
|
||||
}
|
||||
int sum = 0;
|
||||
int *pt = conf;
|
||||
while (*pt >= 0)
|
||||
while (*pt >= 0) {
|
||||
sum += *pt++;
|
||||
if (pt != conf)
|
||||
}
|
||||
if (pt != conf) {
|
||||
sum /= pt - conf;
|
||||
}
|
||||
delete[] conf;
|
||||
return sum;
|
||||
}
|
||||
|
||||
/** Returns an array of all word confidences, terminated by -1. */
|
||||
int *TessBaseAPI::AllWordConfidences() {
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
int n_word = 0;
|
||||
PAGE_RES_IT res_it(page_res_);
|
||||
for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
|
||||
for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
|
||||
n_word++;
|
||||
}
|
||||
|
||||
int *conf = new int[n_word + 1];
|
||||
n_word = 0;
|
||||
@ -1774,10 +1845,12 @@ int *TessBaseAPI::AllWordConfidences() {
|
||||
WERD_CHOICE *choice = word->best_choice;
|
||||
int w_conf = static_cast<int>(100 + 5 * choice->certainty());
|
||||
// This is the equation for converting Tesseract certainty to a confidence in 0..100
|
||||
if (w_conf < 0)
|
||||
if (w_conf < 0) {
|
||||
w_conf = 0;
|
||||
if (w_conf > 100)
|
||||
}
|
||||
if (w_conf > 100) {
|
||||
w_conf = 100;
|
||||
}
|
||||
conf[n_word++] = w_conf;
|
||||
}
|
||||
conf[n_word] = -1;
|
||||
@ -1815,12 +1888,15 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
|
||||
int w = 0;
|
||||
int t;
|
||||
for (t = 0; text[t] != '\0'; ++t) {
|
||||
if (text[t] == '\n' || text[t] == ' ')
|
||||
if (text[t] == '\n' || text[t] == ' ') {
|
||||
continue;
|
||||
while (wordstr[w] == ' ')
|
||||
}
|
||||
while (wordstr[w] == ' ') {
|
||||
++w;
|
||||
if (text[t] != wordstr[w])
|
||||
}
|
||||
if (text[t] != wordstr[w]) {
|
||||
break;
|
||||
}
|
||||
++w;
|
||||
}
|
||||
if (text[t] != '\0' || wordstr[w] != '\0') {
|
||||
@ -1831,10 +1907,11 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
|
||||
tesseract_->ReSegmentByClassification(page_res_);
|
||||
tesseract_->TidyUp(page_res_);
|
||||
PAGE_RES_IT pr_it(page_res_);
|
||||
if (pr_it.word() == nullptr)
|
||||
if (pr_it.word() == nullptr) {
|
||||
success = false;
|
||||
else
|
||||
} else {
|
||||
word_res = pr_it.word();
|
||||
}
|
||||
} else {
|
||||
word_res->BestChoiceToCorrectText();
|
||||
}
|
||||
@ -1860,11 +1937,13 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
|
||||
* any Recognize or Get* operation.
|
||||
*/
|
||||
void TessBaseAPI::Clear() {
|
||||
if (thresholder_ != nullptr)
|
||||
if (thresholder_ != nullptr) {
|
||||
thresholder_->Clear();
|
||||
}
|
||||
ClearResults();
|
||||
if (tesseract_ != nullptr)
|
||||
if (tesseract_ != nullptr) {
|
||||
SetInputImage(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1888,8 +1967,9 @@ void TessBaseAPI::End() {
|
||||
delete paragraph_models_;
|
||||
paragraph_models_ = nullptr;
|
||||
}
|
||||
if (osd_tesseract_ == tesseract_)
|
||||
if (osd_tesseract_ == tesseract_) {
|
||||
osd_tesseract_ = nullptr;
|
||||
}
|
||||
delete tesseract_;
|
||||
tesseract_ = nullptr;
|
||||
delete osd_tesseract_;
|
||||
@ -1933,8 +2013,9 @@ bool TessBaseAPI::GetTextDirection(int *out_offset, float *out_slope) {
|
||||
int x1, x2, y1, y2;
|
||||
it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
|
||||
// Calculate offset and slope (NOTE: Kind of ugly)
|
||||
if (x2 <= x1)
|
||||
if (x2 <= x1) {
|
||||
x2 = x1 + 1;
|
||||
}
|
||||
// Convert the point pair to slope/offset of the baseline (in image coords.)
|
||||
*out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
|
||||
*out_offset = static_cast<int>(y1 - *out_slope * x1);
|
||||
@ -1992,8 +2073,9 @@ bool TessBaseAPI::InternalSetImage() {
|
||||
tprintf("Please call Init before attempting to set an image.\n");
|
||||
return false;
|
||||
}
|
||||
if (thresholder_ == nullptr)
|
||||
if (thresholder_ == nullptr) {
|
||||
thresholder_ = new ImageThresholder;
|
||||
}
|
||||
ClearResults();
|
||||
return true;
|
||||
}
|
||||
@ -2006,8 +2088,9 @@ bool TessBaseAPI::InternalSetImage() {
|
||||
*/
|
||||
bool TessBaseAPI::Threshold(Pix **pix) {
|
||||
ASSERT_HOST(pix != nullptr);
|
||||
if (*pix != nullptr)
|
||||
if (*pix != nullptr) {
|
||||
pixDestroy(pix);
|
||||
}
|
||||
// Zero resolution messes up the algorithms, so make sure it is credible.
|
||||
int user_dpi = 0;
|
||||
GetIntVariable("user_defined_dpi", &user_dpi);
|
||||
@ -2030,8 +2113,9 @@ bool TessBaseAPI::Threshold(Pix **pix) {
|
||||
thresholder_->SetSourceYResolution(kMinCredibleResolution);
|
||||
}
|
||||
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
|
||||
if (!thresholder_->ThresholdToPix(pageseg_mode, pix))
|
||||
if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) {
|
||||
return false;
|
||||
}
|
||||
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
|
||||
&image_height_);
|
||||
if (!thresholder_->IsBinary()) {
|
||||
@ -2063,8 +2147,9 @@ int TessBaseAPI::FindLines() {
|
||||
tprintf("Please call SetImage before attempting recognition.\n");
|
||||
return -1;
|
||||
}
|
||||
if (recognition_done_)
|
||||
if (recognition_done_) {
|
||||
ClearResults();
|
||||
}
|
||||
if (!block_list_->empty()) {
|
||||
return 0;
|
||||
}
|
||||
@ -2121,8 +2206,9 @@ int TessBaseAPI::FindLines() {
|
||||
}
|
||||
}
|
||||
|
||||
if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0)
|
||||
if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// If Devanagari is being recognized, we use different images for page seg
|
||||
// and for OCR.
|
||||
@ -2138,10 +2224,11 @@ void TessBaseAPI::ClearResults() {
|
||||
delete page_res_;
|
||||
page_res_ = nullptr;
|
||||
recognition_done_ = false;
|
||||
if (block_list_ == nullptr)
|
||||
if (block_list_ == nullptr) {
|
||||
block_list_ = new BLOCK_LIST;
|
||||
else
|
||||
} else {
|
||||
block_list_->clear();
|
||||
}
|
||||
if (paragraph_models_ != nullptr) {
|
||||
for (auto model : *paragraph_models_) {
|
||||
delete model;
|
||||
@ -2161,8 +2248,9 @@ void TessBaseAPI::ClearResults() {
|
||||
* Also return the number of recognized blobs in blob_count.
|
||||
*/
|
||||
int TessBaseAPI::TextLength(int *blob_count) {
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr)
|
||||
if (tesseract_ == nullptr || page_res_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
PAGE_RES_IT page_res_it(page_res_);
|
||||
int total_length = 2;
|
||||
@ -2175,13 +2263,15 @@ int TessBaseAPI::TextLength(int *blob_count) {
|
||||
total_blobs += choice->length() + 2;
|
||||
total_length += choice->unichar_string().length() + 2;
|
||||
for (int i = 0; i < word->reject_map.length(); ++i) {
|
||||
if (word->reject_map[i].rejected())
|
||||
if (word->reject_map[i].rejected()) {
|
||||
++total_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (blob_count != nullptr)
|
||||
if (blob_count != nullptr) {
|
||||
*blob_count = total_blobs;
|
||||
}
|
||||
return total_length;
|
||||
}
|
||||
|
||||
@ -2191,8 +2281,9 @@ int TessBaseAPI::TextLength(int *blob_count) {
|
||||
* Returns true if the image was processed successfully.
|
||||
*/
|
||||
bool TessBaseAPI::DetectOS(OSResults *osr) {
|
||||
if (tesseract_ == nullptr)
|
||||
if (tesseract_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
ClearResults();
|
||||
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
|
||||
return false;
|
||||
@ -2255,8 +2346,9 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti
|
||||
FCOORD classify_rotation = block_it.data()->classify_rotation();
|
||||
float classify_theta = classify_rotation.angle();
|
||||
double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
|
||||
if (rot_theta < 0)
|
||||
if (rot_theta < 0) {
|
||||
rot_theta += 4;
|
||||
}
|
||||
int num_rotations = static_cast<int>(rot_theta + 0.5);
|
||||
(*block_orientation)[i] = num_rotations;
|
||||
// The classify_rotation is non-zero only if the text has vertical
|
||||
@ -2269,8 +2361,9 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti
|
||||
void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
|
||||
int debug_level = 0;
|
||||
GetIntVariable("paragraph_debug_level", &debug_level);
|
||||
if (paragraph_models_ == nullptr)
|
||||
if (paragraph_models_ == nullptr) {
|
||||
paragraph_models_ = new std::vector<ParagraphModel *>;
|
||||
}
|
||||
MutableIterator *result_it = GetMutableIterator();
|
||||
do { // Detect paragraphs for this block
|
||||
std::vector<ParagraphModel *> models;
|
||||
@ -2287,8 +2380,9 @@ const char *TessBaseAPI::GetUnichar(int unichar_id) {
|
||||
|
||||
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
|
||||
const Dawg *TessBaseAPI::GetDawg(int i) const {
|
||||
if (tesseract_ == nullptr || i >= NumDawgs())
|
||||
if (tesseract_ == nullptr || i >= NumDawgs()) {
|
||||
return nullptr;
|
||||
}
|
||||
return tesseract_->getDict().GetDawg(i);
|
||||
}
|
||||
|
||||
|
@ -62,8 +62,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le
|
||||
|
||||
// Try to get the baseline coordinates at this level.
|
||||
int x1, y1, x2, y2;
|
||||
if (!it->Baseline(level, &x1, &y1, &x2, &y2))
|
||||
if (!it->Baseline(level, &x1, &y1, &x2, &y2)) {
|
||||
return;
|
||||
}
|
||||
// Following the description of this field of the hOCR spec, we convert the
|
||||
// baseline coordinates so that "the bottom left of the bounding box is the
|
||||
// origin".
|
||||
@ -127,8 +128,9 @@ char *TessBaseAPI::GetHOCRText(int page_number) {
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, ccnt = 1;
|
||||
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
|
||||
@ -139,8 +141,9 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
GetBoolVariable("hocr_font_info", &font_info);
|
||||
GetBoolVariable("hocr_char_boxes", &hocr_boxes);
|
||||
|
||||
if (input_file_.empty())
|
||||
if (input_file_.empty()) {
|
||||
SetInputName(nullptr);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// convert input name from ANSI encoding to utf-8
|
||||
@ -256,12 +259,14 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
switch (res_it->WordDirection()) {
|
||||
// Only emit direction if different from current paragraph direction
|
||||
case DIR_LEFT_TO_RIGHT:
|
||||
if (!para_is_ltr)
|
||||
if (!para_is_ltr) {
|
||||
hocr_str << " dir='ltr'";
|
||||
}
|
||||
break;
|
||||
case DIR_RIGHT_TO_LEFT:
|
||||
if (para_is_ltr)
|
||||
if (para_is_ltr) {
|
||||
hocr_str << " dir='rtl'";
|
||||
}
|
||||
break;
|
||||
case DIR_MIX:
|
||||
case DIR_NEUTRAL:
|
||||
@ -272,10 +277,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
|
||||
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
|
||||
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
|
||||
if (bold)
|
||||
if (bold) {
|
||||
hocr_str << "<strong>";
|
||||
if (italic)
|
||||
}
|
||||
if (italic) {
|
||||
hocr_str << "<em>";
|
||||
}
|
||||
do {
|
||||
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
|
||||
if (grapheme && grapheme[0] != 0) {
|
||||
@ -335,10 +342,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
}
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
if (italic)
|
||||
if (italic) {
|
||||
hocr_str << "</em>";
|
||||
if (bold)
|
||||
}
|
||||
if (bold) {
|
||||
hocr_str << "</strong>";
|
||||
}
|
||||
// If the lstm choice mode is required it is added here
|
||||
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
|
||||
for (auto symbol : *rawTimestepMap) {
|
||||
@ -371,10 +380,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
|
||||
for (auto &j : timestep) {
|
||||
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
|
||||
if (conf < 0.0f)
|
||||
if (conf < 0.0f) {
|
||||
conf = 0.0f;
|
||||
if (conf > 100.0f)
|
||||
}
|
||||
if (conf > 100.0f) {
|
||||
conf = 100.0f;
|
||||
}
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
|
||||
@ -447,8 +458,9 @@ bool TessHOcrRenderer::BeginDocumentHandler() {
|
||||
"' />\n"
|
||||
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
|
||||
" ocr_line ocrx_word ocrp_wconf");
|
||||
if (font_info_)
|
||||
if (font_info_) {
|
||||
AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
|
||||
}
|
||||
AppendString(
|
||||
"'/>\n"
|
||||
" </head>\n"
|
||||
@ -465,8 +477,9 @@ bool TessHOcrRenderer::EndDocumentHandler() {
|
||||
|
||||
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
|
||||
if (hocr == nullptr)
|
||||
if (hocr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(hocr.get());
|
||||
|
||||
|
@ -36,8 +36,9 @@ static void AddBoxToLSTM(int right, int bottom, int top, int image_height, int p
|
||||
}
|
||||
|
||||
char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string lstm_box_str;
|
||||
bool first_word = true;
|
||||
@ -94,8 +95,9 @@ TessLSTMBoxRenderer::TessLSTMBoxRenderer(const char *outputbase)
|
||||
|
||||
bool TessLSTMBoxRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> lstmbox(api->GetLSTMBoxText(imagenum()));
|
||||
if (lstmbox == nullptr)
|
||||
if (lstmbox == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(lstmbox.get());
|
||||
|
||||
|
@ -203,8 +203,9 @@ void TessPDFRenderer::AppendPDFObject(const char *data) {
|
||||
static double prec(double x) {
|
||||
double kPrecision = 1000.0;
|
||||
double a = round(x * kPrecision) / kPrecision;
|
||||
if (a == -0)
|
||||
if (a == -0) {
|
||||
return 0;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -295,8 +296,9 @@ static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, int *line_x1,
|
||||
*line_y2 = y2;
|
||||
int rise = abs(y2 - y1) * 72;
|
||||
int run = abs(x2 - x1) * 72;
|
||||
if (rise < 2 * ppi && 2 * ppi < run)
|
||||
if (rise < 2 * ppi && 2 * ppi < run) {
|
||||
*line_y1 = *line_y2 = (y1 + y2) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
|
||||
@ -428,8 +430,9 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
|
||||
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps,
|
||||
&fontsize, &font_id);
|
||||
const int kDefaultFontsize = 8;
|
||||
if (fontsize <= 0)
|
||||
if (fontsize <= 0) {
|
||||
fontsize = kDefaultFontsize;
|
||||
}
|
||||
if (fontsize != old_fontsize) {
|
||||
pdf_str << "/f-0-0 " << fontsize << " Tf ";
|
||||
old_fontsize = fontsize;
|
||||
@ -655,18 +658,21 @@ bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int objnum,
|
||||
char **pdf_object, long int *pdf_object_size,
|
||||
const int jpg_quality) {
|
||||
if (!pdf_object_size || !pdf_object)
|
||||
if (!pdf_object_size || !pdf_object) {
|
||||
return false;
|
||||
}
|
||||
*pdf_object = nullptr;
|
||||
*pdf_object_size = 0;
|
||||
if (!filename && !pix)
|
||||
if (!filename && !pix) {
|
||||
return false;
|
||||
}
|
||||
|
||||
L_Compressed_Data *cid = nullptr;
|
||||
|
||||
int sad = 0;
|
||||
if (pixGetInputFormat(pix) == IFF_PNG)
|
||||
if (pixGetInputFormat(pix) == IFF_PNG) {
|
||||
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
|
||||
}
|
||||
if (!cid) {
|
||||
sad = l_generateCIDataForPdf(filename, pix, jpg_quality, &cid);
|
||||
}
|
||||
@ -800,8 +806,9 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
Pix *pix = api->GetInputImage();
|
||||
const char *filename = api->GetInputName();
|
||||
int ppi = api->GetSourceYResolution();
|
||||
if (!pix || ppi <= 0)
|
||||
if (!pix || ppi <= 0) {
|
||||
return false;
|
||||
}
|
||||
double width = pixGetWidth(pix) * 72.0 / ppi;
|
||||
double height = pixGetHeight(pix) * 72.0 / ppi;
|
||||
|
||||
|
@ -48,17 +48,19 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
|
||||
|
||||
TessResultRenderer::~TessResultRenderer() {
|
||||
if (fout_ != nullptr) {
|
||||
if (fout_ != stdout)
|
||||
if (fout_ != stdout) {
|
||||
fclose(fout_);
|
||||
else
|
||||
} else {
|
||||
clearerr(fout_);
|
||||
}
|
||||
}
|
||||
delete next_;
|
||||
}
|
||||
|
||||
void TessResultRenderer::insert(TessResultRenderer *next) {
|
||||
if (next == nullptr)
|
||||
if (next == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
TessResultRenderer *remainder = next_;
|
||||
next_ = next;
|
||||
@ -71,8 +73,9 @@ void TessResultRenderer::insert(TessResultRenderer *next) {
|
||||
}
|
||||
|
||||
bool TessResultRenderer::BeginDocument(const char *title) {
|
||||
if (!happy_)
|
||||
if (!happy_) {
|
||||
return false;
|
||||
}
|
||||
title_ = title;
|
||||
imagenum_ = -1;
|
||||
bool ok = BeginDocumentHandler();
|
||||
@ -83,8 +86,9 @@ bool TessResultRenderer::BeginDocument(const char *title) {
|
||||
}
|
||||
|
||||
bool TessResultRenderer::AddImage(TessBaseAPI *api) {
|
||||
if (!happy_)
|
||||
if (!happy_) {
|
||||
return false;
|
||||
}
|
||||
++imagenum_;
|
||||
bool ok = AddImageHandler(api);
|
||||
if (next_) {
|
||||
@ -94,8 +98,9 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
|
||||
}
|
||||
|
||||
bool TessResultRenderer::EndDocument() {
|
||||
if (!happy_)
|
||||
if (!happy_) {
|
||||
return false;
|
||||
}
|
||||
bool ok = EndDocumentHandler();
|
||||
if (next_) {
|
||||
ok = next_->EndDocument() && ok;
|
||||
@ -108,8 +113,9 @@ void TessResultRenderer::AppendString(const char *s) {
|
||||
}
|
||||
|
||||
void TessResultRenderer::AppendData(const char *s, int len) {
|
||||
if (!tesseract::Serialize(fout_, s, len))
|
||||
if (!tesseract::Serialize(fout_, s, len)) {
|
||||
happy_ = false;
|
||||
}
|
||||
fflush(fout_);
|
||||
}
|
||||
|
||||
@ -169,8 +175,9 @@ bool TessTsvRenderer::EndDocumentHandler() {
|
||||
|
||||
bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
|
||||
if (tsv == nullptr)
|
||||
if (tsv == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(tsv.get());
|
||||
|
||||
@ -185,8 +192,9 @@ TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
|
||||
|
||||
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
|
||||
if (unlv == nullptr)
|
||||
if (unlv == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(unlv.get());
|
||||
|
||||
@ -201,8 +209,9 @@ TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
|
||||
|
||||
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
|
||||
if (text == nullptr)
|
||||
if (text == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(text.get());
|
||||
|
||||
@ -218,8 +227,9 @@ TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(ou
|
||||
|
||||
bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
char *osd = api->GetOsdText(imagenum());
|
||||
if (osd == nullptr)
|
||||
if (osd == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(osd);
|
||||
delete[] osd;
|
||||
|
@ -140,18 +140,24 @@ static void PrintVersionInfo() {
|
||||
if (tesseract::SIMDDetect::IsNEONAvailable())
|
||||
printf(" Found NEON\n");
|
||||
#else
|
||||
if (tesseract::SIMDDetect::IsAVX512BWAvailable())
|
||||
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
|
||||
printf(" Found AVX512BW\n");
|
||||
if (tesseract::SIMDDetect::IsAVX512FAvailable())
|
||||
}
|
||||
if (tesseract::SIMDDetect::IsAVX512FAvailable()) {
|
||||
printf(" Found AVX512F\n");
|
||||
if (tesseract::SIMDDetect::IsAVX2Available())
|
||||
}
|
||||
if (tesseract::SIMDDetect::IsAVX2Available()) {
|
||||
printf(" Found AVX2\n");
|
||||
if (tesseract::SIMDDetect::IsAVXAvailable())
|
||||
}
|
||||
if (tesseract::SIMDDetect::IsAVXAvailable()) {
|
||||
printf(" Found AVX\n");
|
||||
if (tesseract::SIMDDetect::IsFMAAvailable())
|
||||
}
|
||||
if (tesseract::SIMDDetect::IsFMAAvailable()) {
|
||||
printf(" Found FMA\n");
|
||||
if (tesseract::SIMDDetect::IsSSEAvailable())
|
||||
}
|
||||
if (tesseract::SIMDDetect::IsSSEAvailable()) {
|
||||
printf(" Found SSE\n");
|
||||
}
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
printf(" Found OpenMP %d\n", _OPENMP);
|
||||
@ -335,8 +341,9 @@ static void PrintBanner() {
|
||||
* but that doesn't work.
|
||||
*/
|
||||
static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
|
||||
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
|
||||
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
|
||||
api.SetPageSegMode(pagesegmode);
|
||||
}
|
||||
}
|
||||
|
||||
static bool checkArgValues(int arg, const char *mode, int count) {
|
||||
@ -635,8 +642,9 @@ int main(int argc, char **argv) {
|
||||
lang = "eng";
|
||||
}
|
||||
|
||||
if (image == nullptr && !list_langs && !print_parameters)
|
||||
if (image == nullptr && !list_langs && !print_parameters) {
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// Call GlobalDawgCache here to create the global DawgCache object before
|
||||
// the TessBaseAPI object. This fixes the order of destructor calls:
|
||||
@ -765,8 +773,9 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
if (!renderers.empty()) {
|
||||
if (banner)
|
||||
if (banner) {
|
||||
PrintBanner();
|
||||
}
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
if (!osd_warning.empty()) {
|
||||
fprintf(stderr, "%s", osd_warning.c_str());
|
||||
|
@ -29,8 +29,9 @@ namespace tesseract {
|
||||
*/
|
||||
|
||||
char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string wordstr_box_str;
|
||||
int left = 0, top = 0, right = 0, bottom = 0;
|
||||
@ -93,8 +94,9 @@ TessWordStrBoxRenderer::TessWordStrBoxRenderer(const char *outputbase)
|
||||
|
||||
bool TessWordStrBoxRenderer::AddImageHandler(TessBaseAPI *api) {
|
||||
const std::unique_ptr<const char[]> wordstrbox(api->GetWordStrBoxText(imagenum()));
|
||||
if (wordstrbox == nullptr)
|
||||
if (wordstrbox == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AppendString(wordstrbox.get());
|
||||
|
||||
|
@ -21,8 +21,9 @@ namespace tesseract {
|
||||
// Computes and returns the dot product of the two n-vectors u and v.
|
||||
double DotProductNative(const double *u, const double *v, int n) {
|
||||
double total = 0.0;
|
||||
for (int k = 0; k < n; ++k)
|
||||
for (int k = 0; k < n; ++k) {
|
||||
total += u[k] * v[k];
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
|
@ -51,8 +51,9 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
|
||||
// group.
|
||||
for (int i = 0; i < num_inputs_per_group_; ++i) {
|
||||
int8_t weight = 0;
|
||||
if (output + j < num_out && input + i < num_in)
|
||||
if (output + j < num_out && input + i < num_in) {
|
||||
weight = w(output + j, input + i);
|
||||
}
|
||||
shaped_w[shaped_index++] = weight;
|
||||
}
|
||||
}
|
||||
@ -60,8 +61,9 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
|
||||
// Append the bias weights for the register set.
|
||||
for (int j = 0; j < num_outputs_per_register_set; ++j) {
|
||||
int8_t weight = 0;
|
||||
if (output + j < num_out)
|
||||
if (output + j < num_out) {
|
||||
weight = w(output + j, num_in);
|
||||
}
|
||||
shaped_w[shaped_index++] = weight;
|
||||
}
|
||||
output += num_outputs_per_register_set;
|
||||
@ -81,8 +83,9 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
|
||||
for (int i = 0; i < num_out; ++i) {
|
||||
const int8_t *wi = w[i];
|
||||
int total = 0;
|
||||
for (int j = 0; j < num_in; ++j)
|
||||
for (int j = 0; j < num_in; ++j) {
|
||||
total += wi[j] * u[j];
|
||||
}
|
||||
// Add in the bias and correct for integer values.
|
||||
v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ static inline void MultiplyGroup(const __m256i &rep_input, const __m256i &ones,
|
||||
// We don't actually care what the top 64bits are, but this ends
|
||||
// up with them being zero.
|
||||
static inline __m128i load64_to_128(const int8_t *wi_) {
|
||||
const int64_t *wi = reinterpret_cast<const int64_t *>(wi_);
|
||||
const auto *wi = reinterpret_cast<const int64_t *>(wi_);
|
||||
return _mm_set_epi64x(0, wi[0]);
|
||||
}
|
||||
|
||||
@ -326,8 +326,9 @@ static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *
|
||||
group_size /= 2;
|
||||
w_step /= 2;
|
||||
|
||||
if (output + group_size <= rounded_num_out)
|
||||
if (output + group_size <= rounded_num_out) {
|
||||
PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v);
|
||||
}
|
||||
}
|
||||
|
||||
const IntSimdMatrix IntSimdMatrix::intSimdMatrixAVX2 = {
|
||||
|
@ -86,8 +86,9 @@ bool SIMDDetect::sse_available_;
|
||||
// Computes and returns the dot product of the two n-vectors u and v.
|
||||
static double DotProductGeneric(const double *u, const double *v, int n) {
|
||||
double total = 0.0;
|
||||
for (int k = 0; k < n; ++k)
|
||||
for (int k = 0; k < n; ++k) {
|
||||
total += u[k] * v[k];
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
|
@ -55,8 +55,9 @@ bool Tesseract::word_adaptable( // should we adapt?
|
||||
0: NO adaption
|
||||
*/
|
||||
if (mode == 0) {
|
||||
if (tessedit_adaption_debug)
|
||||
if (tessedit_adaption_debug) {
|
||||
tprintf("adaption disabled\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -82,27 +83,31 @@ bool Tesseract::word_adaptable( // should we adapt?
|
||||
(word->best_choice->permuter() != FREQ_DAWG_PERM) &&
|
||||
(word->best_choice->permuter() != USER_DAWG_PERM) &&
|
||||
(word->best_choice->permuter() != NUMBER_PERM)) {
|
||||
if (tessedit_adaption_debug)
|
||||
if (tessedit_adaption_debug) {
|
||||
tprintf("word not in dawgs\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
|
||||
if (tessedit_adaption_debug)
|
||||
if (tessedit_adaption_debug) {
|
||||
tprintf("word has ell conflict\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flags[CHECK_SPACES] &&
|
||||
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
|
||||
if (tessedit_adaption_debug)
|
||||
if (tessedit_adaption_debug) {
|
||||
tprintf("word contains spaces\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
|
||||
if (tessedit_adaption_debug)
|
||||
if (tessedit_adaption_debug) {
|
||||
tprintf("word is ambiguous\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -148,8 +148,9 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
|
||||
if (applybox_debug > 0) {
|
||||
tprintf("APPLY_BOXES:\n");
|
||||
tprintf(" Boxes read from boxfile: %6d\n", box_count);
|
||||
if (box_failures > 0)
|
||||
if (box_failures > 0) {
|
||||
tprintf(" Boxes failed resegmentation: %6d\n", box_failures);
|
||||
}
|
||||
}
|
||||
TidyUp(page_res);
|
||||
return page_res;
|
||||
@ -314,8 +315,9 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
WERD_RES *word_res;
|
||||
for (word_res = page_res_it.word(); word_res != nullptr; word_res = page_res_it.forward()) {
|
||||
if (!word_res->box_word->bounding_box().major_overlap(box))
|
||||
if (!word_res->box_word->bounding_box().major_overlap(box)) {
|
||||
continue;
|
||||
}
|
||||
if (applybox_debug > 1) {
|
||||
tprintf("Checking word box:");
|
||||
word_res->box_word->bounding_box().print();
|
||||
@ -326,10 +328,12 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
|
||||
int blob_count = 0;
|
||||
for (blob_count = 0; i + blob_count < word_len; ++blob_count) {
|
||||
TBOX blob_box = word_res->box_word->BlobBox(i + blob_count);
|
||||
if (!blob_box.major_overlap(box))
|
||||
if (!blob_box.major_overlap(box)) {
|
||||
break;
|
||||
if (word_res->correct_text[i + blob_count].length() > 0)
|
||||
}
|
||||
if (word_res->correct_text[i + blob_count].length() > 0) {
|
||||
break; // Blob is claimed already.
|
||||
}
|
||||
if (next_box != nullptr) {
|
||||
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
|
||||
@ -339,8 +343,9 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
|
||||
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
|
||||
next_box_miss_metric);
|
||||
}
|
||||
if (current_box_miss_metric > next_box_miss_metric)
|
||||
if (current_box_miss_metric > next_box_miss_metric) {
|
||||
break; // Blob is a better match for next box.
|
||||
}
|
||||
}
|
||||
char_box += blob_box;
|
||||
}
|
||||
@ -415,13 +420,15 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
|
||||
BLOCK_IT b_it(block_list);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
BLOCK *block = b_it.data();
|
||||
if (!box.major_overlap(block->pdblk.bounding_box()))
|
||||
if (!box.major_overlap(block->pdblk.bounding_box())) {
|
||||
continue;
|
||||
}
|
||||
ROW_IT r_it(block->row_list());
|
||||
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
|
||||
ROW *row = r_it.data();
|
||||
if (!box.major_overlap(row->bounding_box()))
|
||||
if (!box.major_overlap(row->bounding_box())) {
|
||||
continue;
|
||||
}
|
||||
WERD_IT w_it(row->word_list());
|
||||
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
|
||||
WERD *word = w_it.data();
|
||||
@ -429,16 +436,19 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
|
||||
tprintf("Checking word:");
|
||||
word->bounding_box().print();
|
||||
}
|
||||
if (word->text() != nullptr && word->text()[0] != '\0')
|
||||
if (word->text() != nullptr && word->text()[0] != '\0') {
|
||||
continue; // Ignore words that are already done.
|
||||
if (!box.major_overlap(word->bounding_box()))
|
||||
}
|
||||
if (!box.major_overlap(word->bounding_box())) {
|
||||
continue;
|
||||
}
|
||||
C_BLOB_IT blob_it(word->cblob_list());
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
C_BLOB *blob = blob_it.data();
|
||||
TBOX blob_box = blob->bounding_box();
|
||||
if (!blob_box.major_overlap(box))
|
||||
if (!blob_box.major_overlap(box)) {
|
||||
continue;
|
||||
}
|
||||
if (next_box != nullptr) {
|
||||
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
|
||||
@ -448,8 +458,9 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
|
||||
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
|
||||
next_box_miss_metric);
|
||||
}
|
||||
if (current_box_miss_metric > next_box_miss_metric)
|
||||
if (current_box_miss_metric > next_box_miss_metric) {
|
||||
continue; // Blob is a better match for next box.
|
||||
}
|
||||
}
|
||||
if (applybox_debug > 2) {
|
||||
tprintf("Blob match: blob:");
|
||||
@ -473,8 +484,9 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
|
||||
}
|
||||
}
|
||||
}
|
||||
if (new_word == nullptr && applybox_debug > 0)
|
||||
if (new_word == nullptr && applybox_debug > 0) {
|
||||
tprintf("FAIL!\n");
|
||||
}
|
||||
return new_word != nullptr;
|
||||
}
|
||||
|
||||
@ -485,8 +497,9 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
|
||||
WERD_RES *word_res;
|
||||
for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) {
|
||||
const WERD *word = word_res->word;
|
||||
if (word->text() == nullptr || word->text()[0] == '\0')
|
||||
if (word->text() == nullptr || word->text()[0] == '\0') {
|
||||
continue; // Ignore words that have no text.
|
||||
}
|
||||
// Convert the correct text to a vector of UNICHAR_ID
|
||||
std::vector<UNICHAR_ID> target_text;
|
||||
if (!ConvertStringToUnichars(word->text(), &target_text)) {
|
||||
@ -507,15 +520,17 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
|
||||
bool Tesseract::ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids) {
|
||||
for (int step = 0; *utf8 != '\0'; utf8 += step) {
|
||||
const char *next_space = strchr(utf8, ' ');
|
||||
if (next_space == nullptr)
|
||||
if (next_space == nullptr) {
|
||||
next_space = utf8 + strlen(utf8);
|
||||
}
|
||||
step = next_space - utf8;
|
||||
UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step);
|
||||
if (class_id == INVALID_UNICHAR_ID) {
|
||||
return false;
|
||||
}
|
||||
while (utf8[step] == ' ')
|
||||
while (utf8[step] == ' ') {
|
||||
++step;
|
||||
}
|
||||
class_ids->push_back(class_id);
|
||||
}
|
||||
return true;
|
||||
@ -621,15 +636,18 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
|
||||
const AmbigSpec *ambig_spec = spec_it.data();
|
||||
// We'll only do 1-1.
|
||||
if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
|
||||
ambig_spec->correct_ngram_id == target_text[text_index])
|
||||
ambig_spec->correct_ngram_id == target_text[text_index]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!spec_it.cycled_list())
|
||||
if (!spec_it.cycled_list()) {
|
||||
break; // Found an ambig.
|
||||
}
|
||||
}
|
||||
}
|
||||
if (choice_it.cycled_list())
|
||||
if (choice_it.cycled_list()) {
|
||||
continue; // No match.
|
||||
}
|
||||
segmentation->push_back(length);
|
||||
if (choices_pos + length == choices_length && text_index + 1 == target_text.size()) {
|
||||
// This is a complete match. If the rating is good record a new best.
|
||||
@ -715,8 +733,9 @@ void Tesseract::TidyUp(PAGE_RES *page_res) {
|
||||
if (bad_blob_count > 0) {
|
||||
tprintf(" Leaving %d unlabelled blobs in %d words.\n", bad_blob_count, ok_word_count);
|
||||
}
|
||||
if (unlabelled_words > 0)
|
||||
if (unlabelled_words > 0) {
|
||||
tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,8 +154,9 @@ void Tesseract::SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, cons
|
||||
// Setup all the words for recognition with polygonal approximation.
|
||||
for (unsigned w = 0; w < words->size(); ++w) {
|
||||
SetupWordPassN(pass_n, &(*words)[w]);
|
||||
if (w > 0)
|
||||
if (w > 0) {
|
||||
(*words)[w].prev_word = &(*words)[w - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -169,8 +170,9 @@ void Tesseract::SetupWordPassN(int pass_n, WordData *word) {
|
||||
} else if (pass_n == 2) {
|
||||
// TODO(rays) Should we do this on pass1 too?
|
||||
word->word->caps_height = 0.0;
|
||||
if (word->word->x_height == 0.0f)
|
||||
if (word->word->x_height == 0.0f) {
|
||||
word->word->x_height = word->row->x_height();
|
||||
}
|
||||
}
|
||||
word->lang_words.truncate(0);
|
||||
for (unsigned s = 0; s <= sub_langs_.size(); ++s) {
|
||||
@ -201,8 +203,9 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
|
||||
pr_it->restart_page();
|
||||
for (unsigned w = 0; w < words->size(); ++w) {
|
||||
WordData *word = &(*words)[w];
|
||||
if (w > 0)
|
||||
if (w > 0) {
|
||||
word->prev_word = &(*words)[w - 1];
|
||||
}
|
||||
if (monitor != nullptr) {
|
||||
monitor->ocr_alive = true;
|
||||
if (pass_n == 1) {
|
||||
@ -228,12 +231,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
|
||||
for (s = 0; s < word->lang_words.size() && word->lang_words[s]->tess_failed; ++s) {
|
||||
}
|
||||
// If all are failed, skip it. Image words are skipped by this test.
|
||||
if (s > word->lang_words.size())
|
||||
if (s > word->lang_words.size()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Sync pr_it with the wth WordData.
|
||||
while (pr_it->word() != nullptr && pr_it->word() != word->word)
|
||||
while (pr_it->word() != nullptr && pr_it->word() != word->word) {
|
||||
pr_it->forward();
|
||||
}
|
||||
ASSERT_HOST(pr_it->word() != nullptr);
|
||||
bool make_next_word_fuzzy = false;
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -333,8 +338,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
|
||||
most_recently_used_ = this;
|
||||
// Run pass 1 word recognition.
|
||||
if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words))
|
||||
if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words)) {
|
||||
return false;
|
||||
}
|
||||
// Pass 1 post-processing.
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||
if (page_res_it.word()->word->flag(W_REP_CHAR)) {
|
||||
@ -343,8 +349,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
}
|
||||
|
||||
// Count dict words.
|
||||
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
|
||||
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) {
|
||||
++(stats_.dict_words);
|
||||
}
|
||||
|
||||
// Update misadaption log (we only need to do it on pass 1, since
|
||||
// adaption only happens on this pass).
|
||||
@ -355,8 +362,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
}
|
||||
}
|
||||
|
||||
if (dopasses == 1)
|
||||
if (dopasses == 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
@ -370,8 +378,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
}
|
||||
most_recently_used_ = this;
|
||||
// Run pass 2 word recognition.
|
||||
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words))
|
||||
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// The next passes are only required for Tess-only.
|
||||
@ -380,14 +389,17 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
// Fix fuzzy spaces.
|
||||
|
||||
if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces && !tessedit_word_for_word &&
|
||||
!right_to_left())
|
||||
!right_to_left()) {
|
||||
fix_fuzzy_spaces(monitor, stats_.word_count, page_res);
|
||||
}
|
||||
|
||||
// ****************** Pass 4 *******************
|
||||
if (tessedit_enable_dict_correction)
|
||||
if (tessedit_enable_dict_correction) {
|
||||
dictionary_correction_pass(page_res);
|
||||
if (tessedit_enable_bigram_correction)
|
||||
}
|
||||
if (tessedit_enable_bigram_correction) {
|
||||
bigram_correction_pass(page_res);
|
||||
}
|
||||
|
||||
// ****************** Pass 5,6 *******************
|
||||
rejection_passes(page_res, monitor, target_word_box, word_config);
|
||||
@ -410,8 +422,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
// changed by jetsoft
|
||||
// needed for dll to output memory structure
|
||||
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv))
|
||||
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv)) {
|
||||
output_pass(page_res_it, target_word_box);
|
||||
}
|
||||
// end jetsoft
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
@ -448,8 +461,9 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
while (word_it.forward() != nullptr && (!word_it.word() || word_it.word()->part_of_combo)) {
|
||||
// advance word_it, skipping over parts of combos
|
||||
}
|
||||
if (!word_it.word())
|
||||
if (!word_it.word()) {
|
||||
break;
|
||||
}
|
||||
w = word_it.word();
|
||||
if (!w || !w_prev || w->uch_set != w_prev->uch_set) {
|
||||
continue;
|
||||
@ -628,8 +642,9 @@ void Tesseract::rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
stats_.doc_good_char_quality += accepted_all_char_quality;
|
||||
}
|
||||
check_debug_pt(word, 80);
|
||||
if (tessedit_reject_bad_qual_wds && (blob_quality == 0) && (outline_errs >= chars_in_word))
|
||||
if (tessedit_reject_bad_qual_wds && (blob_quality == 0) && (outline_errs >= chars_in_word)) {
|
||||
word->reject_map.rej_word_bad_quality();
|
||||
}
|
||||
check_debug_pt(word, 90);
|
||||
page_res_it.forward();
|
||||
}
|
||||
@ -664,8 +679,9 @@ void Tesseract::rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
||||
void Tesseract::blamer_pass(PAGE_RES *page_res) {
|
||||
if (!wordrec_run_blamer)
|
||||
if (!wordrec_run_blamer) {
|
||||
return;
|
||||
}
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||
WERD_RES *word = page_res_it.word();
|
||||
@ -712,13 +728,15 @@ void Tesseract::script_pos_pass(PAGE_RES *page_res) {
|
||||
int num_upper = 0;
|
||||
int num_lower = 0;
|
||||
for (int i = 0; i < word->best_choice->length(); ++i) {
|
||||
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
|
||||
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i))) {
|
||||
++num_upper;
|
||||
else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
|
||||
} else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) {
|
||||
++num_lower;
|
||||
}
|
||||
}
|
||||
if (num_upper > 0 && num_lower == 0)
|
||||
if (num_upper > 0 && num_lower == 0) {
|
||||
word->small_caps = true;
|
||||
}
|
||||
}
|
||||
word->SetScriptPositions();
|
||||
}
|
||||
@ -730,8 +748,9 @@ static void WordGap(const PointerVector<WERD_RES> &words, int index, int *right,
|
||||
*next_left = INT32_MAX;
|
||||
if (index < words.size()) {
|
||||
*right = words[index]->word->bounding_box().right();
|
||||
if (index + 1 < words.size())
|
||||
if (index + 1 < words.size()) {
|
||||
*next_left = words[index + 1]->word->bounding_box().left();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -750,8 +769,9 @@ static void EvaluateWordSpan(const PointerVector<WERD_RES> &words, int first_ind
|
||||
} else {
|
||||
*rating += choice->rating();
|
||||
*certainty = std::min(*certainty, choice->certainty());
|
||||
if (!Dict::valid_word_permuter(choice->permuter(), false))
|
||||
if (!Dict::valid_word_permuter(choice->permuter(), false)) {
|
||||
*valid_permuter = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -787,10 +807,11 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
|
||||
break;
|
||||
}
|
||||
// Keep searching for the matching word break.
|
||||
if ((b_right < n_right && b < best_words->size()) || n == new_words->size())
|
||||
if ((b_right < n_right && b < best_words->size()) || n == new_words->size()) {
|
||||
++b;
|
||||
else
|
||||
} else {
|
||||
++n;
|
||||
}
|
||||
}
|
||||
// Rating of the current run in each.
|
||||
float b_rating = 0.0f, n_rating = 0.0f;
|
||||
@ -838,8 +859,9 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
|
||||
}
|
||||
// Transfer from out_words to best_words.
|
||||
best_words->clear();
|
||||
for (auto &out_word : out_words)
|
||||
for (auto &out_word : out_words) {
|
||||
best_words->push_back(out_word);
|
||||
}
|
||||
return num_new - num_best;
|
||||
}
|
||||
|
||||
@ -862,8 +884,9 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog
|
||||
*in_word = nullptr;
|
||||
}
|
||||
if (debug) {
|
||||
for (int i = 0; i < new_words.size(); ++i)
|
||||
for (int i = 0; i < new_words.size(); ++i) {
|
||||
new_words[i]->DebugTopChoice("Lang result");
|
||||
}
|
||||
}
|
||||
// Initial version is a bit of a hack based on better certainty and rating
|
||||
// or a dictionary vs non-dictionary word.
|
||||
@ -874,8 +897,9 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog
|
||||
// Helper returns true if all the words are acceptable.
|
||||
static bool WordsAcceptable(const PointerVector<WERD_RES> &words) {
|
||||
for (int w = 0; w < words.size(); ++w) {
|
||||
if (words[w]->tess_failed || !words[w]->tess_accepted)
|
||||
if (words[w]->tess_failed || !words[w]->tess_accepted) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -889,8 +913,9 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
*make_next_word_fuzzy = false;
|
||||
WERD *real_word = pr_it->word()->word;
|
||||
if (real_word->rej_cblob_list()->empty() || real_word->cblob_list()->empty() ||
|
||||
real_word->rej_cblob_list()->length() > noise_maxperword)
|
||||
real_word->rej_cblob_list()->length() > noise_maxperword) {
|
||||
return false;
|
||||
}
|
||||
real_word->rej_cblob_list()->sort(&C_BLOB::SortByXMiddle);
|
||||
// Get the noise outlines into a vector with matching bool map.
|
||||
std::vector<C_OUTLINE *> outlines;
|
||||
@ -911,8 +936,9 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
for (unsigned i = 0; i < overlapped_any_blob.size(); ++i) {
|
||||
if (overlapped_any_blob[i]) {
|
||||
++num_overlapped;
|
||||
if (word_wanted[i])
|
||||
if (word_wanted[i]) {
|
||||
++num_overlapped_used;
|
||||
}
|
||||
wanted.push_back(word_wanted[i]);
|
||||
wanted_blobs.push_back(target_blobs[i]);
|
||||
wanted_outlines.push_back(outlines[i]);
|
||||
@ -924,10 +950,12 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
int non_overlapped = 0;
|
||||
int non_overlapped_used = 0;
|
||||
for (unsigned i = 0; i < word_wanted.size(); ++i) {
|
||||
if (word_wanted[i])
|
||||
if (word_wanted[i]) {
|
||||
++non_overlapped_used;
|
||||
if (outlines[i] != nullptr)
|
||||
}
|
||||
if (outlines[i] != nullptr) {
|
||||
++non_overlapped_used;
|
||||
}
|
||||
}
|
||||
if (debug_noise_removal) {
|
||||
tprintf("Used %d/%d overlapped %d/%d non-overlaped diacritics on word:", num_overlapped_used,
|
||||
@ -1008,8 +1036,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
target_blobs->resize(outlines.size(), nullptr);
|
||||
// Check for outlines that need to be turned into stand-alone blobs.
|
||||
for (unsigned i = 0; i < outlines.size(); ++i) {
|
||||
if (outlines[i] == nullptr)
|
||||
if (outlines[i] == nullptr) {
|
||||
continue;
|
||||
}
|
||||
// Get a set of adjacent outlines that don't overlap any existing blob.
|
||||
blob_wanted.resize(outlines.size(), false);
|
||||
int num_blob_outlines = 0;
|
||||
@ -1028,8 +1057,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
}
|
||||
// Choose which combination of them we actually want and where to put
|
||||
// them.
|
||||
if (debug_noise_removal)
|
||||
if (debug_noise_removal) {
|
||||
tprintf("Num blobless outlines = %d\n", num_blob_outlines);
|
||||
}
|
||||
C_BLOB *left_blob = blob_it.data();
|
||||
TBOX left_box = left_blob->bounding_box();
|
||||
C_BLOB *right_blob = blob_it.at_last() ? nullptr : blob_it.data_relative(1);
|
||||
@ -1037,8 +1067,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
!right_blob->bounding_box().x_overlap(total_ol_box)) &&
|
||||
SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, left_blob, outlines,
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
if (debug_noise_removal) {
|
||||
tprintf("Added to left blob\n");
|
||||
}
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
@ -1050,8 +1081,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
right_blob->bounding_box().x_overlap(total_ol_box)) &&
|
||||
SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, right_blob, outlines,
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
if (debug_noise_removal) {
|
||||
tprintf("Added to right blob\n");
|
||||
}
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
@ -1060,8 +1092,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
}
|
||||
} else if (SelectGoodDiacriticOutlines(pass, noise_cert_punc, pr_it, nullptr, outlines,
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
if (debug_noise_removal) {
|
||||
tprintf("Fitted between blobs\n");
|
||||
}
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
@ -1099,8 +1132,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
if (debug_noise_removal) {
|
||||
TBOX ol_box;
|
||||
for (unsigned i = 0; i < test_outlines.size(); ++i) {
|
||||
if (test_outlines[i])
|
||||
if (test_outlines[i]) {
|
||||
ol_box += outlines[i]->bounding_box();
|
||||
}
|
||||
}
|
||||
tprintf("All Noise blob classified as %s=%g, delta=%g at:", all_str.c_str(), best_cert,
|
||||
best_cert - target_cert);
|
||||
@ -1121,8 +1155,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
if (debug_noise_removal) {
|
||||
TBOX ol_box;
|
||||
for (unsigned j = 0; j < outlines.size(); ++j) {
|
||||
if (test_outlines[j])
|
||||
if (test_outlines[j]) {
|
||||
ol_box += outlines[j]->bounding_box();
|
||||
}
|
||||
tprintf("%c", test_outlines[j] ? 'T' : 'F');
|
||||
}
|
||||
tprintf(" blob classified as %s=%g, delta=%g) at:", str.c_str(), cert,
|
||||
@ -1188,8 +1223,9 @@ float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
|
||||
ol_it.move_to_first();
|
||||
if (first_to_keep == nullptr) {
|
||||
// We created blob. Empty its outlines and delete it.
|
||||
for (; !ol_it.empty(); ol_it.forward())
|
||||
for (; !ol_it.empty(); ol_it.forward()) {
|
||||
ol_it.extract();
|
||||
}
|
||||
delete local_blob;
|
||||
cert = -c2;
|
||||
} else {
|
||||
@ -1212,8 +1248,9 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob
|
||||
WERD_RES *word_res = pr_it->InsertSimpleCloneWord(*pr_it->word(), word);
|
||||
// Get a new iterator that points to the new word.
|
||||
PAGE_RES_IT it(pr_it->page_res);
|
||||
while (it.word() != word_res && it.word() != nullptr)
|
||||
while (it.word() != word_res && it.word() != nullptr) {
|
||||
it.forward();
|
||||
}
|
||||
ASSERT_HOST(it.word() == word_res);
|
||||
WordData wd(it);
|
||||
// Force full initialization.
|
||||
@ -1274,8 +1311,9 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
|
||||
}
|
||||
if (word->done) {
|
||||
// If done on pass1, leave it as-is.
|
||||
if (!word->tess_failed)
|
||||
if (!word->tess_failed) {
|
||||
most_recently_used_ = word->tesseract;
|
||||
}
|
||||
return;
|
||||
}
|
||||
auto sub = sub_langs_.size();
|
||||
@ -1343,8 +1381,9 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor
|
||||
#endif // def DISABLED_LEGACY_ENGINE
|
||||
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
LSTMRecognizeWord(*block, row, *in_word, out_words);
|
||||
if (!out_words->empty())
|
||||
if (!out_words->empty()) {
|
||||
return; // Successful lstm recognition.
|
||||
}
|
||||
}
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
|
||||
// No fallback allowed, so use a fake.
|
||||
@ -1377,8 +1416,9 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor
|
||||
}
|
||||
}
|
||||
|
||||
if (tessedit_enable_doc_dict && !word->IsAmbiguous())
|
||||
if (tessedit_enable_doc_dict && !word->IsAmbiguous()) {
|
||||
tess_add_doc_word(word->best_choice);
|
||||
}
|
||||
}
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
}
|
||||
@ -1405,14 +1445,16 @@ void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RE
|
||||
// See the comment in fixxht.cpp for a description of the overall process.
|
||||
bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row) {
|
||||
int original_misfits = CountMisfitTops(word);
|
||||
if (original_misfits == 0)
|
||||
if (original_misfits == 0) {
|
||||
return false;
|
||||
}
|
||||
float baseline_shift = 0.0f;
|
||||
float new_x_ht = ComputeCompatibleXheight(word, &baseline_shift);
|
||||
if (baseline_shift != 0.0f) {
|
||||
// Try the shift on its own first.
|
||||
if (!TestNewNormalization(original_misfits, baseline_shift, word->x_height, word, block, row))
|
||||
if (!TestNewNormalization(original_misfits, baseline_shift, word->x_height, word, block, row)) {
|
||||
return false;
|
||||
}
|
||||
original_misfits = CountMisfitTops(word);
|
||||
if (original_misfits > 0) {
|
||||
float new_baseline_shift;
|
||||
@ -1497,8 +1539,9 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
|
||||
check_debug_pt(word, 30);
|
||||
if (!word->done) {
|
||||
word->caps_height = 0.0;
|
||||
if (word->x_height == 0.0f)
|
||||
if (word->x_height == 0.0f) {
|
||||
word->x_height = row->x_height();
|
||||
}
|
||||
match_word_pass_n(2, word, row, block);
|
||||
check_debug_pt(word, 40);
|
||||
}
|
||||
@ -1514,8 +1557,9 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
|
||||
}
|
||||
# ifndef GRAPHICS_DISABLED
|
||||
if (tessedit_display_outwords) {
|
||||
if (fx_win == nullptr)
|
||||
if (fx_win == nullptr) {
|
||||
create_fx_win();
|
||||
}
|
||||
clear_fx_win();
|
||||
word->rebuild_word->plot(fx_win);
|
||||
TBOX wbox = word->rebuild_word->bounding_box();
|
||||
@ -1534,15 +1578,17 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
|
||||
* Baseline normalize the word and pass it to Tess.
|
||||
*/
|
||||
void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block) {
|
||||
if (word->tess_failed)
|
||||
if (word->tess_failed) {
|
||||
return;
|
||||
}
|
||||
tess_segment_pass_n(pass_n, word);
|
||||
|
||||
if (!word->tess_failed) {
|
||||
if (!word->word->flag(W_REP_CHAR)) {
|
||||
word->fix_quotes();
|
||||
if (tessedit_fix_hyphens)
|
||||
if (tessedit_fix_hyphens) {
|
||||
word->fix_hyphens();
|
||||
}
|
||||
/* Don't trust fix_quotes! - though I think I've fixed the bug */
|
||||
if (word->best_choice->length() != word->box_word->length()) {
|
||||
tprintf(
|
||||
@ -1571,8 +1617,9 @@ static BLOB_CHOICE *FindBestMatchingChoice(UNICHAR_ID char_id, WERD_RES *word_re
|
||||
for (int i = 0; i < word_res->best_choice->length(); ++i) {
|
||||
BLOB_CHOICE *choice = FindMatchingChoice(char_id, word_res->GetBlobChoices(i));
|
||||
if (choice != nullptr) {
|
||||
if (best_choice == nullptr || choice->rating() < best_choice->rating())
|
||||
if (best_choice == nullptr || choice->rating() < best_choice->rating()) {
|
||||
best_choice = choice;
|
||||
}
|
||||
}
|
||||
}
|
||||
return best_choice;
|
||||
@ -1593,8 +1640,9 @@ static void CorrectRepcharChoices(BLOB_CHOICE *blob_choice, WERD_RES *word_res)
|
||||
}
|
||||
// Correct any incorrect results in word.
|
||||
for (int i = 0; i < word->length(); ++i) {
|
||||
if (word->unichar_id(i) != blob_choice->unichar_id())
|
||||
if (word->unichar_id(i) != blob_choice->unichar_id()) {
|
||||
word->set_unichar_id(blob_choice->unichar_id(), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1653,13 +1701,15 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se
|
||||
int hyphen_pos = -1;
|
||||
ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE;
|
||||
|
||||
if (strlen(lengths) > 20)
|
||||
if (strlen(lengths) > 20) {
|
||||
return word_type;
|
||||
}
|
||||
|
||||
/* Single Leading punctuation char*/
|
||||
|
||||
if (s[offset] != '\0' && chs_leading_punct.contains(s[offset]))
|
||||
if (s[offset] != '\0' && chs_leading_punct.contains(s[offset])) {
|
||||
offset += lengths[i++];
|
||||
}
|
||||
leading_punct_count = i;
|
||||
|
||||
/* Initial cap */
|
||||
@ -1674,8 +1724,9 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se
|
||||
while (s[offset] != '\0' && char_set.get_islower(s + offset, lengths[i])) {
|
||||
offset += lengths[i++];
|
||||
}
|
||||
if (i - leading_punct_count < quality_min_initial_alphas_reqd)
|
||||
if (i - leading_punct_count < quality_min_initial_alphas_reqd) {
|
||||
goto not_a_word;
|
||||
}
|
||||
/*
|
||||
Allow a single hyphen in a lower case word
|
||||
- don't trust upper case - I've seen several cases of "H" -> "I-I"
|
||||
@ -1687,8 +1738,9 @@ Allow a single hyphen in a lower case word
|
||||
while ((s[offset] != '\0') && char_set.get_islower(s + offset, lengths[i])) {
|
||||
offset += lengths[i++];
|
||||
}
|
||||
if (i < hyphen_pos + 3)
|
||||
if (i < hyphen_pos + 3) {
|
||||
goto not_a_word;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Allow "'s" in NON hyphenated lower case words */
|
||||
@ -1698,21 +1750,25 @@ Allow a single hyphen in a lower case word
|
||||
offset += lengths[i++];
|
||||
}
|
||||
}
|
||||
if (upper_count > 0)
|
||||
if (upper_count > 0) {
|
||||
word_type = AC_INITIAL_CAP;
|
||||
else
|
||||
} else {
|
||||
word_type = AC_LOWER_CASE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Up to two different, constrained trailing punctuation chars */
|
||||
if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset]))
|
||||
if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset])) {
|
||||
offset += lengths[i++];
|
||||
}
|
||||
if (lengths[i] == 1 && s[offset] != '\0' && i > 0 && s[offset - lengths[i - 1]] != s[offset] &&
|
||||
chs_trailing_punct2.contains(s[offset]))
|
||||
chs_trailing_punct2.contains(s[offset])) {
|
||||
offset += lengths[i++];
|
||||
}
|
||||
|
||||
if (s[offset] != '\0')
|
||||
if (s[offset] != '\0') {
|
||||
word_type = AC_UNACCEPTABLE;
|
||||
}
|
||||
|
||||
not_a_word:
|
||||
|
||||
@ -1735,8 +1791,9 @@ not_a_word:
|
||||
offset += lengths[i++];
|
||||
}
|
||||
}
|
||||
if (s[offset] != '\0')
|
||||
if (s[offset] != '\0') {
|
||||
word_type = AC_UNACCEPTABLE;
|
||||
}
|
||||
}
|
||||
|
||||
return word_type;
|
||||
@ -1746,15 +1803,17 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
|
||||
bool show_map_detail = false;
|
||||
int16_t i;
|
||||
|
||||
if (!test_pt)
|
||||
if (!test_pt) {
|
||||
return false;
|
||||
}
|
||||
|
||||
tessedit_rejection_debug.set_value(false);
|
||||
debug_x_ht_level.set_value(0);
|
||||
|
||||
if (word->word->bounding_box().contains(FCOORD(test_pt_x, test_pt_y))) {
|
||||
if (location < 0)
|
||||
if (location < 0) {
|
||||
return true; // For breakpoint use
|
||||
}
|
||||
tessedit_rejection_debug.set_value(true);
|
||||
debug_x_ht_level.set_value(2);
|
||||
tprintf("\n\nTESTWD::");
|
||||
@ -1857,14 +1916,16 @@ static void find_modal_font( // good chars in word
|
||||
void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
// Don't try to set the word fonts for an lstm word, as the configs
|
||||
// will be meaningless.
|
||||
if (word->chopped_word == nullptr)
|
||||
if (word->chopped_word == nullptr) {
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(word->best_choice != nullptr);
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
const int fontinfo_size = get_fontinfo_table().size();
|
||||
if (fontinfo_size == 0)
|
||||
if (fontinfo_size == 0) {
|
||||
return;
|
||||
}
|
||||
std::vector<int> font_total_score(fontinfo_size);
|
||||
|
||||
// Compute the font scores for the word
|
||||
@ -1873,8 +1934,9 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
}
|
||||
for (int b = 0; b < word->best_choice->length(); ++b) {
|
||||
const BLOB_CHOICE *choice = word->GetBlobChoice(b);
|
||||
if (choice == nullptr)
|
||||
if (choice == nullptr) {
|
||||
continue;
|
||||
}
|
||||
auto &fonts = choice->fonts();
|
||||
for (auto &f : fonts) {
|
||||
const int fontinfo_id = f.fontinfo_id;
|
||||
@ -1945,8 +2007,9 @@ void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
|
||||
int16_t doc_font; // modal font
|
||||
int8_t doc_font_count; // modal font
|
||||
find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
|
||||
if (doc_font_count == 0)
|
||||
if (doc_font_count == 0) {
|
||||
return;
|
||||
}
|
||||
// Get the modal font pointer.
|
||||
const FontInfo *modal_font = nullptr;
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||
@ -1983,12 +2046,14 @@ void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
|
||||
void Tesseract::dictionary_correction_pass(PAGE_RES *page_res) {
|
||||
PAGE_RES_IT word_it(page_res);
|
||||
for (WERD_RES *word = word_it.word(); word != nullptr; word = word_it.forward()) {
|
||||
if (word->best_choices.singleton())
|
||||
if (word->best_choices.singleton()) {
|
||||
continue; // There are no alternates.
|
||||
}
|
||||
|
||||
const WERD_CHOICE *best = word->best_choice;
|
||||
if (word->tesseract->getDict().valid_word(*best) != 0)
|
||||
if (word->tesseract->getDict().valid_word(*best) != 0) {
|
||||
continue; // The best choice is in the dictionary.
|
||||
}
|
||||
|
||||
WERD_CHOICE_IT choice_it(&word->best_choices);
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
|
||||
|
@ -107,18 +107,20 @@ void Tesseract::unrej_good_chs(WERD_RES *word) {
|
||||
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
|
||||
int expected_outline_count;
|
||||
|
||||
if (outlines_odd.contains(c))
|
||||
if (outlines_odd.contains(c)) {
|
||||
return 0; // Don't use this char
|
||||
else if (outlines_2.contains(c))
|
||||
} else if (outlines_2.contains(c)) {
|
||||
expected_outline_count = 2;
|
||||
else
|
||||
} else {
|
||||
expected_outline_count = 1;
|
||||
}
|
||||
return abs(outline_count - expected_outline_count);
|
||||
}
|
||||
|
||||
void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc) {
|
||||
if ((tessedit_good_quality_unrej && good_quality_doc))
|
||||
if ((tessedit_good_quality_unrej && good_quality_doc)) {
|
||||
unrej_good_quality_words(page_res_it);
|
||||
}
|
||||
doc_and_block_rejection(page_res_it, good_quality_doc);
|
||||
if (unlv_tilde_crunching) {
|
||||
tilde_crunch(page_res_it);
|
||||
@ -150,8 +152,9 @@ void Tesseract::unrej_good_quality_words( // unreject potential
|
||||
if (bland_unrej) {
|
||||
word = page_res_it.word();
|
||||
for (i = 0; i < word->reject_map.length(); i++) {
|
||||
if (word->reject_map[i].accept_if_good_quality())
|
||||
if (word->reject_map[i].accept_if_good_quality()) {
|
||||
word->reject_map[i].setrej_quality_accept();
|
||||
}
|
||||
}
|
||||
page_res_it.forward();
|
||||
} else if ((page_res_it.row()->char_count > 0) &&
|
||||
@ -169,8 +172,9 @@ void Tesseract::unrej_good_quality_words( // unreject potential
|
||||
} else {
|
||||
// Skip to end of dodgy row.
|
||||
current_row = page_res_it.row();
|
||||
while ((page_res_it.word() != nullptr) && (page_res_it.row() == current_row))
|
||||
while ((page_res_it.word() != nullptr) && (page_res_it.row() == current_row)) {
|
||||
page_res_it.forward();
|
||||
}
|
||||
}
|
||||
check_debug_pt(page_res_it.word(), 110);
|
||||
}
|
||||
@ -265,8 +269,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
generated more space errors.
|
||||
*/
|
||||
if (tessedit_use_reject_spaces && prev_word_rejected &&
|
||||
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
|
||||
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1) {
|
||||
word->reject_spaces = true;
|
||||
}
|
||||
word->reject_map.rej_word_block_rej();
|
||||
}
|
||||
prev_word_rejected = rej_word;
|
||||
@ -326,8 +331,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
this generated more space errors.
|
||||
*/
|
||||
if (tessedit_use_reject_spaces && prev_word_rejected &&
|
||||
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
|
||||
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1) {
|
||||
word->reject_spaces = true;
|
||||
}
|
||||
word->reject_map.rej_word_row_rej();
|
||||
}
|
||||
prev_word_rejected = rej_word;
|
||||
@ -338,8 +344,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
tprintf("NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n", row_no,
|
||||
current_row->char_count, current_row->rej_count);
|
||||
}
|
||||
while (page_res_it.word() != nullptr && page_res_it.row() == current_row)
|
||||
while (page_res_it.word() != nullptr && page_res_it.row() == current_row) {
|
||||
page_res_it.forward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -380,11 +387,13 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
|
||||
}
|
||||
word = page_res_it.word();
|
||||
|
||||
if (crunch_early_convert_bad_unlv_chs)
|
||||
if (crunch_early_convert_bad_unlv_chs) {
|
||||
convert_bad_unlv_chs(word);
|
||||
}
|
||||
|
||||
if (crunch_early_merge_tess_fails)
|
||||
if (crunch_early_merge_tess_fails) {
|
||||
word->merge_tess_fails();
|
||||
}
|
||||
|
||||
if (word->reject_map.accept_count() != 0) {
|
||||
found_terrible_word = false;
|
||||
@ -445,22 +454,25 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
|
||||
|
||||
if ((word->best_choice->unichar_string().length() == 0) ||
|
||||
(strspn(word->best_choice->unichar_string().c_str(), " ") ==
|
||||
word->best_choice->unichar_string().size()))
|
||||
word->best_choice->unichar_string().size())) {
|
||||
crunch_mode = 1;
|
||||
else {
|
||||
} else {
|
||||
adjusted_len = word->reject_map.length();
|
||||
if (adjusted_len > crunch_rating_max)
|
||||
if (adjusted_len > crunch_rating_max) {
|
||||
adjusted_len = crunch_rating_max;
|
||||
}
|
||||
rating_per_ch = word->best_choice->rating() / adjusted_len;
|
||||
|
||||
if (rating_per_ch > crunch_terrible_rating)
|
||||
if (rating_per_ch > crunch_terrible_rating) {
|
||||
crunch_mode = 2;
|
||||
else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE))
|
||||
} else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE)) {
|
||||
crunch_mode = 3;
|
||||
else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) && (garbage_level != G_OK))
|
||||
} else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) &&
|
||||
(garbage_level != G_OK)) {
|
||||
crunch_mode = 4;
|
||||
else if ((rating_per_ch > crunch_poor_garbage_rate) && (garbage_level != G_OK))
|
||||
} else if ((rating_per_ch > crunch_poor_garbage_rate) && (garbage_level != G_OK)) {
|
||||
crunch_mode = 5;
|
||||
}
|
||||
}
|
||||
if (crunch_mode > 0) {
|
||||
if (crunch_debug > 2) {
|
||||
@ -468,8 +480,9 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
|
||||
word->best_choice->unichar_string().c_str());
|
||||
}
|
||||
return true;
|
||||
} else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level,
|
||||
@ -486,8 +499,9 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
|
||||
(acceptable_word_string(*word->uch_set, str, lengths) == AC_UNACCEPTABLE && !ok_dict_word);
|
||||
|
||||
adjusted_len = word->reject_map.length();
|
||||
if (adjusted_len > 10)
|
||||
if (adjusted_len > 10) {
|
||||
adjusted_len = 10;
|
||||
}
|
||||
rating_per_ch = word->best_choice->rating() / adjusted_len;
|
||||
|
||||
if (rating_per_ch > crunch_pot_poor_rate) {
|
||||
@ -570,8 +584,9 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
|
||||
The following step has been left till now as the tess fails are used to
|
||||
determine if the word is deletable.
|
||||
*/
|
||||
if (!crunch_early_merge_tess_fails)
|
||||
if (!crunch_early_merge_tess_fails) {
|
||||
word->merge_tess_fails();
|
||||
}
|
||||
page_res_it.forward();
|
||||
}
|
||||
}
|
||||
@ -585,13 +600,15 @@ void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) {
|
||||
for (i = 0; i < word_res->reject_map.length(); ++i) {
|
||||
if (word_res->best_choice->unichar_id(i) == unichar_tilde) {
|
||||
word_res->best_choice->set_unichar_id(unichar_dash, i);
|
||||
if (word_res->reject_map[i].accepted())
|
||||
if (word_res->reject_map[i].accepted()) {
|
||||
word_res->reject_map[i].setrej_unlv_rej();
|
||||
}
|
||||
}
|
||||
if (word_res->best_choice->unichar_id(i) == unichar_pow) {
|
||||
word_res->best_choice->set_unichar_id(unichar_space, i);
|
||||
if (word_res->reject_map[i].accepted())
|
||||
if (word_res->reject_map[i].accepted()) {
|
||||
word_res->reject_map[i].setrej_unlv_rej();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -635,8 +652,9 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
|
||||
case FIRST_UPPER:
|
||||
state = SUBSEQUENT_UPPER;
|
||||
upper_string_count++;
|
||||
if (longest_upper_run_len < upper_string_count)
|
||||
if (longest_upper_run_len < upper_string_count) {
|
||||
longest_upper_run_len = upper_string_count;
|
||||
}
|
||||
if (last_char == word->uch_set->unichar_to_id(str, *lengths)) {
|
||||
alpha_repetition_count++;
|
||||
if (longest_alpha_repetition_count < alpha_repetition_count) {
|
||||
@ -664,8 +682,9 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
|
||||
case FIRST_LOWER:
|
||||
state = SUBSEQUENT_LOWER;
|
||||
lower_string_count++;
|
||||
if (longest_lower_run_len < lower_string_count)
|
||||
if (longest_lower_run_len < lower_string_count) {
|
||||
longest_lower_run_len = lower_string_count;
|
||||
}
|
||||
if (last_char == word->uch_set->unichar_to_id(str, *lengths)) {
|
||||
alpha_repetition_count++;
|
||||
if (longest_alpha_repetition_count < alpha_repetition_count) {
|
||||
@ -702,10 +721,11 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (*lengths == 1 && *str == ' ')
|
||||
if (*lengths == 1 && *str == ' ') {
|
||||
tess_rejs++;
|
||||
else
|
||||
} else {
|
||||
bad_char_count++;
|
||||
}
|
||||
switch (state) {
|
||||
case FIRST_NUM:
|
||||
isolated_digits++;
|
||||
@ -740,16 +760,18 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
|
||||
if ((crunch_accept_ok &&
|
||||
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE) ||
|
||||
longest_lower_run_len > crunch_leave_lc_strings ||
|
||||
longest_upper_run_len > crunch_leave_uc_strings)
|
||||
longest_upper_run_len > crunch_leave_uc_strings) {
|
||||
return G_NEVER_CRUNCH;
|
||||
}
|
||||
}
|
||||
if (word->reject_map.length() > 1 && strpbrk(str, " ") == nullptr &&
|
||||
(word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
|
||||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
|
||||
word->best_choice->permuter() == USER_DAWG_PERM ||
|
||||
word->best_choice->permuter() == NUMBER_PERM ||
|
||||
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE || ok_dict_word))
|
||||
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE || ok_dict_word)) {
|
||||
return G_OK;
|
||||
}
|
||||
|
||||
ok_chars = len - bad_char_count - isolated_digits - isolated_alphas - tess_rejs;
|
||||
|
||||
@ -759,24 +781,28 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
|
||||
isolated_digits, isolated_alphas, tess_rejs);
|
||||
}
|
||||
if (bad_char_count == 0 && tess_rejs == 0 &&
|
||||
(len > isolated_digits + isolated_alphas || len <= 2))
|
||||
(len > isolated_digits + isolated_alphas || len <= 2)) {
|
||||
return G_OK;
|
||||
}
|
||||
|
||||
if (tess_rejs > ok_chars || (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len))
|
||||
if (tess_rejs > ok_chars || (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len)) {
|
||||
return G_TERRIBLE;
|
||||
}
|
||||
|
||||
if (len > 4) {
|
||||
dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + isolated_alphas;
|
||||
if (dodgy_chars > 5 || (dodgy_chars / static_cast<float>(len)) > 0.5)
|
||||
if (dodgy_chars > 5 || (dodgy_chars / static_cast<float>(len)) > 0.5) {
|
||||
return G_DODGY;
|
||||
else
|
||||
} else {
|
||||
return G_OK;
|
||||
}
|
||||
} else {
|
||||
dodgy_chars = 2 * tess_rejs + bad_char_count;
|
||||
if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || dodgy_chars >= len)
|
||||
if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || dodgy_chars >= len) {
|
||||
return G_DODGY;
|
||||
else
|
||||
} else {
|
||||
return G_OK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -871,8 +897,9 @@ int16_t Tesseract::failure_count(WERD_RES *word) {
|
||||
int tess_rejs = 0;
|
||||
|
||||
for (; *str != '\0'; str++) {
|
||||
if (*str == ' ')
|
||||
if (*str == ' ') {
|
||||
tess_rejs++;
|
||||
}
|
||||
}
|
||||
return tess_rejs;
|
||||
}
|
||||
@ -889,12 +916,14 @@ bool Tesseract::noise_outlines(TWERD *word) {
|
||||
for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
|
||||
outline_count++;
|
||||
box = ol->bounding_box();
|
||||
if (box.height() > box.width())
|
||||
if (box.height() > box.width()) {
|
||||
max_dimension = box.height();
|
||||
else
|
||||
} else {
|
||||
max_dimension = box.width();
|
||||
if (max_dimension < small_limit)
|
||||
}
|
||||
if (max_dimension < small_limit) {
|
||||
small_outline_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return small_outline_count >= outline_count;
|
||||
|
@ -105,16 +105,18 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
|
||||
monitor->progress = 90 + 5 * word_index / word_count;
|
||||
if (monitor->deadline_exceeded() ||
|
||||
(monitor->cancel != nullptr &&
|
||||
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
|
||||
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!word_res_it_from.at_last()) {
|
||||
word_res_it_to = word_res_it_from;
|
||||
prevent_null_wd_fixsp = word_res->word->cblob_list()->empty();
|
||||
if (check_debug_pt(word_res, 60))
|
||||
if (check_debug_pt(word_res, 60)) {
|
||||
debug_fix_space_level.set_value(10);
|
||||
}
|
||||
word_res_it_to.forward();
|
||||
word_index++;
|
||||
if (monitor != nullptr) {
|
||||
@ -122,22 +124,27 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
|
||||
monitor->progress = 90 + 5 * word_index / word_count;
|
||||
if (monitor->deadline_exceeded() ||
|
||||
(monitor->cancel != nullptr &&
|
||||
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
|
||||
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
while (!word_res_it_to.at_last() &&
|
||||
(word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) ||
|
||||
word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) {
|
||||
if (check_debug_pt(word_res, 60))
|
||||
if (check_debug_pt(word_res, 60)) {
|
||||
debug_fix_space_level.set_value(10);
|
||||
if (word_res->word->cblob_list()->empty())
|
||||
}
|
||||
if (word_res->word->cblob_list()->empty()) {
|
||||
prevent_null_wd_fixsp = true;
|
||||
}
|
||||
word_res = word_res_it_to.forward();
|
||||
}
|
||||
if (check_debug_pt(word_res, 60))
|
||||
if (check_debug_pt(word_res, 60)) {
|
||||
debug_fix_space_level.set_value(10);
|
||||
if (word_res->word->cblob_list()->empty())
|
||||
}
|
||||
if (word_res->word->cblob_list()->empty()) {
|
||||
prevent_null_wd_fixsp = true;
|
||||
}
|
||||
if (prevent_null_wd_fixsp) {
|
||||
word_res_it_from = word_res_it_to;
|
||||
} else {
|
||||
@ -150,8 +157,9 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
|
||||
word_res_it_from.forward();
|
||||
}
|
||||
}
|
||||
if (test_pt)
|
||||
if (test_pt) {
|
||||
debug_fix_space_level.set_value(0);
|
||||
}
|
||||
}
|
||||
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block);
|
||||
// Last word in row
|
||||
@ -169,8 +177,9 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
|
||||
best_score = eval_word_spacing(best_perm); // default score
|
||||
dump_words(best_perm, best_score, 1, improved);
|
||||
|
||||
if (best_score != PERFECT_WERDS)
|
||||
if (best_score != PERFECT_WERDS) {
|
||||
initialise_search(best_perm, current_perm);
|
||||
}
|
||||
|
||||
while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
|
||||
match_current_words(current_perm, row, block);
|
||||
@ -182,8 +191,9 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
|
||||
best_score = current_score;
|
||||
improved = true;
|
||||
}
|
||||
if (current_score < PERFECT_WERDS)
|
||||
if (current_score < PERFECT_WERDS) {
|
||||
transform_to_next_perm(current_perm);
|
||||
}
|
||||
}
|
||||
dump_words(best_perm, best_score, 3, improved);
|
||||
}
|
||||
@ -268,8 +278,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
word_count++;
|
||||
if (word->tess_failed) {
|
||||
total_score += prev_word_score;
|
||||
if (prev_word_done)
|
||||
if (prev_word_done) {
|
||||
done_word_count++;
|
||||
}
|
||||
prev_word_score = 0;
|
||||
prev_char_1 = false;
|
||||
prev_char_digit = false;
|
||||
@ -289,8 +300,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
(!word_done &&
|
||||
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[0])))))) {
|
||||
total_score += prev_word_score;
|
||||
if (prev_word_done)
|
||||
if (prev_word_done) {
|
||||
done_word_count++;
|
||||
}
|
||||
current_word_ok_so_far = word_done;
|
||||
}
|
||||
|
||||
@ -306,8 +318,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
rejtn */
|
||||
for (i = 0, prev_char_1 = false; i < word_len; i++) {
|
||||
bool current_char_1 = word->best_choice->unichar_string()[i] == '1';
|
||||
if (prev_char_1 || (current_char_1 && (i > 0)))
|
||||
if (prev_char_1 || (current_char_1 && (i > 0))) {
|
||||
total_score++;
|
||||
}
|
||||
prev_char_1 = current_char_1;
|
||||
}
|
||||
|
||||
@ -318,14 +331,17 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
offset += word->best_choice->unichar_lengths()[i++]) {
|
||||
bool current_char_punct =
|
||||
strchr(punct_chars, word->best_choice->unichar_string()[offset]) != nullptr;
|
||||
if (prev_char_punct || (current_char_punct && i > 0))
|
||||
if (prev_char_punct || (current_char_punct && i > 0)) {
|
||||
total_score++;
|
||||
}
|
||||
prev_char_punct = current_char_punct;
|
||||
}
|
||||
}
|
||||
prev_char_digit = digit_or_numeric_punct(word, word_len - 1);
|
||||
for (i = 0, offset = 0; i < word_len - 1; offset += word->best_choice->unichar_lengths()[i++])
|
||||
for (i = 0, offset = 0; i < word_len - 1;
|
||||
offset += word->best_choice->unichar_lengths()[i++]) {
|
||||
;
|
||||
}
|
||||
prev_char_1 =
|
||||
((word_done && (word->best_choice->unichar_string()[offset] == '1')) ||
|
||||
(!word_done &&
|
||||
@ -337,20 +353,23 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
} while (word_res_it.data()->part_of_combo);
|
||||
} while (!word_res_it.at_first());
|
||||
total_score += prev_word_score;
|
||||
if (prev_word_done)
|
||||
if (prev_word_done) {
|
||||
done_word_count++;
|
||||
if (done_word_count == word_count)
|
||||
}
|
||||
if (done_word_count == word_count) {
|
||||
return PERFECT_WERDS;
|
||||
else
|
||||
} else {
|
||||
return total_score;
|
||||
}
|
||||
}
|
||||
|
||||
bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
|
||||
int i;
|
||||
int offset;
|
||||
|
||||
for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++])
|
||||
for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++]) {
|
||||
;
|
||||
}
|
||||
return (
|
||||
word->uch_set->get_isdigit(word->best_choice->unichar_string().c_str() + offset,
|
||||
word->best_choice->unichar_lengths()[i]) ||
|
||||
@ -387,8 +406,9 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
|
||||
box = word->word->bounding_box();
|
||||
if (prev_right > -INT16_MAX) {
|
||||
gap = box.left() - prev_right;
|
||||
if (gap < min_gap)
|
||||
if (gap < min_gap) {
|
||||
min_gap = gap;
|
||||
}
|
||||
}
|
||||
prev_right = box.right();
|
||||
}
|
||||
@ -492,8 +512,9 @@ void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, boo
|
||||
}
|
||||
|
||||
bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
|
||||
if (word->done)
|
||||
if (word->done) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
Use all the standard pass 2 conditions for mode 5 in set_done() in
|
||||
@ -531,12 +552,14 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
|
||||
|
||||
word_res = word_res_it.data();
|
||||
if (word_res->word->flag(W_REP_CHAR) || word_res->combination || word_res->part_of_combo ||
|
||||
!word_res->word->flag(W_DONT_CHOP))
|
||||
!word_res->word->flag(W_DONT_CHOP)) {
|
||||
return;
|
||||
}
|
||||
|
||||
blob_index = worst_noise_blob(word_res, &junk);
|
||||
if (blob_index < 0)
|
||||
if (blob_index < 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (debug_fix_space_level > 1) {
|
||||
tprintf("FP fixspace working on \"%s\"\n", word_res->best_choice->unichar_string().c_str());
|
||||
@ -669,35 +692,41 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
|
||||
float small_limit = kBlnXHeight * fixsp_small_outlines_size;
|
||||
float non_noise_limit = kBlnXHeight * 0.8;
|
||||
|
||||
if (word_res->rebuild_word == nullptr)
|
||||
if (word_res->rebuild_word == nullptr) {
|
||||
return -1; // Can't handle cube words.
|
||||
}
|
||||
|
||||
// Normalised.
|
||||
int blob_count = word_res->box_word->length();
|
||||
ASSERT_HOST(blob_count <= 512);
|
||||
if (blob_count < 5)
|
||||
if (blob_count < 5) {
|
||||
return -1; // too short to split
|
||||
}
|
||||
|
||||
/* Get the noise scores for all blobs */
|
||||
|
||||
#ifndef SECURE_NAMES
|
||||
if (debug_fix_space_level > 5)
|
||||
if (debug_fix_space_level > 5) {
|
||||
tprintf("FP fixspace Noise metrics for \"%s\": ",
|
||||
word_res->best_choice->unichar_string().c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
|
||||
TBLOB *blob = word_res->rebuild_word->blobs[i];
|
||||
if (word_res->reject_map[i].accepted())
|
||||
if (word_res->reject_map[i].accepted()) {
|
||||
noise_score[i] = non_noise_limit;
|
||||
else
|
||||
} else {
|
||||
noise_score[i] = blob_noise_score(blob);
|
||||
}
|
||||
|
||||
if (debug_fix_space_level > 5)
|
||||
if (debug_fix_space_level > 5) {
|
||||
tprintf("%1.1f ", noise_score[i]);
|
||||
}
|
||||
}
|
||||
if (debug_fix_space_level > 5)
|
||||
if (debug_fix_space_level > 5) {
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
/* Now find the worst one which is far enough away from the end of the word */
|
||||
|
||||
@ -707,8 +736,9 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
|
||||
non_noise_count++;
|
||||
}
|
||||
}
|
||||
if (non_noise_count < fixsp_non_noise_limit)
|
||||
if (non_noise_count < fixsp_non_noise_limit) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
min_noise_blob = i;
|
||||
|
||||
@ -718,13 +748,15 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
|
||||
non_noise_count++;
|
||||
}
|
||||
}
|
||||
if (non_noise_count < fixsp_non_noise_limit)
|
||||
if (non_noise_count < fixsp_non_noise_limit) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
max_noise_blob = i;
|
||||
|
||||
if (min_noise_blob > max_noise_blob)
|
||||
if (min_noise_blob > max_noise_blob) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
*worst_noise_score = small_limit;
|
||||
worst_noise_blob = -1;
|
||||
@ -752,8 +784,9 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
|
||||
max_dimension = box.width();
|
||||
}
|
||||
|
||||
if (largest_outline_dimension < max_dimension)
|
||||
if (largest_outline_dimension < max_dimension) {
|
||||
largest_outline_dimension = max_dimension;
|
||||
}
|
||||
}
|
||||
|
||||
if (outline_count > 5) {
|
||||
@ -810,8 +843,9 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
|
||||
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
|
||||
word = word_it.data();
|
||||
if (word->rebuild_word == nullptr)
|
||||
if (word->rebuild_word == nullptr) {
|
||||
continue; // Can't handle cube words.
|
||||
}
|
||||
if (word->done || word->tess_accepted || word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
|
||||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
|
||||
word->best_choice->permuter() == USER_DAWG_PERM || safe_dict_word(word) > 0) {
|
||||
@ -827,8 +861,9 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (score < 0)
|
||||
if (score < 0) {
|
||||
score = 0;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
|
@ -76,16 +76,19 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
|
||||
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
|
||||
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
|
||||
int top = blob->bounding_box().top();
|
||||
if (top >= INT_FEAT_RANGE)
|
||||
if (top >= INT_FEAT_RANGE) {
|
||||
top = INT_FEAT_RANGE - 1;
|
||||
}
|
||||
int min_bottom, max_bottom, min_top, max_top;
|
||||
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||
if (max_top - min_top > kMaxCharTopRange)
|
||||
if (max_top - min_top > kMaxCharTopRange) {
|
||||
continue;
|
||||
}
|
||||
bool bad =
|
||||
top < min_top - x_ht_acceptance_tolerance || top > max_top + x_ht_acceptance_tolerance;
|
||||
if (bad)
|
||||
if (bad) {
|
||||
++bad_blobs;
|
||||
}
|
||||
if (debug_x_ht_level >= 1) {
|
||||
tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n",
|
||||
unicharset.id_to_unichar(class_id), bad ? "Misfit" : "OK", top, min_top, max_top,
|
||||
@ -112,14 +115,16 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
|
||||
int top = blob->bounding_box().top() + bottom_shift;
|
||||
// Clip the top to the limit of normalized feature space.
|
||||
if (top >= INT_FEAT_RANGE)
|
||||
if (top >= INT_FEAT_RANGE) {
|
||||
top = INT_FEAT_RANGE - 1;
|
||||
}
|
||||
int bottom = blob->bounding_box().bottom() + bottom_shift;
|
||||
int min_bottom, max_bottom, min_top, max_top;
|
||||
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||
// Chars with a wild top range would mess up the result so ignore them.
|
||||
if (max_top - min_top > kMaxCharTopRange)
|
||||
if (max_top - min_top > kMaxCharTopRange) {
|
||||
continue;
|
||||
}
|
||||
int misfit_dist = std::max((min_top - x_ht_acceptance_tolerance) - top,
|
||||
top - (max_top + x_ht_acceptance_tolerance));
|
||||
int height = top - kBlnBaselineOffset;
|
||||
@ -142,8 +147,9 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
}
|
||||
// The range of expected heights gets a vote equal to the distance
|
||||
// of the actual top from the expected top.
|
||||
for (int y = min_xht; y <= max_xht; ++y)
|
||||
for (int y = min_xht; y <= max_xht; ++y) {
|
||||
top_stats.add(y, misfit_dist);
|
||||
}
|
||||
} else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
|
||||
bottom - x_ht_acceptance_tolerance > max_bottom) &&
|
||||
bottom_shift == 0) {
|
||||
@ -157,10 +163,12 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
// of the actual bottom from the expected bottom, spread over the
|
||||
// range of its acceptance.
|
||||
int misfit_weight = abs(min_shift);
|
||||
if (max_shift > min_shift)
|
||||
if (max_shift > min_shift) {
|
||||
misfit_weight /= max_shift - min_shift;
|
||||
for (int y = min_shift; y <= max_shift; ++y)
|
||||
}
|
||||
for (int y = min_shift; y <= max_shift; ++y) {
|
||||
shift_stats.add(y, misfit_weight);
|
||||
}
|
||||
} else {
|
||||
if (bottom_shift == 0) {
|
||||
// Things with bottoms that are already ok need to say so, on the
|
||||
@ -185,8 +193,9 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
if (debug_x_ht_level >= 2) {
|
||||
tprintf("baseline shift=%g\n", *baseline_shift);
|
||||
}
|
||||
if (top_stats.get_total() == 0)
|
||||
if (top_stats.get_total() == 0) {
|
||||
return bottom_shift != 0 ? word_res->x_height : 0.0f;
|
||||
}
|
||||
// The new xheight is just the median vote, which is then scaled out
|
||||
// of BLN space back to pixel space to get the x-height in pixel space.
|
||||
float new_xht = top_stats.median();
|
||||
@ -196,10 +205,11 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
new_xht / word_res->denorm.y_scale());
|
||||
}
|
||||
// The xheight must change by at least x_ht_min_change to be used.
|
||||
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
|
||||
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
|
||||
return new_xht / word_res->denorm.y_scale();
|
||||
else
|
||||
} else {
|
||||
return bottom_shift != 0 ? word_res->x_height : 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -80,8 +80,9 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
|
||||
unsigned end_box = 0;
|
||||
// Don't let \t, which marks newlines in the box file, get into the line
|
||||
// content, as that makes the line unusable in training.
|
||||
while (end_box < texts.size() && texts[end_box] == "\t")
|
||||
while (end_box < texts.size() && texts[end_box] == "\t") {
|
||||
++end_box;
|
||||
}
|
||||
for (auto start_box = end_box; start_box < box_count; start_box = end_box) {
|
||||
// Find the textline of boxes starting at start and their bounding box.
|
||||
TBOX line_box = boxes[start_box];
|
||||
@ -96,8 +97,9 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
|
||||
BLOCK_IT b_it(block_list);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
BLOCK *block = b_it.data();
|
||||
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
|
||||
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
|
||||
continue; // Not a text block.
|
||||
}
|
||||
TBOX block_box = block->pdblk.bounding_box();
|
||||
block_box.rotate(block->re_rotation());
|
||||
if (block_box.major_overlap(line_box)) {
|
||||
@ -114,12 +116,14 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
|
||||
} else {
|
||||
imagedata = GetLineData(line_box, boxes, texts, start_box, end_box, *best_block);
|
||||
}
|
||||
if (imagedata != nullptr)
|
||||
if (imagedata != nullptr) {
|
||||
training_data->AddPageToDocument(imagedata);
|
||||
}
|
||||
// Don't let \t, which marks newlines in the box file, get into the line
|
||||
// content, as that makes the line unusable in training.
|
||||
while (end_box < texts.size() && texts[end_box] == "\t")
|
||||
while (end_box < texts.size() && texts[end_box] == "\t") {
|
||||
++end_box;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,8 +135,9 @@ ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX>
|
||||
const BLOCK &block) {
|
||||
TBOX revised_box;
|
||||
ImageData *image_data = GetRectImage(line_box, block, kImagePadding, &revised_box);
|
||||
if (image_data == nullptr)
|
||||
if (image_data == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
image_data->set_page_number(applybox_page);
|
||||
// Copy the boxes and shift them so they are relative to the image.
|
||||
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
|
||||
@ -166,16 +171,18 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
|
||||
// Number of clockwise 90 degree rotations needed to get back to tesseract
|
||||
// coords from the clipped image.
|
||||
int num_rotations = 0;
|
||||
if (block.re_rotation().y() > 0.0f)
|
||||
if (block.re_rotation().y() > 0.0f) {
|
||||
num_rotations = 1;
|
||||
else if (block.re_rotation().x() < 0.0f)
|
||||
} else if (block.re_rotation().x() < 0.0f) {
|
||||
num_rotations = 2;
|
||||
else if (block.re_rotation().y() < 0.0f)
|
||||
} else if (block.re_rotation().y() < 0.0f) {
|
||||
num_rotations = 3;
|
||||
}
|
||||
// Handle two cases automatically: 1 the box came from the block, 2 the box
|
||||
// came from a box file, and refers to the image, which the block may not.
|
||||
if (block.pdblk.bounding_box().major_overlap(*revised_box))
|
||||
if (block.pdblk.bounding_box().major_overlap(*revised_box)) {
|
||||
revised_box->rotate(block.re_rotation());
|
||||
}
|
||||
// Now revised_box always refers to the image.
|
||||
// BestPix is never colormapped, but may be of any depth.
|
||||
Pix *pix = BestPix();
|
||||
@ -184,14 +191,16 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
|
||||
TBOX image_box(0, 0, width, height);
|
||||
// Clip to image bounds;
|
||||
*revised_box &= image_box;
|
||||
if (revised_box->null_box())
|
||||
if (revised_box->null_box()) {
|
||||
return nullptr;
|
||||
}
|
||||
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
|
||||
revised_box->height());
|
||||
Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr);
|
||||
boxDestroy(&clip_box);
|
||||
if (box_pix == nullptr)
|
||||
if (box_pix == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (num_rotations > 0) {
|
||||
Pix *rot_pix = pixRotateOrth(box_pix, num_rotations);
|
||||
pixDestroy(&box_pix);
|
||||
@ -210,8 +219,9 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
|
||||
// Rotated the clipped revised box back to internal coordinates.
|
||||
FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y());
|
||||
revised_box->rotate(rotation);
|
||||
if (num_rotations != 2)
|
||||
if (num_rotations != 2) {
|
||||
vertical_text = true;
|
||||
}
|
||||
}
|
||||
return new ImageData(vertical_text, box_pix);
|
||||
}
|
||||
@ -228,14 +238,17 @@ void Tesseract::LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word,
|
||||
word_box = TBOX(0, 0, ImageWidth(), ImageHeight());
|
||||
} else {
|
||||
float baseline = row->base_line((word_box.left() + word_box.right()) / 2);
|
||||
if (baseline + row->descenders() < word_box.bottom())
|
||||
if (baseline + row->descenders() < word_box.bottom()) {
|
||||
word_box.set_bottom(baseline + row->descenders());
|
||||
if (baseline + row->x_height() + row->ascenders() > word_box.top())
|
||||
}
|
||||
if (baseline + row->x_height() + row->ascenders() > word_box.top()) {
|
||||
word_box.set_top(baseline + row->x_height() + row->ascenders());
|
||||
}
|
||||
}
|
||||
ImageData *im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
|
||||
if (im_data == nullptr)
|
||||
if (im_data == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool do_invert = tessedit_do_invert;
|
||||
lstm_recognizer_->RecognizeLine(*im_data, do_invert, classify_debug_level > 0,
|
||||
@ -254,8 +267,9 @@ void Tesseract::SearchWords(PointerVector<WERD_RES> *words) {
|
||||
// If we drop a word as junk, then there is always a space in front of the
|
||||
// next.
|
||||
const Dict *stopper_dict = lstm_recognizer_->GetDict();
|
||||
if (stopper_dict == nullptr)
|
||||
if (stopper_dict == nullptr) {
|
||||
stopper_dict = &getDict();
|
||||
}
|
||||
bool any_nonspace_delimited = false;
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
WERD_RES *word = (*words)[w];
|
||||
|
@ -42,8 +42,9 @@ LTRResultIterator::~LTRResultIterator() = default;
|
||||
// Returns the null terminated UTF-8 encoded text string for the current
|
||||
// object at the given level. Use delete [] to free after use.
|
||||
char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return nullptr; // Already at the end!
|
||||
}
|
||||
std::string text;
|
||||
PAGE_RES_IT res_it(*it_);
|
||||
WERD_CHOICE *best_choice = res_it.word()->best_choice;
|
||||
@ -70,8 +71,9 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
eop = res_it.block() != res_it.prev_block() ||
|
||||
res_it.row()->row->para() != res_it.prev_row()->row->para();
|
||||
} while (level != RIL_TEXTLINE && !eop);
|
||||
if (eop)
|
||||
if (eop) {
|
||||
text += paragraph_separator_;
|
||||
}
|
||||
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
|
||||
}
|
||||
int length = text.length() + 1;
|
||||
@ -93,8 +95,9 @@ void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
|
||||
// Returns the mean confidence of the current object at the given level.
|
||||
// The number should be interpreted as a percent probability. (0.0f-100.0f)
|
||||
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return 0.0f; // Already at the end!
|
||||
}
|
||||
float mean_certainty = 0.0f;
|
||||
int certainty_count = 0;
|
||||
PAGE_RES_IT res_it(*it_);
|
||||
@ -208,45 +211,53 @@ const char *LTRResultIterator::WordFontAttributes(bool *is_bold, bool *is_italic
|
||||
|
||||
// Returns the name of the language used to recognize this word.
|
||||
const char *LTRResultIterator::WordRecognitionLanguage() const {
|
||||
if (it_->word() == nullptr || it_->word()->tesseract == nullptr)
|
||||
if (it_->word() == nullptr || it_->word()->tesseract == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return it_->word()->tesseract->lang.c_str();
|
||||
}
|
||||
|
||||
// Return the overall directionality of this word.
|
||||
StrongScriptDirection LTRResultIterator::WordDirection() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return DIR_NEUTRAL;
|
||||
}
|
||||
bool has_rtl = it_->word()->AnyRtlCharsInWord();
|
||||
bool has_ltr = it_->word()->AnyLtrCharsInWord();
|
||||
if (has_rtl && !has_ltr)
|
||||
if (has_rtl && !has_ltr) {
|
||||
return DIR_RIGHT_TO_LEFT;
|
||||
if (has_ltr && !has_rtl)
|
||||
}
|
||||
if (has_ltr && !has_rtl) {
|
||||
return DIR_LEFT_TO_RIGHT;
|
||||
if (!has_ltr && !has_rtl)
|
||||
}
|
||||
if (!has_ltr && !has_rtl) {
|
||||
return DIR_NEUTRAL;
|
||||
}
|
||||
return DIR_MIX;
|
||||
}
|
||||
|
||||
// Returns true if the current word was found in a dictionary.
|
||||
bool LTRResultIterator::WordIsFromDictionary() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
}
|
||||
int permuter = it_->word()->best_choice->permuter();
|
||||
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
|
||||
}
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int LTRResultIterator::BlanksBeforeWord() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
return it_->word()->word->space();
|
||||
}
|
||||
|
||||
// Returns true if the current word is numeric.
|
||||
bool LTRResultIterator::WordIsNumeric() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
}
|
||||
int permuter = it_->word()->best_choice->permuter();
|
||||
return permuter == NUMBER_PERM;
|
||||
}
|
||||
@ -281,8 +292,9 @@ const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
|
||||
|
||||
// Returns true if a truth string was recorded for the current word.
|
||||
bool LTRResultIterator::HasTruthString() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
}
|
||||
if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) {
|
||||
return false; // no truth information for this word
|
||||
}
|
||||
@ -292,8 +304,9 @@ bool LTRResultIterator::HasTruthString() const {
|
||||
// Returns true if the given string is equivalent to the truth string for
|
||||
// the current word.
|
||||
bool LTRResultIterator::EquivalentToTruth(const char *str) const {
|
||||
if (!HasTruthString())
|
||||
if (!HasTruthString()) {
|
||||
return false;
|
||||
}
|
||||
ASSERT_HOST(it_->word()->uch_set != nullptr);
|
||||
WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
|
||||
return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
|
||||
@ -302,8 +315,9 @@ bool LTRResultIterator::EquivalentToTruth(const char *str) const {
|
||||
// Returns the null terminated UTF-8 encoded truth string for the current word.
|
||||
// Use delete [] to free after use.
|
||||
char *LTRResultIterator::WordTruthUTF8Text() const {
|
||||
if (!HasTruthString())
|
||||
if (!HasTruthString()) {
|
||||
return nullptr;
|
||||
}
|
||||
std::string truth_text = it_->word()->blamer_bundle->TruthString();
|
||||
int length = truth_text.length() + 1;
|
||||
char *result = new char[length];
|
||||
@ -314,8 +328,9 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
|
||||
// Returns the null terminated UTF-8 encoded normalized OCR string for the
|
||||
// current word. Use delete [] to free after use.
|
||||
char *LTRResultIterator::WordNormedUTF8Text() const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return nullptr; // Already at the end!
|
||||
}
|
||||
std::string ocr_text;
|
||||
WERD_CHOICE *best_choice = it_->word()->best_choice;
|
||||
const UNICHARSET *unicharset = it_->word()->uch_set;
|
||||
@ -332,10 +347,12 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
// Fills lattice_size with the number of bytes in lattice data.
|
||||
const char *LTRResultIterator::WordLattice(int *lattice_size) const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return nullptr; // Already at the end!
|
||||
if (it_->word()->blamer_bundle == nullptr)
|
||||
}
|
||||
if (it_->word()->blamer_bundle == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
*lattice_size = it_->word()->blamer_bundle->lattice_size();
|
||||
return it_->word()->blamer_bundle->lattice_data();
|
||||
}
|
||||
@ -344,8 +361,9 @@ const char *LTRResultIterator::WordLattice(int *lattice_size) const {
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsSuperscript() const {
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr)
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr) {
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -353,8 +371,9 @@ bool LTRResultIterator::SymbolIsSuperscript() const {
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsSubscript() const {
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr)
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr) {
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -362,8 +381,9 @@ bool LTRResultIterator::SymbolIsSubscript() const {
|
||||
// If iterating at a higher level object than symbols, eg words, then
|
||||
// this will return the attributes of the first symbol in that word.
|
||||
bool LTRResultIterator::SymbolIsDropcap() const {
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr)
|
||||
if (cblob_it_ == nullptr && it_->word() != nullptr) {
|
||||
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -391,8 +411,9 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator &result_it) {
|
||||
filterSpaces();
|
||||
}
|
||||
}
|
||||
if ((oemLegacy || !lstm_choice_mode) && word_res_->ratings != nullptr)
|
||||
if ((oemLegacy || !lstm_choice_mode) && word_res_->ratings != nullptr) {
|
||||
choices = word_res_->GetBlobChoices(result_it.blob_index_);
|
||||
}
|
||||
if (choices != nullptr && !choices->empty()) {
|
||||
choice_it_ = new BLOB_CHOICE_IT(choices);
|
||||
choice_it_->mark_cycle_pt();
|
||||
@ -418,8 +439,9 @@ bool ChoiceIterator::Next() {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (choice_it_ == nullptr)
|
||||
if (choice_it_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
choice_it_->forward();
|
||||
return !choice_it_->cycled_list();
|
||||
}
|
||||
@ -432,8 +454,9 @@ const char *ChoiceIterator::GetUTF8Text() const {
|
||||
std::pair<const char *, float> choice = *LSTM_choice_it_;
|
||||
return choice.first;
|
||||
} else {
|
||||
if (choice_it_ == nullptr)
|
||||
if (choice_it_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID id = choice_it_->data()->unichar_id();
|
||||
return word_res_->uch_set->id_to_unichar_ext(id);
|
||||
}
|
||||
@ -451,8 +474,9 @@ float ChoiceIterator::Confidence() const {
|
||||
std::pair<const char *, float> choice = *LSTM_choice_it_;
|
||||
confidence = 100 - rating_coefficient_ * choice.second;
|
||||
} else {
|
||||
if (choice_it_ == nullptr)
|
||||
if (choice_it_ == nullptr) {
|
||||
return 0.0f;
|
||||
}
|
||||
confidence = 100 + 5 * choice_it_->data()->certainty();
|
||||
}
|
||||
return ClipToRange(confidence, 0.0f, 100.0f);
|
||||
@ -468,8 +492,9 @@ std::vector<std::vector<std::pair<const char *, float>>> *ChoiceIterator::Timest
|
||||
}
|
||||
|
||||
void ChoiceIterator::filterSpaces() {
|
||||
if (LSTM_choices_->empty())
|
||||
if (LSTM_choices_->empty()) {
|
||||
return;
|
||||
}
|
||||
std::vector<std::pair<const char *, float>>::iterator it;
|
||||
for (it = LSTM_choices_->begin(); it != LSTM_choices_->end();) {
|
||||
if (!strcmp(it->first, " ")) {
|
||||
|
@ -115,8 +115,9 @@ int OSResults::get_best_script(int orientation_id) const {
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
const char *script = unicharset->get_script_from_script_id(j);
|
||||
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
|
||||
if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
|
||||
if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id]) {
|
||||
max_id = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
return max_id;
|
||||
@ -144,8 +145,9 @@ void OSResults::print_scores(int orientation_id) const {
|
||||
void OSResults::accumulate(const OSResults &osr) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
orientations[i] += osr.orientations[i];
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j)
|
||||
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
|
||||
scripts_na[i][j] += osr.scripts_na[i][j];
|
||||
}
|
||||
}
|
||||
unicharset = osr.unicharset;
|
||||
update_best_orientation();
|
||||
@ -188,16 +190,18 @@ int orientation_and_script_detection(const char *filename, OSResults *osr,
|
||||
std::string name = filename; // truncated name
|
||||
|
||||
const char *lastdot = strrchr(name.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
if (lastdot != nullptr) {
|
||||
name[lastdot - name.c_str()] = '\0';
|
||||
}
|
||||
|
||||
ASSERT_HOST(tess->pix_binary() != nullptr);
|
||||
int width = pixGetWidth(tess->pix_binary());
|
||||
int height = pixGetHeight(tess->pix_binary());
|
||||
|
||||
BLOCK_LIST blocks;
|
||||
if (!read_unlv_file(name, width, height, &blocks))
|
||||
if (!read_unlv_file(name, width, height, &blocks)) {
|
||||
FullPageBlock(width, height, &blocks);
|
||||
}
|
||||
|
||||
// Try to remove non-text regions from consideration.
|
||||
TO_BLOCK_LIST land_blocks, port_blocks;
|
||||
@ -228,8 +232,9 @@ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *
|
||||
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||
TO_BLOCK *to_block = block_it.data();
|
||||
if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText())
|
||||
if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText()) {
|
||||
continue;
|
||||
}
|
||||
BLOBNBOX_IT bbox_it;
|
||||
bbox_it.set_to_list(&to_block->blobs);
|
||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||
@ -239,18 +244,21 @@ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *
|
||||
++blobs_total;
|
||||
|
||||
// Catch illegal value of box width and avoid division by zero.
|
||||
if (box.width() == 0)
|
||||
if (box.width() == 0) {
|
||||
continue;
|
||||
}
|
||||
// TODO: Can height and width be negative? If not, remove fabs.
|
||||
float y_x = std::fabs((box.height() * 1.0f) / box.width());
|
||||
float x_y = 1.0f / y_x;
|
||||
// Select a >= 1.0 ratio
|
||||
float ratio = x_y > y_x ? x_y : y_x;
|
||||
// Blob is ambiguous
|
||||
if (ratio > kSizeRatioToReject)
|
||||
if (ratio > kSizeRatioToReject) {
|
||||
continue;
|
||||
if (box.height() < kMinAcceptableBlobHeight)
|
||||
}
|
||||
if (box.height() < kMinAcceptableBlobHeight) {
|
||||
continue;
|
||||
}
|
||||
filtered_it.add_to_end(bbox);
|
||||
}
|
||||
}
|
||||
@ -268,8 +276,9 @@ int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blo
|
||||
OSResults osr_;
|
||||
int minCharactersToTry = tess->min_characters_to_try;
|
||||
int maxCharactersToTry = 5 * minCharactersToTry;
|
||||
if (osr == nullptr)
|
||||
if (osr == nullptr) {
|
||||
osr = &osr_;
|
||||
}
|
||||
|
||||
osr->unicharset = &tess->unicharset;
|
||||
OrientationDetector o(allowed_scripts, osr);
|
||||
@ -391,8 +400,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (total_blob_o_score == 0.0)
|
||||
if (total_blob_o_score == 0.0) {
|
||||
return false;
|
||||
}
|
||||
// Fill in any blanks with the worst score of the others. This is better than
|
||||
// picking an arbitrary probability for it and way better than -inf.
|
||||
float worst_score = 0.0f;
|
||||
@ -400,8 +410,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
for (float f : blob_o_score) {
|
||||
if (f > 0.0f) {
|
||||
++num_good_scores;
|
||||
if (worst_score == 0.0f || f < worst_score)
|
||||
if (worst_score == 0.0f || f < worst_score) {
|
||||
worst_score = f;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_good_scores == 1) {
|
||||
@ -468,15 +479,18 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
// Check that the choice is in an allowed script.
|
||||
int s = 0;
|
||||
for (s = 0; s < allowed_scripts_->size(); ++s) {
|
||||
if ((*allowed_scripts_)[s] == id)
|
||||
if ((*allowed_scripts_)[s] == id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (s == allowed_scripts_->size())
|
||||
if (s == allowed_scripts_->size()) {
|
||||
continue; // Not found in list.
|
||||
}
|
||||
}
|
||||
// Script already processed before.
|
||||
if (done[id])
|
||||
if (done[id]) {
|
||||
continue;
|
||||
}
|
||||
done[id] = true;
|
||||
|
||||
unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
|
||||
@ -491,14 +505,17 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
++script_count;
|
||||
}
|
||||
|
||||
if (strlen(prev_unichar) == 1)
|
||||
if (unichar[0] >= '0' && unichar[0] <= '9')
|
||||
if (strlen(prev_unichar) == 1) {
|
||||
if (unichar[0] >= '0' && unichar[0] <= '9') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if script_count is >= 2, character is ambiguous, skip other matches
|
||||
// since they are useless.
|
||||
if (script_count >= 2)
|
||||
if (script_count >= 2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Character is non ambiguous
|
||||
if (script_count == 1) {
|
||||
@ -521,12 +538,15 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
}
|
||||
|
||||
// Update Japanese / Korean pseudo-scripts
|
||||
if (prev_id == katakana_id_)
|
||||
if (prev_id == katakana_id_) {
|
||||
osr_->scripts_na[i][japanese_id_] += 1.0;
|
||||
if (prev_id == hiragana_id_)
|
||||
}
|
||||
if (prev_id == hiragana_id_) {
|
||||
osr_->scripts_na[i][japanese_id_] += 1.0;
|
||||
if (prev_id == hangul_id_)
|
||||
}
|
||||
if (prev_id == hangul_id_) {
|
||||
osr_->scripts_na[i][korean_id_] += 1.0;
|
||||
}
|
||||
if (prev_id == han_id_) {
|
||||
osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
|
||||
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
|
||||
|
@ -65,14 +65,16 @@ void Tesseract::output_pass( // Tess output pass //send to api
|
||||
(tessedit_write_block_separators && (page_res_it.block() != page_res_it.next_block())) ||
|
||||
(page_res_it.next_word() == nullptr);
|
||||
|
||||
if (page_res_it.next_word() != nullptr)
|
||||
if (page_res_it.next_word() != nullptr) {
|
||||
nextword = page_res_it.next_word()->word;
|
||||
else
|
||||
} else {
|
||||
nextword = nullptr;
|
||||
if (page_res_it.next_block() != nullptr)
|
||||
}
|
||||
if (page_res_it.next_block() != nullptr) {
|
||||
nextblock = page_res_it.next_block()->block;
|
||||
else
|
||||
} else {
|
||||
nextblock = nullptr;
|
||||
}
|
||||
// regardless of tilde crunching
|
||||
write_results(page_res_it,
|
||||
determine_newline_type(page_res_it.word()->word, page_res_it.block()->block,
|
||||
@ -130,18 +132,20 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
|
||||
stats_.last_char_was_tilde = false;
|
||||
}
|
||||
|
||||
if (force_eol)
|
||||
if (force_eol) {
|
||||
stats_.write_results_empty_block = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* NORMAL PROCESSING of non tilde crunched words */
|
||||
|
||||
stats_.tilde_crunch_written = false;
|
||||
if (newline_type)
|
||||
if (newline_type) {
|
||||
stats_.last_char_was_newline = true;
|
||||
else
|
||||
} else {
|
||||
stats_.last_char_was_newline = false;
|
||||
}
|
||||
stats_.write_results_empty_block = force_eol; // about to write a real word
|
||||
|
||||
if (unlv_tilde_crunching && stats_.last_char_was_tilde && (word->word->space() == 0) &&
|
||||
@ -151,16 +155,18 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
|
||||
words have been removed */
|
||||
word->MergeAdjacentBlobs(0);
|
||||
}
|
||||
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes))
|
||||
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes)) {
|
||||
stats_.last_char_was_tilde = false;
|
||||
else {
|
||||
} else {
|
||||
if (word->reject_map.length() > 0) {
|
||||
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
|
||||
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) {
|
||||
stats_.last_char_was_tilde = true;
|
||||
else
|
||||
} else {
|
||||
stats_.last_char_was_tilde = false;
|
||||
} else if (word->word->space() > 0)
|
||||
}
|
||||
} else if (word->word->space() > 0) {
|
||||
stats_.last_char_was_tilde = false;
|
||||
}
|
||||
/* else it is unchanged as there are no output chars */
|
||||
}
|
||||
|
||||
@ -176,15 +182,17 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
|
||||
if (tessedit_zero_rejection) {
|
||||
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
|
||||
for (i = 0; i < word->best_choice->length(); ++i) {
|
||||
if (word->reject_map[i].rejected())
|
||||
if (word->reject_map[i].rejected()) {
|
||||
word->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tessedit_minimal_rejection) {
|
||||
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
|
||||
for (i = 0; i < word->best_choice->length(); ++i) {
|
||||
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected())
|
||||
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected()) {
|
||||
word->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -209,12 +217,15 @@ char determine_newline_type( // test line ends
|
||||
TBOX next_box; // next word
|
||||
TBOX block_box; // block bounding
|
||||
|
||||
if (!word->flag(W_EOL))
|
||||
if (!word->flag(W_EOL)) {
|
||||
return false; // not end of line
|
||||
if (next_word == nullptr || next_block == nullptr || block != next_block)
|
||||
}
|
||||
if (next_word == nullptr || next_block == nullptr || block != next_block) {
|
||||
return CTRL_NEWLINE;
|
||||
if (next_word->space() > 0)
|
||||
}
|
||||
if (next_word->space() > 0) {
|
||||
return CTRL_HARDLINE; // it is tabbed
|
||||
}
|
||||
word_box = word->bounding_box();
|
||||
next_box = next_word->bounding_box();
|
||||
block_box = block->pdblk.bounding_box();
|
||||
@ -236,8 +247,9 @@ char determine_newline_type( // test line ends
|
||||
*************************************************************************/
|
||||
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
|
||||
int i;
|
||||
for (i = 0; ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); ++i)
|
||||
for (i = 0; ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); ++i) {
|
||||
;
|
||||
}
|
||||
|
||||
if (i < word->reject_map.length()) {
|
||||
return word->best_choice->unichar_id(i);
|
||||
@ -265,61 +277,72 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
|
||||
|
||||
if (suspect_level == 0) {
|
||||
for (i = 0; i < len; i++) {
|
||||
if (word_res->reject_map[i].rejected())
|
||||
if (word_res->reject_map[i].rejected()) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (suspect_level >= 3)
|
||||
if (suspect_level >= 3) {
|
||||
return; // Use defaults
|
||||
}
|
||||
|
||||
/* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/
|
||||
|
||||
if (safe_dict_word(word_res) && (count_alphas(word) > suspect_short_words)) {
|
||||
/* Unreject alphas in dictionary words */
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (word_res->reject_map[i].rejected() && uchset.get_isalpha(word.unichar_id(i)))
|
||||
if (word_res->reject_map[i].rejected() && uchset.get_isalpha(word.unichar_id(i))) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rating_per_ch = word.rating() / word_res->reject_map.length();
|
||||
|
||||
if (rating_per_ch >= suspect_rating_per_ch)
|
||||
if (rating_per_ch >= suspect_rating_per_ch) {
|
||||
return; // Don't touch bad ratings
|
||||
}
|
||||
|
||||
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
|
||||
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (word_res->reject_map[i].rejected() && (!uchset.eq(word.unichar_id(i), " ")))
|
||||
if (word_res->reject_map[i].rejected() && (!uchset.eq(word.unichar_id(i), " "))) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (word_res->reject_map[i].rejected()) {
|
||||
if (word_res->reject_map[i].flag(R_DOC_REJ))
|
||||
if (word_res->reject_map[i].flag(R_DOC_REJ)) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
if (word_res->reject_map[i].flag(R_BLOCK_REJ))
|
||||
}
|
||||
if (word_res->reject_map[i].flag(R_BLOCK_REJ)) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
if (word_res->reject_map[i].flag(R_ROW_REJ))
|
||||
}
|
||||
if (word_res->reject_map[i].flag(R_ROW_REJ)) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (suspect_level == 2)
|
||||
if (suspect_level == 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!suspect_constrain_1Il || (word_res->reject_map.length() <= suspect_short_words)) {
|
||||
for (i = 0; i < len; i++) {
|
||||
if (word_res->reject_map[i].rejected()) {
|
||||
if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
|
||||
word_res->reject_map[i].flag(R_POSTNN_1IL)))
|
||||
word_res->reject_map[i].flag(R_POSTNN_1IL))) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
|
||||
if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT))
|
||||
if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT)) {
|
||||
word_res->reject_map[i].setrej_minimal_rej_accept();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -343,8 +366,9 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
|
||||
int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < word.length(); ++i) {
|
||||
if (word.unicharset()->get_isalpha(word.unichar_id(i)))
|
||||
if (word.unicharset()->get_isalpha(word.unichar_id(i))) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
@ -353,8 +377,9 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < word.length(); ++i) {
|
||||
if (word.unicharset()->get_isalpha(word.unichar_id(i)) ||
|
||||
word.unicharset()->get_isdigit(word.unichar_id(i)))
|
||||
word.unicharset()->get_isdigit(word.unichar_id(i))) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
@ -362,26 +387,29 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
|
||||
bool Tesseract::acceptable_number_string(const char *s, const char *lengths) {
|
||||
bool prev_digit = false;
|
||||
|
||||
if (*lengths == 1 && *s == '(')
|
||||
if (*lengths == 1 && *s == '(') {
|
||||
s++;
|
||||
}
|
||||
|
||||
if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
|
||||
if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) {
|
||||
s++;
|
||||
}
|
||||
|
||||
for (; *s != '\0'; s += *(lengths++)) {
|
||||
if (unicharset.get_isdigit(s, *lengths))
|
||||
if (unicharset.get_isdigit(s, *lengths)) {
|
||||
prev_digit = true;
|
||||
else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
|
||||
} else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-')))) {
|
||||
prev_digit = false;
|
||||
else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') &&
|
||||
((*s == '%') || (*s == ')')))
|
||||
} else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') &&
|
||||
((*s == '%') || (*s == ')'))) {
|
||||
return true;
|
||||
else if (prev_digit && *lengths == 1 && (*s == '%') &&
|
||||
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
|
||||
(*(s + *lengths + *(lengths + 1)) == '\0'))
|
||||
} else if (prev_digit && *lengths == 1 && (*s == '%') &&
|
||||
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
|
||||
(*(s + *lengths + *(lengths + 1)) == '\0')) {
|
||||
return true;
|
||||
else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -107,8 +107,9 @@ void PageIterator::Begin() {
|
||||
}
|
||||
|
||||
void PageIterator::RestartParagraph() {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return; // At end of the document.
|
||||
}
|
||||
PAGE_RES_IT para(page_res_);
|
||||
PAGE_RES_IT next_para(para);
|
||||
next_para.forward_paragraph();
|
||||
@ -145,10 +146,12 @@ void PageIterator::RestartRow() {
|
||||
* the appropriate language has been loaded into Tesseract.
|
||||
*/
|
||||
bool PageIterator::Next(PageIteratorLevel level) {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
if (it_->word() == nullptr)
|
||||
}
|
||||
if (it_->word() == nullptr) {
|
||||
level = RIL_BLOCK;
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case RIL_BLOCK:
|
||||
@ -158,20 +161,24 @@ bool PageIterator::Next(PageIteratorLevel level) {
|
||||
it_->forward_paragraph();
|
||||
break;
|
||||
case RIL_TEXTLINE:
|
||||
for (it_->forward_with_empties(); it_->row() == it_->prev_row(); it_->forward_with_empties())
|
||||
for (it_->forward_with_empties(); it_->row() == it_->prev_row();
|
||||
it_->forward_with_empties()) {
|
||||
;
|
||||
}
|
||||
break;
|
||||
case RIL_WORD:
|
||||
it_->forward_with_empties();
|
||||
break;
|
||||
case RIL_SYMBOL:
|
||||
if (cblob_it_ != nullptr)
|
||||
if (cblob_it_ != nullptr) {
|
||||
cblob_it_->forward();
|
||||
}
|
||||
++blob_index_;
|
||||
if (blob_index_ >= word_length_)
|
||||
if (blob_index_ >= word_length_) {
|
||||
it_->forward_with_empties();
|
||||
else
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
BeginWord(0);
|
||||
@ -184,10 +191,12 @@ bool PageIterator::Next(PageIteratorLevel level) {
|
||||
* moved to the start of a RIL_PARA.
|
||||
*/
|
||||
bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
if (it_->word() == nullptr)
|
||||
}
|
||||
if (it_->word() == nullptr) {
|
||||
return true; // In an image block.
|
||||
}
|
||||
switch (level) {
|
||||
case RIL_BLOCK:
|
||||
return blob_index_ == 0 && it_->block() != it_->prev_block();
|
||||
@ -209,8 +218,9 @@ bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
|
||||
* given level. (e.g. the last word in a line, the last line in a block)
|
||||
*/
|
||||
bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const {
|
||||
if (Empty(element))
|
||||
if (Empty(element)) {
|
||||
return true; // Already at the end!
|
||||
}
|
||||
// The result is true if we step forward by element and find we are
|
||||
// at the the end of the page or at beginning of *all* levels in:
|
||||
// [level, element).
|
||||
@ -219,12 +229,14 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel e
|
||||
// word on a line, so we also have to be at the first symbol in a word.
|
||||
PageIterator next(*this);
|
||||
next.Next(element);
|
||||
if (next.Empty(element))
|
||||
if (next.Empty(element)) {
|
||||
return true; // Reached the end of the page.
|
||||
}
|
||||
while (element > level) {
|
||||
element = static_cast<PageIteratorLevel>(element - 1);
|
||||
if (!next.IsAtBeginningOf(element))
|
||||
if (!next.IsAtBeginningOf(element)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -237,12 +249,15 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel e
|
||||
*/
|
||||
int PageIterator::Cmp(const PageIterator &other) const {
|
||||
int word_cmp = it_->cmp(*other.it_);
|
||||
if (word_cmp != 0)
|
||||
if (word_cmp != 0) {
|
||||
return word_cmp;
|
||||
if (blob_index_ < other.blob_index_)
|
||||
}
|
||||
if (blob_index_ < other.blob_index_) {
|
||||
return -1;
|
||||
if (blob_index_ == other.blob_index_)
|
||||
}
|
||||
if (blob_index_ == other.blob_index_) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -267,8 +282,9 @@ int PageIterator::Cmp(const PageIterator &other) const {
|
||||
*/
|
||||
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right,
|
||||
int *bottom) const {
|
||||
if (Empty(level))
|
||||
if (Empty(level)) {
|
||||
return false;
|
||||
}
|
||||
TBOX box;
|
||||
PARA *para = nullptr;
|
||||
switch (level) {
|
||||
@ -285,10 +301,11 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *
|
||||
box = it_->word()->word->restricted_bounding_box(include_upper_dots_, include_lower_dots_);
|
||||
break;
|
||||
case RIL_SYMBOL:
|
||||
if (cblob_it_ == nullptr)
|
||||
if (cblob_it_ == nullptr) {
|
||||
box = it_->word()->box_word->BlobBox(blob_index_);
|
||||
else
|
||||
} else {
|
||||
box = cblob_it_->data()->bounding_box();
|
||||
}
|
||||
}
|
||||
if (level == RIL_PARA) {
|
||||
PageIterator other = *this;
|
||||
@ -300,8 +317,9 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *
|
||||
}
|
||||
} while (other.Next(RIL_TEXTLINE));
|
||||
}
|
||||
if (level != RIL_SYMBOL || cblob_it_ != nullptr)
|
||||
if (level != RIL_SYMBOL || cblob_it_ != nullptr) {
|
||||
box.rotate(it_->block()->block->re_rotation());
|
||||
}
|
||||
// Now we have a box in tesseract coordinates relative to the image rectangle,
|
||||
// we have to convert the coords to a top-down system.
|
||||
const int pix_height = pixGetHeight(tesseract_->pix_binary());
|
||||
@ -326,8 +344,9 @@ bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, int
|
||||
|
||||
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int *left, int *top,
|
||||
int *right, int *bottom) const {
|
||||
if (!BoundingBoxInternal(level, left, top, right, bottom))
|
||||
if (!BoundingBoxInternal(level, left, top, right, bottom)) {
|
||||
return false;
|
||||
}
|
||||
// Convert to the coordinate system of the original image.
|
||||
*left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, rect_left_ + rect_width_);
|
||||
*top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, rect_top_ + rect_height_);
|
||||
@ -340,32 +359,39 @@ bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int *
|
||||
|
||||
/** Return that there is no such object at a given level. */
|
||||
bool PageIterator::Empty(PageIteratorLevel level) const {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return true; // Already at the end!
|
||||
if (it_->word() == nullptr && level != RIL_BLOCK)
|
||||
}
|
||||
if (it_->word() == nullptr && level != RIL_BLOCK) {
|
||||
return true; // image block
|
||||
if (level == RIL_SYMBOL && blob_index_ >= word_length_)
|
||||
}
|
||||
if (level == RIL_SYMBOL && blob_index_ >= word_length_) {
|
||||
return true; // Zero length word, or already at the end of it.
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns the type of the current block.
|
||||
* See tesseract/publictypes.h for PolyBlockType. */
|
||||
PolyBlockType PageIterator::BlockType() const {
|
||||
if (it_->block() == nullptr || it_->block()->block == nullptr)
|
||||
if (it_->block() == nullptr || it_->block()->block == nullptr) {
|
||||
return PT_UNKNOWN; // Already at the end!
|
||||
if (it_->block()->block->pdblk.poly_block() == nullptr)
|
||||
}
|
||||
if (it_->block()->block->pdblk.poly_block() == nullptr) {
|
||||
return PT_FLOWING_TEXT; // No layout analysis used - assume text.
|
||||
}
|
||||
return it_->block()->block->pdblk.poly_block()->isA();
|
||||
}
|
||||
|
||||
/** Returns the polygon outline of the current block. The returned Pta must
|
||||
* be ptaDestroy-ed after use. */
|
||||
Pta *PageIterator::BlockPolygon() const {
|
||||
if (it_->block() == nullptr || it_->block()->block == nullptr)
|
||||
if (it_->block() == nullptr || it_->block()->block == nullptr) {
|
||||
return nullptr; // Already at the end!
|
||||
if (it_->block()->block->pdblk.poly_block() == nullptr)
|
||||
}
|
||||
if (it_->block()->block->pdblk.poly_block() == nullptr) {
|
||||
return nullptr; // No layout analysis used - no polygon.
|
||||
}
|
||||
// Copy polygon, so we can unrotate it to image coordinates.
|
||||
POLY_BLOCK *internal_poly = it_->block()->block->pdblk.poly_block();
|
||||
ICOORDELT_LIST vertices;
|
||||
@ -411,10 +437,12 @@ Pta *PageIterator::BlockPolygon() const {
|
||||
*/
|
||||
Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
|
||||
int left, top, right, bottom;
|
||||
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
|
||||
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) {
|
||||
return nullptr;
|
||||
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0)
|
||||
}
|
||||
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0) {
|
||||
return cblob_it_->data()->render();
|
||||
}
|
||||
Box *box = boxCreate(left, top, right - left, bottom - top);
|
||||
Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
|
||||
boxDestroy(&box);
|
||||
@ -447,10 +475,12 @@ Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
|
||||
Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left,
|
||||
int *top) const {
|
||||
int right, bottom;
|
||||
if (!BoundingBox(level, left, top, &right, &bottom))
|
||||
if (!BoundingBox(level, left, top, &right, &bottom)) {
|
||||
return nullptr;
|
||||
if (original_img == nullptr)
|
||||
}
|
||||
if (original_img == nullptr) {
|
||||
return GetBinaryImage(level);
|
||||
}
|
||||
|
||||
// Expand the box.
|
||||
*left = std::max(*left - padding, 0);
|
||||
@ -487,8 +517,9 @@ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_
|
||||
* WARNING: with vertical text, baselines may be vertical!
|
||||
*/
|
||||
bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
}
|
||||
ROW *row = it_->row()->row;
|
||||
WERD *word = it_->word()->word;
|
||||
TBOX box =
|
||||
@ -552,8 +583,9 @@ void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, bool *
|
||||
bool *is_crown, int *first_line_indent) const {
|
||||
*just = tesseract::JUSTIFICATION_UNKNOWN;
|
||||
if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
|
||||
!it_->row()->row->para()->model)
|
||||
!it_->row()->row->para()->model) {
|
||||
return;
|
||||
}
|
||||
|
||||
PARA *para = it_->row()->row->para();
|
||||
*is_list_item = para->is_list_item;
|
||||
@ -596,13 +628,15 @@ void PageIterator::BeginWord(int offset) {
|
||||
word_ = word_res->word;
|
||||
ASSERT_HOST(word_->cblob_list() != nullptr);
|
||||
word_length_ = word_->cblob_list()->length();
|
||||
if (cblob_it_ == nullptr)
|
||||
if (cblob_it_ == nullptr) {
|
||||
cblob_it_ = new C_BLOB_IT;
|
||||
}
|
||||
cblob_it_->set_to_list(word_->cblob_list());
|
||||
}
|
||||
for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
|
||||
if (cblob_it_ != nullptr)
|
||||
if (cblob_it_ != nullptr) {
|
||||
cblob_it_->forward();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,8 +109,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
|
||||
std::string name = input_file;
|
||||
const char *lastdot = strrchr(name.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
if (lastdot != nullptr) {
|
||||
name[lastdot - name.c_str()] = '\0';
|
||||
}
|
||||
read_unlv_file(name, width, height, blocks);
|
||||
}
|
||||
if (blocks->empty()) {
|
||||
@ -138,8 +139,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
||||
auto_page_seg_ret_val =
|
||||
AutoPageSeg(pageseg_mode, blocks, &to_blocks,
|
||||
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
|
||||
if (pageseg_mode == PSM_OSD_ONLY)
|
||||
if (pageseg_mode == PSM_OSD_ONLY) {
|
||||
return auto_page_seg_ret_val;
|
||||
}
|
||||
// To create blobs from the image region bounds uncomment this line:
|
||||
// to_blocks.clear(); // Uncomment to go back to the old mode.
|
||||
} else {
|
||||
@ -159,8 +161,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
|
||||
}
|
||||
|
||||
if (blocks->empty()) {
|
||||
if (textord_debug_tabfind)
|
||||
if (textord_debug_tabfind) {
|
||||
tprintf("Empty page\n");
|
||||
}
|
||||
return 0; // AutoPageSeg found an empty page.
|
||||
}
|
||||
bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
|
||||
@ -223,14 +226,16 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC
|
||||
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
|
||||
photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
|
||||
&found_blocks, diacritic_blobs, to_blocks);
|
||||
if (result >= 0)
|
||||
if (result >= 0) {
|
||||
finder->GetDeskewVectors(&deskew_, &reskew_);
|
||||
}
|
||||
delete finder;
|
||||
}
|
||||
pixDestroy(&photomask_pix);
|
||||
pixDestroy(&musicmask_pix);
|
||||
if (result < 0)
|
||||
if (result < 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
blocks->clear();
|
||||
BLOCK_IT block_it(blocks);
|
||||
@ -297,8 +302,9 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
|
||||
pixa_debug_.AddPix(pix_no_image_, "NoImages");
|
||||
pixDestroy(&pix_no_image_);
|
||||
}
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode))
|
||||
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
|
||||
v_lines.clear();
|
||||
}
|
||||
|
||||
// The rest of the algorithm uses the usual connected components.
|
||||
textord_.find_components(pix_binary_, blocks, to_blocks);
|
||||
|
@ -33,8 +33,9 @@ void Tesseract::process_selected_words(
|
||||
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr; page_res_it.forward()) {
|
||||
WERD *word = page_res_it.word()->word;
|
||||
if (word->bounding_box().overlap(selection_box)) {
|
||||
if (!(this->*word_processor)(&page_res_it))
|
||||
if (!(this->*word_processor)(&page_res_it)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -98,14 +98,16 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
|
||||
for (int c = 0; c < num_columns; c++) {
|
||||
int num_unicodes = 0;
|
||||
for (char i : row[c]) {
|
||||
if ((i & 0xC0) != 0x80)
|
||||
if ((i & 0xC0) != 0x80) {
|
||||
num_unicodes++;
|
||||
}
|
||||
}
|
||||
if (c >= max_col_widths.size()) {
|
||||
max_col_widths.push_back(num_unicodes);
|
||||
} else {
|
||||
if (num_unicodes > max_col_widths[c])
|
||||
if (num_unicodes > max_col_widths[c]) {
|
||||
max_col_widths[c] = num_unicodes;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -117,8 +119,9 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
|
||||
|
||||
for (const auto &row : rows) {
|
||||
for (int c = 0; c < row.size(); c++) {
|
||||
if (c > 0)
|
||||
if (c > 0) {
|
||||
tprintf("%s", colsep);
|
||||
}
|
||||
tprintf(col_width_patterns[c].c_str(), row[c].c_str());
|
||||
}
|
||||
tprintf("\n");
|
||||
@ -126,8 +129,9 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
|
||||
}
|
||||
|
||||
static std::string RtlEmbed(const std::string &word, bool rtlify) {
|
||||
if (rtlify)
|
||||
if (rtlify) {
|
||||
return std::string(kRLE) + word + std::string(kPDF);
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
@ -173,8 +177,9 @@ static void PrintDetectorState(const ParagraphTheory &theory,
|
||||
|
||||
static void DebugDump(bool should_print, const char *phase, const ParagraphTheory &theory,
|
||||
const std::vector<RowScratchRegisters> &rows) {
|
||||
if (!should_print)
|
||||
if (!should_print) {
|
||||
return;
|
||||
}
|
||||
tprintf("# %s\n", phase);
|
||||
PrintDetectorState(theory, rows);
|
||||
}
|
||||
@ -223,8 +228,9 @@ static const char *SkipChars(const char *str, bool (*skip)(int)) {
|
||||
}
|
||||
|
||||
static const char *SkipOne(const char *str, const char *toskip) {
|
||||
if (*str != '\0' && strchr(toskip, *str))
|
||||
if (*str != '\0' && strchr(toskip, *str)) {
|
||||
return str + 1;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
@ -251,16 +257,18 @@ static bool LikelyListNumeral(const std::string &word) {
|
||||
if (numeral_end == numeral_start) {
|
||||
// If there's a single latin letter, we can use that.
|
||||
numeral_end = SkipChars(numeral_start, IsLatinLetter);
|
||||
if (numeral_end - numeral_start != 1)
|
||||
if (numeral_end - numeral_start != 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// We got some sort of numeral.
|
||||
num_segments++;
|
||||
// Skip any trailing parens or punctuation.
|
||||
pos = SkipChars(SkipChars(numeral_end, kClose), kSep);
|
||||
if (pos == numeral_end)
|
||||
if (pos == numeral_end) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *pos == '\0';
|
||||
}
|
||||
@ -278,8 +286,9 @@ bool AsciiLikelyListItem(const std::string &word) {
|
||||
|
||||
// Return the first Unicode Codepoint from werd[pos].
|
||||
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) {
|
||||
if (!u || !werd || pos > werd->length())
|
||||
if (!u || !werd || pos > werd->length()) {
|
||||
return 0;
|
||||
}
|
||||
return UNICHAR(u->id_to_unichar(werd->unichar_id(pos)), -1).first_uni();
|
||||
}
|
||||
|
||||
@ -308,15 +317,17 @@ private:
|
||||
};
|
||||
|
||||
int UnicodeSpanSkipper::SkipPunc(int pos) {
|
||||
while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos)))
|
||||
while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) {
|
||||
pos++;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
int UnicodeSpanSkipper::SkipDigits(int pos) {
|
||||
while (pos < wordlen_ &&
|
||||
(u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos))))
|
||||
(u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos)))) {
|
||||
pos++;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
@ -324,16 +335,18 @@ int UnicodeSpanSkipper::SkipRomans(int pos) {
|
||||
const char *kRomans = "ivxlmdIVXLMD";
|
||||
while (pos < wordlen_) {
|
||||
int ch = UnicodeFor(u_, word_, pos);
|
||||
if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr)
|
||||
if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) {
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
int UnicodeSpanSkipper::SkipAlpha(int pos) {
|
||||
while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos)))
|
||||
while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) {
|
||||
pos++;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
@ -367,32 +380,36 @@ static bool LikelyListMarkUnicode(int ch) {
|
||||
// start a list item. Some examples include:
|
||||
// A I iii. VI (2) 3.5. [C-4]
|
||||
static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
|
||||
if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0)))
|
||||
if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
UnicodeSpanSkipper m(u, werd);
|
||||
int num_segments = 0;
|
||||
int pos = 0;
|
||||
while (pos < werd->length() && num_segments < 3) {
|
||||
int numeral_start = m.SkipPunc(pos);
|
||||
if (numeral_start > pos + 1)
|
||||
if (numeral_start > pos + 1) {
|
||||
break;
|
||||
}
|
||||
int numeral_end = m.SkipRomans(numeral_start);
|
||||
if (numeral_end == numeral_start) {
|
||||
numeral_end = m.SkipDigits(numeral_start);
|
||||
if (numeral_end == numeral_start) {
|
||||
// If there's a single latin letter, we can use that.
|
||||
numeral_end = m.SkipAlpha(numeral_start);
|
||||
if (numeral_end - numeral_start != 1)
|
||||
if (numeral_end - numeral_start != 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// We got some sort of numeral.
|
||||
num_segments++;
|
||||
// Skip any trailing punctuation.
|
||||
pos = m.SkipPunc(numeral_end);
|
||||
if (pos == numeral_end)
|
||||
if (pos == numeral_end) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return pos == werd->length();
|
||||
}
|
||||
@ -506,10 +523,12 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
|
||||
|
||||
int model_numbers = 0;
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
if (hypothese.model == nullptr)
|
||||
if (hypothese.model == nullptr) {
|
||||
continue;
|
||||
if (model_numbers > 0)
|
||||
}
|
||||
if (model_numbers > 0) {
|
||||
model_string += ",";
|
||||
}
|
||||
if (StrongModel(hypothese.model)) {
|
||||
model_string += std::to_string(1 + theory.IndexOf(hypothese.model));
|
||||
} else if (hypothese.model == kCrownLeft) {
|
||||
@ -519,8 +538,9 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
|
||||
}
|
||||
model_numbers++;
|
||||
}
|
||||
if (model_numbers == 0)
|
||||
if (model_numbers == 0) {
|
||||
model_string += "0";
|
||||
}
|
||||
|
||||
dbg.push_back(model_string);
|
||||
}
|
||||
@ -534,8 +554,9 @@ void RowScratchRegisters::Init(const RowInfo &row) {
|
||||
}
|
||||
|
||||
LineType RowScratchRegisters::GetLineType() const {
|
||||
if (hypotheses_.empty())
|
||||
if (hypotheses_.empty()) {
|
||||
return LT_UNKNOWN;
|
||||
}
|
||||
bool has_start = false;
|
||||
bool has_body = false;
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
@ -551,19 +572,22 @@ LineType RowScratchRegisters::GetLineType() const {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (has_start && has_body)
|
||||
if (has_start && has_body) {
|
||||
return LT_MULTIPLE;
|
||||
}
|
||||
return has_start ? LT_START : LT_BODY;
|
||||
}
|
||||
|
||||
LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const {
|
||||
if (hypotheses_.empty())
|
||||
if (hypotheses_.empty()) {
|
||||
return LT_UNKNOWN;
|
||||
}
|
||||
bool has_start = false;
|
||||
bool has_body = false;
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
if (hypothese.model != model)
|
||||
if (hypothese.model != model) {
|
||||
continue;
|
||||
}
|
||||
switch (hypothese.ty) {
|
||||
case LT_START:
|
||||
has_start = true;
|
||||
@ -576,8 +600,9 @@ LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (has_start && has_body)
|
||||
if (has_start && has_body) {
|
||||
return LT_MULTIPLE;
|
||||
}
|
||||
return has_start ? LT_START : LT_BODY;
|
||||
}
|
||||
|
||||
@ -619,41 +644,47 @@ void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) {
|
||||
|
||||
void RowScratchRegisters::StartHypotheses(SetOfModels *models) const {
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
if (hypothese.ty == LT_START && StrongModel(hypothese.model))
|
||||
if (hypothese.ty == LT_START && StrongModel(hypothese.model)) {
|
||||
push_back_new(*models, hypothese.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const {
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
if (StrongModel(hypothese.model))
|
||||
if (StrongModel(hypothese.model)) {
|
||||
push_back_new(*models, hypothese.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const {
|
||||
for (const auto &hypothese : hypotheses_) {
|
||||
if (hypothese.model != nullptr)
|
||||
if (hypothese.model != nullptr) {
|
||||
push_back_new(*models, hypothese.model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ParagraphModel *RowScratchRegisters::UniqueStartHypothesis() const {
|
||||
if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START)
|
||||
if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START) {
|
||||
return nullptr;
|
||||
}
|
||||
return hypotheses_[0].model;
|
||||
}
|
||||
|
||||
const ParagraphModel *RowScratchRegisters::UniqueBodyHypothesis() const {
|
||||
if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY)
|
||||
if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY) {
|
||||
return nullptr;
|
||||
}
|
||||
return hypotheses_[0].model;
|
||||
}
|
||||
|
||||
// Discard any hypotheses whose model is not in the given list.
|
||||
void RowScratchRegisters::DiscardNonMatchingHypotheses(const SetOfModels &models) {
|
||||
if (models.empty())
|
||||
if (models.empty()) {
|
||||
return;
|
||||
}
|
||||
for (int h = hypotheses_.size() - 1; h >= 0; h--) {
|
||||
if (!contains(models, hypotheses_[h].model)) {
|
||||
hypotheses_.erase(hypotheses_.begin() + h);
|
||||
@ -691,8 +722,9 @@ private:
|
||||
static int ClosestCluster(const std::vector<Cluster> &clusters, int value) {
|
||||
int best_index = 0;
|
||||
for (int i = 0; i < clusters.size(); i++) {
|
||||
if (abs(value - clusters[i].center) < abs(value - clusters[best_index].center))
|
||||
if (abs(value - clusters[i].center) < abs(value - clusters[best_index].center)) {
|
||||
best_index = i;
|
||||
}
|
||||
}
|
||||
return best_index;
|
||||
}
|
||||
@ -716,8 +748,9 @@ void SimpleClusterer::GetClusters(std::vector<Cluster> *clusters) {
|
||||
static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
|
||||
int tolerance, std::vector<Cluster> *left_tabs,
|
||||
std::vector<Cluster> *right_tabs) {
|
||||
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))
|
||||
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end)) {
|
||||
return;
|
||||
}
|
||||
// First pass: toss all left and right indents into clusterers.
|
||||
SimpleClusterer initial_lefts(tolerance);
|
||||
SimpleClusterer initial_rights(tolerance);
|
||||
@ -744,10 +777,12 @@ static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_st
|
||||
// to how rare it is. These outliers get re-added if we end up having too
|
||||
// few tab stops, to work with, however.
|
||||
int infrequent_enough_to_ignore = 0;
|
||||
if (row_end - row_start >= 8)
|
||||
if (row_end - row_start >= 8) {
|
||||
infrequent_enough_to_ignore = 1;
|
||||
if (row_end - row_start >= 20)
|
||||
}
|
||||
if (row_end - row_start >= 20) {
|
||||
infrequent_enough_to_ignore = 2;
|
||||
}
|
||||
|
||||
for (int i = row_start; i < row_end; i++) {
|
||||
int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_);
|
||||
@ -827,8 +862,9 @@ static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_st
|
||||
// is greater than eop_threshold.
|
||||
static void MarkRowsWithModel(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
|
||||
const ParagraphModel *model, bool ltr, int eop_threshold) {
|
||||
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))
|
||||
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
|
||||
return;
|
||||
}
|
||||
for (int row = row_start; row < row_end; row++) {
|
||||
bool valid_first = ValidFirstLine(rows, row, model);
|
||||
bool valid_body = ValidBodyLine(rows, row, model);
|
||||
@ -895,8 +931,9 @@ struct GeometricClassifierState {
|
||||
|
||||
// Align tabs are the tab stops the text is aligned to.
|
||||
const std::vector<Cluster> &AlignTabs() const {
|
||||
if (just == tesseract::JUSTIFICATION_RIGHT)
|
||||
if (just == tesseract::JUSTIFICATION_RIGHT) {
|
||||
return right_tabs;
|
||||
}
|
||||
return left_tabs;
|
||||
}
|
||||
|
||||
@ -906,8 +943,9 @@ struct GeometricClassifierState {
|
||||
// this function comment, the offside tabs are the horizontal tab stops
|
||||
// marking the beginning of ("Note", "this" and "marking").
|
||||
const std::vector<Cluster> &OffsideTabs() const {
|
||||
if (just == tesseract::JUSTIFICATION_RIGHT)
|
||||
if (just == tesseract::JUSTIFICATION_RIGHT) {
|
||||
return left_tabs;
|
||||
}
|
||||
return right_tabs;
|
||||
}
|
||||
|
||||
@ -933,8 +971,9 @@ struct GeometricClassifierState {
|
||||
}
|
||||
|
||||
void Fail(int min_debug_level, const char *why) const {
|
||||
if (debug_level < min_debug_level)
|
||||
if (debug_level < min_debug_level) {
|
||||
return;
|
||||
}
|
||||
tprintf("# %s\n", why);
|
||||
PrintRows();
|
||||
}
|
||||
@ -1009,8 +1048,9 @@ static void GeometricClassifyThreeTabStopTextBlock(int debug_level, GeometricCla
|
||||
for (int i = s.row_start; i < s.row_end; i++) {
|
||||
if (s.IsFullRow(i)) {
|
||||
num_full_rows++;
|
||||
if (i == s.row_end - 1)
|
||||
if (i == s.row_end - 1) {
|
||||
last_row_full++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1093,8 +1133,9 @@ static void GeometricClassifyThreeTabStopTextBlock(int debug_level, GeometricCla
|
||||
// far more "full" lines than "short" lines.
|
||||
static void GeometricClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
|
||||
int row_start, int row_end, ParagraphTheory *theory) {
|
||||
if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end))
|
||||
if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end)) {
|
||||
return;
|
||||
}
|
||||
if (debug_level > 1) {
|
||||
tprintf("###############################################\n");
|
||||
tprintf("##### GeometricClassify( rows[%d:%d) ) ####\n", row_start, row_end);
|
||||
@ -1257,24 +1298,27 @@ void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
|
||||
const ParagraphModel *ParagraphTheory::Fits(const std::vector<RowScratchRegisters> *rows,
|
||||
int start, int end) const {
|
||||
for (const auto *model : *models_) {
|
||||
if (model->justification() != JUSTIFICATION_CENTER && RowsFitModel(rows, start, end, model))
|
||||
if (model->justification() != JUSTIFICATION_CENTER && RowsFitModel(rows, start, end, model)) {
|
||||
return model;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void ParagraphTheory::NonCenteredModels(SetOfModels *models) {
|
||||
for (const auto *model : *models_) {
|
||||
if (model->justification() != JUSTIFICATION_CENTER)
|
||||
if (model->justification() != JUSTIFICATION_CENTER) {
|
||||
push_back_new(*models, model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int ParagraphTheory::IndexOf(const ParagraphModel *model) const {
|
||||
int i = 0;
|
||||
for (const auto *m : *models_) {
|
||||
if (m == model)
|
||||
if (m == model) {
|
||||
return i;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return -1;
|
||||
@ -1330,10 +1374,12 @@ ParagraphModelSmearer::ParagraphModelSmearer(std::vector<RowScratchRegisters> *r
|
||||
// see paragraphs_internal.h
|
||||
void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) {
|
||||
SetOfModels no_models;
|
||||
if (row_start < row_start_)
|
||||
if (row_start < row_start_) {
|
||||
row_start = row_start_;
|
||||
if (row_end > row_end_)
|
||||
}
|
||||
if (row_end > row_end_) {
|
||||
row_end = row_end_;
|
||||
}
|
||||
|
||||
for (int row = (row_start > 0) ? row_start - 1 : row_start; row < row_end; row++) {
|
||||
if ((*rows_)[row].ri_->num_words == 0) {
|
||||
@ -1366,8 +1412,9 @@ void ParagraphModelSmearer::Smear() {
|
||||
// was recently used (an "open" model) which might model it well.
|
||||
for (int i = row_start_; i < row_end_; i++) {
|
||||
RowScratchRegisters &row = (*rows_)[i];
|
||||
if (row.ri_->num_words == 0)
|
||||
if (row.ri_->num_words == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Step One:
|
||||
// Figure out if there are "open" models which are left-alined or
|
||||
@ -1424,8 +1471,9 @@ void ParagraphModelSmearer::Smear() {
|
||||
theory_->NonCenteredModels(&last_line_models);
|
||||
}
|
||||
for (auto model : last_line_models) {
|
||||
if (ValidBodyLine(rows_, i, model))
|
||||
if (ValidBodyLine(rows_, i, model)) {
|
||||
row.AddBodyLine(model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1498,8 +1546,9 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
|
||||
while (end > 0 && (model = (*rows)[end - 1].UniqueBodyHypothesis()) == nullptr) {
|
||||
end--;
|
||||
}
|
||||
if (end == 0)
|
||||
if (end == 0) {
|
||||
break;
|
||||
}
|
||||
start = end - 1;
|
||||
while (start >= 0 && (*rows)[start].UniqueBodyHypothesis() == model) {
|
||||
start--; // walk back to the first line that is not the same body type.
|
||||
@ -1510,21 +1559,24 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
|
||||
}
|
||||
start++;
|
||||
// Now rows[start, end) is a sequence of unique body hypotheses of model.
|
||||
if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER)
|
||||
if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER) {
|
||||
continue;
|
||||
}
|
||||
if (!StrongModel(model)) {
|
||||
while (start > 0 && CrownCompatible(rows, start - 1, start, model))
|
||||
while (start > 0 && CrownCompatible(rows, start - 1, start, model)) {
|
||||
start--;
|
||||
}
|
||||
}
|
||||
if (start == 0 || (!StrongModel(model)) ||
|
||||
(StrongModel(model) && !ValidFirstLine(rows, start - 1, model))) {
|
||||
// crownify rows[start, end)
|
||||
const ParagraphModel *crown_model = model;
|
||||
if (StrongModel(model)) {
|
||||
if (model->justification() == JUSTIFICATION_LEFT)
|
||||
if (model->justification() == JUSTIFICATION_LEFT) {
|
||||
crown_model = kCrownLeft;
|
||||
else
|
||||
} else {
|
||||
crown_model = kCrownRight;
|
||||
}
|
||||
}
|
||||
(*rows)[start].SetUnknown();
|
||||
(*rows)[start].AddStartLine(crown_model);
|
||||
@ -1555,8 +1607,9 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
|
||||
// the common margin for each row in the run of rows[start, end).
|
||||
void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows, int start,
|
||||
int end, int percentile) {
|
||||
if (!AcceptableRowArgs(0, 0, __func__, rows, start, end))
|
||||
if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int lmin, lmax, rmin, rmax;
|
||||
lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_;
|
||||
@ -1564,8 +1617,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
|
||||
for (int i = start; i < end; i++) {
|
||||
RowScratchRegisters &sr = (*rows)[i];
|
||||
sr.SetUnknown();
|
||||
if (sr.ri_->num_words == 0)
|
||||
if (sr.ri_->num_words == 0) {
|
||||
continue;
|
||||
}
|
||||
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
|
||||
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
|
||||
}
|
||||
@ -1573,8 +1627,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
|
||||
STATS rights(rmin, rmax + 1);
|
||||
for (int i = start; i < end; i++) {
|
||||
RowScratchRegisters &sr = (*rows)[i];
|
||||
if (sr.ri_->num_words == 0)
|
||||
if (sr.ri_->num_words == 0) {
|
||||
continue;
|
||||
}
|
||||
lefts.add(sr.lmargin_ + sr.lindent_, 1);
|
||||
rights.add(sr.rmargin_ + sr.rindent_, 1);
|
||||
}
|
||||
@ -1593,8 +1648,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
|
||||
|
||||
// Return the median inter-word space in rows[row_start, row_end).
|
||||
int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start, int row_end) {
|
||||
if (row_end < row_start + 1)
|
||||
if (row_end < row_start + 1) {
|
||||
return 1;
|
||||
}
|
||||
int word_height =
|
||||
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
|
||||
int word_width =
|
||||
@ -1606,8 +1662,9 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
|
||||
}
|
||||
}
|
||||
int minimum_reasonable_space = word_height / 3;
|
||||
if (minimum_reasonable_space < 2)
|
||||
if (minimum_reasonable_space < 2) {
|
||||
minimum_reasonable_space = 2;
|
||||
}
|
||||
int median = spacing_widths.median();
|
||||
return (median > minimum_reasonable_space) ? median : minimum_reasonable_space;
|
||||
}
|
||||
@ -1616,8 +1673,9 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
|
||||
// the end of the before line (knowing which way the text is aligned and read).
|
||||
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after,
|
||||
tesseract::ParagraphJustification justification) {
|
||||
if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
|
||||
if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (justification == JUSTIFICATION_UNKNOWN) {
|
||||
tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
|
||||
@ -1630,8 +1688,9 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRe
|
||||
}
|
||||
available_space -= before.ri_->average_interword_space;
|
||||
|
||||
if (before.ri_->ltr)
|
||||
if (before.ri_->ltr) {
|
||||
return after.ri_->lword_box.width() < available_space;
|
||||
}
|
||||
return after.ri_->rword_box.width() < available_space;
|
||||
}
|
||||
|
||||
@ -1639,16 +1698,19 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRe
|
||||
// the end of the before line (not knowing which way the text goes) in a left
|
||||
// or right alignment.
|
||||
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after) {
|
||||
if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
|
||||
if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int available_space = before.lindent_;
|
||||
if (before.rindent_ > available_space)
|
||||
if (before.rindent_ > available_space) {
|
||||
available_space = before.rindent_;
|
||||
}
|
||||
available_space -= before.ri_->average_interword_space;
|
||||
|
||||
if (before.ri_->ltr)
|
||||
if (before.ri_->ltr) {
|
||||
return after.ri_->lword_box.width() < available_space;
|
||||
}
|
||||
return after.ri_->rword_box.width() < available_space;
|
||||
}
|
||||
|
||||
@ -1682,8 +1744,9 @@ static ParagraphModel InternalParagraphModelByOutline(
|
||||
bool ltr = (ltr_line_count >= (end - start) / 2);
|
||||
|
||||
*consistent = true;
|
||||
if (!AcceptableRowArgs(0, 2, __func__, rows, start, end))
|
||||
if (!AcceptableRowArgs(0, 2, __func__, rows, start, end)) {
|
||||
return ParagraphModel();
|
||||
}
|
||||
|
||||
// Ensure the caller only passed us a region with a common rmargin and
|
||||
// lmargin.
|
||||
@ -1708,15 +1771,17 @@ static ParagraphModel InternalParagraphModelByOutline(
|
||||
int cdiff = cmax - cmin;
|
||||
if (rdiff > tolerance && ldiff > tolerance) {
|
||||
if (cdiff < tolerance * 2) {
|
||||
if (end - start < 3)
|
||||
if (end - start < 3) {
|
||||
return ParagraphModel();
|
||||
}
|
||||
return ParagraphModel(JUSTIFICATION_CENTER, 0, 0, 0, tolerance);
|
||||
}
|
||||
*consistent = false;
|
||||
return ParagraphModel();
|
||||
}
|
||||
if (end - start < 3) // Don't return a model for two line paras.
|
||||
if (end - start < 3) { // Don't return a model for two line paras.
|
||||
return ParagraphModel();
|
||||
}
|
||||
|
||||
// These booleans keep us from saying something is aligned left when the body
|
||||
// left variance is too large.
|
||||
@ -1737,14 +1802,16 @@ static ParagraphModel InternalParagraphModelByOutline(
|
||||
// If the other is obviously ragged, it can't be the one aligned to.
|
||||
// [Note the last line is included in this raggedness.]
|
||||
if (tolerance < rdiff) {
|
||||
if (body_admits_left_alignment && text_admits_left_alignment)
|
||||
if (body_admits_left_alignment && text_admits_left_alignment) {
|
||||
return left_model;
|
||||
}
|
||||
*consistent = false;
|
||||
return ParagraphModel();
|
||||
}
|
||||
if (tolerance < ldiff) {
|
||||
if (body_admits_right_alignment && text_admits_right_alignment)
|
||||
if (body_admits_right_alignment && text_admits_right_alignment) {
|
||||
return right_model;
|
||||
}
|
||||
*consistent = false;
|
||||
return ParagraphModel();
|
||||
}
|
||||
@ -1756,10 +1823,12 @@ static ParagraphModel InternalParagraphModelByOutline(
|
||||
int first_left = (*rows)[start].lindent_;
|
||||
int first_right = (*rows)[start].rindent_;
|
||||
|
||||
if (ltr && body_admits_left_alignment && (first_left < lmin || first_left > lmax))
|
||||
if (ltr && body_admits_left_alignment && (first_left < lmin || first_left > lmax)) {
|
||||
return left_model;
|
||||
if (!ltr && body_admits_right_alignment && (first_right < rmin || first_right > rmax))
|
||||
}
|
||||
if (!ltr && body_admits_right_alignment && (first_right < rmin || first_right > rmax)) {
|
||||
return right_model;
|
||||
}
|
||||
|
||||
*consistent = false;
|
||||
return ParagraphModel();
|
||||
@ -1785,13 +1854,16 @@ static ParagraphModel ParagraphModelByOutline(int debug_level,
|
||||
// Do rows[start, end) form a single instance of the given paragraph model?
|
||||
bool RowsFitModel(const std::vector<RowScratchRegisters> *rows, int start, int end,
|
||||
const ParagraphModel *model) {
|
||||
if (!AcceptableRowArgs(0, 1, __func__, rows, start, end))
|
||||
if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) {
|
||||
return false;
|
||||
if (!ValidFirstLine(rows, start, model))
|
||||
}
|
||||
if (!ValidFirstLine(rows, start, model)) {
|
||||
return false;
|
||||
}
|
||||
for (int i = start + 1; i < end; i++) {
|
||||
if (!ValidBodyLine(rows, i, model))
|
||||
if (!ValidBodyLine(rows, i, model)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1872,15 +1944,18 @@ static void MarkStrongEvidence(std::vector<RowScratchRegisters> *rows, int row_s
|
||||
static void ModelStrongEvidence(int debug_level, std::vector<RowScratchRegisters> *rows,
|
||||
int row_start, int row_end, bool allow_flush_models,
|
||||
ParagraphTheory *theory) {
|
||||
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
|
||||
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int start = row_start;
|
||||
while (start < row_end) {
|
||||
while (start < row_end && (*rows)[start].GetLineType() != LT_START)
|
||||
while (start < row_end && (*rows)[start].GetLineType() != LT_START) {
|
||||
start++;
|
||||
if (start >= row_end - 1)
|
||||
}
|
||||
if (start >= row_end - 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
int tolerance = Epsilon((*rows)[start + 1].ri_->average_interword_space);
|
||||
int end = start;
|
||||
@ -1960,8 +2035,9 @@ static void ModelStrongEvidence(int debug_level, std::vector<RowScratchRegisters
|
||||
// (4) Smear the paragraph models to cover surrounding text.
|
||||
static void StrongEvidenceClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
|
||||
int row_start, int row_end, ParagraphTheory *theory) {
|
||||
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
|
||||
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (debug_level > 1) {
|
||||
tprintf("#############################################\n");
|
||||
@ -2015,8 +2091,9 @@ static void ConvertHypothesizedModelRunsToParagraphs(int debug_level,
|
||||
rows[start].NonNullHypotheses(&models);
|
||||
if (!models.empty()) {
|
||||
model = models[0];
|
||||
if (rows[start].GetLineType(model) != LT_BODY)
|
||||
if (rows[start].GetLineType(model) != LT_BODY) {
|
||||
single_line_paragraph = true;
|
||||
}
|
||||
}
|
||||
if (model && !single_line_paragraph) {
|
||||
// walk back looking for more body lines and then a start line.
|
||||
@ -2140,8 +2217,9 @@ static bool RowIsStranded(const std::vector<RowScratchRegisters> &rows, int row)
|
||||
continues = false;
|
||||
}
|
||||
}
|
||||
if (run_length > 2 || (!all_starts && run_length > 1))
|
||||
if (run_length > 2 || (!all_starts && run_length > 1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -2187,10 +2265,11 @@ static void LeftoverSegments(const std::vector<RowScratchRegisters> &rows,
|
||||
}
|
||||
|
||||
if (needs_fixing) {
|
||||
if (!to_fix->empty() && to_fix->back().end == i - 1)
|
||||
if (!to_fix->empty() && to_fix->back().end == i - 1) {
|
||||
to_fix->back().end = i;
|
||||
else
|
||||
} else {
|
||||
to_fix->push_back(Interval(i, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Convert inclusive intervals to half-open intervals.
|
||||
@ -2328,8 +2407,9 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
|
||||
if (!pit.Empty(RIL_WORD)) {
|
||||
do {
|
||||
fake_text += "x";
|
||||
if (first_word)
|
||||
if (first_word) {
|
||||
info->lword_text += "x";
|
||||
}
|
||||
info->rword_text += "x";
|
||||
if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&
|
||||
!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL)) {
|
||||
@ -2339,8 +2419,9 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
|
||||
}
|
||||
} while (!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL) && pit.Next(RIL_SYMBOL));
|
||||
}
|
||||
if (fake_text.size() == 0)
|
||||
if (fake_text.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int lspaces = info->pix_ldistance / info->average_interword_space;
|
||||
for (int i = 0; i < lspaces; i++) {
|
||||
@ -2358,19 +2439,23 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
|
||||
info->num_words = 0;
|
||||
do {
|
||||
if (word_res) {
|
||||
if (!lword)
|
||||
if (!lword) {
|
||||
lword = word_res;
|
||||
if (rword != word_res)
|
||||
}
|
||||
if (rword != word_res) {
|
||||
info->num_words++;
|
||||
}
|
||||
rword = word_res;
|
||||
}
|
||||
word_res = page_res_it.forward();
|
||||
} while (page_res_it.row() == this_row);
|
||||
|
||||
if (lword)
|
||||
if (lword) {
|
||||
info->lword_box = lword->word->bounding_box();
|
||||
if (rword)
|
||||
}
|
||||
if (rword) {
|
||||
info->rword_box = rword->word->bounding_box();
|
||||
}
|
||||
}
|
||||
|
||||
// Given a Tesseract Iterator pointing to a text line, fill in the paragraph
|
||||
@ -2414,14 +2499,17 @@ static void InitializeRowInfo(bool after_recognition, const MutableIterator &it,
|
||||
int trailing_ws_idx = strlen(text.get()); // strip trailing space
|
||||
while (trailing_ws_idx > 0 &&
|
||||
// isspace() only takes ASCII
|
||||
isascii(text[trailing_ws_idx - 1]) && isspace(text[trailing_ws_idx - 1]))
|
||||
isascii(text[trailing_ws_idx - 1]) && isspace(text[trailing_ws_idx - 1])) {
|
||||
trailing_ws_idx--;
|
||||
}
|
||||
if (trailing_ws_idx > 0) {
|
||||
int lspaces = info->pix_ldistance / info->average_interword_space;
|
||||
for (int i = 0; i < lspaces; i++)
|
||||
for (int i = 0; i < lspaces; i++) {
|
||||
info->text += ' ';
|
||||
for (int i = 0; i < trailing_ws_idx; i++)
|
||||
}
|
||||
for (int i = 0; i < trailing_ws_idx; i++) {
|
||||
info->text += text[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (info->text.size() == 0) {
|
||||
@ -2440,8 +2528,9 @@ static void InitializeRowInfo(bool after_recognition, const MutableIterator &it,
|
||||
werds.push_back(word_res);
|
||||
ltr += word_res->AnyLtrCharsInWord() ? 1 : 0;
|
||||
rtl += word_res->AnyRtlCharsInWord() ? 1 : 0;
|
||||
if (word_res->word->flag(W_REP_CHAR))
|
||||
if (word_res->word->flag(W_REP_CHAR)) {
|
||||
num_leaders++;
|
||||
}
|
||||
}
|
||||
word_res = page_res_it.forward();
|
||||
} while (page_res_it.row() == this_row);
|
||||
@ -2479,13 +2568,15 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
|
||||
// Convert the Tesseract structures to RowInfos
|
||||
// for the paragraph detection algorithm.
|
||||
MutableIterator row(*block_start);
|
||||
if (row.Empty(RIL_TEXTLINE))
|
||||
if (row.Empty(RIL_TEXTLINE)) {
|
||||
return; // end of input already.
|
||||
}
|
||||
|
||||
std::vector<RowInfo> row_infos;
|
||||
do {
|
||||
if (!row.PageResIt()->row())
|
||||
if (!row.PageResIt()->row()) {
|
||||
continue; // empty row.
|
||||
}
|
||||
row.PageResIt()->row()->row->set_para(nullptr);
|
||||
row_infos.emplace_back();
|
||||
RowInfo &ri = row_infos.back();
|
||||
@ -2498,10 +2589,12 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
|
||||
int min_lmargin = row_infos[0].pix_ldistance;
|
||||
int min_rmargin = row_infos[0].pix_rdistance;
|
||||
for (unsigned i = 1; i < row_infos.size(); i++) {
|
||||
if (row_infos[i].pix_ldistance < min_lmargin)
|
||||
if (row_infos[i].pix_ldistance < min_lmargin) {
|
||||
min_lmargin = row_infos[i].pix_ldistance;
|
||||
if (row_infos[i].pix_rdistance < min_rmargin)
|
||||
}
|
||||
if (row_infos[i].pix_rdistance < min_rmargin) {
|
||||
min_rmargin = row_infos[i].pix_rdistance;
|
||||
}
|
||||
}
|
||||
if (min_lmargin > 0 || min_rmargin > 0) {
|
||||
for (auto &row_info : row_infos) {
|
||||
@ -2524,8 +2617,9 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
|
||||
// Now stitch in the row_owners into the rows.
|
||||
row = *block_start;
|
||||
for (auto &row_owner : row_owners) {
|
||||
while (!row.PageResIt()->row())
|
||||
while (!row.PageResIt()->row()) {
|
||||
row.Next(RIL_TEXTLINE);
|
||||
}
|
||||
row.PageResIt()->row()->row->set_para(row_owner);
|
||||
row.Next(RIL_TEXTLINE);
|
||||
}
|
||||
|
@ -123,8 +123,9 @@ const char *ParamContent::GetName() const {
|
||||
return dIt->name_str();
|
||||
} else if (param_type_ == VT_STRING) {
|
||||
return sIt->name_str();
|
||||
} else
|
||||
} else {
|
||||
return "ERROR: ParamContent::GetName()";
|
||||
}
|
||||
}
|
||||
|
||||
// Getter for the description.
|
||||
@ -137,8 +138,9 @@ const char *ParamContent::GetDescription() const {
|
||||
return dIt->info_str();
|
||||
} else if (param_type_ == VT_STRING) {
|
||||
return sIt->info_str();
|
||||
} else
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Getter for the value.
|
||||
|
@ -189,10 +189,11 @@ static void pgeditor_msg( // message display
|
||||
class BlnEventHandler : public SVEventHandler {
|
||||
public:
|
||||
void Notify(const SVEvent *sv_event) override {
|
||||
if (sv_event->type == SVET_DESTROY)
|
||||
if (sv_event->type == SVET_DESTROY) {
|
||||
bln_word_window = nullptr;
|
||||
else if (sv_event->type == SVET_CLICK)
|
||||
} else if (sv_event->type == SVET_CLICK) {
|
||||
show_point(current_page_res, sv_event->x, sv_event->y);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -339,10 +340,12 @@ void Tesseract::do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RE
|
||||
PAGE_RES_IT pr_it(current_page_res);
|
||||
for (WERD_RES *word = pr_it.word(); word != nullptr; word = pr_it.forward()) {
|
||||
(this->*word_painter)(&pr_it);
|
||||
if (display_baselines && pr_it.row() != pr_it.prev_row())
|
||||
if (display_baselines && pr_it.row() != pr_it.prev_row()) {
|
||||
pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
|
||||
if (display_blocks && pr_it.block() != pr_it.prev_block())
|
||||
}
|
||||
if (display_blocks && pr_it.block() != pr_it.prev_block()) {
|
||||
pr_it.block()->block->pdblk.plot(image_win, block_count++, ScrollView::RED);
|
||||
}
|
||||
}
|
||||
image_win->Update();
|
||||
}
|
||||
@ -357,8 +360,9 @@ void Tesseract::do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RE
|
||||
|
||||
void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) {
|
||||
current_page_res = page_res;
|
||||
if (current_page_res->block_res_list.empty())
|
||||
if (current_page_res->block_res_list.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
recog_done = false;
|
||||
stillRunning = true;
|
||||
@ -442,46 +446,52 @@ bool Tesseract::process_cmd_win_event( // UI command semantics
|
||||
delete[] parameter;
|
||||
break;
|
||||
case BOUNDING_BOX_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_BOX);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_BOX);
|
||||
}
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case BLAMER_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_BLAMER);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_BLAMER);
|
||||
}
|
||||
do_re_display(&tesseract::Tesseract::word_display);
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case CORRECT_TEXT_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_TEXT);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_TEXT);
|
||||
}
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case POLYGONAL_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_POLYGONAL);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_POLYGONAL);
|
||||
}
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case BL_NORM_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_BN_POLYGONAL);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_BN_POLYGONAL);
|
||||
}
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case BITMAP_CMD_EVENT:
|
||||
if (new_value[0] == 'T')
|
||||
if (new_value[0] == 'T') {
|
||||
word_display_mode.set(DF_EDGE_STEP);
|
||||
else
|
||||
} else {
|
||||
word_display_mode.reset(DF_EDGE_STEP);
|
||||
}
|
||||
mode = CHANGE_DISP_CMD_EVENT;
|
||||
break;
|
||||
case UNIFORM_DISP_CMD_EVENT:
|
||||
@ -574,8 +584,9 @@ void Tesseract::process_image_event( // action in image win
|
||||
if (event.type == SVET_SELECTION) {
|
||||
down.set_x(event.x + event.x_size);
|
||||
down.set_y(event.y + event.y_size);
|
||||
if (mode == SHOW_POINT_CMD_EVENT)
|
||||
if (mode == SHOW_POINT_CMD_EVENT) {
|
||||
show_point(current_page_res, event.x, event.y);
|
||||
}
|
||||
}
|
||||
|
||||
up.set_x(event.x);
|
||||
@ -707,43 +718,52 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
BoxWord *box_word = word_res->box_word;
|
||||
WERD_CHOICE *best_choice = word_res->best_choice;
|
||||
int length = box_word->length();
|
||||
if (word_res->fontinfo == nullptr)
|
||||
if (word_res->fontinfo == nullptr) {
|
||||
return false;
|
||||
}
|
||||
const FontInfo &font_info = *word_res->fontinfo;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
ScrollView::Color color = ScrollView::GREEN;
|
||||
switch (color_mode) {
|
||||
case CM_SUBSCRIPT:
|
||||
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT)
|
||||
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_SUPERSCRIPT:
|
||||
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
|
||||
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_ITALIC:
|
||||
if (font_info.is_italic())
|
||||
if (font_info.is_italic()) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_BOLD:
|
||||
if (font_info.is_bold())
|
||||
if (font_info.is_bold()) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_FIXEDPITCH:
|
||||
if (font_info.is_fixed_pitch())
|
||||
if (font_info.is_fixed_pitch()) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_SERIF:
|
||||
if (font_info.is_serif())
|
||||
if (font_info.is_serif()) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_SMALLCAPS:
|
||||
if (word_res->small_caps)
|
||||
if (word_res->small_caps) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
case CM_DROPCAPS:
|
||||
if (best_choice->BlobPosition(i) == SP_DROPCAP)
|
||||
if (best_choice->BlobPosition(i) == SP_DROPCAP) {
|
||||
color = ScrollView::RED;
|
||||
}
|
||||
break;
|
||||
// TODO(rays) underline is currently completely unsupported.
|
||||
case CM_UNDERLINE:
|
||||
@ -773,8 +793,9 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
image_win->Pen(c);
|
||||
// cblob iterator
|
||||
C_BLOB_IT c_it(word->cblob_list());
|
||||
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
|
||||
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
|
||||
c_it.data()->bounding_box().plot(image_win);
|
||||
}
|
||||
displayed_something = true;
|
||||
}
|
||||
|
||||
@ -829,8 +850,9 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
image_win->Pen(ScrollView::RED);
|
||||
word_height = word_bb.height();
|
||||
int text_height = 0.50 * word_height;
|
||||
if (text_height > 20)
|
||||
if (text_height > 20) {
|
||||
text_height = 20;
|
||||
}
|
||||
image_win->TextAttributes("Arial", text_height, false, false, false);
|
||||
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
|
||||
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
|
||||
@ -842,10 +864,11 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
displayed_something = true;
|
||||
}
|
||||
|
||||
if (!displayed_something) // display BBox anyway
|
||||
if (!displayed_something) { // display BBox anyway
|
||||
word->bounding_box().plot(image_win,
|
||||
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color),
|
||||
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace tesseract
|
||||
@ -912,14 +935,16 @@ void Tesseract::blob_feature_display(PAGE_RES *page_res, const TBOX &selection_b
|
||||
// Display baseline features.
|
||||
ScrollView *bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
|
||||
ClearFeatureSpaceWindow(baseline, bl_win);
|
||||
for (auto &bl_feature : bl_features)
|
||||
for (auto &bl_feature : bl_features) {
|
||||
RenderIntFeature(bl_win, &bl_feature, ScrollView::GREEN);
|
||||
}
|
||||
bl_win->Update();
|
||||
// Display cn features.
|
||||
ScrollView *cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
|
||||
ClearFeatureSpaceWindow(character, cn_win);
|
||||
for (auto &cn_feature : cn_features)
|
||||
for (auto &cn_feature : cn_features) {
|
||||
RenderIntFeature(cn_win, &cn_feature, ScrollView::GREEN);
|
||||
}
|
||||
cn_win->Update();
|
||||
|
||||
it->DeleteCurrentWord();
|
||||
|
@ -43,8 +43,9 @@ FILE *Tesseract::init_recog_training(const char *filename) {
|
||||
|
||||
std::string output_fname = filename;
|
||||
const char *lastdot = strrchr(output_fname.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
if (lastdot != nullptr) {
|
||||
output_fname[lastdot - output_fname.c_str()] = '\0';
|
||||
}
|
||||
output_fname += ".txt";
|
||||
FILE *output_file = fopen(output_fname.c_str(), "a+");
|
||||
if (output_file == nullptr) {
|
||||
@ -56,8 +57,9 @@ FILE *Tesseract::init_recog_training(const char *filename) {
|
||||
|
||||
// Copies the bounding box from page_res_it->word() to the given TBOX.
|
||||
static bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
|
||||
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr)
|
||||
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr) {
|
||||
page_res_it->forward();
|
||||
}
|
||||
|
||||
if (page_res_it->word() != nullptr) {
|
||||
*tbox = page_res_it->word()->word->bounding_box();
|
||||
@ -85,8 +87,9 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
|
||||
volatile ETEXT_DESC *monitor, FILE *output_file) {
|
||||
std::string box_fname = filename;
|
||||
const char *lastdot = strrchr(box_fname.c_str(), '.');
|
||||
if (lastdot != nullptr)
|
||||
if (lastdot != nullptr) {
|
||||
box_fname[lastdot - box_fname.c_str()] = '\0';
|
||||
}
|
||||
box_fname += ".box";
|
||||
// ReadNextBox() will close box_file
|
||||
FILE *box_file = fopen(box_fname.c_str(), "r");
|
||||
@ -142,8 +145,9 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
|
||||
int total_words = 0;
|
||||
for (page_res_it.restart_page(); page_res_it.block() != nullptr; page_res_it.forward()) {
|
||||
if (page_res_it.word()) {
|
||||
if (page_res_it.word()->uch_set == nullptr)
|
||||
if (page_res_it.word()->uch_set == nullptr) {
|
||||
page_res_it.word()->SetupFake(unicharset);
|
||||
}
|
||||
total_words++;
|
||||
}
|
||||
}
|
||||
@ -164,8 +168,9 @@ static void PrintPath(int length, const BLOB_CHOICE **blob_choices, const UNICHA
|
||||
const BLOB_CHOICE *blob_choice = blob_choices[i];
|
||||
fprintf(output_file, "%s", unicharset.id_to_unichar(blob_choice->unichar_id()));
|
||||
rating += blob_choice->rating();
|
||||
if (certainty > blob_choice->certainty())
|
||||
if (certainty > blob_choice->certainty()) {
|
||||
certainty = blob_choice->certainty();
|
||||
}
|
||||
}
|
||||
fprintf(output_file, "\t%s\t%.4f\t%.4f\n", label, rating, certainty);
|
||||
}
|
||||
|
@ -68,14 +68,16 @@ void Tesseract::set_done(WERD_RES *word, int16_t pass) {
|
||||
word->best_choice->permuter() == USER_DAWG_PERM;
|
||||
if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
|
||||
one_ell_conflict(word, false)) {
|
||||
if (tessedit_rejection_debug)
|
||||
if (tessedit_rejection_debug) {
|
||||
tprintf("one_ell_conflict detected\n");
|
||||
}
|
||||
word->done = false;
|
||||
}
|
||||
if (word->done &&
|
||||
((!word_from_dict && word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
|
||||
if (tessedit_rejection_debug)
|
||||
if (tessedit_rejection_debug) {
|
||||
tprintf("non-dict or ambig word detected\n");
|
||||
}
|
||||
word->done = false;
|
||||
}
|
||||
if (tessedit_rejection_debug) {
|
||||
@ -104,8 +106,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
|
||||
0: Rays original heuristic - the baseline
|
||||
*/
|
||||
if (tessedit_reject_mode == 0) {
|
||||
if (!word->done)
|
||||
if (!word->done) {
|
||||
reject_poor_matches(word);
|
||||
}
|
||||
} else if (tessedit_reject_mode == 5) {
|
||||
/*
|
||||
5: Reject I/1/l from words where there is no strong contextual confirmation;
|
||||
@ -122,12 +125,14 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
|
||||
mechanism can be turned on or off independently. This works WITHOUT
|
||||
affecting the done flag setting.
|
||||
*/
|
||||
if (rej_use_tess_accepted && !word->tess_accepted)
|
||||
if (rej_use_tess_accepted && !word->tess_accepted) {
|
||||
word->reject_map.rej_word_not_tess_accepted();
|
||||
}
|
||||
|
||||
if (rej_use_tess_blanks &&
|
||||
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr))
|
||||
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
|
||||
word->reject_map.rej_word_contains_blanks();
|
||||
}
|
||||
|
||||
WERD_CHOICE *best_choice = word->best_choice;
|
||||
if (rej_use_good_perm) {
|
||||
@ -144,8 +149,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
|
||||
offset += best_choice->unichar_lengths()[i++]) {
|
||||
if (word->reject_map[i].accepted() &&
|
||||
word->uch_set->get_isalpha(best_choice->unichar_string().c_str() + offset,
|
||||
best_choice->unichar_lengths()[i]))
|
||||
best_choice->unichar_lengths()[i])) {
|
||||
word->reject_map[i].setrej_bad_permuter();
|
||||
}
|
||||
// rej alpha
|
||||
}
|
||||
}
|
||||
@ -160,8 +166,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
|
||||
ASSERT_HOST("Fatal error encountered!" == nullptr);
|
||||
}
|
||||
|
||||
if (tessedit_image_border > -1)
|
||||
if (tessedit_image_border > -1) {
|
||||
reject_edge_blobs(word);
|
||||
}
|
||||
|
||||
check_debug_pt(word, 10);
|
||||
if (tessedit_rejection_debug) {
|
||||
@ -181,9 +188,10 @@ void reject_blanks(WERD_RES *word) {
|
||||
|
||||
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
|
||||
offset += word->best_choice->unichar_lengths()[i], i += 1) {
|
||||
if (word->best_choice->unichar_string()[offset] == ' ')
|
||||
if (word->best_choice->unichar_string()[offset] == ' ') {
|
||||
// rej unrecognised blobs
|
||||
word->reject_map[i].setrej_tess_failure();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -203,10 +211,11 @@ void Tesseract::reject_I_1_L(WERD_RES *word) {
|
||||
void reject_poor_matches(WERD_RES *word) {
|
||||
float threshold = compute_reject_threshold(word->best_choice);
|
||||
for (int i = 0; i < word->best_choice->length(); ++i) {
|
||||
if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
|
||||
if (word->best_choice->unichar_id(i) == UNICHAR_SPACE) {
|
||||
word->reject_map[i].setrej_tess_failure();
|
||||
else if (word->best_choice->certainty(i) < threshold)
|
||||
} else if (word->best_choice->certainty(i) < threshold) {
|
||||
word->reject_map[i].setrej_poor_match();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -301,8 +310,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
If there are no occurrences of the conflict set characters then the word
|
||||
is OK.
|
||||
*/
|
||||
if (strpbrk(word, conflict_set_I_l_1.c_str()) == nullptr)
|
||||
if (strpbrk(word, conflict_set_I_l_1.c_str()) == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
There is a conflict if there are NO other (confirmed) alphanumerics apart
|
||||
@ -310,13 +320,15 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
*/
|
||||
|
||||
for (i = 0, offset = 0, non_conflict_set_char = false; (i < word_len) && !non_conflict_set_char;
|
||||
offset += lengths[i++])
|
||||
offset += lengths[i++]) {
|
||||
non_conflict_set_char = (word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
|
||||
word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
|
||||
!conflict_set_I_l_1.contains(word[offset]);
|
||||
}
|
||||
if (!non_conflict_set_char) {
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
reject_I_1_L(word_res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -341,8 +353,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
|
||||
if (safe_dict_word(word_res) > 0) {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
|
||||
@ -354,8 +367,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
|
||||
if (safe_dict_word(word_res) > 0) {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
|
||||
@ -380,16 +394,18 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
|
||||
if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'l') {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
|
||||
if (safe_dict_word(word_res) > 0)
|
||||
if (safe_dict_word(word_res) > 0) {
|
||||
return false;
|
||||
else
|
||||
} else {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
|
||||
}
|
||||
} else if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'I') {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
|
||||
if (safe_dict_word(word_res) > 0)
|
||||
if (safe_dict_word(word_res) > 0) {
|
||||
return false;
|
||||
else
|
||||
} else {
|
||||
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
|
||||
}
|
||||
}
|
||||
/*
|
||||
For strings containing digits:
|
||||
@ -407,8 +423,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
offset += word_res->best_choice->unichar_lengths()[i++]) {
|
||||
if ((!allow_1s || (word[offset] != '1')) &&
|
||||
conflict_set_I_l_1.contains(word[offset])) {
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
word_res->reject_map[i].setrej_1Il_conflict();
|
||||
}
|
||||
conflict = true;
|
||||
}
|
||||
}
|
||||
@ -423,16 +440,19 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
first_alphanum_index_ = first_alphanum_index(word, lengths);
|
||||
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
|
||||
if (conflict_set_I_l_1.contains(word[first_alphanum_offset_])) {
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
|
||||
}
|
||||
return true;
|
||||
} else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (word_type == AC_UPPER_CASE) {
|
||||
return false;
|
||||
} else {
|
||||
if (update_map)
|
||||
if (update_map) {
|
||||
reject_I_1_L(word_res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -443,8 +463,9 @@ int16_t Tesseract::first_alphanum_index(const char *word, const char *word_lengt
|
||||
|
||||
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
|
||||
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
|
||||
unicharset.get_isdigit(word + offset, word_lengths[i]))
|
||||
unicharset.get_isdigit(word + offset, word_lengths[i])) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@ -455,8 +476,9 @@ int16_t Tesseract::first_alphanum_offset(const char *word, const char *word_leng
|
||||
|
||||
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
|
||||
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
|
||||
unicharset.get_isdigit(word + offset, word_lengths[i]))
|
||||
unicharset.get_isdigit(word + offset, word_lengths[i])) {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@ -467,8 +489,9 @@ int16_t Tesseract::alpha_count(const char *word, const char *word_lengths) {
|
||||
int16_t count = 0;
|
||||
|
||||
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
|
||||
if (unicharset.get_isalpha(word + offset, word_lengths[i]))
|
||||
if (unicharset.get_isalpha(word + offset, word_lengths[i])) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
@ -479,8 +502,9 @@ bool Tesseract::word_contains_non_1_digit(const char *word, const char *word_len
|
||||
|
||||
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
|
||||
if (unicharset.get_isdigit(word + offset, word_lengths[i]) &&
|
||||
(word_lengths[i] != 1 || word[offset] != '1'))
|
||||
(word_lengths[i] != 1 || word[offset] != '1')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -503,17 +527,20 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {
|
||||
accepted_1Il = true;
|
||||
} else {
|
||||
if (word->uch_set->get_isalpha(s + offset, lengths[i]) ||
|
||||
word->uch_set->get_isdigit(s + offset, lengths[i]))
|
||||
word->uch_set->get_isdigit(s + offset, lengths[i])) {
|
||||
return; // >=1 non 1Il ch accepted
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!accepted_1Il)
|
||||
if (!accepted_1Il) {
|
||||
return; // Nothing to worry about
|
||||
}
|
||||
|
||||
for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
|
||||
if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted())
|
||||
if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted()) {
|
||||
word->reject_map[i].setrej_postNN_1Il();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -535,33 +562,38 @@ void Tesseract::reject_mostly_rejects(WERD_RES *word) {
|
||||
/* Reject the whole of the word if the fraction of rejects exceeds a limit */
|
||||
|
||||
if (static_cast<float>(word->reject_map.reject_count()) / word->reject_map.length() >=
|
||||
rej_whole_of_mostly_reject_word_fract)
|
||||
rej_whole_of_mostly_reject_word_fract) {
|
||||
word->reject_map.rej_word_mostly_rej();
|
||||
}
|
||||
}
|
||||
|
||||
bool Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
|
||||
int16_t char_quality;
|
||||
int16_t accepted_char_quality;
|
||||
|
||||
if (word->best_choice->unichar_lengths().length() <= 1)
|
||||
if (word->best_choice->unichar_lengths().length() <= 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ok_repeated_ch_non_alphanum_wds.contains(word->best_choice->unichar_string()[0]))
|
||||
if (!ok_repeated_ch_non_alphanum_wds.contains(word->best_choice->unichar_string()[0])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UNICHAR_ID uch_id = word->best_choice->unichar_id(0);
|
||||
for (int i = 1; i < word->best_choice->length(); ++i) {
|
||||
if (word->best_choice->unichar_id(i) != uch_id)
|
||||
if (word->best_choice->unichar_id(i) != uch_id) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
word_char_quality(word, &char_quality, &accepted_char_quality);
|
||||
|
||||
if ((word->best_choice->unichar_lengths().length() == char_quality) &&
|
||||
(char_quality == accepted_char_quality))
|
||||
(char_quality == accepted_char_quality)) {
|
||||
return true;
|
||||
else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) {
|
||||
@ -581,18 +613,20 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
|
||||
TBOX out_box;
|
||||
float aspect_ratio;
|
||||
|
||||
if (tessedit_lower_flip_hyphen <= 1)
|
||||
if (tessedit_lower_flip_hyphen <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
int num_blobs = word_res->rebuild_word->NumBlobs();
|
||||
UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-");
|
||||
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
|
||||
TBLOB *blob = word_res->rebuild_word->blobs[i];
|
||||
out_box = blob->bounding_box();
|
||||
if (i + 1 == num_blobs)
|
||||
if (i + 1 == num_blobs) {
|
||||
next_left = 9999;
|
||||
else
|
||||
} else {
|
||||
next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
|
||||
}
|
||||
// Don't touch small or touching blobs - it is too dangerous.
|
||||
if ((out_box.width() > 8 * word_res->denorm.x_scale()) && (out_box.left() > prev_right) &&
|
||||
(out_box.right() < next_left)) {
|
||||
@ -603,20 +637,24 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
|
||||
word_res->uch_set->get_enabled(unichar_dash)) {
|
||||
/* Certain HYPHEN */
|
||||
best_choice->set_unichar_id(unichar_dash, i);
|
||||
if (word_res->reject_map[i].rejected())
|
||||
if (word_res->reject_map[i].rejected()) {
|
||||
word_res->reject_map[i].setrej_hyphen_accept();
|
||||
}
|
||||
}
|
||||
if ((aspect_ratio > tessedit_lower_flip_hyphen) && word_res->reject_map[i].accepted())
|
||||
if ((aspect_ratio > tessedit_lower_flip_hyphen) && word_res->reject_map[i].accepted()) {
|
||||
// Suspected HYPHEN
|
||||
word_res->reject_map[i].setrej_hyphen();
|
||||
}
|
||||
} else if (best_choice->unichar_id(i) == unichar_dash) {
|
||||
if ((aspect_ratio >= tessedit_upper_flip_hyphen) && (word_res->reject_map[i].rejected()))
|
||||
if ((aspect_ratio >= tessedit_upper_flip_hyphen) && (word_res->reject_map[i].rejected())) {
|
||||
word_res->reject_map[i].setrej_hyphen_accept();
|
||||
}
|
||||
// Certain HYPHEN
|
||||
|
||||
if ((aspect_ratio <= tessedit_lower_flip_hyphen) && (word_res->reject_map[i].accepted()))
|
||||
if ((aspect_ratio <= tessedit_lower_flip_hyphen) && (word_res->reject_map[i].accepted())) {
|
||||
// Suspected HYPHEN
|
||||
word_res->reject_map[i].setrej_hyphen();
|
||||
}
|
||||
}
|
||||
}
|
||||
prev_right = out_box.right();
|
||||
@ -631,8 +669,9 @@ void Tesseract::flip_0O(WERD_RES *word_res) {
|
||||
int i;
|
||||
TBOX out_box;
|
||||
|
||||
if (!tessedit_flip_0O)
|
||||
if (!tessedit_flip_0O) {
|
||||
return;
|
||||
}
|
||||
|
||||
int num_blobs = word_res->rebuild_word->NumBlobs();
|
||||
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
|
||||
@ -641,8 +680,9 @@ void Tesseract::flip_0O(WERD_RES *word_res) {
|
||||
word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) {
|
||||
out_box = blob->bounding_box();
|
||||
if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) ||
|
||||
(out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4))
|
||||
(out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) {
|
||||
return; // Beware words with sub/superscripts
|
||||
}
|
||||
}
|
||||
}
|
||||
UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0");
|
||||
|
@ -41,8 +41,9 @@ ResultIterator::ResultIterator(const LTRResultIterator &resit) : LTRResultIterat
|
||||
|
||||
auto *p = ParamUtils::FindParam<BoolParam>(
|
||||
"preserve_interword_spaces", GlobalParams()->bool_params, tesseract_->params()->bool_params);
|
||||
if (p != nullptr)
|
||||
if (p != nullptr) {
|
||||
preserve_interword_spaces_ = (bool)(*p);
|
||||
}
|
||||
|
||||
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
|
||||
MoveToLogicalStartOfTextline();
|
||||
@ -57,8 +58,9 @@ bool ResultIterator::ParagraphIsLtr() const {
|
||||
}
|
||||
|
||||
bool ResultIterator::CurrentParagraphIsLtr() const {
|
||||
if (!it_->word())
|
||||
if (!it_->word()) {
|
||||
return true; // doesn't matter.
|
||||
}
|
||||
LTRResultIterator it(*this);
|
||||
it.RestartParagraph();
|
||||
// Try to figure out the ltr-ness of the paragraph. The rules below
|
||||
@ -95,17 +97,20 @@ bool ResultIterator::CurrentParagraphIsLtr() const {
|
||||
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
|
||||
num_ltr += rightmost_ltr ? 1 : 0;
|
||||
}
|
||||
if (leftmost_rtl)
|
||||
if (leftmost_rtl) {
|
||||
return false;
|
||||
if (rightmost_ltr)
|
||||
}
|
||||
if (rightmost_ltr) {
|
||||
return true;
|
||||
}
|
||||
// First line is ambiguous. Take statistics on the whole paragraph.
|
||||
if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA))
|
||||
if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) {
|
||||
do {
|
||||
StrongScriptDirection dir = it.WordDirection();
|
||||
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
|
||||
num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0;
|
||||
} while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA));
|
||||
}
|
||||
return num_ltr >= num_rtl;
|
||||
}
|
||||
|
||||
@ -116,12 +121,14 @@ const int ResultIterator::kComplexWord = -3;
|
||||
void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
|
||||
blob_indices->clear();
|
||||
if (Empty(RIL_WORD))
|
||||
if (Empty(RIL_WORD)) {
|
||||
return;
|
||||
}
|
||||
if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) {
|
||||
// Easy! just return the blobs in order;
|
||||
for (int i = 0; i < word_length_; i++)
|
||||
for (int i = 0; i < word_length_; i++) {
|
||||
blob_indices->push_back(i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -159,8 +166,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
}
|
||||
if (j < word_length_ && letter_types[j] == U_EURO_NUM) {
|
||||
// The sequence [i..j] should be converted to all European Numbers.
|
||||
for (int k = i; k < j; k++)
|
||||
for (int k = i; k < j; k++) {
|
||||
letter_types[k] = U_EURO_NUM;
|
||||
}
|
||||
}
|
||||
j = i - 1;
|
||||
while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) {
|
||||
@ -168,8 +176,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
}
|
||||
if (j > -1 && letter_types[j] == U_EURO_NUM) {
|
||||
// The sequence [j..i] should be converted to all European Numbers.
|
||||
for (int k = j; k <= i; k++)
|
||||
for (int k = j; k <= i; k++) {
|
||||
letter_types[k] = U_EURO_NUM;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -192,8 +201,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
}
|
||||
}
|
||||
// [i..last_good] is the L sequence
|
||||
for (int k = i; k <= last_good; k++)
|
||||
for (int k = i; k <= last_good; k++) {
|
||||
letter_types[k] = U_LTR;
|
||||
}
|
||||
i = last_good + 1;
|
||||
} else {
|
||||
letter_types[i] = U_RTL;
|
||||
@ -212,8 +222,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
|
||||
for (; j >= 0 && letter_types[j] != U_RTL; j--) {
|
||||
} // pass
|
||||
// Now (j, i] is LTR
|
||||
for (int k = j + 1; k <= i; k++)
|
||||
for (int k = j + 1; k <= i; k++) {
|
||||
blob_indices->push_back(k);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
@ -260,8 +271,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResu
|
||||
// A LTRResultIterator goes strictly left-to-right word order.
|
||||
LTRResultIterator ltr_it(resit);
|
||||
ltr_it.RestartRow();
|
||||
if (ltr_it.Empty(RIL_WORD))
|
||||
if (ltr_it.Empty(RIL_WORD)) {
|
||||
return;
|
||||
}
|
||||
do {
|
||||
directions->push_back(ltr_it.WordDirection());
|
||||
} while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));
|
||||
@ -274,8 +286,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
const std::vector<StrongScriptDirection> &word_dirs,
|
||||
std::vector<int> *reading_order) {
|
||||
reading_order->clear();
|
||||
if (word_dirs.size() == 0)
|
||||
if (word_dirs.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Take all of the runs of minor direction words and insert them
|
||||
// in reverse order.
|
||||
@ -305,14 +318,16 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
// Scan for the beginning of the minor left-to-right run.
|
||||
int left = neutral_end;
|
||||
for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) {
|
||||
if (word_dirs[i] == DIR_LEFT_TO_RIGHT)
|
||||
if (word_dirs[i] == DIR_LEFT_TO_RIGHT) {
|
||||
left = i;
|
||||
}
|
||||
}
|
||||
reading_order->push_back(kMinorRunStart);
|
||||
for (int i = left; i < word_dirs.size(); i++) {
|
||||
reading_order->push_back(i);
|
||||
if (word_dirs[i] == DIR_MIX)
|
||||
if (word_dirs[i] == DIR_MIX) {
|
||||
reading_order->push_back(kComplexWord);
|
||||
}
|
||||
}
|
||||
reading_order->push_back(kMinorRunEnd);
|
||||
start = left - 1;
|
||||
@ -322,12 +337,15 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
for (int i = start; i != end;) {
|
||||
if (word_dirs[i] == minor_direction) {
|
||||
int j = i;
|
||||
while (j != end && word_dirs[j] != major_direction)
|
||||
while (j != end && word_dirs[j] != major_direction) {
|
||||
j += major_step;
|
||||
if (j == end)
|
||||
}
|
||||
if (j == end) {
|
||||
j -= major_step;
|
||||
while (j != i && word_dirs[j] != minor_direction)
|
||||
}
|
||||
while (j != i && word_dirs[j] != minor_direction) {
|
||||
j -= major_step;
|
||||
}
|
||||
// [j..i] is a minor direction run.
|
||||
reading_order->push_back(kMinorRunStart);
|
||||
for (int k = j; k != i; k -= major_step) {
|
||||
@ -338,8 +356,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
|
||||
i = j + major_step;
|
||||
} else {
|
||||
reading_order->push_back(i);
|
||||
if (word_dirs[i] == DIR_MIX)
|
||||
if (word_dirs[i] == DIR_MIX) {
|
||||
reading_order->push_back(kComplexWord);
|
||||
}
|
||||
i += major_step;
|
||||
}
|
||||
}
|
||||
@ -363,30 +382,34 @@ void ResultIterator::MoveToLogicalStartOfWord() {
|
||||
}
|
||||
std::vector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
if (blob_order.size() == 0 || blob_order[0] == 0)
|
||||
if (blob_order.size() == 0 || blob_order[0] == 0) {
|
||||
return;
|
||||
}
|
||||
BeginWord(blob_order[0]);
|
||||
}
|
||||
|
||||
bool ResultIterator::IsAtFinalSymbolOfWord() const {
|
||||
if (!it_->word())
|
||||
if (!it_->word()) {
|
||||
return true;
|
||||
}
|
||||
std::vector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
return blob_order.size() == 0 || blob_order.back() == blob_index_;
|
||||
}
|
||||
|
||||
bool ResultIterator::IsAtFirstSymbolOfWord() const {
|
||||
if (!it_->word())
|
||||
if (!it_->word()) {
|
||||
return true;
|
||||
}
|
||||
std::vector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
return blob_order.size() == 0 || blob_order[0] == blob_index_;
|
||||
}
|
||||
|
||||
void ResultIterator::AppendSuffixMarks(std::string *text) const {
|
||||
if (!it_->word())
|
||||
if (!it_->word()) {
|
||||
return;
|
||||
}
|
||||
bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
|
||||
// scan forward to see what meta-information the word ordering algorithm
|
||||
// left us.
|
||||
@ -429,15 +452,18 @@ void ResultIterator::MoveToLogicalStartOfTextline() {
|
||||
&word_indices);
|
||||
int i = 0;
|
||||
for (; i < word_indices.size() && word_indices[i] < 0; i++) {
|
||||
if (word_indices[i] == kMinorRunStart)
|
||||
if (word_indices[i] == kMinorRunStart) {
|
||||
in_minor_direction_ = true;
|
||||
else if (word_indices[i] == kMinorRunEnd)
|
||||
} else if (word_indices[i] == kMinorRunEnd) {
|
||||
in_minor_direction_ = false;
|
||||
}
|
||||
}
|
||||
if (in_minor_direction_)
|
||||
if (in_minor_direction_) {
|
||||
at_beginning_of_minor_run_ = true;
|
||||
if (i >= word_indices.size())
|
||||
}
|
||||
if (i >= word_indices.size()) {
|
||||
return;
|
||||
}
|
||||
int first_word_index = word_indices[i];
|
||||
for (int j = 0; j < first_word_index; j++) {
|
||||
PageIterator::Next(RIL_WORD);
|
||||
@ -454,14 +480,16 @@ void ResultIterator::Begin() {
|
||||
}
|
||||
|
||||
bool ResultIterator::Next(PageIteratorLevel level) {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return false; // already at end!
|
||||
}
|
||||
switch (level) {
|
||||
case RIL_BLOCK: // explicit fall-through
|
||||
case RIL_PARA: // explicit fall-through
|
||||
case RIL_TEXTLINE:
|
||||
if (!PageIterator::Next(level))
|
||||
if (!PageIterator::Next(level)) {
|
||||
return false;
|
||||
}
|
||||
if (IsWithinFirstTextlineOfParagraph()) {
|
||||
// if we've advanced to a new paragraph,
|
||||
// recalculate current_paragraph_is_ltr_
|
||||
@ -474,8 +502,9 @@ bool ResultIterator::Next(PageIteratorLevel level) {
|
||||
std::vector<int> blob_order;
|
||||
CalculateBlobOrder(&blob_order);
|
||||
int next_blob = 0;
|
||||
while (next_blob < blob_order.size() && blob_index_ != blob_order[next_blob])
|
||||
while (next_blob < blob_order.size() && blob_index_ != blob_order[next_blob]) {
|
||||
next_blob++;
|
||||
}
|
||||
next_blob++;
|
||||
if (next_blob < blob_order.size()) {
|
||||
// we're in the same word; simply advance one blob.
|
||||
@ -488,22 +517,26 @@ bool ResultIterator::Next(PageIteratorLevel level) {
|
||||
// Fall through.
|
||||
case RIL_WORD: // explicit fall-through.
|
||||
{
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return Next(RIL_BLOCK);
|
||||
}
|
||||
std::vector<int> word_indices;
|
||||
int this_word_index = LTRWordIndex();
|
||||
CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &word_indices);
|
||||
int final_real_index = word_indices.size() - 1;
|
||||
while (final_real_index > 0 && word_indices[final_real_index] < 0)
|
||||
while (final_real_index > 0 && word_indices[final_real_index] < 0) {
|
||||
final_real_index--;
|
||||
}
|
||||
for (int i = 0; i < final_real_index; i++) {
|
||||
if (word_indices[i] == this_word_index) {
|
||||
int j = i + 1;
|
||||
for (; j < final_real_index && word_indices[j] < 0; j++) {
|
||||
if (word_indices[j] == kMinorRunStart)
|
||||
if (word_indices[j] == kMinorRunStart) {
|
||||
in_minor_direction_ = true;
|
||||
if (word_indices[j] == kMinorRunEnd)
|
||||
}
|
||||
if (word_indices[j] == kMinorRunEnd) {
|
||||
in_minor_direction_ = false;
|
||||
}
|
||||
}
|
||||
at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart);
|
||||
// awesome, we move to word_indices[j]
|
||||
@ -530,37 +563,44 @@ bool ResultIterator::Next(PageIteratorLevel level) {
|
||||
}
|
||||
|
||||
bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
|
||||
if (it_->block() == nullptr)
|
||||
if (it_->block() == nullptr) {
|
||||
return false; // Already at the end!
|
||||
if (it_->word() == nullptr)
|
||||
}
|
||||
if (it_->word() == nullptr) {
|
||||
return true; // In an image block.
|
||||
if (level == RIL_SYMBOL)
|
||||
}
|
||||
if (level == RIL_SYMBOL) {
|
||||
return true; // Always at beginning of a symbol.
|
||||
}
|
||||
|
||||
bool at_word_start = IsAtFirstSymbolOfWord();
|
||||
if (level == RIL_WORD)
|
||||
if (level == RIL_WORD) {
|
||||
return at_word_start;
|
||||
}
|
||||
|
||||
ResultIterator line_start(*this);
|
||||
// move to the first word in the line...
|
||||
line_start.MoveToLogicalStartOfTextline();
|
||||
|
||||
bool at_textline_start = at_word_start && *line_start.it_ == *it_;
|
||||
if (level == RIL_TEXTLINE)
|
||||
if (level == RIL_TEXTLINE) {
|
||||
return at_textline_start;
|
||||
}
|
||||
|
||||
// now we move to the left-most word...
|
||||
line_start.RestartRow();
|
||||
bool at_block_start =
|
||||
at_textline_start && line_start.it_->block() != line_start.it_->prev_block();
|
||||
if (level == RIL_BLOCK)
|
||||
if (level == RIL_BLOCK) {
|
||||
return at_block_start;
|
||||
}
|
||||
|
||||
bool at_para_start =
|
||||
at_block_start || (at_textline_start && line_start.it_->row()->row->para() !=
|
||||
line_start.it_->prev_row()->row->para());
|
||||
if (level == RIL_PARA)
|
||||
if (level == RIL_PARA) {
|
||||
return at_para_start;
|
||||
}
|
||||
|
||||
ASSERT_HOST(false); // shouldn't happen.
|
||||
return false;
|
||||
@ -572,8 +612,9 @@ bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
|
||||
* PageIterator.
|
||||
*/
|
||||
bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const {
|
||||
if (Empty(element))
|
||||
if (Empty(element)) {
|
||||
return true; // Already at the end!
|
||||
}
|
||||
// The result is true if we step forward by element and find we are
|
||||
// at the end of the page or at beginning of *all* levels in:
|
||||
// [level, element).
|
||||
@ -582,20 +623,23 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel
|
||||
// word on a line, so we also have to be at the first symbol in a word.
|
||||
ResultIterator next(*this);
|
||||
next.Next(element);
|
||||
if (next.Empty(element))
|
||||
if (next.Empty(element)) {
|
||||
return true; // Reached the end of the page.
|
||||
}
|
||||
while (element > level) {
|
||||
element = static_cast<PageIteratorLevel>(element - 1);
|
||||
if (!next.IsAtBeginningOf(element))
|
||||
if (!next.IsAtBeginningOf(element)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the number of blanks before the current word.
|
||||
int ResultIterator::BlanksBeforeWord() const {
|
||||
if (CurrentParagraphIsLtr())
|
||||
if (CurrentParagraphIsLtr()) {
|
||||
return LTRResultIterator::BlanksBeforeWord();
|
||||
}
|
||||
return IsAtBeginningOf(RIL_TEXTLINE) ? 0 : 1;
|
||||
}
|
||||
|
||||
@ -604,8 +648,9 @@ int ResultIterator::BlanksBeforeWord() const {
|
||||
* object at the given level. Use delete [] to free after use.
|
||||
*/
|
||||
char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
if (it_->word() == nullptr)
|
||||
if (it_->word() == nullptr) {
|
||||
return nullptr; // Already at the end!
|
||||
}
|
||||
std::string text;
|
||||
switch (level) {
|
||||
case RIL_BLOCK: {
|
||||
@ -631,8 +676,9 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
text += reading_direction_is_ltr ? kLRM : kRLM;
|
||||
}
|
||||
text = it_->word()->BestUTF8(blob_index_, false);
|
||||
if (IsAtFinalSymbolOfWord())
|
||||
if (IsAtFinalSymbolOfWord()) {
|
||||
AppendSuffixMarks(&text);
|
||||
}
|
||||
} break;
|
||||
}
|
||||
int length = text.length() + 1;
|
||||
@ -659,8 +705,9 @@ std::vector<std::vector<std::pair<const char *, float>>> *ResultIterator::GetBes
|
||||
}
|
||||
|
||||
void ResultIterator::AppendUTF8WordText(std::string *text) const {
|
||||
if (!it_->word())
|
||||
if (!it_->word()) {
|
||||
return;
|
||||
}
|
||||
ASSERT_HOST(it_->word()->best_choice != nullptr);
|
||||
bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
|
||||
if (at_beginning_of_minor_run_) {
|
||||
@ -721,8 +768,9 @@ void ResultIterator::AppendUTF8ParagraphText(std::string *text) const {
|
||||
ResultIterator it(*this);
|
||||
it.RestartParagraph();
|
||||
it.MoveToLogicalStartOfTextline();
|
||||
if (it.Empty(RIL_WORD))
|
||||
if (it.Empty(RIL_WORD)) {
|
||||
return;
|
||||
}
|
||||
do {
|
||||
it.IterateAndAppendUTF8TextlineText(text);
|
||||
} while (it.it_->block() != nullptr && !it.IsAtBeginningOf(RIL_PARA));
|
||||
@ -732,8 +780,9 @@ bool ResultIterator::BidiDebug(int min_level) const {
|
||||
int debug_level = 1;
|
||||
auto *p = ParamUtils::FindParam<IntParam>("bidi_debug", GlobalParams()->int_params,
|
||||
tesseract_->params()->int_params);
|
||||
if (p != nullptr)
|
||||
if (p != nullptr) {
|
||||
debug_level = (int32_t)(*p);
|
||||
}
|
||||
return debug_level >= min_level;
|
||||
}
|
||||
|
||||
|
@ -23,15 +23,17 @@ namespace tesseract {
|
||||
|
||||
static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
|
||||
int num_chopped = 0;
|
||||
for (int i = 0; i < num_unichars; i++)
|
||||
for (int i = 0; i < num_unichars; i++) {
|
||||
num_chopped += word->best_state[i];
|
||||
}
|
||||
return num_chopped;
|
||||
}
|
||||
|
||||
static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
|
||||
int num_chopped = 0;
|
||||
for (int i = 0; i < num_unichars; i++)
|
||||
for (int i = 0; i < num_unichars; i++) {
|
||||
num_chopped += word->best_state[word->best_state.size() - 1 - i];
|
||||
}
|
||||
return num_chopped;
|
||||
}
|
||||
|
||||
@ -46,14 +48,18 @@ static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_b
|
||||
ScriptPos *trailing_pos, int *num_trailing_outliers) {
|
||||
ScriptPos sp_unused1, sp_unused2;
|
||||
int unused1, unused2;
|
||||
if (!leading_pos)
|
||||
if (!leading_pos) {
|
||||
leading_pos = &sp_unused1;
|
||||
if (!num_leading_outliers)
|
||||
}
|
||||
if (!num_leading_outliers) {
|
||||
num_leading_outliers = &unused1;
|
||||
if (!trailing_pos)
|
||||
}
|
||||
if (!trailing_pos) {
|
||||
trailing_pos = &sp_unused2;
|
||||
if (!num_trailing_outliers)
|
||||
}
|
||||
if (!num_trailing_outliers) {
|
||||
num_trailing_outliers = &unused2;
|
||||
}
|
||||
|
||||
*num_leading_outliers = *num_trailing_outliers = 0;
|
||||
*leading_pos = *trailing_pos = SP_NORMAL;
|
||||
@ -133,8 +139,9 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
|
||||
ScriptPos rpos;
|
||||
YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, nullptr, nullptr, &rpos,
|
||||
&num_remainder_trailing);
|
||||
if (num_trailing > 0 && rpos != sp_trailing)
|
||||
if (num_trailing > 0 && rpos != sp_trailing) {
|
||||
num_remainder_trailing = 0;
|
||||
}
|
||||
if (num_remainder_trailing > 0 && last_char_certainty < trailing_certainty) {
|
||||
trailing_certainty = last_char_certainty;
|
||||
}
|
||||
@ -147,8 +154,9 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
|
||||
ScriptPos lpos;
|
||||
YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, &lpos, &num_remainder_leading,
|
||||
nullptr, nullptr);
|
||||
if (num_leading > 0 && lpos != sp_leading)
|
||||
if (num_leading > 0 && lpos != sp_leading) {
|
||||
num_remainder_leading = 0;
|
||||
}
|
||||
if (num_remainder_leading > 0 && first_char_certainty < leading_certainty) {
|
||||
leading_certainty = first_char_certainty;
|
||||
}
|
||||
@ -177,10 +185,12 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
|
||||
if (superscript_debug >= 2) {
|
||||
tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ", avg_certainty,
|
||||
unlikely_threshold);
|
||||
if (num_leading)
|
||||
if (num_leading) {
|
||||
tprintf("Orig. leading (min): %.2f ", leading_certainty);
|
||||
if (num_trailing)
|
||||
}
|
||||
if (num_trailing) {
|
||||
tprintf("Orig. trailing (min): %.2f ", trailing_certainty);
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
@ -569,10 +579,12 @@ bool Tesseract::BelievableSuperscript(bool debug, const WERD_RES &word, float ce
|
||||
tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
|
||||
}
|
||||
if (!all_ok) {
|
||||
if (left_ok)
|
||||
if (left_ok) {
|
||||
*left_ok = initial_ok_run_count;
|
||||
if (right_ok)
|
||||
}
|
||||
if (right_ok) {
|
||||
*right_ok = ok_run_count;
|
||||
}
|
||||
}
|
||||
return all_ok;
|
||||
}
|
||||
|
@ -39,13 +39,15 @@ void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) {
|
||||
wordrec_enable_assoc.set_value(false);
|
||||
chop_enable.set_value(false);
|
||||
}
|
||||
if (pass_n == 1)
|
||||
if (pass_n == 1) {
|
||||
set_pass1();
|
||||
else
|
||||
} else {
|
||||
set_pass2();
|
||||
}
|
||||
recog_word(word);
|
||||
if (word->best_choice == nullptr)
|
||||
if (word->best_choice == nullptr) {
|
||||
word->SetupFake(*word->uch_set);
|
||||
}
|
||||
if (word->word->flag(W_DONT_CHOP)) {
|
||||
wordrec_enable_assoc.set_value(saved_enable_assoc);
|
||||
chop_enable.set_value(saved_chop_enable);
|
||||
|
@ -151,8 +151,9 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0, const std::str
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
// Determine which ocr engine(s) should be loaded and used for recognition.
|
||||
if (oem != OEM_DEFAULT)
|
||||
if (oem != OEM_DEFAULT) {
|
||||
tessedit_ocr_engine_mode.set_value(oem);
|
||||
}
|
||||
#endif
|
||||
|
||||
// If we are only loading the config file (and so not planning on doing any
|
||||
@ -231,8 +232,9 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0, const std::str
|
||||
// Helper returns true if the given string is in the vector of strings.
|
||||
static bool IsStrInList(const std::string &str, const std::vector<std::string> &str_list) {
|
||||
for (const auto &i : str_list) {
|
||||
if (i == str)
|
||||
if (i == str) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -248,8 +250,9 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
|
||||
while (!remains.empty()) {
|
||||
// Find the start of the lang code and which vector to add to.
|
||||
const char *start = remains.c_str();
|
||||
while (*start == '+')
|
||||
while (*start == '+') {
|
||||
++start;
|
||||
}
|
||||
std::vector<std::string> *target = to_load;
|
||||
if (*start == '~') {
|
||||
target = not_to_load;
|
||||
@ -258,8 +261,9 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
|
||||
// Find the index of the end of the lang code in string start.
|
||||
int end = strlen(start);
|
||||
const char *plus = strchr(start, '+');
|
||||
if (plus != nullptr && plus - start < end)
|
||||
if (plus != nullptr && plus - start < end) {
|
||||
end = plus - start;
|
||||
}
|
||||
std::string lang_code(start);
|
||||
lang_code.resize(end);
|
||||
std::string next(start + end);
|
||||
@ -438,8 +442,9 @@ void Tesseract::SetupUniversalFontIds() {
|
||||
int Tesseract::init_tesseract_lm(const std::string &arg0, const std::string &textbase,
|
||||
const std::string &language, TessdataManager *mgr) {
|
||||
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, nullptr, 0, nullptr,
|
||||
nullptr, false, mgr))
|
||||
nullptr, false, mgr)) {
|
||||
return -1;
|
||||
}
|
||||
getDict().SetupForLoad(Dict::GlobalDawgCache());
|
||||
getDict().Load(lang, mgr);
|
||||
getDict().FinishLoad();
|
||||
|
@ -450,8 +450,9 @@ void Tesseract::Clear() {
|
||||
reskew_ = FCOORD(1.0f, 0.0f);
|
||||
splitter_.Clear();
|
||||
scaled_factor_ = -1;
|
||||
for (auto &sub_lang : sub_langs_)
|
||||
for (auto &sub_lang : sub_langs_) {
|
||||
sub_lang->Clear();
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -514,8 +515,9 @@ void Tesseract::PrepareForPageseg() {
|
||||
for (auto &sub_lang : sub_langs_) {
|
||||
auto pageseg_strategy = static_cast<ShiroRekhaSplitter::SplitStrategy>(
|
||||
static_cast<int32_t>(sub_lang->pageseg_devanagari_split_strategy));
|
||||
if (pageseg_strategy > max_pageseg_strategy)
|
||||
if (pageseg_strategy > max_pageseg_strategy) {
|
||||
max_pageseg_strategy = pageseg_strategy;
|
||||
}
|
||||
pixDestroy(&sub_lang->pix_binary_);
|
||||
sub_lang->pix_binary_ = pixClone(pix_binary());
|
||||
}
|
||||
@ -542,8 +544,9 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O
|
||||
for (auto &sub_lang : sub_langs_) {
|
||||
auto ocr_strategy = static_cast<ShiroRekhaSplitter::SplitStrategy>(
|
||||
static_cast<int32_t>(sub_lang->ocr_devanagari_split_strategy));
|
||||
if (ocr_strategy > max_ocr_strategy)
|
||||
if (ocr_strategy > max_ocr_strategy) {
|
||||
max_ocr_strategy = ocr_strategy;
|
||||
}
|
||||
}
|
||||
// Utilize the segmentation information available.
|
||||
splitter_.set_segmentation_block_list(block_list);
|
||||
|
@ -284,18 +284,21 @@ public:
|
||||
}
|
||||
// Returns true if any language uses Tesseract (as opposed to LSTM).
|
||||
bool AnyTessLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY)
|
||||
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
|
||||
return true;
|
||||
}
|
||||
for (auto &lang : sub_langs_) {
|
||||
if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY)
|
||||
if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if any language uses the LSTM.
|
||||
bool AnyLSTMLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
|
||||
return true;
|
||||
}
|
||||
for (auto &lang : sub_langs_) {
|
||||
if (lang->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
|
||||
return true;
|
||||
|
@ -38,8 +38,9 @@ void Tesseract::recog_word(WERD_RES *word) {
|
||||
if (wordrec_skip_no_truth_words &&
|
||||
(word->blamer_bundle == nullptr ||
|
||||
word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) {
|
||||
if (classify_debug_level)
|
||||
if (classify_debug_level) {
|
||||
tprintf("No truth for word - skipping\n");
|
||||
}
|
||||
word->tess_failed = true;
|
||||
return;
|
||||
}
|
||||
@ -266,12 +267,14 @@ void Tesseract::join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_b
|
||||
// finished with them.
|
||||
int bc2_index = 1;
|
||||
for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) {
|
||||
if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece)
|
||||
if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece) {
|
||||
break;
|
||||
}
|
||||
int bc1_index = 0;
|
||||
for (bc1_it.move_to_first(); bc1_index < num_word1_choices; ++bc1_index, bc1_it.forward()) {
|
||||
if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece)
|
||||
if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece) {
|
||||
break;
|
||||
}
|
||||
auto *wc = new WERD_CHOICE(*bc1_it.data());
|
||||
*wc += *bc2_it.data();
|
||||
jc_it.add_after_then_move(wc);
|
||||
|
@ -69,8 +69,9 @@ bool ImageThresholder::IsEmpty() const {
|
||||
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
|
||||
int bytes_per_pixel, int bytes_per_line) {
|
||||
int bpp = bytes_per_pixel * 8;
|
||||
if (bpp == 0)
|
||||
if (bpp == 0) {
|
||||
bpp = 1;
|
||||
}
|
||||
Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
|
||||
l_uint32 *data = pixGetData(pix);
|
||||
int wpl = pixGetWpl(pix);
|
||||
@ -78,10 +79,11 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
|
||||
case 1:
|
||||
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
if (imagedata[x / 8] & (0x80 >> (x % 8)))
|
||||
if (imagedata[x / 8] & (0x80 >> (x % 8))) {
|
||||
CLEAR_DATA_BIT(data, x);
|
||||
else
|
||||
} else {
|
||||
SET_DATA_BIT(data, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -89,8 +91,9 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
|
||||
case 8:
|
||||
// Greyscale just copies the bytes in the right order.
|
||||
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
|
||||
for (int x = 0; x < width; ++x)
|
||||
for (int x = 0; x < width; ++x) {
|
||||
SET_DATA_BYTE(data, x, imagedata[x]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -151,8 +154,9 @@ void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *heigh
|
||||
// immediately after, but may not go away until after the Thresholder has
|
||||
// finished with it.
|
||||
void ImageThresholder::SetImage(const Pix *pix) {
|
||||
if (pix_ != nullptr)
|
||||
if (pix_ != nullptr) {
|
||||
pixDestroy(&pix_);
|
||||
}
|
||||
Pix *src = const_cast<Pix *>(pix);
|
||||
int depth;
|
||||
pixGetDimensions(src, &image_width_, &image_height_, &depth);
|
||||
@ -210,8 +214,9 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
|
||||
// the binary image in ThresholdToPix, but this is not a hard constraint.
|
||||
// Returns nullptr if the input is binary. PixDestroy after use.
|
||||
Pix *ImageThresholder::GetPixRectThresholds() {
|
||||
if (IsBinary())
|
||||
if (IsBinary()) {
|
||||
return nullptr;
|
||||
}
|
||||
Pix *pix_grey = GetPixRectGrey();
|
||||
int width = pixGetWidth(pix_grey);
|
||||
int height = pixGetHeight(pix_grey);
|
||||
@ -319,10 +324,11 @@ void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (white_result)
|
||||
if (white_result) {
|
||||
CLEAR_DATA_BIT(pixline, x);
|
||||
else
|
||||
} else {
|
||||
SET_DATA_BIT(pixline, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -54,8 +54,9 @@ PAGE_RES_IT *make_pseudo_word(PAGE_RES *page_res, const TBOX &selection_box) {
|
||||
WERD *pseudo_word = new WERD(&new_blobs, 1, nullptr);
|
||||
word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
|
||||
auto *it = new PAGE_RES_IT(page_res);
|
||||
while (it->word() != word_res && it->word() != nullptr)
|
||||
while (it->word() != word_res && it->word() != nullptr) {
|
||||
it->forward();
|
||||
}
|
||||
ASSERT_HOST(it->word() == word_res);
|
||||
return it;
|
||||
}
|
||||
|
@ -76,8 +76,9 @@ void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_
|
||||
std::string uch(truth_str + total_length);
|
||||
uch.resize(lengths[i] - total_length);
|
||||
UNICHAR_ID id = encoding[i];
|
||||
if (id != INVALID_UNICHAR_ID)
|
||||
if (id != INVALID_UNICHAR_ID) {
|
||||
uch = unicharset.get_normed_unichar(id);
|
||||
}
|
||||
truth_text_.push_back(uch);
|
||||
}
|
||||
}
|
||||
@ -90,16 +91,18 @@ void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char
|
||||
UNICHAR_ID id = unicharset.unichar_to_id(char_str);
|
||||
if (id != INVALID_UNICHAR_ID) {
|
||||
std::string normed_uch(unicharset.get_normed_unichar(id));
|
||||
if (normed_uch.length() > 0)
|
||||
if (normed_uch.length() > 0) {
|
||||
symbol_str = normed_uch;
|
||||
}
|
||||
}
|
||||
int length = truth_word_.length();
|
||||
truth_text_.push_back(symbol_str);
|
||||
truth_word_.InsertBox(length, char_box);
|
||||
if (length == 0)
|
||||
if (length == 0) {
|
||||
truth_has_char_boxes_ = true;
|
||||
else if (truth_word_.BlobBox(length - 1) == char_box)
|
||||
} else if (truth_word_.BlobBox(length - 1) == char_box) {
|
||||
truth_has_char_boxes_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Marks that there is something wrong with the truth text, like it contains
|
||||
@ -111,8 +114,9 @@ void BlamerBundle::SetRejectedTruth() {
|
||||
|
||||
// Returns true if the provided word_choice is correct.
|
||||
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
|
||||
if (word_choice == nullptr)
|
||||
if (word_choice == nullptr) {
|
||||
return false;
|
||||
}
|
||||
const UNICHARSET *uni_set = word_choice->unicharset();
|
||||
std::string normed_choice_str;
|
||||
for (int i = 0; i < word_choice->length(); ++i) {
|
||||
@ -127,8 +131,9 @@ void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *ch
|
||||
for (auto &text : this->truth_text_) {
|
||||
debug += text;
|
||||
}
|
||||
if (!this->truth_has_char_boxes_)
|
||||
if (!this->truth_has_char_boxes_) {
|
||||
debug += " (no char boxes)";
|
||||
}
|
||||
if (choice != nullptr) {
|
||||
debug += " Choice ";
|
||||
std::string choice_str;
|
||||
@ -200,8 +205,9 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, Blam
|
||||
bundle2->norm_box_tolerance_ = norm_box_tolerance_;
|
||||
BlamerBundle *curr_bb = bundle1;
|
||||
for (b = 0; b < norm_truth_word_.length(); ++b) {
|
||||
if (b == begin2_truth_index)
|
||||
if (b == begin2_truth_index) {
|
||||
curr_bb = bundle2;
|
||||
}
|
||||
curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
|
||||
curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
|
||||
curr_bb->truth_text_.push_back(truth_text_[b]);
|
||||
@ -222,8 +228,9 @@ void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &b
|
||||
bool debug) {
|
||||
std::string debug_str;
|
||||
IncorrectResultReason irr = incorrect_result_reason_;
|
||||
if (irr != IRR_NO_TRUTH_SPLIT)
|
||||
if (irr != IRR_NO_TRUTH_SPLIT) {
|
||||
debug_str = "";
|
||||
}
|
||||
if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
|
||||
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
|
||||
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
|
||||
@ -253,8 +260,9 @@ void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &b
|
||||
// blames character classifier for incorrect answer.
|
||||
void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
|
||||
const BLOB_CHOICE_LIST &choices, bool debug) {
|
||||
if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT)
|
||||
if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
|
||||
return; // Nothing to do here.
|
||||
}
|
||||
|
||||
for (int b = 0; b < norm_truth_word_.length(); ++b) {
|
||||
const TBOX &truth_box = norm_truth_word_.BlobBox(b);
|
||||
@ -394,23 +402,26 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
params_training_bundle_.StartHypothesisList();
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
|
||||
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
|
||||
return; // Nothing to do here.
|
||||
}
|
||||
|
||||
std::string debug_str = "Blamer computing correct_segmentation_cols\n";
|
||||
int curr_box_col = 0;
|
||||
int next_box_col = 0;
|
||||
int num_blobs = word->NumBlobs();
|
||||
if (num_blobs == 0)
|
||||
if (num_blobs == 0) {
|
||||
return; // No blobs to play with.
|
||||
}
|
||||
int blob_index = 0;
|
||||
int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
|
||||
for (int truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
|
||||
++blob_index) {
|
||||
++next_box_col;
|
||||
int16_t curr_box_x = next_box_x;
|
||||
if (blob_index + 1 < num_blobs)
|
||||
if (blob_index + 1 < num_blobs) {
|
||||
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
|
||||
}
|
||||
int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
|
||||
debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
|
||||
debug_str += " " + std::to_string(truth_x);
|
||||
@ -435,8 +446,9 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
|
||||
"Blamer failed to find correct segmentation"
|
||||
" (tolerance=" +
|
||||
std::to_string(norm_box_tolerance_);
|
||||
if (blob_index >= num_blobs)
|
||||
if (blob_index >= num_blobs) {
|
||||
debug_str += " blob == nullptr";
|
||||
}
|
||||
debug_str += ")\n";
|
||||
debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
|
||||
debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
|
||||
|
@ -144,8 +144,9 @@ struct BlamerBundle {
|
||||
return misadaption_debug_;
|
||||
}
|
||||
void UpdateBestRating(float rating) {
|
||||
if (rating < best_correctly_segmented_rating_)
|
||||
if (rating < best_correctly_segmented_rating_) {
|
||||
best_correctly_segmented_rating_ = rating;
|
||||
}
|
||||
}
|
||||
int correct_segmentation_length() const {
|
||||
return correct_segmentation_cols_.size();
|
||||
@ -197,8 +198,9 @@ struct BlamerBundle {
|
||||
void ClearResults() {
|
||||
norm_truth_word_.DeleteAllBoxes();
|
||||
norm_box_tolerance_ = 0;
|
||||
if (!NoTruth())
|
||||
if (!NoTruth()) {
|
||||
incorrect_result_reason_ = IRR_CORRECT;
|
||||
}
|
||||
debug_ = "";
|
||||
segsearch_is_looking_for_blame_ = false;
|
||||
best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
|
||||
@ -296,8 +298,9 @@ private:
|
||||
debug_ = IncorrectReason();
|
||||
debug_ += " to blame: ";
|
||||
FillDebugString(msg, choice, debug_);
|
||||
if (debug)
|
||||
if (debug) {
|
||||
tprintf("SetBlame(): %s", debug_.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -155,13 +155,14 @@ void BLOBNBOX::chop( // chop blobs
|
||||
} while (blob != end_it->data());
|
||||
if (ymin < ymax) {
|
||||
leftx = static_cast<int16_t>(floor(rightx - blobwidth));
|
||||
if (leftx < box.left())
|
||||
if (leftx < box.left()) {
|
||||
leftx = box.left(); // clip to real box
|
||||
}
|
||||
bl = ICOORD(leftx, static_cast<int16_t>(floor(ymin)));
|
||||
tr = ICOORD(static_cast<int16_t>(ceil(rightx)), static_cast<int16_t>(ceil(ymax)));
|
||||
if (blobindex == 0)
|
||||
if (blobindex == 0) {
|
||||
box = TBOX(bl, tr); // change box
|
||||
else {
|
||||
} else {
|
||||
newblob = new BLOBNBOX;
|
||||
// box is all it has
|
||||
newblob->box = TBOX(bl, tr);
|
||||
@ -202,12 +203,14 @@ void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max)
|
||||
NeighbourGaps(gaps);
|
||||
*h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]);
|
||||
*h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]);
|
||||
if (*h_max > max_dimension && *h_min < max_dimension)
|
||||
if (*h_max > max_dimension && *h_min < max_dimension) {
|
||||
*h_max = *h_min;
|
||||
}
|
||||
*v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]);
|
||||
*v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]);
|
||||
if (*v_max > max_dimension && *v_min < max_dimension)
|
||||
if (*v_max > max_dimension && *v_min < max_dimension) {
|
||||
*v_max = *v_min;
|
||||
}
|
||||
}
|
||||
|
||||
// Nulls out any neighbours that are DeletableNoise to remove references.
|
||||
@ -227,8 +230,9 @@ int BLOBNBOX::GoodTextBlob() const {
|
||||
int score = 0;
|
||||
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
||||
auto bnd = static_cast<BlobNeighbourDir>(dir);
|
||||
if (good_stroke_neighbour(bnd))
|
||||
if (good_stroke_neighbour(bnd)) {
|
||||
++score;
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
@ -239,8 +243,9 @@ int BLOBNBOX::NoisyNeighbours() const {
|
||||
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
||||
auto bnd = static_cast<BlobNeighbourDir>(dir);
|
||||
BLOBNBOX *blob = neighbour(bnd);
|
||||
if (blob != nullptr && blob->region_type() == BRT_NOISE)
|
||||
if (blob != nullptr && blob->region_type() == BRT_NOISE) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
@ -250,8 +255,9 @@ int BLOBNBOX::NoisyNeighbours() const {
|
||||
// eg if it has a high aspect ratio, yet has a complex shape, such as a
|
||||
// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
|
||||
bool BLOBNBOX::DefiniteIndividualFlow() {
|
||||
if (cblob() == nullptr)
|
||||
if (cblob() == nullptr) {
|
||||
return false;
|
||||
}
|
||||
int box_perimeter = 2 * (box.height() + box.width());
|
||||
if (box.width() > box.height() * kDefiniteAspectRatio) {
|
||||
// Attempt to distinguish a wide joined word from a dash.
|
||||
@ -260,10 +266,11 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
|
||||
// so perimeter - 2*(box width + stroke width) should be close to zero.
|
||||
// A complex shape such as a joined word should have a much larger value.
|
||||
int perimeter = cblob()->perimeter();
|
||||
if (vert_stroke_width() > 0 || perimeter <= 0)
|
||||
if (vert_stroke_width() > 0 || perimeter <= 0) {
|
||||
perimeter -= 2 * vert_stroke_width();
|
||||
else
|
||||
} else {
|
||||
perimeter -= 4 * cblob()->area() / perimeter;
|
||||
}
|
||||
perimeter -= 2 * box.width();
|
||||
// Use a multiple of the box perimeter as a threshold.
|
||||
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
|
||||
@ -275,10 +282,11 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
|
||||
if (box.height() > box.width() * kDefiniteAspectRatio) {
|
||||
// As above, but for a putative vertical word vs a I/1/l.
|
||||
int perimeter = cblob()->perimeter();
|
||||
if (horz_stroke_width() > 0 || perimeter <= 0)
|
||||
if (horz_stroke_width() > 0 || perimeter <= 0) {
|
||||
perimeter -= 2 * horz_stroke_width();
|
||||
else
|
||||
} else {
|
||||
perimeter -= 4 * cblob()->area() / perimeter;
|
||||
}
|
||||
perimeter -= 2 * box.height();
|
||||
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
|
||||
set_vert_possible(true);
|
||||
@ -291,14 +299,18 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
|
||||
|
||||
// Returns true if there is no tabstop violation in merging this and other.
|
||||
bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const {
|
||||
if (box.left() < other.box.left() && box.left() < other.left_rule_)
|
||||
if (box.left() < other.box.left() && box.left() < other.left_rule_) {
|
||||
return false;
|
||||
if (other.box.left() < box.left() && other.box.left() < left_rule_)
|
||||
}
|
||||
if (other.box.left() < box.left() && other.box.left() < left_rule_) {
|
||||
return false;
|
||||
if (box.right() > other.box.right() && box.right() > other.right_rule_)
|
||||
}
|
||||
if (box.right() > other.box.right() && box.right() > other.right_rule_) {
|
||||
return false;
|
||||
if (other.box.right() > box.right() && other.box.right() > right_rule_)
|
||||
}
|
||||
if (other.box.right() > box.right() && other.box.right() > right_rule_) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -350,8 +362,9 @@ TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
|
||||
// outline.
|
||||
void BLOBNBOX::EstimateBaselinePosition() {
|
||||
baseline_y_ = box.bottom(); // The default.
|
||||
if (cblob_ptr == nullptr)
|
||||
if (cblob_ptr == nullptr) {
|
||||
return;
|
||||
}
|
||||
baseline_y_ = cblob_ptr->EstimateBaselinePosition();
|
||||
}
|
||||
|
||||
@ -423,8 +436,9 @@ void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour
|
||||
BLOBNBOX_IT it(list);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
BLOBNBOX *blob = it.data();
|
||||
if (blob->DeletableNoise())
|
||||
if (blob->DeletableNoise()) {
|
||||
blob->plot(win, body_colour, child_colour);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -441,22 +455,29 @@ ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFl
|
||||
case BRT_UNKNOWN:
|
||||
return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
|
||||
case BRT_VERT_TEXT:
|
||||
if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
|
||||
if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) {
|
||||
return ScrollView::GREEN;
|
||||
if (flow_type == BTFT_CHAIN)
|
||||
}
|
||||
if (flow_type == BTFT_CHAIN) {
|
||||
return ScrollView::LIME_GREEN;
|
||||
}
|
||||
return ScrollView::YELLOW;
|
||||
case BRT_TEXT:
|
||||
if (flow_type == BTFT_STRONG_CHAIN)
|
||||
if (flow_type == BTFT_STRONG_CHAIN) {
|
||||
return ScrollView::BLUE;
|
||||
if (flow_type == BTFT_TEXT_ON_IMAGE)
|
||||
}
|
||||
if (flow_type == BTFT_TEXT_ON_IMAGE) {
|
||||
return ScrollView::LIGHT_BLUE;
|
||||
if (flow_type == BTFT_CHAIN)
|
||||
}
|
||||
if (flow_type == BTFT_CHAIN) {
|
||||
return ScrollView::MEDIUM_BLUE;
|
||||
if (flow_type == BTFT_LEADER)
|
||||
}
|
||||
if (flow_type == BTFT_LEADER) {
|
||||
return ScrollView::WHEAT;
|
||||
if (flow_type == BTFT_NONTEXT)
|
||||
}
|
||||
if (flow_type == BTFT_NONTEXT) {
|
||||
return ScrollView::PINK;
|
||||
}
|
||||
return ScrollView::MAGENTA;
|
||||
default:
|
||||
return ScrollView::GREY;
|
||||
@ -471,8 +492,9 @@ ScrollView::Color BLOBNBOX::BoxColor() const {
|
||||
void BLOBNBOX::plot(ScrollView *window, // window to draw in
|
||||
ScrollView::Color blob_colour, // for outer bits
|
||||
ScrollView::Color child_colour) { // for holes
|
||||
if (cblob_ptr != nullptr)
|
||||
if (cblob_ptr != nullptr) {
|
||||
cblob_ptr->plot(window, blob_colour, child_colour);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/**********************************************************************
|
||||
@ -627,9 +649,10 @@ TBOX box_next( // get bounding box
|
||||
do {
|
||||
it->forward();
|
||||
blob = it->data();
|
||||
if (blob->cblob() == nullptr)
|
||||
if (blob->cblob() == nullptr) {
|
||||
// was pre-chopped
|
||||
result += blob->bounding_box();
|
||||
}
|
||||
}
|
||||
// until next real blob
|
||||
while ((blob->cblob() == nullptr) || blob->joined_to_prev());
|
||||
@ -725,17 +748,21 @@ void TO_ROW::add_blob( // constructor
|
||||
allowed = row_size + y_min - y_max;
|
||||
if (allowed > 0) {
|
||||
available = top > y_max ? top - y_max : 0;
|
||||
if (bottom < y_min)
|
||||
if (bottom < y_min) {
|
||||
// total available
|
||||
available += y_min - bottom;
|
||||
}
|
||||
if (available > 0) {
|
||||
available += available; // do it gradually
|
||||
if (available < allowed)
|
||||
if (available < allowed) {
|
||||
available = allowed;
|
||||
if (bottom < y_min)
|
||||
}
|
||||
if (bottom < y_min) {
|
||||
y_min -= (y_min - bottom) * allowed / available;
|
||||
if (top > y_max)
|
||||
}
|
||||
if (top > y_max) {
|
||||
y_max += (top - y_max) * allowed / available;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -751,16 +778,18 @@ void TO_ROW::insert_blob( // constructor
|
||||
) {
|
||||
BLOBNBOX_IT it = &blobs; // list of blobs
|
||||
|
||||
if (it.empty())
|
||||
if (it.empty()) {
|
||||
it.add_before_then_move(blob);
|
||||
else {
|
||||
} else {
|
||||
it.mark_cycle_pt();
|
||||
while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left())
|
||||
while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) {
|
||||
it.forward();
|
||||
if (it.cycled_list())
|
||||
}
|
||||
if (it.cycled_list()) {
|
||||
it.add_to_end(blob);
|
||||
else
|
||||
} else {
|
||||
it.add_before_stay_put(blob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -776,19 +805,22 @@ void TO_ROW::compute_vertical_projection() { // project whole row
|
||||
TBOX blob_box; // bounding box
|
||||
BLOBNBOX_IT blob_it = blob_list();
|
||||
|
||||
if (blob_it.empty())
|
||||
if (blob_it.empty()) {
|
||||
return;
|
||||
}
|
||||
row_box = blob_it.data()->bounding_box();
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward())
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
row_box += blob_it.data()->bounding_box();
|
||||
}
|
||||
|
||||
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
|
||||
projection_left = row_box.left() - PROJECTION_MARGIN;
|
||||
projection_right = row_box.right() + PROJECTION_MARGIN;
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
blob = blob_it.data();
|
||||
if (blob->cblob() != nullptr)
|
||||
if (blob->cblob() != nullptr) {
|
||||
vertical_cblob_projection(blob->cblob(), &projection);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -959,14 +991,15 @@ static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_l
|
||||
blob->ReInit();
|
||||
int width = blob->bounding_box().width();
|
||||
int height = blob->bounding_box().height();
|
||||
if (height < min_height && (width < min_height || width > max_height))
|
||||
if (height < min_height && (width < min_height || width > max_height)) {
|
||||
noise_it.add_after_then_move(blob);
|
||||
else if (height > max_height)
|
||||
} else if (height > max_height) {
|
||||
large_it.add_after_then_move(blob);
|
||||
else if (height < min_height)
|
||||
} else if (height < min_height) {
|
||||
small_it.add_after_then_move(blob);
|
||||
else
|
||||
} else {
|
||||
medium_it.add_after_then_move(blob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,10 +124,12 @@ enum BlobTextFlowType {
|
||||
// this cannot be true if t1 == t2, so the result is undefined.
|
||||
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
|
||||
// LEADER always loses.
|
||||
if (type1 == BTFT_LEADER)
|
||||
if (type1 == BTFT_LEADER) {
|
||||
return false;
|
||||
if (type2 == BTFT_LEADER)
|
||||
}
|
||||
if (type2 == BTFT_LEADER) {
|
||||
return true;
|
||||
}
|
||||
// With those out of the way, the ordering of the enum determines the result.
|
||||
return type1 >= type2;
|
||||
}
|
||||
@ -148,8 +150,9 @@ public:
|
||||
area = static_cast<int>(srcblob->area());
|
||||
}
|
||||
~BLOBNBOX() {
|
||||
if (owns_cblob_)
|
||||
if (owns_cblob_) {
|
||||
delete cblob_ptr;
|
||||
}
|
||||
}
|
||||
static BLOBNBOX *RealBlob(C_OUTLINE *outline) {
|
||||
auto *blob = new C_BLOB(outline);
|
||||
@ -470,8 +473,9 @@ public:
|
||||
right_rule_ = 0;
|
||||
left_crossing_rule_ = 0;
|
||||
right_crossing_rule_ = 0;
|
||||
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0)
|
||||
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0) {
|
||||
area_stroke_width_ = 2.0f * area / cblob()->perimeter();
|
||||
}
|
||||
owner_ = nullptr;
|
||||
base_char_top_ = box.top();
|
||||
base_char_bottom_ = box.bottom();
|
||||
|
@ -102,8 +102,9 @@ TESSLINE *TESSLINE::BuildFromOutlineList(EDGEPT *outline) {
|
||||
EDGEPT *pt = outline;
|
||||
do {
|
||||
pt->step_count = pt->next->start_step - pt->start_step;
|
||||
if (pt->step_count < 0)
|
||||
if (pt->step_count < 0) {
|
||||
pt->step_count += pt->src_outline->pathlength();
|
||||
}
|
||||
pt = pt->next;
|
||||
} while (pt != outline);
|
||||
}
|
||||
@ -140,8 +141,9 @@ void TESSLINE::CopyFrom(const TESSLINE &src) {
|
||||
|
||||
// Deletes owned data.
|
||||
void TESSLINE::Clear() {
|
||||
if (loop == nullptr)
|
||||
if (loop == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
EDGEPT *this_edge = loop;
|
||||
do {
|
||||
@ -220,14 +222,18 @@ void TESSLINE::ComputeBoundingBox() {
|
||||
EDGEPT *this_edge = loop;
|
||||
do {
|
||||
if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
|
||||
if (this_edge->pos.x < minx)
|
||||
if (this_edge->pos.x < minx) {
|
||||
minx = this_edge->pos.x;
|
||||
if (this_edge->pos.y < miny)
|
||||
}
|
||||
if (this_edge->pos.y < miny) {
|
||||
miny = this_edge->pos.y;
|
||||
if (this_edge->pos.x > maxx)
|
||||
}
|
||||
if (this_edge->pos.x > maxx) {
|
||||
maxx = this_edge->pos.x;
|
||||
if (this_edge->pos.y > maxy)
|
||||
}
|
||||
if (this_edge->pos.y > maxy) {
|
||||
maxy = this_edge->pos.y;
|
||||
}
|
||||
}
|
||||
this_edge = this_edge->next;
|
||||
} while (this_edge != loop);
|
||||
@ -262,19 +268,21 @@ TBOX TESSLINE::bounding_box() const {
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void TESSLINE::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
|
||||
if (is_hole)
|
||||
if (is_hole) {
|
||||
window->Pen(child_color);
|
||||
else
|
||||
} else {
|
||||
window->Pen(color);
|
||||
}
|
||||
window->SetCursor(start.x, start.y);
|
||||
EDGEPT *pt = loop;
|
||||
do {
|
||||
bool prev_hidden = pt->IsHidden();
|
||||
pt = pt->next;
|
||||
if (prev_hidden)
|
||||
if (prev_hidden) {
|
||||
window->SetCursor(pt->pos.x, pt->pos.y);
|
||||
else
|
||||
} else {
|
||||
window->DrawTo(pt->pos.x, pt->pos.y);
|
||||
}
|
||||
} while (pt != loop);
|
||||
}
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
@ -287,10 +295,12 @@ EDGEPT *TESSLINE::FindBestStartPt() const {
|
||||
// Iterate the polygon.
|
||||
EDGEPT *pt = loop;
|
||||
do {
|
||||
if (pt->IsHidden())
|
||||
if (pt->IsHidden()) {
|
||||
continue;
|
||||
if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline)
|
||||
}
|
||||
if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) {
|
||||
return pt; // Qualifies as the best.
|
||||
}
|
||||
if (pt->start_step < best_step) {
|
||||
best_step = pt->start_step;
|
||||
best_start = pt;
|
||||
@ -368,10 +378,11 @@ void TBLOB::CopyFrom(const TBLOB &src) {
|
||||
TESSLINE *prev_outline = nullptr;
|
||||
for (TESSLINE *srcline = src.outlines; srcline != nullptr; srcline = srcline->next) {
|
||||
auto *new_outline = new TESSLINE(*srcline);
|
||||
if (outlines == nullptr)
|
||||
if (outlines == nullptr) {
|
||||
outlines = new_outline;
|
||||
else
|
||||
} else {
|
||||
prev_outline->next = new_outline;
|
||||
}
|
||||
prev_outline = new_outline;
|
||||
}
|
||||
denorm_ = src.denorm_;
|
||||
@ -442,8 +453,9 @@ void TBLOB::ComputeBoundingBoxes() {
|
||||
// Returns the number of outlines.
|
||||
int TBLOB::NumOutlines() const {
|
||||
int result = 0;
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
|
||||
++result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -454,8 +466,9 @@ int TBLOB::NumOutlines() const {
|
||||
* bounding box of the union of all top-level outlines in the blob.
|
||||
**********************************************************************/
|
||||
TBOX TBLOB::bounding_box() const {
|
||||
if (outlines == nullptr)
|
||||
if (outlines == nullptr) {
|
||||
return TBOX(0, 0, 0, 0);
|
||||
}
|
||||
TESSLINE *outline = outlines;
|
||||
TBOX box = outline->bounding_box();
|
||||
for (outline = outline->next; outline != nullptr; outline = outline->next) {
|
||||
@ -496,8 +509,9 @@ void TBLOB::CorrectBlobOrder(TBLOB *next) {
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void TBLOB::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
|
||||
outline->plot(window, color, child_color);
|
||||
}
|
||||
}
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
|
||||
@ -515,10 +529,12 @@ int TBLOB::ComputeMoments(FCOORD *center, FCOORD *second_moments) const {
|
||||
// The 2nd moments are just the standard deviation of the point positions.
|
||||
double x2nd = sqrt(accumulator.x_variance());
|
||||
double y2nd = sqrt(accumulator.y_variance());
|
||||
if (x2nd < 1.0)
|
||||
if (x2nd < 1.0) {
|
||||
x2nd = 1.0;
|
||||
if (y2nd < 1.0)
|
||||
}
|
||||
if (y2nd < 1.0) {
|
||||
y2nd = 1.0;
|
||||
}
|
||||
second_moments->set_x(x2nd);
|
||||
second_moments->set_y(y2nd);
|
||||
return accumulator.count();
|
||||
@ -548,10 +564,12 @@ void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coor
|
||||
y_coords.resize(box.width());
|
||||
CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
|
||||
// Sort the output vectors.
|
||||
for (auto &coord : x_coords)
|
||||
for (auto &coord : x_coords) {
|
||||
std::sort(coord.begin(), coord.end());
|
||||
for (auto &coord : y_coords)
|
||||
}
|
||||
for (auto &coord : y_coords) {
|
||||
std::sort(coord.begin(), coord.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
|
||||
@ -563,8 +581,9 @@ static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator)
|
||||
int xend = IntCastRounded(std::max(pt1.x(), pt2.x()));
|
||||
int ystart = IntCastRounded(std::min(pt1.y(), pt2.y()));
|
||||
int yend = IntCastRounded(std::max(pt1.y(), pt2.y()));
|
||||
if (xstart == xend && ystart == yend)
|
||||
if (xstart == xend && ystart == yend) {
|
||||
return; // Nothing to do.
|
||||
}
|
||||
double weight = step.length() / (xend - xstart + yend - ystart);
|
||||
// Compute and save the y-position at the middle of each x-step.
|
||||
for (int x = xstart; x < xend; ++x) {
|
||||
@ -658,8 +677,9 @@ static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const
|
||||
// bounds of the outline steps/ due to wrap-around, so we use % step_length
|
||||
// everywhere, except for start_index.
|
||||
int end_index = lastpt->start_step + lastpt->step_count;
|
||||
if (end_index <= start_index)
|
||||
if (end_index <= start_index) {
|
||||
end_index += step_length;
|
||||
}
|
||||
// pos is the integer coordinates of the binary image steps.
|
||||
ICOORD pos = outline->position_at_index(start_index);
|
||||
FCOORD origin(box.left(), box.bottom());
|
||||
@ -733,11 +753,13 @@ void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
|
||||
// Iterate the polygon.
|
||||
EDGEPT *loop_pt = ol->FindBestStartPt();
|
||||
EDGEPT *pt = loop_pt;
|
||||
if (pt == nullptr)
|
||||
if (pt == nullptr) {
|
||||
continue;
|
||||
}
|
||||
do {
|
||||
if (pt->IsHidden())
|
||||
if (pt->IsHidden()) {
|
||||
continue;
|
||||
}
|
||||
// Find a run of equal src_outline.
|
||||
EDGEPT *last_pt = pt;
|
||||
do {
|
||||
@ -771,8 +793,9 @@ void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inver
|
||||
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
|
||||
const TBOX *norm_box, DENORM *word_denorm) {
|
||||
TBOX word_box = bounding_box();
|
||||
if (norm_box != nullptr)
|
||||
if (norm_box != nullptr) {
|
||||
word_box = *norm_box;
|
||||
}
|
||||
float word_middle = (word_box.left() + word_box.right()) / 2.0f;
|
||||
float input_y_offset = 0.0f;
|
||||
auto final_y_offset = static_cast<float>(kBlnBaselineOffset);
|
||||
@ -849,8 +872,9 @@ TBOX TWERD::bounding_box() const {
|
||||
// Merges the blobs from start to end, not including end, and deletes
|
||||
// the blobs between start and end.
|
||||
void TWERD::MergeBlobs(int start, int end) {
|
||||
if (start >= blobs.size() - 1)
|
||||
if (start >= blobs.size() - 1) {
|
||||
return; // Nothing to do.
|
||||
}
|
||||
TESSLINE *outline = blobs[start]->outlines;
|
||||
for (int i = start + 1; i < end && i < blobs.size(); ++i) {
|
||||
TBLOB *next_blob = blobs[i];
|
||||
@ -859,8 +883,9 @@ void TWERD::MergeBlobs(int start, int end) {
|
||||
blobs[start]->outlines = next_blob->outlines;
|
||||
outline = blobs[start]->outlines;
|
||||
} else {
|
||||
while (outline->next != nullptr)
|
||||
while (outline->next != nullptr) {
|
||||
outline = outline->next;
|
||||
}
|
||||
outline->next = next_blob->outlines;
|
||||
next_blob->outlines = nullptr;
|
||||
}
|
||||
@ -893,21 +918,24 @@ void TWERD::plot(ScrollView *window) {
|
||||
* call to divide_blobs.
|
||||
**********************************************************************/
|
||||
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location) {
|
||||
if (blob->outlines == nullptr || blob->outlines->next == nullptr)
|
||||
if (blob->outlines == nullptr || blob->outlines->next == nullptr) {
|
||||
return false; // Need at least 2 outlines for it to be possible.
|
||||
}
|
||||
int max_gap = 0;
|
||||
TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
|
||||
for (TESSLINE *outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) {
|
||||
if (outline1->is_hole)
|
||||
if (outline1->is_hole) {
|
||||
continue; // Holes do not count as separable.
|
||||
}
|
||||
TPOINT mid_pt1(static_cast<int16_t>((outline1->topleft.x + outline1->botright.x) / 2),
|
||||
static_cast<int16_t>((outline1->topleft.y + outline1->botright.y) / 2));
|
||||
int mid_prod1 = mid_pt1.cross(vertical);
|
||||
int min_prod1, max_prod1;
|
||||
outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
|
||||
for (TESSLINE *outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) {
|
||||
if (outline2->is_hole)
|
||||
if (outline2->is_hole) {
|
||||
continue; // Holes do not count as separable.
|
||||
}
|
||||
TPOINT mid_pt2(static_cast<int16_t>((outline2->topleft.x + outline2->botright.x) / 2),
|
||||
static_cast<int16_t>((outline2->topleft.y + outline2->botright.y) / 2));
|
||||
int mid_prod2 = mid_pt2.cross(vertical);
|
||||
@ -951,26 +979,30 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT
|
||||
int mid_prod = mid_pt.cross(vertical);
|
||||
if (mid_prod < location_prod) {
|
||||
// Outline is in left blob.
|
||||
if (outline1)
|
||||
if (outline1) {
|
||||
outline1->next = outline;
|
||||
else
|
||||
} else {
|
||||
blob->outlines = outline;
|
||||
}
|
||||
outline1 = outline;
|
||||
} else {
|
||||
// Outline is in right blob.
|
||||
if (outline2)
|
||||
if (outline2) {
|
||||
outline2->next = outline;
|
||||
else
|
||||
} else {
|
||||
other_blob->outlines = outline;
|
||||
}
|
||||
outline2 = outline;
|
||||
}
|
||||
outline = outline->next;
|
||||
}
|
||||
|
||||
if (outline1)
|
||||
if (outline1) {
|
||||
outline1->next = nullptr;
|
||||
if (outline2)
|
||||
}
|
||||
if (outline2) {
|
||||
outline2->next = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -132,14 +132,18 @@ struct EDGEPT {
|
||||
const EDGEPT *pt = this;
|
||||
do {
|
||||
pt = pt->next;
|
||||
if (pt->pos.x < box.left())
|
||||
if (pt->pos.x < box.left()) {
|
||||
box.set_left(pt->pos.x);
|
||||
if (pt->pos.x > box.right())
|
||||
}
|
||||
if (pt->pos.x > box.right()) {
|
||||
box.set_right(pt->pos.x);
|
||||
if (pt->pos.y < box.bottom())
|
||||
}
|
||||
if (pt->pos.y < box.bottom()) {
|
||||
box.set_bottom(pt->pos.y);
|
||||
if (pt->pos.y > box.top())
|
||||
}
|
||||
if (pt->pos.y > box.top()) {
|
||||
box.set_top(pt->pos.y);
|
||||
}
|
||||
} while (pt != end && pt != this);
|
||||
return box;
|
||||
}
|
||||
@ -162,8 +166,9 @@ struct EDGEPT {
|
||||
int count = 0;
|
||||
const EDGEPT *pt = this;
|
||||
do {
|
||||
if (pt == end)
|
||||
if (pt == end) {
|
||||
return true;
|
||||
}
|
||||
pt = pt->next;
|
||||
++count;
|
||||
} while (pt != this && count <= min_points);
|
||||
@ -251,8 +256,9 @@ struct TESSLINE {
|
||||
if (Contains(pt1) && Contains(pt2)) {
|
||||
EDGEPT *pt = loop;
|
||||
do {
|
||||
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos))
|
||||
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) {
|
||||
return true;
|
||||
}
|
||||
pt = pt->next;
|
||||
} while (pt != loop);
|
||||
}
|
||||
@ -336,16 +342,18 @@ struct TBLOB {
|
||||
// Returns true if the given line segment crosses any outline of this blob.
|
||||
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const {
|
||||
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
|
||||
if (outline->SegmentCrosses(pt1, pt2))
|
||||
if (outline->SegmentCrosses(pt1, pt2)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the point is contained within any of the outline boxes.
|
||||
bool Contains(const TPOINT &pt) const {
|
||||
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
|
||||
if (outline->Contains(pt))
|
||||
if (outline->Contains(pt)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -368,8 +376,9 @@ struct TBLOB {
|
||||
|
||||
int BBArea() const {
|
||||
int total_area = 0;
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
|
||||
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
|
||||
total_area += outline->BBArea();
|
||||
}
|
||||
return total_area;
|
||||
}
|
||||
|
||||
|
@ -77,8 +77,9 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::
|
||||
std::vector<int> *pages) {
|
||||
std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
|
||||
std::vector<char> box_data(std::istreambuf_iterator<char>(input), {});
|
||||
if (box_data.empty())
|
||||
if (box_data.empty()) {
|
||||
return false;
|
||||
}
|
||||
// Convert the array of bytes to a string, so it can be used by the parser.
|
||||
box_data.push_back('\0');
|
||||
return ReadMemBoxes(target_page, skip_blanks, &box_data[0],
|
||||
@ -91,34 +92,41 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
|
||||
std::vector<std::string> *box_texts, std::vector<int> *pages) {
|
||||
std::string box_str(box_data);
|
||||
std::vector<std::string> lines = split(box_str, '\n');
|
||||
if (lines.empty())
|
||||
if (lines.empty()) {
|
||||
return false;
|
||||
}
|
||||
int num_boxes = 0;
|
||||
for (auto &line : lines) {
|
||||
int page = 0;
|
||||
std::string utf8_str;
|
||||
TBOX box;
|
||||
if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
|
||||
if (continue_on_failure)
|
||||
if (continue_on_failure) {
|
||||
continue;
|
||||
else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (skip_blanks && (utf8_str == " " || utf8_str == "\t"))
|
||||
if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
|
||||
continue;
|
||||
if (target_page >= 0 && page != target_page)
|
||||
}
|
||||
if (target_page >= 0 && page != target_page) {
|
||||
continue;
|
||||
if (boxes != nullptr)
|
||||
}
|
||||
if (boxes != nullptr) {
|
||||
boxes->push_back(box);
|
||||
if (texts != nullptr)
|
||||
}
|
||||
if (texts != nullptr) {
|
||||
texts->push_back(utf8_str);
|
||||
}
|
||||
if (box_texts != nullptr) {
|
||||
std::string full_text;
|
||||
MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
|
||||
box_texts->push_back(full_text);
|
||||
}
|
||||
if (pages != nullptr)
|
||||
if (pages != nullptr) {
|
||||
pages->push_back(page);
|
||||
}
|
||||
++num_boxes;
|
||||
}
|
||||
return num_boxes > 0;
|
||||
@ -153,21 +161,25 @@ bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string
|
||||
|
||||
buffptr = buff;
|
||||
const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
|
||||
buffptr += 3; // Skip unicode file designation.
|
||||
}
|
||||
// Check for blank lines in box file
|
||||
if (*buffptr == '\n' || *buffptr == '\0')
|
||||
if (*buffptr == '\n' || *buffptr == '\0') {
|
||||
continue;
|
||||
}
|
||||
// Skip blank boxes.
|
||||
if (*buffptr == ' ' || *buffptr == '\t')
|
||||
if (*buffptr == ' ' || *buffptr == '\t') {
|
||||
continue;
|
||||
}
|
||||
if (*buffptr != '\0') {
|
||||
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
|
||||
tprintf("Box file format error on line %i; ignored\n", *line_number);
|
||||
continue;
|
||||
}
|
||||
if (target_page >= 0 && target_page != page)
|
||||
continue; // Not on the appropriate page.
|
||||
if (target_page >= 0 && target_page != page) {
|
||||
continue; // Not on the appropriate page.
|
||||
}
|
||||
return true; // Successfully read a box.
|
||||
}
|
||||
}
|
||||
@ -196,19 +208,22 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf
|
||||
int uch_len = 0;
|
||||
// Skip unicode file designation, if present.
|
||||
const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
|
||||
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
|
||||
buffptr += 3;
|
||||
}
|
||||
// Allow a single blank as the UTF-8 string. Check for empty string and
|
||||
// then blindly eat the first character.
|
||||
if (*buffptr == '\0')
|
||||
if (*buffptr == '\0') {
|
||||
return false;
|
||||
}
|
||||
do {
|
||||
uch[uch_len++] = *buffptr++;
|
||||
} while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
|
||||
uch_len < kBoxReadBufSize - 1);
|
||||
uch[uch_len] = '\0';
|
||||
if (*buffptr != '\0')
|
||||
if (*buffptr != '\0') {
|
||||
++buffptr;
|
||||
}
|
||||
int x_min = INT_MAX;
|
||||
int y_min = INT_MAX;
|
||||
int x_max = INT_MIN;
|
||||
@ -244,10 +259,12 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf
|
||||
used += new_used;
|
||||
}
|
||||
utf8_str = uch;
|
||||
if (x_min > x_max)
|
||||
if (x_min > x_max) {
|
||||
std::swap(x_min, x_max);
|
||||
if (y_min > y_max)
|
||||
}
|
||||
if (y_min > y_max) {
|
||||
std::swap(y_min, y_max);
|
||||
}
|
||||
bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
|
||||
return true; // Successfully read a box.
|
||||
}
|
||||
|
@ -46,8 +46,9 @@ void BoxWord::CopyFrom(const BoxWord &src) {
|
||||
length_ = src.length_;
|
||||
boxes_.clear();
|
||||
boxes_.reserve(length_);
|
||||
for (int i = 0; i < length_; ++i)
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
boxes_.push_back(src.boxes_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
|
||||
@ -97,25 +98,31 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
|
||||
C_BLOB_IT b_it(original_word->cblob_list());
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
TBOX blob_box = b_it.data()->bounding_box();
|
||||
if (block != nullptr)
|
||||
if (block != nullptr) {
|
||||
blob_box.rotate(block->re_rotation());
|
||||
}
|
||||
if (blob_box.major_overlap(box)) {
|
||||
original_box += blob_box;
|
||||
}
|
||||
}
|
||||
if (!original_box.null_box()) {
|
||||
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
|
||||
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance)) {
|
||||
box.set_left(original_box.left());
|
||||
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance))
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance)) {
|
||||
box.set_right(original_box.right());
|
||||
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
|
||||
box.set_top(original_box.top());
|
||||
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance))
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance)) {
|
||||
box.set_bottom(original_box.bottom());
|
||||
}
|
||||
}
|
||||
original_box = original_word->bounding_box();
|
||||
if (block != nullptr)
|
||||
if (block != nullptr) {
|
||||
original_box.rotate(block->re_rotation());
|
||||
}
|
||||
boxes_[i] = box.intersection(original_box);
|
||||
}
|
||||
ComputeBoundingBox();
|
||||
@ -126,25 +133,28 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
|
||||
void BoxWord::MergeBoxes(int start, int end) {
|
||||
start = ClipToRange(start, 0, length_);
|
||||
end = ClipToRange(end, 0, length_);
|
||||
if (end <= start + 1)
|
||||
if (end <= start + 1) {
|
||||
return;
|
||||
}
|
||||
for (int i = start + 1; i < end; ++i) {
|
||||
boxes_[start] += boxes_[i];
|
||||
}
|
||||
int shrinkage = end - 1 - start;
|
||||
length_ -= shrinkage;
|
||||
for (int i = start + 1; i < length_; ++i)
|
||||
for (int i = start + 1; i < length_; ++i) {
|
||||
boxes_[i] = boxes_[i + shrinkage];
|
||||
}
|
||||
boxes_.resize(length_);
|
||||
}
|
||||
|
||||
// Inserts a new box before the given index.
|
||||
// Recomputes the bounding box.
|
||||
void BoxWord::InsertBox(int index, const TBOX &box) {
|
||||
if (index < length_)
|
||||
if (index < length_) {
|
||||
boxes_.insert(boxes_.begin() + index, box);
|
||||
else
|
||||
} else {
|
||||
boxes_.push_back(box);
|
||||
}
|
||||
length_ = boxes_.size();
|
||||
ComputeBoundingBox();
|
||||
}
|
||||
@ -175,8 +185,9 @@ void BoxWord::DeleteAllBoxes() {
|
||||
// Computes the bounding box of the word.
|
||||
void BoxWord::ComputeBoundingBox() {
|
||||
bbox_ = TBOX();
|
||||
for (int i = 0; i < length_; ++i)
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
bbox_ += boxes_[i];
|
||||
}
|
||||
}
|
||||
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
@ -185,8 +196,9 @@ void BoxWord::ComputeBoundingBox() {
|
||||
void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
|
||||
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
|
||||
TBOX blob_box = other.blobs[i]->bounding_box();
|
||||
if (blob_box == boxes_[i])
|
||||
if (blob_box == boxes_[i]) {
|
||||
cb(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,8 +115,9 @@ C_OUTLINE::C_OUTLINE(
|
||||
if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) {
|
||||
stepindex -= 2; // cancel there-and-back
|
||||
prevdir = stepindex >= 0 ? step_dir(stepindex) : lastdir;
|
||||
} else
|
||||
} else {
|
||||
prevdir = dir;
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(pos.x() == startpt.x() && pos.y() == startpt.y());
|
||||
do {
|
||||
@ -124,8 +125,9 @@ C_OUTLINE::C_OUTLINE(
|
||||
if (dirdiff == 64 || dirdiff == -64) {
|
||||
start += step(0);
|
||||
stepindex -= 2; // cancel there-and-back
|
||||
for (int i = 0; i < stepindex; ++i)
|
||||
for (int i = 0; i < stepindex; ++i) {
|
||||
set_step(i, step_dir(i + 1));
|
||||
}
|
||||
}
|
||||
} while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64));
|
||||
stepcount = stepindex;
|
||||
@ -221,11 +223,13 @@ C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(nullptr) {
|
||||
}
|
||||
start += step(0);
|
||||
destindex -= 2;
|
||||
for (int i = 0; i < destindex; ++i)
|
||||
for (int i = 0; i < destindex; ++i) {
|
||||
set_step(i, step_dir(i + 1));
|
||||
}
|
||||
}
|
||||
if (destindex >= 4)
|
||||
if (destindex >= 4) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(destindex <= stepcount);
|
||||
stepcount = destindex;
|
||||
@ -269,14 +273,16 @@ int32_t C_OUTLINE::area() const {
|
||||
for (stepindex = 0; stepindex < total_steps; stepindex++) {
|
||||
// all intersected
|
||||
next_step = step(stepindex);
|
||||
if (next_step.x() < 0)
|
||||
if (next_step.x() < 0) {
|
||||
total += pos.y();
|
||||
else if (next_step.x() > 0)
|
||||
} else if (next_step.x() > 0) {
|
||||
total -= pos.y();
|
||||
}
|
||||
pos += next_step;
|
||||
}
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
total += it.data()->area(); // add areas of children
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
@ -294,8 +300,9 @@ int32_t C_OUTLINE::perimeter() const {
|
||||
C_OUTLINE_IT it(const_cast<C_OUTLINE_LIST *>(&children));
|
||||
|
||||
total_steps = pathlength();
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
total_steps += it.data()->pathlength(); // Add perimeters of children.
|
||||
}
|
||||
|
||||
return total_steps;
|
||||
}
|
||||
@ -315,16 +322,18 @@ int32_t C_OUTLINE::outer_area() const {
|
||||
|
||||
pos = start_pos();
|
||||
total_steps = pathlength();
|
||||
if (total_steps == 0)
|
||||
if (total_steps == 0) {
|
||||
return box.area();
|
||||
}
|
||||
total = 0;
|
||||
for (stepindex = 0; stepindex < total_steps; stepindex++) {
|
||||
// all intersected
|
||||
next_step = step(stepindex);
|
||||
if (next_step.x() < 0)
|
||||
if (next_step.x() < 0) {
|
||||
total += pos.y();
|
||||
else if (next_step.x() > 0)
|
||||
} else if (next_step.x() > 0) {
|
||||
total -= pos.y();
|
||||
}
|
||||
pos += next_step;
|
||||
}
|
||||
|
||||
@ -372,8 +381,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
|
||||
next_step = step(stepindex);
|
||||
pos += next_step;
|
||||
if (next_step.x() < 0) {
|
||||
if (looking_for_max_x && pos.x() < min_x)
|
||||
if (looking_for_max_x && pos.x() < min_x) {
|
||||
min_x = pos.x();
|
||||
}
|
||||
if (looking_for_min_x && max_x - pos.x() > threshold) {
|
||||
if (looking_for_max_x) {
|
||||
initial_x = max_x;
|
||||
@ -385,8 +395,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
|
||||
min_x = pos.x(); // reset min
|
||||
}
|
||||
} else if (next_step.x() > 0) {
|
||||
if (looking_for_min_x && pos.x() > max_x)
|
||||
if (looking_for_min_x && pos.x() > max_x) {
|
||||
max_x = pos.x();
|
||||
}
|
||||
if (looking_for_max_x && pos.x() - min_x > threshold) {
|
||||
if (looking_for_min_x) {
|
||||
initial_x = min_x; // remember first min
|
||||
@ -398,8 +409,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
|
||||
max_x = pos.x();
|
||||
}
|
||||
} else if (next_step.y() < 0) {
|
||||
if (looking_for_max_y && pos.y() < min_y)
|
||||
if (looking_for_max_y && pos.y() < min_y) {
|
||||
min_y = pos.y();
|
||||
}
|
||||
if (looking_for_min_y && max_y - pos.y() > threshold) {
|
||||
if (looking_for_max_y) {
|
||||
initial_y = max_y; // remember first max
|
||||
@ -411,8 +423,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
|
||||
min_y = pos.y(); // reset min
|
||||
}
|
||||
} else {
|
||||
if (looking_for_min_y && pos.y() > max_y)
|
||||
if (looking_for_min_y && pos.y() > max_y) {
|
||||
max_y = pos.y();
|
||||
}
|
||||
if (looking_for_max_y && pos.y() - min_y > threshold) {
|
||||
if (looking_for_min_y) {
|
||||
initial_y = min_y; // remember first min
|
||||
@ -426,26 +439,30 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
|
||||
}
|
||||
}
|
||||
if (first_was_max_x && looking_for_min_x) {
|
||||
if (max_x - initial_x > threshold)
|
||||
if (max_x - initial_x > threshold) {
|
||||
total++;
|
||||
else
|
||||
} else {
|
||||
total--;
|
||||
}
|
||||
} else if (!first_was_max_x && looking_for_max_x) {
|
||||
if (initial_x - min_x > threshold)
|
||||
if (initial_x - min_x > threshold) {
|
||||
total++;
|
||||
else
|
||||
} else {
|
||||
total--;
|
||||
}
|
||||
}
|
||||
if (first_was_max_y && looking_for_min_y) {
|
||||
if (max_y - initial_y > threshold)
|
||||
if (max_y - initial_y > threshold) {
|
||||
total++;
|
||||
else
|
||||
} else {
|
||||
total--;
|
||||
}
|
||||
} else if (!first_was_max_y && looking_for_max_y) {
|
||||
if (initial_y - min_y > threshold)
|
||||
if (initial_y - min_y > threshold) {
|
||||
total++;
|
||||
else
|
||||
} else {
|
||||
total--;
|
||||
}
|
||||
}
|
||||
|
||||
return total;
|
||||
@ -463,22 +480,27 @@ bool C_OUTLINE::operator<(const C_OUTLINE &other) const {
|
||||
ICOORD pos; // position of point
|
||||
int32_t stepindex; // index to cstep
|
||||
|
||||
if (!box.overlap(other.box))
|
||||
if (!box.overlap(other.box)) {
|
||||
return false; // can't be contained
|
||||
if (stepcount == 0)
|
||||
}
|
||||
if (stepcount == 0) {
|
||||
return other.box.contains(this->box);
|
||||
}
|
||||
|
||||
pos = start;
|
||||
for (stepindex = 0; stepindex < stepcount && (count = other.winding_number(pos)) == INTERSECTING;
|
||||
stepindex++)
|
||||
stepindex++) {
|
||||
pos += step(stepindex); // try all points
|
||||
}
|
||||
if (count == INTERSECTING) {
|
||||
// all intersected
|
||||
pos = other.start;
|
||||
for (stepindex = 0;
|
||||
stepindex < other.stepcount && (count = winding_number(pos)) == INTERSECTING; stepindex++)
|
||||
stepindex < other.stepcount && (count = winding_number(pos)) == INTERSECTING;
|
||||
stepindex++) {
|
||||
// try other way round
|
||||
pos += other.step(stepindex);
|
||||
}
|
||||
return count == INTERSECTING || count == 0;
|
||||
}
|
||||
return count != 0;
|
||||
@ -505,16 +527,18 @@ int16_t C_OUTLINE::winding_number(ICOORD point) const {
|
||||
// crossing the line
|
||||
if (vec.y() <= 0 && vec.y() + stepvec.y() > 0) {
|
||||
cross = vec * stepvec; // cross product
|
||||
if (cross > 0)
|
||||
if (cross > 0) {
|
||||
count++; // crossing right half
|
||||
else if (cross == 0)
|
||||
} else if (cross == 0) {
|
||||
return INTERSECTING; // going through point
|
||||
}
|
||||
} else if (vec.y() > 0 && vec.y() + stepvec.y() <= 0) {
|
||||
cross = vec * stepvec;
|
||||
if (cross < 0)
|
||||
if (cross < 0) {
|
||||
count--; // crossing back
|
||||
else if (cross == 0)
|
||||
} else if (cross == 0) {
|
||||
return INTERSECTING; // illegal
|
||||
}
|
||||
}
|
||||
vec += stepvec; // sum vectors
|
||||
}
|
||||
@ -534,8 +558,9 @@ int16_t C_OUTLINE::turn_direction() const { // winding number
|
||||
int8_t dirdiff; // direction difference
|
||||
int16_t count; // winding count
|
||||
|
||||
if (stepcount == 0)
|
||||
if (stepcount == 0) {
|
||||
return 128;
|
||||
}
|
||||
count = 0;
|
||||
prevdir = step_dir(stepcount - 1);
|
||||
for (stepindex = 0; stepindex < stepcount; stepindex++) {
|
||||
@ -584,8 +609,9 @@ void C_OUTLINE::move(const ICOORD vec) {
|
||||
box.move(vec);
|
||||
start += vec;
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
it.data()->move(vec); // move child outlines
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -595,16 +621,18 @@ void C_OUTLINE::move(const ICOORD vec) {
|
||||
* (probably due to excessive length).
|
||||
*/
|
||||
bool C_OUTLINE::IsLegallyNested() const {
|
||||
if (stepcount == 0)
|
||||
if (stepcount == 0) {
|
||||
return true;
|
||||
}
|
||||
int64_t parent_area = outer_area();
|
||||
// We aren't going to modify the list, or its contents, but there is
|
||||
// no const iterator.
|
||||
C_OUTLINE_IT child_it(const_cast<C_OUTLINE_LIST *>(&children));
|
||||
for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
|
||||
const C_OUTLINE *child = child_it.data();
|
||||
if (child->outer_area() * parent_area > 0 || !child->IsLegallyNested())
|
||||
if (child->outer_area() * parent_area > 0 || !child->IsLegallyNested()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -659,8 +687,9 @@ static void ComputeGradient(const l_uint32 *data, int wpl, int x, int y, int wid
|
||||
*/
|
||||
static bool EvaluateVerticalDiff(const l_uint32 *data, int wpl, int diff_sign, int x, int y,
|
||||
int height, int *best_diff, int *best_sum, int *best_y) {
|
||||
if (y <= 0 || y >= height)
|
||||
if (y <= 0 || y >= height) {
|
||||
return false;
|
||||
}
|
||||
const l_uint32 *line = data + y * wpl;
|
||||
int pixel1 = GET_DATA_BYTE(line - wpl, x);
|
||||
int pixel2 = GET_DATA_BYTE(line, x);
|
||||
@ -680,8 +709,9 @@ static bool EvaluateVerticalDiff(const l_uint32 *data, int wpl, int diff_sign, i
|
||||
*/
|
||||
static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, int width,
|
||||
int *best_diff, int *best_sum, int *best_x) {
|
||||
if (x <= 0 || x >= width)
|
||||
if (x <= 0 || x >= width) {
|
||||
return false;
|
||||
}
|
||||
int pixel1 = GET_DATA_BYTE(line, x - 1);
|
||||
int pixel2 = GET_DATA_BYTE(line, x);
|
||||
int diff = (pixel2 - pixel1) * diff_sign;
|
||||
@ -709,8 +739,9 @@ static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, i
|
||||
* direction can be used to ignore the vertical steps.
|
||||
*/
|
||||
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
|
||||
if (pixGetDepth(pix) != 8)
|
||||
if (pixGetDepth(pix) != 8) {
|
||||
return;
|
||||
}
|
||||
const l_uint32 *data = pixGetData(pix);
|
||||
int wpl = pixGetWpl(pix);
|
||||
int width = pixGetWidth(pix);
|
||||
@ -780,8 +811,9 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
|
||||
}
|
||||
offsets[s].offset_numerator = ClipToRange<int>(offset, -INT8_MAX, INT8_MAX);
|
||||
offsets[s].pixel_diff = ClipToRange<int>(best_diff, 0, UINT8_MAX);
|
||||
if (negative)
|
||||
if (negative) {
|
||||
gradient = -gradient;
|
||||
}
|
||||
// Compute gradient angle quantized to 256 directions, rotated by 64 (pi/2)
|
||||
// to convert from gradient direction to edge direction.
|
||||
offsets[s].direction = Modulo(FCOORD::binary_angle_plus_pi(gradient.angle()) + 64, 256);
|
||||
@ -1020,10 +1052,11 @@ void C_OUTLINE::increment_step(int s, int increment, ICOORD *pos, int *dir_count
|
||||
int dir_index = chain_code(step_index);
|
||||
dir_counts[dir_index] += increment;
|
||||
ICOORD step_vec = step(step_index);
|
||||
if (step_vec.x() == 0)
|
||||
if (step_vec.x() == 0) {
|
||||
pos_totals[dir_index] += pos->x() * increment;
|
||||
else
|
||||
} else {
|
||||
pos_totals[dir_index] += pos->y() * increment;
|
||||
}
|
||||
*pos += step_vec;
|
||||
}
|
||||
|
||||
|
@ -154,8 +154,9 @@ public:
|
||||
// NOT to be used lightly, as it has to iterate the outline to find out.
|
||||
ICOORD position_at_index(int index) const {
|
||||
ICOORD pos = start;
|
||||
for (int i = 0; i < index; ++i)
|
||||
for (int i = 0; i < index; ++i) {
|
||||
pos += step(i);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
// Returns the sub-pixel accurate position given the integer position pos
|
||||
@ -168,17 +169,19 @@ public:
|
||||
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
|
||||
float offset = offsets[index].offset_numerator;
|
||||
offset /= offsets[index].pixel_diff;
|
||||
if (step_to_next.x() != 0)
|
||||
if (step_to_next.x() != 0) {
|
||||
f_pos.set_y(f_pos.y() + offset);
|
||||
else
|
||||
} else {
|
||||
f_pos.set_x(f_pos.x() + offset);
|
||||
}
|
||||
}
|
||||
return f_pos;
|
||||
}
|
||||
// Returns the step direction for the given index or -1 if there is none.
|
||||
int direction_at_index(int index) const {
|
||||
if (offsets != nullptr && offsets[index].pixel_diff > 0)
|
||||
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
|
||||
return offsets[index].direction;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Returns the edge strength for the given index.
|
||||
@ -186,8 +189,9 @@ public:
|
||||
// is binary). Returns 0 if the gradient direction conflicts with the
|
||||
// step direction, indicating that this position could be skipped.
|
||||
int edge_strength_at_index(int index) const {
|
||||
if (offsets != nullptr)
|
||||
if (offsets != nullptr) {
|
||||
return offsets[index].pixel_diff;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
// Return the step as a chain code (0-3) related to the standard feature
|
||||
|
@ -72,16 +72,18 @@ double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2)
|
||||
// Count the points and find the first and last kNumEndPoints.
|
||||
int pt_count = pts_.size();
|
||||
ICOORD *starts[kNumEndPoints];
|
||||
if (skip_first >= pt_count)
|
||||
if (skip_first >= pt_count) {
|
||||
skip_first = pt_count - 1;
|
||||
}
|
||||
int start_count = 0;
|
||||
int end_i = std::min(skip_first + kNumEndPoints, pt_count);
|
||||
for (int i = skip_first; i < end_i; ++i) {
|
||||
starts[start_count++] = &pts_[i].pt;
|
||||
}
|
||||
ICOORD *ends[kNumEndPoints];
|
||||
if (skip_last >= pt_count)
|
||||
if (skip_last >= pt_count) {
|
||||
skip_last = pt_count - 1;
|
||||
}
|
||||
int end_count = 0;
|
||||
end_i = std::max(0, pt_count - kNumEndPoints - skip_last);
|
||||
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
|
||||
@ -90,10 +92,11 @@ double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2)
|
||||
// 1 or 2 points need special treatment.
|
||||
if (pt_count <= 2) {
|
||||
*pt1 = *starts[0];
|
||||
if (pt_count > 1)
|
||||
if (pt_count > 1) {
|
||||
*pt2 = *ends[0];
|
||||
else
|
||||
} else {
|
||||
*pt2 = *pt1;
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
|
||||
@ -213,12 +216,14 @@ double DetLineFit::EvaluateLineFit() {
|
||||
// and returns the squared upper-quartile error distance.
|
||||
double DetLineFit::ComputeUpperQuartileError() {
|
||||
int num_errors = distances_.size();
|
||||
if (num_errors == 0)
|
||||
if (num_errors == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_errors; ++i) {
|
||||
if (distances_[i].key() < 0)
|
||||
if (distances_[i].key() < 0) {
|
||||
distances_[i].key() = -distances_[i].key();
|
||||
}
|
||||
}
|
||||
// Now get the upper quartile distance.
|
||||
auto index = 3 * num_errors / 4;
|
||||
@ -235,8 +240,9 @@ int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
|
||||
int num_dists = distances_.size();
|
||||
// Get the absolute values of the errors.
|
||||
for (int i = 0; i < num_dists; ++i) {
|
||||
if (distances_[i].key() > threshold)
|
||||
if (distances_[i].key() > threshold) {
|
||||
++num_misfits;
|
||||
}
|
||||
}
|
||||
return num_misfits;
|
||||
}
|
||||
@ -265,8 +271,9 @@ void DetLineFit::ComputeDistances(const ICOORD &start, const ICOORD &end) {
|
||||
// Ignore this point if it overlaps the previous one.
|
||||
int separation = abs(dot - prev_dot);
|
||||
if (separation < line_length * pts_[i].halfwidth ||
|
||||
separation < line_length * pts_[i - 1].halfwidth)
|
||||
separation < line_length * pts_[i - 1].halfwidth) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
distances_.emplace_back(dist, pts_[i].pt);
|
||||
prev_abs_dist = abs_dist;
|
||||
@ -286,8 +293,9 @@ void DetLineFit::ComputeConstrainedDistances(const FCOORD &direction, double min
|
||||
FCOORD pt_vector = pt.pt;
|
||||
// Compute |line_vector||pt_vector|sin(angle between)
|
||||
double dist = direction * pt_vector;
|
||||
if (min_dist <= dist && dist <= max_dist)
|
||||
if (min_dist <= dist && dist <= max_dist) {
|
||||
distances_.emplace_back(dist, pt.pt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -30,19 +30,22 @@ namespace tesseract {
|
||||
// The return value is the tail of the best path.
|
||||
DPPoint *DPPoint::Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
|
||||
DPPoint *points) {
|
||||
if (size <= 0 || max_step < min_step || min_step >= size)
|
||||
return nullptr; // Degenerate, but not necessarily an error.
|
||||
if (size <= 0 || max_step < min_step || min_step >= size) {
|
||||
return nullptr; // Degenerate, but not necessarily an error.
|
||||
}
|
||||
ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true.
|
||||
if (debug)
|
||||
if (debug) {
|
||||
tprintf("min = %d, max=%d\n", min_step, max_step);
|
||||
}
|
||||
// Evaluate the total cost at each point.
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (int offset = min_step; offset <= max_step; ++offset) {
|
||||
DPPoint *prev = offset <= i ? points + i - offset : nullptr;
|
||||
int64_t new_cost = (points[i].*cost_func)(prev);
|
||||
if (points[i].best_prev_ != nullptr && offset > min_step * 2 &&
|
||||
new_cost > points[i].total_cost_)
|
||||
new_cost > points[i].total_cost_) {
|
||||
break; // Find only the first minimum if going over twice the min.
|
||||
}
|
||||
}
|
||||
points[i].total_cost_ += points[i].local_cost_;
|
||||
if (debug) {
|
||||
|
@ -24,19 +24,23 @@ namespace tesseract {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool FontInfo::Serialize(FILE *fp) const {
|
||||
if (!write_info(fp, *this))
|
||||
if (!write_info(fp, *this)) {
|
||||
return false;
|
||||
if (!write_spacing_info(fp, *this))
|
||||
}
|
||||
if (!write_spacing_info(fp, *this)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfo::DeSerialize(TFile *fp) {
|
||||
if (!read_info(fp, this))
|
||||
if (!read_info(fp, this)) {
|
||||
return false;
|
||||
if (!read_spacing_info(fp, this))
|
||||
}
|
||||
if (!read_spacing_info(fp, this)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -64,8 +68,9 @@ bool FontInfoTable::SetContainsFontProperties(int font_id,
|
||||
const std::vector<ScoredFont> &font_set) const {
|
||||
uint32_t properties = at(font_id).properties;
|
||||
for (auto f : font_set) {
|
||||
if (at(f.fontinfo_id).properties == properties)
|
||||
if (at(f.fontinfo_id).properties == properties) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -73,13 +78,15 @@ bool FontInfoTable::SetContainsFontProperties(int font_id,
|
||||
// Returns true if the given set of fonts includes multiple properties.
|
||||
bool FontInfoTable::SetContainsMultipleFontProperties(
|
||||
const std::vector<ScoredFont> &font_set) const {
|
||||
if (font_set.empty())
|
||||
if (font_set.empty()) {
|
||||
return false;
|
||||
}
|
||||
int first_font = font_set[0].fontinfo_id;
|
||||
uint32_t properties = at(first_font).properties;
|
||||
for (int f = 1; f < font_set.size(); ++f) {
|
||||
if (at(font_set[f].fontinfo_id).properties != properties)
|
||||
if (at(font_set[f].fontinfo_id).properties != properties) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -138,12 +145,14 @@ void FontSetDeleteCallback(FontSet fs) {
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(TFile *f, FontInfo *fi) {
|
||||
uint32_t size;
|
||||
if (!f->DeSerialize(&size))
|
||||
if (!f->DeSerialize(&size)) {
|
||||
return false;
|
||||
}
|
||||
char *font_name = new char[size + 1];
|
||||
fi->name = font_name;
|
||||
if (!f->DeSerialize(font_name, size))
|
||||
if (!f->DeSerialize(font_name, size)) {
|
||||
return false;
|
||||
}
|
||||
font_name[size] = '\0';
|
||||
return f->DeSerialize(&fi->properties);
|
||||
}
|
||||
@ -156,11 +165,13 @@ bool write_info(FILE *f, const FontInfo &fi) {
|
||||
|
||||
bool read_spacing_info(TFile *f, FontInfo *fi) {
|
||||
int32_t vec_size, kern_size;
|
||||
if (!f->DeSerialize(&vec_size))
|
||||
if (!f->DeSerialize(&vec_size)) {
|
||||
return false;
|
||||
}
|
||||
ASSERT_HOST(vec_size >= 0);
|
||||
if (vec_size == 0)
|
||||
if (vec_size == 0) {
|
||||
return true;
|
||||
}
|
||||
fi->init_spacing(vec_size);
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
auto *fs = new FontSpacingInfo();
|
||||
@ -185,8 +196,9 @@ bool read_spacing_info(TFile *f, FontInfo *fi) {
|
||||
|
||||
bool write_spacing_info(FILE *f, const FontInfo &fi) {
|
||||
int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
|
||||
if (!tesseract::Serialize(f, &vec_size))
|
||||
if (!tesseract::Serialize(f, &vec_size)) {
|
||||
return false;
|
||||
}
|
||||
int16_t x_gap_invalid = -1;
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = fi.spacing_vec->at(i);
|
||||
@ -211,8 +223,9 @@ bool write_spacing_info(FILE *f, const FontInfo &fi) {
|
||||
}
|
||||
|
||||
bool read_set(TFile *f, FontSet *fs) {
|
||||
if (!f->DeSerialize(&fs->size))
|
||||
if (!f->DeSerialize(&fs->size)) {
|
||||
return false;
|
||||
}
|
||||
fs->configs = new int[fs->size];
|
||||
return f->DeSerialize(&fs->configs[0], fs->size);
|
||||
}
|
||||
|
@ -98,12 +98,14 @@ struct FontInfo {
|
||||
bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const {
|
||||
const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
|
||||
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
|
||||
if (prev_fsi == nullptr || fsi == nullptr)
|
||||
if (prev_fsi == nullptr || fsi == nullptr) {
|
||||
return false;
|
||||
}
|
||||
size_t i = 0;
|
||||
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
|
||||
if (prev_fsi->kerned_unichar_ids[i] == uch_id)
|
||||
if (prev_fsi->kerned_unichar_ids[i] == uch_id) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i < prev_fsi->kerned_unichar_ids.size()) {
|
||||
*spacing = prev_fsi->kerned_x_gaps[i];
|
||||
@ -154,11 +156,13 @@ struct FontSet {
|
||||
int *configs; // FontInfo ids
|
||||
|
||||
bool operator==(const FontSet &rhs) const {
|
||||
if (size != rhs.size)
|
||||
if (size != rhs.size) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (configs[i] != rhs.configs[i])
|
||||
if (configs[i] != rhs.configs[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -86,65 +86,87 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool ImageData::Serialize(TFile *fp) const {
|
||||
if (!fp->Serialize(imagefilename_))
|
||||
if (!fp->Serialize(imagefilename_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(&page_number_))
|
||||
}
|
||||
if (!fp->Serialize(&page_number_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(image_data_))
|
||||
}
|
||||
if (!fp->Serialize(image_data_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(language_))
|
||||
}
|
||||
if (!fp->Serialize(language_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(transcription_))
|
||||
}
|
||||
if (!fp->Serialize(transcription_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(boxes_))
|
||||
}
|
||||
if (!fp->Serialize(boxes_)) {
|
||||
return false;
|
||||
if (!fp->Serialize(box_texts_))
|
||||
}
|
||||
if (!fp->Serialize(box_texts_)) {
|
||||
return false;
|
||||
}
|
||||
int8_t vertical = vertical_text_;
|
||||
return fp->Serialize(&vertical);
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
bool ImageData::DeSerialize(TFile *fp) {
|
||||
if (!fp->DeSerialize(imagefilename_))
|
||||
if (!fp->DeSerialize(imagefilename_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(&page_number_))
|
||||
}
|
||||
if (!fp->DeSerialize(&page_number_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(image_data_))
|
||||
}
|
||||
if (!fp->DeSerialize(image_data_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(language_))
|
||||
}
|
||||
if (!fp->DeSerialize(language_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(transcription_))
|
||||
}
|
||||
if (!fp->DeSerialize(transcription_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(boxes_))
|
||||
}
|
||||
if (!fp->DeSerialize(boxes_)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(box_texts_))
|
||||
}
|
||||
if (!fp->DeSerialize(box_texts_)) {
|
||||
return false;
|
||||
}
|
||||
int8_t vertical = 0;
|
||||
if (!fp->DeSerialize(&vertical))
|
||||
if (!fp->DeSerialize(&vertical)) {
|
||||
return false;
|
||||
}
|
||||
vertical_text_ = vertical != 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
bool ImageData::SkipDeSerialize(TFile *fp) {
|
||||
if (!fp->DeSerializeSkip())
|
||||
if (!fp->DeSerializeSkip()) {
|
||||
return false;
|
||||
}
|
||||
int32_t page_number;
|
||||
if (!fp->DeSerialize(&page_number))
|
||||
if (!fp->DeSerialize(&page_number)) {
|
||||
return false;
|
||||
if (!fp->DeSerializeSkip())
|
||||
}
|
||||
if (!fp->DeSerializeSkip()) {
|
||||
return false;
|
||||
if (!fp->DeSerializeSkip())
|
||||
}
|
||||
if (!fp->DeSerializeSkip()) {
|
||||
return false;
|
||||
if (!fp->DeSerializeSkip())
|
||||
}
|
||||
if (!fp->DeSerializeSkip()) {
|
||||
return false;
|
||||
if (!fp->DeSerializeSkip(sizeof(TBOX)))
|
||||
}
|
||||
if (!fp->DeSerializeSkip(sizeof(TBOX))) {
|
||||
return false;
|
||||
}
|
||||
int32_t number;
|
||||
if (!fp->DeSerialize(&number))
|
||||
if (!fp->DeSerialize(&number)) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < number; i++) {
|
||||
if (!fp->DeSerializeSkip()) {
|
||||
return false;
|
||||
@ -200,10 +222,12 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
|
||||
target_height = std::min(input_height, max_height);
|
||||
}
|
||||
float im_factor = static_cast<float>(target_height) / input_height;
|
||||
if (scaled_width != nullptr)
|
||||
if (scaled_width != nullptr) {
|
||||
*scaled_width = IntCastRounded(im_factor * input_width);
|
||||
if (scaled_height != nullptr)
|
||||
}
|
||||
if (scaled_height != nullptr) {
|
||||
*scaled_height = target_height;
|
||||
}
|
||||
// Get the scaled image.
|
||||
Pix *pix = pixScale(src_pix, im_factor, im_factor);
|
||||
if (pix == nullptr) {
|
||||
@ -212,10 +236,12 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
|
||||
pixDestroy(&src_pix);
|
||||
return nullptr;
|
||||
}
|
||||
if (scaled_width != nullptr)
|
||||
if (scaled_width != nullptr) {
|
||||
*scaled_width = pixGetWidth(pix);
|
||||
if (scaled_height != nullptr)
|
||||
}
|
||||
if (scaled_height != nullptr) {
|
||||
*scaled_height = pixGetHeight(pix);
|
||||
}
|
||||
pixDestroy(&src_pix);
|
||||
if (boxes != nullptr) {
|
||||
// Get the boxes.
|
||||
@ -230,8 +256,9 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
|
||||
boxes->push_back(box);
|
||||
}
|
||||
}
|
||||
if (scale_factor != nullptr)
|
||||
if (scale_factor != nullptr) {
|
||||
*scale_factor = im_factor;
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
@ -246,8 +273,9 @@ void ImageData::Display() const {
|
||||
const int kTextSize = 64;
|
||||
// Draw the image.
|
||||
Pix *pix = GetPix();
|
||||
if (pix == nullptr)
|
||||
if (pix == nullptr) {
|
||||
return;
|
||||
}
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
auto *win =
|
||||
@ -259,8 +287,9 @@ void ImageData::Display() const {
|
||||
win->Pen(ScrollView::RED);
|
||||
win->Brush(ScrollView::NONE);
|
||||
int text_size = kTextSize;
|
||||
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
|
||||
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size) {
|
||||
text_size = boxes_[0].height() * 2;
|
||||
}
|
||||
win->TextAttributes("Arial", text_size, false, false, false);
|
||||
if (!boxes_.empty()) {
|
||||
for (int b = 0; b < boxes_.size(); ++b) {
|
||||
@ -284,8 +313,9 @@ void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::
|
||||
const std::vector<int> &box_pages) {
|
||||
// Copy the boxes and make the transcription.
|
||||
for (int i = 0; i < box_pages.size(); ++i) {
|
||||
if (page_number_ >= 0 && box_pages[i] != page_number_)
|
||||
if (page_number_ >= 0 && box_pages[i] != page_number_) {
|
||||
continue;
|
||||
}
|
||||
transcription_ += texts[i];
|
||||
boxes_.push_back(boxes[i]);
|
||||
box_texts_.push_back(texts[i]);
|
||||
@ -402,11 +432,13 @@ void DocumentData::AddPageToDocument(ImageData *page) {
|
||||
// thread.
|
||||
void DocumentData::LoadPageInBackground(int index) {
|
||||
ImageData *page = nullptr;
|
||||
if (IsPageAvailable(index, &page))
|
||||
if (IsPageAvailable(index, &page)) {
|
||||
return;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(pages_mutex_);
|
||||
if (pages_offset_ == index)
|
||||
if (pages_offset_ == index) {
|
||||
return;
|
||||
}
|
||||
pages_offset_ = index;
|
||||
for (auto page : pages_) {
|
||||
delete page;
|
||||
@ -427,8 +459,9 @@ const ImageData *DocumentData::GetPage(int index) {
|
||||
pages_mutex_.lock();
|
||||
bool needs_loading = pages_offset_ != index;
|
||||
pages_mutex_.unlock();
|
||||
if (needs_loading)
|
||||
if (needs_loading) {
|
||||
LoadPageInBackground(index);
|
||||
}
|
||||
// We can't directly load the page, or the background load will delete it
|
||||
// while the caller is using it, so give it a chance to work.
|
||||
std::this_thread::yield();
|
||||
@ -569,8 +602,9 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
|
||||
// In the round-robin case, each DocumentData handles restricting its content
|
||||
// to its fair share of memory. In the sequential case, DocumentCache
|
||||
// determines which DocumentDatas are held entirely in memory.
|
||||
if (cache_strategy_ == CS_ROUND_ROBIN)
|
||||
if (cache_strategy_ == CS_ROUND_ROBIN) {
|
||||
fair_share_memory = max_memory_ / filenames.size();
|
||||
}
|
||||
for (auto filename : filenames) {
|
||||
auto *document = new DocumentData(filename);
|
||||
document->SetDocument(filename.c_str(), fair_share_memory, reader);
|
||||
@ -578,8 +612,9 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
|
||||
}
|
||||
if (!documents_.empty()) {
|
||||
// Try to get the first page now to verify the list of filenames.
|
||||
if (GetPageBySerial(0) != nullptr)
|
||||
if (GetPageBySerial(0) != nullptr) {
|
||||
return true;
|
||||
}
|
||||
tprintf("Load of page 0 failed!\n");
|
||||
}
|
||||
return false;
|
||||
@ -607,8 +642,9 @@ int DocumentCache::TotalPages() {
|
||||
if (cache_strategy_ == CS_SEQUENTIAL) {
|
||||
// In sequential mode, we assume each doc has the same number of pages
|
||||
// whether it is true or not.
|
||||
if (num_pages_per_doc_ == 0)
|
||||
if (num_pages_per_doc_ == 0) {
|
||||
GetPageSequential(0);
|
||||
}
|
||||
return num_pages_per_doc_ * documents_.size();
|
||||
}
|
||||
int total_pages = 0;
|
||||
@ -650,8 +686,9 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
|
||||
ASSERT_HOST(num_pages_per_doc_ > 0);
|
||||
}
|
||||
// Get rid of zero now if we don't need it.
|
||||
if (serial / num_pages_per_doc_ % num_docs > 0)
|
||||
if (serial / num_pages_per_doc_ % num_docs > 0) {
|
||||
documents_[0]->UnCache();
|
||||
}
|
||||
}
|
||||
int doc_index = serial / num_pages_per_doc_ % num_docs;
|
||||
const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
|
||||
@ -694,8 +731,9 @@ int DocumentCache::CountNeighbourDocs(int index, int dir) {
|
||||
int num_docs = documents_.size();
|
||||
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
|
||||
int offset_index = (index + offset + num_docs) % num_docs;
|
||||
if (!documents_[offset_index]->IsCached())
|
||||
if (!documents_[offset_index]->IsCached()) {
|
||||
return offset - dir;
|
||||
}
|
||||
}
|
||||
return num_docs;
|
||||
}
|
||||
|
@ -312,10 +312,11 @@ public:
|
||||
// Returns a page by serial number using the current cache_strategy_ to
|
||||
// determine the mapping from serial number to page.
|
||||
const ImageData *GetPageBySerial(int serial) {
|
||||
if (cache_strategy_ == CS_SEQUENTIAL)
|
||||
if (cache_strategy_ == CS_SEQUENTIAL) {
|
||||
return GetPageSequential(serial);
|
||||
else
|
||||
} else {
|
||||
return GetPageRoundRobin(serial);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<DocumentData *> &documents() const {
|
||||
|
@ -80,8 +80,9 @@ void LLSQ::add(const LLSQ &other) {
|
||||
**********************************************************************/
|
||||
|
||||
void LLSQ::remove(double x, double y) { // delete an element
|
||||
if (total_weight <= 0.0) // illegal
|
||||
if (total_weight <= 0.0) { // illegal
|
||||
EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
|
||||
}
|
||||
total_weight--; // count elements
|
||||
sigx -= x; // update accumulators
|
||||
sigy -= y;
|
||||
@ -99,10 +100,11 @@ void LLSQ::remove(double x, double y) { // delete an element
|
||||
double LLSQ::m() const { // get gradient
|
||||
double covar = covariance();
|
||||
double x_var = x_variance();
|
||||
if (x_var != 0.0)
|
||||
if (x_var != 0.0) {
|
||||
return covar / x_var;
|
||||
else
|
||||
} else {
|
||||
return 0.0; // too little
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -112,10 +114,11 @@ double LLSQ::m() const { // get gradient
|
||||
**********************************************************************/
|
||||
|
||||
double LLSQ::c(double m) const { // get constant
|
||||
if (total_weight > 0.0)
|
||||
if (total_weight > 0.0) {
|
||||
return (sigy - m * sigx) / total_weight;
|
||||
else
|
||||
} else {
|
||||
return 0; // too little
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -129,10 +132,11 @@ double LLSQ::rms(double m, double c) const { // get error
|
||||
|
||||
if (total_weight > 0) {
|
||||
error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (total_weight * c - 2 * sigy);
|
||||
if (error >= 0)
|
||||
if (error >= 0) {
|
||||
error = std::sqrt(error / total_weight); // sqrt of mean
|
||||
else
|
||||
} else {
|
||||
error = 0;
|
||||
}
|
||||
} else {
|
||||
error = 0; // too little
|
||||
}
|
||||
@ -151,8 +155,9 @@ double LLSQ::pearson() const { // get correlation
|
||||
double covar = covariance();
|
||||
if (covar != 0.0) {
|
||||
double var_product = x_variance() * y_variance();
|
||||
if (var_product > 0.0)
|
||||
if (var_product > 0.0) {
|
||||
r = covar / std::sqrt(var_product);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
@ -74,22 +74,25 @@ public:
|
||||
|
||||
// Returns the covariance.
|
||||
double covariance() const {
|
||||
if (total_weight > 0.0)
|
||||
if (total_weight > 0.0) {
|
||||
return (sigxy - sigx * sigy / total_weight) / total_weight;
|
||||
else
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
double x_variance() const {
|
||||
if (total_weight > 0.0)
|
||||
if (total_weight > 0.0) {
|
||||
return (sigxx - sigx * sigx / total_weight) / total_weight;
|
||||
else
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
double y_variance() const {
|
||||
if (total_weight > 0.0)
|
||||
if (total_weight > 0.0) {
|
||||
return (sigyy - sigy * sigy / total_weight) / total_weight;
|
||||
else
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -34,13 +34,15 @@ MATRIX::~MATRIX() = default;
|
||||
|
||||
// Returns true if there are any real classification results.
|
||||
bool MATRIX::Classified(int col, int row, int wildcard_id) const {
|
||||
if (get(col, row) == NOT_CLASSIFIED)
|
||||
if (get(col, row) == NOT_CLASSIFIED) {
|
||||
return false;
|
||||
}
|
||||
BLOB_CHOICE_IT b_it(get(col, row));
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
BLOB_CHOICE *choice = b_it.data();
|
||||
if (choice->IsClassified())
|
||||
if (choice->IsClassified()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -118,8 +120,9 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
|
||||
for (col = 0; col < dim; ++col) {
|
||||
for (row = col; row < dim && row < col + band_width; ++row) {
|
||||
BLOB_CHOICE_LIST *rating = this->get(col, row);
|
||||
if (rating == NOT_CLASSIFIED)
|
||||
if (rating == NOT_CLASSIFIED) {
|
||||
continue;
|
||||
}
|
||||
BLOB_CHOICE_IT b_it(rating);
|
||||
tprintf("col=%d row=%d ", col, row);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
@ -131,13 +134,15 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
|
||||
tprintf("\n");
|
||||
}
|
||||
tprintf("\n");
|
||||
for (col = 0; col < dim; ++col)
|
||||
for (col = 0; col < dim; ++col) {
|
||||
tprintf("\t%d", col);
|
||||
}
|
||||
tprintf("\n");
|
||||
for (row = 0; row < dim; ++row) {
|
||||
for (col = 0; col <= row; ++col) {
|
||||
if (col == 0)
|
||||
if (col == 0) {
|
||||
tprintf("%d\t", row);
|
||||
}
|
||||
if (row >= col + band_width) {
|
||||
tprintf(" \t");
|
||||
continue;
|
||||
@ -149,8 +154,9 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
|
||||
++counter;
|
||||
if (counter == 3)
|
||||
if (counter == 3) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tprintf("\t");
|
||||
} else {
|
||||
|
@ -64,8 +64,9 @@ public:
|
||||
int new_size = dim1 * dim2;
|
||||
array_ = new T[new_size];
|
||||
size_allocated_ = new_size;
|
||||
for (int i = 0; i < size_allocated_; ++i)
|
||||
for (int i = 0; i < size_allocated_; ++i) {
|
||||
array_[i] = empty_;
|
||||
}
|
||||
}
|
||||
// Default constructor for array allocation. Use Resize to set the size.
|
||||
GENERIC_2D_ARRAY()
|
||||
@ -100,8 +101,9 @@ public:
|
||||
dim1_ = size1;
|
||||
dim2_ = size2;
|
||||
// Fill the padding data so it isn't uninitialized.
|
||||
for (int i = size1 * size2; i < new_size; ++i)
|
||||
for (int i = size1 * size2; i < new_size; ++i) {
|
||||
array_[i] = empty_;
|
||||
}
|
||||
}
|
||||
|
||||
// Reallocate the array to the given size. Does not keep old data.
|
||||
@ -138,26 +140,31 @@ public:
|
||||
// Sets all the elements of the array to the empty value.
|
||||
void Clear() {
|
||||
int total_size = num_elements();
|
||||
for (int i = 0; i < total_size; ++i)
|
||||
for (int i = 0; i < total_size; ++i) {
|
||||
array_[i] = empty_;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
// Only works with bitwise-serializeable types!
|
||||
bool Serialize(FILE *fp) const {
|
||||
if (!SerializeSize(fp))
|
||||
if (!SerializeSize(fp)) {
|
||||
return false;
|
||||
if (!tesseract::Serialize(fp, &empty_))
|
||||
}
|
||||
if (!tesseract::Serialize(fp, &empty_)) {
|
||||
return false;
|
||||
}
|
||||
int size = num_elements();
|
||||
return tesseract::Serialize(fp, &array_[0], size);
|
||||
}
|
||||
|
||||
bool Serialize(TFile *fp) const {
|
||||
if (!SerializeSize(fp))
|
||||
if (!SerializeSize(fp)) {
|
||||
return false;
|
||||
if (!fp->Serialize(&empty_))
|
||||
}
|
||||
if (!fp->Serialize(&empty_)) {
|
||||
return false;
|
||||
}
|
||||
int size = num_elements();
|
||||
return fp->Serialize(&array_[0], size);
|
||||
}
|
||||
@ -166,18 +173,23 @@ public:
|
||||
// Only works with bitwise-serializeable types!
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, FILE *fp) {
|
||||
if (!DeSerializeSize(swap, fp))
|
||||
if (!DeSerializeSize(swap, fp)) {
|
||||
return false;
|
||||
if (!tesseract::DeSerialize(fp, &empty_))
|
||||
return false;
|
||||
if (swap)
|
||||
ReverseN(&empty_, sizeof(empty_));
|
||||
int size = num_elements();
|
||||
if (!tesseract::DeSerialize(fp, &array_[0], size))
|
||||
}
|
||||
if (!tesseract::DeSerialize(fp, &empty_)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
for (int i = 0; i < size; ++i)
|
||||
ReverseN(&empty_, sizeof(empty_));
|
||||
}
|
||||
int size = num_elements();
|
||||
if (!tesseract::DeSerialize(fp, &array_[0], size)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ReverseN(&array_[i], sizeof(array_[i]));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -190,14 +202,17 @@ public:
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
// Assumes a T::Serialize(FILE*) const function.
|
||||
bool SerializeClasses(FILE *fp) const {
|
||||
if (!SerializeSize(fp))
|
||||
if (!SerializeSize(fp)) {
|
||||
return false;
|
||||
if (!empty_.Serialize(fp))
|
||||
}
|
||||
if (!empty_.Serialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (!array_[i].Serialize(fp))
|
||||
if (!array_[i].Serialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -206,14 +221,17 @@ public:
|
||||
// Assumes a T::DeSerialize(bool swap, FILE*) function.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeClasses(bool swap, FILE *fp) {
|
||||
if (!DeSerializeSize(swap, fp))
|
||||
if (!DeSerializeSize(swap, fp)) {
|
||||
return false;
|
||||
if (!empty_.DeSerialize(swap, fp))
|
||||
}
|
||||
if (!empty_.DeSerialize(swap, fp)) {
|
||||
return false;
|
||||
}
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (!array_[i].DeSerialize(swap, fp))
|
||||
if (!array_[i].DeSerialize(swap, fp)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -328,16 +346,18 @@ public:
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const T &value = array_[i];
|
||||
if (value < rangemin || rangemax < value)
|
||||
if (value < rangemin || rangemax < value) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Normalize the whole array.
|
||||
double Normalize() {
|
||||
int size = num_elements();
|
||||
if (size <= 0)
|
||||
if (size <= 0) {
|
||||
return 0.0;
|
||||
}
|
||||
// Compute the mean.
|
||||
double mean = 0.0;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
@ -364,14 +384,16 @@ public:
|
||||
// Returns the maximum value of the array.
|
||||
T Max() const {
|
||||
int size = num_elements();
|
||||
if (size <= 0)
|
||||
if (size <= 0) {
|
||||
return empty_;
|
||||
}
|
||||
// Compute the max.
|
||||
T max_value = array_[0];
|
||||
for (int i = 1; i < size; ++i) {
|
||||
const T &value = array_[i];
|
||||
if (value > max_value)
|
||||
if (value > max_value) {
|
||||
max_value = value;
|
||||
}
|
||||
}
|
||||
return max_value;
|
||||
}
|
||||
@ -379,14 +401,16 @@ public:
|
||||
// Returns the maximum absolute value of the array.
|
||||
T MaxAbs() const {
|
||||
int size = num_elements();
|
||||
if (size <= 0)
|
||||
if (size <= 0) {
|
||||
return empty_;
|
||||
}
|
||||
// Compute the max.
|
||||
T max_abs = static_cast<T>(0);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
T value = static_cast<T>(fabs(array_[i]));
|
||||
if (value > max_abs)
|
||||
if (value > max_abs) {
|
||||
max_abs = value;
|
||||
}
|
||||
}
|
||||
return max_abs;
|
||||
}
|
||||
@ -454,19 +478,24 @@ public:
|
||||
// src_step represents the stride in the src between each adjacent group
|
||||
// in the destination.
|
||||
int num_replicas = 1, move_size = 1, src_step = 1;
|
||||
for (int d = 0; d < min_d; ++d)
|
||||
for (int d = 0; d < min_d; ++d) {
|
||||
num_replicas *= dims[d];
|
||||
for (int d = max_d + 1; d < num_dims; ++d)
|
||||
}
|
||||
for (int d = max_d + 1; d < num_dims; ++d) {
|
||||
move_size *= dims[d];
|
||||
for (int d = src_dim + 1; d < num_dims; ++d)
|
||||
}
|
||||
for (int d = src_dim + 1; d < num_dims; ++d) {
|
||||
src_step *= dims[d];
|
||||
if (src_dim > dest_dim)
|
||||
}
|
||||
if (src_dim > dest_dim) {
|
||||
src_step *= dims[src_dim];
|
||||
}
|
||||
// wrap_size is the size of a single replica, being the amount that is
|
||||
// handled num_replicas times.
|
||||
int wrap_size = move_size;
|
||||
for (int d = min_d; d <= max_d; ++d)
|
||||
for (int d = min_d; d <= max_d; ++d) {
|
||||
wrap_size *= dims[d];
|
||||
}
|
||||
result->ResizeNoInit(dim1_, dim2_);
|
||||
result->empty_ = empty_;
|
||||
const T *src = array_;
|
||||
@ -487,8 +516,9 @@ public:
|
||||
int size = num_elements();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
T matrix_cell = array_[i];
|
||||
if (matrix_cell != empty_)
|
||||
if (matrix_cell != empty_) {
|
||||
delete matrix_cell;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -496,15 +526,17 @@ protected:
|
||||
// Factored helper to serialize the size.
|
||||
bool SerializeSize(FILE *fp) const {
|
||||
uint32_t size = dim1_;
|
||||
if (!tesseract::Serialize(fp, &size))
|
||||
if (!tesseract::Serialize(fp, &size)) {
|
||||
return false;
|
||||
}
|
||||
size = dim2_;
|
||||
return tesseract::Serialize(fp, &size);
|
||||
}
|
||||
bool SerializeSize(TFile *fp) const {
|
||||
uint32_t size = dim1_;
|
||||
if (!fp->Serialize(&size))
|
||||
if (!fp->Serialize(&size)) {
|
||||
return false;
|
||||
}
|
||||
size = dim2_;
|
||||
return fp->Serialize(&size);
|
||||
}
|
||||
@ -512,33 +544,41 @@ protected:
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeSize(bool swap, FILE *fp) {
|
||||
uint32_t size1, size2;
|
||||
if (!tesseract::DeSerialize(fp, &size1))
|
||||
if (!tesseract::DeSerialize(fp, &size1)) {
|
||||
return false;
|
||||
if (!tesseract::DeSerialize(fp, &size2))
|
||||
}
|
||||
if (!tesseract::DeSerialize(fp, &size2)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
ReverseN(&size1, sizeof(size1));
|
||||
ReverseN(&size2, sizeof(size2));
|
||||
}
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (size1 > UINT16_MAX)
|
||||
if (size1 > UINT16_MAX) {
|
||||
return false;
|
||||
if (size2 > UINT16_MAX)
|
||||
}
|
||||
if (size2 > UINT16_MAX) {
|
||||
return false;
|
||||
}
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
bool DeSerializeSize(TFile *fp) {
|
||||
int32_t size1, size2;
|
||||
if (!fp->DeSerialize(&size1))
|
||||
if (!fp->DeSerialize(&size1)) {
|
||||
return false;
|
||||
if (!fp->DeSerialize(&size2))
|
||||
}
|
||||
if (!fp->DeSerialize(&size2)) {
|
||||
return false;
|
||||
}
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (size1 > UINT16_MAX)
|
||||
if (size1 > UINT16_MAX) {
|
||||
return false;
|
||||
if (size2 > UINT16_MAX)
|
||||
}
|
||||
if (size2 > UINT16_MAX) {
|
||||
return false;
|
||||
}
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
@ -667,10 +707,12 @@ struct MATRIX_COORD {
|
||||
// making a new column at ind+1.
|
||||
void MapForSplit(int ind) {
|
||||
ASSERT_HOST(row >= col);
|
||||
if (col > ind)
|
||||
if (col > ind) {
|
||||
++col;
|
||||
if (row >= ind)
|
||||
}
|
||||
if (row >= ind) {
|
||||
++row;
|
||||
}
|
||||
ASSERT_HOST(row >= col);
|
||||
}
|
||||
|
||||
|
@ -55,19 +55,21 @@ DIR128::DIR128( // from fcoord
|
||||
|
||||
low = 0;
|
||||
if (fc.y() == 0) {
|
||||
if (fc.x() >= 0)
|
||||
if (fc.x() >= 0) {
|
||||
dir = 0;
|
||||
else
|
||||
} else {
|
||||
dir = MODULUS / 2;
|
||||
}
|
||||
return;
|
||||
}
|
||||
high = MODULUS;
|
||||
do {
|
||||
current = (high + low) / 2;
|
||||
if (dirtab[current] * fc >= 0)
|
||||
if (dirtab[current] * fc >= 0) {
|
||||
low = current;
|
||||
else
|
||||
} else {
|
||||
high = current;
|
||||
}
|
||||
} while (high - low > 1);
|
||||
dir = low;
|
||||
}
|
||||
|
@ -35,8 +35,9 @@ public:
|
||||
DIR128( // constructor
|
||||
int16_t value) { // value to assign
|
||||
value %= MODULUS; // modulo arithmetic
|
||||
if (value < 0)
|
||||
if (value < 0) {
|
||||
value += MODULUS; // done properly
|
||||
}
|
||||
dir = static_cast<int8_t>(value);
|
||||
}
|
||||
DIR128(const FCOORD fc); // quantize vector
|
||||
@ -44,8 +45,9 @@ public:
|
||||
DIR128 &operator=( // assign of int16_t
|
||||
int16_t value) { // value to assign
|
||||
value %= MODULUS; // modulo arithmetic
|
||||
if (value < 0)
|
||||
if (value < 0) {
|
||||
value += MODULUS; // done properly
|
||||
}
|
||||
dir = static_cast<int8_t>(value);
|
||||
return *this;
|
||||
}
|
||||
@ -55,10 +57,11 @@ public:
|
||||
// result
|
||||
int16_t result = dir - minus.dir;
|
||||
|
||||
if (result > MODULUS / 2)
|
||||
if (result > MODULUS / 2) {
|
||||
result -= MODULUS; // get in range
|
||||
else if (result < -MODULUS / 2)
|
||||
} else if (result < -MODULUS / 2) {
|
||||
result += MODULUS;
|
||||
}
|
||||
return static_cast<int8_t>(result);
|
||||
}
|
||||
DIR128 operator+( // addition
|
||||
|
@ -51,10 +51,11 @@ DENORM &DENORM::operator=(const DENORM &src) {
|
||||
predecessor_ = src.predecessor_;
|
||||
pix_ = src.pix_;
|
||||
block_ = src.block_;
|
||||
if (src.rotation_ == nullptr)
|
||||
if (src.rotation_ == nullptr) {
|
||||
rotation_ = nullptr;
|
||||
else
|
||||
} else {
|
||||
rotation_ = new FCOORD(*src.rotation_);
|
||||
}
|
||||
x_origin_ = src.x_origin_;
|
||||
y_origin_ = src.y_origin_;
|
||||
x_scale_ = src.x_scale_;
|
||||
@ -99,10 +100,11 @@ void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
|
||||
float final_yshift) {
|
||||
Clear();
|
||||
block_ = block;
|
||||
if (rotation == nullptr)
|
||||
if (rotation == nullptr) {
|
||||
rotation_ = nullptr;
|
||||
else
|
||||
} else {
|
||||
rotation_ = new FCOORD(*rotation);
|
||||
}
|
||||
predecessor_ = predecessor;
|
||||
x_origin_ = x_origin;
|
||||
y_origin_ = y_origin;
|
||||
@ -185,15 +187,17 @@ static void ComputeRunlengthImage(const TBOX &box,
|
||||
int x_edge = ClipToRange(x_coord, 0, width);
|
||||
int gap = x_edge - x;
|
||||
while (x < x_edge) {
|
||||
if (gap < (*minruns)(x, iy))
|
||||
if (gap < (*minruns)(x, iy)) {
|
||||
(*minruns)(x, iy) = gap;
|
||||
}
|
||||
++x;
|
||||
}
|
||||
}
|
||||
int gap = width - x;
|
||||
while (x < width) {
|
||||
if (gap < (*minruns)(x, iy))
|
||||
if (gap < (*minruns)(x, iy)) {
|
||||
(*minruns)(x, iy) = gap;
|
||||
}
|
||||
++x;
|
||||
}
|
||||
}
|
||||
@ -228,8 +232,9 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
|
||||
for (int iy = 0; iy < height; ++iy) {
|
||||
for (int ix = 0; ix < width; ++ix) {
|
||||
int run = minruns(ix, iy);
|
||||
if (run == 0)
|
||||
if (run == 0) {
|
||||
run = 1;
|
||||
}
|
||||
float density = 1.0f / run;
|
||||
hx[ix] += density;
|
||||
hy[iy] += density;
|
||||
@ -315,8 +320,9 @@ void DENORM::LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const {
|
||||
} else {
|
||||
translated.set_x(translated.x() * x_scale_);
|
||||
translated.set_y(translated.y() * y_scale_);
|
||||
if (rotation_ != nullptr)
|
||||
if (rotation_ != nullptr) {
|
||||
translated.rotate(*rotation_);
|
||||
}
|
||||
}
|
||||
transformed->set_x(translated.x() + final_xshift_);
|
||||
transformed->set_y(translated.y() + final_yshift_);
|
||||
@ -411,10 +417,12 @@ void DENORM::DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD
|
||||
void DENORM::LocalNormBlob(TBLOB *blob) const {
|
||||
ICOORD translation(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_));
|
||||
blob->Move(translation);
|
||||
if (y_scale_ != 1.0f)
|
||||
if (y_scale_ != 1.0f) {
|
||||
blob->Scale(y_scale_);
|
||||
if (rotation_ != nullptr)
|
||||
}
|
||||
if (rotation_ != nullptr) {
|
||||
blob->Rotate(*rotation_);
|
||||
}
|
||||
translation.set_x(IntCastRounded(final_xshift_));
|
||||
translation.set_y(IntCastRounded(final_yshift_));
|
||||
blob->Move(translation);
|
||||
@ -431,8 +439,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
|
||||
*min_xht = 0.0f;
|
||||
*max_xht = FLT_MAX;
|
||||
|
||||
if (!unicharset.top_bottom_useful())
|
||||
if (!unicharset.top_bottom_useful()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Clip the top and bottom to the limit of normalized feature space.
|
||||
int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
|
||||
@ -441,8 +450,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
|
||||
double tolerance = y_scale();
|
||||
// If the script doesn't have upper and lower-case characters, widen the
|
||||
// tolerance to allow sloppy baseline/x-height estimates.
|
||||
if (!unicharset.script_has_upper_lower())
|
||||
if (!unicharset.script_has_upper_lower()) {
|
||||
tolerance = y_scale() * kSloppyTolerance;
|
||||
}
|
||||
|
||||
int min_bottom, max_bottom, min_top, max_top;
|
||||
unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||
@ -479,8 +489,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
|
||||
// and to allow the large caps in small caps to accept the xheight of the
|
||||
// small caps, add kBlnBaselineOffset to chars with a maximum max, and have
|
||||
// a top already at a significantly high position.
|
||||
if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2)
|
||||
if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2) {
|
||||
max_top += kBlnBaselineOffset;
|
||||
}
|
||||
top -= bln_yshift;
|
||||
int height = top - kBlnBaselineOffset;
|
||||
double min_height = min_top - kBlnBaselineOffset - tolerance;
|
||||
@ -502,8 +513,9 @@ void DENORM::Print() const {
|
||||
tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), pixGetHeight(pix_),
|
||||
pixGetDepth(pix_));
|
||||
}
|
||||
if (inverse_)
|
||||
if (inverse_) {
|
||||
tprintf("Inverse\n");
|
||||
}
|
||||
if (block_ && block_->re_rotation().x() != 1.0f) {
|
||||
tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), block_->re_rotation().y());
|
||||
}
|
||||
@ -520,8 +532,9 @@ void DENORM::Print() const {
|
||||
tprintf("\n");
|
||||
} else {
|
||||
tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_);
|
||||
if (rotation_ != nullptr)
|
||||
if (rotation_ != nullptr) {
|
||||
tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y());
|
||||
}
|
||||
}
|
||||
tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_xshift_);
|
||||
if (predecessor_ != nullptr) {
|
||||
|
@ -245,8 +245,9 @@ public:
|
||||
inverse_ = value;
|
||||
}
|
||||
const DENORM *RootDenorm() const {
|
||||
if (predecessor_ != nullptr)
|
||||
if (predecessor_ != nullptr) {
|
||||
return predecessor_->RootDenorm();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
const DENORM *predecessor() const {
|
||||
|
@ -200,13 +200,15 @@ void BLOCK::print( // print list of sides
|
||||
|
||||
if (dump) {
|
||||
tprintf("Left side coords are:\n");
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
|
||||
}
|
||||
tprintf("\n");
|
||||
tprintf("Right side coords are:\n");
|
||||
it.set_to_list(&pdblk.rightside);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
@ -226,8 +228,9 @@ BLOCK &BLOCK::operator=( // assignment
|
||||
kerning = source.kerning;
|
||||
spacing = source.spacing;
|
||||
filename = source.filename; // STRINGs assign ok
|
||||
if (!rows.empty())
|
||||
if (!rows.empty()) {
|
||||
rows.clear();
|
||||
}
|
||||
re_rotation_ = source.re_rotation_;
|
||||
classify_rotation_ = source.classify_rotation_;
|
||||
skew_ = source.skew_;
|
||||
@ -247,8 +250,9 @@ BLOCK &BLOCK::operator=( // assignment
|
||||
static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
||||
bool found = false;
|
||||
*margin = 0;
|
||||
if (segments->empty())
|
||||
if (segments->empty()) {
|
||||
return found;
|
||||
}
|
||||
ICOORDELT_IT seg_it(segments);
|
||||
for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
|
||||
int cur_margin = x - seg_it.data()->x();
|
||||
@ -277,8 +281,9 @@ static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
||||
static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
|
||||
bool found = false;
|
||||
*margin = 0;
|
||||
if (segments->empty())
|
||||
if (segments->empty()) {
|
||||
return found;
|
||||
}
|
||||
ICOORDELT_IT seg_it(segments);
|
||||
for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
|
||||
int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x;
|
||||
@ -352,10 +357,12 @@ void BLOCK::compute_row_margins() {
|
||||
if (bbox.bottom() <= mid_second_line) {
|
||||
// we found a real drop cap
|
||||
first_row->set_has_drop_cap(true);
|
||||
if (drop_cap_bottom > bbox.bottom())
|
||||
if (drop_cap_bottom > bbox.bottom()) {
|
||||
drop_cap_bottom = bbox.bottom();
|
||||
if (drop_cap_right < bbox.right())
|
||||
}
|
||||
if (drop_cap_right < bbox.right()) {
|
||||
drop_cap_right = bbox.right();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -374,10 +381,12 @@ void BLOCK::compute_row_margins() {
|
||||
|
||||
if (row_box.top() >= drop_cap_bottom) {
|
||||
int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
|
||||
if (drop_cap_distance < 0)
|
||||
if (drop_cap_distance < 0) {
|
||||
drop_cap_distance = 0;
|
||||
if (drop_cap_distance < left_margin)
|
||||
}
|
||||
if (drop_cap_distance < left_margin) {
|
||||
left_margin = drop_cap_distance;
|
||||
}
|
||||
}
|
||||
|
||||
int right_y = row->base_line(row_box.right()) + row->x_height();
|
||||
@ -469,8 +478,9 @@ void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs
|
||||
BLOCK_IT block_it(block_list);
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||
BLOCK *block = block_it.data();
|
||||
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
|
||||
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
|
||||
continue; // Don't touch non-text blocks.
|
||||
}
|
||||
// Iterate over all rows in the block.
|
||||
ROW_IT row_it(block->row_list());
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
|
@ -73,10 +73,12 @@ bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rm
|
||||
}
|
||||
|
||||
bool ParagraphModel::Comparable(const ParagraphModel &other) const {
|
||||
if (justification_ != other.justification_)
|
||||
if (justification_ != other.justification_) {
|
||||
return false;
|
||||
if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN)
|
||||
}
|
||||
if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN) {
|
||||
return true;
|
||||
}
|
||||
int tolerance = (tolerance_ + other.tolerance_) / 4;
|
||||
return NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance) &&
|
||||
NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, tolerance);
|
||||
|
@ -126,8 +126,9 @@ public:
|
||||
, tolerance_(tolerance) {
|
||||
// Make one of {first_indent, body_indent} is 0.
|
||||
int added_margin = first_indent;
|
||||
if (body_indent < added_margin)
|
||||
if (body_indent < added_margin) {
|
||||
added_margin = body_indent;
|
||||
}
|
||||
margin_ += added_margin;
|
||||
first_indent_ -= added_margin;
|
||||
body_indent_ -= added_margin;
|
||||
|
@ -123,16 +123,18 @@ void ROW::recalc_bounding_box() { // recalculate BB
|
||||
}
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
word = it.data();
|
||||
if (it.at_first())
|
||||
if (it.at_first()) {
|
||||
word->set_flag(W_BOL, true);
|
||||
else
|
||||
} else {
|
||||
// not start of line
|
||||
word->set_flag(W_BOL, false);
|
||||
if (it.at_last())
|
||||
}
|
||||
if (it.at_last()) {
|
||||
word->set_flag(W_EOL, true);
|
||||
else
|
||||
} else {
|
||||
// not end of line
|
||||
word->set_flag(W_EOL, false);
|
||||
}
|
||||
// extend BB as reqd
|
||||
bound_box += word->bounding_box();
|
||||
}
|
||||
@ -149,8 +151,9 @@ void ROW::move( // reposition row
|
||||
) {
|
||||
WERD_IT it(&words); // word iterator
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
it.data()->move(vec);
|
||||
}
|
||||
|
||||
bound_box.move(vec);
|
||||
baseline.move(vec);
|
||||
@ -228,8 +231,9 @@ ROW &ROW::operator=(const ROW &source) {
|
||||
bodysize = source.bodysize;
|
||||
ascrise = source.ascrise;
|
||||
descdrop = source.descdrop;
|
||||
if (!words.empty())
|
||||
if (!words.empty()) {
|
||||
words.clear();
|
||||
}
|
||||
baseline = source.baseline; // QSPLINES must do =
|
||||
bound_box = source.bound_box;
|
||||
has_drop_cap_ = source.has_drop_cap_;
|
||||
|
@ -186,11 +186,13 @@ int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
|
||||
for (int t = 0; t < kHistogramSize - 1; ++t) {
|
||||
omega_0 += histogram[t];
|
||||
mu_t += t * static_cast<double>(histogram[t]);
|
||||
if (omega_0 == 0)
|
||||
if (omega_0 == 0) {
|
||||
continue;
|
||||
}
|
||||
omega_1 = H - omega_0;
|
||||
if (omega_1 == 0)
|
||||
if (omega_1 == 0) {
|
||||
break;
|
||||
}
|
||||
mu_0 = mu_t / omega_0;
|
||||
mu_1 = (mu_T - mu_t) / omega_1;
|
||||
double sig_sq_B = mu_1 - mu_0;
|
||||
@ -201,10 +203,12 @@ int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
|
||||
best_omega_0 = omega_0;
|
||||
}
|
||||
}
|
||||
if (H_out != nullptr)
|
||||
if (H_out != nullptr) {
|
||||
*H_out = H;
|
||||
if (omega0_out != nullptr)
|
||||
}
|
||||
if (omega0_out != nullptr) {
|
||||
*omega0_out = best_omega_0;
|
||||
}
|
||||
return best_t;
|
||||
}
|
||||
|
||||
|
@ -194,17 +194,21 @@ WERD_RES &WERD_RES::operator=(const WERD_RES &source) {
|
||||
} else {
|
||||
word = source.word; // pt to same word
|
||||
}
|
||||
if (source.bln_boxes != nullptr)
|
||||
if (source.bln_boxes != nullptr) {
|
||||
bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
|
||||
if (source.chopped_word != nullptr)
|
||||
}
|
||||
if (source.chopped_word != nullptr) {
|
||||
chopped_word = new TWERD(*source.chopped_word);
|
||||
if (source.rebuild_word != nullptr)
|
||||
}
|
||||
if (source.rebuild_word != nullptr) {
|
||||
rebuild_word = new TWERD(*source.rebuild_word);
|
||||
}
|
||||
// TODO(rays) Do we ever need to copy the seam_array?
|
||||
blob_row = source.blob_row;
|
||||
denorm = source.denorm;
|
||||
if (source.box_word != nullptr)
|
||||
if (source.box_word != nullptr) {
|
||||
box_word = new tesseract::BoxWord(*source.box_word);
|
||||
}
|
||||
best_state = source.best_state;
|
||||
correct_text = source.correct_text;
|
||||
blob_widths = source.blob_widths;
|
||||
@ -415,8 +419,9 @@ void WERD_RES::InsertSeam(int blob_number, SEAM *seam) {
|
||||
// Expand the ratings matrix.
|
||||
ratings = ratings->ConsumeAndMakeBigger(blob_number);
|
||||
// Fix all the segmentation states.
|
||||
if (raw_choice != nullptr)
|
||||
if (raw_choice != nullptr) {
|
||||
raw_choice->UpdateStateForSplit(blob_number);
|
||||
}
|
||||
WERD_CHOICE_IT wc_it(&best_choices);
|
||||
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
|
||||
WERD_CHOICE *choice = wc_it.data();
|
||||
@ -433,8 +438,9 @@ bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan(float threshold) const {
|
||||
WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST *>(&best_choices));
|
||||
for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
|
||||
WERD_CHOICE *choice = wc_it.data();
|
||||
if (choice->adjust_factor() <= threshold)
|
||||
if (choice->adjust_factor() <= threshold) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -472,8 +478,9 @@ bool WERD_RES::StatesAllValid() {
|
||||
void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
|
||||
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
|
||||
best_choice->unichar_string() == std::string(word_to_debug))) {
|
||||
if (raw_choice != nullptr)
|
||||
if (raw_choice != nullptr) {
|
||||
raw_choice->print("\nBest Raw Choice");
|
||||
}
|
||||
|
||||
WERD_CHOICE_IT it(&best_choices);
|
||||
int index = 0;
|
||||
@ -490,10 +497,11 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
|
||||
void WERD_RES::DebugTopChoice(const char *msg) const {
|
||||
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, tess_would_adapt,
|
||||
done);
|
||||
if (best_choice == nullptr)
|
||||
if (best_choice == nullptr) {
|
||||
tprintf("<Null choice>\n");
|
||||
else
|
||||
} else {
|
||||
best_choice->print(msg);
|
||||
}
|
||||
}
|
||||
|
||||
// Removes from best_choices all choices which are not within a reasonable
|
||||
@ -502,11 +510,13 @@ void WERD_RES::DebugTopChoice(const char *msg) const {
|
||||
// re-ranker, in place of this heuristic that is based on the previous
|
||||
// adjustment factor.
|
||||
void WERD_RES::FilterWordChoices(int debug_level) {
|
||||
if (best_choice == nullptr || best_choices.singleton())
|
||||
if (best_choice == nullptr || best_choices.singleton()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (debug_level >= 2)
|
||||
if (debug_level >= 2) {
|
||||
best_choice->print("\nFiltering against best choice");
|
||||
}
|
||||
WERD_CHOICE_IT it(&best_choices);
|
||||
int index = 0;
|
||||
for (it.forward(); !it.at_first(); it.forward(), ++index) {
|
||||
@ -539,11 +549,13 @@ void WERD_RES::FilterWordChoices(int debug_level) {
|
||||
}
|
||||
++chunk;
|
||||
// If needed, advance choice_chunk to keep up with chunk.
|
||||
while (choice_chunk < chunk && ++i < choice->length())
|
||||
while (choice_chunk < chunk && ++i < choice->length()) {
|
||||
choice_chunk += choice->state(i);
|
||||
}
|
||||
// If needed, advance best_chunk to keep up with chunk.
|
||||
while (best_chunk < chunk && ++j < best_choice->length())
|
||||
while (best_chunk < chunk && ++j < best_choice->length()) {
|
||||
best_chunk += best_choice->state(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -578,10 +590,12 @@ void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, float min_rating
|
||||
*thresholds = max_rating;
|
||||
}
|
||||
|
||||
if (*thresholds > max_rating)
|
||||
if (*thresholds > max_rating) {
|
||||
*thresholds = max_rating;
|
||||
if (*thresholds < min_rating)
|
||||
}
|
||||
if (*thresholds < min_rating) {
|
||||
*thresholds = min_rating;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -612,8 +626,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
|
||||
// prune them later when more information is available.
|
||||
float max_certainty_delta =
|
||||
StopperAmbigThreshold(best_choice->adjust_factor(), word_choice->adjust_factor());
|
||||
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
|
||||
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) {
|
||||
max_certainty_delta = -kStopperAmbiguityThresholdOffset;
|
||||
}
|
||||
if (word_choice->certainty() - best_choice->certainty() < max_certainty_delta) {
|
||||
if (debug) {
|
||||
std::string bad_string;
|
||||
@ -642,8 +657,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
|
||||
// Time to insert.
|
||||
it.add_before_stay_put(word_choice);
|
||||
inserted = true;
|
||||
if (num_choices == 0)
|
||||
if (num_choices == 0) {
|
||||
best_choice = word_choice; // This is the new best.
|
||||
}
|
||||
++num_choices;
|
||||
}
|
||||
if (choice->unichar_string() == new_str) {
|
||||
@ -661,8 +677,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
|
||||
}
|
||||
} else {
|
||||
++num_choices;
|
||||
if (num_choices > max_num_choices)
|
||||
if (num_choices > max_num_choices) {
|
||||
delete it.extract();
|
||||
}
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
@ -670,14 +687,16 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
|
||||
if (!inserted && num_choices < max_num_choices) {
|
||||
it.add_to_end(word_choice);
|
||||
inserted = true;
|
||||
if (num_choices == 0)
|
||||
if (num_choices == 0) {
|
||||
best_choice = word_choice; // This is the new best.
|
||||
}
|
||||
}
|
||||
if (debug) {
|
||||
if (inserted)
|
||||
if (inserted) {
|
||||
tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
|
||||
else
|
||||
} else {
|
||||
tprintf("Poor");
|
||||
}
|
||||
word_choice->print(" Word Choice");
|
||||
}
|
||||
if (!inserted) {
|
||||
@ -701,8 +720,9 @@ void WERD_RES::PrintBestChoices() const {
|
||||
std::string alternates_str;
|
||||
WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST *>(&best_choices));
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
if (!it.at_first())
|
||||
if (!it.at_first()) {
|
||||
alternates_str += "\", \"";
|
||||
}
|
||||
alternates_str += it.data()->unichar_string();
|
||||
}
|
||||
tprintf("Alternates for \"%s\": {\"%s\"}\n", best_choice->unichar_string().c_str(),
|
||||
@ -715,15 +735,17 @@ int WERD_RES::GetBlobsWidth(int start_blob, int last_blob) {
|
||||
int result = 0;
|
||||
for (int b = start_blob; b <= last_blob; ++b) {
|
||||
result += blob_widths[b];
|
||||
if (b < last_blob)
|
||||
if (b < last_blob) {
|
||||
result += blob_gaps[b];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Returns the width of a gap between the specified blob and the next one.
|
||||
int WERD_RES::GetBlobsGap(int blob_index) {
|
||||
if (blob_index < 0 || blob_index >= blob_gaps.size())
|
||||
if (blob_index < 0 || blob_index >= blob_gaps.size()) {
|
||||
return 0;
|
||||
}
|
||||
return blob_gaps[blob_index];
|
||||
}
|
||||
|
||||
@ -732,8 +754,9 @@ int WERD_RES::GetBlobsGap(int blob_index) {
|
||||
// Borrowed pointer, so do not delete. May return nullptr if there is no
|
||||
// BLOB_CHOICE matching the unichar_id at the given index.
|
||||
BLOB_CHOICE *WERD_RES::GetBlobChoice(int index) const {
|
||||
if (index < 0 || index >= best_choice->length())
|
||||
if (index < 0 || index >= best_choice->length()) {
|
||||
return nullptr;
|
||||
}
|
||||
BLOB_CHOICE_LIST *choices = GetBlobChoices(index);
|
||||
return FindMatchingChoice(best_choice->unichar_id(index), choices);
|
||||
}
|
||||
@ -767,8 +790,9 @@ void WERD_RES::ConsumeWordResults(WERD_RES *word) {
|
||||
word->blob_widths.clear();
|
||||
blob_gaps = word->blob_gaps;
|
||||
word->blob_gaps.clear();
|
||||
if (ratings != nullptr)
|
||||
if (ratings != nullptr) {
|
||||
ratings->delete_matrix_pointers();
|
||||
}
|
||||
MovePointerData(&ratings, &word->ratings);
|
||||
best_choice = word->best_choice;
|
||||
MovePointerData(&raw_choice, &word->raw_choice);
|
||||
@ -802,8 +826,9 @@ void WERD_RES::RebuildBestState() {
|
||||
ASSERT_HOST(best_choice != nullptr);
|
||||
delete rebuild_word;
|
||||
rebuild_word = new TWERD;
|
||||
if (seam_array.empty())
|
||||
if (seam_array.empty()) {
|
||||
start_seam_list(chopped_word, &seam_array);
|
||||
}
|
||||
best_state.clear();
|
||||
int start = 0;
|
||||
for (int i = 0; i < best_choice->length(); ++i) {
|
||||
@ -856,8 +881,9 @@ void WERD_RES::SetScriptPositions() {
|
||||
void WERD_RES::SetAllScriptPositions(tesseract::ScriptPos position) {
|
||||
raw_choice->SetAllScriptPositions(position);
|
||||
WERD_CHOICE_IT wc_it(&best_choices);
|
||||
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
|
||||
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
|
||||
wc_it.data()->SetAllScriptPositions(position);
|
||||
}
|
||||
}
|
||||
|
||||
// Classifies the word with some already-calculated BLOB_CHOICEs.
|
||||
@ -958,8 +984,9 @@ bool WERD_RES::ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR
|
||||
// Merges 2 adjacent blobs in the result (index and index+1) and corrects
|
||||
// all the data to account for the change.
|
||||
void WERD_RES::MergeAdjacentBlobs(int index) {
|
||||
if (reject_map.length() == best_choice->length())
|
||||
if (reject_map.length() == best_choice->length()) {
|
||||
reject_map.remove_pos(index);
|
||||
}
|
||||
best_choice->remove_unichar_id(index + 1);
|
||||
rebuild_word->MergeBlobs(index, index + 2);
|
||||
box_word->MergeBoxes(index, index + 2);
|
||||
@ -989,15 +1016,17 @@ static int is_simple_quote(const char *signed_str, int length) {
|
||||
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
const char *ch = uch_set->id_to_unichar(id1);
|
||||
const char *next_ch = uch_set->id_to_unichar(id2);
|
||||
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch)))
|
||||
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
|
||||
return uch_set->unichar_to_id("\"");
|
||||
}
|
||||
return INVALID_UNICHAR_ID;
|
||||
}
|
||||
|
||||
// Change pairs of quotes to double quotes.
|
||||
void WERD_RES::fix_quotes() {
|
||||
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
|
||||
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
|
||||
return; // Don't create it if it is disallowed.
|
||||
}
|
||||
|
||||
using namespace std::placeholders; // for _1, _2
|
||||
ConditionalBlobMerge(std::bind(&WERD_RES::BothQuotes, this, _1, _2), nullptr);
|
||||
@ -1009,8 +1038,9 @@ UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
const char *ch = uch_set->id_to_unichar(id1);
|
||||
const char *next_ch = uch_set->id_to_unichar(id2);
|
||||
if (strlen(ch) == 1 && strlen(next_ch) == 1 && (*ch == '-' || *ch == '~') &&
|
||||
(*next_ch == '-' || *next_ch == '~'))
|
||||
(*next_ch == '-' || *next_ch == '~')) {
|
||||
return uch_set->unichar_to_id("-");
|
||||
}
|
||||
return INVALID_UNICHAR_ID;
|
||||
}
|
||||
|
||||
@ -1023,8 +1053,9 @@ bool WERD_RES::HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2) {
|
||||
// Change pairs of hyphens to a single hyphen if the bounding boxes touch
|
||||
// Typically a long dash which has been segmented.
|
||||
void WERD_RES::fix_hyphens() {
|
||||
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-")))
|
||||
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
|
||||
return; // Don't create it if it is disallowed.
|
||||
}
|
||||
|
||||
using namespace std::placeholders; // for _1, _2
|
||||
ConditionalBlobMerge(std::bind(&WERD_RES::BothHyphens, this, _1, _2),
|
||||
@ -1034,10 +1065,11 @@ void WERD_RES::fix_hyphens() {
|
||||
// Callback helper for merge_tess_fails returns a space if both
|
||||
// arguments are space, otherwise INVALID_UNICHAR_ID.
|
||||
UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
|
||||
if (id1 == id2 && id1 == uch_set->unichar_to_id(" ")) {
|
||||
return id1;
|
||||
else
|
||||
} else {
|
||||
return INVALID_UNICHAR_ID;
|
||||
}
|
||||
}
|
||||
|
||||
// Change pairs of tess failures to a single one
|
||||
@ -1057,8 +1089,9 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const {
|
||||
for (int index = start; index < start + count - 1; ++index) {
|
||||
if (index >= 0 && index < seam_array.size()) {
|
||||
SEAM *seam = seam_array[index];
|
||||
if (seam != nullptr && seam->HasAnySplits())
|
||||
if (seam != nullptr && seam->HasAnySplits()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@ -1103,8 +1136,9 @@ void WERD_RES::ClearResults() {
|
||||
blob_gaps.clear();
|
||||
ClearRatings();
|
||||
ClearWordChoices();
|
||||
if (blamer_bundle != nullptr)
|
||||
if (blamer_bundle != nullptr) {
|
||||
blamer_bundle->ClearResults();
|
||||
}
|
||||
}
|
||||
void WERD_RES::ClearWordChoices() {
|
||||
best_choice = nullptr;
|
||||
@ -1126,8 +1160,9 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
|
||||
ASSERT_HOST(page_res == other.page_res);
|
||||
if (other.block_res == nullptr) {
|
||||
// other points to the end of the page.
|
||||
if (block_res == nullptr)
|
||||
if (block_res == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
if (block_res == nullptr) {
|
||||
@ -1196,8 +1231,9 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *ne
|
||||
WERD_RES_IT wr_it(&row()->word_res_list);
|
||||
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
|
||||
WERD_RES *word = wr_it.data();
|
||||
if (word == word_res)
|
||||
if (word == word_res) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(!wr_it.cycled_list());
|
||||
wr_it.add_before_then_move(new_res);
|
||||
@ -1227,8 +1263,9 @@ static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
|
||||
// boundaries between them.
|
||||
int blob_end = INT32_MAX;
|
||||
if (!blob_it.at_first() || next_word_blobs != nullptr) {
|
||||
if (blob_it.at_first())
|
||||
if (blob_it.at_first()) {
|
||||
blob_it.set_to_list(next_word_blobs);
|
||||
}
|
||||
blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2;
|
||||
}
|
||||
blob_end = ClipToRange<int>(blob_end, clip_box.left(), clip_box.right());
|
||||
@ -1246,11 +1283,13 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
|
||||
TBOX current_box = words[w_index]->word->bounding_box();
|
||||
TBOX next_box;
|
||||
if (w_index + 1 < words.size() && words[w_index + 1] != nullptr &&
|
||||
words[w_index + 1]->word != nullptr)
|
||||
words[w_index + 1]->word != nullptr) {
|
||||
next_box = words[w_index + 1]->word->bounding_box();
|
||||
}
|
||||
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; w_it.forward()) {
|
||||
if (w_it.data() == nullptr || w_it.data()->word == nullptr)
|
||||
if (w_it.data() == nullptr || w_it.data()->word == nullptr) {
|
||||
continue;
|
||||
}
|
||||
TBOX w_box = w_it.data()->word->bounding_box();
|
||||
int height_limit = std::min<int>(w_box.height(), w_box.width() / 2);
|
||||
int width_limit = w_box.width() / kSignificantOverlapFraction;
|
||||
@ -1274,8 +1313,9 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
|
||||
clipped_box.set_top(current_box.top());
|
||||
clipped_box.set_bottom(current_box.bottom());
|
||||
}
|
||||
if (clipped_box.width() <= 0)
|
||||
if (clipped_box.width() <= 0) {
|
||||
clipped_box = current_box;
|
||||
}
|
||||
return clipped_box;
|
||||
}
|
||||
|
||||
@ -1324,8 +1364,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
|
||||
if (!input_word->combination) {
|
||||
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
|
||||
WERD *word = w_it.data();
|
||||
if (word == input_word->word)
|
||||
if (word == input_word->word) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// w_it is now set to the input_word's word.
|
||||
ASSERT_HOST(!w_it.cycled_list());
|
||||
@ -1334,8 +1375,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
|
||||
WERD_RES_IT wr_it(&row()->word_res_list);
|
||||
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
|
||||
WERD_RES *word = wr_it.data();
|
||||
if (word == input_word)
|
||||
if (word == input_word) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(!wr_it.cycled_list());
|
||||
// Since we only have an estimate of the bounds between blobs, use the blob
|
||||
@ -1397,8 +1439,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
|
||||
// Delete the current word, which has been replaced. We could just call
|
||||
// DeleteCurrentWord, but that would iterate both lists again, and we know
|
||||
// we are already in the right place.
|
||||
if (!input_word->combination)
|
||||
if (!input_word->combination) {
|
||||
delete w_it.extract();
|
||||
}
|
||||
delete wr_it.extract();
|
||||
ResetWordIterator();
|
||||
}
|
||||
@ -1494,8 +1537,9 @@ void PAGE_RES_IT::ResetWordIterator() {
|
||||
for (word_res_it.mark_cycle_pt();
|
||||
!word_res_it.cycled_list() && word_res_it.data() != next_word_res; word_res_it.forward()) {
|
||||
if (!word_res_it.data()->part_of_combo) {
|
||||
if (prev_row_res == row_res)
|
||||
if (prev_row_res == row_res) {
|
||||
prev_word_res = word_res;
|
||||
}
|
||||
word_res = word_res_it.data();
|
||||
}
|
||||
}
|
||||
@ -1507,8 +1551,9 @@ void PAGE_RES_IT::ResetWordIterator() {
|
||||
WERD_RES_IT wr_it(&row_res->word_res_list);
|
||||
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
|
||||
if (!wr_it.data()->part_of_combo) {
|
||||
if (prev_row_res == row_res)
|
||||
if (prev_row_res == row_res) {
|
||||
prev_word_res = word_res;
|
||||
}
|
||||
word_res = wr_it.data();
|
||||
}
|
||||
}
|
||||
@ -1561,8 +1606,9 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) {
|
||||
word_res_it.mark_cycle_pt();
|
||||
}
|
||||
// Skip any part_of_combo words.
|
||||
while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo)
|
||||
while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo) {
|
||||
word_res_it.forward();
|
||||
}
|
||||
if (!word_res_it.cycled_list()) {
|
||||
next_block_res = block_res_it.data();
|
||||
next_row_res = row_res_it.data();
|
||||
@ -1595,8 +1641,9 @@ foundword:
|
||||
*************************************************************************/
|
||||
WERD_RES *PAGE_RES_IT::restart_row() {
|
||||
ROW_RES *row = this->row();
|
||||
if (!row)
|
||||
if (!row) {
|
||||
return nullptr;
|
||||
}
|
||||
for (restart_page(); this->row() != row; forward()) {
|
||||
// pass
|
||||
}
|
||||
@ -1644,8 +1691,9 @@ void PAGE_RES_IT::rej_stat_word() {
|
||||
page_res->rej_count += rejects_in_word;
|
||||
block_res->rej_count += rejects_in_word;
|
||||
row_res->rej_count += rejects_in_word;
|
||||
if (chars_in_word == rejects_in_word)
|
||||
if (chars_in_word == rejects_in_word) {
|
||||
row_res->whole_word_rej_count += rejects_in_word;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -356,56 +356,68 @@ public:
|
||||
// characters purely based on their shape on the page, and by default produce
|
||||
// the corresponding unicode for a left-to-right context.
|
||||
const char *BestUTF8(int blob_index, bool in_rtl_context) const {
|
||||
if (blob_index < 0 || best_choice == nullptr || blob_index >= best_choice->length())
|
||||
if (blob_index < 0 || best_choice == nullptr || blob_index >= best_choice->length()) {
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID id = best_choice->unichar_id(blob_index);
|
||||
if (id < 0 || id >= uch_set->size())
|
||||
if (id < 0 || id >= uch_set->size()) {
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID mirrored = uch_set->get_mirror(id);
|
||||
if (in_rtl_context && mirrored > 0)
|
||||
if (in_rtl_context && mirrored > 0) {
|
||||
id = mirrored;
|
||||
}
|
||||
return uch_set->id_to_unichar_ext(id);
|
||||
}
|
||||
// Returns the UTF-8 string for the given blob index in the raw_choice word.
|
||||
const char *RawUTF8(int blob_index) const {
|
||||
if (blob_index < 0 || blob_index >= raw_choice->length())
|
||||
if (blob_index < 0 || blob_index >= raw_choice->length()) {
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID id = raw_choice->unichar_id(blob_index);
|
||||
if (id < 0 || id >= uch_set->size())
|
||||
if (id < 0 || id >= uch_set->size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return uch_set->id_to_unichar(id);
|
||||
}
|
||||
|
||||
UNICHARSET::Direction SymbolDirection(int blob_index) const {
|
||||
if (best_choice == nullptr || blob_index >= best_choice->length() || blob_index < 0)
|
||||
if (best_choice == nullptr || blob_index >= best_choice->length() || blob_index < 0) {
|
||||
return UNICHARSET::U_OTHER_NEUTRAL;
|
||||
}
|
||||
return uch_set->get_direction(best_choice->unichar_id(blob_index));
|
||||
}
|
||||
|
||||
bool AnyRtlCharsInWord() const {
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
|
||||
return false;
|
||||
}
|
||||
for (int id = 0; id < best_choice->length(); id++) {
|
||||
int unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size())
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC)
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AnyLtrCharsInWord() const {
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
|
||||
return false;
|
||||
}
|
||||
for (int id = 0; id < best_choice->length(); id++) {
|
||||
int unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size())
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER)
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -632,8 +644,9 @@ public:
|
||||
auto *result = new WERD_RES(*src);
|
||||
// That didn't copy the ratings, but we want a copy if there is one to
|
||||
// begin with.
|
||||
if (src->ratings != nullptr)
|
||||
if (src->ratings != nullptr) {
|
||||
result->ratings = src->ratings->DeepCopy();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -24,15 +24,18 @@
|
||||
namespace tesseract {
|
||||
|
||||
int ParamsTrainingFeatureByName(const char *name) {
|
||||
if (name == nullptr)
|
||||
if (name == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
int array_size =
|
||||
sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]);
|
||||
for (int i = 0; i < array_size; i++) {
|
||||
if (kParamsTrainingFeatureTypeName[i] == nullptr)
|
||||
if (kParamsTrainingFeatureTypeName[i] == nullptr) {
|
||||
continue;
|
||||
if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0)
|
||||
}
|
||||
if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -139,8 +139,9 @@ public:
|
||||
// Adds a new ParamsTrainingHypothesis to the current hypothesis list
|
||||
// and returns the reference to the newly added entry.
|
||||
ParamsTrainingHypothesis &AddHypothesis(const ParamsTrainingHypothesis &other) {
|
||||
if (hyp_list_vec.empty())
|
||||
if (hyp_list_vec.empty()) {
|
||||
StartHypothesisList();
|
||||
}
|
||||
hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other));
|
||||
return hyp_list_vec.back().back();
|
||||
}
|
||||
|
@ -100,8 +100,10 @@ bool PDBLK::contains( // test containment
|
||||
// get rectangle
|
||||
it.bounding_box(bleft, tright);
|
||||
// inside rect
|
||||
if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() && pt.y() <= tright.y())
|
||||
if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() &&
|
||||
pt.y() <= tright.y()) {
|
||||
return true; // is inside
|
||||
}
|
||||
}
|
||||
return false; // not inside
|
||||
}
|
||||
@ -117,13 +119,15 @@ void PDBLK::move( // reposition block
|
||||
) {
|
||||
ICOORDELT_IT it(&leftside);
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
*(it.data()) += vec;
|
||||
}
|
||||
|
||||
it.set_to_list(&rightside);
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
*(it.data()) += vec;
|
||||
}
|
||||
|
||||
box.move(vec);
|
||||
}
|
||||
@ -165,8 +169,9 @@ Pix *PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
|
||||
// Just fill the whole block as there is only a bounding box.
|
||||
pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), PIX_SET, nullptr, 0, 0);
|
||||
}
|
||||
if (mask_box != nullptr)
|
||||
if (mask_box != nullptr) {
|
||||
*mask_box = rotated_box;
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
@ -241,10 +246,12 @@ PDBLK &PDBLK::operator=( // assignment
|
||||
const PDBLK &source // from this
|
||||
) {
|
||||
// this->ELIST_LINK::operator=(source);
|
||||
if (!leftside.empty())
|
||||
if (!leftside.empty()) {
|
||||
leftside.clear();
|
||||
if (!rightside.empty())
|
||||
}
|
||||
if (!rightside.empty()) {
|
||||
rightside.clear();
|
||||
}
|
||||
leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy);
|
||||
rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy);
|
||||
box = source.box;
|
||||
@ -281,8 +288,9 @@ void BLOCK_RECT_IT::set_to_block( // start (new) block
|
||||
// set iterators
|
||||
left_it.set_to_list(&blkptr->leftside);
|
||||
right_it.set_to_list(&blkptr->rightside);
|
||||
if (!blkptr->leftside.empty())
|
||||
if (!blkptr->leftside.empty()) {
|
||||
start_block(); // ready for iteration
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -298,9 +306,10 @@ void BLOCK_RECT_IT::start_block() { // start (new) block
|
||||
right_it.mark_cycle_pt();
|
||||
ymin = left_it.data()->y(); // bottom of first box
|
||||
ymax = left_it.data_relative(1)->y();
|
||||
if (right_it.data_relative(1)->y() < ymax)
|
||||
if (right_it.data_relative(1)->y() < ymax) {
|
||||
// smallest step
|
||||
ymax = right_it.data_relative(1)->y();
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -311,10 +320,12 @@ void BLOCK_RECT_IT::start_block() { // start (new) block
|
||||
|
||||
void BLOCK_RECT_IT::forward() { // next rectangle
|
||||
if (!left_it.empty()) { // non-empty list
|
||||
if (left_it.data_relative(1)->y() == ymax)
|
||||
if (left_it.data_relative(1)->y() == ymax) {
|
||||
left_it.forward(); // move to meet top
|
||||
if (right_it.data_relative(1)->y() == ymax)
|
||||
}
|
||||
if (right_it.data_relative(1)->y() == ymax) {
|
||||
right_it.forward();
|
||||
}
|
||||
// last is special
|
||||
if (left_it.at_last() || right_it.at_last()) {
|
||||
left_it.move_to_first(); // restart
|
||||
@ -326,9 +337,10 @@ void BLOCK_RECT_IT::forward() { // next rectangle
|
||||
}
|
||||
// next point
|
||||
ymax = left_it.data_relative(1)->y();
|
||||
if (right_it.data_relative(1)->y() < ymax)
|
||||
if (right_it.data_relative(1)->y() < ymax) {
|
||||
// least step forward
|
||||
ymax = right_it.data_relative(1)->y();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,8 +54,9 @@ void ICOORD::set_with_shrink(int x, int y) {
|
||||
// Fit the vector into an ICOORD, which is 16 bit.
|
||||
int factor = 1;
|
||||
int max_extent = std::max(abs(x), abs(y));
|
||||
if (max_extent > INT16_MAX)
|
||||
if (max_extent > INT16_MAX) {
|
||||
factor = max_extent / INT16_MAX + 1;
|
||||
}
|
||||
xcoord = x / factor;
|
||||
ycoord = y / factor;
|
||||
}
|
||||
@ -63,10 +64,11 @@ void ICOORD::set_with_shrink(int x, int y) {
|
||||
// The fortran/basic sgn function returns -1, 0, 1 if x < 0, x == 0, x > 0
|
||||
// respectively.
|
||||
static int sign(int x) {
|
||||
if (x < 0)
|
||||
if (x < 0) {
|
||||
return -1;
|
||||
else
|
||||
} else {
|
||||
return x > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
@ -76,10 +78,12 @@ bool ICOORD::Serialize(FILE *fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool ICOORD::DeSerialize(bool swap, FILE *fp) {
|
||||
if (!tesseract::DeSerialize(fp, &xcoord))
|
||||
if (!tesseract::DeSerialize(fp, &xcoord)) {
|
||||
return false;
|
||||
if (!tesseract::DeSerialize(fp, &ycoord))
|
||||
}
|
||||
if (!tesseract::DeSerialize(fp, &ycoord)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
ReverseN(&xcoord, sizeof(xcoord));
|
||||
ReverseN(&ycoord, sizeof(ycoord));
|
||||
|
@ -66,13 +66,15 @@ TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
|
||||
EDGEPT *edgepts = stack_edgepts;
|
||||
|
||||
// Use heap memory if the stack buffer is not big enough.
|
||||
if (c_outline->pathlength() > FASTEDGELENGTH)
|
||||
if (c_outline->pathlength() > FASTEDGELENGTH) {
|
||||
edgepts = new EDGEPT[c_outline->pathlength()];
|
||||
}
|
||||
|
||||
loop_box = c_outline->bounding_box();
|
||||
area = loop_box.height();
|
||||
if (!poly_wide_objects_better && loop_box.width() > area)
|
||||
if (!poly_wide_objects_better && loop_box.width() > area) {
|
||||
area = loop_box.width();
|
||||
}
|
||||
area *= area;
|
||||
edgesteps_to_edgepts(c_outline, edgepts);
|
||||
fix2(edgepts, area);
|
||||
@ -100,8 +102,9 @@ TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
|
||||
} while (edgept != startpt);
|
||||
prev_result->next = result;
|
||||
result->prev = prev_result;
|
||||
if (edgepts != stack_edgepts)
|
||||
if (edgepts != stack_edgepts) {
|
||||
delete[] edgepts;
|
||||
}
|
||||
return TESSLINE::BuildFromOutlineList(result);
|
||||
}
|
||||
|
||||
@ -141,8 +144,9 @@ EDGEPT *edgesteps_to_edgepts( // convert outline
|
||||
dir += 128 - 16;
|
||||
vec += c_outline->step(stepindex + 1);
|
||||
stepinc = 2;
|
||||
} else
|
||||
} else {
|
||||
stepinc = 1;
|
||||
}
|
||||
if (count == 0) {
|
||||
prevdir = dir;
|
||||
prev_vec = vec;
|
||||
@ -171,8 +175,9 @@ EDGEPT *edgesteps_to_edgepts( // convert outline
|
||||
prev_vec = vec;
|
||||
count = 1;
|
||||
prev_stepindex = stepindex;
|
||||
} else
|
||||
} else {
|
||||
count++;
|
||||
}
|
||||
stepindex += stepinc;
|
||||
} while (stepindex < length);
|
||||
edgepts[epindex].pos.x = pos.x();
|
||||
@ -222,8 +227,9 @@ void fix2( // polygonal approx
|
||||
|
||||
edgept = start; /*start of loop */
|
||||
while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 &&
|
||||
(dir1 = (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 && dir1 != 6)
|
||||
(dir1 = (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 && dir1 != 6) {
|
||||
edgept = edgept->next; /*find suitable start */
|
||||
}
|
||||
loopstart = edgept; /*remember start */
|
||||
|
||||
stopped = 0; /*not finished yet */
|
||||
@ -240,15 +246,17 @@ void fix2( // polygonal approx
|
||||
if (((dir1 - dir2 + 1) & 7) < 3) {
|
||||
while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
|
||||
edgept = edgept->next; /*look at next */
|
||||
if (edgept->flags[DIR] == dir1)
|
||||
if (edgept->flags[DIR] == dir1) {
|
||||
/*sum lengths */
|
||||
sum1 += edgept->flags[RUNLENGTH];
|
||||
else
|
||||
} else {
|
||||
sum2 += edgept->flags[RUNLENGTH];
|
||||
}
|
||||
}
|
||||
|
||||
if (edgept == loopstart)
|
||||
if (edgept == loopstart) {
|
||||
stopped = 1; /*finished */
|
||||
}
|
||||
if (sum2 + sum1 > 2 && linestart->prev->flags[DIR] == dir2 &&
|
||||
(linestart->prev->flags[RUNLENGTH] > linestart->flags[RUNLENGTH] || sum2 > sum1)) {
|
||||
/*start is back one */
|
||||
@ -260,8 +268,9 @@ void fix2( // polygonal approx
|
||||
(edgept->flags[DIR] == dir1 && sum1 >= sum2) ||
|
||||
((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] ||
|
||||
(edgept->flags[DIR] == dir2 && sum2 >= sum1)) &&
|
||||
linestart->next != edgept))
|
||||
linestart->next != edgept)) {
|
||||
edgept = edgept->next;
|
||||
}
|
||||
}
|
||||
/*sharp bend */
|
||||
edgept->flags[FLAGS] |= FIXED;
|
||||
@ -303,42 +312,49 @@ void fix2( // polygonal approx
|
||||
} while (edgept != start); /*until finished */
|
||||
|
||||
stopped = 0;
|
||||
if (area < 450)
|
||||
if (area < 450) {
|
||||
area = 450;
|
||||
}
|
||||
|
||||
gapmin = area * fixed_dist * fixed_dist / 44000;
|
||||
|
||||
edgept = start;
|
||||
fixed_count = 0;
|
||||
do {
|
||||
if (edgept->flags[FLAGS] & FIXED)
|
||||
if (edgept->flags[FLAGS] & FIXED) {
|
||||
fixed_count++;
|
||||
}
|
||||
edgept = edgept->next;
|
||||
} while (edgept != start);
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0)
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0) {
|
||||
edgept = edgept->next;
|
||||
}
|
||||
edgefix0 = edgept;
|
||||
|
||||
edgept = edgept->next;
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0)
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0) {
|
||||
edgept = edgept->next;
|
||||
}
|
||||
edgefix1 = edgept;
|
||||
|
||||
edgept = edgept->next;
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0)
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0) {
|
||||
edgept = edgept->next;
|
||||
}
|
||||
edgefix2 = edgept;
|
||||
|
||||
edgept = edgept->next;
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0)
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0) {
|
||||
edgept = edgept->next;
|
||||
}
|
||||
edgefix3 = edgept;
|
||||
|
||||
startfix = edgefix2;
|
||||
|
||||
do {
|
||||
if (fixed_count <= 3)
|
||||
if (fixed_count <= 3) {
|
||||
break; // already too few
|
||||
}
|
||||
d12vec.diff(edgefix1->pos, edgefix2->pos);
|
||||
d12 = d12vec.length();
|
||||
// TODO(rays) investigate this change:
|
||||
@ -366,8 +382,9 @@ void fix2( // polygonal approx
|
||||
edgefix2 = edgefix3;
|
||||
edgept = edgept->next;
|
||||
while ((edgept->flags[FLAGS] & FIXED) == 0) {
|
||||
if (edgept == startfix)
|
||||
if (edgept == startfix) {
|
||||
stopped = 1;
|
||||
}
|
||||
edgept = edgept->next;
|
||||
}
|
||||
edgefix3 = edgept;
|
||||
@ -391,8 +408,9 @@ EDGEPT *poly2( // second poly
|
||||
EDGEPT *linestart; /*start of line */
|
||||
int edgesum; /*correction count */
|
||||
|
||||
if (area < 1200)
|
||||
if (area < 1200) {
|
||||
area = 1200; /*minimum value */
|
||||
}
|
||||
|
||||
loopstart = nullptr; /*not found it yet */
|
||||
edgept = startpt; /*start of loop */
|
||||
@ -424,28 +442,32 @@ EDGEPT *poly2( // second poly
|
||||
edgesum += edgept->flags[RUNLENGTH];
|
||||
edgept = edgept->next; /*move on */
|
||||
} while ((edgept->flags[FLAGS] & FIXED) == 0 && edgept != loopstart && edgesum < 126);
|
||||
if (poly_debug)
|
||||
if (poly_debug) {
|
||||
tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", linestart->pos.x,
|
||||
linestart->pos.y, linestart->flags[DIR], linestart->vec.x, linestart->vec.y,
|
||||
edgesum, edgept->pos.x, edgept->pos.y);
|
||||
}
|
||||
/*reapproximate */
|
||||
cutline(linestart, edgept, area);
|
||||
|
||||
while ((edgept->next->flags[FLAGS] & FIXED) && edgept != loopstart)
|
||||
while ((edgept->next->flags[FLAGS] & FIXED) && edgept != loopstart) {
|
||||
edgept = edgept->next; /*look for next non-fixed */
|
||||
}
|
||||
}
|
||||
/*do all the loop */
|
||||
while (edgept != loopstart);
|
||||
edgesum = 0;
|
||||
do {
|
||||
if (edgept->flags[FLAGS] & FIXED)
|
||||
if (edgept->flags[FLAGS] & FIXED) {
|
||||
edgesum++;
|
||||
}
|
||||
edgept = edgept->next;
|
||||
}
|
||||
// count fixed pts
|
||||
while (edgept != loopstart);
|
||||
if (edgesum < 3)
|
||||
if (edgesum < 3) {
|
||||
area /= 2; // must have 3 pts
|
||||
}
|
||||
} while (edgesum < 3);
|
||||
do {
|
||||
linestart = edgept;
|
||||
@ -457,8 +479,9 @@ EDGEPT *poly2( // second poly
|
||||
linestart->vec.x = edgept->pos.x - linestart->pos.x;
|
||||
linestart->vec.y = edgept->pos.y - linestart->pos.y;
|
||||
} while (edgept != loopstart);
|
||||
} else
|
||||
} else {
|
||||
edgept = startpt; /*start of loop */
|
||||
}
|
||||
|
||||
loopstart = edgept; /*new start */
|
||||
return loopstart; /*correct exit */
|
||||
@ -484,8 +507,9 @@ void cutline( // recursive refine
|
||||
int squaresum; /*sum of perps */
|
||||
|
||||
edge = first; /*start of line */
|
||||
if (edge->next == last)
|
||||
if (edge->next == last) {
|
||||
return; /*simple line */
|
||||
}
|
||||
|
||||
/*vector sum */
|
||||
vecsum.x = last->pos.x - edge->pos.x;
|
||||
@ -497,10 +521,11 @@ void cutline( // recursive refine
|
||||
}
|
||||
/*absolute value */
|
||||
vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
|
||||
if (vecsum.y > vlen)
|
||||
if (vecsum.y > vlen) {
|
||||
vlen = vecsum.y; /*maximum */
|
||||
else if (-vecsum.y > vlen)
|
||||
} else if (-vecsum.y > vlen) {
|
||||
vlen = -vecsum.y; /*absolute value */
|
||||
}
|
||||
|
||||
vec.x = edge->vec.x; /*accumulated vector */
|
||||
vec.y = edge->vec.y;
|
||||
@ -515,8 +540,9 @@ void cutline( // recursive refine
|
||||
}
|
||||
squaresum += perp; /*sum squares */
|
||||
ptcount++; /*count points */
|
||||
if (poly_debug)
|
||||
if (poly_debug) {
|
||||
tprintf("Cutline:Final perp=%d\n", perp);
|
||||
}
|
||||
if (perp > maxperp) {
|
||||
maxperp = perp;
|
||||
maxpoint = edge; /*find greatest deviation */
|
||||
@ -536,16 +562,18 @@ void cutline( // recursive refine
|
||||
maxperp /= perp;
|
||||
maxperp <<= 8; /*avoid overflow */
|
||||
}
|
||||
if (squaresum < 256 * INT16_MAX)
|
||||
if (squaresum < 256 * INT16_MAX) {
|
||||
/*mean squared perp */
|
||||
perp = (squaresum << 8) / (perp * ptcount);
|
||||
else
|
||||
} else {
|
||||
/*avoid overflow */
|
||||
perp = (squaresum / perp << 8) / ptcount;
|
||||
}
|
||||
|
||||
if (poly_debug)
|
||||
if (poly_debug) {
|
||||
tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area, maxperp / 256.0,
|
||||
maxperp * 200.0 / area, perp / 256.0, perp * 300.0 / area);
|
||||
}
|
||||
if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
|
||||
maxpoint->flags[FLAGS] |= FIXED;
|
||||
/*partitions */
|
||||
|
@ -76,15 +76,19 @@ void POLY_BLOCK::compute_bb() { // constructor
|
||||
topright = botleft;
|
||||
do {
|
||||
pos = *pts.data();
|
||||
if (pos.x() < botleft.x())
|
||||
if (pos.x() < botleft.x()) {
|
||||
// get bounding box
|
||||
botleft = ICOORD(pos.x(), botleft.y());
|
||||
if (pos.y() < botleft.y())
|
||||
}
|
||||
if (pos.y() < botleft.y()) {
|
||||
botleft = ICOORD(botleft.x(), pos.y());
|
||||
if (pos.x() > topright.x())
|
||||
}
|
||||
if (pos.x() > topright.x()) {
|
||||
topright = ICOORD(pos.x(), topright.y());
|
||||
if (pos.y() > topright.y())
|
||||
}
|
||||
if (pos.y() > topright.y()) {
|
||||
topright = ICOORD(topright.x(), pos.y());
|
||||
}
|
||||
pts.forward();
|
||||
} while (!pts.at_first());
|
||||
ibl = ICOORD(botleft.x(), botleft.y());
|
||||
@ -115,18 +119,21 @@ int16_t POLY_BLOCK::winding_number(const ICOORD &point) {
|
||||
// crossing the line
|
||||
if (vec.y() <= 0 && vec.y() + vvec.y() > 0) {
|
||||
cross = vec * vvec; // cross product
|
||||
if (cross > 0)
|
||||
if (cross > 0) {
|
||||
count++; // crossing right half
|
||||
else if (cross == 0)
|
||||
} else if (cross == 0) {
|
||||
return INTERSECTING; // going through point
|
||||
}
|
||||
} else if (vec.y() > 0 && vec.y() + vvec.y() <= 0) {
|
||||
cross = vec * vvec;
|
||||
if (cross < 0)
|
||||
if (cross < 0) {
|
||||
count--; // crossing back
|
||||
else if (cross == 0)
|
||||
} else if (cross == 0) {
|
||||
return INTERSECTING; // illegal
|
||||
} else if (vec.y() == 0 && vec.x() == 0)
|
||||
}
|
||||
} else if (vec.y() == 0 && vec.x() == 0) {
|
||||
return INTERSECTING;
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
return count; // winding number
|
||||
@ -138,8 +145,9 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
|
||||
ICOORDELT_IT it = &vertices; // iterator
|
||||
ICOORD vertex;
|
||||
|
||||
if (!box.overlap(*(other->bounding_box())))
|
||||
if (!box.overlap(*(other->bounding_box()))) {
|
||||
return false; // can't be contained
|
||||
}
|
||||
|
||||
/* check that no vertex of this is inside other */
|
||||
|
||||
@ -147,9 +155,11 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
|
||||
vertex = *it.data();
|
||||
// get winding number
|
||||
count = other->winding_number(vertex);
|
||||
if (count != INTERSECTING)
|
||||
if (count != 0)
|
||||
if (count != INTERSECTING) {
|
||||
if (count != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
|
||||
@ -161,9 +171,11 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
|
||||
vertex = *it.data();
|
||||
// try other way round
|
||||
count = winding_number(vertex);
|
||||
if (count != INTERSECTING)
|
||||
if (count == 0)
|
||||
if (count != INTERSECTING) {
|
||||
if (count == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
return true;
|
||||
@ -291,8 +303,9 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
|
||||
ICOORDELT_IT it = &vertices; // iterator
|
||||
ICOORD vertex;
|
||||
|
||||
if (!box.overlap(*(other->bounding_box())))
|
||||
if (!box.overlap(*(other->bounding_box()))) {
|
||||
return false; // can't be any overlap.
|
||||
}
|
||||
|
||||
/* see if a vertex of this is inside other */
|
||||
|
||||
@ -300,9 +313,11 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
|
||||
vertex = *it.data();
|
||||
// get winding number
|
||||
count = other->winding_number(vertex);
|
||||
if (count != INTERSECTING)
|
||||
if (count != 0)
|
||||
if (count != INTERSECTING) {
|
||||
if (count != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
|
||||
@ -314,9 +329,11 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
|
||||
vertex = *it.data();
|
||||
// try other way round
|
||||
count = winding_number(vertex);
|
||||
if (count != INTERSECTING)
|
||||
if (count != 0)
|
||||
if (count != INTERSECTING) {
|
||||
if (count != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
it.forward();
|
||||
} while (!it.at_first());
|
||||
return false;
|
||||
@ -346,8 +363,9 @@ ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) {
|
||||
|
||||
if (!r.empty()) {
|
||||
r.sort(lessthan);
|
||||
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward())
|
||||
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
|
||||
x = r.data();
|
||||
}
|
||||
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
|
||||
r.data()->set_y(r.data_relative(1)->x() - r.data()->x());
|
||||
r.forward();
|
||||
@ -362,12 +380,13 @@ int lessthan(const void *first, const void *second) {
|
||||
const ICOORDELT *p1 = *reinterpret_cast<const ICOORDELT *const *>(first);
|
||||
const ICOORDELT *p2 = *reinterpret_cast<const ICOORDELT *const *>(second);
|
||||
|
||||
if (p1->x() < p2->x())
|
||||
if (p1->x() < p2->x()) {
|
||||
return (-1);
|
||||
else if (p1->x() > p2->x())
|
||||
} else if (p1->x() > p2->x()) {
|
||||
return (1);
|
||||
else
|
||||
} else {
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
|
@ -104,19 +104,21 @@ QSPLINE::QSPLINE( // constructor
|
||||
/*first blob */
|
||||
pointindex = ptcounts[segment];
|
||||
if (pointindex > 0 && xpts[pointindex] != xpts[pointindex - 1] &&
|
||||
xpts[pointindex] != xstarts[segment])
|
||||
xpts[pointindex] != xstarts[segment]) {
|
||||
qlsq.add(xstarts[segment],
|
||||
ypts[pointindex - 1] + (ypts[pointindex] - ypts[pointindex - 1]) *
|
||||
(xstarts[segment] - xpts[pointindex - 1]) /
|
||||
(xpts[pointindex] - xpts[pointindex - 1]));
|
||||
}
|
||||
for (; pointindex < ptcounts[segment + 1]; pointindex++) {
|
||||
qlsq.add(xpts[pointindex], ypts[pointindex]);
|
||||
}
|
||||
if (pointindex > 0 && pointindex < pointcount && xpts[pointindex] != xstarts[segment + 1])
|
||||
if (pointindex > 0 && pointindex < pointcount && xpts[pointindex] != xstarts[segment + 1]) {
|
||||
qlsq.add(xstarts[segment + 1],
|
||||
ypts[pointindex - 1] + (ypts[pointindex] - ypts[pointindex - 1]) *
|
||||
(xstarts[segment + 1] - xpts[pointindex - 1]) /
|
||||
(xpts[pointindex] - xpts[pointindex - 1]));
|
||||
}
|
||||
qlsq.fit(degree);
|
||||
quadratics[segment].a = qlsq.get_a();
|
||||
quadratics[segment].b = qlsq.get_b();
|
||||
@ -224,10 +226,11 @@ int32_t QSPLINE::spline_index( // evaluate
|
||||
top = segments;
|
||||
while (top - bottom > 1) {
|
||||
index = (top + bottom) / 2; // centre of range
|
||||
if (x >= xcoords[index])
|
||||
if (x >= xcoords[index]) {
|
||||
bottom = index; // new min
|
||||
else
|
||||
} else {
|
||||
top = index; // new max
|
||||
}
|
||||
}
|
||||
return bottom;
|
||||
}
|
||||
@ -290,10 +293,12 @@ void QSPLINE::extrapolate( // linear extrapolation
|
||||
int increment; // in size
|
||||
|
||||
increment = xmin < xcoords[0] ? 1 : 0;
|
||||
if (xmax > xcoords[segments])
|
||||
if (xmax > xcoords[segments]) {
|
||||
increment++;
|
||||
if (increment == 0)
|
||||
}
|
||||
if (increment == 0) {
|
||||
return;
|
||||
}
|
||||
xstarts = new int32_t[segments + 1 + increment];
|
||||
quads = new QUAD_COEFFS[segments + increment];
|
||||
if (xmin < xcoords[0]) {
|
||||
@ -302,8 +307,9 @@ void QSPLINE::extrapolate( // linear extrapolation
|
||||
quads[0].b = gradient;
|
||||
quads[0].c = y(xcoords[0]) - quads[0].b * xcoords[0];
|
||||
dest_segment = 1;
|
||||
} else
|
||||
} else {
|
||||
dest_segment = 0;
|
||||
}
|
||||
for (segment = 0; segment < segments; segment++) {
|
||||
xstarts[dest_segment] = xcoords[segment];
|
||||
quads[dest_segment] = quadratics[segment];
|
||||
@ -345,10 +351,11 @@ void QSPLINE::plot( // draw it
|
||||
increment = static_cast<double>(xcoords[segment + 1] - xcoords[segment]) / QSPLINE_PRECISION;
|
||||
x = xcoords[segment];
|
||||
for (step = 0; step <= QSPLINE_PRECISION; step++) {
|
||||
if (segment == 0 && step == 0)
|
||||
if (segment == 0 && step == 0) {
|
||||
window->SetCursor(x, quadratics[segment].y(x));
|
||||
else
|
||||
} else {
|
||||
window->DrawTo(x, quadratics[segment].y(x));
|
||||
}
|
||||
x += increment;
|
||||
}
|
||||
}
|
||||
|
@ -287,8 +287,9 @@ BLOB_CHOICE_LIST *WERD_CHOICE::blob_choices(int index, MATRIX *ratings) const {
|
||||
// MATRIX for the given index into the word.
|
||||
MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const {
|
||||
int col = 0;
|
||||
for (int i = 0; i < index; ++i)
|
||||
for (int i = 0; i < index; ++i) {
|
||||
col += state_[i];
|
||||
}
|
||||
int row = col + state_[index] - 1;
|
||||
return MATRIX_COORD(col, row);
|
||||
}
|
||||
@ -327,10 +328,11 @@ void WERD_CHOICE::remove_unichar_ids(int start, int num) {
|
||||
ASSERT_HOST(start >= 0 && start + num <= length_);
|
||||
// Accumulate the states to account for the merged blobs.
|
||||
for (int i = 0; i < num; ++i) {
|
||||
if (start > 0)
|
||||
if (start > 0) {
|
||||
state_[start - 1] += state_[start + i];
|
||||
else if (start + num < length_)
|
||||
} else if (start + num < length_) {
|
||||
state_[start + num] += state_[start + i];
|
||||
}
|
||||
}
|
||||
for (int i = start; i + num < length_; ++i) {
|
||||
unichar_ids_[i] = unichar_ids_[i + num];
|
||||
@ -428,8 +430,9 @@ bool WERD_CHOICE::has_rtl_unichar_id() const {
|
||||
*/
|
||||
void WERD_CHOICE::string_and_lengths(std::string *word_str, std::string *word_lengths_str) const {
|
||||
*word_str = "";
|
||||
if (word_lengths_str != nullptr)
|
||||
if (word_lengths_str != nullptr) {
|
||||
*word_lengths_str = "";
|
||||
}
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
|
||||
*word_str += ch;
|
||||
@ -473,13 +476,16 @@ WERD_CHOICE &WERD_CHOICE::operator+=(const WERD_CHOICE &second) {
|
||||
script_pos_[length_ + i] = second.BlobPosition(i);
|
||||
}
|
||||
length_ += second.length();
|
||||
if (second.adjust_factor_ > adjust_factor_)
|
||||
if (second.adjust_factor_ > adjust_factor_) {
|
||||
adjust_factor_ = second.adjust_factor_;
|
||||
}
|
||||
rating_ += second.rating(); // add ratings
|
||||
if (second.certainty() < certainty_) // take min
|
||||
if (second.certainty() < certainty_) { // take min
|
||||
certainty_ = second.certainty();
|
||||
if (second.dangerous_ambig_found_)
|
||||
}
|
||||
if (second.dangerous_ambig_found_) {
|
||||
dangerous_ambig_found_ = true;
|
||||
}
|
||||
if (permuter_ == NO_PERM) {
|
||||
permuter_ = second.permuter();
|
||||
} else if (second.permuter() != NO_PERM && second.permuter() != permuter_) {
|
||||
@ -525,8 +531,9 @@ WERD_CHOICE &WERD_CHOICE::operator=(const WERD_CHOICE &source) {
|
||||
// NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
|
||||
void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
|
||||
// Initialize to normal.
|
||||
for (int i = 0; i < length_; ++i)
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
script_pos_[i] = tesseract::SP_NORMAL;
|
||||
}
|
||||
if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
|
||||
return;
|
||||
}
|
||||
@ -586,8 +593,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
|
||||
|
||||
// Sets all the script_pos_ positions to the given position.
|
||||
void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) {
|
||||
for (int i = 0; i < length_; ++i)
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
script_pos_[i] = position;
|
||||
}
|
||||
}
|
||||
|
||||
/* static */
|
||||
@ -627,8 +635,9 @@ int WERD_CHOICE::GetTopScriptID() const {
|
||||
int max_script = unicharset_->get_script_table_size();
|
||||
int *sid = new int[max_script];
|
||||
int x;
|
||||
for (x = 0; x < max_script; x++)
|
||||
for (x = 0; x < max_script; x++) {
|
||||
sid[x] = 0;
|
||||
}
|
||||
for (x = 0; x < length_; ++x) {
|
||||
int script_id = unicharset_->get_script(unichar_id(x));
|
||||
sid[script_id]++;
|
||||
@ -647,11 +656,14 @@ int WERD_CHOICE::GetTopScriptID() const {
|
||||
// Note that high script ID overrides lower one on a tie, thus biasing
|
||||
// towards non-Common script (if sorted that way in unicharset file).
|
||||
int max_sid = 0;
|
||||
for (x = 1; x < max_script; x++)
|
||||
if (sid[x] >= sid[max_sid])
|
||||
for (x = 1; x < max_script; x++) {
|
||||
if (sid[x] >= sid[max_sid]) {
|
||||
max_sid = x;
|
||||
if (sid[max_sid] < length_ / 2)
|
||||
}
|
||||
}
|
||||
if (sid[max_sid] < length_ / 2) {
|
||||
max_sid = unicharset_->null_sid();
|
||||
}
|
||||
delete[] sid;
|
||||
return max_sid;
|
||||
}
|
||||
@ -711,8 +723,9 @@ void WERD_CHOICE::print(const char *msg) const {
|
||||
// Prints the segmentation state with an introductory message.
|
||||
void WERD_CHOICE::print_state(const char *msg) const {
|
||||
tprintf("%s", msg);
|
||||
for (int i = 0; i < length_; ++i)
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
tprintf(" %d", state_[i]);
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
|
||||
@ -727,16 +740,18 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
|
||||
// Check the state against the static prev_drawn_state.
|
||||
static std::vector<int> prev_drawn_state;
|
||||
bool already_done = prev_drawn_state.size() == length_;
|
||||
if (!already_done)
|
||||
if (!already_done) {
|
||||
prev_drawn_state.resize(length_);
|
||||
}
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (prev_drawn_state[i] != state_[i]) {
|
||||
already_done = false;
|
||||
}
|
||||
prev_drawn_state[i] = state_[i];
|
||||
}
|
||||
if (already_done || word->blobs.empty())
|
||||
if (already_done || word->blobs.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Create the window if needed.
|
||||
if (segm_window == nullptr) {
|
||||
@ -764,14 +779,16 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
|
||||
|
||||
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2) {
|
||||
const UNICHARSET *uchset = word1.unicharset();
|
||||
if (word2.unicharset() != uchset)
|
||||
if (word2.unicharset() != uchset) {
|
||||
return false;
|
||||
}
|
||||
int w1start, w1end;
|
||||
word1.punct_stripped(&w1start, &w1end);
|
||||
int w2start, w2end;
|
||||
word2.punct_stripped(&w2start, &w2end);
|
||||
if (w1end - w1start != w2end - w2start)
|
||||
if (w1end - w1start != w2end - w2start) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < w1end - w1start; i++) {
|
||||
if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
|
||||
uchset->to_lower(word2.unichar_id(w2start + i))) {
|
||||
@ -804,8 +821,9 @@ void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings,
|
||||
c_it.set_to_list(ratings);
|
||||
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
|
||||
c_it.data()->print(&current_unicharset);
|
||||
if (!c_it.at_last())
|
||||
if (!c_it.at_last()) {
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
tprintf("\n");
|
||||
fflush(stdout);
|
||||
|
@ -297,8 +297,9 @@ public:
|
||||
return state_[index];
|
||||
}
|
||||
ScriptPos BlobPosition(int index) const {
|
||||
if (index < 0 || index >= length_)
|
||||
if (index < 0 || index >= length_) {
|
||||
return SP_NORMAL;
|
||||
}
|
||||
return script_pos_[index];
|
||||
}
|
||||
inline float rating() const {
|
||||
@ -479,16 +480,18 @@ public:
|
||||
// Returns true if any unichar_id in the word is a non-space-delimited char.
|
||||
bool ContainsAnyNonSpaceDelimited() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i]))
|
||||
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if the word is all spaces.
|
||||
bool IsAllSpaces() const {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
if (unichar_ids_[i] != UNICHAR_SPACE)
|
||||
if (unichar_ids_[i] != UNICHAR_SPACE) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -88,25 +88,29 @@ TBOX TBOX::intersection( // shared area box
|
||||
int16_t right;
|
||||
int16_t top;
|
||||
if (overlap(box)) {
|
||||
if (box.bot_left.x() > bot_left.x())
|
||||
if (box.bot_left.x() > bot_left.x()) {
|
||||
left = box.bot_left.x();
|
||||
else
|
||||
} else {
|
||||
left = bot_left.x();
|
||||
}
|
||||
|
||||
if (box.top_right.x() < top_right.x())
|
||||
if (box.top_right.x() < top_right.x()) {
|
||||
right = box.top_right.x();
|
||||
else
|
||||
} else {
|
||||
right = top_right.x();
|
||||
}
|
||||
|
||||
if (box.bot_left.y() > bot_left.y())
|
||||
if (box.bot_left.y() > bot_left.y()) {
|
||||
bottom = box.bot_left.y();
|
||||
else
|
||||
} else {
|
||||
bottom = bot_left.y();
|
||||
}
|
||||
|
||||
if (box.top_right.y() < top_right.y())
|
||||
if (box.top_right.y() < top_right.y()) {
|
||||
top = box.top_right.y();
|
||||
else
|
||||
} else {
|
||||
top = top_right.y();
|
||||
}
|
||||
} else {
|
||||
left = INT16_MAX;
|
||||
bottom = INT16_MAX;
|
||||
@ -126,25 +130,29 @@ TBOX TBOX::bounding_union( // box enclosing both
|
||||
ICOORD bl; // bottom left
|
||||
ICOORD tr; // top right
|
||||
|
||||
if (box.bot_left.x() < bot_left.x())
|
||||
if (box.bot_left.x() < bot_left.x()) {
|
||||
bl.set_x(box.bot_left.x());
|
||||
else
|
||||
} else {
|
||||
bl.set_x(bot_left.x());
|
||||
}
|
||||
|
||||
if (box.top_right.x() > top_right.x())
|
||||
if (box.top_right.x() > top_right.x()) {
|
||||
tr.set_x(box.top_right.x());
|
||||
else
|
||||
} else {
|
||||
tr.set_x(top_right.x());
|
||||
}
|
||||
|
||||
if (box.bot_left.y() < bot_left.y())
|
||||
if (box.bot_left.y() < bot_left.y()) {
|
||||
bl.set_y(box.bot_left.y());
|
||||
else
|
||||
} else {
|
||||
bl.set_y(bot_left.y());
|
||||
}
|
||||
|
||||
if (box.top_right.y() > top_right.y())
|
||||
if (box.top_right.y() > top_right.y()) {
|
||||
tr.set_y(box.top_right.y());
|
||||
else
|
||||
} else {
|
||||
tr.set_y(top_right.y());
|
||||
}
|
||||
return TBOX(bl, tr);
|
||||
}
|
||||
|
||||
@ -177,19 +185,23 @@ void TBOX::print_to_str(std::string &str) const {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool TBOX::Serialize(FILE *fp) const {
|
||||
if (!bot_left.Serialize(fp))
|
||||
if (!bot_left.Serialize(fp)) {
|
||||
return false;
|
||||
if (!top_right.Serialize(fp))
|
||||
}
|
||||
if (!top_right.Serialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool TBOX::DeSerialize(bool swap, FILE *fp) {
|
||||
if (!bot_left.DeSerialize(swap, fp))
|
||||
if (!bot_left.DeSerialize(swap, fp)) {
|
||||
return false;
|
||||
if (!top_right.DeSerialize(swap, fp))
|
||||
}
|
||||
if (!top_right.DeSerialize(swap, fp)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -202,17 +214,21 @@ bool TBOX::DeSerialize(bool swap, FILE *fp) {
|
||||
TBOX &operator+=( // bounding bounding bx
|
||||
TBOX &op1, // operands
|
||||
const TBOX &op2) {
|
||||
if (op2.bot_left.x() < op1.bot_left.x())
|
||||
if (op2.bot_left.x() < op1.bot_left.x()) {
|
||||
op1.bot_left.set_x(op2.bot_left.x());
|
||||
}
|
||||
|
||||
if (op2.top_right.x() > op1.top_right.x())
|
||||
if (op2.top_right.x() > op1.top_right.x()) {
|
||||
op1.top_right.set_x(op2.top_right.x());
|
||||
}
|
||||
|
||||
if (op2.bot_left.y() < op1.bot_left.y())
|
||||
if (op2.bot_left.y() < op1.bot_left.y()) {
|
||||
op1.bot_left.set_y(op2.bot_left.y());
|
||||
}
|
||||
|
||||
if (op2.top_right.y() > op1.top_right.y())
|
||||
if (op2.top_right.y() > op1.top_right.y()) {
|
||||
op1.top_right.set_y(op2.top_right.y());
|
||||
}
|
||||
|
||||
return op1;
|
||||
}
|
||||
@ -225,17 +241,21 @@ TBOX &operator+=( // bounding bounding bx
|
||||
|
||||
TBOX &operator&=(TBOX &op1, const TBOX &op2) {
|
||||
if (op1.overlap(op2)) {
|
||||
if (op2.bot_left.x() > op1.bot_left.x())
|
||||
if (op2.bot_left.x() > op1.bot_left.x()) {
|
||||
op1.bot_left.set_x(op2.bot_left.x());
|
||||
}
|
||||
|
||||
if (op2.top_right.x() < op1.top_right.x())
|
||||
if (op2.top_right.x() < op1.top_right.x()) {
|
||||
op1.top_right.set_x(op2.top_right.x());
|
||||
}
|
||||
|
||||
if (op2.bot_left.y() > op1.bot_left.y())
|
||||
if (op2.bot_left.y() > op1.bot_left.y()) {
|
||||
op1.bot_left.set_y(op2.bot_left.y());
|
||||
}
|
||||
|
||||
if (op2.top_right.y() < op1.top_right.y())
|
||||
if (op2.top_right.y() < op1.top_right.y()) {
|
||||
op1.top_right.set_y(op2.top_right.y());
|
||||
}
|
||||
} else {
|
||||
op1.bot_left.set_x(INT16_MAX);
|
||||
op1.bot_left.set_y(INT16_MAX);
|
||||
|
@ -115,24 +115,27 @@ public:
|
||||
}
|
||||
|
||||
int16_t height() const { // how high is it?
|
||||
if (!null_box())
|
||||
if (!null_box()) {
|
||||
return top_right.y() - bot_left.y();
|
||||
else
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int16_t width() const { // how wide is it?
|
||||
if (!null_box())
|
||||
if (!null_box()) {
|
||||
return top_right.x() - bot_left.x();
|
||||
else
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t area() const { // what is the area?
|
||||
if (!null_box())
|
||||
if (!null_box()) {
|
||||
return width() * height();
|
||||
else
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Pads the box on either side by the supplied x,y pad amounts.
|
||||
@ -372,13 +375,15 @@ inline bool TBOX::major_overlap( // Do boxes overlap more that half.
|
||||
int overlap = std::min(box.top_right.x(), top_right.x());
|
||||
overlap -= std::max(box.bot_left.x(), bot_left.x());
|
||||
overlap += overlap;
|
||||
if (overlap < std::min(box.width(), width()))
|
||||
if (overlap < std::min(box.width(), width())) {
|
||||
return false;
|
||||
}
|
||||
overlap = std::min(box.top_right.y(), top_right.y());
|
||||
overlap -= std::max(box.bot_left.y(), bot_left.y());
|
||||
overlap += overlap;
|
||||
if (overlap < std::min(box.height(), height()))
|
||||
if (overlap < std::min(box.height(), height())) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -460,10 +465,11 @@ inline double TBOX::x_overlap_fraction(const TBOX &other) const {
|
||||
int width = right() - left();
|
||||
if (width == 0) {
|
||||
int x = left();
|
||||
if (other.left() <= x && x <= other.right())
|
||||
if (other.left() <= x && x <= other.right()) {
|
||||
return 1.0;
|
||||
else
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
} else {
|
||||
return std::max(0.0, static_cast<double>(high - low) / width);
|
||||
}
|
||||
@ -482,10 +488,11 @@ inline double TBOX::y_overlap_fraction(const TBOX &other) const {
|
||||
int height = top() - bottom();
|
||||
if (height == 0) {
|
||||
int y = bottom();
|
||||
if (other.bottom() <= y && y <= other.top())
|
||||
if (other.bottom() <= y && y <= other.top()) {
|
||||
return 1.0;
|
||||
else
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
} else {
|
||||
return std::max(0.0, static_cast<double>(high - low) / height);
|
||||
}
|
||||
|
@ -57,11 +57,12 @@ bool REJ::rej_before_quality_accept() {
|
||||
}
|
||||
|
||||
bool REJ::rejected() { // Is char rejected?
|
||||
if (flag(R_MINIMAL_REJ_ACCEPT))
|
||||
if (flag(R_MINIMAL_REJ_ACCEPT)) {
|
||||
return false;
|
||||
else
|
||||
} else {
|
||||
return (perm_rejected() || rej_between_quality_and_minimal_rej_accept() ||
|
||||
(!flag(R_QUALITY_ACCEPT) && rej_before_quality_accept()));
|
||||
}
|
||||
}
|
||||
|
||||
bool REJ::accept_if_good_quality() { // potential rej?
|
||||
@ -230,24 +231,27 @@ int16_t REJMAP::accept_count() { // How many accepted?
|
||||
int16_t count = 0;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
bool REJMAP::recoverable_rejects() { // Any non perm rejs?
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (ptr[i].recoverable())
|
||||
if (ptr[i].recoverable()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool REJMAP::quality_recoverable_rejects() { // Any potential rejs?
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (ptr[i].accept_if_good_quality())
|
||||
if (ptr[i].accept_if_good_quality()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -260,8 +264,9 @@ void REJMAP::remove_pos( // Cut out an element
|
||||
ASSERT_HOST(len > 0);
|
||||
|
||||
len--;
|
||||
for (; pos < len; pos++)
|
||||
for (; pos < len; pos++) {
|
||||
ptr[pos] = ptr[pos + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void REJMAP::print(FILE *fp) {
|
||||
@ -304,8 +309,9 @@ void REJMAP::rej_word_not_tess_accepted() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_not_tess_accepted();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -313,8 +319,9 @@ void REJMAP::rej_word_contains_blanks() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_contains_blanks();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -322,8 +329,9 @@ void REJMAP::rej_word_bad_permuter() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_bad_permuter();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -331,8 +339,9 @@ void REJMAP::rej_word_xht_fixup() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_xht_fixup();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -340,8 +349,9 @@ void REJMAP::rej_word_no_alphanums() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_no_alphanums();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -349,8 +359,9 @@ void REJMAP::rej_word_mostly_rej() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_mostly_rej();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -358,8 +369,9 @@ void REJMAP::rej_word_bad_quality() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_bad_quality();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -367,8 +379,9 @@ void REJMAP::rej_word_doc_rej() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_doc_rej();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -376,8 +389,9 @@ void REJMAP::rej_word_block_rej() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_block_rej();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -385,8 +399,9 @@ void REJMAP::rej_word_row_rej() { // Reject whole word
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
if (ptr[i].accepted())
|
||||
if (ptr[i].accepted()) {
|
||||
ptr[i].setrej_row_rej();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,10 +102,11 @@ class REJ {
|
||||
BITS16 flags2;
|
||||
|
||||
void set_flag(REJ_FLAGS rej_flag) {
|
||||
if (rej_flag < 16)
|
||||
if (rej_flag < 16) {
|
||||
flags1.set(rej_flag);
|
||||
else
|
||||
} else {
|
||||
flags2.set(rej_flag - 16);
|
||||
}
|
||||
}
|
||||
|
||||
bool rej_before_nn_accept();
|
||||
@ -128,21 +129,23 @@ public:
|
||||
const REJ &source) = default;
|
||||
|
||||
bool flag(REJ_FLAGS rej_flag) {
|
||||
if (rej_flag < 16)
|
||||
if (rej_flag < 16) {
|
||||
return flags1[rej_flag];
|
||||
else
|
||||
} else {
|
||||
return flags2[rej_flag - 16];
|
||||
}
|
||||
}
|
||||
|
||||
char display_char() {
|
||||
if (perm_rejected())
|
||||
if (perm_rejected()) {
|
||||
return MAP_REJECT_PERM;
|
||||
else if (accept_if_good_quality())
|
||||
} else if (accept_if_good_quality()) {
|
||||
return MAP_REJECT_POTENTIAL;
|
||||
else if (rejected())
|
||||
} else if (rejected()) {
|
||||
return MAP_REJECT_TEMP;
|
||||
else
|
||||
} else {
|
||||
return MAP_ACCEPT;
|
||||
}
|
||||
}
|
||||
|
||||
bool perm_rejected(); // Is char perm reject?
|
||||
|
@ -54,14 +54,17 @@ bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
|
||||
bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
|
||||
const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
|
||||
for (int s = 0; s < insert_index; ++s) {
|
||||
if (!seams[s]->FindBlobWidth(blobs, s, modify))
|
||||
if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!FindBlobWidth(blobs, insert_index, modify))
|
||||
if (!FindBlobWidth(blobs, insert_index, modify)) {
|
||||
return false;
|
||||
for (int s = insert_index; s < seams.size(); ++s) {
|
||||
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify))
|
||||
}
|
||||
for (unsigned s = insert_index; s < seams.size(); ++s) {
|
||||
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -78,19 +81,22 @@ bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modi
|
||||
const SPLIT &split = splits_[s];
|
||||
bool found_split = split.ContainedByBlob(*blobs[index]);
|
||||
// Look right.
|
||||
for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
|
||||
for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
|
||||
found_split = split.ContainedByBlob(*blobs[b]);
|
||||
if (found_split && b - index > widthp_ && modify)
|
||||
if (found_split && b - index > widthp_ && modify) {
|
||||
widthp_ = b - index;
|
||||
}
|
||||
}
|
||||
// Look left.
|
||||
for (int b = index - 1; !found_split && b >= 0; --b) {
|
||||
found_split = split.ContainedByBlob(*blobs[b]);
|
||||
if (found_split && index - b > widthn_ && modify)
|
||||
if (found_split && index - b > widthn_ && modify) {
|
||||
widthn_ = index - b;
|
||||
}
|
||||
}
|
||||
if (found_split)
|
||||
if (found_split) {
|
||||
++num_found;
|
||||
}
|
||||
}
|
||||
return num_found == num_splits_;
|
||||
}
|
||||
@ -120,8 +126,9 @@ void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
|
||||
}
|
||||
|
||||
TESSLINE *outline = blob->outlines;
|
||||
while (outline->next)
|
||||
while (outline->next) {
|
||||
outline = outline->next;
|
||||
}
|
||||
outline->next = other_blob->outlines;
|
||||
other_blob->outlines = nullptr;
|
||||
delete other_blob;
|
||||
@ -139,8 +146,9 @@ void SEAM::Print(const char *label) const {
|
||||
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, widthp_, widthn_);
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].Print();
|
||||
if (s + 1 < num_splits_)
|
||||
if (s + 1 < num_splits_) {
|
||||
tprintf(", ");
|
||||
}
|
||||
}
|
||||
tprintf("\n");
|
||||
}
|
||||
@ -150,8 +158,8 @@ void SEAM::Print(const char *label) const {
|
||||
void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
|
||||
if (!seams.empty()) {
|
||||
tprintf("%s\n", label);
|
||||
for (int x = 0; x < seams.size(); ++x) {
|
||||
tprintf("%2d: ", x);
|
||||
for (unsigned x = 0; x < seams.size(); ++x) {
|
||||
tprintf("%2u: ", x);
|
||||
seams[x]->Print("");
|
||||
}
|
||||
tprintf("\n");
|
||||
@ -161,8 +169,9 @@ void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
// Draws the seam in the given window.
|
||||
void SEAM::Mark(ScrollView *window) const {
|
||||
for (int s = 0; s < num_splits_; ++s)
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
splits_[s].Mark(window);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -171,8 +180,9 @@ void SEAM::Mark(ScrollView *window) const {
|
||||
/* static */
|
||||
void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
|
||||
int first, int last) {
|
||||
for (int x = first; x < last; ++x)
|
||||
for (int x = first; x < last; ++x) {
|
||||
seams[x]->Reveal();
|
||||
}
|
||||
|
||||
TESSLINE *outline = blobs[first]->outlines;
|
||||
int next_blob = first + 1;
|
||||
@ -194,15 +204,18 @@ void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB
|
||||
void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
|
||||
int first, int last) {
|
||||
TESSLINE *outline = blobs[first]->outlines;
|
||||
if (!outline)
|
||||
if (!outline) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int x = first; x < last; ++x) {
|
||||
SEAM *seam = seams[x];
|
||||
if (x - seam->widthn_ >= first && x + seam->widthp_ < last)
|
||||
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
|
||||
seam->Hide();
|
||||
while (outline->next)
|
||||
}
|
||||
while (outline->next) {
|
||||
outline = outline->next;
|
||||
}
|
||||
outline->next = blobs[x + 1]->outlines;
|
||||
}
|
||||
}
|
||||
@ -224,8 +237,9 @@ void SEAM::Reveal() const {
|
||||
// Computes and returns, but does not set, the full priority of *this SEAM.
|
||||
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
|
||||
double center_knob, double width_change_knob) const {
|
||||
if (num_splits_ == 0)
|
||||
if (num_splits_ == 0) {
|
||||
return 0.0f;
|
||||
}
|
||||
for (int s = 1; s < num_splits_; ++s) {
|
||||
splits_[s].SplitOutline();
|
||||
}
|
||||
|
@ -71,15 +71,17 @@ public:
|
||||
location_ += other.location_;
|
||||
location_ /= 2;
|
||||
|
||||
for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
|
||||
for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) {
|
||||
splits_[num_splits_++] = other.splits_[s];
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the given blob contains all splits of *this SEAM.
|
||||
bool ContainedByBlob(const TBLOB &blob) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
if (!splits_[s].ContainedByBlob(blob))
|
||||
if (!splits_[s].ContainedByBlob(blob)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -88,17 +90,20 @@ public:
|
||||
// the EDGEPT pointer, not the coordinates.
|
||||
bool UsesPoint(const EDGEPT *point) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
if (splits_[s].UsesPoint(point))
|
||||
if (splits_[s].UsesPoint(point)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Returns true if *this and other share any common point, by coordinates.
|
||||
bool SharesPosition(const SEAM &other) const {
|
||||
for (int s = 0; s < num_splits_; ++s) {
|
||||
for (int t = 0; t < other.num_splits_; ++t)
|
||||
if (splits_[s].SharesPosition(other.splits_[t]))
|
||||
for (int t = 0; t < other.num_splits_; ++t) {
|
||||
if (splits_[s].SharesPosition(other.splits_[t])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -108,8 +113,9 @@ public:
|
||||
TBOX split1_box = splits_[s].bounding_box();
|
||||
for (int t = 0; t < other.num_splits_; ++t) {
|
||||
TBOX split2_box = other.splits_[t].bounding_box();
|
||||
if (split1_box.y_overlap(split2_box))
|
||||
if (split1_box.y_overlap(split2_box)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -77,8 +77,9 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
|
||||
TBOX box2 = Box21();
|
||||
int min_left = std::min(box1.left(), box2.left());
|
||||
int max_right = std::max(box1.right(), box2.right());
|
||||
if (xmin < min_left && xmax > max_right)
|
||||
if (xmin < min_left && xmax > max_right) {
|
||||
return kBadPriority;
|
||||
}
|
||||
|
||||
float grade = 0.0f;
|
||||
// grade_overlap.
|
||||
@ -89,10 +90,12 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
|
||||
if (overlap == min_width) {
|
||||
grade += 100.0f; // Total overlap.
|
||||
} else {
|
||||
if (2 * overlap > min_width)
|
||||
if (2 * overlap > min_width) {
|
||||
overlap += 2 * overlap - min_width;
|
||||
if (overlap > 0)
|
||||
}
|
||||
if (overlap > 0) {
|
||||
grade += overlap_knob * overlap;
|
||||
}
|
||||
}
|
||||
// grade_center_of_blob.
|
||||
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
|
||||
@ -100,8 +103,9 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
|
||||
}
|
||||
// grade_width_change.
|
||||
float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
|
||||
if (width_change_grade > 0.0f)
|
||||
if (width_change_grade > 0.0f) {
|
||||
grade += width_change_grade * width_change_knob;
|
||||
}
|
||||
return grade;
|
||||
}
|
||||
|
||||
@ -228,8 +232,9 @@ void SPLIT::Mark(ScrollView *window) const {
|
||||
// Inserts the resulting outlines into the given list.
|
||||
void SPLIT::SplitOutlineList(TESSLINE *outlines) const {
|
||||
SplitOutline();
|
||||
while (outlines->next != nullptr)
|
||||
while (outlines->next != nullptr) {
|
||||
outlines = outlines->next;
|
||||
}
|
||||
|
||||
outlines->next = new TESSLINE;
|
||||
outlines->next->loop = point1;
|
||||
|
@ -77,8 +77,9 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
|
||||
**********************************************************************/
|
||||
void STATS::clear() { // clear out buckets
|
||||
total_count_ = 0;
|
||||
if (buckets_ != nullptr)
|
||||
if (buckets_ != nullptr) {
|
||||
memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -157,8 +158,9 @@ double STATS::sd() const { // standard deviation
|
||||
}
|
||||
double variance = static_cast<double>(sum) / total_count_;
|
||||
variance = sqsum / total_count_ - variance * variance;
|
||||
if (variance > 0.0)
|
||||
if (variance > 0.0) {
|
||||
return sqrt(variance);
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@ -184,8 +186,9 @@ double STATS::ile(double frac) const {
|
||||
#endif
|
||||
int sum = 0;
|
||||
int index = 0;
|
||||
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++])
|
||||
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
|
||||
;
|
||||
}
|
||||
if (index > 0) {
|
||||
ASSERT_HOST(buckets_[index - 1] > 0);
|
||||
return rangemin_ + index - static_cast<double>(sum - target) / buckets_[index - 1];
|
||||
@ -204,8 +207,9 @@ int32_t STATS::min_bucket() const { // Find min
|
||||
return rangemin_;
|
||||
}
|
||||
int32_t min = 0;
|
||||
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++)
|
||||
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
|
||||
;
|
||||
}
|
||||
return rangemin_ + min;
|
||||
}
|
||||
|
||||
@ -220,8 +224,9 @@ int32_t STATS::max_bucket() const { // Find max
|
||||
return rangemin_;
|
||||
}
|
||||
int32_t max;
|
||||
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--)
|
||||
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
|
||||
;
|
||||
}
|
||||
return rangemin_ + max;
|
||||
}
|
||||
|
||||
@ -244,11 +249,13 @@ double STATS::median() const { // get median
|
||||
int32_t min_pile;
|
||||
int32_t max_pile;
|
||||
/* Find preceding non zero pile */
|
||||
for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--)
|
||||
for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--) {
|
||||
;
|
||||
}
|
||||
/* Find following non zero pile */
|
||||
for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++)
|
||||
for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++) {
|
||||
;
|
||||
}
|
||||
median = (min_pile + max_pile) / 2.0;
|
||||
}
|
||||
return median;
|
||||
@ -264,19 +271,24 @@ bool STATS::local_min(int32_t x) const {
|
||||
return false;
|
||||
}
|
||||
x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
|
||||
if (buckets_[x] == 0)
|
||||
if (buckets_[x] == 0) {
|
||||
return true;
|
||||
}
|
||||
int32_t index; // table index
|
||||
for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index)
|
||||
for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index) {
|
||||
;
|
||||
if (index >= 0 && buckets_[index] < buckets_[x])
|
||||
}
|
||||
if (index >= 0 && buckets_[index] < buckets_[x]) {
|
||||
return false;
|
||||
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index)
|
||||
}
|
||||
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
|
||||
;
|
||||
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
|
||||
}
|
||||
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
|
||||
return false;
|
||||
else
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
@ -297,10 +309,12 @@ void STATS::smooth(int32_t factor) {
|
||||
// centre weight
|
||||
int count = buckets_[entry] * factor;
|
||||
for (int offset = 1; offset < factor; offset++) {
|
||||
if (entry - offset >= 0)
|
||||
if (entry - offset >= 0) {
|
||||
count += buckets_[entry - offset] * (factor - offset);
|
||||
if (entry + offset < entrycount)
|
||||
}
|
||||
if (entry + offset < entrycount) {
|
||||
count += buckets_[entry + offset] * (factor - offset);
|
||||
}
|
||||
}
|
||||
result.add(entry + rangemin_, count);
|
||||
}
|
||||
@ -335,8 +349,9 @@ int32_t STATS::cluster(float lower, // thresholds
|
||||
float min_dist; // from best_cluster
|
||||
int32_t cluster_count; // no of clusters
|
||||
|
||||
if (buckets_ == nullptr || max_clusters < 1)
|
||||
if (buckets_ == nullptr || max_clusters < 1) {
|
||||
return 0;
|
||||
}
|
||||
centres = new float[max_clusters + 1];
|
||||
for (cluster_count = 1;
|
||||
cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
|
||||
@ -380,8 +395,9 @@ int32_t STATS::cluster(float lower, // thresholds
|
||||
for (cluster = 1; cluster <= cluster_count; cluster++) {
|
||||
dist = entry + rangemin_ - centres[cluster];
|
||||
// find distance
|
||||
if (dist < 0)
|
||||
if (dist < 0) {
|
||||
dist = -dist;
|
||||
}
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist; // find least
|
||||
best_cluster = cluster;
|
||||
@ -463,8 +479,9 @@ static bool GatherPeak(int index, const int *src_buckets, int *used_buckets, int
|
||||
// more useful than decreasing total count.
|
||||
// Returns the actual number of modes found.
|
||||
int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const {
|
||||
if (max_modes <= 0)
|
||||
if (max_modes <= 0) {
|
||||
return 0;
|
||||
}
|
||||
int src_count = rangemax_ - rangemin_;
|
||||
// Used copies the counts in buckets_ as they get used.
|
||||
STATS used(rangemin_, rangemax_);
|
||||
@ -493,23 +510,27 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
|
||||
int prev_pile = max_count;
|
||||
for (int offset = 1; max_index + offset < src_count; ++offset) {
|
||||
if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
|
||||
&total_value))
|
||||
&total_value)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
prev_pile = buckets_[max_index];
|
||||
for (int offset = 1; max_index - offset >= 0; ++offset) {
|
||||
if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
|
||||
&total_value))
|
||||
&total_value)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (total_count > least_count || modes.size() < max_modes) {
|
||||
// We definitely want this mode, so if we have enough discard the least.
|
||||
if (modes.size() == max_modes)
|
||||
if (modes.size() == max_modes) {
|
||||
modes.resize(max_modes - 1);
|
||||
}
|
||||
int target_index = 0;
|
||||
// Linear search for the target insertion point.
|
||||
while (target_index < modes.size() && modes[target_index].data() >= total_count)
|
||||
while (target_index < modes.size() && modes[target_index].data() >= total_count) {
|
||||
++target_index;
|
||||
}
|
||||
auto peak_mean = static_cast<float>(total_value / total_count + rangemin_);
|
||||
modes.insert(modes.begin() + target_index, KDPairInc<float, int>(peak_mean, total_count));
|
||||
least_count = modes.back().data();
|
||||
@ -535,8 +556,9 @@ void STATS::print() const {
|
||||
for (int index = min; index <= max; index++) {
|
||||
if (buckets_[index] != 0) {
|
||||
tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
|
||||
if (++num_printed % 8 == 0)
|
||||
if (++num_printed % 8 == 0) {
|
||||
tprintf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
tprintf("\n");
|
||||
|
@ -73,10 +73,12 @@ public:
|
||||
double median() const; // get median of samples
|
||||
// Returns the count of the given value.
|
||||
int32_t pile_count(int32_t value) const {
|
||||
if (value <= rangemin_)
|
||||
if (value <= rangemin_) {
|
||||
return buckets_[0];
|
||||
if (value >= rangemax_ - 1)
|
||||
}
|
||||
if (value >= rangemax_ - 1) {
|
||||
return buckets_[rangemax_ - rangemin_ - 1];
|
||||
}
|
||||
return buckets_[value - rangemin_];
|
||||
}
|
||||
// Returns the total count of all buckets.
|
||||
|
@ -71,8 +71,9 @@ static void position_outline( // put in place
|
||||
dest_outline = it.extract();
|
||||
child_it.add_to_end(dest_outline);
|
||||
// make it a child
|
||||
if (it.empty())
|
||||
if (it.empty()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return; // finished
|
||||
@ -110,8 +111,9 @@ static void plot_outline_list( // draw outlines
|
||||
outline = it.data();
|
||||
// draw it
|
||||
outline->plot(window, colour);
|
||||
if (!outline->child()->empty())
|
||||
if (!outline->child()->empty()) {
|
||||
plot_outline_list(outline->child(), window, child_colour, child_colour);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Draws the outlines in the given colour, and child_colour, normalized
|
||||
@ -124,8 +126,9 @@ static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
C_OUTLINE *outline = it.data();
|
||||
outline->plot_normed(denorm, colour, window);
|
||||
if (!outline->child()->empty())
|
||||
if (!outline->child()->empty()) {
|
||||
plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -143,8 +146,9 @@ static void reverse_outline_list(C_OUTLINE_LIST *list) {
|
||||
C_OUTLINE *outline = it.data();
|
||||
outline->reverse(); // reverse it
|
||||
outline->set_flag(COUT_INVERSE, true);
|
||||
if (!outline->child()->empty())
|
||||
if (!outline->child()->empty()) {
|
||||
reverse_outline_list(outline->child());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,10 +209,11 @@ void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_
|
||||
// Set inverse flag and reverse if needed.
|
||||
blob->CheckInverseFlagAndDirection();
|
||||
// Put on appropriate list.
|
||||
if (!blob_is_good && bad_blobs_it != nullptr)
|
||||
if (!blob_is_good && bad_blobs_it != nullptr) {
|
||||
bad_blobs_it->add_after_then_move(blob);
|
||||
else
|
||||
} else {
|
||||
good_blobs_it->add_after_then_move(blob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -346,8 +351,9 @@ void C_BLOB::move( // reposition blob
|
||||
) {
|
||||
C_OUTLINE_IT it(&outlines); // iterator
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
it.data()->move(vec); // move each outline
|
||||
}
|
||||
}
|
||||
|
||||
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
|
||||
@ -386,12 +392,14 @@ static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIS
|
||||
C_OUTLINE_IT it(list);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
C_OUTLINE *outline = it.data();
|
||||
if (pix != nullptr && pixGetDepth(pix) == 8)
|
||||
if (pix != nullptr && pixGetDepth(pix) == 8) {
|
||||
outline->ComputeEdgeOffsets(threshold, pix);
|
||||
else
|
||||
} else {
|
||||
outline->ComputeBinaryOffsets();
|
||||
if (!outline->child()->empty())
|
||||
}
|
||||
if (!outline->child()->empty()) {
|
||||
ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -420,8 +428,9 @@ int16_t C_BLOB::EstimateBaselinePosition() {
|
||||
int left = box.left();
|
||||
int width = box.width();
|
||||
int bottom = box.bottom();
|
||||
if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio)
|
||||
if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
|
||||
return bottom; // This is only for non-CJK blobs.
|
||||
}
|
||||
// Get the minimum y coordinate at each x-coordinate.
|
||||
std::vector<int> y_mins;
|
||||
y_mins.resize(width + 1, box.top());
|
||||
@ -430,16 +439,18 @@ int16_t C_BLOB::EstimateBaselinePosition() {
|
||||
C_OUTLINE *outline = it.data();
|
||||
ICOORD pos = outline->start_pos();
|
||||
for (int s = 0; s < outline->pathlength(); ++s) {
|
||||
if (pos.y() < y_mins[pos.x() - left])
|
||||
if (pos.y() < y_mins[pos.x() - left]) {
|
||||
y_mins[pos.x() - left] = pos.y();
|
||||
}
|
||||
pos += outline->step(s);
|
||||
}
|
||||
}
|
||||
// Find the total extent of the bottom or bottom + 1.
|
||||
int bottom_extent = 0;
|
||||
for (int x = 0; x <= width; ++x) {
|
||||
if (y_mins[x] == bottom || y_mins[x] == bottom + 1)
|
||||
if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
|
||||
++bottom_extent;
|
||||
}
|
||||
}
|
||||
// Find the lowest run longer than the bottom extent that is not the bottom.
|
||||
int best_min = box.top();
|
||||
@ -450,21 +461,24 @@ int16_t C_BLOB::EstimateBaselinePosition() {
|
||||
// Find the length of the current run.
|
||||
int y_at_x = y_mins[x];
|
||||
int run = 1;
|
||||
while (x + run <= width && y_mins[x + run] == y_at_x)
|
||||
while (x + run <= width && y_mins[x + run] == y_at_x) {
|
||||
++run;
|
||||
}
|
||||
if (y_at_x > bottom + 1) {
|
||||
// Possible contender.
|
||||
int total_run = run;
|
||||
// Find extent of current value or +1 to the right of x.
|
||||
while (x + total_run <= width &&
|
||||
(y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1))
|
||||
(y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
|
||||
++total_run;
|
||||
}
|
||||
// At least one end has to be higher so it is not a local max.
|
||||
if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
|
||||
// If the prev_run is at y + 1, then we can add that too. There cannot
|
||||
// be a suitable run at y before that or we would have found it already.
|
||||
if (prev_run > 0 && prev_y == y_at_x + 1)
|
||||
if (prev_run > 0 && prev_y == y_at_x + 1) {
|
||||
total_run += prev_run;
|
||||
}
|
||||
if (total_run > bottom_extent && y_at_x < best_min) {
|
||||
best_min = y_at_x;
|
||||
}
|
||||
@ -482,8 +496,9 @@ static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pi
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
C_OUTLINE *outline = it.data();
|
||||
outline->render(left, top, pix);
|
||||
if (!outline->child()->empty())
|
||||
if (!outline->child()->empty()) {
|
||||
render_outline_list(outline->child(), left, top, pix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -108,8 +108,9 @@ public:
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
|
||||
C_BLOB &operator=(const C_BLOB &source) {
|
||||
if (!outlines.empty())
|
||||
if (!outlines.empty()) {
|
||||
outlines.clear();
|
||||
}
|
||||
outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy);
|
||||
return *this;
|
||||
}
|
||||
|
@ -48,8 +48,9 @@ T& uniqueInstance(std::unique_ptr<T> new_instance = nullptr)
|
||||
{
|
||||
static std::unique_ptr<T> _instance = std::make_unique<T>();
|
||||
|
||||
if(new_instance)
|
||||
if (new_instance) {
|
||||
_instance = std::move(new_instance);
|
||||
}
|
||||
|
||||
return *_instance.get();
|
||||
}
|
||||
|
@ -69,8 +69,9 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
|
||||
with the consensus onto the reject list.
|
||||
*/
|
||||
start_it.set_to_list(&cblobs);
|
||||
if (start_it.empty())
|
||||
if (start_it.empty()) {
|
||||
return;
|
||||
}
|
||||
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
|
||||
bool reject_blob = false;
|
||||
bool blob_inverted;
|
||||
@ -84,22 +85,25 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
|
||||
if (reject_blob) {
|
||||
rej_cblob_it.add_after_then_move(start_it.extract());
|
||||
} else {
|
||||
if (blob_inverted)
|
||||
if (blob_inverted) {
|
||||
inverted_vote++;
|
||||
else
|
||||
} else {
|
||||
non_inverted_vote++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
flags.set(W_INVERSE, (inverted_vote > non_inverted_vote));
|
||||
|
||||
start_it.set_to_list(&cblobs);
|
||||
if (start_it.empty())
|
||||
if (start_it.empty()) {
|
||||
return;
|
||||
}
|
||||
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
|
||||
c_outline_it.set_to_list(start_it.data()->out_list());
|
||||
if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE])
|
||||
if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE]) {
|
||||
rej_cblob_it.add_after_then_move(start_it.extract());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -116,8 +120,9 @@ WERD::WERD(C_BLOB_LIST *blob_list, ///< In word order
|
||||
C_BLOB_IT start_it = blob_list; // iterator
|
||||
C_BLOB_IT end_it = blob_list; // another
|
||||
|
||||
while (!end_it.at_last())
|
||||
while (!end_it.at_last()) {
|
||||
end_it.forward(); // move to last
|
||||
}
|
||||
(reinterpret_cast<C_BLOB_LIST *>(&cblobs))->assign_to_sublist(&start_it, &end_it);
|
||||
// move to our list
|
||||
blanks = clone->blanks;
|
||||
@ -191,8 +196,9 @@ TBOX WERD::true_bounding_box() const {
|
||||
void WERD::move(const ICOORD vec) {
|
||||
C_BLOB_IT cblob_it(&cblobs); // cblob iterator
|
||||
|
||||
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward())
|
||||
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
|
||||
cblob_it.data()->move(vec);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -293,8 +299,9 @@ void WERD::plot(ScrollView *window, ScrollView::Color colour) {
|
||||
// Get the next color in the (looping) rainbow.
|
||||
ScrollView::Color WERD::NextColor(ScrollView::Color colour) {
|
||||
auto next = static_cast<ScrollView::Color>(colour + 1);
|
||||
if (next >= LAST_COLOUR || next < FIRST_COLOUR)
|
||||
if (next >= LAST_COLOUR || next < FIRST_COLOUR) {
|
||||
next = FIRST_COLOUR;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
|
||||
@ -355,12 +362,14 @@ WERD &WERD::operator=(const WERD &source) {
|
||||
flags = source.flags;
|
||||
script_id_ = source.script_id_;
|
||||
correct = source.correct;
|
||||
if (!cblobs.empty())
|
||||
if (!cblobs.empty()) {
|
||||
cblobs.clear();
|
||||
}
|
||||
cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
|
||||
|
||||
if (!rej_cblobs.empty())
|
||||
if (!rej_cblobs.empty()) {
|
||||
rej_cblobs.clear();
|
||||
}
|
||||
rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
|
||||
return *this;
|
||||
}
|
||||
@ -495,8 +504,9 @@ void WERD::CleanNoise(float size_threshold) {
|
||||
rej_it.add_after_then_move(rej_blob);
|
||||
}
|
||||
}
|
||||
if (blob->out_list()->empty())
|
||||
if (blob->out_list()->empty()) {
|
||||
delete blob_it.extract();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -525,13 +535,15 @@ bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
|
||||
const std::vector<C_OUTLINE *> &outlines,
|
||||
bool *make_next_word_fuzzy) {
|
||||
bool outline_added_to_start = false;
|
||||
if (make_next_word_fuzzy != nullptr)
|
||||
if (make_next_word_fuzzy != nullptr) {
|
||||
*make_next_word_fuzzy = false;
|
||||
}
|
||||
C_BLOB_IT rej_it(&rej_cblobs);
|
||||
for (int i = 0; i < outlines.size(); ++i) {
|
||||
C_OUTLINE *outline = outlines[i];
|
||||
if (outline == nullptr)
|
||||
if (outline == nullptr) {
|
||||
continue; // Already used it.
|
||||
}
|
||||
if (wanted[i]) {
|
||||
C_BLOB *target_blob = target_blobs[i];
|
||||
TBOX noise_box = outline->bounding_box();
|
||||
@ -553,8 +565,9 @@ bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
|
||||
}
|
||||
if (blob_it.cycled_list()) {
|
||||
blob_it.add_to_end(target_blob);
|
||||
if (make_next_word_fuzzy != nullptr)
|
||||
if (make_next_word_fuzzy != nullptr) {
|
||||
*make_next_word_fuzzy = true;
|
||||
}
|
||||
}
|
||||
// Add all consecutive wanted, but null-blob outlines to same blob.
|
||||
C_OUTLINE_IT ol_it(target_blob->out_list());
|
||||
|
@ -65,8 +65,9 @@ void UnicharAmbigs::InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_amb
|
||||
// Loads the universal ambigs that are useful for any language.
|
||||
void UnicharAmbigs::LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *unicharset) {
|
||||
TFile file;
|
||||
if (!file.Open(kUniversalAmbigsFile, ksizeofUniversalAmbigsFile))
|
||||
if (!file.Open(kUniversalAmbigsFile, ksizeofUniversalAmbigsFile)) {
|
||||
return;
|
||||
}
|
||||
LoadUnicharAmbigs(encoder_set, &file, 0, false, unicharset);
|
||||
}
|
||||
|
||||
@ -75,8 +76,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
UNICHARSET *unicharset) {
|
||||
int i, j;
|
||||
UnicharIdVector *adaption_ambigs_entry;
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf("Reading ambiguities\n");
|
||||
}
|
||||
|
||||
int test_ambig_part_size;
|
||||
int replacement_ambig_part_size;
|
||||
@ -100,19 +102,22 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
}
|
||||
while (ambig_file->FGets(buffer, kBufferSize) != nullptr) {
|
||||
chomp_string(buffer);
|
||||
if (debug_level > 2)
|
||||
if (debug_level > 2) {
|
||||
tprintf("read line %s\n", buffer);
|
||||
}
|
||||
++line_num;
|
||||
if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, buffer,
|
||||
&test_ambig_part_size, test_unichar_ids, &replacement_ambig_part_size,
|
||||
replacement_string, &type))
|
||||
replacement_string, &type)) {
|
||||
continue;
|
||||
}
|
||||
// Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.
|
||||
auto *ambig_spec = new AmbigSpec();
|
||||
if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_ : dang_ambigs_,
|
||||
test_ambig_part_size, test_unichar_ids, replacement_ambig_part_size,
|
||||
replacement_string, type, ambig_spec, unicharset))
|
||||
replacement_string, type, ambig_spec, unicharset)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update one_to_one_definite_ambigs_.
|
||||
if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 && type == DEFINITE_AMBIG) {
|
||||
@ -138,8 +143,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
// vector does not already contain it) keeping it in sorted order.
|
||||
for (j = 0;
|
||||
j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
|
||||
++j)
|
||||
++j) {
|
||||
;
|
||||
}
|
||||
if (j < adaption_ambigs_entry->size()) {
|
||||
if ((*adaption_ambigs_entry)[j] != id_to_insert) {
|
||||
adaption_ambigs_entry->insert(adaption_ambigs_entry->begin() + j, id_to_insert);
|
||||
@ -158,8 +164,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
if (use_ambigs_for_adaption) {
|
||||
for (i = 0; i < ambigs_for_adaption_.size(); ++i) {
|
||||
adaption_ambigs_entry = ambigs_for_adaption_[i];
|
||||
if (adaption_ambigs_entry == nullptr)
|
||||
if (adaption_ambigs_entry == nullptr) {
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
|
||||
UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];
|
||||
if (reverse_ambigs_for_adaption_[ambig_id] == nullptr) {
|
||||
@ -176,8 +183,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
const UnicharAmbigsVector &print_table = (tbl == 0) ? replace_ambigs_ : dang_ambigs_;
|
||||
for (i = 0; i < print_table.size(); ++i) {
|
||||
AmbigSpec_LIST *lst = print_table[i];
|
||||
if (lst == nullptr)
|
||||
if (lst == nullptr) {
|
||||
continue;
|
||||
}
|
||||
if (!lst->empty()) {
|
||||
tprintf("%s Ambiguities for %s:\n", (tbl == 0) ? "Replaceable" : "Dangerous",
|
||||
unicharset->debug_str(i).c_str());
|
||||
@ -222,8 +230,9 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
std::string input(buffer);
|
||||
std::vector<std::string> fields = split(input, ' ');
|
||||
if (fields.size() != 3) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Encode wrong-string.
|
||||
@ -233,13 +242,15 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
}
|
||||
*test_ambig_part_size = unichars.size();
|
||||
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Copy encoded string to output.
|
||||
for (int i = 0; i < unichars.size(); ++i)
|
||||
for (int i = 0; i < unichars.size(); ++i) {
|
||||
test_unichar_ids[i] = unichars[i];
|
||||
}
|
||||
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
|
||||
// Encode replacement-string to check validity.
|
||||
if (!unicharset.encode_string(fields[1].c_str(), true, &unichars, nullptr, nullptr)) {
|
||||
@ -247,13 +258,15 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
}
|
||||
*replacement_ambig_part_size = unichars.size();
|
||||
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (sscanf(fields[2].c_str(), "%d", type) != 1) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].c_str());
|
||||
@ -264,21 +277,25 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
char *next_token;
|
||||
if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
|
||||
!sscanf(token, "%d", test_ambig_part_size) || *test_ambig_part_size <= 0) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
for (i = 0; i < *test_ambig_part_size; ++i) {
|
||||
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)))
|
||||
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) {
|
||||
break;
|
||||
}
|
||||
if (!unicharset.contains_unichar(token)) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalUnicharMsg, token);
|
||||
}
|
||||
break;
|
||||
}
|
||||
test_unichar_ids[i] = unicharset.unichar_to_id(token);
|
||||
@ -287,29 +304,34 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
|
||||
if (i != *test_ambig_part_size || !(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) ||
|
||||
!sscanf(token, "%d", replacement_ambig_part_size) || *replacement_ambig_part_size <= 0) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
replacement_string[0] = '\0';
|
||||
for (i = 0; i < *replacement_ambig_part_size; ++i) {
|
||||
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)))
|
||||
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) {
|
||||
break;
|
||||
}
|
||||
strcat(replacement_string, token);
|
||||
if (!unicharset.contains_unichar(token)) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalUnicharMsg, token);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i != *replacement_ambig_part_size) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (version > 0) {
|
||||
@ -323,8 +345,9 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
// modified word, not the individual unigrams. Tesseract
|
||||
// has limited support for ngram unichar (e.g. dawg permuter).
|
||||
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) {
|
||||
if (debug_level)
|
||||
if (debug_level) {
|
||||
tprintf(kIllegalMsg, line_num);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -382,8 +405,9 @@ bool UnicharAmbigs::InsertIntoTable(UnicharAmbigsVector &table, int test_ambig_p
|
||||
if (table[test_unichar_ids[0]] == nullptr) {
|
||||
table[test_unichar_ids[0]] = new AmbigSpec_LIST();
|
||||
}
|
||||
if (table[test_unichar_ids[0]]->add_sorted(AmbigSpec::compare_ambig_specs, true, ambig_spec))
|
||||
if (table[test_unichar_ids[0]]->add_sorted(AmbigSpec::compare_ambig_specs, true, ambig_spec)) {
|
||||
return true;
|
||||
}
|
||||
delete ambig_spec;
|
||||
return false;
|
||||
}
|
||||
|
@ -60,16 +60,20 @@ public:
|
||||
const UNICHAR_ID val1 = *ptr1++;
|
||||
const UNICHAR_ID val2 = *ptr2++;
|
||||
if (val1 != val2) {
|
||||
if (val1 == INVALID_UNICHAR_ID)
|
||||
if (val1 == INVALID_UNICHAR_ID) {
|
||||
return -1;
|
||||
if (val2 == INVALID_UNICHAR_ID)
|
||||
}
|
||||
if (val2 == INVALID_UNICHAR_ID) {
|
||||
return 1;
|
||||
if (val1 < val2)
|
||||
}
|
||||
if (val1 < val2) {
|
||||
return -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (val1 == INVALID_UNICHAR_ID)
|
||||
if (val1 == INVALID_UNICHAR_ID) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,15 +92,17 @@ public:
|
||||
// The function assumes that array is terminated by INVALID_UNICHAR_ID.
|
||||
static inline void print(const UNICHAR_ID array[], const UNICHARSET &unicharset) {
|
||||
const UNICHAR_ID *ptr = array;
|
||||
if (*ptr == INVALID_UNICHAR_ID)
|
||||
if (*ptr == INVALID_UNICHAR_ID) {
|
||||
tprintf("[Empty]");
|
||||
}
|
||||
while (*ptr != INVALID_UNICHAR_ID) {
|
||||
tprintf("%s ", unicharset.id_to_unichar(*ptr++));
|
||||
}
|
||||
tprintf("( ");
|
||||
ptr = array;
|
||||
while (*ptr != INVALID_UNICHAR_ID)
|
||||
while (*ptr != INVALID_UNICHAR_ID) {
|
||||
tprintf("%d ", *ptr++);
|
||||
}
|
||||
tprintf(")\n");
|
||||
}
|
||||
};
|
||||
@ -115,8 +121,9 @@ public:
|
||||
const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
|
||||
const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);
|
||||
int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram);
|
||||
if (result != 0)
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
return UnicharIdArrayUtils::compare(s1->correct_fragments, s2->correct_fragments);
|
||||
}
|
||||
|
||||
@ -177,8 +184,9 @@ public:
|
||||
|
||||
// Returns definite 1-1 ambigs for the given unichar id.
|
||||
inline const UnicharIdVector *OneToOneDefiniteAmbigs(UNICHAR_ID unichar_id) const {
|
||||
if (one_to_one_definite_ambigs_.empty())
|
||||
if (one_to_one_definite_ambigs_.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
return one_to_one_definite_ambigs_[unichar_id];
|
||||
}
|
||||
|
||||
@ -188,8 +196,9 @@ public:
|
||||
// m->rn,rn->m,m->iii, AmbigsForAdaption() called with unichar id of
|
||||
// m will return a pointer to a vector with unichar ids of r,n,i.
|
||||
inline const UnicharIdVector *AmbigsForAdaption(UNICHAR_ID unichar_id) const {
|
||||
if (ambigs_for_adaption_.empty())
|
||||
if (ambigs_for_adaption_.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
return ambigs_for_adaption_[unichar_id];
|
||||
}
|
||||
|
||||
@ -197,8 +206,9 @@ public:
|
||||
// the given unichar_id is an ambiguity (appears in the 'wrong' part of
|
||||
// some ambiguity pair).
|
||||
inline const UnicharIdVector *ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const {
|
||||
if (reverse_ambigs_for_adaption_.empty())
|
||||
if (reverse_ambigs_for_adaption_.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
return reverse_ambigs_for_adaption_[unichar_id];
|
||||
}
|
||||
|
||||
|
@ -108,8 +108,9 @@ void BitVector::Init(int length) {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool BitVector::Serialize(FILE *fp) const {
|
||||
if (!tesseract::Serialize(fp, &bit_size_))
|
||||
if (!tesseract::Serialize(fp, &bit_size_)) {
|
||||
return false;
|
||||
}
|
||||
int wordlen = WordLength();
|
||||
return tesseract::Serialize(fp, &array_[0], wordlen);
|
||||
}
|
||||
@ -118,18 +119,21 @@ bool BitVector::Serialize(FILE *fp) const {
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool BitVector::DeSerialize(bool swap, FILE *fp) {
|
||||
uint32_t new_bit_size;
|
||||
if (!tesseract::DeSerialize(fp, &new_bit_size))
|
||||
if (!tesseract::DeSerialize(fp, &new_bit_size)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
ReverseN(&new_bit_size, sizeof(new_bit_size));
|
||||
}
|
||||
Alloc(new_bit_size);
|
||||
int wordlen = WordLength();
|
||||
if (!tesseract::DeSerialize(fp, &array_[0], wordlen))
|
||||
if (!tesseract::DeSerialize(fp, &array_[0], wordlen)) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
for (int i = 0; i < wordlen; ++i)
|
||||
for (int i = 0; i < wordlen; ++i) {
|
||||
ReverseN(&array_[i], sizeof(array_[i]));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -146,8 +150,9 @@ void BitVector::SetAllTrue() {
|
||||
int BitVector::NextSetBit(int prev_bit) const {
|
||||
// Move on to the next bit.
|
||||
int next_bit = prev_bit + 1;
|
||||
if (next_bit >= bit_size_)
|
||||
if (next_bit >= bit_size_) {
|
||||
return -1;
|
||||
}
|
||||
// Check the remains of the word containing the next_bit first.
|
||||
int next_word = WordIndex(next_bit);
|
||||
int bit_index = next_word * kBitFactor;
|
||||
@ -156,10 +161,12 @@ int BitVector::NextSetBit(int prev_bit) const {
|
||||
uint8_t byte = word & 0xff;
|
||||
while (bit_index < word_end) {
|
||||
if (bit_index + 8 > next_bit && byte != 0) {
|
||||
while (bit_index + lsb_index_[byte] < next_bit && byte != 0)
|
||||
while (bit_index + lsb_index_[byte] < next_bit && byte != 0) {
|
||||
byte = lsb_eroded_[byte];
|
||||
if (byte != 0)
|
||||
}
|
||||
if (byte != 0) {
|
||||
return bit_index + lsb_index_[byte];
|
||||
}
|
||||
}
|
||||
word >>= 8;
|
||||
bit_index += 8;
|
||||
@ -172,8 +179,9 @@ int BitVector::NextSetBit(int prev_bit) const {
|
||||
++next_word;
|
||||
bit_index += kBitFactor;
|
||||
}
|
||||
if (bit_index >= bit_size_)
|
||||
if (bit_index >= bit_size_) {
|
||||
return -1;
|
||||
}
|
||||
// Find the first non-zero byte within the word.
|
||||
while ((word & 0xff) == 0) {
|
||||
word >>= 8;
|
||||
@ -200,29 +208,35 @@ int BitVector::NumSetBits() const {
|
||||
// sensible if they aren't the same size, but they should be really.
|
||||
void BitVector::operator|=(const BitVector &other) {
|
||||
int length = std::min(WordLength(), other.WordLength());
|
||||
for (int w = 0; w < length; ++w)
|
||||
for (int w = 0; w < length; ++w) {
|
||||
array_[w] |= other.array_[w];
|
||||
}
|
||||
}
|
||||
void BitVector::operator&=(const BitVector &other) {
|
||||
int length = std::min(WordLength(), other.WordLength());
|
||||
for (int w = 0; w < length; ++w)
|
||||
for (int w = 0; w < length; ++w) {
|
||||
array_[w] &= other.array_[w];
|
||||
for (int w = WordLength() - 1; w >= length; --w)
|
||||
}
|
||||
for (int w = WordLength() - 1; w >= length; --w) {
|
||||
array_[w] = 0;
|
||||
}
|
||||
}
|
||||
void BitVector::operator^=(const BitVector &other) {
|
||||
int length = std::min(WordLength(), other.WordLength());
|
||||
for (int w = 0; w < length; ++w)
|
||||
for (int w = 0; w < length; ++w) {
|
||||
array_[w] ^= other.array_[w];
|
||||
}
|
||||
}
|
||||
// Set subtraction *this = v1 - v2.
|
||||
void BitVector::SetSubtract(const BitVector &v1, const BitVector &v2) {
|
||||
Alloc(v1.size());
|
||||
int length = std::min(v1.WordLength(), v2.WordLength());
|
||||
for (int w = 0; w < length; ++w)
|
||||
for (int w = 0; w < length; ++w) {
|
||||
array_[w] = v1.array_[w] ^ (v1.array_[w] & v2.array_[w]);
|
||||
for (int w = WordLength() - 1; w >= length; --w)
|
||||
}
|
||||
for (int w = WordLength() - 1; w >= length; --w) {
|
||||
array_[w] = v1.array_[w];
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates memory for a vector of the given length.
|
||||
|
@ -75,10 +75,11 @@ public:
|
||||
array_[WordIndex(index)] &= ~BitMask(index);
|
||||
}
|
||||
void SetValue(int index, bool value) {
|
||||
if (value)
|
||||
if (value) {
|
||||
SetBit(index);
|
||||
else
|
||||
} else {
|
||||
ResetBit(index);
|
||||
}
|
||||
}
|
||||
bool At(int index) const {
|
||||
return (array_[WordIndex(index)] & BitMask(index)) != 0;
|
||||
|
@ -94,8 +94,9 @@ void CLIST::assign_to_sublist( // to this list
|
||||
CLIST_ITERATOR *end_it) { // from list end
|
||||
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
|
||||
|
||||
if (!empty())
|
||||
if (!empty()) {
|
||||
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr);
|
||||
}
|
||||
|
||||
last = start_it->extract_sublist(end_it);
|
||||
}
|
||||
@ -110,8 +111,9 @@ int32_t CLIST::length() const { // count elements
|
||||
CLIST_ITERATOR it(const_cast<CLIST *>(this));
|
||||
int32_t count = 0;
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@ -178,15 +180,18 @@ bool CLIST::add_sorted(int comparator(const void *, const void *), bool unique,
|
||||
CLIST_ITERATOR it(this);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
void *data = it.data();
|
||||
if (data == new_data && unique)
|
||||
if (data == new_data && unique) {
|
||||
return false;
|
||||
if (comparator(&data, &new_data) > 0)
|
||||
}
|
||||
if (comparator(&data, &new_data) > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (it.cycled_list())
|
||||
if (it.cycled_list()) {
|
||||
it.add_to_end(new_data);
|
||||
else
|
||||
} else {
|
||||
it.add_before_then_move(new_data);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -214,8 +219,9 @@ void CLIST::set_subtract(int comparator(const void *, const void *), bool unique
|
||||
subtra = s_it.data();
|
||||
}
|
||||
}
|
||||
if (subtra == nullptr || comparator(&subtra, &minu) != 0)
|
||||
if (subtra == nullptr || comparator(&subtra, &minu) != 0) {
|
||||
add_sorted(comparator, unique, minu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -236,8 +242,9 @@ void *CLIST_ITERATOR::forward() {
|
||||
if (!list)
|
||||
NO_LIST.error("CLIST_ITERATOR::forward", ABORT, nullptr);
|
||||
#endif
|
||||
if (list->empty())
|
||||
if (list->empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (current) { // not removed so
|
||||
// set previous
|
||||
@ -246,8 +253,9 @@ void *CLIST_ITERATOR::forward() {
|
||||
// In case next is deleted by another iterator, get next from current.
|
||||
current = current->next;
|
||||
} else {
|
||||
if (ex_current_was_cycle_pt)
|
||||
if (ex_current_was_cycle_pt) {
|
||||
cycle_pt = next;
|
||||
}
|
||||
current = next;
|
||||
}
|
||||
|
||||
@ -283,11 +291,13 @@ void *CLIST_ITERATOR::data_relative( // get data + or - ...
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l");
|
||||
#endif
|
||||
|
||||
if (offset == -1)
|
||||
if (offset == -1) {
|
||||
ptr = prev;
|
||||
else
|
||||
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next)
|
||||
} else {
|
||||
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) {
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!ptr)
|
||||
@ -311,13 +321,15 @@ void *CLIST_ITERATOR::move_to_last() {
|
||||
NO_LIST.error("CLIST_ITERATOR::move_to_last", ABORT, nullptr);
|
||||
#endif
|
||||
|
||||
while (current != list->last)
|
||||
while (current != list->last) {
|
||||
forward();
|
||||
}
|
||||
|
||||
if (current == nullptr)
|
||||
if (current == nullptr) {
|
||||
return nullptr;
|
||||
else
|
||||
} else {
|
||||
return current->data;
|
||||
}
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
@ -348,13 +360,15 @@ void CLIST_ITERATOR::exchange( // positions of 2 links
|
||||
/* Do nothing if either list is empty or if both iterators reference the same
|
||||
link */
|
||||
|
||||
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current))
|
||||
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Error if either current element is deleted */
|
||||
|
||||
if (!current || !other_it->current)
|
||||
if (!current || !other_it->current) {
|
||||
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr);
|
||||
}
|
||||
|
||||
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
|
||||
(other before this); non-doubleton adjacent elements (this before other);
|
||||
@ -393,15 +407,19 @@ non-adjacent elements. */
|
||||
/* update end of list pointer when necessary (remember that the 2 iterators
|
||||
may iterate over different lists!) */
|
||||
|
||||
if (list->last == current)
|
||||
if (list->last == current) {
|
||||
list->last = other_it->current;
|
||||
if (other_it->list->last == other_it->current)
|
||||
}
|
||||
if (other_it->list->last == other_it->current) {
|
||||
other_it->list->last = current;
|
||||
}
|
||||
|
||||
if (current == cycle_pt)
|
||||
if (current == cycle_pt) {
|
||||
cycle_pt = other_it->cycle_pt;
|
||||
if (other_it->current == other_it->cycle_pt)
|
||||
}
|
||||
if (other_it->current == other_it->cycle_pt) {
|
||||
other_it->cycle_pt = cycle_pt;
|
||||
}
|
||||
|
||||
/* The actual exchange - in all cases*/
|
||||
|
||||
@ -449,19 +467,22 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
|
||||
|
||||
temp_it.mark_cycle_pt();
|
||||
do { // walk sublist
|
||||
if (temp_it.cycled_list()) // can't find end pt
|
||||
if (temp_it.cycled_list()) { // can't find end pt
|
||||
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
}
|
||||
|
||||
if (temp_it.at_last()) {
|
||||
list->last = prev;
|
||||
ex_current_was_last = other_it->ex_current_was_last = true;
|
||||
}
|
||||
|
||||
if (temp_it.current == cycle_pt)
|
||||
if (temp_it.current == cycle_pt) {
|
||||
ex_current_was_cycle_pt = true;
|
||||
}
|
||||
|
||||
if (temp_it.current == other_it->cycle_pt)
|
||||
if (temp_it.current == other_it->cycle_pt) {
|
||||
other_it->ex_current_was_cycle_pt = true;
|
||||
}
|
||||
|
||||
temp_it.forward();
|
||||
} while (temp_it.prev != other_it->current);
|
||||
|
@ -186,10 +186,12 @@ public:
|
||||
|
||||
void *data() { // get current data
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr);
|
||||
if (!current)
|
||||
}
|
||||
if (!current) {
|
||||
NULL_DATA.error("CLIST_ITERATOR::data", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
return current->data;
|
||||
}
|
||||
@ -209,8 +211,9 @@ public:
|
||||
|
||||
bool empty() { // is list empty?
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::empty", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
return list->empty();
|
||||
}
|
||||
@ -248,8 +251,9 @@ public:
|
||||
inline void CLIST_ITERATOR::set_to_list( // change list
|
||||
CLIST *list_to_iterate) {
|
||||
#ifndef NDEBUG
|
||||
if (!list_to_iterate)
|
||||
if (!list_to_iterate) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
list = list_to_iterate;
|
||||
@ -284,10 +288,12 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add
|
||||
CLIST_LINK *new_element;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_after_then_move", ABORT, nullptr);
|
||||
if (!new_data)
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_after_then_move", ABORT, "new_data is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
new_element = new CLIST_LINK;
|
||||
@ -303,14 +309,17 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add
|
||||
if (current) { // not extracted
|
||||
current->next = new_element;
|
||||
prev = current;
|
||||
if (current == list->last)
|
||||
if (current == list->last) {
|
||||
list->last = new_element;
|
||||
}
|
||||
} else { // current extracted
|
||||
prev->next = new_element;
|
||||
if (ex_current_was_last)
|
||||
if (ex_current_was_last) {
|
||||
list->last = new_element;
|
||||
if (ex_current_was_cycle_pt)
|
||||
}
|
||||
if (ex_current_was_cycle_pt) {
|
||||
cycle_pt = new_element;
|
||||
}
|
||||
}
|
||||
}
|
||||
current = new_element;
|
||||
@ -328,10 +337,12 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add
|
||||
CLIST_LINK *new_element;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
|
||||
if (!new_data)
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_after_stay_put", ABORT, "new_data is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
new_element = new CLIST_LINK;
|
||||
@ -348,10 +359,12 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add
|
||||
|
||||
if (current) { // not extracted
|
||||
current->next = new_element;
|
||||
if (prev == current)
|
||||
if (prev == current) {
|
||||
prev = new_element;
|
||||
if (current == list->last)
|
||||
}
|
||||
if (current == list->last) {
|
||||
list->last = new_element;
|
||||
}
|
||||
} else { // current extracted
|
||||
prev->next = new_element;
|
||||
if (ex_current_was_last) {
|
||||
@ -375,10 +388,12 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add
|
||||
CLIST_LINK *new_element;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_before_then_move", ABORT, nullptr);
|
||||
if (!new_data)
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_before_then_move", ABORT, "new_data is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
new_element = new CLIST_LINK;
|
||||
@ -395,10 +410,12 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add
|
||||
next = current;
|
||||
} else { // current extracted
|
||||
new_element->next = next;
|
||||
if (ex_current_was_last)
|
||||
if (ex_current_was_last) {
|
||||
list->last = new_element;
|
||||
if (ex_current_was_cycle_pt)
|
||||
}
|
||||
if (ex_current_was_cycle_pt) {
|
||||
cycle_pt = new_element;
|
||||
}
|
||||
}
|
||||
}
|
||||
current = new_element;
|
||||
@ -416,10 +433,12 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
|
||||
CLIST_LINK *new_element;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
|
||||
if (!new_data)
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_before_stay_put", ABORT, "new_data is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
new_element = new CLIST_LINK;
|
||||
@ -435,12 +454,14 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
|
||||
prev->next = new_element;
|
||||
if (current) { // not extracted
|
||||
new_element->next = current;
|
||||
if (next == current)
|
||||
if (next == current) {
|
||||
next = new_element;
|
||||
}
|
||||
} else { // current extracted
|
||||
new_element->next = next;
|
||||
if (ex_current_was_last)
|
||||
if (ex_current_was_last) {
|
||||
list->last = new_element;
|
||||
}
|
||||
}
|
||||
prev = new_element;
|
||||
}
|
||||
@ -456,10 +477,12 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
|
||||
|
||||
inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_list_after", ABORT, nullptr);
|
||||
if (!list_to_add)
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!list_to_add->empty()) {
|
||||
@ -472,8 +495,9 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
|
||||
} else {
|
||||
if (current) { // not extracted
|
||||
current->next = list_to_add->First();
|
||||
if (current == list->last)
|
||||
if (current == list->last) {
|
||||
list->last = list_to_add->last;
|
||||
}
|
||||
list_to_add->last->next = next;
|
||||
next = current->next;
|
||||
} else { // current extracted
|
||||
@ -500,10 +524,12 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
|
||||
|
||||
inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_list_before", ABORT, nullptr);
|
||||
if (!list_to_add)
|
||||
}
|
||||
if (!list_to_add) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!list_to_add->empty()) {
|
||||
@ -519,10 +545,12 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) {
|
||||
list_to_add->last->next = current;
|
||||
} else { // current extracted
|
||||
list_to_add->last->next = next;
|
||||
if (ex_current_was_last)
|
||||
if (ex_current_was_last) {
|
||||
list->last = list_to_add->last;
|
||||
if (ex_current_was_cycle_pt)
|
||||
}
|
||||
if (ex_current_was_cycle_pt) {
|
||||
cycle_pt = prev->next;
|
||||
}
|
||||
}
|
||||
current = prev->next;
|
||||
next = current->next;
|
||||
@ -544,11 +572,13 @@ inline void *CLIST_ITERATOR::extract() {
|
||||
void *extracted_data;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::extract", ABORT, nullptr);
|
||||
if (!current) // list empty or
|
||||
// element extracted
|
||||
}
|
||||
if (!current) { // list empty or
|
||||
// element extracted
|
||||
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (list->singleton()) {
|
||||
@ -581,8 +611,9 @@ inline void *CLIST_ITERATOR::extract() {
|
||||
|
||||
inline void *CLIST_ITERATOR::move_to_first() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::move_to_first", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
current = list->First();
|
||||
@ -604,14 +635,16 @@ inline void *CLIST_ITERATOR::move_to_first() {
|
||||
|
||||
inline void CLIST_ITERATOR::mark_cycle_pt() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (current)
|
||||
if (current) {
|
||||
cycle_pt = current;
|
||||
else
|
||||
} else {
|
||||
ex_current_was_cycle_pt = true;
|
||||
}
|
||||
started_cycling = false;
|
||||
}
|
||||
|
||||
@ -624,8 +657,9 @@ inline void CLIST_ITERATOR::mark_cycle_pt() {
|
||||
|
||||
inline bool CLIST_ITERATOR::at_first() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::at_first", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
// we're at a deleted
|
||||
@ -643,8 +677,9 @@ inline bool CLIST_ITERATOR::at_first() {
|
||||
|
||||
inline bool CLIST_ITERATOR::at_last() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::at_last", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
// we're at a deleted
|
||||
@ -662,8 +697,9 @@ inline bool CLIST_ITERATOR::at_last() {
|
||||
|
||||
inline bool CLIST_ITERATOR::cycled_list() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::cycled_list", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ((list->empty()) || ((current == cycle_pt) && started_cycling));
|
||||
@ -678,8 +714,9 @@ inline bool CLIST_ITERATOR::cycled_list() {
|
||||
|
||||
inline int32_t CLIST_ITERATOR::length() {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::length", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
return list->length();
|
||||
@ -696,8 +733,9 @@ inline void CLIST_ITERATOR::sort( // sort elements
|
||||
int comparator( // comparison routine
|
||||
const void *, const void *)) {
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::sort", ABORT, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
list->sort(comparator);
|
||||
@ -719,10 +757,12 @@ inline void CLIST_ITERATOR::add_to_end( // element to add
|
||||
CLIST_LINK *new_element;
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!list)
|
||||
if (!list) {
|
||||
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr);
|
||||
if (!new_data)
|
||||
}
|
||||
if (!new_data) {
|
||||
BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (this->at_last()) {
|
||||
|
@ -69,8 +69,9 @@ void ELIST::assign_to_sublist( // to this list
|
||||
ELIST_ITERATOR *end_it) { // from list end
|
||||
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
|
||||
|
||||
if (!empty())
|
||||
if (!empty()) {
|
||||
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr);
|
||||
}
|
||||
|
||||
last = start_it->extract_sublist(end_it);
|
||||
}
|
||||
@ -85,8 +86,9 @@ int32_t ELIST::length() const { // count elements
|
||||
ELIST_ITERATOR it(const_cast<ELIST *>(this));
|
||||
int32_t count = 0;
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@ -163,10 +165,11 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *
|
||||
return link;
|
||||
}
|
||||
}
|
||||
if (it.cycled_list())
|
||||
if (it.cycled_list()) {
|
||||
it.add_to_end(new_link);
|
||||
else
|
||||
} else {
|
||||
it.add_before_then_move(new_link);
|
||||
}
|
||||
}
|
||||
return new_link;
|
||||
}
|
||||
@ -188,8 +191,9 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
|
||||
if (!list)
|
||||
NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr);
|
||||
#endif
|
||||
if (list->empty())
|
||||
if (list->empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (current) { // not removed so
|
||||
// set previous
|
||||
@ -198,8 +202,9 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
|
||||
// In case next is deleted by another iterator, get next from current.
|
||||
current = current->next;
|
||||
} else {
|
||||
if (ex_current_was_cycle_pt)
|
||||
if (ex_current_was_cycle_pt) {
|
||||
cycle_pt = next;
|
||||
}
|
||||
current = next;
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
@ -236,11 +241,13 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
|
||||
BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l");
|
||||
#endif
|
||||
|
||||
if (offset == -1)
|
||||
if (offset == -1) {
|
||||
ptr = prev;
|
||||
else
|
||||
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next)
|
||||
} else {
|
||||
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) {
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (!ptr)
|
||||
@ -264,8 +271,9 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() {
|
||||
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr);
|
||||
#endif
|
||||
|
||||
while (current != list->last)
|
||||
while (current != list->last) {
|
||||
forward();
|
||||
}
|
||||
|
||||
return current;
|
||||
}
|
||||
@ -298,13 +306,15 @@ void ELIST_ITERATOR::exchange( // positions of 2 links
|
||||
/* Do nothing if either list is empty or if both iterators reference the same
|
||||
link */
|
||||
|
||||
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current))
|
||||
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Error if either current element is deleted */
|
||||
|
||||
if (!current || !other_it->current)
|
||||
if (!current || !other_it->current) {
|
||||
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr);
|
||||
}
|
||||
|
||||
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
|
||||
(other before this); non-doubleton adjacent elements (this before other);
|
||||
@ -343,15 +353,19 @@ non-adjacent elements. */
|
||||
/* update end of list pointer when necessary (remember that the 2 iterators
|
||||
may iterate over different lists!) */
|
||||
|
||||
if (list->last == current)
|
||||
if (list->last == current) {
|
||||
list->last = other_it->current;
|
||||
if (other_it->list->last == other_it->current)
|
||||
}
|
||||
if (other_it->list->last == other_it->current) {
|
||||
other_it->list->last = current;
|
||||
}
|
||||
|
||||
if (current == cycle_pt)
|
||||
if (current == cycle_pt) {
|
||||
cycle_pt = other_it->cycle_pt;
|
||||
if (other_it->current == other_it->cycle_pt)
|
||||
}
|
||||
if (other_it->current == other_it->cycle_pt) {
|
||||
other_it->cycle_pt = cycle_pt;
|
||||
}
|
||||
|
||||
/* The actual exchange - in all cases*/
|
||||
|
||||
@ -401,19 +415,22 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
|
||||
|
||||
temp_it.mark_cycle_pt();
|
||||
do { // walk sublist
|
||||
if (temp_it.cycled_list()) // can't find end pt
|
||||
if (temp_it.cycled_list()) { // can't find end pt
|
||||
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
|
||||
}
|
||||
|
||||
if (temp_it.at_last()) {
|
||||
list->last = prev;
|
||||
ex_current_was_last = other_it->ex_current_was_last = true;
|
||||
}
|
||||
|
||||
if (temp_it.current == cycle_pt)
|
||||
if (temp_it.current == cycle_pt) {
|
||||
ex_current_was_cycle_pt = true;
|
||||
}
|
||||
|
||||
if (temp_it.current == other_it->cycle_pt)
|
||||
if (temp_it.current == other_it->cycle_pt) {
|
||||
other_it->ex_current_was_cycle_pt = true;
|
||||
}
|
||||
|
||||
temp_it.forward();
|
||||
} while (temp_it.prev != other_it->current);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user