Merge pull request #3354 from stweil/master

Add braces to single line statements and modernize unittest code using clang-tidy
This commit is contained in:
Egor Pugin 2021-03-22 11:23:27 +03:00 committed by GitHub
commit d72c2b14a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
333 changed files with 10398 additions and 5541 deletions

View File

@ -48,8 +48,9 @@ struct OSBestResult {
struct OSResults {
OSResults() : unicharset(nullptr) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j)
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
scripts_na[i][j] = 0;
}
orientations[i] = 0;
}
}

View File

@ -93,8 +93,9 @@ bool TessAltoRenderer::BeginDocumentHandler() {
///
bool TessAltoRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> text(api->GetAltoText(imagenum()));
if (text == nullptr)
if (text == nullptr) {
return false;
}
AppendString(text.get());
@ -126,8 +127,9 @@ char *TessBaseAPI::GetAltoText(int page_number) {
/// data structures.
///
char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
return nullptr;
}
int lcnt = 0, tcnt = 0, bcnt = 0, wcnt = 0;

View File

@ -155,8 +155,9 @@ static void ExtractFontName(const char* filename, std::string* fontname) {
static void addAvailableLanguages(const std::string &datadir, const std::string &base,
std::vector<std::string> *langs) {
auto base2 = base;
if (!base2.empty())
if (!base2.empty()) {
base2 += "/";
}
const size_t extlen = sizeof(kTrainedDataSuffix);
#ifdef _WIN32
WIN32_FIND_DATA data;
@ -291,23 +292,26 @@ void TessBaseAPI::SetOutputName(const char *name) {
}
bool TessBaseAPI::SetVariable(const char *name, const char *value) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
tesseract_ = new Tesseract;
}
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
tesseract_->params());
}
bool TessBaseAPI::SetDebugVariable(const char *name, const char *value) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
tesseract_ = new Tesseract;
}
return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, tesseract_->params());
}
bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
auto *p = ParamUtils::FindParam<IntParam>(name, GlobalParams()->int_params,
tesseract_->params()->int_params);
if (p == nullptr)
if (p == nullptr) {
return false;
}
*value = (int32_t)(*p);
return true;
}
@ -315,8 +319,9 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
auto *p = ParamUtils::FindParam<BoolParam>(name, GlobalParams()->bool_params,
tesseract_->params()->bool_params);
if (p == nullptr)
if (p == nullptr) {
return false;
}
*value = bool(*p);
return true;
}
@ -330,8 +335,9 @@ const char *TessBaseAPI::GetStringVariable(const char *name) const {
bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
auto *p = ParamUtils::FindParam<DoubleParam>(name, GlobalParams()->double_params,
tesseract_->params()->double_params);
if (p == nullptr)
if (p == nullptr) {
return false;
}
*value = (double)(*p);
return true;
}
@ -369,8 +375,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
const std::vector<std::string> *vars_values, bool set_only_non_debug_params,
FileReader reader) {
// Default language is "eng".
if (language == nullptr)
if (language == nullptr) {
language = "eng";
}
if (data == nullptr) {
data = "";
}
@ -394,8 +401,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
if (tesseract_ == nullptr) {
reset_classifier = false;
tesseract_ = new Tesseract;
if (reader != nullptr)
if (reader != nullptr) {
reader_ = reader;
}
TessdataManager mgr(reader_);
if (data_size != 0) {
mgr.LoadMemBuffer(language, data, data_size);
@ -409,8 +417,9 @@ int TessBaseAPI::Init(const char *data, int data_size, const char *language, Ocr
// Update datapath and language requested for the last valid initialization.
datapath_ = datapath;
if ((strcmp(datapath_.c_str(), "") == 0) && (strcmp(tesseract_->datadir.c_str(), "") != 0))
if ((strcmp(datapath_.c_str(), "") == 0) && (strcmp(tesseract_->datadir.c_str(), "") != 0)) {
datapath_ = tesseract_->datadir;
}
language_ = language;
last_oem_requested_ = oem;
@ -446,8 +455,9 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector<std::string> *langs) co
if (tesseract_ != nullptr) {
langs->push_back(tesseract_->lang);
int num_subs = tesseract_->num_sub_langs();
for (int i = 0; i < num_subs; ++i)
for (int i = 0; i < num_subs; ++i) {
langs->push_back(tesseract_->get_sub_lang(i)->lang);
}
}
}
@ -471,10 +481,11 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector<std::string> *langs)
* in a separate API at some future time.
*/
int TessBaseAPI::InitLangMod(const char *datapath, const char *language) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
tesseract_ = new Tesseract;
else
} else {
ParamUtils::ResetToDefaults(tesseract_->params());
}
TessdataManager mgr;
return tesseract_->init_tesseract_lm(datapath, nullptr, language, &mgr);
}
@ -513,15 +524,17 @@ void TessBaseAPI::ReadDebugConfigFile(const char *filename) {
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void TessBaseAPI::SetPageSegMode(PageSegMode mode) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
tesseract_ = new Tesseract;
}
tesseract_->tessedit_pageseg_mode.set_value(mode);
}
/** Return the current page segmentation mode. */
PageSegMode TessBaseAPI::GetPageSegMode() const {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
return PSM_SINGLE_BLOCK;
}
return static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
}
@ -540,8 +553,9 @@ PageSegMode TessBaseAPI::GetPageSegMode() const {
*/
char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width, int height) {
if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize)
if (tesseract_ == nullptr || width < kMinRectSize || height < kMinRectSize) {
return nullptr; // Nothing worth doing.
}
// Since this original api didn't give the exact size of the image,
// we have to invent a reasonable value.
@ -559,8 +573,9 @@ char *TessBaseAPI::TesseractRect(const unsigned char *imagedata, int bytes_per_p
* adaptive data.
*/
void TessBaseAPI::ClearAdaptiveClassifier() {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
return;
}
tesseract_->ResetAdaptiveClassifier();
tesseract_->ResetDocumentDictionary();
}
@ -582,10 +597,11 @@ void TessBaseAPI::SetImage(const unsigned char *imagedata, int width, int height
}
void TessBaseAPI::SetSourceResolution(int ppi) {
if (thresholder_)
if (thresholder_) {
thresholder_->SetSourceYResolution(ppi);
else
} else {
tprintf("Please call SetImage before SetSourceResolution.\n");
}
}
/**
@ -616,8 +632,9 @@ void TessBaseAPI::SetImage(Pix *pix) {
* can be recognized with the same image.
*/
void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
if (thresholder_ == nullptr)
if (thresholder_ == nullptr) {
return;
}
thresholder_->SetRectangle(left, top, width, height);
ClearResults();
}
@ -627,8 +644,9 @@ void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
* Get a copy of the internal thresholded image from Tesseract.
*/
Pix *TessBaseAPI::GetThresholdedImage() {
if (tesseract_ == nullptr || thresholder_ == nullptr)
if (tesseract_ == nullptr || thresholder_ == nullptr) {
return nullptr;
}
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
return nullptr;
}
@ -700,10 +718,12 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
const int raw_padding, Pixa **pixa, int **blockids,
int **paraids) {
PageIterator *page_it = GetIterator();
if (page_it == nullptr)
if (page_it == nullptr) {
page_it = AnalyseLayout();
if (page_it == nullptr)
}
if (page_it == nullptr) {
return nullptr; // Failed.
}
// Count the components to get a size for the arrays.
int component_count = 0;
@ -713,26 +733,31 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
// Get bounding box in original raw image with padding.
do {
if (page_it->BoundingBox(level, raw_padding, &left, &top, &right, &bottom) &&
(!text_only || PTIsTextType(page_it->BlockType())))
(!text_only || PTIsTextType(page_it->BlockType()))) {
++component_count;
}
} while (page_it->Next(level));
} else {
// Get bounding box from binarized imaged. Note that this could be
// differently scaled from the original image.
do {
if (page_it->BoundingBoxInternal(level, &left, &top, &right, &bottom) &&
(!text_only || PTIsTextType(page_it->BlockType())))
(!text_only || PTIsTextType(page_it->BlockType()))) {
++component_count;
}
} while (page_it->Next(level));
}
Boxa *boxa = boxaCreate(component_count);
if (pixa != nullptr)
if (pixa != nullptr) {
*pixa = pixaCreate(component_count);
if (blockids != nullptr)
}
if (blockids != nullptr) {
*blockids = new int[component_count];
if (paraids != nullptr)
}
if (paraids != nullptr) {
*paraids = new int[component_count];
}
int blockid = 0;
int paraid = 0;
@ -760,8 +785,9 @@ Boxa *TessBaseAPI::GetComponentImages(PageIteratorLevel level, bool text_only, b
}
if (paraids != nullptr) {
(*paraids)[component_index] = paraid;
if (page_it->IsAtFinalElement(RIL_PARA, level))
if (page_it->IsAtFinalElement(RIL_PARA, level)) {
++paraid;
}
}
if (blockids != nullptr) {
(*blockids)[component_index] = blockid;
@ -805,8 +831,9 @@ PageIterator *TessBaseAPI::AnalyseLayout() {
PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
if (FindLines() == 0) {
if (block_list_->empty())
if (block_list_->empty()) {
return nullptr; // The page was empty.
}
page_res_ = new PAGE_RES(merge_similar_words, block_list_, nullptr);
DetectParagraphs(false);
return new PageIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
@ -821,10 +848,12 @@ PageIterator *TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
* internal structures.
*/
int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
return -1;
if (FindLines() != 0)
}
if (FindLines() != 0) {
return -1;
}
delete page_res_;
if (block_list_->empty()) {
page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_);
@ -889,11 +918,13 @@ int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
// Now run the main recognition.
bool wait_for_text = true;
GetBoolVariable("paragraph_text_based", &wait_for_text);
if (!wait_for_text)
if (!wait_for_text) {
DetectParagraphs(false);
}
if (tesseract_->recog_all_words(page_res_, monitor, nullptr, nullptr, 0)) {
if (wait_for_text)
if (wait_for_text) {
DetectParagraphs(true);
}
} else {
result = -1;
}
@ -933,8 +964,9 @@ int TessBaseAPI::GetSourceYResolution() {
bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer,
int tessedit_page_number) {
if (!flist && !buf)
if (!flist && !buf) {
return false;
}
unsigned page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
char pagename[MAX_PATH];
@ -953,15 +985,17 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
// Add last line without terminating LF.
lines.push_back(line);
}
if (lines.empty())
if (lines.empty()) {
return false;
}
}
// Skip to the requested page number.
for (unsigned i = 0; i < page; i++) {
if (flist) {
if (fgets(pagename, sizeof(pagename), flist) == nullptr)
if (fgets(pagename, sizeof(pagename), flist) == nullptr) {
break;
}
}
}
@ -973,11 +1007,13 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
// Loop over all pages - or just the requested one
while (true) {
if (flist) {
if (fgets(pagename, sizeof(pagename), flist) == nullptr)
if (fgets(pagename, sizeof(pagename), flist) == nullptr) {
break;
}
} else {
if (page >= lines.size())
if (page >= lines.size()) {
break;
}
snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
}
chomp_string(pagename);
@ -989,10 +1025,12 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
tprintf("Page %u : %s\n", page, pagename);
bool r = ProcessPage(pix, page, pagename, retry_config, timeout_millisec, renderer);
pixDestroy(&pix);
if (!r)
if (!r) {
return false;
if (tessedit_page_number >= 0)
}
if (tessedit_page_number >= 0) {
break;
}
++page;
}
@ -1018,20 +1056,24 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, size_t size, co
pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
: pixReadFromMultipageTiff(filename, &offset);
}
if (pix == nullptr)
if (pix == nullptr) {
break;
}
tprintf("Page %d\n", page + 1);
char page_str[kMaxIntSize];
snprintf(page_str, kMaxIntSize - 1, "%d", page);
SetVariable("applybox_page", page_str);
bool r = ProcessPage(pix, page, filename, retry_config, timeout_millisec, renderer);
pixDestroy(&pix);
if (!r)
if (!r) {
return false;
if (tessedit_page_number >= 0)
}
if (tessedit_page_number >= 0) {
break;
if (!offset)
}
if (!offset) {
break;
}
}
return true;
}
@ -1055,7 +1097,7 @@ bool TessBaseAPI::ProcessPages(const char *filename, const char *retry_config, i
#ifdef HAVE_LIBCURL
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
size = size * nmemb;
std::string *buf = reinterpret_cast<std::string *>(userp);
auto *buf = reinterpret_cast<std::string *>(userp);
buf->append(reinterpret_cast<const char *>(contents), size);
return size;
}
@ -1265,8 +1307,9 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
* Recognize. The returned iterator must be deleted after use.
*/
LTRResultIterator *TessBaseAPI::GetLTRIterator() {
if (tesseract_ == nullptr || page_res_ == nullptr)
if (tesseract_ == nullptr || page_res_ == nullptr) {
return nullptr;
}
return new LTRResultIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
thresholder_->GetScaledYResolution(), rect_left_, rect_top_,
rect_width_, rect_height_);
@ -1281,8 +1324,9 @@ LTRResultIterator *TessBaseAPI::GetLTRIterator() {
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *TessBaseAPI::GetIterator() {
if (tesseract_ == nullptr || page_res_ == nullptr)
if (tesseract_ == nullptr || page_res_ == nullptr) {
return nullptr;
}
return ResultIterator::StartOfParagraph(LTRResultIterator(
page_res_, tesseract_, thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(),
rect_left_, rect_top_, rect_width_, rect_height_));
@ -1297,8 +1341,9 @@ ResultIterator *TessBaseAPI::GetIterator() {
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *TessBaseAPI::GetMutableIterator() {
if (tesseract_ == nullptr || page_res_ == nullptr)
if (tesseract_ == nullptr || page_res_ == nullptr) {
return nullptr;
}
return new MutableIterator(page_res_, tesseract_, thresholder_->GetScaleFactor(),
thresholder_->GetScaledYResolution(), rect_left_, rect_top_,
rect_width_, rect_height_);
@ -1306,13 +1351,15 @@ MutableIterator *TessBaseAPI::GetMutableIterator() {
/** Make a text string from the internal data structures. */
char *TessBaseAPI::GetUTF8Text() {
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
return nullptr;
}
std::string text("");
ResultIterator *it = GetIterator();
do {
if (it->Empty(RIL_PARA))
if (it->Empty(RIL_PARA)) {
continue;
}
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
@ -1331,9 +1378,10 @@ std::tuple<int,int,int,int> TessBaseAPI::GetTableBoundingBox(unsigned i)
{
const auto &t = constUniqueInstance<std::vector<TessTable>>();
if(i >= t.size())
return std::tuple<int,int,int,int>(0, 0, 0, 0);
if (i >= t.size()) {
return std::tuple<int, int, int, int>(0, 0, 0, 0);
}
const int height = tesseract_->ImageHeight();
return std::make_tuple<int,int,int,int>(
@ -1346,17 +1394,19 @@ std::vector<std::tuple<int,int,int,int>> TessBaseAPI::GetTableRows(unsigned i)
{
const auto &t = constUniqueInstance<std::vector<TessTable>>();
if(i >= t.size())
return std::vector<std::tuple<int,int,int,int>>();
if (i >= t.size()) {
return std::vector<std::tuple<int, int, int, int>>();
}
std::vector<std::tuple<int,int,int,int>> rows(t[i].rows.size());
const int height = tesseract_->ImageHeight();
for(unsigned j = 0; j < t[i].rows.size(); ++j)
rows[j] = std::make_tuple<int,int,int,int>(
t[i].rows[j].left(), height - t[i].rows[j].top(),
t[i].rows[j].right(), height - t[i].rows[j].bottom());
for (unsigned j = 0; j < t[i].rows.size(); ++j) {
rows[j] =
std::make_tuple<int, int, int, int>(t[i].rows[j].left(), height - t[i].rows[j].top(),
t[i].rows[j].right(), height - t[i].rows[j].bottom());
}
return rows;
}
@ -1364,17 +1414,19 @@ std::vector<std::tuple<int,int,int,int> > TessBaseAPI::GetTableCols(unsigned i)
{
const auto &t = constUniqueInstance<std::vector<TessTable>>();
if(i >= t.size())
return std::vector<std::tuple<int,int,int,int>>();
if (i >= t.size()) {
return std::vector<std::tuple<int, int, int, int>>();
}
std::vector<std::tuple<int,int,int,int>> cols(t[i].cols.size());
const int height = tesseract_->ImageHeight();
for(unsigned j = 0; j < t[i].cols.size(); ++j)
cols[j] = std::make_tuple<int,int,int,int>(
t[i].cols[j].left(), height - t[i].cols[j].top(),
t[i].cols[j].right(), height - t[i].cols[j].bottom());
for (unsigned j = 0; j < t[i].cols.size(); ++j) {
cols[j] =
std::make_tuple<int, int, int, int>(t[i].cols[j].left(), height - t[i].cols[j].top(),
t[i].cols[j].right(), height - t[i].cols[j].bottom());
}
return cols;
}
@ -1393,8 +1445,9 @@ static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::st
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetTSVText(int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
return nullptr;
}
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
int page_id = page_number + 1; // we use 1-based page numbers.
@ -1479,12 +1532,15 @@ char *TessBaseAPI::GetTSVText(int page_number) {
tsv_str += "\t";
// Increment counts if at end of block/paragraph/textline.
if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD))
if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) {
lcnt++;
if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD))
}
if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) {
pcnt++;
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD))
}
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) {
bcnt++;
}
do {
tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
@ -1530,8 +1586,9 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetBoxText(int page_number) {
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
return nullptr;
}
int blob_count;
int utf8_length = TextLength(&blob_count);
int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine;
@ -1546,15 +1603,17 @@ char *TessBaseAPI::GetBoxText(int page_number) {
// Tesseract uses space for recognition failure. Fix to a reject
// character, kTesseractReject so we don't create illegal box files.
for (int i = 0; text[i] != '\0'; ++i) {
if (text[i] == ' ')
if (text[i] == ' ') {
text[i] = kTesseractReject;
}
}
snprintf(result + output_length, total_length - output_length, "%s %d %d %d %d %d\n",
text.get(), left, image_height_ - bottom, right, image_height_ - top, page_number);
output_length += strlen(result + output_length);
// Just in case...
if (output_length + kMaxBytesPerLine > total_length)
if (output_length + kMaxBytesPerLine > total_length) {
break;
}
}
} while (it->Next(RIL_SYMBOL));
delete it;
@ -1576,8 +1635,9 @@ const int kLatinChs[] = {0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d,
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetUNLVText() {
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
return nullptr;
}
bool tilde_crunch_written = false;
bool last_char_was_newline = true;
bool last_char_was_tilde = false;
@ -1625,17 +1685,19 @@ char *TessBaseAPI::GetUNLVText() {
offset = lengths[i++];
}
if (i < length && wordstr[offset] != 0) {
if (!last_char_was_newline)
if (!last_char_was_newline) {
*ptr++ = ' ';
else
} else {
last_char_was_newline = false;
}
for (; i < length; offset += lengths[i++]) {
if (wordstr[offset] == ' ' || wordstr[offset] == kTesseractReject) {
*ptr++ = kUNLVReject;
last_char_was_tilde = true;
} else {
if (word->reject_map[i].rejected())
if (word->reject_map[i].rejected()) {
*ptr++ = kUNLVSuspect;
}
UNICHAR ch(wordstr + offset, lengths[i]);
int uni_ch = ch.first_uni();
for (int j = 0; kUniChs[j] != 0; ++j) {
@ -1690,10 +1752,12 @@ bool TessBaseAPI::DetectOrientationScript(int *orient_deg, float *orient_conf,
int orient_id = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient_id);
if (orient_conf)
if (orient_conf) {
*orient_conf = osr.best_result.oconfidence;
if (orient_deg)
}
if (orient_deg) {
*orient_deg = orient_id * 90; // convert quadrant to degrees
}
if (script_name) {
const char *script = osr.unicharset->get_script_from_script_id(script_id);
@ -1701,8 +1765,9 @@ bool TessBaseAPI::DetectOrientationScript(int *orient_deg, float *orient_conf,
*script_name = script;
}
if (script_conf)
if (script_conf) {
*script_conf = osr.best_result.sconfidence;
}
return true;
}
@ -1718,8 +1783,9 @@ char *TessBaseAPI::GetOsdText(int page_number) {
const char *script_name;
float script_conf;
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf))
if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
return nullptr;
}
// clockwise rotation needed to make the page upright
int rotate = OrientationIdToValue(orient_deg / 90);
@ -1746,26 +1812,31 @@ char *TessBaseAPI::GetOsdText(int page_number) {
/** Returns the average word confidence for Tesseract page result. */
int TessBaseAPI::MeanTextConf() {
int *conf = AllWordConfidences();
if (!conf)
if (!conf) {
return 0;
}
int sum = 0;
int *pt = conf;
while (*pt >= 0)
while (*pt >= 0) {
sum += *pt++;
if (pt != conf)
}
if (pt != conf) {
sum /= pt - conf;
}
delete[] conf;
return sum;
}
/** Returns an array of all word confidences, terminated by -1. */
int *TessBaseAPI::AllWordConfidences() {
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (!recognition_done_ && Recognize(nullptr) < 0)) {
return nullptr;
}
int n_word = 0;
PAGE_RES_IT res_it(page_res_);
for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward())
for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
n_word++;
}
int *conf = new int[n_word + 1];
n_word = 0;
@ -1774,10 +1845,12 @@ int *TessBaseAPI::AllWordConfidences() {
WERD_CHOICE *choice = word->best_choice;
int w_conf = static_cast<int>(100 + 5 * choice->certainty());
// This is the eq for converting Tesseract confidence to 1..100
if (w_conf < 0)
if (w_conf < 0) {
w_conf = 0;
if (w_conf > 100)
}
if (w_conf > 100) {
w_conf = 100;
}
conf[n_word++] = w_conf;
}
conf[n_word] = -1;
@ -1815,12 +1888,15 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
int w = 0;
int t;
for (t = 0; text[t] != '\0'; ++t) {
if (text[t] == '\n' || text[t] == ' ')
if (text[t] == '\n' || text[t] == ' ') {
continue;
while (wordstr[w] == ' ')
}
while (wordstr[w] == ' ') {
++w;
if (text[t] != wordstr[w])
}
if (text[t] != wordstr[w]) {
break;
}
++w;
}
if (text[t] != '\0' || wordstr[w] != '\0') {
@ -1831,10 +1907,11 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_);
PAGE_RES_IT pr_it(page_res_);
if (pr_it.word() == nullptr)
if (pr_it.word() == nullptr) {
success = false;
else
} else {
word_res = pr_it.word();
}
} else {
word_res->BestChoiceToCorrectText();
}
@ -1860,11 +1937,13 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char *wordstr) {
* any Recognize or Get* operation.
*/
void TessBaseAPI::Clear() {
if (thresholder_ != nullptr)
if (thresholder_ != nullptr) {
thresholder_->Clear();
}
ClearResults();
if (tesseract_ != nullptr)
if (tesseract_ != nullptr) {
SetInputImage(nullptr);
}
}
/**
@ -1888,8 +1967,9 @@ void TessBaseAPI::End() {
delete paragraph_models_;
paragraph_models_ = nullptr;
}
if (osd_tesseract_ == tesseract_)
if (osd_tesseract_ == tesseract_) {
osd_tesseract_ = nullptr;
}
delete tesseract_;
tesseract_ = nullptr;
delete osd_tesseract_;
@ -1933,8 +2013,9 @@ bool TessBaseAPI::GetTextDirection(int *out_offset, float *out_slope) {
int x1, x2, y1, y2;
it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
// Calculate offset and slope (NOTE: Kind of ugly)
if (x2 <= x1)
if (x2 <= x1) {
x2 = x1 + 1;
}
// Convert the point pair to slope/offset of the baseline (in image coords.)
*out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
*out_offset = static_cast<int>(y1 - *out_slope * x1);
@ -1992,8 +2073,9 @@ bool TessBaseAPI::InternalSetImage() {
tprintf("Please call Init before attempting to set an image.\n");
return false;
}
if (thresholder_ == nullptr)
if (thresholder_ == nullptr) {
thresholder_ = new ImageThresholder;
}
ClearResults();
return true;
}
@ -2006,8 +2088,9 @@ bool TessBaseAPI::InternalSetImage() {
*/
bool TessBaseAPI::Threshold(Pix **pix) {
ASSERT_HOST(pix != nullptr);
if (*pix != nullptr)
if (*pix != nullptr) {
pixDestroy(pix);
}
// Zero resolution messes up the algorithms, so make sure it is credible.
int user_dpi = 0;
GetIntVariable("user_defined_dpi", &user_dpi);
@ -2030,8 +2113,9 @@ bool TessBaseAPI::Threshold(Pix **pix) {
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
if (!thresholder_->ThresholdToPix(pageseg_mode, pix))
if (!thresholder_->ThresholdToPix(pageseg_mode, pix)) {
return false;
}
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
&image_height_);
if (!thresholder_->IsBinary()) {
@ -2063,8 +2147,9 @@ int TessBaseAPI::FindLines() {
tprintf("Please call SetImage before attempting recognition.\n");
return -1;
}
if (recognition_done_)
if (recognition_done_) {
ClearResults();
}
if (!block_list_->empty()) {
return 0;
}
@ -2121,8 +2206,9 @@ int TessBaseAPI::FindLines() {
}
}
if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0)
if (tesseract_->SegmentPage(input_file_.c_str(), block_list_, osd_tess, &osr) < 0) {
return -1;
}
// If Devanagari is being recognized, we use different images for page seg
// and for OCR.
@ -2138,10 +2224,11 @@ void TessBaseAPI::ClearResults() {
delete page_res_;
page_res_ = nullptr;
recognition_done_ = false;
if (block_list_ == nullptr)
if (block_list_ == nullptr) {
block_list_ = new BLOCK_LIST;
else
} else {
block_list_->clear();
}
if (paragraph_models_ != nullptr) {
for (auto model : *paragraph_models_) {
delete model;
@ -2161,8 +2248,9 @@ void TessBaseAPI::ClearResults() {
* Also return the number of recognized blobs in blob_count.
*/
int TessBaseAPI::TextLength(int *blob_count) {
if (tesseract_ == nullptr || page_res_ == nullptr)
if (tesseract_ == nullptr || page_res_ == nullptr) {
return 0;
}
PAGE_RES_IT page_res_it(page_res_);
int total_length = 2;
@ -2175,13 +2263,15 @@ int TessBaseAPI::TextLength(int *blob_count) {
total_blobs += choice->length() + 2;
total_length += choice->unichar_string().length() + 2;
for (int i = 0; i < word->reject_map.length(); ++i) {
if (word->reject_map[i].rejected())
if (word->reject_map[i].rejected()) {
++total_length;
}
}
}
}
if (blob_count != nullptr)
if (blob_count != nullptr) {
*blob_count = total_blobs;
}
return total_length;
}
@ -2191,8 +2281,9 @@ int TessBaseAPI::TextLength(int *blob_count) {
* Returns true if the image was processed successfully.
*/
bool TessBaseAPI::DetectOS(OSResults *osr) {
if (tesseract_ == nullptr)
if (tesseract_ == nullptr) {
return false;
}
ClearResults();
if (tesseract_->pix_binary() == nullptr && !Threshold(tesseract_->mutable_pix_binary())) {
return false;
@ -2255,8 +2346,9 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti
FCOORD classify_rotation = block_it.data()->classify_rotation();
float classify_theta = classify_rotation.angle();
double rot_theta = -(re_theta - classify_theta) * 2.0 / M_PI;
if (rot_theta < 0)
if (rot_theta < 0) {
rot_theta += 4;
}
int num_rotations = static_cast<int>(rot_theta + 0.5);
(*block_orientation)[i] = num_rotations;
// The classify_rotation is non-zero only if the text has vertical
@ -2269,8 +2361,9 @@ void TessBaseAPI::GetBlockTextOrientations(int **block_orientation, bool **verti
void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
int debug_level = 0;
GetIntVariable("paragraph_debug_level", &debug_level);
if (paragraph_models_ == nullptr)
if (paragraph_models_ == nullptr) {
paragraph_models_ = new std::vector<ParagraphModel *>;
}
MutableIterator *result_it = GetMutableIterator();
do { // Detect paragraphs for this block
std::vector<ParagraphModel *> models;
@ -2287,8 +2380,9 @@ const char *TessBaseAPI::GetUnichar(int unichar_id) {
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *TessBaseAPI::GetDawg(int i) const {
if (tesseract_ == nullptr || i >= NumDawgs())
if (tesseract_ == nullptr || i >= NumDawgs()) {
return nullptr;
}
return tesseract_->getDict().GetDawg(i);
}

View File

@ -62,8 +62,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le
// Try to get the baseline coordinates at this level.
int x1, y1, x2, y2;
if (!it->Baseline(level, &x1, &y1, &x2, &y2))
if (!it->Baseline(level, &x1, &y1, &x2, &y2)) {
return;
}
// Following the description of this field of the hOCR spec, we convert the
// baseline coordinates so that "the bottom left of the bounding box is the
// origin".
@ -127,8 +128,9 @@ char *TessBaseAPI::GetHOCRText(int page_number) {
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
return nullptr;
}
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, ccnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
@ -139,8 +141,9 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
GetBoolVariable("hocr_font_info", &font_info);
GetBoolVariable("hocr_char_boxes", &hocr_boxes);
if (input_file_.empty())
if (input_file_.empty()) {
SetInputName(nullptr);
}
#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
@ -256,12 +259,14 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
switch (res_it->WordDirection()) {
// Only emit direction if different from current paragraph direction
case DIR_LEFT_TO_RIGHT:
if (!para_is_ltr)
if (!para_is_ltr) {
hocr_str << " dir='ltr'";
}
break;
case DIR_RIGHT_TO_LEFT:
if (para_is_ltr)
if (para_is_ltr) {
hocr_str << " dir='rtl'";
}
break;
case DIR_MIX:
case DIR_NEUTRAL:
@ -272,10 +277,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
if (bold)
if (bold) {
hocr_str << "<strong>";
if (italic)
}
if (italic) {
hocr_str << "<em>";
}
do {
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
@ -335,10 +342,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic)
if (italic) {
hocr_str << "</em>";
if (bold)
}
if (bold) {
hocr_str << "</strong>";
}
// If the lstm choice mode is required it is added here
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
for (auto symbol : *rawTimestepMap) {
@ -371,10 +380,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
for (auto &j : timestep) {
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
if (conf < 0.0f)
if (conf < 0.0f) {
conf = 0.0f;
if (conf > 100.0f)
}
if (conf > 100.0f) {
conf = 100.0f;
}
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
@ -447,8 +458,9 @@ bool TessHOcrRenderer::BeginDocumentHandler() {
"' />\n"
" <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
" ocr_line ocrx_word ocrp_wconf");
if (font_info_)
if (font_info_) {
AppendString(" ocrp_lang ocrp_dir ocrp_font ocrp_fsize");
}
AppendString(
"'/>\n"
" </head>\n"
@ -465,8 +477,9 @@ bool TessHOcrRenderer::EndDocumentHandler() {
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
if (hocr == nullptr)
if (hocr == nullptr) {
return false;
}
AppendString(hocr.get());

View File

@ -36,8 +36,9 @@ static void AddBoxToLSTM(int right, int bottom, int top, int image_height, int p
}
char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
return nullptr;
}
std::string lstm_box_str;
bool first_word = true;
@ -94,8 +95,9 @@ TessLSTMBoxRenderer::TessLSTMBoxRenderer(const char *outputbase)
bool TessLSTMBoxRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> lstmbox(api->GetLSTMBoxText(imagenum()));
if (lstmbox == nullptr)
if (lstmbox == nullptr) {
return false;
}
AppendString(lstmbox.get());

View File

@ -203,8 +203,9 @@ void TessPDFRenderer::AppendPDFObject(const char *data) {
// Rounds x to three decimal places (the nearest multiple of 1/1000).
// A result that compares equal to zero is returned as positive zero, so a
// value such as -0.0001 never yields "-0".
static double prec(double x) {
  constexpr double kPrecision = 1000.0;
  const double a = round(x * kPrecision) / kPrecision;
  // The comparison is true for both +0.0 and -0.0; returning the literal 0
  // normalizes negative zero to positive zero.
  if (a == 0) {
    return 0;
  }
  return a;
}
@ -295,8 +296,9 @@ static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2, int *line_x1,
*line_y2 = y2;
int rise = abs(y2 - y1) * 72;
int run = abs(x2 - x1) * 72;
if (rise < 2 * ppi && 2 * ppi < run)
if (rise < 2 * ppi && 2 * ppi < run) {
*line_y1 = *line_y2 = (y1 + y2) / 2;
}
}
static bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint]) {
@ -428,8 +430,9 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, &smallcaps,
&fontsize, &font_id);
const int kDefaultFontsize = 8;
if (fontsize <= 0)
if (fontsize <= 0) {
fontsize = kDefaultFontsize;
}
if (fontsize != old_fontsize) {
pdf_str << "/f-0-0 " << fontsize << " Tf ";
old_fontsize = fontsize;
@ -655,18 +658,21 @@ bool TessPDFRenderer::BeginDocumentHandler() {
bool TessPDFRenderer::imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
const int jpg_quality) {
if (!pdf_object_size || !pdf_object)
if (!pdf_object_size || !pdf_object) {
return false;
}
*pdf_object = nullptr;
*pdf_object_size = 0;
if (!filename && !pix)
if (!filename && !pix) {
return false;
}
L_Compressed_Data *cid = nullptr;
int sad = 0;
if (pixGetInputFormat(pix) == IFF_PNG)
if (pixGetInputFormat(pix) == IFF_PNG) {
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
}
if (!cid) {
sad = l_generateCIDataForPdf(filename, pix, jpg_quality, &cid);
}
@ -800,8 +806,9 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI *api) {
Pix *pix = api->GetInputImage();
const char *filename = api->GetInputName();
int ppi = api->GetSourceYResolution();
if (!pix || ppi <= 0)
if (!pix || ppi <= 0) {
return false;
}
double width = pixGetWidth(pix) * 72.0 / ppi;
double height = pixGetHeight(pix) * 72.0 / ppi;

View File

@ -48,17 +48,19 @@ TessResultRenderer::TessResultRenderer(const char *outputbase, const char *exten
// Releases the output stream and then deletes the next renderer (next_).
TessResultRenderer::~TessResultRenderer() {
  if (fout_ != nullptr) {
    if (fout_ != stdout) {
      fclose(fout_);
    } else {
      // stdout is not closed here; only its error/EOF indicators are reset.
      clearerr(fout_);
    }
  }
  delete next_;
}
void TessResultRenderer::insert(TessResultRenderer *next) {
if (next == nullptr)
if (next == nullptr) {
return;
}
TessResultRenderer *remainder = next_;
next_ = next;
@ -71,8 +73,9 @@ void TessResultRenderer::insert(TessResultRenderer *next) {
}
bool TessResultRenderer::BeginDocument(const char *title) {
if (!happy_)
if (!happy_) {
return false;
}
title_ = title;
imagenum_ = -1;
bool ok = BeginDocumentHandler();
@ -83,8 +86,9 @@ bool TessResultRenderer::BeginDocument(const char *title) {
}
bool TessResultRenderer::AddImage(TessBaseAPI *api) {
if (!happy_)
if (!happy_) {
return false;
}
++imagenum_;
bool ok = AddImageHandler(api);
if (next_) {
@ -94,8 +98,9 @@ bool TessResultRenderer::AddImage(TessBaseAPI *api) {
}
bool TessResultRenderer::EndDocument() {
if (!happy_)
if (!happy_) {
return false;
}
bool ok = EndDocumentHandler();
if (next_) {
ok = next_->EndDocument() && ok;
@ -108,8 +113,9 @@ void TessResultRenderer::AppendString(const char *s) {
}
void TessResultRenderer::AppendData(const char *s, int len) {
if (!tesseract::Serialize(fout_, s, len))
if (!tesseract::Serialize(fout_, s, len)) {
happy_ = false;
}
fflush(fout_);
}
@ -169,8 +175,9 @@ bool TessTsvRenderer::EndDocumentHandler() {
bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
if (tsv == nullptr)
if (tsv == nullptr) {
return false;
}
AppendString(tsv.get());
@ -185,8 +192,9 @@ TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
if (unlv == nullptr)
if (unlv == nullptr) {
return false;
}
AppendString(unlv.get());
@ -201,8 +209,9 @@ TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
if (text == nullptr)
if (text == nullptr) {
return false;
}
AppendString(text.get());
@ -218,8 +227,9 @@ TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(ou
bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) {
char *osd = api->GetOsdText(imagenum());
if (osd == nullptr)
if (osd == nullptr) {
return false;
}
AppendString(osd);
delete[] osd;

View File

@ -140,18 +140,24 @@ static void PrintVersionInfo() {
if (tesseract::SIMDDetect::IsNEONAvailable())
printf(" Found NEON\n");
#else
if (tesseract::SIMDDetect::IsAVX512BWAvailable())
if (tesseract::SIMDDetect::IsAVX512BWAvailable()) {
printf(" Found AVX512BW\n");
if (tesseract::SIMDDetect::IsAVX512FAvailable())
}
if (tesseract::SIMDDetect::IsAVX512FAvailable()) {
printf(" Found AVX512F\n");
if (tesseract::SIMDDetect::IsAVX2Available())
}
if (tesseract::SIMDDetect::IsAVX2Available()) {
printf(" Found AVX2\n");
if (tesseract::SIMDDetect::IsAVXAvailable())
}
if (tesseract::SIMDDetect::IsAVXAvailable()) {
printf(" Found AVX\n");
if (tesseract::SIMDDetect::IsFMAAvailable())
}
if (tesseract::SIMDDetect::IsFMAAvailable()) {
printf(" Found FMA\n");
if (tesseract::SIMDDetect::IsSSEAvailable())
}
if (tesseract::SIMDDetect::IsSSEAvailable()) {
printf(" Found SSE\n");
}
#endif
#ifdef _OPENMP
printf(" Found OpenMP %d\n", _OPENMP);
@ -335,8 +341,9 @@ static void PrintBanner() {
* but that doesn't work.
*/
static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode pagesegmode) {
  // Apply the requested mode only while the API still reports
  // PSM_SINGLE_BLOCK; any other current mode is left untouched.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
    api.SetPageSegMode(pagesegmode);
  }
}
static bool checkArgValues(int arg, const char *mode, int count) {
@ -635,8 +642,9 @@ int main(int argc, char **argv) {
lang = "eng";
}
if (image == nullptr && !list_langs && !print_parameters)
if (image == nullptr && !list_langs && !print_parameters) {
return EXIT_SUCCESS;
}
// Call GlobalDawgCache here to create the global DawgCache object before
// the TessBaseAPI object. This fixes the order of destructor calls:
@ -765,8 +773,9 @@ int main(int argc, char **argv) {
}
if (!renderers.empty()) {
if (banner)
if (banner) {
PrintBanner();
}
#ifdef DISABLED_LEGACY_ENGINE
if (!osd_warning.empty()) {
fprintf(stderr, "%s", osd_warning.c_str());

View File

@ -29,8 +29,9 @@ namespace tesseract {
*/
char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0)) {
return nullptr;
}
std::string wordstr_box_str;
int left = 0, top = 0, right = 0, bottom = 0;
@ -93,8 +94,9 @@ TessWordStrBoxRenderer::TessWordStrBoxRenderer(const char *outputbase)
bool TessWordStrBoxRenderer::AddImageHandler(TessBaseAPI *api) {
const std::unique_ptr<const char[]> wordstrbox(api->GetWordStrBoxText(imagenum()));
if (wordstrbox == nullptr)
if (wordstrbox == nullptr) {
return false;
}
AppendString(wordstrbox.get());

View File

@ -21,8 +21,9 @@ namespace tesseract {
// Computes and returns the dot product of the two n-vectors u and v.
// Both u and v are read at indices 0..n-1; for n <= 0 nothing is read and
// the result is 0.0.
double DotProductNative(const double *u, const double *v, int n) {
  double total = 0.0;
  // Single pass over the elements: each product is accumulated exactly once.
  for (int k = 0; k < n; ++k) {
    total += u[k] * v[k];
  }
  return total;
}

View File

@ -51,8 +51,9 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
// group.
for (int i = 0; i < num_inputs_per_group_; ++i) {
int8_t weight = 0;
if (output + j < num_out && input + i < num_in)
if (output + j < num_out && input + i < num_in) {
weight = w(output + j, input + i);
}
shaped_w[shaped_index++] = weight;
}
}
@ -60,8 +61,9 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
// Append the bias weights for the register set.
for (int j = 0; j < num_outputs_per_register_set; ++j) {
int8_t weight = 0;
if (output + j < num_out)
if (output + j < num_out) {
weight = w(output + j, num_in);
}
shaped_w[shaped_index++] = weight;
}
output += num_outputs_per_register_set;
@ -81,8 +83,9 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
for (int i = 0; i < num_out; ++i) {
const int8_t *wi = w[i];
int total = 0;
for (int j = 0; j < num_in; ++j)
for (int j = 0; j < num_in; ++j) {
total += wi[j] * u[j];
}
// Add in the bias and correct for integer values.
v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
}

View File

@ -82,7 +82,7 @@ static inline void MultiplyGroup(const __m256i &rep_input, const __m256i &ones,
// We don't actually care what the top 64bits are, but this ends
// up with them being zero.
static inline __m128i load64_to_128(const int8_t *wi_) {
  // Reinterpret the first 8 bytes at wi_ as one 64-bit integer.
  // NOTE(review): this read presumably relies on wi_ being suitably aligned
  // for int64_t - confirm against the callers.
  const auto *wi = reinterpret_cast<const int64_t *>(wi_);
  // Upper 64 bits are set to zero, lower 64 bits to the loaded value.
  return _mm_set_epi64x(0, wi[0]);
}
@ -326,8 +326,9 @@ static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *
group_size /= 2;
w_step /= 2;
if (output + group_size <= rounded_num_out)
if (output + group_size <= rounded_num_out) {
PartialMatrixDotVector8(wi, scales, u, rounded_num_in, v);
}
}
const IntSimdMatrix IntSimdMatrix::intSimdMatrixAVX2 = {

View File

@ -86,8 +86,9 @@ bool SIMDDetect::sse_available_;
// Computes and returns the dot product of the two n-vectors u and v.
// Both u and v are read at indices 0..n-1; for n <= 0 nothing is read and
// the result is 0.0.
static double DotProductGeneric(const double *u, const double *v, int n) {
  double total = 0.0;
  // Single pass over the elements: each product is accumulated exactly once.
  for (int k = 0; k < n; ++k) {
    total += u[k] * v[k];
  }
  return total;
}

View File

@ -55,8 +55,9 @@ bool Tesseract::word_adaptable( // should we adapt?
0: NO adaption
*/
if (mode == 0) {
if (tessedit_adaption_debug)
if (tessedit_adaption_debug) {
tprintf("adaption disabled\n");
}
return false;
}
@ -82,27 +83,31 @@ bool Tesseract::word_adaptable( // should we adapt?
(word->best_choice->permuter() != FREQ_DAWG_PERM) &&
(word->best_choice->permuter() != USER_DAWG_PERM) &&
(word->best_choice->permuter() != NUMBER_PERM)) {
if (tessedit_adaption_debug)
if (tessedit_adaption_debug) {
tprintf("word not in dawgs\n");
}
return false;
}
if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) {
if (tessedit_adaption_debug)
if (tessedit_adaption_debug) {
tprintf("word has ell conflict\n");
}
return false;
}
if (flags[CHECK_SPACES] &&
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
if (tessedit_adaption_debug)
if (tessedit_adaption_debug) {
tprintf("word contains spaces\n");
}
return false;
}
if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) {
if (tessedit_adaption_debug)
if (tessedit_adaption_debug) {
tprintf("word is ambiguous\n");
}
return false;
}

View File

@ -148,8 +148,9 @@ PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
if (applybox_debug > 0) {
tprintf("APPLY_BOXES:\n");
tprintf(" Boxes read from boxfile: %6d\n", box_count);
if (box_failures > 0)
if (box_failures > 0) {
tprintf(" Boxes failed resegmentation: %6d\n", box_failures);
}
}
TidyUp(page_res);
return page_res;
@ -314,8 +315,9 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
PAGE_RES_IT page_res_it(page_res);
WERD_RES *word_res;
for (word_res = page_res_it.word(); word_res != nullptr; word_res = page_res_it.forward()) {
if (!word_res->box_word->bounding_box().major_overlap(box))
if (!word_res->box_word->bounding_box().major_overlap(box)) {
continue;
}
if (applybox_debug > 1) {
tprintf("Checking word box:");
word_res->box_word->bounding_box().print();
@ -326,10 +328,12 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
int blob_count = 0;
for (blob_count = 0; i + blob_count < word_len; ++blob_count) {
TBOX blob_box = word_res->box_word->BlobBox(i + blob_count);
if (!blob_box.major_overlap(box))
if (!blob_box.major_overlap(box)) {
break;
if (word_res->correct_text[i + blob_count].length() > 0)
}
if (word_res->correct_text[i + blob_count].length() > 0) {
break; // Blob is claimed already.
}
if (next_box != nullptr) {
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
@ -339,8 +343,9 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
if (current_box_miss_metric > next_box_miss_metric) {
break; // Blob is a better match for next box.
}
}
char_box += blob_box;
}
@ -415,13 +420,15 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK *block = b_it.data();
if (!box.major_overlap(block->pdblk.bounding_box()))
if (!box.major_overlap(block->pdblk.bounding_box())) {
continue;
}
ROW_IT r_it(block->row_list());
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
ROW *row = r_it.data();
if (!box.major_overlap(row->bounding_box()))
if (!box.major_overlap(row->bounding_box())) {
continue;
}
WERD_IT w_it(row->word_list());
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD *word = w_it.data();
@ -429,16 +436,19 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
tprintf("Checking word:");
word->bounding_box().print();
}
if (word->text() != nullptr && word->text()[0] != '\0')
if (word->text() != nullptr && word->text()[0] != '\0') {
continue; // Ignore words that are already done.
if (!box.major_overlap(word->bounding_box()))
}
if (!box.major_overlap(word->bounding_box())) {
continue;
}
C_BLOB_IT blob_it(word->cblob_list());
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
TBOX blob_box = blob->bounding_box();
if (!blob_box.major_overlap(box))
if (!blob_box.major_overlap(box)) {
continue;
}
if (next_box != nullptr) {
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
const double next_box_miss_metric = BoxMissMetric(blob_box, *next_box);
@ -448,8 +458,9 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
tprintf("Current miss metric = %g, next = %g\n", current_box_miss_metric,
next_box_miss_metric);
}
if (current_box_miss_metric > next_box_miss_metric)
if (current_box_miss_metric > next_box_miss_metric) {
continue; // Blob is a better match for next box.
}
}
if (applybox_debug > 2) {
tprintf("Blob match: blob:");
@ -473,8 +484,9 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list, const TBOX &box, const
}
}
}
if (new_word == nullptr && applybox_debug > 0)
if (new_word == nullptr && applybox_debug > 0) {
tprintf("FAIL!\n");
}
return new_word != nullptr;
}
@ -485,8 +497,9 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
WERD_RES *word_res;
for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) {
const WERD *word = word_res->word;
if (word->text() == nullptr || word->text()[0] == '\0')
if (word->text() == nullptr || word->text()[0] == '\0') {
continue; // Ignore words that have no text.
}
// Convert the correct text to a vector of UNICHAR_ID
std::vector<UNICHAR_ID> target_text;
if (!ConvertStringToUnichars(word->text(), &target_text)) {
@ -507,15 +520,17 @@ void Tesseract::ReSegmentByClassification(PAGE_RES *page_res) {
bool Tesseract::ConvertStringToUnichars(const char *utf8, std::vector<UNICHAR_ID> *class_ids) {
for (int step = 0; *utf8 != '\0'; utf8 += step) {
const char *next_space = strchr(utf8, ' ');
if (next_space == nullptr)
if (next_space == nullptr) {
next_space = utf8 + strlen(utf8);
}
step = next_space - utf8;
UNICHAR_ID class_id = unicharset.unichar_to_id(utf8, step);
if (class_id == INVALID_UNICHAR_ID) {
return false;
}
while (utf8[step] == ' ')
while (utf8[step] == ' ') {
++step;
}
class_ids->push_back(class_id);
}
return true;
@ -621,15 +636,18 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
const AmbigSpec *ambig_spec = spec_it.data();
// We'll only do 1-1.
if (ambig_spec->wrong_ngram[1] == INVALID_UNICHAR_ID &&
ambig_spec->correct_ngram_id == target_text[text_index])
ambig_spec->correct_ngram_id == target_text[text_index]) {
break;
}
}
if (!spec_it.cycled_list())
if (!spec_it.cycled_list()) {
break; // Found an ambig.
}
}
}
if (choice_it.cycled_list())
if (choice_it.cycled_list()) {
continue; // No match.
}
segmentation->push_back(length);
if (choices_pos + length == choices_length && text_index + 1 == target_text.size()) {
// This is a complete match. If the rating is good record a new best.
@ -715,8 +733,9 @@ void Tesseract::TidyUp(PAGE_RES *page_res) {
if (bad_blob_count > 0) {
tprintf(" Leaving %d unlabelled blobs in %d words.\n", bad_blob_count, ok_word_count);
}
if (unlabelled_words > 0)
if (unlabelled_words > 0) {
tprintf(" %d remaining unlabelled words deleted.\n", unlabelled_words);
}
}
}

View File

@ -154,8 +154,9 @@ void Tesseract::SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, cons
// Setup all the words for recognition with polygonal approximation.
for (unsigned w = 0; w < words->size(); ++w) {
SetupWordPassN(pass_n, &(*words)[w]);
if (w > 0)
if (w > 0) {
(*words)[w].prev_word = &(*words)[w - 1];
}
}
}
@ -169,8 +170,9 @@ void Tesseract::SetupWordPassN(int pass_n, WordData *word) {
} else if (pass_n == 2) {
// TODO(rays) Should we do this on pass1 too?
word->word->caps_height = 0.0;
if (word->word->x_height == 0.0f)
if (word->word->x_height == 0.0f) {
word->word->x_height = word->row->x_height();
}
}
word->lang_words.truncate(0);
for (unsigned s = 0; s <= sub_langs_.size(); ++s) {
@ -201,8 +203,9 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
pr_it->restart_page();
for (unsigned w = 0; w < words->size(); ++w) {
WordData *word = &(*words)[w];
if (w > 0)
if (w > 0) {
word->prev_word = &(*words)[w - 1];
}
if (monitor != nullptr) {
monitor->ocr_alive = true;
if (pass_n == 1) {
@ -228,12 +231,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
for (s = 0; s < word->lang_words.size() && word->lang_words[s]->tess_failed; ++s) {
}
// If all are failed, skip it. Image words are skipped by this test.
if (s > word->lang_words.size())
if (s > word->lang_words.size()) {
continue;
}
}
// Sync pr_it with the wth WordData.
while (pr_it->word() != nullptr && pr_it->word() != word->word)
while (pr_it->word() != nullptr && pr_it->word() != word->word) {
pr_it->forward();
}
ASSERT_HOST(pr_it->word() != nullptr);
bool make_next_word_fuzzy = false;
#ifndef DISABLED_LEGACY_ENGINE
@ -333,8 +338,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
most_recently_used_ = this;
// Run pass 1 word recognition.
if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words))
if (!RecogAllWordsPassN(1, monitor, &page_res_it, &words)) {
return false;
}
// Pass 1 post-processing.
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
if (page_res_it.word()->word->flag(W_REP_CHAR)) {
@ -343,8 +349,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
}
// Count dict words.
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) {
++(stats_.dict_words);
}
// Update misadaption log (we only need to do it on pass 1, since
// adaption only happens on this pass).
@ -355,8 +362,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
}
}
if (dopasses == 1)
if (dopasses == 1) {
return true;
}
#ifndef DISABLED_LEGACY_ENGINE
@ -370,8 +378,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
}
most_recently_used_ = this;
// Run pass 2 word recognition.
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words))
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) {
return false;
}
}
// The next passes are only required for Tess-only.
@ -380,14 +389,17 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
// Fix fuzzy spaces.
if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces && !tessedit_word_for_word &&
!right_to_left())
!right_to_left()) {
fix_fuzzy_spaces(monitor, stats_.word_count, page_res);
}
// ****************** Pass 4 *******************
if (tessedit_enable_dict_correction)
if (tessedit_enable_dict_correction) {
dictionary_correction_pass(page_res);
if (tessedit_enable_bigram_correction)
}
if (tessedit_enable_bigram_correction) {
bigram_correction_pass(page_res);
}
// ****************** Pass 5,6 *******************
rejection_passes(page_res, monitor, target_word_box, word_config);
@ -410,8 +422,9 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
#ifndef DISABLED_LEGACY_ENGINE
// changed by jetsoft
// needed for dll to output memory structure
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv))
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv)) {
output_pass(page_res_it, target_word_box);
}
// end jetsoft
#endif // ndef DISABLED_LEGACY_ENGINE
@ -448,8 +461,9 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
while (word_it.forward() != nullptr && (!word_it.word() || word_it.word()->part_of_combo)) {
// advance word_it, skipping over parts of combos
}
if (!word_it.word())
if (!word_it.word()) {
break;
}
w = word_it.word();
if (!w || !w_prev || w->uch_set != w_prev->uch_set) {
continue;
@ -628,8 +642,9 @@ void Tesseract::rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor,
stats_.doc_good_char_quality += accepted_all_char_quality;
}
check_debug_pt(word, 80);
if (tessedit_reject_bad_qual_wds && (blob_quality == 0) && (outline_errs >= chars_in_word))
if (tessedit_reject_bad_qual_wds && (blob_quality == 0) && (outline_errs >= chars_in_word)) {
word->reject_map.rej_word_bad_quality();
}
check_debug_pt(word, 90);
page_res_it.forward();
}
@ -664,8 +679,9 @@ void Tesseract::rejection_passes(PAGE_RES *page_res, ETEXT_DESC *monitor,
#endif // ndef DISABLED_LEGACY_ENGINE
void Tesseract::blamer_pass(PAGE_RES *page_res) {
if (!wordrec_run_blamer)
if (!wordrec_run_blamer) {
return;
}
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
WERD_RES *word = page_res_it.word();
@ -712,13 +728,15 @@ void Tesseract::script_pos_pass(PAGE_RES *page_res) {
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i))) {
++num_upper;
else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
} else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) {
++num_lower;
}
}
if (num_upper > 0 && num_lower == 0)
if (num_upper > 0 && num_lower == 0) {
word->small_caps = true;
}
}
word->SetScriptPositions();
}
@ -730,8 +748,9 @@ static void WordGap(const PointerVector<WERD_RES> &words, int index, int *right,
*next_left = INT32_MAX;
if (index < words.size()) {
*right = words[index]->word->bounding_box().right();
if (index + 1 < words.size())
if (index + 1 < words.size()) {
*next_left = words[index + 1]->word->bounding_box().left();
}
}
}
@ -750,8 +769,9 @@ static void EvaluateWordSpan(const PointerVector<WERD_RES> &words, int first_ind
} else {
*rating += choice->rating();
*certainty = std::min(*certainty, choice->certainty());
if (!Dict::valid_word_permuter(choice->permuter(), false))
if (!Dict::valid_word_permuter(choice->permuter(), false)) {
*valid_permuter = false;
}
}
}
}
@ -787,10 +807,11 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
break;
}
// Keep searching for the matching word break.
if ((b_right < n_right && b < best_words->size()) || n == new_words->size())
if ((b_right < n_right && b < best_words->size()) || n == new_words->size()) {
++b;
else
} else {
++n;
}
}
// Rating of the current run in each.
float b_rating = 0.0f, n_rating = 0.0f;
@ -838,8 +859,9 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
}
// Transfer from out_words to best_words.
best_words->clear();
for (auto &out_word : out_words)
for (auto &out_word : out_words) {
best_words->push_back(out_word);
}
return num_new - num_best;
}
@ -862,8 +884,9 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog
*in_word = nullptr;
}
if (debug) {
for (int i = 0; i < new_words.size(); ++i)
for (int i = 0; i < new_words.size(); ++i) {
new_words[i]->DebugTopChoice("Lang result");
}
}
// Initial version is a bit of a hack based on better certainty and rating
// or a dictionary vs non-dictionary word.
@ -874,8 +897,9 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog
// Helper returns true if all the words are acceptable.
// A word counts as unacceptable when its tess_failed flag is set or its
// tess_accepted flag is clear; an empty vector therefore yields true.
static bool WordsAcceptable(const PointerVector<WERD_RES> &words) {
  for (int w = 0; w < words.size(); ++w) {
    // Stop at the first word that failed or was not accepted.
    if (words[w]->tess_failed || !words[w]->tess_accepted) {
      return false;
    }
  }
  return true;
}
@ -889,8 +913,9 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
*make_next_word_fuzzy = false;
WERD *real_word = pr_it->word()->word;
if (real_word->rej_cblob_list()->empty() || real_word->cblob_list()->empty() ||
real_word->rej_cblob_list()->length() > noise_maxperword)
real_word->rej_cblob_list()->length() > noise_maxperword) {
return false;
}
real_word->rej_cblob_list()->sort(&C_BLOB::SortByXMiddle);
// Get the noise outlines into a vector with matching bool map.
std::vector<C_OUTLINE *> outlines;
@ -911,8 +936,9 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
for (unsigned i = 0; i < overlapped_any_blob.size(); ++i) {
if (overlapped_any_blob[i]) {
++num_overlapped;
if (word_wanted[i])
if (word_wanted[i]) {
++num_overlapped_used;
}
wanted.push_back(word_wanted[i]);
wanted_blobs.push_back(target_blobs[i]);
wanted_outlines.push_back(outlines[i]);
@ -924,10 +950,12 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
int non_overlapped = 0;
int non_overlapped_used = 0;
for (unsigned i = 0; i < word_wanted.size(); ++i) {
if (word_wanted[i])
if (word_wanted[i]) {
++non_overlapped_used;
if (outlines[i] != nullptr)
}
if (outlines[i] != nullptr) {
++non_overlapped_used;
}
}
if (debug_noise_removal) {
tprintf("Used %d/%d overlapped %d/%d non-overlaped diacritics on word:", num_overlapped_used,
@ -1008,8 +1036,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
target_blobs->resize(outlines.size(), nullptr);
// Check for outlines that need to be turned into stand-alone blobs.
for (unsigned i = 0; i < outlines.size(); ++i) {
if (outlines[i] == nullptr)
if (outlines[i] == nullptr) {
continue;
}
// Get a set of adjacent outlines that don't overlap any existing blob.
blob_wanted.resize(outlines.size(), false);
int num_blob_outlines = 0;
@ -1028,8 +1057,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
}
// Choose which combination of them we actually want and where to put
// them.
if (debug_noise_removal)
if (debug_noise_removal) {
tprintf("Num blobless outlines = %d\n", num_blob_outlines);
}
C_BLOB *left_blob = blob_it.data();
TBOX left_box = left_blob->bounding_box();
C_BLOB *right_blob = blob_it.at_last() ? nullptr : blob_it.data_relative(1);
@ -1037,8 +1067,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
!right_blob->bounding_box().x_overlap(total_ol_box)) &&
SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, left_blob, outlines,
num_blob_outlines, &blob_wanted)) {
if (debug_noise_removal)
if (debug_noise_removal) {
tprintf("Added to left blob\n");
}
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
if (blob_wanted[j]) {
(*word_wanted)[j] = true;
@ -1050,8 +1081,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
right_blob->bounding_box().x_overlap(total_ol_box)) &&
SelectGoodDiacriticOutlines(pass, noise_cert_disjoint, pr_it, right_blob, outlines,
num_blob_outlines, &blob_wanted)) {
if (debug_noise_removal)
if (debug_noise_removal) {
tprintf("Added to right blob\n");
}
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
if (blob_wanted[j]) {
(*word_wanted)[j] = true;
@ -1060,8 +1092,9 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
}
} else if (SelectGoodDiacriticOutlines(pass, noise_cert_punc, pr_it, nullptr, outlines,
num_blob_outlines, &blob_wanted)) {
if (debug_noise_removal)
if (debug_noise_removal) {
tprintf("Fitted between blobs\n");
}
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
if (blob_wanted[j]) {
(*word_wanted)[j] = true;
@ -1099,8 +1132,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
if (debug_noise_removal) {
TBOX ol_box;
for (unsigned i = 0; i < test_outlines.size(); ++i) {
if (test_outlines[i])
if (test_outlines[i]) {
ol_box += outlines[i]->bounding_box();
}
}
tprintf("All Noise blob classified as %s=%g, delta=%g at:", all_str.c_str(), best_cert,
best_cert - target_cert);
@ -1121,8 +1155,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
if (debug_noise_removal) {
TBOX ol_box;
for (unsigned j = 0; j < outlines.size(); ++j) {
if (test_outlines[j])
if (test_outlines[j]) {
ol_box += outlines[j]->bounding_box();
}
tprintf("%c", test_outlines[j] ? 'T' : 'F');
}
tprintf(" blob classified as %s=%g, delta=%g) at:", str.c_str(), cert,
@ -1188,8 +1223,9 @@ float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
ol_it.move_to_first();
if (first_to_keep == nullptr) {
// We created blob. Empty its outlines and delete it.
for (; !ol_it.empty(); ol_it.forward())
for (; !ol_it.empty(); ol_it.forward()) {
ol_it.extract();
}
delete local_blob;
cert = -c2;
} else {
@ -1212,8 +1248,9 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob
WERD_RES *word_res = pr_it->InsertSimpleCloneWord(*pr_it->word(), word);
// Get a new iterator that points to the new word.
PAGE_RES_IT it(pr_it->page_res);
while (it.word() != word_res && it.word() != nullptr)
while (it.word() != word_res && it.word() != nullptr) {
it.forward();
}
ASSERT_HOST(it.word() == word_res);
WordData wd(it);
// Force full initialization.
@ -1274,8 +1311,9 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
}
if (word->done) {
// If done on pass1, leave it as-is.
if (!word->tess_failed)
if (!word->tess_failed) {
most_recently_used_ = word->tesseract;
}
return;
}
auto sub = sub_langs_.size();
@ -1343,8 +1381,9 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor
#endif // def DISABLED_LEGACY_ENGINE
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
LSTMRecognizeWord(*block, row, *in_word, out_words);
if (!out_words->empty())
if (!out_words->empty()) {
return; // Successful lstm recognition.
}
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// No fallback allowed, so use a fake.
@ -1377,8 +1416,9 @@ void Tesseract::classify_word_pass1(const WordData &word_data, WERD_RES **in_wor
}
}
if (tessedit_enable_doc_dict && !word->IsAmbiguous())
if (tessedit_enable_doc_dict && !word->IsAmbiguous()) {
tess_add_doc_word(word->best_choice);
}
}
#endif // ndef DISABLED_LEGACY_ENGINE
}
@ -1405,14 +1445,16 @@ void Tesseract::ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RE
// See the comment in fixxht.cpp for a description of the overall process.
bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK *block, ROW *row) {
int original_misfits = CountMisfitTops(word);
if (original_misfits == 0)
if (original_misfits == 0) {
return false;
}
float baseline_shift = 0.0f;
float new_x_ht = ComputeCompatibleXheight(word, &baseline_shift);
if (baseline_shift != 0.0f) {
// Try the shift on its own first.
if (!TestNewNormalization(original_misfits, baseline_shift, word->x_height, word, block, row))
if (!TestNewNormalization(original_misfits, baseline_shift, word->x_height, word, block, row)) {
return false;
}
original_misfits = CountMisfitTops(word);
if (original_misfits > 0) {
float new_baseline_shift;
@ -1497,8 +1539,9 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
check_debug_pt(word, 30);
if (!word->done) {
word->caps_height = 0.0;
if (word->x_height == 0.0f)
if (word->x_height == 0.0f) {
word->x_height = row->x_height();
}
match_word_pass_n(2, word, row, block);
check_debug_pt(word, 40);
}
@ -1514,8 +1557,9 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
}
# ifndef GRAPHICS_DISABLED
if (tessedit_display_outwords) {
if (fx_win == nullptr)
if (fx_win == nullptr) {
create_fx_win();
}
clear_fx_win();
word->rebuild_word->plot(fx_win);
TBOX wbox = word->rebuild_word->bounding_box();
@ -1534,15 +1578,17 @@ void Tesseract::classify_word_pass2(const WordData &word_data, WERD_RES **in_wor
* Baseline normalize the word and pass it to Tess.
*/
void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *block) {
if (word->tess_failed)
if (word->tess_failed) {
return;
}
tess_segment_pass_n(pass_n, word);
if (!word->tess_failed) {
if (!word->word->flag(W_REP_CHAR)) {
word->fix_quotes();
if (tessedit_fix_hyphens)
if (tessedit_fix_hyphens) {
word->fix_hyphens();
}
/* Don't trust fix_quotes! - though I think I've fixed the bug */
if (word->best_choice->length() != word->box_word->length()) {
tprintf(
@ -1571,8 +1617,9 @@ static BLOB_CHOICE *FindBestMatchingChoice(UNICHAR_ID char_id, WERD_RES *word_re
for (int i = 0; i < word_res->best_choice->length(); ++i) {
BLOB_CHOICE *choice = FindMatchingChoice(char_id, word_res->GetBlobChoices(i));
if (choice != nullptr) {
if (best_choice == nullptr || choice->rating() < best_choice->rating())
if (best_choice == nullptr || choice->rating() < best_choice->rating()) {
best_choice = choice;
}
}
}
return best_choice;
@ -1593,8 +1640,9 @@ static void CorrectRepcharChoices(BLOB_CHOICE *blob_choice, WERD_RES *word_res)
}
// Correct any incorrect results in word.
for (int i = 0; i < word->length(); ++i) {
if (word->unichar_id(i) != blob_choice->unichar_id())
if (word->unichar_id(i) != blob_choice->unichar_id()) {
word->set_unichar_id(blob_choice->unichar_id(), i);
}
}
}
@ -1653,13 +1701,15 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se
int hyphen_pos = -1;
ACCEPTABLE_WERD_TYPE word_type = AC_UNACCEPTABLE;
if (strlen(lengths) > 20)
if (strlen(lengths) > 20) {
return word_type;
}
/* Single Leading punctuation char*/
if (s[offset] != '\0' && chs_leading_punct.contains(s[offset]))
if (s[offset] != '\0' && chs_leading_punct.contains(s[offset])) {
offset += lengths[i++];
}
leading_punct_count = i;
/* Initial cap */
@ -1674,8 +1724,9 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se
while (s[offset] != '\0' && char_set.get_islower(s + offset, lengths[i])) {
offset += lengths[i++];
}
if (i - leading_punct_count < quality_min_initial_alphas_reqd)
if (i - leading_punct_count < quality_min_initial_alphas_reqd) {
goto not_a_word;
}
/*
Allow a single hyphen in a lower case word
- don't trust upper case - I've seen several cases of "H" -> "I-I"
@ -1687,8 +1738,9 @@ Allow a single hyphen in a lower case word
while ((s[offset] != '\0') && char_set.get_islower(s + offset, lengths[i])) {
offset += lengths[i++];
}
if (i < hyphen_pos + 3)
if (i < hyphen_pos + 3) {
goto not_a_word;
}
}
} else {
/* Allow "'s" in NON hyphenated lower case words */
@ -1698,21 +1750,25 @@ Allow a single hyphen in a lower case word
offset += lengths[i++];
}
}
if (upper_count > 0)
if (upper_count > 0) {
word_type = AC_INITIAL_CAP;
else
} else {
word_type = AC_LOWER_CASE;
}
}
/* Up to two different, constrained trailing punctuation chars */
if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset]))
if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset])) {
offset += lengths[i++];
}
if (lengths[i] == 1 && s[offset] != '\0' && i > 0 && s[offset - lengths[i - 1]] != s[offset] &&
chs_trailing_punct2.contains(s[offset]))
chs_trailing_punct2.contains(s[offset])) {
offset += lengths[i++];
}
if (s[offset] != '\0')
if (s[offset] != '\0') {
word_type = AC_UNACCEPTABLE;
}
not_a_word:
@ -1735,8 +1791,9 @@ not_a_word:
offset += lengths[i++];
}
}
if (s[offset] != '\0')
if (s[offset] != '\0') {
word_type = AC_UNACCEPTABLE;
}
}
return word_type;
@ -1746,15 +1803,17 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
bool show_map_detail = false;
int16_t i;
if (!test_pt)
if (!test_pt) {
return false;
}
tessedit_rejection_debug.set_value(false);
debug_x_ht_level.set_value(0);
if (word->word->bounding_box().contains(FCOORD(test_pt_x, test_pt_y))) {
if (location < 0)
if (location < 0) {
return true; // For breakpoint use
}
tessedit_rejection_debug.set_value(true);
debug_x_ht_level.set_value(2);
tprintf("\n\nTESTWD::");
@ -1857,14 +1916,16 @@ static void find_modal_font( // good chars in word
void Tesseract::set_word_fonts(WERD_RES *word) {
// Don't try to set the word fonts for an lstm word, as the configs
// will be meaningless.
if (word->chopped_word == nullptr)
if (word->chopped_word == nullptr) {
return;
}
ASSERT_HOST(word->best_choice != nullptr);
#ifndef DISABLED_LEGACY_ENGINE
const int fontinfo_size = get_fontinfo_table().size();
if (fontinfo_size == 0)
if (fontinfo_size == 0) {
return;
}
std::vector<int> font_total_score(fontinfo_size);
// Compute the font scores for the word
@ -1873,8 +1934,9 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
}
for (int b = 0; b < word->best_choice->length(); ++b) {
const BLOB_CHOICE *choice = word->GetBlobChoice(b);
if (choice == nullptr)
if (choice == nullptr) {
continue;
}
auto &fonts = choice->fonts();
for (auto &f : fonts) {
const int fontinfo_id = f.fontinfo_id;
@ -1945,8 +2007,9 @@ void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
int16_t doc_font; // modal font
int8_t doc_font_count; // modal font
find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
if (doc_font_count == 0)
if (doc_font_count == 0) {
return;
}
// Get the modal font pointer.
const FontInfo *modal_font = nullptr;
for (page_res_it.restart_page(); page_res_it.word() != nullptr; page_res_it.forward()) {
@ -1983,12 +2046,14 @@ void Tesseract::font_recognition_pass(PAGE_RES *page_res) {
void Tesseract::dictionary_correction_pass(PAGE_RES *page_res) {
PAGE_RES_IT word_it(page_res);
for (WERD_RES *word = word_it.word(); word != nullptr; word = word_it.forward()) {
if (word->best_choices.singleton())
if (word->best_choices.singleton()) {
continue; // There are no alternates.
}
const WERD_CHOICE *best = word->best_choice;
if (word->tesseract->getDict().valid_word(*best) != 0)
if (word->tesseract->getDict().valid_word(*best) != 0) {
continue; // The best choice is in the dictionary.
}
WERD_CHOICE_IT choice_it(&word->best_choices);
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {

View File

@ -107,18 +107,20 @@ void Tesseract::unrej_good_chs(WERD_RES *word) {
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
int expected_outline_count;
if (outlines_odd.contains(c))
if (outlines_odd.contains(c)) {
return 0; // Don't use this char
else if (outlines_2.contains(c))
} else if (outlines_2.contains(c)) {
expected_outline_count = 2;
else
} else {
expected_outline_count = 1;
}
return abs(outline_count - expected_outline_count);
}
void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, bool good_quality_doc) {
if ((tessedit_good_quality_unrej && good_quality_doc))
if ((tessedit_good_quality_unrej && good_quality_doc)) {
unrej_good_quality_words(page_res_it);
}
doc_and_block_rejection(page_res_it, good_quality_doc);
if (unlv_tilde_crunching) {
tilde_crunch(page_res_it);
@ -150,8 +152,9 @@ void Tesseract::unrej_good_quality_words( // unreject potential
if (bland_unrej) {
word = page_res_it.word();
for (i = 0; i < word->reject_map.length(); i++) {
if (word->reject_map[i].accept_if_good_quality())
if (word->reject_map[i].accept_if_good_quality()) {
word->reject_map[i].setrej_quality_accept();
}
}
page_res_it.forward();
} else if ((page_res_it.row()->char_count > 0) &&
@ -169,8 +172,9 @@ void Tesseract::unrej_good_quality_words( // unreject potential
} else {
// Skip to end of dodgy row.
current_row = page_res_it.row();
while ((page_res_it.word() != nullptr) && (page_res_it.row() == current_row))
while ((page_res_it.word() != nullptr) && (page_res_it.row() == current_row)) {
page_res_it.forward();
}
}
check_debug_pt(page_res_it.word(), 110);
}
@ -265,8 +269,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
generated more space errors.
*/
if (tessedit_use_reject_spaces && prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1) {
word->reject_spaces = true;
}
word->reject_map.rej_word_block_rej();
}
prev_word_rejected = rej_word;
@ -326,8 +331,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
this generated more space errors.
*/
if (tessedit_use_reject_spaces && prev_word_rejected &&
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1)
page_res_it.prev_row() == page_res_it.row() && word->word->space() == 1) {
word->reject_spaces = true;
}
word->reject_map.rej_word_row_rej();
}
prev_word_rejected = rej_word;
@ -338,8 +344,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
tprintf("NOT REJECTING ROW %d #chars: %d # Rejects: %d; \n", row_no,
current_row->char_count, current_row->rej_count);
}
while (page_res_it.word() != nullptr && page_res_it.row() == current_row)
while (page_res_it.word() != nullptr && page_res_it.row() == current_row) {
page_res_it.forward();
}
}
}
}
@ -380,11 +387,13 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
}
word = page_res_it.word();
if (crunch_early_convert_bad_unlv_chs)
if (crunch_early_convert_bad_unlv_chs) {
convert_bad_unlv_chs(word);
}
if (crunch_early_merge_tess_fails)
if (crunch_early_merge_tess_fails) {
word->merge_tess_fails();
}
if (word->reject_map.accept_count() != 0) {
found_terrible_word = false;
@ -445,22 +454,25 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
if ((word->best_choice->unichar_string().length() == 0) ||
(strspn(word->best_choice->unichar_string().c_str(), " ") ==
word->best_choice->unichar_string().size()))
word->best_choice->unichar_string().size())) {
crunch_mode = 1;
else {
} else {
adjusted_len = word->reject_map.length();
if (adjusted_len > crunch_rating_max)
if (adjusted_len > crunch_rating_max) {
adjusted_len = crunch_rating_max;
}
rating_per_ch = word->best_choice->rating() / adjusted_len;
if (rating_per_ch > crunch_terrible_rating)
if (rating_per_ch > crunch_terrible_rating) {
crunch_mode = 2;
else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE))
} else if (crunch_terrible_garbage && (garbage_level == G_TERRIBLE)) {
crunch_mode = 3;
else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) && (garbage_level != G_OK))
} else if ((word->best_choice->certainty() < crunch_poor_garbage_cert) &&
(garbage_level != G_OK)) {
crunch_mode = 4;
else if ((rating_per_ch > crunch_poor_garbage_rate) && (garbage_level != G_OK))
} else if ((rating_per_ch > crunch_poor_garbage_rate) && (garbage_level != G_OK)) {
crunch_mode = 5;
}
}
if (crunch_mode > 0) {
if (crunch_debug > 2) {
@ -468,8 +480,9 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
word->best_choice->unichar_string().c_str());
}
return true;
} else
} else {
return false;
}
}
bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level,
@ -486,8 +499,9 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
(acceptable_word_string(*word->uch_set, str, lengths) == AC_UNACCEPTABLE && !ok_dict_word);
adjusted_len = word->reject_map.length();
if (adjusted_len > 10)
if (adjusted_len > 10) {
adjusted_len = 10;
}
rating_per_ch = word->best_choice->rating() / adjusted_len;
if (rating_per_ch > crunch_pot_poor_rate) {
@ -570,8 +584,9 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
The following step has been left till now as the tess fails are used to
determine if the word is deletable.
*/
if (!crunch_early_merge_tess_fails)
if (!crunch_early_merge_tess_fails) {
word->merge_tess_fails();
}
page_res_it.forward();
}
}
@ -585,13 +600,15 @@ void Tesseract::convert_bad_unlv_chs(WERD_RES *word_res) {
for (i = 0; i < word_res->reject_map.length(); ++i) {
if (word_res->best_choice->unichar_id(i) == unichar_tilde) {
word_res->best_choice->set_unichar_id(unichar_dash, i);
if (word_res->reject_map[i].accepted())
if (word_res->reject_map[i].accepted()) {
word_res->reject_map[i].setrej_unlv_rej();
}
}
if (word_res->best_choice->unichar_id(i) == unichar_pow) {
word_res->best_choice->set_unichar_id(unichar_space, i);
if (word_res->reject_map[i].accepted())
if (word_res->reject_map[i].accepted()) {
word_res->reject_map[i].setrej_unlv_rej();
}
}
}
}
@ -635,8 +652,9 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
case FIRST_UPPER:
state = SUBSEQUENT_UPPER;
upper_string_count++;
if (longest_upper_run_len < upper_string_count)
if (longest_upper_run_len < upper_string_count) {
longest_upper_run_len = upper_string_count;
}
if (last_char == word->uch_set->unichar_to_id(str, *lengths)) {
alpha_repetition_count++;
if (longest_alpha_repetition_count < alpha_repetition_count) {
@ -664,8 +682,9 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
case FIRST_LOWER:
state = SUBSEQUENT_LOWER;
lower_string_count++;
if (longest_lower_run_len < lower_string_count)
if (longest_lower_run_len < lower_string_count) {
longest_lower_run_len = lower_string_count;
}
if (last_char == word->uch_set->unichar_to_id(str, *lengths)) {
alpha_repetition_count++;
if (longest_alpha_repetition_count < alpha_repetition_count) {
@ -702,10 +721,11 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
break;
}
} else {
if (*lengths == 1 && *str == ' ')
if (*lengths == 1 && *str == ' ') {
tess_rejs++;
else
} else {
bad_char_count++;
}
switch (state) {
case FIRST_NUM:
isolated_digits++;
@ -740,16 +760,18 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
if ((crunch_accept_ok &&
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE) ||
longest_lower_run_len > crunch_leave_lc_strings ||
longest_upper_run_len > crunch_leave_uc_strings)
longest_upper_run_len > crunch_leave_uc_strings) {
return G_NEVER_CRUNCH;
}
}
if (word->reject_map.length() > 1 && strpbrk(str, " ") == nullptr &&
(word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM ||
word->best_choice->permuter() == NUMBER_PERM ||
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE || ok_dict_word))
acceptable_word_string(*word->uch_set, str, lengths) != AC_UNACCEPTABLE || ok_dict_word)) {
return G_OK;
}
ok_chars = len - bad_char_count - isolated_digits - isolated_alphas - tess_rejs;
@ -759,24 +781,28 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, bool ok_dict_word) {
isolated_digits, isolated_alphas, tess_rejs);
}
if (bad_char_count == 0 && tess_rejs == 0 &&
(len > isolated_digits + isolated_alphas || len <= 2))
(len > isolated_digits + isolated_alphas || len <= 2)) {
return G_OK;
}
if (tess_rejs > ok_chars || (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len))
if (tess_rejs > ok_chars || (tess_rejs > 0 && (bad_char_count + tess_rejs) * 2 > len)) {
return G_TERRIBLE;
}
if (len > 4) {
dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + isolated_alphas;
if (dodgy_chars > 5 || (dodgy_chars / static_cast<float>(len)) > 0.5)
if (dodgy_chars > 5 || (dodgy_chars / static_cast<float>(len)) > 0.5) {
return G_DODGY;
else
} else {
return G_OK;
}
} else {
dodgy_chars = 2 * tess_rejs + bad_char_count;
if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || dodgy_chars >= len)
if ((len == 4 && dodgy_chars > 2) || (len == 3 && dodgy_chars > 2) || dodgy_chars >= len) {
return G_DODGY;
else
} else {
return G_OK;
}
}
}
@ -871,8 +897,9 @@ int16_t Tesseract::failure_count(WERD_RES *word) {
int tess_rejs = 0;
for (; *str != '\0'; str++) {
if (*str == ' ')
if (*str == ' ') {
tess_rejs++;
}
}
return tess_rejs;
}
@ -889,12 +916,14 @@ bool Tesseract::noise_outlines(TWERD *word) {
for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
outline_count++;
box = ol->bounding_box();
if (box.height() > box.width())
if (box.height() > box.width()) {
max_dimension = box.height();
else
} else {
max_dimension = box.width();
if (max_dimension < small_limit)
}
if (max_dimension < small_limit) {
small_outline_count++;
}
}
}
return small_outline_count >= outline_count;

View File

@ -105,16 +105,18 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
monitor->progress = 90 + 5 * word_index / word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != nullptr &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) {
return;
}
}
}
if (!word_res_it_from.at_last()) {
word_res_it_to = word_res_it_from;
prevent_null_wd_fixsp = word_res->word->cblob_list()->empty();
if (check_debug_pt(word_res, 60))
if (check_debug_pt(word_res, 60)) {
debug_fix_space_level.set_value(10);
}
word_res_it_to.forward();
word_index++;
if (monitor != nullptr) {
@ -122,22 +124,27 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
monitor->progress = 90 + 5 * word_index / word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != nullptr &&
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words)))
(*monitor->cancel)(monitor->cancel_this, stats_.dict_words))) {
return;
}
}
while (!word_res_it_to.at_last() &&
(word_res_it_to.data_relative(1)->word->flag(W_FUZZY_NON) ||
word_res_it_to.data_relative(1)->word->flag(W_FUZZY_SP))) {
if (check_debug_pt(word_res, 60))
if (check_debug_pt(word_res, 60)) {
debug_fix_space_level.set_value(10);
if (word_res->word->cblob_list()->empty())
}
if (word_res->word->cblob_list()->empty()) {
prevent_null_wd_fixsp = true;
}
word_res = word_res_it_to.forward();
}
if (check_debug_pt(word_res, 60))
if (check_debug_pt(word_res, 60)) {
debug_fix_space_level.set_value(10);
if (word_res->word->cblob_list()->empty())
}
if (word_res->word->cblob_list()->empty()) {
prevent_null_wd_fixsp = true;
}
if (prevent_null_wd_fixsp) {
word_res_it_from = word_res_it_to;
} else {
@ -150,8 +157,9 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
word_res_it_from.forward();
}
}
if (test_pt)
if (test_pt) {
debug_fix_space_level.set_value(0);
}
}
fix_sp_fp_word(word_res_it_from, row_res_it.data()->row, block_res_it.data()->block);
// Last word in row
@ -169,8 +177,9 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
best_score = eval_word_spacing(best_perm); // default score
dump_words(best_perm, best_score, 1, improved);
if (best_score != PERFECT_WERDS)
if (best_score != PERFECT_WERDS) {
initialise_search(best_perm, current_perm);
}
while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
match_current_words(current_perm, row, block);
@ -182,8 +191,9 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
best_score = current_score;
improved = true;
}
if (current_score < PERFECT_WERDS)
if (current_score < PERFECT_WERDS) {
transform_to_next_perm(current_perm);
}
}
dump_words(best_perm, best_score, 3, improved);
}
@ -268,8 +278,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
word_count++;
if (word->tess_failed) {
total_score += prev_word_score;
if (prev_word_done)
if (prev_word_done) {
done_word_count++;
}
prev_word_score = 0;
prev_char_1 = false;
prev_char_digit = false;
@ -289,8 +300,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
(!word_done &&
conflict_set_I_l_1.contains(word->best_choice->unichar_string()[0])))))) {
total_score += prev_word_score;
if (prev_word_done)
if (prev_word_done) {
done_word_count++;
}
current_word_ok_so_far = word_done;
}
@ -306,8 +318,9 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
rejtn */
for (i = 0, prev_char_1 = false; i < word_len; i++) {
bool current_char_1 = word->best_choice->unichar_string()[i] == '1';
if (prev_char_1 || (current_char_1 && (i > 0)))
if (prev_char_1 || (current_char_1 && (i > 0))) {
total_score++;
}
prev_char_1 = current_char_1;
}
@ -318,14 +331,17 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
offset += word->best_choice->unichar_lengths()[i++]) {
bool current_char_punct =
strchr(punct_chars, word->best_choice->unichar_string()[offset]) != nullptr;
if (prev_char_punct || (current_char_punct && i > 0))
if (prev_char_punct || (current_char_punct && i > 0)) {
total_score++;
}
prev_char_punct = current_char_punct;
}
}
prev_char_digit = digit_or_numeric_punct(word, word_len - 1);
for (i = 0, offset = 0; i < word_len - 1; offset += word->best_choice->unichar_lengths()[i++])
for (i = 0, offset = 0; i < word_len - 1;
offset += word->best_choice->unichar_lengths()[i++]) {
;
}
prev_char_1 =
((word_done && (word->best_choice->unichar_string()[offset] == '1')) ||
(!word_done &&
@ -337,20 +353,23 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
} while (word_res_it.data()->part_of_combo);
} while (!word_res_it.at_first());
total_score += prev_word_score;
if (prev_word_done)
if (prev_word_done) {
done_word_count++;
if (done_word_count == word_count)
}
if (done_word_count == word_count) {
return PERFECT_WERDS;
else
} else {
return total_score;
}
}
bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
int i;
int offset;
for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++])
for (i = 0, offset = 0; i < char_position; offset += word->best_choice->unichar_lengths()[i++]) {
;
}
return (
word->uch_set->get_isdigit(word->best_choice->unichar_string().c_str() + offset,
word->best_choice->unichar_lengths()[i]) ||
@ -387,8 +406,9 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
box = word->word->bounding_box();
if (prev_right > -INT16_MAX) {
gap = box.left() - prev_right;
if (gap < min_gap)
if (gap < min_gap) {
min_gap = gap;
}
}
prev_right = box.right();
}
@ -492,8 +512,9 @@ void Tesseract::dump_words(WERD_RES_LIST &perm, int16_t score, int16_t mode, boo
}
bool Tesseract::fixspace_thinks_word_done(WERD_RES *word) {
if (word->done)
if (word->done) {
return true;
}
/*
Use all the standard pass 2 conditions for mode 5 in set_done() in
@ -531,12 +552,14 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
word_res = word_res_it.data();
if (word_res->word->flag(W_REP_CHAR) || word_res->combination || word_res->part_of_combo ||
!word_res->word->flag(W_DONT_CHOP))
!word_res->word->flag(W_DONT_CHOP)) {
return;
}
blob_index = worst_noise_blob(word_res, &junk);
if (blob_index < 0)
if (blob_index < 0) {
return;
}
if (debug_fix_space_level > 1) {
tprintf("FP fixspace working on \"%s\"\n", word_res->best_choice->unichar_string().c_str());
@ -669,35 +692,41 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
float small_limit = kBlnXHeight * fixsp_small_outlines_size;
float non_noise_limit = kBlnXHeight * 0.8;
if (word_res->rebuild_word == nullptr)
if (word_res->rebuild_word == nullptr) {
return -1; // Can't handle cube words.
}
// Normalised.
int blob_count = word_res->box_word->length();
ASSERT_HOST(blob_count <= 512);
if (blob_count < 5)
if (blob_count < 5) {
return -1; // too short to split
}
/* Get the noise scores for all blobs */
#ifndef SECURE_NAMES
if (debug_fix_space_level > 5)
if (debug_fix_space_level > 5) {
tprintf("FP fixspace Noise metrics for \"%s\": ",
word_res->best_choice->unichar_string().c_str());
}
#endif
for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
TBLOB *blob = word_res->rebuild_word->blobs[i];
if (word_res->reject_map[i].accepted())
if (word_res->reject_map[i].accepted()) {
noise_score[i] = non_noise_limit;
else
} else {
noise_score[i] = blob_noise_score(blob);
}
if (debug_fix_space_level > 5)
if (debug_fix_space_level > 5) {
tprintf("%1.1f ", noise_score[i]);
}
}
if (debug_fix_space_level > 5)
if (debug_fix_space_level > 5) {
tprintf("\n");
}
/* Now find the worst one which is far enough away from the end of the word */
@ -707,8 +736,9 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
non_noise_count++;
}
}
if (non_noise_count < fixsp_non_noise_limit)
if (non_noise_count < fixsp_non_noise_limit) {
return -1;
}
min_noise_blob = i;
@ -718,13 +748,15 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
non_noise_count++;
}
}
if (non_noise_count < fixsp_non_noise_limit)
if (non_noise_count < fixsp_non_noise_limit) {
return -1;
}
max_noise_blob = i;
if (min_noise_blob > max_noise_blob)
if (min_noise_blob > max_noise_blob) {
return -1;
}
*worst_noise_score = small_limit;
worst_noise_blob = -1;
@ -752,8 +784,9 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
max_dimension = box.width();
}
if (largest_outline_dimension < max_dimension)
if (largest_outline_dimension < max_dimension) {
largest_outline_dimension = max_dimension;
}
}
if (outline_count > 5) {
@ -810,8 +843,9 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if (word->rebuild_word == nullptr)
if (word->rebuild_word == nullptr) {
continue; // Can't handle cube words.
}
if (word->done || word->tess_accepted || word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM || safe_dict_word(word) > 0) {
@ -827,8 +861,9 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
}
}
}
if (score < 0)
if (score < 0) {
score = 0;
}
return score;
}

View File

@ -76,16 +76,19 @@ int Tesseract::CountMisfitTops(WERD_RES *word_res) {
UNICHAR_ID class_id = word_res->best_choice->unichar_id(blob_id);
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top();
if (top >= INT_FEAT_RANGE)
if (top >= INT_FEAT_RANGE) {
top = INT_FEAT_RANGE - 1;
}
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
if (max_top - min_top > kMaxCharTopRange)
if (max_top - min_top > kMaxCharTopRange) {
continue;
}
bool bad =
top < min_top - x_ht_acceptance_tolerance || top > max_top + x_ht_acceptance_tolerance;
if (bad)
if (bad) {
++bad_blobs;
}
if (debug_x_ht_level >= 1) {
tprintf("Class %s is %s with top %d vs limits of %d->%d, +/-%d\n",
unicharset.id_to_unichar(class_id), bad ? "Misfit" : "OK", top, min_top, max_top,
@ -112,14 +115,16 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
if (unicharset.get_isalpha(class_id) || unicharset.get_isdigit(class_id)) {
int top = blob->bounding_box().top() + bottom_shift;
// Clip the top to the limit of normalized feature space.
if (top >= INT_FEAT_RANGE)
if (top >= INT_FEAT_RANGE) {
top = INT_FEAT_RANGE - 1;
}
int bottom = blob->bounding_box().bottom() + bottom_shift;
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom, &min_top, &max_top);
// Chars with a wild top range would mess up the result so ignore them.
if (max_top - min_top > kMaxCharTopRange)
if (max_top - min_top > kMaxCharTopRange) {
continue;
}
int misfit_dist = std::max((min_top - x_ht_acceptance_tolerance) - top,
top - (max_top + x_ht_acceptance_tolerance));
int height = top - kBlnBaselineOffset;
@ -142,8 +147,9 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
}
// The range of expected heights gets a vote equal to the distance
// of the actual top from the expected top.
for (int y = min_xht; y <= max_xht; ++y)
for (int y = min_xht; y <= max_xht; ++y) {
top_stats.add(y, misfit_dist);
}
} else if ((min_bottom > bottom + x_ht_acceptance_tolerance ||
bottom - x_ht_acceptance_tolerance > max_bottom) &&
bottom_shift == 0) {
@ -157,10 +163,12 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
// of the actual bottom from the expected bottom, spread over the
// range of its acceptance.
int misfit_weight = abs(min_shift);
if (max_shift > min_shift)
if (max_shift > min_shift) {
misfit_weight /= max_shift - min_shift;
for (int y = min_shift; y <= max_shift; ++y)
}
for (int y = min_shift; y <= max_shift; ++y) {
shift_stats.add(y, misfit_weight);
}
} else {
if (bottom_shift == 0) {
// Things with bottoms that are already ok need to say so, on the
@ -185,8 +193,9 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
if (debug_x_ht_level >= 2) {
tprintf("baseline shift=%g\n", *baseline_shift);
}
if (top_stats.get_total() == 0)
if (top_stats.get_total() == 0) {
return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
// The new xheight is just the median vote, which is then scaled out
// of BLN space back to pixel space to get the x-height in pixel space.
float new_xht = top_stats.median();
@ -196,10 +205,11 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
new_xht / word_res->denorm.y_scale());
}
// The xheight must change by at least x_ht_min_change to be used.
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change)
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
return new_xht / word_res->denorm.y_scale();
else
} else {
return bottom_shift != 0 ? word_res->x_height : 0.0f;
}
}
} // namespace tesseract

View File

@ -80,8 +80,9 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
unsigned end_box = 0;
// Don't let \t, which marks newlines in the box file, get into the line
// content, as that makes the line unusable in training.
while (end_box < texts.size() && texts[end_box] == "\t")
while (end_box < texts.size() && texts[end_box] == "\t") {
++end_box;
}
for (auto start_box = end_box; start_box < box_count; start_box = end_box) {
// Find the textline of boxes starting at start and their bounding box.
TBOX line_box = boxes[start_box];
@ -96,8 +97,9 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
BLOCK_IT b_it(block_list);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOCK *block = b_it.data();
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
continue; // Not a text block.
}
TBOX block_box = block->pdblk.bounding_box();
block_box.rotate(block->re_rotation());
if (block_box.major_overlap(line_box)) {
@ -114,12 +116,14 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
} else {
imagedata = GetLineData(line_box, boxes, texts, start_box, end_box, *best_block);
}
if (imagedata != nullptr)
if (imagedata != nullptr) {
training_data->AddPageToDocument(imagedata);
}
// Don't let \t, which marks newlines in the box file, get into the line
// content, as that makes the line unusable in training.
while (end_box < texts.size() && texts[end_box] == "\t")
while (end_box < texts.size() && texts[end_box] == "\t") {
++end_box;
}
}
}
@ -131,8 +135,9 @@ ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX>
const BLOCK &block) {
TBOX revised_box;
ImageData *image_data = GetRectImage(line_box, block, kImagePadding, &revised_box);
if (image_data == nullptr)
if (image_data == nullptr) {
return nullptr;
}
image_data->set_page_number(applybox_page);
// Copy the boxes and shift them so they are relative to the image.
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
@ -166,16 +171,18 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
// Number of clockwise 90 degree rotations needed to get back to tesseract
// coords from the clipped image.
int num_rotations = 0;
if (block.re_rotation().y() > 0.0f)
if (block.re_rotation().y() > 0.0f) {
num_rotations = 1;
else if (block.re_rotation().x() < 0.0f)
} else if (block.re_rotation().x() < 0.0f) {
num_rotations = 2;
else if (block.re_rotation().y() < 0.0f)
} else if (block.re_rotation().y() < 0.0f) {
num_rotations = 3;
}
// Handle two cases automatically: 1 the box came from the block, 2 the box
// came from a box file, and refers to the image, which the block may not.
if (block.pdblk.bounding_box().major_overlap(*revised_box))
if (block.pdblk.bounding_box().major_overlap(*revised_box)) {
revised_box->rotate(block.re_rotation());
}
// Now revised_box always refers to the image.
// BestPix is never colormapped, but may be of any depth.
Pix *pix = BestPix();
@ -184,14 +191,16 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
TBOX image_box(0, 0, width, height);
// Clip to image bounds;
*revised_box &= image_box;
if (revised_box->null_box())
if (revised_box->null_box()) {
return nullptr;
}
Box *clip_box = boxCreate(revised_box->left(), height - revised_box->top(), revised_box->width(),
revised_box->height());
Pix *box_pix = pixClipRectangle(pix, clip_box, nullptr);
boxDestroy(&clip_box);
if (box_pix == nullptr)
if (box_pix == nullptr) {
return nullptr;
}
if (num_rotations > 0) {
Pix *rot_pix = pixRotateOrth(box_pix, num_rotations);
pixDestroy(&box_pix);
@ -210,8 +219,9 @@ ImageData *Tesseract::GetRectImage(const TBOX &box, const BLOCK &block, int padd
// Rotate the clipped revised box back to internal coordinates.
FCOORD rotation(block.re_rotation().x(), -block.re_rotation().y());
revised_box->rotate(rotation);
if (num_rotations != 2)
if (num_rotations != 2) {
vertical_text = true;
}
}
return new ImageData(vertical_text, box_pix);
}
@ -228,14 +238,17 @@ void Tesseract::LSTMRecognizeWord(const BLOCK &block, ROW *row, WERD_RES *word,
word_box = TBOX(0, 0, ImageWidth(), ImageHeight());
} else {
float baseline = row->base_line((word_box.left() + word_box.right()) / 2);
if (baseline + row->descenders() < word_box.bottom())
if (baseline + row->descenders() < word_box.bottom()) {
word_box.set_bottom(baseline + row->descenders());
if (baseline + row->x_height() + row->ascenders() > word_box.top())
}
if (baseline + row->x_height() + row->ascenders() > word_box.top()) {
word_box.set_top(baseline + row->x_height() + row->ascenders());
}
}
ImageData *im_data = GetRectImage(word_box, block, kImagePadding, &word_box);
if (im_data == nullptr)
if (im_data == nullptr) {
return;
}
bool do_invert = tessedit_do_invert;
lstm_recognizer_->RecognizeLine(*im_data, do_invert, classify_debug_level > 0,
@ -254,8 +267,9 @@ void Tesseract::SearchWords(PointerVector<WERD_RES> *words) {
// If we drop a word as junk, then there is always a space in front of the
// next.
const Dict *stopper_dict = lstm_recognizer_->GetDict();
if (stopper_dict == nullptr)
if (stopper_dict == nullptr) {
stopper_dict = &getDict();
}
bool any_nonspace_delimited = false;
for (int w = 0; w < words->size(); ++w) {
WERD_RES *word = (*words)[w];

View File

@ -42,8 +42,9 @@ LTRResultIterator::~LTRResultIterator() = default;
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return nullptr; // Already at the end!
}
std::string text;
PAGE_RES_IT res_it(*it_);
WERD_CHOICE *best_choice = res_it.word()->best_choice;
@ -70,8 +71,9 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
eop = res_it.block() != res_it.prev_block() ||
res_it.row()->row->para() != res_it.prev_row()->row->para();
} while (level != RIL_TEXTLINE && !eop);
if (eop)
if (eop) {
text += paragraph_separator_;
}
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
@ -93,8 +95,9 @@ void LTRResultIterator::SetParagraphSeparator(const char *new_para) {
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float LTRResultIterator::Confidence(PageIteratorLevel level) const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return 0.0f; // Already at the end!
}
float mean_certainty = 0.0f;
int certainty_count = 0;
PAGE_RES_IT res_it(*it_);
@ -208,45 +211,53 @@ const char *LTRResultIterator::WordFontAttributes(bool *is_bold, bool *is_italic
// Returns the name of the language used to recognize this word.
const char *LTRResultIterator::WordRecognitionLanguage() const {
if (it_->word() == nullptr || it_->word()->tesseract == nullptr)
if (it_->word() == nullptr || it_->word()->tesseract == nullptr) {
return nullptr;
}
return it_->word()->tesseract->lang.c_str();
}
// Return the overall directionality of this word.
StrongScriptDirection LTRResultIterator::WordDirection() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return DIR_NEUTRAL;
}
bool has_rtl = it_->word()->AnyRtlCharsInWord();
bool has_ltr = it_->word()->AnyLtrCharsInWord();
if (has_rtl && !has_ltr)
if (has_rtl && !has_ltr) {
return DIR_RIGHT_TO_LEFT;
if (has_ltr && !has_rtl)
}
if (has_ltr && !has_rtl) {
return DIR_LEFT_TO_RIGHT;
if (!has_ltr && !has_rtl)
}
if (!has_ltr && !has_rtl) {
return DIR_NEUTRAL;
}
return DIR_MIX;
}
// Returns true if the current word was found in a dictionary.
bool LTRResultIterator::WordIsFromDictionary() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return false; // Already at the end!
}
int permuter = it_->word()->best_choice->permuter();
return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM;
}
// Returns the number of blanks before the current word.
int LTRResultIterator::BlanksBeforeWord() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return 1;
}
return it_->word()->word->space();
}
// Returns true if the current word is numeric.
bool LTRResultIterator::WordIsNumeric() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return false; // Already at the end!
}
int permuter = it_->word()->best_choice->permuter();
return permuter == NUMBER_PERM;
}
@ -281,8 +292,9 @@ const char *LTRResultIterator::GetBlamerMisadaptionDebug() const {
// Returns true if a truth string was recorded for the current word.
bool LTRResultIterator::HasTruthString() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return false; // Already at the end!
}
if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) {
return false; // no truth information for this word
}
@ -292,8 +304,9 @@ bool LTRResultIterator::HasTruthString() const {
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool LTRResultIterator::EquivalentToTruth(const char *str) const {
if (!HasTruthString())
if (!HasTruthString()) {
return false;
}
ASSERT_HOST(it_->word()->uch_set != nullptr);
WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
@ -302,8 +315,9 @@ bool LTRResultIterator::EquivalentToTruth(const char *str) const {
// Returns the null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString())
if (!HasTruthString()) {
return nullptr;
}
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char *result = new char[length];
@ -314,8 +328,9 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
// Returns the null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *LTRResultIterator::WordNormedUTF8Text() const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return nullptr; // Already at the end!
}
std::string ocr_text;
WERD_CHOICE *best_choice = it_->word()->best_choice;
const UNICHARSET *unicharset = it_->word()->uch_set;
@ -332,10 +347,12 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *LTRResultIterator::WordLattice(int *lattice_size) const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return nullptr; // Already at the end!
if (it_->word()->blamer_bundle == nullptr)
}
if (it_->word()->blamer_bundle == nullptr) {
return nullptr;
}
*lattice_size = it_->word()->blamer_bundle->lattice_size();
return it_->word()->blamer_bundle->lattice_data();
}
@ -344,8 +361,9 @@ const char *LTRResultIterator::WordLattice(int *lattice_size) const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSuperscript() const {
if (cblob_it_ == nullptr && it_->word() != nullptr)
if (cblob_it_ == nullptr && it_->word() != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
}
return false;
}
@ -353,8 +371,9 @@ bool LTRResultIterator::SymbolIsSuperscript() const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsSubscript() const {
if (cblob_it_ == nullptr && it_->word() != nullptr)
if (cblob_it_ == nullptr && it_->word() != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
}
return false;
}
@ -362,8 +381,9 @@ bool LTRResultIterator::SymbolIsSubscript() const {
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool LTRResultIterator::SymbolIsDropcap() const {
if (cblob_it_ == nullptr && it_->word() != nullptr)
if (cblob_it_ == nullptr && it_->word() != nullptr) {
return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
}
return false;
}
@ -391,8 +411,9 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator &result_it) {
filterSpaces();
}
}
if ((oemLegacy || !lstm_choice_mode) && word_res_->ratings != nullptr)
if ((oemLegacy || !lstm_choice_mode) && word_res_->ratings != nullptr) {
choices = word_res_->GetBlobChoices(result_it.blob_index_);
}
if (choices != nullptr && !choices->empty()) {
choice_it_ = new BLOB_CHOICE_IT(choices);
choice_it_->mark_cycle_pt();
@ -418,8 +439,9 @@ bool ChoiceIterator::Next() {
return true;
}
} else {
if (choice_it_ == nullptr)
if (choice_it_ == nullptr) {
return false;
}
choice_it_->forward();
return !choice_it_->cycled_list();
}
@ -432,8 +454,9 @@ const char *ChoiceIterator::GetUTF8Text() const {
std::pair<const char *, float> choice = *LSTM_choice_it_;
return choice.first;
} else {
if (choice_it_ == nullptr)
if (choice_it_ == nullptr) {
return nullptr;
}
UNICHAR_ID id = choice_it_->data()->unichar_id();
return word_res_->uch_set->id_to_unichar_ext(id);
}
@ -451,8 +474,9 @@ float ChoiceIterator::Confidence() const {
std::pair<const char *, float> choice = *LSTM_choice_it_;
confidence = 100 - rating_coefficient_ * choice.second;
} else {
if (choice_it_ == nullptr)
if (choice_it_ == nullptr) {
return 0.0f;
}
confidence = 100 + 5 * choice_it_->data()->certainty();
}
return ClipToRange(confidence, 0.0f, 100.0f);
@ -468,8 +492,9 @@ std::vector<std::vector<std::pair<const char *, float>>> *ChoiceIterator::Timest
}
void ChoiceIterator::filterSpaces() {
if (LSTM_choices_->empty())
if (LSTM_choices_->empty()) {
return;
}
std::vector<std::pair<const char *, float>>::iterator it;
for (it = LSTM_choices_->begin(); it != LSTM_choices_->end();) {
if (!strcmp(it->first, " ")) {

View File

@ -115,8 +115,9 @@ int OSResults::get_best_script(int orientation_id) const {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
const char *script = unicharset->get_script_from_script_id(j);
if (strcmp(script, "Common") && strcmp(script, "NULL")) {
if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id])
if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id]) {
max_id = j;
}
}
}
return max_id;
@ -144,8 +145,9 @@ void OSResults::print_scores(int orientation_id) const {
void OSResults::accumulate(const OSResults &osr) {
for (int i = 0; i < 4; ++i) {
orientations[i] += osr.orientations[i];
for (int j = 0; j < kMaxNumberOfScripts; ++j)
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
scripts_na[i][j] += osr.scripts_na[i][j];
}
}
unicharset = osr.unicharset;
update_best_orientation();
@ -188,16 +190,18 @@ int orientation_and_script_detection(const char *filename, OSResults *osr,
std::string name = filename; // truncated name
const char *lastdot = strrchr(name.c_str(), '.');
if (lastdot != nullptr)
if (lastdot != nullptr) {
name[lastdot - name.c_str()] = '\0';
}
ASSERT_HOST(tess->pix_binary() != nullptr);
int width = pixGetWidth(tess->pix_binary());
int height = pixGetHeight(tess->pix_binary());
BLOCK_LIST blocks;
if (!read_unlv_file(name, width, height, &blocks))
if (!read_unlv_file(name, width, height, &blocks)) {
FullPageBlock(width, height, &blocks);
}
// Try to remove non-text regions from consideration.
TO_BLOCK_LIST land_blocks, port_blocks;
@ -228,8 +232,9 @@ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
TO_BLOCK *to_block = block_it.data();
if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText())
if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText()) {
continue;
}
BLOBNBOX_IT bbox_it;
bbox_it.set_to_list(&to_block->blobs);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
@ -239,18 +244,21 @@ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *
++blobs_total;
// Catch illegal value of box width and avoid division by zero.
if (box.width() == 0)
if (box.width() == 0) {
continue;
}
// TODO: Can height and width be negative? If not, remove fabs.
float y_x = std::fabs((box.height() * 1.0f) / box.width());
float x_y = 1.0f / y_x;
// Select a >= 1.0 ratio
float ratio = x_y > y_x ? x_y : y_x;
// Blob is ambiguous
if (ratio > kSizeRatioToReject)
if (ratio > kSizeRatioToReject) {
continue;
if (box.height() < kMinAcceptableBlobHeight)
}
if (box.height() < kMinAcceptableBlobHeight) {
continue;
}
filtered_it.add_to_end(bbox);
}
}
@ -268,8 +276,9 @@ int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blo
OSResults osr_;
int minCharactersToTry = tess->min_characters_to_try;
int maxCharactersToTry = 5 * minCharactersToTry;
if (osr == nullptr)
if (osr == nullptr) {
osr = &osr_;
}
osr->unicharset = &tess->unicharset;
OrientationDetector o(allowed_scripts, osr);
@ -391,8 +400,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
}
}
}
if (total_blob_o_score == 0.0)
if (total_blob_o_score == 0.0) {
return false;
}
// Fill in any blanks with the worst score of the others. This is better than
// picking an arbitrary probability for it and way better than -inf.
float worst_score = 0.0f;
@ -400,8 +410,9 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
for (float f : blob_o_score) {
if (f > 0.0f) {
++num_good_scores;
if (worst_score == 0.0f || f < worst_score)
if (worst_score == 0.0f || f < worst_score) {
worst_score = f;
}
}
}
if (num_good_scores == 1) {
@ -468,15 +479,18 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
// Check that the choice is in an allowed script.
int s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == id)
if ((*allowed_scripts_)[s] == id) {
break;
}
}
if (s == allowed_scripts_->size())
if (s == allowed_scripts_->size()) {
continue; // Not found in list.
}
}
// Script already processed before.
if (done[id])
if (done[id]) {
continue;
}
done[id] = true;
unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
@ -491,14 +505,17 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
++script_count;
}
if (strlen(prev_unichar) == 1)
if (unichar[0] >= '0' && unichar[0] <= '9')
if (strlen(prev_unichar) == 1) {
if (unichar[0] >= '0' && unichar[0] <= '9') {
break;
}
}
// if script_count is >= 2, character is ambiguous, skip other matches
// since they are useless.
if (script_count >= 2)
if (script_count >= 2) {
break;
}
}
// Character is non ambiguous
if (script_count == 1) {
@ -521,12 +538,15 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
}
// Update Japanese / Korean pseudo-scripts
if (prev_id == katakana_id_)
if (prev_id == katakana_id_) {
osr_->scripts_na[i][japanese_id_] += 1.0;
if (prev_id == hiragana_id_)
}
if (prev_id == hiragana_id_) {
osr_->scripts_na[i][japanese_id_] += 1.0;
if (prev_id == hangul_id_)
}
if (prev_id == hangul_id_) {
osr_->scripts_na[i][korean_id_] += 1.0;
}
if (prev_id == han_id_) {
osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;

View File

@ -65,14 +65,16 @@ void Tesseract::output_pass( // Tess output pass //send to api
(tessedit_write_block_separators && (page_res_it.block() != page_res_it.next_block())) ||
(page_res_it.next_word() == nullptr);
if (page_res_it.next_word() != nullptr)
if (page_res_it.next_word() != nullptr) {
nextword = page_res_it.next_word()->word;
else
} else {
nextword = nullptr;
if (page_res_it.next_block() != nullptr)
}
if (page_res_it.next_block() != nullptr) {
nextblock = page_res_it.next_block()->block;
else
} else {
nextblock = nullptr;
}
// regardless of tilde crunching
write_results(page_res_it,
determine_newline_type(page_res_it.word()->word, page_res_it.block()->block,
@ -130,18 +132,20 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
stats_.last_char_was_tilde = false;
}
if (force_eol)
if (force_eol) {
stats_.write_results_empty_block = true;
}
return;
}
/* NORMAL PROCESSING of non tilde crunched words */
stats_.tilde_crunch_written = false;
if (newline_type)
if (newline_type) {
stats_.last_char_was_newline = true;
else
} else {
stats_.last_char_was_newline = false;
}
stats_.write_results_empty_block = force_eol; // about to write a real word
if (unlv_tilde_crunching && stats_.last_char_was_tilde && (word->word->space() == 0) &&
@ -151,16 +155,18 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
words have been removed */
word->MergeAdjacentBlobs(0);
}
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes))
if (newline_type || (word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes)) {
stats_.last_char_was_tilde = false;
else {
} else {
if (word->reject_map.length() > 0) {
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space)
if (word->best_choice->unichar_id(word->reject_map.length() - 1) == space) {
stats_.last_char_was_tilde = true;
else
} else {
stats_.last_char_was_tilde = false;
} else if (word->word->space() > 0)
}
} else if (word->word->space() > 0) {
stats_.last_char_was_tilde = false;
}
/* else it is unchanged as there are no output chars */
}
@ -176,15 +182,17 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
if (tessedit_zero_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if (word->reject_map[i].rejected())
if (word->reject_map[i].rejected()) {
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
if (tessedit_minimal_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected())
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected()) {
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
@ -209,12 +217,15 @@ char determine_newline_type( // test line ends
TBOX next_box; // next word
TBOX block_box; // block bounding
if (!word->flag(W_EOL))
if (!word->flag(W_EOL)) {
return false; // not end of line
if (next_word == nullptr || next_block == nullptr || block != next_block)
}
if (next_word == nullptr || next_block == nullptr || block != next_block) {
return CTRL_NEWLINE;
if (next_word->space() > 0)
}
if (next_word->space() > 0) {
return CTRL_HARDLINE; // it is tabbed
}
word_box = word->bounding_box();
next_box = next_word->bounding_box();
block_box = block->pdblk.bounding_box();
@ -236,8 +247,9 @@ char determine_newline_type( // test line ends
*************************************************************************/
UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated?
int i;
for (i = 0; ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); ++i)
for (i = 0; ((i < word->reject_map.length()) && (word->reject_map[i].rejected())); ++i) {
;
}
if (i < word->reject_map.length()) {
return word->best_choice->unichar_id(i);
@ -265,61 +277,72 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
if (suspect_level == 0) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected())
if (word_res->reject_map[i].rejected()) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
return;
}
if (suspect_level >= 3)
if (suspect_level >= 3) {
return; // Use defaults
}
/* NOW FOR LEVELS 1 and 2 Find some stuff to unreject*/
if (safe_dict_word(word_res) && (count_alphas(word) > suspect_short_words)) {
/* Unreject alphas in dictionary words */
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() && uchset.get_isalpha(word.unichar_id(i)))
if (word_res->reject_map[i].rejected() && uchset.get_isalpha(word.unichar_id(i))) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
rating_per_ch = word.rating() / word_res->reject_map.length();
if (rating_per_ch >= suspect_rating_per_ch)
if (rating_per_ch >= suspect_rating_per_ch) {
return; // Don't touch bad ratings
}
if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
/* Unreject any Tess Acceptable word - but NOT tess reject chs*/
for (i = 0; i < len; ++i) {
if (word_res->reject_map[i].rejected() && (!uchset.eq(word.unichar_id(i), " ")))
if (word_res->reject_map[i].rejected() && (!uchset.eq(word.unichar_id(i), " "))) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected()) {
if (word_res->reject_map[i].flag(R_DOC_REJ))
if (word_res->reject_map[i].flag(R_DOC_REJ)) {
word_res->reject_map[i].setrej_minimal_rej_accept();
if (word_res->reject_map[i].flag(R_BLOCK_REJ))
}
if (word_res->reject_map[i].flag(R_BLOCK_REJ)) {
word_res->reject_map[i].setrej_minimal_rej_accept();
if (word_res->reject_map[i].flag(R_ROW_REJ))
}
if (word_res->reject_map[i].flag(R_ROW_REJ)) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
if (suspect_level == 2)
if (suspect_level == 2) {
return;
}
if (!suspect_constrain_1Il || (word_res->reject_map.length() <= suspect_short_words)) {
for (i = 0; i < len; i++) {
if (word_res->reject_map[i].rejected()) {
if ((word_res->reject_map[i].flag(R_1IL_CONFLICT) ||
word_res->reject_map[i].flag(R_POSTNN_1IL)))
word_res->reject_map[i].flag(R_POSTNN_1IL))) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT))
if (!suspect_constrain_1Il && word_res->reject_map[i].flag(R_MM_REJECT)) {
word_res->reject_map[i].setrej_minimal_rej_accept();
}
}
}
}
@ -343,8 +366,9 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i)))
if (word.unicharset()->get_isalpha(word.unichar_id(i))) {
count++;
}
}
return count;
}
@ -353,8 +377,9 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i)) ||
word.unicharset()->get_isdigit(word.unichar_id(i)))
word.unicharset()->get_isdigit(word.unichar_id(i))) {
count++;
}
}
return count;
}
@ -362,26 +387,29 @@ int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
bool Tesseract::acceptable_number_string(const char *s, const char *lengths) {
bool prev_digit = false;
if (*lengths == 1 && *s == '(')
if (*lengths == 1 && *s == '(') {
s++;
}
if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-')))
if (*lengths == 1 && ((*s == '$') || (*s == '.') || (*s == '+') || (*s == '-'))) {
s++;
}
for (; *s != '\0'; s += *(lengths++)) {
if (unicharset.get_isdigit(s, *lengths))
if (unicharset.get_isdigit(s, *lengths)) {
prev_digit = true;
else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-'))))
} else if (prev_digit && (*lengths == 1 && ((*s == '.') || (*s == ',') || (*s == '-')))) {
prev_digit = false;
else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') &&
((*s == '%') || (*s == ')')))
} else if (prev_digit && *lengths == 1 && (*(s + *lengths) == '\0') &&
((*s == '%') || (*s == ')'))) {
return true;
else if (prev_digit && *lengths == 1 && (*s == '%') &&
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
(*(s + *lengths + *(lengths + 1)) == '\0'))
} else if (prev_digit && *lengths == 1 && (*s == '%') &&
(*(lengths + 1) == 1 && *(s + *lengths) == ')') &&
(*(s + *lengths + *(lengths + 1)) == '\0')) {
return true;
else
} else {
return false;
}
}
return true;
}

View File

@ -107,8 +107,9 @@ void PageIterator::Begin() {
}
void PageIterator::RestartParagraph() {
if (it_->block() == nullptr)
if (it_->block() == nullptr) {
return; // At end of the document.
}
PAGE_RES_IT para(page_res_);
PAGE_RES_IT next_para(para);
next_para.forward_paragraph();
@ -145,10 +146,12 @@ void PageIterator::RestartRow() {
* the appropriate language has been loaded into Tesseract.
*/
bool PageIterator::Next(PageIteratorLevel level) {
if (it_->block() == nullptr)
if (it_->block() == nullptr) {
return false; // Already at the end!
if (it_->word() == nullptr)
}
if (it_->word() == nullptr) {
level = RIL_BLOCK;
}
switch (level) {
case RIL_BLOCK:
@ -158,20 +161,24 @@ bool PageIterator::Next(PageIteratorLevel level) {
it_->forward_paragraph();
break;
case RIL_TEXTLINE:
for (it_->forward_with_empties(); it_->row() == it_->prev_row(); it_->forward_with_empties())
for (it_->forward_with_empties(); it_->row() == it_->prev_row();
it_->forward_with_empties()) {
;
}
break;
case RIL_WORD:
it_->forward_with_empties();
break;
case RIL_SYMBOL:
if (cblob_it_ != nullptr)
if (cblob_it_ != nullptr) {
cblob_it_->forward();
}
++blob_index_;
if (blob_index_ >= word_length_)
if (blob_index_ >= word_length_) {
it_->forward_with_empties();
else
} else {
return true;
}
break;
}
BeginWord(0);
@ -184,10 +191,12 @@ bool PageIterator::Next(PageIteratorLevel level) {
* moved to the start of a RIL_PARA.
*/
bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
if (it_->block() == nullptr)
if (it_->block() == nullptr) {
return false; // Already at the end!
if (it_->word() == nullptr)
}
if (it_->word() == nullptr) {
return true; // In an image block.
}
switch (level) {
case RIL_BLOCK:
return blob_index_ == 0 && it_->block() != it_->prev_block();
@ -209,8 +218,9 @@ bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
* given level. (e.g. the last word in a line, the last line in a block)
*/
bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const {
if (Empty(element))
if (Empty(element)) {
return true; // Already at the end!
}
// The result is true if we step forward by element and find we are
// at the end of the page or at the beginning of *all* levels in:
// [level, element).
@ -219,12 +229,14 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel e
// word on a line, so we also have to be at the first symbol in a word.
PageIterator next(*this);
next.Next(element);
if (next.Empty(element))
if (next.Empty(element)) {
return true; // Reached the end of the page.
}
while (element > level) {
element = static_cast<PageIteratorLevel>(element - 1);
if (!next.IsAtBeginningOf(element))
if (!next.IsAtBeginningOf(element)) {
return false;
}
}
return true;
}
@ -237,12 +249,15 @@ bool PageIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel e
*/
int PageIterator::Cmp(const PageIterator &other) const {
  // The word-level position dominates: any non-zero result from the
  // underlying iterators' cmp() is returned unchanged.
  int word_cmp = it_->cmp(*other.it_);
  if (word_cmp != 0) {
    return word_cmp;
  }
  // Same word position: the blob index breaks the tie.
  if (blob_index_ < other.blob_index_) {
    return -1;
  }
  if (blob_index_ == other.blob_index_) {
    return 0;
  }
  return 1;
}
@ -267,8 +282,9 @@ int PageIterator::Cmp(const PageIterator &other) const {
*/
bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const {
if (Empty(level))
if (Empty(level)) {
return false;
}
TBOX box;
PARA *para = nullptr;
switch (level) {
@ -285,10 +301,11 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *
box = it_->word()->word->restricted_bounding_box(include_upper_dots_, include_lower_dots_);
break;
case RIL_SYMBOL:
if (cblob_it_ == nullptr)
if (cblob_it_ == nullptr) {
box = it_->word()->box_word->BlobBox(blob_index_);
else
} else {
box = cblob_it_->data()->bounding_box();
}
}
if (level == RIL_PARA) {
PageIterator other = *this;
@ -300,8 +317,9 @@ bool PageIterator::BoundingBoxInternal(PageIteratorLevel level, int *left, int *
}
} while (other.Next(RIL_TEXTLINE));
}
if (level != RIL_SYMBOL || cblob_it_ != nullptr)
if (level != RIL_SYMBOL || cblob_it_ != nullptr) {
box.rotate(it_->block()->block->re_rotation());
}
// Now we have a box in tesseract coordinates relative to the image rectangle,
// we have to convert the coords to a top-down system.
const int pix_height = pixGetHeight(tesseract_->pix_binary());
@ -326,8 +344,9 @@ bool PageIterator::BoundingBox(PageIteratorLevel level, int *left, int *top, int
bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int *left, int *top,
int *right, int *bottom) const {
if (!BoundingBoxInternal(level, left, top, right, bottom))
if (!BoundingBoxInternal(level, left, top, right, bottom)) {
return false;
}
// Convert to the coordinate system of the original image.
*left = ClipToRange(*left / scale_ + rect_left_ - padding, rect_left_, rect_left_ + rect_width_);
*top = ClipToRange(*top / scale_ + rect_top_ - padding, rect_top_, rect_top_ + rect_height_);
@ -340,32 +359,39 @@ bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding, int *
/**
 * Returns true if there is no object at the given level at the current
 * position, false otherwise.
 */
bool PageIterator::Empty(PageIteratorLevel level) const {
  if (it_->block() == nullptr) {
    return true; // Already at the end!
  }
  // A block without a word only counts as non-empty at RIL_BLOCK level.
  if (it_->word() == nullptr && level != RIL_BLOCK) {
    return true; // image block
  }
  if (level == RIL_SYMBOL && blob_index_ >= word_length_) {
    return true; // Zero length word, or already at the end of it.
  }
  return false;
}
/** Returns the type of the current block.
 * See tesseract/publictypes.h for PolyBlockType.
 * Yields PT_UNKNOWN when there is no current block, and PT_FLOWING_TEXT when
 * the block carries no poly_block. */
PolyBlockType PageIterator::BlockType() const {
  if (it_->block() == nullptr || it_->block()->block == nullptr) {
    return PT_UNKNOWN; // Already at the end!
  }
  if (it_->block()->block->pdblk.poly_block() == nullptr) {
    return PT_FLOWING_TEXT; // No layout analysis used - assume text.
  }
  return it_->block()->block->pdblk.poly_block()->isA();
}
/** Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. */
Pta *PageIterator::BlockPolygon() const {
if (it_->block() == nullptr || it_->block()->block == nullptr)
if (it_->block() == nullptr || it_->block()->block == nullptr) {
return nullptr; // Already at the end!
if (it_->block()->block->pdblk.poly_block() == nullptr)
}
if (it_->block()->block->pdblk.poly_block() == nullptr) {
return nullptr; // No layout analysis used - no polygon.
}
// Copy polygon, so we can unrotate it to image coordinates.
POLY_BLOCK *internal_poly = it_->block()->block->pdblk.poly_block();
ICOORDELT_LIST vertices;
@ -411,10 +437,12 @@ Pta *PageIterator::BlockPolygon() const {
*/
Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
int left, top, right, bottom;
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
if (!BoundingBoxInternal(level, &left, &top, &right, &bottom)) {
return nullptr;
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0)
}
if (level == RIL_SYMBOL && cblob_it_ != nullptr && cblob_it_->data()->area() != 0) {
return cblob_it_->data()->render();
}
Box *box = boxCreate(left, top, right - left, bottom - top);
Pix *pix = pixClipRectangle(tesseract_->pix_binary(), box, nullptr);
boxDestroy(&box);
@ -447,10 +475,12 @@ Pix *PageIterator::GetBinaryImage(PageIteratorLevel level) const {
Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left,
int *top) const {
int right, bottom;
if (!BoundingBox(level, left, top, &right, &bottom))
if (!BoundingBox(level, left, top, &right, &bottom)) {
return nullptr;
if (original_img == nullptr)
}
if (original_img == nullptr) {
return GetBinaryImage(level);
}
// Expand the box.
*left = std::max(*left - padding, 0);
@ -487,8 +517,9 @@ Pix *PageIterator::GetImage(PageIteratorLevel level, int padding, Pix *original_
* WARNING: with vertical text, baselines may be vertical!
*/
bool PageIterator::Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return false; // Already at the end!
}
ROW *row = it_->row()->row;
WERD *word = it_->word()->word;
TBOX box =
@ -552,8 +583,9 @@ void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just, bool *
bool *is_crown, int *first_line_indent) const {
*just = tesseract::JUSTIFICATION_UNKNOWN;
if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
!it_->row()->row->para()->model)
!it_->row()->row->para()->model) {
return;
}
PARA *para = it_->row()->row->para();
*is_list_item = para->is_list_item;
@ -596,13 +628,15 @@ void PageIterator::BeginWord(int offset) {
word_ = word_res->word;
ASSERT_HOST(word_->cblob_list() != nullptr);
word_length_ = word_->cblob_list()->length();
if (cblob_it_ == nullptr)
if (cblob_it_ == nullptr) {
cblob_it_ = new C_BLOB_IT;
}
cblob_it_->set_to_list(word_->cblob_list());
}
for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
if (cblob_it_ != nullptr)
if (cblob_it_ != nullptr) {
cblob_it_->forward();
}
}
}

View File

@ -109,8 +109,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') {
std::string name = input_file;
const char *lastdot = strrchr(name.c_str(), '.');
if (lastdot != nullptr)
if (lastdot != nullptr) {
name[lastdot - name.c_str()] = '\0';
}
read_unlv_file(name, width, height, blocks);
}
if (blocks->empty()) {
@ -138,8 +139,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
auto_page_seg_ret_val =
AutoPageSeg(pageseg_mode, blocks, &to_blocks,
enable_noise_removal ? &diacritic_blobs : nullptr, osd_tess, osr);
if (pageseg_mode == PSM_OSD_ONLY)
if (pageseg_mode == PSM_OSD_ONLY) {
return auto_page_seg_ret_val;
}
// To create blobs from the image region bounds uncomment this line:
// to_blocks.clear(); // Uncomment to go back to the old mode.
} else {
@ -159,8 +161,9 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract
}
if (blocks->empty()) {
if (textord_debug_tabfind)
if (textord_debug_tabfind) {
tprintf("Empty page\n");
}
return 0; // AutoPageSeg found an empty page.
}
bool splitting = pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;
@ -223,14 +226,16 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
&found_blocks, diacritic_blobs, to_blocks);
if (result >= 0)
if (result >= 0) {
finder->GetDeskewVectors(&deskew_, &reskew_);
}
delete finder;
}
pixDestroy(&photomask_pix);
pixDestroy(&musicmask_pix);
if (result < 0)
if (result < 0) {
return result;
}
blocks->clear();
BLOCK_IT block_it(blocks);
@ -297,8 +302,9 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
pixa_debug_.AddPix(pix_no_image_, "NoImages");
pixDestroy(&pix_no_image_);
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode))
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
v_lines.clear();
}
// The rest of the algorithm uses the usual connected components.
textord_.find_components(pix_binary_, blocks, to_blocks);

View File

@ -33,8 +33,9 @@ void Tesseract::process_selected_words(
for (PAGE_RES_IT page_res_it(page_res); page_res_it.word() != nullptr; page_res_it.forward()) {
WERD *word = page_res_it.word()->word;
if (word->bounding_box().overlap(selection_box)) {
if (!(this->*word_processor)(&page_res_it))
if (!(this->*word_processor)(&page_res_it)) {
return;
}
}
}
}

View File

@ -98,14 +98,16 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
for (int c = 0; c < num_columns; c++) {
int num_unicodes = 0;
for (char i : row[c]) {
if ((i & 0xC0) != 0x80)
if ((i & 0xC0) != 0x80) {
num_unicodes++;
}
}
if (c >= max_col_widths.size()) {
max_col_widths.push_back(num_unicodes);
} else {
if (num_unicodes > max_col_widths[c])
if (num_unicodes > max_col_widths[c]) {
max_col_widths[c] = num_unicodes;
}
}
}
}
@ -117,8 +119,9 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
for (const auto &row : rows) {
for (int c = 0; c < row.size(); c++) {
if (c > 0)
if (c > 0) {
tprintf("%s", colsep);
}
tprintf(col_width_patterns[c].c_str(), row[c].c_str());
}
tprintf("\n");
@ -126,8 +129,9 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
}
// Returns word wrapped as kRLE + word + kPDF when rtlify is true, and word
// unchanged otherwise.
// NOTE(review): kRLE/kPDF are presumably the Unicode right-to-left-embedding
// and pop-directional-formatting marks - confirm against their definitions.
static std::string RtlEmbed(const std::string &word, bool rtlify) {
  if (rtlify) {
    return std::string(kRLE) + word + std::string(kPDF);
  }
  return word;
}
@ -173,8 +177,9 @@ static void PrintDetectorState(const ParagraphTheory &theory,
// When should_print is set, prints a "# <phase>" header line followed by the
// detector state for the given theory and rows; otherwise does nothing.
static void DebugDump(bool should_print, const char *phase, const ParagraphTheory &theory,
                      const std::vector<RowScratchRegisters> &rows) {
  if (!should_print) {
    return;
  }
  tprintf("# %s\n", phase);
  PrintDetectorState(theory, rows);
}
@ -223,8 +228,9 @@ static const char *SkipChars(const char *str, bool (*skip)(int)) {
}
// Returns str advanced by one character if its first character is one of the
// characters in toskip; otherwise returns str unchanged.
static const char *SkipOne(const char *str, const char *toskip) {
  // The explicit end-of-string test matters: strchr() treats the terminating
  // NUL as part of toskip, so without it an empty str would be advanced past
  // its terminator.
  if (*str != '\0' && strchr(toskip, *str)) {
    return str + 1;
  }
  return str;
}
@ -251,16 +257,18 @@ static bool LikelyListNumeral(const std::string &word) {
if (numeral_end == numeral_start) {
// If there's a single latin letter, we can use that.
numeral_end = SkipChars(numeral_start, IsLatinLetter);
if (numeral_end - numeral_start != 1)
if (numeral_end - numeral_start != 1) {
break;
}
}
}
// We got some sort of numeral.
num_segments++;
// Skip any trailing parens or punctuation.
pos = SkipChars(SkipChars(numeral_end, kClose), kSep);
if (pos == numeral_end)
if (pos == numeral_end) {
break;
}
}
return *pos == '\0';
}
@ -278,8 +286,9 @@ bool AsciiLikelyListItem(const std::string &word) {
// Return the first Unicode Codepoint from werd[pos].
// Returns 0 when u or werd is null, or when pos is not a valid index into
// werd, i.e. lies outside [0, werd->length()).
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos) {
  // Valid positions run from 0 to length() - 1, so pos == length() has to be
  // rejected too instead of being handed on to unichar_id().
  if (!u || !werd || pos < 0 || pos >= werd->length()) {
    return 0;
  }
  return UNICHAR(u->id_to_unichar(werd->unichar_id(pos)), -1).first_uni();
}
@ -308,15 +317,17 @@ private:
};
// Starting at pos, steps over consecutive positions whose unichar id the
// unicharset reports as punctuation. Returns the first position that is not
// punctuation, or wordlen_ if the run reaches the end of the word.
int UnicodeSpanSkipper::SkipPunc(int pos) {
  while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) {
    pos++;
  }
  return pos;
}
// Starting at pos, steps over consecutive positions that are digits: either
// the unicharset flags the unichar id as a digit, or IsDigitLike() accepts its
// first codepoint. Returns the first non-digit position, or wordlen_.
int UnicodeSpanSkipper::SkipDigits(int pos) {
  while (pos < wordlen_ &&
         (u_->get_isdigit(word_->unichar_id(pos)) || IsDigitLike(UnicodeFor(u_, word_, pos)))) {
    pos++;
  }
  return pos;
}
@ -324,16 +335,18 @@ int UnicodeSpanSkipper::SkipRomans(int pos) {
const char *kRomans = "ivxlmdIVXLMD";
while (pos < wordlen_) {
int ch = UnicodeFor(u_, word_, pos);
if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr)
if (ch >= 0xF0 || strchr(kRomans, ch) == nullptr) {
break;
}
pos++;
}
return pos;
}
// Starting at pos, steps over consecutive positions whose unichar id the
// unicharset reports as alphabetic. Returns the first non-alphabetic
// position, or wordlen_ if the run reaches the end of the word.
int UnicodeSpanSkipper::SkipAlpha(int pos) {
  while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) {
    pos++;
  }
  return pos;
}
@ -367,32 +380,36 @@ static bool LikelyListMarkUnicode(int ch) {
// start a list item. Some examples include:
// A I iii. VI (2) 3.5. [C-4]
static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
  // A lone list-mark character is accepted outright.
  if (werd->length() == 1 && LikelyListMarkUnicode(UnicodeFor(u, werd, 0))) {
    return true;
  }

  UnicodeSpanSkipper m(u, werd);
  int num_segments = 0;
  int pos = 0;
  // Consume up to three "numeral + trailing punctuation" segments.
  while (pos < werd->length() && num_segments < 3) {
    // At most one leading punctuation character is tolerated per segment.
    int numeral_start = m.SkipPunc(pos);
    if (numeral_start > pos + 1) {
      break;
    }
    // Try roman numerals first, then digits, then a single letter.
    int numeral_end = m.SkipRomans(numeral_start);
    if (numeral_end == numeral_start) {
      numeral_end = m.SkipDigits(numeral_start);
      if (numeral_end == numeral_start) {
        // If there's a single latin letter, we can use that.
        numeral_end = m.SkipAlpha(numeral_start);
        if (numeral_end - numeral_start != 1) {
          break;
        }
      }
    }
    // We got some sort of numeral.
    num_segments++;
    // Skip any trailing punctuation.
    pos = m.SkipPunc(numeral_end);
    if (pos == numeral_end) {
      break;
    }
  }
  // Only a word that was consumed completely counts as a list item.
  return pos == werd->length();
}
@ -506,10 +523,12 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
int model_numbers = 0;
for (const auto &hypothese : hypotheses_) {
if (hypothese.model == nullptr)
if (hypothese.model == nullptr) {
continue;
if (model_numbers > 0)
}
if (model_numbers > 0) {
model_string += ",";
}
if (StrongModel(hypothese.model)) {
model_string += std::to_string(1 + theory.IndexOf(hypothese.model));
} else if (hypothese.model == kCrownLeft) {
@ -519,8 +538,9 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
}
model_numbers++;
}
if (model_numbers == 0)
if (model_numbers == 0) {
model_string += "0";
}
dbg.push_back(model_string);
}
@ -534,8 +554,9 @@ void RowScratchRegisters::Init(const RowInfo &row) {
}
LineType RowScratchRegisters::GetLineType() const {
if (hypotheses_.empty())
if (hypotheses_.empty()) {
return LT_UNKNOWN;
}
bool has_start = false;
bool has_body = false;
for (const auto &hypothese : hypotheses_) {
@ -551,19 +572,22 @@ LineType RowScratchRegisters::GetLineType() const {
break;
}
}
if (has_start && has_body)
if (has_start && has_body) {
return LT_MULTIPLE;
}
return has_start ? LT_START : LT_BODY;
}
LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const {
if (hypotheses_.empty())
if (hypotheses_.empty()) {
return LT_UNKNOWN;
}
bool has_start = false;
bool has_body = false;
for (const auto &hypothese : hypotheses_) {
if (hypothese.model != model)
if (hypothese.model != model) {
continue;
}
switch (hypothese.ty) {
case LT_START:
has_start = true;
@ -576,8 +600,9 @@ LineType RowScratchRegisters::GetLineType(const ParagraphModel *model) const {
break;
}
}
if (has_start && has_body)
if (has_start && has_body) {
return LT_MULTIPLE;
}
return has_start ? LT_START : LT_BODY;
}
@ -619,41 +644,47 @@ void RowScratchRegisters::AddBodyLine(const ParagraphModel *model) {
// Adds to *models (via push_back_new) the model of every hypothesis on this
// row that is of type LT_START and whose model satisfies StrongModel().
void RowScratchRegisters::StartHypotheses(SetOfModels *models) const {
  for (const auto &hypothese : hypotheses_) {
    if (hypothese.ty == LT_START && StrongModel(hypothese.model)) {
      push_back_new(*models, hypothese.model);
    }
  }
}
// Adds to *models (via push_back_new) the model of every hypothesis on this
// row whose model satisfies StrongModel(), whatever its line type.
void RowScratchRegisters::StrongHypotheses(SetOfModels *models) const {
  for (const auto &hypothese : hypotheses_) {
    if (StrongModel(hypothese.model)) {
      push_back_new(*models, hypothese.model);
    }
  }
}
// Adds to *models (via push_back_new) the model of every hypothesis on this
// row that has a non-null model.
void RowScratchRegisters::NonNullHypotheses(SetOfModels *models) const {
  for (const auto &hypothese : hypotheses_) {
    if (hypothese.model != nullptr) {
      push_back_new(*models, hypothese.model);
    }
  }
}
// Returns the model of this row's hypothesis if the row has exactly one
// hypothesis and it is of type LT_START; returns nullptr otherwise.
const ParagraphModel *RowScratchRegisters::UniqueStartHypothesis() const {
  if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_START) {
    return nullptr;
  }
  return hypotheses_[0].model;
}
// Returns the model of this row's hypothesis if the row has exactly one
// hypothesis and it is of type LT_BODY; returns nullptr otherwise.
const ParagraphModel *RowScratchRegisters::UniqueBodyHypothesis() const {
  if (hypotheses_.size() != 1 || hypotheses_[0].ty != LT_BODY) {
    return nullptr;
  }
  return hypotheses_[0].model;
}
// Discard any hypotheses whose model is not in the given list.
void RowScratchRegisters::DiscardNonMatchingHypotheses(const SetOfModels &models) {
if (models.empty())
if (models.empty()) {
return;
}
for (int h = hypotheses_.size() - 1; h >= 0; h--) {
if (!contains(models, hypotheses_[h].model)) {
hypotheses_.erase(hypotheses_.begin() + h);
@ -691,8 +722,9 @@ private:
// Returns the index of the cluster whose center is nearest to value. On a tie
// the lowest index wins, because a later cluster only replaces the current
// best when it is strictly closer. Returns 0 for an empty clusters vector, so
// callers must not index with the result in that case.
static int ClosestCluster(const std::vector<Cluster> &clusters, int value) {
  int best_index = 0;
  for (int i = 0; i < clusters.size(); i++) {
    if (abs(value - clusters[i].center) < abs(value - clusters[best_index].center)) {
      best_index = i;
    }
  }
  return best_index;
}
@ -716,8 +748,9 @@ void SimpleClusterer::GetClusters(std::vector<Cluster> *clusters) {
static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
int tolerance, std::vector<Cluster> *left_tabs,
std::vector<Cluster> *right_tabs) {
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end))
if (!AcceptableRowArgs(0, 1, __func__, rows, row_start, row_end)) {
return;
}
// First pass: toss all left and right indents into clusterers.
SimpleClusterer initial_lefts(tolerance);
SimpleClusterer initial_rights(tolerance);
@ -744,10 +777,12 @@ static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_st
// to how rare it is. These outliers get re-added if we end up having too
// few tab stops, to work with, however.
int infrequent_enough_to_ignore = 0;
if (row_end - row_start >= 8)
if (row_end - row_start >= 8) {
infrequent_enough_to_ignore = 1;
if (row_end - row_start >= 20)
}
if (row_end - row_start >= 20) {
infrequent_enough_to_ignore = 2;
}
for (int i = row_start; i < row_end; i++) {
int lidx = ClosestCluster(initial_left_tabs, (*rows)[i].lindent_);
@ -827,8 +862,9 @@ static void CalculateTabStops(std::vector<RowScratchRegisters> *rows, int row_st
// is greater than eop_threshold.
static void MarkRowsWithModel(std::vector<RowScratchRegisters> *rows, int row_start, int row_end,
const ParagraphModel *model, bool ltr, int eop_threshold) {
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end))
if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
return;
}
for (int row = row_start; row < row_end; row++) {
bool valid_first = ValidFirstLine(rows, row, model);
bool valid_body = ValidBodyLine(rows, row, model);
@ -895,8 +931,9 @@ struct GeometricClassifierState {
// Align tabs are the tab stops the text is aligned to.
// That is right_tabs for right-justified text and left_tabs for everything
// else.
const std::vector<Cluster> &AlignTabs() const {
  if (just == tesseract::JUSTIFICATION_RIGHT) {
    return right_tabs;
  }
  return left_tabs;
}
@ -906,8 +943,9 @@ struct GeometricClassifierState {
// this function comment, the offside tabs are the horizontal tab stops
// marking the beginning of ("Note", "this" and "marking").
const std::vector<Cluster> &OffsideTabs() const {
  // The mirror image of AlignTabs(): left_tabs for right-justified text,
  // right_tabs for everything else.
  if (just == tesseract::JUSTIFICATION_RIGHT) {
    return left_tabs;
  }
  return right_tabs;
}
@ -933,8 +971,9 @@ struct GeometricClassifierState {
}
// Reports a failure reason: prints "# <why>" followed by the rows, but only
// when debug_level is at least min_debug_level.
void Fail(int min_debug_level, const char *why) const {
  if (debug_level < min_debug_level) {
    return;
  }
  tprintf("# %s\n", why);
  PrintRows();
}
@ -1009,8 +1048,9 @@ static void GeometricClassifyThreeTabStopTextBlock(int debug_level, GeometricCla
for (int i = s.row_start; i < s.row_end; i++) {
if (s.IsFullRow(i)) {
num_full_rows++;
if (i == s.row_end - 1)
if (i == s.row_end - 1) {
last_row_full++;
}
}
}
@ -1093,8 +1133,9 @@ static void GeometricClassifyThreeTabStopTextBlock(int debug_level, GeometricCla
// far more "full" lines than "short" lines.
static void GeometricClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end))
if (!AcceptableRowArgs(debug_level, 4, __func__, rows, row_start, row_end)) {
return;
}
if (debug_level > 1) {
tprintf("###############################################\n");
tprintf("##### GeometricClassify( rows[%d:%d) ) ####\n", row_start, row_end);
@ -1257,24 +1298,27 @@ void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
// Returns the first model in models_ that is not center-justified and for
// which RowsFitModel(rows, start, end, model) holds, or nullptr if no model
// qualifies.
const ParagraphModel *ParagraphTheory::Fits(const std::vector<RowScratchRegisters> *rows,
                                            int start, int end) const {
  for (const auto *model : *models_) {
    // The cheap justification test comes first so RowsFitModel() is only run
    // for candidate models.
    if (model->justification() != JUSTIFICATION_CENTER && RowsFitModel(rows, start, end, model)) {
      return model;
    }
  }
  return nullptr;
}
// Adds to *models (via push_back_new) every model in models_ whose
// justification is not JUSTIFICATION_CENTER.
void ParagraphTheory::NonCenteredModels(SetOfModels *models) {
  for (const auto *model : *models_) {
    if (model->justification() != JUSTIFICATION_CENTER) {
      push_back_new(*models, model);
    }
  }
}
int ParagraphTheory::IndexOf(const ParagraphModel *model) const {
int i = 0;
for (const auto *m : *models_) {
if (m == model)
if (m == model) {
return i;
}
i++;
}
return -1;
@ -1330,10 +1374,12 @@ ParagraphModelSmearer::ParagraphModelSmearer(std::vector<RowScratchRegisters> *r
// see paragraphs_internal.h
void ParagraphModelSmearer::CalculateOpenModels(int row_start, int row_end) {
SetOfModels no_models;
if (row_start < row_start_)
if (row_start < row_start_) {
row_start = row_start_;
if (row_end > row_end_)
}
if (row_end > row_end_) {
row_end = row_end_;
}
for (int row = (row_start > 0) ? row_start - 1 : row_start; row < row_end; row++) {
if ((*rows_)[row].ri_->num_words == 0) {
@ -1366,8 +1412,9 @@ void ParagraphModelSmearer::Smear() {
// was recently used (an "open" model) which might model it well.
for (int i = row_start_; i < row_end_; i++) {
RowScratchRegisters &row = (*rows_)[i];
if (row.ri_->num_words == 0)
if (row.ri_->num_words == 0) {
continue;
}
// Step One:
// Figure out if there are "open" models which are left-aligned or
@ -1424,8 +1471,9 @@ void ParagraphModelSmearer::Smear() {
theory_->NonCenteredModels(&last_line_models);
}
for (auto model : last_line_models) {
if (ValidBodyLine(rows_, i, model))
if (ValidBodyLine(rows_, i, model)) {
row.AddBodyLine(model);
}
}
}
@ -1498,8 +1546,9 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
while (end > 0 && (model = (*rows)[end - 1].UniqueBodyHypothesis()) == nullptr) {
end--;
}
if (end == 0)
if (end == 0) {
break;
}
start = end - 1;
while (start >= 0 && (*rows)[start].UniqueBodyHypothesis() == model) {
start--; // walk back to the first line that is not the same body type.
@ -1510,21 +1559,24 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
}
start++;
// Now rows[start, end) is a sequence of unique body hypotheses of model.
if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER)
if (StrongModel(model) && model->justification() == JUSTIFICATION_CENTER) {
continue;
}
if (!StrongModel(model)) {
while (start > 0 && CrownCompatible(rows, start - 1, start, model))
while (start > 0 && CrownCompatible(rows, start - 1, start, model)) {
start--;
}
}
if (start == 0 || (!StrongModel(model)) ||
(StrongModel(model) && !ValidFirstLine(rows, start - 1, model))) {
// crownify rows[start, end)
const ParagraphModel *crown_model = model;
if (StrongModel(model)) {
if (model->justification() == JUSTIFICATION_LEFT)
if (model->justification() == JUSTIFICATION_LEFT) {
crown_model = kCrownLeft;
else
} else {
crown_model = kCrownRight;
}
}
(*rows)[start].SetUnknown();
(*rows)[start].AddStartLine(crown_model);
@ -1555,8 +1607,9 @@ static void DowngradeWeakestToCrowns(int debug_level, ParagraphTheory *theory,
// the common margin for each row in the run of rows[start, end).
void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows, int start,
int end, int percentile) {
if (!AcceptableRowArgs(0, 0, __func__, rows, start, end))
if (!AcceptableRowArgs(0, 0, __func__, rows, start, end)) {
return;
}
int lmin, lmax, rmin, rmax;
lmin = lmax = (*rows)[start].lmargin_ + (*rows)[start].lindent_;
@ -1564,8 +1617,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
for (int i = start; i < end; i++) {
RowScratchRegisters &sr = (*rows)[i];
sr.SetUnknown();
if (sr.ri_->num_words == 0)
if (sr.ri_->num_words == 0) {
continue;
}
UpdateRange(sr.lmargin_ + sr.lindent_, &lmin, &lmax);
UpdateRange(sr.rmargin_ + sr.rindent_, &rmin, &rmax);
}
@ -1573,8 +1627,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
STATS rights(rmin, rmax + 1);
for (int i = start; i < end; i++) {
RowScratchRegisters &sr = (*rows)[i];
if (sr.ri_->num_words == 0)
if (sr.ri_->num_words == 0) {
continue;
}
lefts.add(sr.lmargin_ + sr.lindent_, 1);
rights.add(sr.rmargin_ + sr.rindent_, 1);
}
@ -1593,8 +1648,9 @@ void RecomputeMarginsAndClearHypotheses(std::vector<RowScratchRegisters> *rows,
// Return the median inter-word space in rows[row_start, row_end).
int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start, int row_end) {
if (row_end < row_start + 1)
if (row_end < row_start + 1) {
return 1;
}
int word_height =
(rows[row_start].ri_->lword_box.height() + rows[row_end - 1].ri_->lword_box.height()) / 2;
int word_width =
@ -1606,8 +1662,9 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
}
}
int minimum_reasonable_space = word_height / 3;
if (minimum_reasonable_space < 2)
if (minimum_reasonable_space < 2) {
minimum_reasonable_space = 2;
}
int median = spacing_widths.median();
return (median > minimum_reasonable_space) ? median : minimum_reasonable_space;
}
@ -1616,8 +1673,9 @@ int InterwordSpace(const std::vector<RowScratchRegisters> &rows, int row_start,
// the end of the before line (knowing which way the text is aligned and read).
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after,
tesseract::ParagraphJustification justification) {
if (before.ri_->num_words == 0 || after.ri_->num_words == 0)
if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
return true;
}
if (justification == JUSTIFICATION_UNKNOWN) {
tprintf("Don't call FirstWordWouldHaveFit(r, s, JUSTIFICATION_UNKNOWN).\n");
@ -1630,8 +1688,9 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRe
}
available_space -= before.ri_->average_interword_space;
if (before.ri_->ltr)
if (before.ri_->ltr) {
return after.ri_->lword_box.width() < available_space;
}
return after.ri_->rword_box.width() < available_space;
}
@ -1639,16 +1698,19 @@ bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRe
// the end of the before line (not knowing which way the text goes) in a left
// or right alignment.
bool FirstWordWouldHaveFit(const RowScratchRegisters &before, const RowScratchRegisters &after) {
  // A row without words constrains nothing, so treat it as a fit.
  if (before.ri_->num_words == 0 || after.ri_->num_words == 0) {
    return true;
  }

  // The alignment is unknown here, so take the larger of the two indents of
  // the before line as the room that was left on it.
  int available_space = before.lindent_;
  if (before.rindent_ > available_space) {
    available_space = before.rindent_;
  }
  // Leave room for the space that would separate the moved word.
  available_space -= before.ri_->average_interword_space;

  // The word to test is the leftmost word of the after line for
  // left-to-right text and the rightmost word otherwise.
  if (before.ri_->ltr) {
    return after.ri_->lword_box.width() < available_space;
  }
  return after.ri_->rword_box.width() < available_space;
}
@ -1682,8 +1744,9 @@ static ParagraphModel InternalParagraphModelByOutline(
bool ltr = (ltr_line_count >= (end - start) / 2);
*consistent = true;
if (!AcceptableRowArgs(0, 2, __func__, rows, start, end))
if (!AcceptableRowArgs(0, 2, __func__, rows, start, end)) {
return ParagraphModel();
}
// Ensure the caller only passed us a region with a common rmargin and
// lmargin.
@ -1708,15 +1771,17 @@ static ParagraphModel InternalParagraphModelByOutline(
int cdiff = cmax - cmin;
if (rdiff > tolerance && ldiff > tolerance) {
if (cdiff < tolerance * 2) {
if (end - start < 3)
if (end - start < 3) {
return ParagraphModel();
}
return ParagraphModel(JUSTIFICATION_CENTER, 0, 0, 0, tolerance);
}
*consistent = false;
return ParagraphModel();
}
if (end - start < 3) // Don't return a model for two line paras.
if (end - start < 3) { // Don't return a model for two line paras.
return ParagraphModel();
}
// These booleans keep us from saying something is aligned left when the body
// left variance is too large.
@ -1737,14 +1802,16 @@ static ParagraphModel InternalParagraphModelByOutline(
// If the other is obviously ragged, it can't be the one aligned to.
// [Note the last line is included in this raggedness.]
if (tolerance < rdiff) {
if (body_admits_left_alignment && text_admits_left_alignment)
if (body_admits_left_alignment && text_admits_left_alignment) {
return left_model;
}
*consistent = false;
return ParagraphModel();
}
if (tolerance < ldiff) {
if (body_admits_right_alignment && text_admits_right_alignment)
if (body_admits_right_alignment && text_admits_right_alignment) {
return right_model;
}
*consistent = false;
return ParagraphModel();
}
@ -1756,10 +1823,12 @@ static ParagraphModel InternalParagraphModelByOutline(
int first_left = (*rows)[start].lindent_;
int first_right = (*rows)[start].rindent_;
if (ltr && body_admits_left_alignment && (first_left < lmin || first_left > lmax))
if (ltr && body_admits_left_alignment && (first_left < lmin || first_left > lmax)) {
return left_model;
if (!ltr && body_admits_right_alignment && (first_right < rmin || first_right > rmax))
}
if (!ltr && body_admits_right_alignment && (first_right < rmin || first_right > rmax)) {
return right_model;
}
*consistent = false;
return ParagraphModel();
@ -1785,13 +1854,16 @@ static ParagraphModel ParagraphModelByOutline(int debug_level,
// Do rows[start, end) form a single instance of the given paragraph model?
// True only if the row arguments are acceptable, rows[start] is a valid first
// line of the model, and every row in (start, end) is a valid body line of it.
bool RowsFitModel(const std::vector<RowScratchRegisters> *rows, int start, int end,
                  const ParagraphModel *model) {
  if (!AcceptableRowArgs(0, 1, __func__, rows, start, end)) {
    return false;
  }
  if (!ValidFirstLine(rows, start, model)) {
    return false;
  }
  for (int i = start + 1; i < end; i++) {
    if (!ValidBodyLine(rows, i, model)) {
      return false;
    }
  }
  return true;
}
@ -1872,15 +1944,18 @@ static void MarkStrongEvidence(std::vector<RowScratchRegisters> *rows, int row_s
static void ModelStrongEvidence(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, bool allow_flush_models,
ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) {
return;
}
int start = row_start;
while (start < row_end) {
while (start < row_end && (*rows)[start].GetLineType() != LT_START)
while (start < row_end && (*rows)[start].GetLineType() != LT_START) {
start++;
if (start >= row_end - 1)
}
if (start >= row_end - 1) {
break;
}
int tolerance = Epsilon((*rows)[start + 1].ri_->average_interword_space);
int end = start;
@ -1960,8 +2035,9 @@ static void ModelStrongEvidence(int debug_level, std::vector<RowScratchRegisters
// (4) Smear the paragraph models to cover surrounding text.
static void StrongEvidenceClassify(int debug_level, std::vector<RowScratchRegisters> *rows,
int row_start, int row_end, ParagraphTheory *theory) {
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end))
if (!AcceptableRowArgs(debug_level, 2, __func__, rows, row_start, row_end)) {
return;
}
if (debug_level > 1) {
tprintf("#############################################\n");
@ -2015,8 +2091,9 @@ static void ConvertHypothesizedModelRunsToParagraphs(int debug_level,
rows[start].NonNullHypotheses(&models);
if (!models.empty()) {
model = models[0];
if (rows[start].GetLineType(model) != LT_BODY)
if (rows[start].GetLineType(model) != LT_BODY) {
single_line_paragraph = true;
}
}
if (model && !single_line_paragraph) {
// walk back looking for more body lines and then a start line.
@ -2140,8 +2217,9 @@ static bool RowIsStranded(const std::vector<RowScratchRegisters> &rows, int row)
continues = false;
}
}
if (run_length > 2 || (!all_starts && run_length > 1))
if (run_length > 2 || (!all_starts && run_length > 1)) {
return false;
}
}
return true;
}
@ -2187,10 +2265,11 @@ static void LeftoverSegments(const std::vector<RowScratchRegisters> &rows,
}
if (needs_fixing) {
if (!to_fix->empty() && to_fix->back().end == i - 1)
if (!to_fix->empty() && to_fix->back().end == i - 1) {
to_fix->back().end = i;
else
} else {
to_fix->push_back(Interval(i, i));
}
}
}
// Convert inclusive intervals to half-open intervals.
@ -2328,8 +2407,9 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
if (!pit.Empty(RIL_WORD)) {
do {
fake_text += "x";
if (first_word)
if (first_word) {
info->lword_text += "x";
}
info->rword_text += "x";
if (pit.IsAtFinalElement(RIL_WORD, RIL_SYMBOL) &&
!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL)) {
@ -2339,8 +2419,9 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
}
} while (!pit.IsAtFinalElement(RIL_TEXTLINE, RIL_SYMBOL) && pit.Next(RIL_SYMBOL));
}
if (fake_text.size() == 0)
if (fake_text.size() == 0) {
return;
}
int lspaces = info->pix_ldistance / info->average_interword_space;
for (int i = 0; i < lspaces; i++) {
@ -2358,19 +2439,23 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
info->num_words = 0;
do {
if (word_res) {
if (!lword)
if (!lword) {
lword = word_res;
if (rword != word_res)
}
if (rword != word_res) {
info->num_words++;
}
rword = word_res;
}
word_res = page_res_it.forward();
} while (page_res_it.row() == this_row);
if (lword)
if (lword) {
info->lword_box = lword->word->bounding_box();
if (rword)
}
if (rword) {
info->rword_box = rword->word->bounding_box();
}
}
// Given a Tesseract Iterator pointing to a text line, fill in the paragraph
@ -2414,14 +2499,17 @@ static void InitializeRowInfo(bool after_recognition, const MutableIterator &it,
int trailing_ws_idx = strlen(text.get()); // strip trailing space
while (trailing_ws_idx > 0 &&
// isspace() only takes ASCII
isascii(text[trailing_ws_idx - 1]) && isspace(text[trailing_ws_idx - 1]))
isascii(text[trailing_ws_idx - 1]) && isspace(text[trailing_ws_idx - 1])) {
trailing_ws_idx--;
}
if (trailing_ws_idx > 0) {
int lspaces = info->pix_ldistance / info->average_interword_space;
for (int i = 0; i < lspaces; i++)
for (int i = 0; i < lspaces; i++) {
info->text += ' ';
for (int i = 0; i < trailing_ws_idx; i++)
}
for (int i = 0; i < trailing_ws_idx; i++) {
info->text += text[i];
}
}
if (info->text.size() == 0) {
@ -2440,8 +2528,9 @@ static void InitializeRowInfo(bool after_recognition, const MutableIterator &it,
werds.push_back(word_res);
ltr += word_res->AnyLtrCharsInWord() ? 1 : 0;
rtl += word_res->AnyRtlCharsInWord() ? 1 : 0;
if (word_res->word->flag(W_REP_CHAR))
if (word_res->word->flag(W_REP_CHAR)) {
num_leaders++;
}
}
word_res = page_res_it.forward();
} while (page_res_it.row() == this_row);
@ -2479,13 +2568,15 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
// Convert the Tesseract structures to RowInfos
// for the paragraph detection algorithm.
MutableIterator row(*block_start);
if (row.Empty(RIL_TEXTLINE))
if (row.Empty(RIL_TEXTLINE)) {
return; // end of input already.
}
std::vector<RowInfo> row_infos;
do {
if (!row.PageResIt()->row())
if (!row.PageResIt()->row()) {
continue; // empty row.
}
row.PageResIt()->row()->row->set_para(nullptr);
row_infos.emplace_back();
RowInfo &ri = row_infos.back();
@ -2498,10 +2589,12 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
int min_lmargin = row_infos[0].pix_ldistance;
int min_rmargin = row_infos[0].pix_rdistance;
for (unsigned i = 1; i < row_infos.size(); i++) {
if (row_infos[i].pix_ldistance < min_lmargin)
if (row_infos[i].pix_ldistance < min_lmargin) {
min_lmargin = row_infos[i].pix_ldistance;
if (row_infos[i].pix_rdistance < min_rmargin)
}
if (row_infos[i].pix_rdistance < min_rmargin) {
min_rmargin = row_infos[i].pix_rdistance;
}
}
if (min_lmargin > 0 || min_rmargin > 0) {
for (auto &row_info : row_infos) {
@ -2524,8 +2617,9 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
// Now stitch in the row_owners into the rows.
row = *block_start;
for (auto &row_owner : row_owners) {
while (!row.PageResIt()->row())
while (!row.PageResIt()->row()) {
row.Next(RIL_TEXTLINE);
}
row.PageResIt()->row()->row->set_para(row_owner);
row.Next(RIL_TEXTLINE);
}

View File

@ -123,8 +123,9 @@ const char *ParamContent::GetName() const {
return dIt->name_str();
} else if (param_type_ == VT_STRING) {
return sIt->name_str();
} else
} else {
return "ERROR: ParamContent::GetName()";
}
}
// Getter for the description.
@ -137,8 +138,9 @@ const char *ParamContent::GetDescription() const {
return dIt->info_str();
} else if (param_type_ == VT_STRING) {
return sIt->info_str();
} else
} else {
return nullptr;
}
}
// Getter for the value.

View File

@ -189,10 +189,11 @@ static void pgeditor_msg( // message display
class BlnEventHandler : public SVEventHandler {
public:
void Notify(const SVEvent *sv_event) override {
if (sv_event->type == SVET_DESTROY)
if (sv_event->type == SVET_DESTROY) {
bln_word_window = nullptr;
else if (sv_event->type == SVET_CLICK)
} else if (sv_event->type == SVET_CLICK) {
show_point(current_page_res, sv_event->x, sv_event->y);
}
}
};
@ -339,10 +340,12 @@ void Tesseract::do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RE
PAGE_RES_IT pr_it(current_page_res);
for (WERD_RES *word = pr_it.word(); word != nullptr; word = pr_it.forward()) {
(this->*word_painter)(&pr_it);
if (display_baselines && pr_it.row() != pr_it.prev_row())
if (display_baselines && pr_it.row() != pr_it.prev_row()) {
pr_it.row()->row->plot_baseline(image_win, ScrollView::GREEN);
if (display_blocks && pr_it.block() != pr_it.prev_block())
}
if (display_blocks && pr_it.block() != pr_it.prev_block()) {
pr_it.block()->block->pdblk.plot(image_win, block_count++, ScrollView::RED);
}
}
image_win->Update();
}
@ -357,8 +360,9 @@ void Tesseract::do_re_display(bool (tesseract::Tesseract::*word_painter)(PAGE_RE
void Tesseract::pgeditor_main(int width, int height, PAGE_RES *page_res) {
current_page_res = page_res;
if (current_page_res->block_res_list.empty())
if (current_page_res->block_res_list.empty()) {
return;
}
recog_done = false;
stillRunning = true;
@ -442,46 +446,52 @@ bool Tesseract::process_cmd_win_event( // UI command semantics
delete[] parameter;
break;
case BOUNDING_BOX_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_BOX);
else
} else {
word_display_mode.reset(DF_BOX);
}
mode = CHANGE_DISP_CMD_EVENT;
break;
case BLAMER_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_BLAMER);
else
} else {
word_display_mode.reset(DF_BLAMER);
}
do_re_display(&tesseract::Tesseract::word_display);
mode = CHANGE_DISP_CMD_EVENT;
break;
case CORRECT_TEXT_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_TEXT);
else
} else {
word_display_mode.reset(DF_TEXT);
}
mode = CHANGE_DISP_CMD_EVENT;
break;
case POLYGONAL_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_POLYGONAL);
else
} else {
word_display_mode.reset(DF_POLYGONAL);
}
mode = CHANGE_DISP_CMD_EVENT;
break;
case BL_NORM_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_BN_POLYGONAL);
else
} else {
word_display_mode.reset(DF_BN_POLYGONAL);
}
mode = CHANGE_DISP_CMD_EVENT;
break;
case BITMAP_CMD_EVENT:
if (new_value[0] == 'T')
if (new_value[0] == 'T') {
word_display_mode.set(DF_EDGE_STEP);
else
} else {
word_display_mode.reset(DF_EDGE_STEP);
}
mode = CHANGE_DISP_CMD_EVENT;
break;
case UNIFORM_DISP_CMD_EVENT:
@ -574,8 +584,9 @@ void Tesseract::process_image_event( // action in image win
if (event.type == SVET_SELECTION) {
down.set_x(event.x + event.x_size);
down.set_y(event.y + event.y_size);
if (mode == SHOW_POINT_CMD_EVENT)
if (mode == SHOW_POINT_CMD_EVENT) {
show_point(current_page_res, event.x, event.y);
}
}
up.set_x(event.x);
@ -707,43 +718,52 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
BoxWord *box_word = word_res->box_word;
WERD_CHOICE *best_choice = word_res->best_choice;
int length = box_word->length();
if (word_res->fontinfo == nullptr)
if (word_res->fontinfo == nullptr) {
return false;
}
const FontInfo &font_info = *word_res->fontinfo;
for (int i = 0; i < length; ++i) {
ScrollView::Color color = ScrollView::GREEN;
switch (color_mode) {
case CM_SUBSCRIPT:
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT)
if (best_choice->BlobPosition(i) == SP_SUBSCRIPT) {
color = ScrollView::RED;
}
break;
case CM_SUPERSCRIPT:
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT)
if (best_choice->BlobPosition(i) == SP_SUPERSCRIPT) {
color = ScrollView::RED;
}
break;
case CM_ITALIC:
if (font_info.is_italic())
if (font_info.is_italic()) {
color = ScrollView::RED;
}
break;
case CM_BOLD:
if (font_info.is_bold())
if (font_info.is_bold()) {
color = ScrollView::RED;
}
break;
case CM_FIXEDPITCH:
if (font_info.is_fixed_pitch())
if (font_info.is_fixed_pitch()) {
color = ScrollView::RED;
}
break;
case CM_SERIF:
if (font_info.is_serif())
if (font_info.is_serif()) {
color = ScrollView::RED;
}
break;
case CM_SMALLCAPS:
if (word_res->small_caps)
if (word_res->small_caps) {
color = ScrollView::RED;
}
break;
case CM_DROPCAPS:
if (best_choice->BlobPosition(i) == SP_DROPCAP)
if (best_choice->BlobPosition(i) == SP_DROPCAP) {
color = ScrollView::RED;
}
break;
// TODO(rays) underline is currently completely unsupported.
case CM_UNDERLINE:
@ -773,8 +793,9 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
image_win->Pen(c);
// cblob iterator
C_BLOB_IT c_it(word->cblob_list());
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
c_it.data()->bounding_box().plot(image_win);
}
displayed_something = true;
}
@ -829,8 +850,9 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
image_win->Pen(ScrollView::RED);
word_height = word_bb.height();
int text_height = 0.50 * word_height;
if (text_height > 20)
if (text_height > 20) {
text_height = 20;
}
image_win->TextAttributes("Arial", text_height, false, false, false);
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
@ -842,10 +864,11 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
displayed_something = true;
}
if (!displayed_something) // display BBox anyway
if (!displayed_something) { // display BBox anyway
word->bounding_box().plot(image_win,
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color),
static_cast<ScrollView::Color>((int32_t)editor_image_word_bb_color));
}
return true;
}
} // namespace tesseract
@ -912,14 +935,16 @@ void Tesseract::blob_feature_display(PAGE_RES *page_res, const TBOX &selection_b
// Display baseline features.
ScrollView *bl_win = CreateFeatureSpaceWindow("BL Features", 512, 0);
ClearFeatureSpaceWindow(baseline, bl_win);
for (auto &bl_feature : bl_features)
for (auto &bl_feature : bl_features) {
RenderIntFeature(bl_win, &bl_feature, ScrollView::GREEN);
}
bl_win->Update();
// Display cn features.
ScrollView *cn_win = CreateFeatureSpaceWindow("CN Features", 512, 0);
ClearFeatureSpaceWindow(character, cn_win);
for (auto &cn_feature : cn_features)
for (auto &cn_feature : cn_features) {
RenderIntFeature(cn_win, &cn_feature, ScrollView::GREEN);
}
cn_win->Update();
it->DeleteCurrentWord();

View File

@ -43,8 +43,9 @@ FILE *Tesseract::init_recog_training(const char *filename) {
std::string output_fname = filename;
const char *lastdot = strrchr(output_fname.c_str(), '.');
if (lastdot != nullptr)
if (lastdot != nullptr) {
output_fname[lastdot - output_fname.c_str()] = '\0';
}
output_fname += ".txt";
FILE *output_file = fopen(output_fname.c_str(), "a+");
if (output_file == nullptr) {
@ -56,8 +57,9 @@ FILE *Tesseract::init_recog_training(const char *filename) {
// Copies the bounding box from page_res_it->word() to the given TBOX.
static bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox) {
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr)
while (page_res_it->block() != nullptr && page_res_it->word() == nullptr) {
page_res_it->forward();
}
if (page_res_it->word() != nullptr) {
*tbox = page_res_it->word()->word->bounding_box();
@ -85,8 +87,9 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
volatile ETEXT_DESC *monitor, FILE *output_file) {
std::string box_fname = filename;
const char *lastdot = strrchr(box_fname.c_str(), '.');
if (lastdot != nullptr)
if (lastdot != nullptr) {
box_fname[lastdot - box_fname.c_str()] = '\0';
}
box_fname += ".box";
// ReadNextBox() will close box_file
FILE *box_file = fopen(box_fname.c_str(), "r");
@ -142,8 +145,9 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
int total_words = 0;
for (page_res_it.restart_page(); page_res_it.block() != nullptr; page_res_it.forward()) {
if (page_res_it.word()) {
if (page_res_it.word()->uch_set == nullptr)
if (page_res_it.word()->uch_set == nullptr) {
page_res_it.word()->SetupFake(unicharset);
}
total_words++;
}
}
@ -164,8 +168,9 @@ static void PrintPath(int length, const BLOB_CHOICE **blob_choices, const UNICHA
const BLOB_CHOICE *blob_choice = blob_choices[i];
fprintf(output_file, "%s", unicharset.id_to_unichar(blob_choice->unichar_id()));
rating += blob_choice->rating();
if (certainty > blob_choice->certainty())
if (certainty > blob_choice->certainty()) {
certainty = blob_choice->certainty();
}
}
fprintf(output_file, "\t%s\t%.4f\t%.4f\n", label, rating, certainty);
}

View File

@ -68,14 +68,16 @@ void Tesseract::set_done(WERD_RES *word, int16_t pass) {
word->best_choice->permuter() == USER_DAWG_PERM;
if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
one_ell_conflict(word, false)) {
if (tessedit_rejection_debug)
if (tessedit_rejection_debug) {
tprintf("one_ell_conflict detected\n");
}
word->done = false;
}
if (word->done &&
((!word_from_dict && word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
if (tessedit_rejection_debug)
if (tessedit_rejection_debug) {
tprintf("non-dict or ambig word detected\n");
}
word->done = false;
}
if (tessedit_rejection_debug) {
@ -104,8 +106,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
0: Rays original heuristic - the baseline
*/
if (tessedit_reject_mode == 0) {
if (!word->done)
if (!word->done) {
reject_poor_matches(word);
}
} else if (tessedit_reject_mode == 5) {
/*
5: Reject I/1/l from words where there is no strong contextual confirmation;
@ -122,12 +125,14 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
mechanism can be turned on or off independently. This works WITHOUT
affecting the done flag setting.
*/
if (rej_use_tess_accepted && !word->tess_accepted)
if (rej_use_tess_accepted && !word->tess_accepted) {
word->reject_map.rej_word_not_tess_accepted();
}
if (rej_use_tess_blanks &&
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr))
(strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) {
word->reject_map.rej_word_contains_blanks();
}
WERD_CHOICE *best_choice = word->best_choice;
if (rej_use_good_perm) {
@ -144,8 +149,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
offset += best_choice->unichar_lengths()[i++]) {
if (word->reject_map[i].accepted() &&
word->uch_set->get_isalpha(best_choice->unichar_string().c_str() + offset,
best_choice->unichar_lengths()[i]))
best_choice->unichar_lengths()[i])) {
word->reject_map[i].setrej_bad_permuter();
}
// rej alpha
}
}
@ -160,8 +166,9 @@ void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
ASSERT_HOST("Fatal error encountered!" == nullptr);
}
if (tessedit_image_border > -1)
if (tessedit_image_border > -1) {
reject_edge_blobs(word);
}
check_debug_pt(word, 10);
if (tessedit_rejection_debug) {
@ -181,9 +188,10 @@ void reject_blanks(WERD_RES *word) {
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
offset += word->best_choice->unichar_lengths()[i], i += 1) {
if (word->best_choice->unichar_string()[offset] == ' ')
if (word->best_choice->unichar_string()[offset] == ' ') {
// rej unrecognised blobs
word->reject_map[i].setrej_tess_failure();
}
}
}
@ -203,10 +211,11 @@ void Tesseract::reject_I_1_L(WERD_RES *word) {
// Marks individual characters of `word` as rejected in word->reject_map.
//
// A threshold is obtained once from compute_reject_threshold() for the word's
// best choice. Then, for each position i of the best choice:
//   - if the unichar id is UNICHAR_SPACE, the position is flagged with
//     setrej_tess_failure();
//   - otherwise, if its certainty is below the threshold, it is flagged with
//     setrej_poor_match().
// Positions matching neither condition are left untouched.
void reject_poor_matches(WERD_RES *word) {
  const auto *best_choice = word->best_choice;
  float threshold = compute_reject_threshold(word->best_choice);
  for (int i = 0; i < best_choice->length(); ++i) {
    if (best_choice->unichar_id(i) == UNICHAR_SPACE) {
      word->reject_map[i].setrej_tess_failure();
    } else if (best_choice->certainty(i) < threshold) {
      word->reject_map[i].setrej_poor_match();
    }
  }
}
@ -301,8 +310,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
If there are no occurrences of the conflict set characters then the word
is OK.
*/
if (strpbrk(word, conflict_set_I_l_1.c_str()) == nullptr)
if (strpbrk(word, conflict_set_I_l_1.c_str()) == nullptr) {
return false;
}
/*
There is a conflict if there are NO other (confirmed) alphanumerics apart
@ -310,13 +320,15 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
*/
for (i = 0, offset = 0, non_conflict_set_char = false; (i < word_len) && !non_conflict_set_char;
offset += lengths[i++])
offset += lengths[i++]) {
non_conflict_set_char = (word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
!conflict_set_I_l_1.contains(word[offset]);
}
if (!non_conflict_set_char) {
if (update_map)
if (update_map) {
reject_I_1_L(word_res);
}
return true;
}
@ -341,8 +353,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (safe_dict_word(word_res) > 0) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (update_map)
if (update_map) {
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
}
return true;
} else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
@ -354,8 +367,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (safe_dict_word(word_res) > 0) {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (update_map)
if (update_map) {
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
}
return true;
} else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
@ -380,16 +394,18 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'l') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
if (safe_dict_word(word_res) > 0)
if (safe_dict_word(word_res) > 0) {
return false;
else
} else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
}
} else if (lengths[first_alphanum_index_] == 1 && word[first_alphanum_offset_] == 'I') {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'l';
if (safe_dict_word(word_res) > 0)
if (safe_dict_word(word_res) > 0) {
return false;
else
} else {
word_res->best_choice->unichar_string()[first_alphanum_offset_] = 'I';
}
}
/*
For strings containing digits:
@ -407,8 +423,9 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
offset += word_res->best_choice->unichar_lengths()[i++]) {
if ((!allow_1s || (word[offset] != '1')) &&
conflict_set_I_l_1.contains(word[offset])) {
if (update_map)
if (update_map) {
word_res->reject_map[i].setrej_1Il_conflict();
}
conflict = true;
}
}
@ -423,16 +440,19 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
first_alphanum_index_ = first_alphanum_index(word, lengths);
first_alphanum_offset_ = first_alphanum_offset(word, lengths);
if (conflict_set_I_l_1.contains(word[first_alphanum_offset_])) {
if (update_map)
if (update_map) {
word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
}
return true;
} else
} else {
return false;
}
} else if (word_type == AC_UPPER_CASE) {
return false;
} else {
if (update_map)
if (update_map) {
reject_I_1_L(word_res);
}
return true;
}
}
@ -443,8 +463,9 @@ int16_t Tesseract::first_alphanum_index(const char *word, const char *word_lengt
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
unicharset.get_isdigit(word + offset, word_lengths[i]))
unicharset.get_isdigit(word + offset, word_lengths[i])) {
return i;
}
}
return -1;
}
@ -455,8 +476,9 @@ int16_t Tesseract::first_alphanum_offset(const char *word, const char *word_leng
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]) ||
unicharset.get_isdigit(word + offset, word_lengths[i]))
unicharset.get_isdigit(word + offset, word_lengths[i])) {
return offset;
}
}
return -1;
}
@ -467,8 +489,9 @@ int16_t Tesseract::alpha_count(const char *word, const char *word_lengths) {
int16_t count = 0;
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isalpha(word + offset, word_lengths[i]))
if (unicharset.get_isalpha(word + offset, word_lengths[i])) {
count++;
}
}
return count;
}
@ -479,8 +502,9 @@ bool Tesseract::word_contains_non_1_digit(const char *word, const char *word_len
for (i = 0, offset = 0; word[offset] != '\0'; offset += word_lengths[i++]) {
if (unicharset.get_isdigit(word + offset, word_lengths[i]) &&
(word_lengths[i] != 1 || word[offset] != '1'))
(word_lengths[i] != 1 || word[offset] != '1')) {
return true;
}
}
return false;
}
@ -503,17 +527,20 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {
accepted_1Il = true;
} else {
if (word->uch_set->get_isalpha(s + offset, lengths[i]) ||
word->uch_set->get_isdigit(s + offset, lengths[i]))
word->uch_set->get_isdigit(s + offset, lengths[i])) {
return; // >=1 non 1Il ch accepted
}
}
}
}
if (!accepted_1Il)
if (!accepted_1Il) {
return; // Nothing to worry about
}
for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted())
if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted()) {
word->reject_map[i].setrej_postNN_1Il();
}
}
}
@ -535,33 +562,38 @@ void Tesseract::reject_mostly_rejects(WERD_RES *word) {
/* Reject the whole of the word if the fraction of rejects exceeds a limit */
if (static_cast<float>(word->reject_map.reject_count()) / word->reject_map.length() >=
rej_whole_of_mostly_reject_word_fract)
rej_whole_of_mostly_reject_word_fract) {
word->reject_map.rej_word_mostly_rej();
}
}
// Returns true when the word's best choice is a run of one repeated character
// that passes every check below; returns false as soon as one fails:
//   1. the best choice has more than one entry in unichar_lengths();
//   2. the first byte of its unichar string is contained in
//      ok_repeated_ch_non_alphanum_wds;
//   3. every unichar id equals the first one;
//   4. the char_quality reported by word_char_quality() equals both the
//      number of entries in unichar_lengths() and accepted_char_quality.
// `row` is not referenced by this function.
bool Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
  int16_t char_quality;
  int16_t accepted_char_quality;

  if (word->best_choice->unichar_lengths().length() <= 1) {
    return false;
  }

  if (!ok_repeated_ch_non_alphanum_wds.contains(word->best_choice->unichar_string()[0])) {
    return false;
  }

  // All characters must be the same as the first one.
  UNICHAR_ID uch_id = word->best_choice->unichar_id(0);
  for (int i = 1; i < word->best_choice->length(); ++i) {
    if (word->best_choice->unichar_id(i) != uch_id) {
      return false;
    }
  }

  word_char_quality(word, &char_quality, &accepted_char_quality);

  return (word->best_choice->unichar_lengths().length() == char_quality) &&
         (char_quality == accepted_char_quality);
}
int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) {
@ -581,18 +613,20 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
TBOX out_box;
float aspect_ratio;
if (tessedit_lower_flip_hyphen <= 1)
if (tessedit_lower_flip_hyphen <= 1) {
return;
}
int num_blobs = word_res->rebuild_word->NumBlobs();
UNICHAR_ID unichar_dash = word_res->uch_set->unichar_to_id("-");
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
TBLOB *blob = word_res->rebuild_word->blobs[i];
out_box = blob->bounding_box();
if (i + 1 == num_blobs)
if (i + 1 == num_blobs) {
next_left = 9999;
else
} else {
next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
}
// Don't touch small or touching blobs - it is too dangerous.
if ((out_box.width() > 8 * word_res->denorm.x_scale()) && (out_box.left() > prev_right) &&
(out_box.right() < next_left)) {
@ -603,20 +637,24 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
word_res->uch_set->get_enabled(unichar_dash)) {
/* Certain HYPHEN */
best_choice->set_unichar_id(unichar_dash, i);
if (word_res->reject_map[i].rejected())
if (word_res->reject_map[i].rejected()) {
word_res->reject_map[i].setrej_hyphen_accept();
}
}
if ((aspect_ratio > tessedit_lower_flip_hyphen) && word_res->reject_map[i].accepted())
if ((aspect_ratio > tessedit_lower_flip_hyphen) && word_res->reject_map[i].accepted()) {
// Suspected HYPHEN
word_res->reject_map[i].setrej_hyphen();
}
} else if (best_choice->unichar_id(i) == unichar_dash) {
if ((aspect_ratio >= tessedit_upper_flip_hyphen) && (word_res->reject_map[i].rejected()))
if ((aspect_ratio >= tessedit_upper_flip_hyphen) && (word_res->reject_map[i].rejected())) {
word_res->reject_map[i].setrej_hyphen_accept();
}
// Certain HYPHEN
if ((aspect_ratio <= tessedit_lower_flip_hyphen) && (word_res->reject_map[i].accepted()))
if ((aspect_ratio <= tessedit_lower_flip_hyphen) && (word_res->reject_map[i].accepted())) {
// Suspected HYPHEN
word_res->reject_map[i].setrej_hyphen();
}
}
}
prev_right = out_box.right();
@ -631,8 +669,9 @@ void Tesseract::flip_0O(WERD_RES *word_res) {
int i;
TBOX out_box;
if (!tessedit_flip_0O)
if (!tessedit_flip_0O) {
return;
}
int num_blobs = word_res->rebuild_word->NumBlobs();
for (i = 0; i < best_choice->length() && i < num_blobs; ++i) {
@ -641,8 +680,9 @@ void Tesseract::flip_0O(WERD_RES *word_res) {
word_res->uch_set->get_isdigit(best_choice->unichar_id(i))) {
out_box = blob->bounding_box();
if ((out_box.top() < kBlnBaselineOffset + kBlnXHeight) ||
(out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4))
(out_box.bottom() > kBlnBaselineOffset + kBlnXHeight / 4)) {
return; // Beware words with sub/superscripts
}
}
}
UNICHAR_ID unichar_0 = word_res->uch_set->unichar_to_id("0");

View File

@ -41,8 +41,9 @@ ResultIterator::ResultIterator(const LTRResultIterator &resit) : LTRResultIterat
auto *p = ParamUtils::FindParam<BoolParam>(
"preserve_interword_spaces", GlobalParams()->bool_params, tesseract_->params()->bool_params);
if (p != nullptr)
if (p != nullptr) {
preserve_interword_spaces_ = (bool)(*p);
}
current_paragraph_is_ltr_ = CurrentParagraphIsLtr();
MoveToLogicalStartOfTextline();
@ -57,8 +58,9 @@ bool ResultIterator::ParagraphIsLtr() const {
}
bool ResultIterator::CurrentParagraphIsLtr() const {
if (!it_->word())
if (!it_->word()) {
return true; // doesn't matter.
}
LTRResultIterator it(*this);
it.RestartParagraph();
// Try to figure out the ltr-ness of the paragraph. The rules below
@ -95,17 +97,20 @@ bool ResultIterator::CurrentParagraphIsLtr() const {
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
num_ltr += rightmost_ltr ? 1 : 0;
}
if (leftmost_rtl)
if (leftmost_rtl) {
return false;
if (rightmost_ltr)
}
if (rightmost_ltr) {
return true;
}
// First line is ambiguous. Take statistics on the whole paragraph.
if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA))
if (!it.Empty(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA)) {
do {
StrongScriptDirection dir = it.WordDirection();
num_rtl += (dir == DIR_RIGHT_TO_LEFT) ? 1 : 0;
num_ltr += (dir == DIR_LEFT_TO_RIGHT) ? 1 : 0;
} while (it.Next(RIL_WORD) && !it.IsAtBeginningOf(RIL_PARA));
}
return num_ltr >= num_rtl;
}
@ -116,12 +121,14 @@ const int ResultIterator::kComplexWord = -3;
void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
bool context_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
blob_indices->clear();
if (Empty(RIL_WORD))
if (Empty(RIL_WORD)) {
return;
}
if (context_is_ltr || it_->word()->UnicharsInReadingOrder()) {
// Easy! just return the blobs in order;
for (int i = 0; i < word_length_; i++)
for (int i = 0; i < word_length_; i++) {
blob_indices->push_back(i);
}
return;
}
@ -159,8 +166,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
}
if (j < word_length_ && letter_types[j] == U_EURO_NUM) {
// The sequence [i..j] should be converted to all European Numbers.
for (int k = i; k < j; k++)
for (int k = i; k < j; k++) {
letter_types[k] = U_EURO_NUM;
}
}
j = i - 1;
while (j > -1 && letter_types[j] == U_EURO_NUM_TERM) {
@ -168,8 +176,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
}
if (j > -1 && letter_types[j] == U_EURO_NUM) {
// The sequence [j..i] should be converted to all European Numbers.
for (int k = j; k <= i; k++)
for (int k = j; k <= i; k++) {
letter_types[k] = U_EURO_NUM;
}
}
}
}
@ -192,8 +201,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
}
}
// [i..last_good] is the L sequence
for (int k = i; k <= last_good; k++)
for (int k = i; k <= last_good; k++) {
letter_types[k] = U_LTR;
}
i = last_good + 1;
} else {
letter_types[i] = U_RTL;
@ -212,8 +222,9 @@ void ResultIterator::CalculateBlobOrder(std::vector<int> *blob_indices) const {
for (; j >= 0 && letter_types[j] != U_RTL; j--) {
} // pass
// Now (j, i] is LTR
for (int k = j + 1; k <= i; k++)
for (int k = j + 1; k <= i; k++) {
blob_indices->push_back(k);
}
i = j;
}
}
@ -260,8 +271,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResu
// A LTRResultIterator goes strictly left-to-right word order.
LTRResultIterator ltr_it(resit);
ltr_it.RestartRow();
if (ltr_it.Empty(RIL_WORD))
if (ltr_it.Empty(RIL_WORD)) {
return;
}
do {
directions->push_back(ltr_it.WordDirection());
} while (ltr_it.Next(RIL_WORD) && !ltr_it.IsAtBeginningOf(RIL_TEXTLINE));
@ -274,8 +286,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order) {
reading_order->clear();
if (word_dirs.size() == 0)
if (word_dirs.size() == 0) {
return;
}
// Take all of the runs of minor direction words and insert them
// in reverse order.
@ -305,14 +318,16 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
// Scan for the beginning of the minor left-to-right run.
int left = neutral_end;
for (int i = left; i >= 0 && word_dirs[i] != DIR_RIGHT_TO_LEFT; i--) {
if (word_dirs[i] == DIR_LEFT_TO_RIGHT)
if (word_dirs[i] == DIR_LEFT_TO_RIGHT) {
left = i;
}
}
reading_order->push_back(kMinorRunStart);
for (int i = left; i < word_dirs.size(); i++) {
reading_order->push_back(i);
if (word_dirs[i] == DIR_MIX)
if (word_dirs[i] == DIR_MIX) {
reading_order->push_back(kComplexWord);
}
}
reading_order->push_back(kMinorRunEnd);
start = left - 1;
@ -322,12 +337,15 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
for (int i = start; i != end;) {
if (word_dirs[i] == minor_direction) {
int j = i;
while (j != end && word_dirs[j] != major_direction)
while (j != end && word_dirs[j] != major_direction) {
j += major_step;
if (j == end)
}
if (j == end) {
j -= major_step;
while (j != i && word_dirs[j] != minor_direction)
}
while (j != i && word_dirs[j] != minor_direction) {
j -= major_step;
}
// [j..i] is a minor direction run.
reading_order->push_back(kMinorRunStart);
for (int k = j; k != i; k -= major_step) {
@ -338,8 +356,9 @@ void ResultIterator::CalculateTextlineOrder(bool paragraph_is_ltr,
i = j + major_step;
} else {
reading_order->push_back(i);
if (word_dirs[i] == DIR_MIX)
if (word_dirs[i] == DIR_MIX) {
reading_order->push_back(kComplexWord);
}
i += major_step;
}
}
@ -363,30 +382,34 @@ void ResultIterator::MoveToLogicalStartOfWord() {
}
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
if (blob_order.size() == 0 || blob_order[0] == 0)
if (blob_order.size() == 0 || blob_order[0] == 0) {
return;
}
BeginWord(blob_order[0]);
}
// Returns true when there is no current word, when the blob order computed by
// CalculateBlobOrder is empty, or when the last entry of that order equals
// blob_index_.
bool ResultIterator::IsAtFinalSymbolOfWord() const {
  if (!it_->word()) {
    return true;
  }
  std::vector<int> blob_order;
  CalculateBlobOrder(&blob_order);
  return blob_order.empty() || blob_order.back() == blob_index_;
}
// Returns true when there is no current word, when the blob order computed by
// CalculateBlobOrder is empty, or when the first entry of that order equals
// blob_index_.
bool ResultIterator::IsAtFirstSymbolOfWord() const {
  if (!it_->word()) {
    return true;
  }
  std::vector<int> blob_order;
  CalculateBlobOrder(&blob_order);
  return blob_order.empty() || blob_order.front() == blob_index_;
}
void ResultIterator::AppendSuffixMarks(std::string *text) const {
if (!it_->word())
if (!it_->word()) {
return;
}
bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
// scan forward to see what meta-information the word ordering algorithm
// left us.
@ -429,15 +452,18 @@ void ResultIterator::MoveToLogicalStartOfTextline() {
&word_indices);
int i = 0;
for (; i < word_indices.size() && word_indices[i] < 0; i++) {
if (word_indices[i] == kMinorRunStart)
if (word_indices[i] == kMinorRunStart) {
in_minor_direction_ = true;
else if (word_indices[i] == kMinorRunEnd)
} else if (word_indices[i] == kMinorRunEnd) {
in_minor_direction_ = false;
}
}
if (in_minor_direction_)
if (in_minor_direction_) {
at_beginning_of_minor_run_ = true;
if (i >= word_indices.size())
}
if (i >= word_indices.size()) {
return;
}
int first_word_index = word_indices[i];
for (int j = 0; j < first_word_index; j++) {
PageIterator::Next(RIL_WORD);
@ -454,14 +480,16 @@ void ResultIterator::Begin() {
}
bool ResultIterator::Next(PageIteratorLevel level) {
if (it_->block() == nullptr)
if (it_->block() == nullptr) {
return false; // already at end!
}
switch (level) {
case RIL_BLOCK: // explicit fall-through
case RIL_PARA: // explicit fall-through
case RIL_TEXTLINE:
if (!PageIterator::Next(level))
if (!PageIterator::Next(level)) {
return false;
}
if (IsWithinFirstTextlineOfParagraph()) {
// if we've advanced to a new paragraph,
// recalculate current_paragraph_is_ltr_
@ -474,8 +502,9 @@ bool ResultIterator::Next(PageIteratorLevel level) {
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
int next_blob = 0;
while (next_blob < blob_order.size() && blob_index_ != blob_order[next_blob])
while (next_blob < blob_order.size() && blob_index_ != blob_order[next_blob]) {
next_blob++;
}
next_blob++;
if (next_blob < blob_order.size()) {
// we're in the same word; simply advance one blob.
@ -488,22 +517,26 @@ bool ResultIterator::Next(PageIteratorLevel level) {
// Fall through.
case RIL_WORD: // explicit fall-through.
{
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return Next(RIL_BLOCK);
}
std::vector<int> word_indices;
int this_word_index = LTRWordIndex();
CalculateTextlineOrder(current_paragraph_is_ltr_, *this, &word_indices);
int final_real_index = word_indices.size() - 1;
while (final_real_index > 0 && word_indices[final_real_index] < 0)
while (final_real_index > 0 && word_indices[final_real_index] < 0) {
final_real_index--;
}
for (int i = 0; i < final_real_index; i++) {
if (word_indices[i] == this_word_index) {
int j = i + 1;
for (; j < final_real_index && word_indices[j] < 0; j++) {
if (word_indices[j] == kMinorRunStart)
if (word_indices[j] == kMinorRunStart) {
in_minor_direction_ = true;
if (word_indices[j] == kMinorRunEnd)
}
if (word_indices[j] == kMinorRunEnd) {
in_minor_direction_ = false;
}
}
at_beginning_of_minor_run_ = (word_indices[j - 1] == kMinorRunStart);
// awesome, we move to word_indices[j]
@ -530,37 +563,44 @@ bool ResultIterator::Next(PageIteratorLevel level) {
}
bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
if (it_->block() == nullptr)
if (it_->block() == nullptr) {
return false; // Already at the end!
if (it_->word() == nullptr)
}
if (it_->word() == nullptr) {
return true; // In an image block.
if (level == RIL_SYMBOL)
}
if (level == RIL_SYMBOL) {
return true; // Always at beginning of a symbol.
}
bool at_word_start = IsAtFirstSymbolOfWord();
if (level == RIL_WORD)
if (level == RIL_WORD) {
return at_word_start;
}
ResultIterator line_start(*this);
// move to the first word in the line...
line_start.MoveToLogicalStartOfTextline();
bool at_textline_start = at_word_start && *line_start.it_ == *it_;
if (level == RIL_TEXTLINE)
if (level == RIL_TEXTLINE) {
return at_textline_start;
}
// now we move to the left-most word...
line_start.RestartRow();
bool at_block_start =
at_textline_start && line_start.it_->block() != line_start.it_->prev_block();
if (level == RIL_BLOCK)
if (level == RIL_BLOCK) {
return at_block_start;
}
bool at_para_start =
at_block_start || (at_textline_start && line_start.it_->row()->row->para() !=
line_start.it_->prev_row()->row->para());
if (level == RIL_PARA)
if (level == RIL_PARA) {
return at_para_start;
}
ASSERT_HOST(false); // shouldn't happen.
return false;
@ -572,8 +612,9 @@ bool ResultIterator::IsAtBeginningOf(PageIteratorLevel level) const {
* PageIterator.
*/
bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const {
if (Empty(element))
if (Empty(element)) {
return true; // Already at the end!
}
// The result is true if we step forward by element and find we are
// at the end of the page or at beginning of *all* levels in:
// [level, element).
@ -582,20 +623,23 @@ bool ResultIterator::IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel
// word on a line, so we also have to be at the first symbol in a word.
ResultIterator next(*this);
next.Next(element);
if (next.Empty(element))
if (next.Empty(element)) {
return true; // Reached the end of the page.
}
while (element > level) {
element = static_cast<PageIteratorLevel>(element - 1);
if (!next.IsAtBeginningOf(element))
if (!next.IsAtBeginningOf(element)) {
return false;
}
}
return true;
}
// Returns the number of blanks before the current word.
// For a left-to-right paragraph the answer is delegated to
// LTRResultIterator::BlanksBeforeWord(). Otherwise the result is 0 when the
// iterator is at the beginning of a textline and 1 in every other position.
int ResultIterator::BlanksBeforeWord() const {
  if (CurrentParagraphIsLtr()) {
    return LTRResultIterator::BlanksBeforeWord();
  }
  return IsAtBeginningOf(RIL_TEXTLINE) ? 0 : 1;
}
@ -604,8 +648,9 @@ int ResultIterator::BlanksBeforeWord() const {
* object at the given level. Use delete [] to free after use.
*/
char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
if (it_->word() == nullptr)
if (it_->word() == nullptr) {
return nullptr; // Already at the end!
}
std::string text;
switch (level) {
case RIL_BLOCK: {
@ -631,8 +676,9 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
text += reading_direction_is_ltr ? kLRM : kRLM;
}
text = it_->word()->BestUTF8(blob_index_, false);
if (IsAtFinalSymbolOfWord())
if (IsAtFinalSymbolOfWord()) {
AppendSuffixMarks(&text);
}
} break;
}
int length = text.length() + 1;
@ -659,8 +705,9 @@ std::vector<std::vector<std::pair<const char *, float>>> *ResultIterator::GetBes
}
void ResultIterator::AppendUTF8WordText(std::string *text) const {
if (!it_->word())
if (!it_->word()) {
return;
}
ASSERT_HOST(it_->word()->best_choice != nullptr);
bool reading_direction_is_ltr = current_paragraph_is_ltr_ ^ in_minor_direction_;
if (at_beginning_of_minor_run_) {
@ -721,8 +768,9 @@ void ResultIterator::AppendUTF8ParagraphText(std::string *text) const {
ResultIterator it(*this);
it.RestartParagraph();
it.MoveToLogicalStartOfTextline();
if (it.Empty(RIL_WORD))
if (it.Empty(RIL_WORD)) {
return;
}
do {
it.IterateAndAppendUTF8TextlineText(text);
} while (it.it_->block() != nullptr && !it.IsAtBeginningOf(RIL_PARA));
@ -732,8 +780,9 @@ bool ResultIterator::BidiDebug(int min_level) const {
int debug_level = 1;
auto *p = ParamUtils::FindParam<IntParam>("bidi_debug", GlobalParams()->int_params,
tesseract_->params()->int_params);
if (p != nullptr)
if (p != nullptr) {
debug_level = (int32_t)(*p);
}
return debug_level >= min_level;
}

View File

@ -23,15 +23,17 @@ namespace tesseract {
// Returns the sum of the first num_unichars entries of word->best_state.
// NOTE(review): presumably each best_state entry is the number of chopped
// blobs making up one unichar -- confirm against WERD_RES.
// The caller must ensure num_unichars does not exceed best_state.size().
static int LeadingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  int num_chopped = 0;
  // Single pass only: each of the leading entries is counted exactly once.
  for (int i = 0; i < num_unichars; i++) {
    num_chopped += word->best_state[i];
  }
  return num_chopped;
}
// Returns the sum of the last num_unichars entries of word->best_state.
// NOTE(review): presumably each best_state entry is the number of chopped
// blobs making up one unichar -- confirm against WERD_RES.
// The caller must ensure num_unichars does not exceed best_state.size().
static int TrailingUnicharsToChopped(WERD_RES *word, int num_unichars) {
  int num_chopped = 0;
  // Walk backwards from the final entry; each trailing entry is counted once.
  for (int i = 0; i < num_unichars; i++) {
    num_chopped += word->best_state[word->best_state.size() - 1 - i];
  }
  return num_chopped;
}
@ -46,14 +48,18 @@ static void YOutlierPieces(WERD_RES *word, int rebuilt_blob_index, int super_y_b
ScriptPos *trailing_pos, int *num_trailing_outliers) {
ScriptPos sp_unused1, sp_unused2;
int unused1, unused2;
if (!leading_pos)
if (!leading_pos) {
leading_pos = &sp_unused1;
if (!num_leading_outliers)
}
if (!num_leading_outliers) {
num_leading_outliers = &unused1;
if (!trailing_pos)
}
if (!trailing_pos) {
trailing_pos = &sp_unused2;
if (!num_trailing_outliers)
}
if (!num_trailing_outliers) {
num_trailing_outliers = &unused2;
}
*num_leading_outliers = *num_trailing_outliers = 0;
*leading_pos = *trailing_pos = SP_NORMAL;
@ -133,8 +139,9 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
ScriptPos rpos;
YOutlierPieces(word, last_word_char, super_y_bottom, sub_y_top, nullptr, nullptr, &rpos,
&num_remainder_trailing);
if (num_trailing > 0 && rpos != sp_trailing)
if (num_trailing > 0 && rpos != sp_trailing) {
num_remainder_trailing = 0;
}
if (num_remainder_trailing > 0 && last_char_certainty < trailing_certainty) {
trailing_certainty = last_char_certainty;
}
@ -147,8 +154,9 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
ScriptPos lpos;
YOutlierPieces(word, num_leading, super_y_bottom, sub_y_top, &lpos, &num_remainder_leading,
nullptr, nullptr);
if (num_leading > 0 && lpos != sp_leading)
if (num_leading > 0 && lpos != sp_leading) {
num_remainder_leading = 0;
}
if (num_remainder_leading > 0 && first_char_certainty < leading_certainty) {
leading_certainty = first_char_certainty;
}
@ -177,10 +185,12 @@ bool Tesseract::SubAndSuperscriptFix(WERD_RES *word) {
if (superscript_debug >= 2) {
tprintf(" Certainties -- Average: %.2f Unlikely thresh: %.2f ", avg_certainty,
unlikely_threshold);
if (num_leading)
if (num_leading) {
tprintf("Orig. leading (min): %.2f ", leading_certainty);
if (num_trailing)
}
if (num_trailing) {
tprintf("Orig. trailing (min): %.2f ", trailing_certainty);
}
tprintf("\n");
}
@ -569,10 +579,12 @@ bool Tesseract::BelievableSuperscript(bool debug, const WERD_RES &word, float ce
tprintf(" Accept: worst revised certainty is %.2f\n", worst_certainty);
}
if (!all_ok) {
if (left_ok)
if (left_ok) {
*left_ok = initial_ok_run_count;
if (right_ok)
}
if (right_ok) {
*right_ok = ok_run_count;
}
}
return all_ok;
}

View File

@ -39,13 +39,15 @@ void Tesseract::tess_segment_pass_n(int pass_n, WERD_RES *word) {
wordrec_enable_assoc.set_value(false);
chop_enable.set_value(false);
}
if (pass_n == 1)
if (pass_n == 1) {
set_pass1();
else
} else {
set_pass2();
}
recog_word(word);
if (word->best_choice == nullptr)
if (word->best_choice == nullptr) {
word->SetupFake(*word->uch_set);
}
if (word->word->flag(W_DONT_CHOP)) {
wordrec_enable_assoc.set_value(saved_enable_assoc);
chop_enable.set_value(saved_chop_enable);

View File

@ -151,8 +151,9 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0, const std::str
#ifndef DISABLED_LEGACY_ENGINE
// Determine which ocr engine(s) should be loaded and used for recognition.
if (oem != OEM_DEFAULT)
if (oem != OEM_DEFAULT) {
tessedit_ocr_engine_mode.set_value(oem);
}
#endif
// If we are only loading the config file (and so not planning on doing any
@ -231,8 +232,9 @@ bool Tesseract::init_tesseract_lang_data(const std::string &arg0, const std::str
// Helper returns true if the given string is in the vector of strings.
// The comparison is an exact std::string equality test; an empty list always
// yields false.
static bool IsStrInList(const std::string &str, const std::vector<std::string> &str_list) {
  for (const auto &candidate : str_list) {
    if (candidate == str) {
      return true;
    }
  }
  return false;
}
@ -248,8 +250,9 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
while (!remains.empty()) {
// Find the start of the lang code and which vector to add to.
const char *start = remains.c_str();
while (*start == '+')
while (*start == '+') {
++start;
}
std::vector<std::string> *target = to_load;
if (*start == '~') {
target = not_to_load;
@ -258,8 +261,9 @@ void Tesseract::ParseLanguageString(const std::string &lang_str, std::vector<std
// Find the index of the end of the lang code in string start.
int end = strlen(start);
const char *plus = strchr(start, '+');
if (plus != nullptr && plus - start < end)
if (plus != nullptr && plus - start < end) {
end = plus - start;
}
std::string lang_code(start);
lang_code.resize(end);
std::string next(start + end);
@ -438,8 +442,9 @@ void Tesseract::SetupUniversalFontIds() {
int Tesseract::init_tesseract_lm(const std::string &arg0, const std::string &textbase,
const std::string &language, TessdataManager *mgr) {
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, nullptr, 0, nullptr,
nullptr, false, mgr))
nullptr, false, mgr)) {
return -1;
}
getDict().SetupForLoad(Dict::GlobalDawgCache());
getDict().Load(lang, mgr);
getDict().FinishLoad();

View File

@ -450,8 +450,9 @@ void Tesseract::Clear() {
reskew_ = FCOORD(1.0f, 0.0f);
splitter_.Clear();
scaled_factor_ = -1;
for (auto &sub_lang : sub_langs_)
for (auto &sub_lang : sub_langs_) {
sub_lang->Clear();
}
}
#ifndef DISABLED_LEGACY_ENGINE
@ -514,8 +515,9 @@ void Tesseract::PrepareForPageseg() {
for (auto &sub_lang : sub_langs_) {
auto pageseg_strategy = static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<int32_t>(sub_lang->pageseg_devanagari_split_strategy));
if (pageseg_strategy > max_pageseg_strategy)
if (pageseg_strategy > max_pageseg_strategy) {
max_pageseg_strategy = pageseg_strategy;
}
pixDestroy(&sub_lang->pix_binary_);
sub_lang->pix_binary_ = pixClone(pix_binary());
}
@ -542,8 +544,9 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O
for (auto &sub_lang : sub_langs_) {
auto ocr_strategy = static_cast<ShiroRekhaSplitter::SplitStrategy>(
static_cast<int32_t>(sub_lang->ocr_devanagari_split_strategy));
if (ocr_strategy > max_ocr_strategy)
if (ocr_strategy > max_ocr_strategy) {
max_ocr_strategy = ocr_strategy;
}
}
// Utilize the segmentation information available.
splitter_.set_segmentation_block_list(block_list);

View File

@ -284,18 +284,21 @@ public:
}
// Returns true if any language uses Tesseract (as opposed to LSTM).
// That is: this instance's tessedit_ocr_engine_mode, or that of any entry in
// sub_langs_, is something other than OEM_LSTM_ONLY.
bool AnyTessLang() const {
  if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
    return true;
  }
  for (auto &lang : sub_langs_) {
    if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
      return true;
    }
  }
  return false;
}
// Returns true if any language uses the LSTM.
bool AnyLSTMLang() const {
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
return true;
}
for (auto &lang : sub_langs_) {
if (lang->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
return true;

View File

@ -38,8 +38,9 @@ void Tesseract::recog_word(WERD_RES *word) {
if (wordrec_skip_no_truth_words &&
(word->blamer_bundle == nullptr ||
word->blamer_bundle->incorrect_result_reason() == IRR_NO_TRUTH)) {
if (classify_debug_level)
if (classify_debug_level) {
tprintf("No truth for word - skipping\n");
}
word->tess_failed = true;
return;
}
@ -266,12 +267,14 @@ void Tesseract::join_words(WERD_RES *word, WERD_RES *word2, BlamerBundle *orig_b
// finished with them.
int bc2_index = 1;
for (bc2_it.forward(); !bc2_it.at_first(); bc2_it.forward(), ++bc2_index) {
if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece)
if (total_joined_choices >= kTooManyAltChoices && bc2_index > kAltsPerPiece) {
break;
}
int bc1_index = 0;
for (bc1_it.move_to_first(); bc1_index < num_word1_choices; ++bc1_index, bc1_it.forward()) {
if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece)
if (total_joined_choices >= kTooManyAltChoices && bc1_index > kAltsPerPiece) {
break;
}
auto *wc = new WERD_CHOICE(*bc1_it.data());
*wc += *bc2_it.data();
jc_it.add_after_then_move(wc);

View File

@ -69,8 +69,9 @@ bool ImageThresholder::IsEmpty() const {
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line) {
int bpp = bytes_per_pixel * 8;
if (bpp == 0)
if (bpp == 0) {
bpp = 1;
}
Pix *pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
l_uint32 *data = pixGetData(pix);
int wpl = pixGetWpl(pix);
@ -78,10 +79,11 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
case 1:
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x) {
if (imagedata[x / 8] & (0x80 >> (x % 8)))
if (imagedata[x / 8] & (0x80 >> (x % 8))) {
CLEAR_DATA_BIT(data, x);
else
} else {
SET_DATA_BIT(data, x);
}
}
}
break;
@ -89,8 +91,9 @@ void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int h
case 8:
// Greyscale just copies the bytes in the right order.
for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
for (int x = 0; x < width; ++x)
for (int x = 0; x < width; ++x) {
SET_DATA_BYTE(data, x, imagedata[x]);
}
}
break;
@ -151,8 +154,9 @@ void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *heigh
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Pix *pix) {
if (pix_ != nullptr)
if (pix_ != nullptr) {
pixDestroy(&pix_);
}
Pix *src = const_cast<Pix *>(pix);
int depth;
pixGetDimensions(src, &image_width_, &image_height_, &depth);
@ -210,8 +214,9 @@ bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix **pix) {
// the binary image in ThresholdToPix, but this is not a hard constraint.
// Returns nullptr if the input is binary. PixDestroy after use.
Pix *ImageThresholder::GetPixRectThresholds() {
if (IsBinary())
if (IsBinary()) {
return nullptr;
}
Pix *pix_grey = GetPixRectGrey();
int width = pixGetWidth(pix_grey);
int height = pixGetHeight(pix_grey);
@ -319,10 +324,11 @@ void ImageThresholder::ThresholdRectToPix(Pix *src_pix, int num_channels, const
break;
}
}
if (white_result)
if (white_result) {
CLEAR_DATA_BIT(pixline, x);
else
} else {
SET_DATA_BIT(pixline, x);
}
}
}
}

View File

@ -54,8 +54,9 @@ PAGE_RES_IT *make_pseudo_word(PAGE_RES *page_res, const TBOX &selection_box) {
WERD *pseudo_word = new WERD(&new_blobs, 1, nullptr);
word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
auto *it = new PAGE_RES_IT(page_res);
while (it->word() != word_res && it->word() != nullptr)
while (it->word() != word_res && it->word() != nullptr) {
it->forward();
}
ASSERT_HOST(it->word() == word_res);
return it;
}

View File

@ -76,8 +76,9 @@ void BlamerBundle::SetWordTruth(const UNICHARSET &unicharset, const char *truth_
std::string uch(truth_str + total_length);
uch.resize(lengths[i] - total_length);
UNICHAR_ID id = encoding[i];
if (id != INVALID_UNICHAR_ID)
if (id != INVALID_UNICHAR_ID) {
uch = unicharset.get_normed_unichar(id);
}
truth_text_.push_back(uch);
}
}
@ -90,16 +91,18 @@ void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char
UNICHAR_ID id = unicharset.unichar_to_id(char_str);
if (id != INVALID_UNICHAR_ID) {
std::string normed_uch(unicharset.get_normed_unichar(id));
if (normed_uch.length() > 0)
if (normed_uch.length() > 0) {
symbol_str = normed_uch;
}
}
int length = truth_word_.length();
truth_text_.push_back(symbol_str);
truth_word_.InsertBox(length, char_box);
if (length == 0)
if (length == 0) {
truth_has_char_boxes_ = true;
else if (truth_word_.BlobBox(length - 1) == char_box)
} else if (truth_word_.BlobBox(length - 1) == char_box) {
truth_has_char_boxes_ = false;
}
}
// Marks that there is something wrong with the truth text, like it contains
@ -111,8 +114,9 @@ void BlamerBundle::SetRejectedTruth() {
// Returns true if the provided word_choice is correct.
bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE *word_choice) const {
if (word_choice == nullptr)
if (word_choice == nullptr) {
return false;
}
const UNICHARSET *uni_set = word_choice->unicharset();
std::string normed_choice_str;
for (int i = 0; i < word_choice->length(); ++i) {
@ -127,8 +131,9 @@ void BlamerBundle::FillDebugString(const std::string &msg, const WERD_CHOICE *ch
for (auto &text : this->truth_text_) {
debug += text;
}
if (!this->truth_has_char_boxes_)
if (!this->truth_has_char_boxes_) {
debug += " (no char boxes)";
}
if (choice != nullptr) {
debug += " Choice ";
std::string choice_str;
@ -200,8 +205,9 @@ void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, Blam
bundle2->norm_box_tolerance_ = norm_box_tolerance_;
BlamerBundle *curr_bb = bundle1;
for (b = 0; b < norm_truth_word_.length(); ++b) {
if (b == begin2_truth_index)
if (b == begin2_truth_index) {
curr_bb = bundle2;
}
curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
curr_bb->truth_text_.push_back(truth_text_[b]);
@ -222,8 +228,9 @@ void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &b
bool debug) {
std::string debug_str;
IncorrectResultReason irr = incorrect_result_reason_;
if (irr != IRR_NO_TRUTH_SPLIT)
if (irr != IRR_NO_TRUTH_SPLIT) {
debug_str = "";
}
if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
@ -253,8 +260,9 @@ void BlamerBundle::JoinBlames(const BlamerBundle &bundle1, const BlamerBundle &b
// blames character classifier for incorrect answer.
void BlamerBundle::BlameClassifier(const UNICHARSET &unicharset, const TBOX &blob_box,
const BLOB_CHOICE_LIST &choices, bool debug) {
if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT)
if (!truth_has_char_boxes_ || incorrect_result_reason_ != IRR_CORRECT) {
return; // Nothing to do here.
}
for (int b = 0; b < norm_truth_word_.length(); ++b) {
const TBOX &truth_box = norm_truth_word_.BlobBox(b);
@ -394,23 +402,26 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
#ifndef DISABLED_LEGACY_ENGINE
params_training_bundle_.StartHypothesisList();
#endif // ndef DISABLED_LEGACY_ENGINE
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_) {
return; // Nothing to do here.
}
std::string debug_str = "Blamer computing correct_segmentation_cols\n";
int curr_box_col = 0;
int next_box_col = 0;
int num_blobs = word->NumBlobs();
if (num_blobs == 0)
if (num_blobs == 0) {
return; // No blobs to play with.
}
int blob_index = 0;
int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
for (int truth_idx = 0; blob_index < num_blobs && truth_idx < norm_truth_word_.length();
++blob_index) {
++next_box_col;
int16_t curr_box_x = next_box_x;
if (blob_index + 1 < num_blobs)
if (blob_index + 1 < num_blobs) {
next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
}
int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
debug_str += " " + std::to_string(truth_x);
@ -435,8 +446,9 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
"Blamer failed to find correct segmentation"
" (tolerance=" +
std::to_string(norm_box_tolerance_);
if (blob_index >= num_blobs)
if (blob_index >= num_blobs) {
debug_str += " blob == nullptr";
}
debug_str += ")\n";
debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());

View File

@ -144,8 +144,9 @@ struct BlamerBundle {
return misadaption_debug_;
}
// Lowers best_correctly_segmented_rating_ to rating when rating is strictly
// smaller than the stored value; otherwise leaves it untouched.
void UpdateBestRating(float rating) {
  if (rating < best_correctly_segmented_rating_) {
    best_correctly_segmented_rating_ = rating;
  }
}
int correct_segmentation_length() const {
return correct_segmentation_cols_.size();
@ -197,8 +198,9 @@ struct BlamerBundle {
void ClearResults() {
norm_truth_word_.DeleteAllBoxes();
norm_box_tolerance_ = 0;
if (!NoTruth())
if (!NoTruth()) {
incorrect_result_reason_ = IRR_CORRECT;
}
debug_ = "";
segsearch_is_looking_for_blame_ = false;
best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
@ -296,8 +298,9 @@ private:
debug_ = IncorrectReason();
debug_ += " to blame: ";
FillDebugString(msg, choice, debug_);
if (debug)
if (debug) {
tprintf("SetBlame(): %s", debug_.c_str());
}
}
private:

View File

@ -155,13 +155,14 @@ void BLOBNBOX::chop( // chop blobs
} while (blob != end_it->data());
if (ymin < ymax) {
leftx = static_cast<int16_t>(floor(rightx - blobwidth));
if (leftx < box.left())
if (leftx < box.left()) {
leftx = box.left(); // clip to real box
}
bl = ICOORD(leftx, static_cast<int16_t>(floor(ymin)));
tr = ICOORD(static_cast<int16_t>(ceil(rightx)), static_cast<int16_t>(ceil(ymax)));
if (blobindex == 0)
if (blobindex == 0) {
box = TBOX(bl, tr); // change box
else {
} else {
newblob = new BLOBNBOX;
// box is all it has
newblob->box = TBOX(bl, tr);
@ -202,12 +203,14 @@ void BLOBNBOX::MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max)
NeighbourGaps(gaps);
*h_min = std::min(gaps[BND_LEFT], gaps[BND_RIGHT]);
*h_max = std::max(gaps[BND_LEFT], gaps[BND_RIGHT]);
if (*h_max > max_dimension && *h_min < max_dimension)
if (*h_max > max_dimension && *h_min < max_dimension) {
*h_max = *h_min;
}
*v_min = std::min(gaps[BND_ABOVE], gaps[BND_BELOW]);
*v_max = std::max(gaps[BND_ABOVE], gaps[BND_BELOW]);
if (*v_max > max_dimension && *v_min < max_dimension)
if (*v_max > max_dimension && *v_min < max_dimension) {
*v_max = *v_min;
}
}
// Nulls out any neighbours that are DeletableNoise to remove references.
@ -227,8 +230,9 @@ int BLOBNBOX::GoodTextBlob() const {
int score = 0;
for (int dir = 0; dir < BND_COUNT; ++dir) {
auto bnd = static_cast<BlobNeighbourDir>(dir);
if (good_stroke_neighbour(bnd))
if (good_stroke_neighbour(bnd)) {
++score;
}
}
return score;
}
@ -239,8 +243,9 @@ int BLOBNBOX::NoisyNeighbours() const {
for (int dir = 0; dir < BND_COUNT; ++dir) {
auto bnd = static_cast<BlobNeighbourDir>(dir);
BLOBNBOX *blob = neighbour(bnd);
if (blob != nullptr && blob->region_type() == BRT_NOISE)
if (blob != nullptr && blob->region_type() == BRT_NOISE) {
++count;
}
}
return count;
}
@ -250,8 +255,9 @@ int BLOBNBOX::NoisyNeighbours() const {
// eg if it has a high aspect ratio, yet has a complex shape, such as a
// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
bool BLOBNBOX::DefiniteIndividualFlow() {
if (cblob() == nullptr)
if (cblob() == nullptr) {
return false;
}
int box_perimeter = 2 * (box.height() + box.width());
if (box.width() > box.height() * kDefiniteAspectRatio) {
// Attempt to distinguish a wide joined word from a dash.
@ -260,10 +266,11 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
// so perimeter - 2*(box width + stroke width) should be close to zero.
// A complex shape such as a joined word should have a much larger value.
int perimeter = cblob()->perimeter();
if (vert_stroke_width() > 0 || perimeter <= 0)
if (vert_stroke_width() > 0 || perimeter <= 0) {
perimeter -= 2 * vert_stroke_width();
else
} else {
perimeter -= 4 * cblob()->area() / perimeter;
}
perimeter -= 2 * box.width();
// Use a multiple of the box perimeter as a threshold.
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
@ -275,10 +282,11 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
if (box.height() > box.width() * kDefiniteAspectRatio) {
// As above, but for a putative vertical word vs a I/1/l.
int perimeter = cblob()->perimeter();
if (horz_stroke_width() > 0 || perimeter <= 0)
if (horz_stroke_width() > 0 || perimeter <= 0) {
perimeter -= 2 * horz_stroke_width();
else
} else {
perimeter -= 4 * cblob()->area() / perimeter;
}
perimeter -= 2 * box.height();
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
set_vert_possible(true);
@ -291,14 +299,18 @@ bool BLOBNBOX::DefiniteIndividualFlow() {
// Returns true if there is no tabstop violation in merging this and other.
// A violation is reported when the box that extends further on one side also
// extends past the opposite blob's rule on that side (left_rule_ /
// right_rule_).
bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX &other) const {
  // This box starts further left than other and is left of other's left rule.
  if (box.left() < other.box.left() && box.left() < other.left_rule_) {
    return false;
  }
  // Other starts further left than this box and is left of our left rule.
  if (other.box.left() < box.left() && other.box.left() < left_rule_) {
    return false;
  }
  // This box ends further right than other and is right of other's right rule.
  if (box.right() > other.box.right() && box.right() > other.right_rule_) {
    return false;
  }
  // Other ends further right than this box and is right of our right rule.
  if (other.box.right() > box.right() && other.box.right() > right_rule_) {
    return false;
  }
  return true;
}
@ -350,8 +362,9 @@ TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
// outline.
void BLOBNBOX::EstimateBaselinePosition() {
  baseline_y_ = box.bottom(); // The default.
  // Without an attached cblob the bounding-box bottom is the only estimate.
  if (cblob_ptr == nullptr) {
    return;
  }
  // Otherwise take the estimate computed by the cblob itself.
  baseline_y_ = cblob_ptr->EstimateBaselinePosition();
}
@ -423,8 +436,9 @@ void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST *list, ScrollView::Color body_colour
BLOBNBOX_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
BLOBNBOX *blob = it.data();
if (blob->DeletableNoise())
if (blob->DeletableNoise()) {
blob->plot(win, body_colour, child_colour);
}
}
}
@ -441,22 +455,29 @@ ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, BlobTextFl
case BRT_UNKNOWN:
return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
case BRT_VERT_TEXT:
if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) {
return ScrollView::GREEN;
if (flow_type == BTFT_CHAIN)
}
if (flow_type == BTFT_CHAIN) {
return ScrollView::LIME_GREEN;
}
return ScrollView::YELLOW;
case BRT_TEXT:
if (flow_type == BTFT_STRONG_CHAIN)
if (flow_type == BTFT_STRONG_CHAIN) {
return ScrollView::BLUE;
if (flow_type == BTFT_TEXT_ON_IMAGE)
}
if (flow_type == BTFT_TEXT_ON_IMAGE) {
return ScrollView::LIGHT_BLUE;
if (flow_type == BTFT_CHAIN)
}
if (flow_type == BTFT_CHAIN) {
return ScrollView::MEDIUM_BLUE;
if (flow_type == BTFT_LEADER)
}
if (flow_type == BTFT_LEADER) {
return ScrollView::WHEAT;
if (flow_type == BTFT_NONTEXT)
}
if (flow_type == BTFT_NONTEXT) {
return ScrollView::PINK;
}
return ScrollView::MAGENTA;
default:
return ScrollView::GREY;
@ -471,8 +492,9 @@ ScrollView::Color BLOBNBOX::BoxColor() const {
// Draws this blob by forwarding to cblob_ptr->plot() with the given window and
// colours. Does nothing when no cblob is attached (cblob_ptr is nullptr).
void BLOBNBOX::plot(ScrollView *window,               // window to draw in
                    ScrollView::Color blob_colour,    // for outer bits
                    ScrollView::Color child_colour) { // for holes
  if (cblob_ptr != nullptr) {
    cblob_ptr->plot(window, blob_colour, child_colour);
  }
}
#endif
/**********************************************************************
@ -627,9 +649,10 @@ TBOX box_next( // get bounding box
do {
it->forward();
blob = it->data();
if (blob->cblob() == nullptr)
if (blob->cblob() == nullptr) {
// was pre-chopped
result += blob->bounding_box();
}
}
// until next real blob
while ((blob->cblob() == nullptr) || blob->joined_to_prev());
@ -725,17 +748,21 @@ void TO_ROW::add_blob( // constructor
allowed = row_size + y_min - y_max;
if (allowed > 0) {
available = top > y_max ? top - y_max : 0;
if (bottom < y_min)
if (bottom < y_min) {
// total available
available += y_min - bottom;
}
if (available > 0) {
available += available; // do it gradually
if (available < allowed)
if (available < allowed) {
available = allowed;
if (bottom < y_min)
}
if (bottom < y_min) {
y_min -= (y_min - bottom) * allowed / available;
if (top > y_max)
}
if (top > y_max) {
y_max += (top - y_max) * allowed / available;
}
}
}
}
@ -751,16 +778,18 @@ void TO_ROW::insert_blob( // constructor
) {
BLOBNBOX_IT it = &blobs; // list of blobs
if (it.empty())
if (it.empty()) {
it.add_before_then_move(blob);
else {
} else {
it.mark_cycle_pt();
while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left())
while (!it.cycled_list() && it.data()->bounding_box().left() <= blob->bounding_box().left()) {
it.forward();
if (it.cycled_list())
}
if (it.cycled_list()) {
it.add_to_end(blob);
else
} else {
it.add_before_stay_put(blob);
}
}
}
@ -776,19 +805,22 @@ void TO_ROW::compute_vertical_projection() { // project whole row
TBOX blob_box; // bounding box
BLOBNBOX_IT blob_it = blob_list();
if (blob_it.empty())
if (blob_it.empty()) {
return;
}
row_box = blob_it.data()->bounding_box();
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward())
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
row_box += blob_it.data()->bounding_box();
}
projection.set_range(row_box.left() - PROJECTION_MARGIN, row_box.right() + PROJECTION_MARGIN);
projection_left = row_box.left() - PROJECTION_MARGIN;
projection_right = row_box.right() + PROJECTION_MARGIN;
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
blob = blob_it.data();
if (blob->cblob() != nullptr)
if (blob->cblob() != nullptr) {
vertical_cblob_projection(blob->cblob(), &projection);
}
}
}
@ -959,14 +991,15 @@ static void SizeFilterBlobs(int min_height, int max_height, BLOBNBOX_LIST *src_l
blob->ReInit();
int width = blob->bounding_box().width();
int height = blob->bounding_box().height();
if (height < min_height && (width < min_height || width > max_height))
if (height < min_height && (width < min_height || width > max_height)) {
noise_it.add_after_then_move(blob);
else if (height > max_height)
} else if (height > max_height) {
large_it.add_after_then_move(blob);
else if (height < min_height)
} else if (height < min_height) {
small_it.add_after_then_move(blob);
else
} else {
medium_it.add_after_then_move(blob);
}
}
}

View File

@ -124,10 +124,12 @@ enum BlobTextFlowType {
// this cannot be true if t1 == t2, so the result is undefined.
// Decides which of two flow types wins when merging.
// Returns false if type1 is BTFT_LEADER, otherwise true if type2 is
// BTFT_LEADER, otherwise the result of the enum comparison type1 >= type2.
// Because type1 is tested first, two LEADER arguments yield false.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; each unbraced `if` is the old form and the braced `if` that
// follows it is its replacement.
inline bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2) {
// LEADER always loses.
if (type1 == BTFT_LEADER)
if (type1 == BTFT_LEADER) {
return false;
if (type2 == BTFT_LEADER)
}
if (type2 == BTFT_LEADER) {
return true;
}
// With those out of the way, the ordering of the enum determines the result.
return type1 >= type2;
}
@ -148,8 +150,9 @@ public:
area = static_cast<int>(srcblob->area());
}
// Destructor: deletes cblob_ptr only when owns_cblob_ is set; otherwise the
// pointer is left untouched.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `if` is the old form and the braced `if` directly
// below it is its replacement.
~BLOBNBOX() {
if (owns_cblob_)
if (owns_cblob_) {
delete cblob_ptr;
}
}
static BLOBNBOX *RealBlob(C_OUTLINE *outline) {
auto *blob = new C_BLOB(outline);
@ -470,8 +473,9 @@ public:
right_rule_ = 0;
left_crossing_rule_ = 0;
right_crossing_rule_ = 0;
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0)
if (area_stroke_width_ == 0.0f && area > 0 && cblob() != nullptr && cblob()->perimeter() != 0) {
area_stroke_width_ = 2.0f * area / cblob()->perimeter();
}
owner_ = nullptr;
base_char_top_ = box.top();
base_char_bottom_ = box.bottom();

View File

@ -102,8 +102,9 @@ TESSLINE *TESSLINE::BuildFromOutlineList(EDGEPT *outline) {
EDGEPT *pt = outline;
do {
pt->step_count = pt->next->start_step - pt->start_step;
if (pt->step_count < 0)
if (pt->step_count < 0) {
pt->step_count += pt->src_outline->pathlength();
}
pt = pt->next;
} while (pt != outline);
}
@ -140,8 +141,9 @@ void TESSLINE::CopyFrom(const TESSLINE &src) {
// Deletes owned data.
void TESSLINE::Clear() {
if (loop == nullptr)
if (loop == nullptr) {
return;
}
EDGEPT *this_edge = loop;
do {
@ -220,14 +222,18 @@ void TESSLINE::ComputeBoundingBox() {
EDGEPT *this_edge = loop;
do {
if (!this_edge->IsHidden() || !this_edge->prev->IsHidden()) {
if (this_edge->pos.x < minx)
if (this_edge->pos.x < minx) {
minx = this_edge->pos.x;
if (this_edge->pos.y < miny)
}
if (this_edge->pos.y < miny) {
miny = this_edge->pos.y;
if (this_edge->pos.x > maxx)
}
if (this_edge->pos.x > maxx) {
maxx = this_edge->pos.x;
if (this_edge->pos.y > maxy)
}
if (this_edge->pos.y > maxy) {
maxy = this_edge->pos.y;
}
}
this_edge = this_edge->next;
} while (this_edge != loop);
@ -262,19 +268,21 @@ TBOX TESSLINE::bounding_box() const {
#ifndef GRAPHICS_DISABLED
// Draws this outline in the given window.
// The pen is child_color when is_hole is set and color otherwise. Drawing
// starts with the cursor at `start`, then walks the circular EDGEPT list from
// `loop` until it returns to `loop`.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; each unbraced `if`/`else` line is the old form and the braced line
// that follows it is its replacement.
void TESSLINE::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
if (is_hole)
if (is_hole) {
window->Pen(child_color);
else
} else {
window->Pen(color);
}
window->SetCursor(start.x, start.y);
EDGEPT *pt = loop;
do {
// The hidden flag is read from the point being left, before advancing.
bool prev_hidden = pt->IsHidden();
pt = pt->next;
// A segment leaving a hidden point is skipped by moving the cursor without
// drawing; any other segment is drawn to the new point.
if (prev_hidden)
if (prev_hidden) {
window->SetCursor(pt->pos.x, pt->pos.y);
else
} else {
window->DrawTo(pt->pos.x, pt->pos.y);
}
} while (pt != loop);
}
#endif // !GRAPHICS_DISABLED
@ -287,10 +295,12 @@ EDGEPT *TESSLINE::FindBestStartPt() const {
// Iterate the polygon.
EDGEPT *pt = loop;
do {
if (pt->IsHidden())
if (pt->IsHidden()) {
continue;
if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline)
}
if (pt->prev->IsHidden() || pt->prev->src_outline != pt->src_outline) {
return pt; // Qualifies as the best.
}
if (pt->start_step < best_step) {
best_step = pt->start_step;
best_start = pt;
@ -368,10 +378,11 @@ void TBLOB::CopyFrom(const TBLOB &src) {
TESSLINE *prev_outline = nullptr;
for (TESSLINE *srcline = src.outlines; srcline != nullptr; srcline = srcline->next) {
auto *new_outline = new TESSLINE(*srcline);
if (outlines == nullptr)
if (outlines == nullptr) {
outlines = new_outline;
else
} else {
prev_outline->next = new_outline;
}
prev_outline = new_outline;
}
denorm_ = src.denorm_;
@ -442,8 +453,9 @@ void TBLOB::ComputeBoundingBoxes() {
// Returns the number of outlines.
// Counts the nodes of the singly linked list that begins at `outlines` and is
// chained through `next`; returns 0 when `outlines` is null. The cost is one
// pass over the list.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `for` is the old form and the braced `for` directly
// below it is its replacement.
int TBLOB::NumOutlines() const {
int result = 0;
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
++result;
}
return result;
}
@ -454,8 +466,9 @@ int TBLOB::NumOutlines() const {
* bounding box of the union of all top-level outlines in the blob.
**********************************************************************/
TBOX TBLOB::bounding_box() const {
if (outlines == nullptr)
if (outlines == nullptr) {
return TBOX(0, 0, 0, 0);
}
TESSLINE *outline = outlines;
TBOX box = outline->bounding_box();
for (outline = outline->next; outline != nullptr; outline = outline->next) {
@ -496,8 +509,9 @@ void TBLOB::CorrectBlobOrder(TBLOB *next) {
#ifndef GRAPHICS_DISABLED
// Draws every outline of this blob by calling plot() on each node of the
// `outlines` list with the same window and colors.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `for` is the old form and the braced `for` directly
// below it is its replacement.
void TBLOB::plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color) {
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
outline->plot(window, color, child_color);
}
}
#endif // !GRAPHICS_DISABLED
@ -515,10 +529,12 @@ int TBLOB::ComputeMoments(FCOORD *center, FCOORD *second_moments) const {
// The 2nd moments are just the standard deviation of the point positions.
double x2nd = sqrt(accumulator.x_variance());
double y2nd = sqrt(accumulator.y_variance());
if (x2nd < 1.0)
if (x2nd < 1.0) {
x2nd = 1.0;
if (y2nd < 1.0)
}
if (y2nd < 1.0) {
y2nd = 1.0;
}
second_moments->set_x(x2nd);
second_moments->set_y(y2nd);
return accumulator.count();
@ -548,10 +564,12 @@ void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coor
y_coords.resize(box.width());
CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
// Sort the output vectors.
for (auto &coord : x_coords)
for (auto &coord : x_coords) {
std::sort(coord.begin(), coord.end());
for (auto &coord : y_coords)
}
for (auto &coord : y_coords) {
std::sort(coord.begin(), coord.end());
}
}
// Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
@ -563,8 +581,9 @@ static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator)
int xend = IntCastRounded(std::max(pt1.x(), pt2.x()));
int ystart = IntCastRounded(std::min(pt1.y(), pt2.y()));
int yend = IntCastRounded(std::max(pt1.y(), pt2.y()));
if (xstart == xend && ystart == yend)
if (xstart == xend && ystart == yend) {
return; // Nothing to do.
}
double weight = step.length() / (xend - xstart + yend - ystart);
// Compute and save the y-position at the middle of each x-step.
for (int x = xstart; x < xend; ++x) {
@ -658,8 +677,9 @@ static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const
// bounds of the outline steps/ due to wrap-around, so we use % step_length
// everywhere, except for start_index.
int end_index = lastpt->start_step + lastpt->step_count;
if (end_index <= start_index)
if (end_index <= start_index) {
end_index += step_length;
}
// pos is the integer coordinates of the binary image steps.
ICOORD pos = outline->position_at_index(start_index);
FCOORD origin(box.left(), box.bottom());
@ -733,11 +753,13 @@ void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
// Iterate the polygon.
EDGEPT *loop_pt = ol->FindBestStartPt();
EDGEPT *pt = loop_pt;
if (pt == nullptr)
if (pt == nullptr) {
continue;
}
do {
if (pt->IsHidden())
if (pt->IsHidden()) {
continue;
}
// Find a run of equal src_outline.
EDGEPT *last_pt = pt;
do {
@ -771,8 +793,9 @@ void TWERD::BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inver
float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint,
const TBOX *norm_box, DENORM *word_denorm) {
TBOX word_box = bounding_box();
if (norm_box != nullptr)
if (norm_box != nullptr) {
word_box = *norm_box;
}
float word_middle = (word_box.left() + word_box.right()) / 2.0f;
float input_y_offset = 0.0f;
auto final_y_offset = static_cast<float>(kBlnBaselineOffset);
@ -849,8 +872,9 @@ TBOX TWERD::bounding_box() const {
// Merges the blobs from start to end, not including end, and deletes
// the blobs between start and end.
void TWERD::MergeBlobs(int start, int end) {
if (start >= blobs.size() - 1)
if (start >= blobs.size() - 1) {
return; // Nothing to do.
}
TESSLINE *outline = blobs[start]->outlines;
for (int i = start + 1; i < end && i < blobs.size(); ++i) {
TBLOB *next_blob = blobs[i];
@ -859,8 +883,9 @@ void TWERD::MergeBlobs(int start, int end) {
blobs[start]->outlines = next_blob->outlines;
outline = blobs[start]->outlines;
} else {
while (outline->next != nullptr)
while (outline->next != nullptr) {
outline = outline->next;
}
outline->next = next_blob->outlines;
next_blob->outlines = nullptr;
}
@ -893,21 +918,24 @@ void TWERD::plot(ScrollView *window) {
* call to divide_blobs.
**********************************************************************/
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location) {
if (blob->outlines == nullptr || blob->outlines->next == nullptr)
if (blob->outlines == nullptr || blob->outlines->next == nullptr) {
return false; // Need at least 2 outlines for it to be possible.
}
int max_gap = 0;
TPOINT vertical = italic_blob ? kDivisibleVerticalItalic : kDivisibleVerticalUpright;
for (TESSLINE *outline1 = blob->outlines; outline1 != nullptr; outline1 = outline1->next) {
if (outline1->is_hole)
if (outline1->is_hole) {
continue; // Holes do not count as separable.
}
TPOINT mid_pt1(static_cast<int16_t>((outline1->topleft.x + outline1->botright.x) / 2),
static_cast<int16_t>((outline1->topleft.y + outline1->botright.y) / 2));
int mid_prod1 = mid_pt1.cross(vertical);
int min_prod1, max_prod1;
outline1->MinMaxCrossProduct(vertical, &min_prod1, &max_prod1);
for (TESSLINE *outline2 = outline1->next; outline2 != nullptr; outline2 = outline2->next) {
if (outline2->is_hole)
if (outline2->is_hole) {
continue; // Holes do not count as separable.
}
TPOINT mid_pt2(static_cast<int16_t>((outline2->topleft.x + outline2->botright.x) / 2),
static_cast<int16_t>((outline2->topleft.y + outline2->botright.y) / 2));
int mid_prod2 = mid_pt2.cross(vertical);
@ -951,26 +979,30 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT
int mid_prod = mid_pt.cross(vertical);
if (mid_prod < location_prod) {
// Outline is in left blob.
if (outline1)
if (outline1) {
outline1->next = outline;
else
} else {
blob->outlines = outline;
}
outline1 = outline;
} else {
// Outline is in right blob.
if (outline2)
if (outline2) {
outline2->next = outline;
else
} else {
other_blob->outlines = outline;
}
outline2 = outline;
}
outline = outline->next;
}
if (outline1)
if (outline1) {
outline1->next = nullptr;
if (outline2)
}
if (outline2) {
outline2->next = nullptr;
}
}
} // namespace tesseract

View File

@ -132,14 +132,18 @@ struct EDGEPT {
const EDGEPT *pt = this;
do {
pt = pt->next;
if (pt->pos.x < box.left())
if (pt->pos.x < box.left()) {
box.set_left(pt->pos.x);
if (pt->pos.x > box.right())
}
if (pt->pos.x > box.right()) {
box.set_right(pt->pos.x);
if (pt->pos.y < box.bottom())
}
if (pt->pos.y < box.bottom()) {
box.set_bottom(pt->pos.y);
if (pt->pos.y > box.top())
}
if (pt->pos.y > box.top()) {
box.set_top(pt->pos.y);
}
} while (pt != end && pt != this);
return box;
}
@ -162,8 +166,9 @@ struct EDGEPT {
int count = 0;
const EDGEPT *pt = this;
do {
if (pt == end)
if (pt == end) {
return true;
}
pt = pt->next;
++count;
} while (pt != this && count <= min_points);
@ -251,8 +256,9 @@ struct TESSLINE {
if (Contains(pt1) && Contains(pt2)) {
EDGEPT *pt = loop;
do {
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos))
if (TPOINT::IsCrossed(pt1, pt2, pt->pos, pt->next->pos)) {
return true;
}
pt = pt->next;
} while (pt != loop);
}
@ -336,16 +342,18 @@ struct TBLOB {
// Returns true if the given line segment crosses any outline of this blob.
// Walks the `outlines` list and returns true at the first outline whose
// SegmentCrosses(pt1, pt2) is true; returns false if none does, including
// when the list is empty.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `if` is the old form and the braced `if` directly
// below it is its replacement.
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const {
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
if (outline->SegmentCrosses(pt1, pt2))
if (outline->SegmentCrosses(pt1, pt2)) {
return true;
}
}
return false;
}
// Returns true if the point is contained within any of the outline boxes.
// Walks the `outlines` list and returns true at the first outline whose
// Contains(pt) is true; returns false if none does, including when the list
// is empty.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `if` is the old form and the braced `if` directly
// below it is its replacement.
bool Contains(const TPOINT &pt) const {
for (const TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
if (outline->Contains(pt))
if (outline->Contains(pt)) {
return true;
}
}
return false;
}
@ -368,8 +376,9 @@ struct TBLOB {
// Returns the sum of outline->BBArea() over every node of the `outlines`
// list; 0 when the list is empty. The per-outline values are simply added.
// NOTE(review): presumably BBArea() on an outline is its bounding-box area,
// in which case overlapping boxes are counted more than once - confirm.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `for` is the old form and the braced `for` directly
// below it is its replacement.
int BBArea() const {
int total_area = 0;
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next)
for (TESSLINE *outline = outlines; outline != nullptr; outline = outline->next) {
total_area += outline->BBArea();
}
return total_area;
}

View File

@ -77,8 +77,9 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::
std::vector<int> *pages) {
std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
std::vector<char> box_data(std::istreambuf_iterator<char>(input), {});
if (box_data.empty())
if (box_data.empty()) {
return false;
}
// Convert the array of bytes to a string, so it can be used by the parser.
box_data.push_back('\0');
return ReadMemBoxes(target_page, skip_blanks, &box_data[0],
@ -91,34 +92,41 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
std::vector<std::string> *box_texts, std::vector<int> *pages) {
std::string box_str(box_data);
std::vector<std::string> lines = split(box_str, '\n');
if (lines.empty())
if (lines.empty()) {
return false;
}
int num_boxes = 0;
for (auto &line : lines) {
int page = 0;
std::string utf8_str;
TBOX box;
if (!ParseBoxFileStr(line.c_str(), &page, utf8_str, &box)) {
if (continue_on_failure)
if (continue_on_failure) {
continue;
else
} else {
return false;
}
}
if (skip_blanks && (utf8_str == " " || utf8_str == "\t"))
if (skip_blanks && (utf8_str == " " || utf8_str == "\t")) {
continue;
if (target_page >= 0 && page != target_page)
}
if (target_page >= 0 && page != target_page) {
continue;
if (boxes != nullptr)
}
if (boxes != nullptr) {
boxes->push_back(box);
if (texts != nullptr)
}
if (texts != nullptr) {
texts->push_back(utf8_str);
}
if (box_texts != nullptr) {
std::string full_text;
MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
box_texts->push_back(full_text);
}
if (pages != nullptr)
if (pages != nullptr) {
pages->push_back(page);
}
++num_boxes;
}
return num_boxes > 0;
@ -153,21 +161,25 @@ bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string
buffptr = buff;
const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
buffptr += 3; // Skip unicode file designation.
}
// Check for blank lines in box file
if (*buffptr == '\n' || *buffptr == '\0')
if (*buffptr == '\n' || *buffptr == '\0') {
continue;
}
// Skip blank boxes.
if (*buffptr == ' ' || *buffptr == '\t')
if (*buffptr == ' ' || *buffptr == '\t') {
continue;
}
if (*buffptr != '\0') {
if (!ParseBoxFileStr(buffptr, &page, utf8_str, bounding_box)) {
tprintf("Box file format error on line %i; ignored\n", *line_number);
continue;
}
if (target_page >= 0 && target_page != page)
continue; // Not on the appropriate page.
if (target_page >= 0 && target_page != page) {
continue; // Not on the appropriate page.
}
return true; // Successfully read a box.
}
}
@ -196,19 +208,22 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf
int uch_len = 0;
// Skip unicode file designation, if present.
const auto *ubuf = reinterpret_cast<const unsigned char *>(buffptr);
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf) {
buffptr += 3;
}
// Allow a single blank as the UTF-8 string. Check for empty string and
// then blindly eat the first character.
if (*buffptr == '\0')
if (*buffptr == '\0') {
return false;
}
do {
uch[uch_len++] = *buffptr++;
} while (*buffptr != '\0' && *buffptr != ' ' && *buffptr != '\t' &&
uch_len < kBoxReadBufSize - 1);
uch[uch_len] = '\0';
if (*buffptr != '\0')
if (*buffptr != '\0') {
++buffptr;
}
int x_min = INT_MAX;
int y_min = INT_MAX;
int x_max = INT_MIN;
@ -244,10 +259,12 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf
used += new_used;
}
utf8_str = uch;
if (x_min > x_max)
if (x_min > x_max) {
std::swap(x_min, x_max);
if (y_min > y_max)
}
if (y_min > y_max) {
std::swap(y_min, y_max);
}
bounding_box->set_to_given_coords(x_min, y_min, x_max, y_max);
return true; // Successfully read a box.
}

View File

@ -46,8 +46,9 @@ void BoxWord::CopyFrom(const BoxWord &src) {
length_ = src.length_;
boxes_.clear();
boxes_.reserve(length_);
for (int i = 0; i < length_; ++i)
for (int i = 0; i < length_; ++i) {
boxes_.push_back(src.boxes_[i]);
}
}
// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
@ -97,25 +98,31 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
C_BLOB_IT b_it(original_word->cblob_list());
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
TBOX blob_box = b_it.data()->bounding_box();
if (block != nullptr)
if (block != nullptr) {
blob_box.rotate(block->re_rotation());
}
if (blob_box.major_overlap(box)) {
original_box += blob_box;
}
}
if (!original_box.null_box()) {
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance)) {
box.set_left(original_box.left());
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance))
}
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance)) {
box.set_right(original_box.right());
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
}
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
box.set_top(original_box.top());
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance))
}
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance)) {
box.set_bottom(original_box.bottom());
}
}
original_box = original_word->bounding_box();
if (block != nullptr)
if (block != nullptr) {
original_box.rotate(block->re_rotation());
}
boxes_[i] = box.intersection(original_box);
}
ComputeBoundingBox();
@ -126,25 +133,28 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
// Merges boxes_[start+1 .. end-1] into boxes_[start] and removes the merged
// entries, leaving one box in place of the range [start, end).
// Both indices are first passed through ClipToRange(..., 0, length_)
// (presumably a clamp to that interval - confirm). Nothing happens when the
// clipped range holds fewer than two boxes.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; each unbraced `if`/`for` is the old form and the braced line that
// follows it is its replacement.
void BoxWord::MergeBoxes(int start, int end) {
start = ClipToRange(start, 0, length_);
end = ClipToRange(end, 0, length_);
if (end <= start + 1)
if (end <= start + 1) {
return;
}
// Accumulate the later boxes into the first one using TBOX's operator+=.
for (int i = start + 1; i < end; ++i) {
boxes_[start] += boxes_[i];
}
// Number of entries that disappear; length_ is reduced before the shift so
// the loop below stops at the new end.
int shrinkage = end - 1 - start;
length_ -= shrinkage;
// Slide the entries that followed the merged range down over the gap.
for (int i = start + 1; i < length_; ++i)
for (int i = start + 1; i < length_; ++i) {
boxes_[i] = boxes_[i + shrinkage];
}
boxes_.resize(length_);
}
// Inserts a new box before the given index.
// Recomputes the bounding box.
// Inserts `box` at position `index` when index < length_, otherwise appends
// it at the end. length_ is then refreshed from boxes_.size() and bbox_ is
// recomputed through ComputeBoundingBox().
// NOTE(review): a negative index is not checked here and would reach
// boxes_.begin() + index - confirm callers never pass one.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; each unbraced `if`/`else` line is the old form and the braced line
// that follows it is its replacement.
void BoxWord::InsertBox(int index, const TBOX &box) {
if (index < length_)
if (index < length_) {
boxes_.insert(boxes_.begin() + index, box);
else
} else {
boxes_.push_back(box);
}
length_ = boxes_.size();
ComputeBoundingBox();
}
@ -175,8 +185,9 @@ void BoxWord::DeleteAllBoxes() {
// Computes the bounding box of the word.
// Resets bbox_ to a default-constructed TBOX and then accumulates the first
// length_ entries of boxes_ into it with operator+=.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `for` is the old form and the braced `for` directly
// below it is its replacement.
void BoxWord::ComputeBoundingBox() {
bbox_ = TBOX();
for (int i = 0; i < length_; ++i)
for (int i = 0; i < length_; ++i) {
bbox_ += boxes_[i];
}
}
// This and other putatively are the same, so call the (permanent) callback
@ -185,8 +196,9 @@ void BoxWord::ComputeBoundingBox() {
// Compares this word's boxes with the blobs of `other`, position by position.
// For each index i below both length_ and other.NumBlobs(), cb(i) is invoked
// when other.blobs[i]->bounding_box() equals boxes_[i]. Indices that do not
// match are skipped without stopping the scan.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `if` is the old form and the braced `if` directly
// below it is its replacement.
void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
TBOX blob_box = other.blobs[i]->bounding_box();
if (blob_box == boxes_[i])
if (blob_box == boxes_[i]) {
cb(i);
}
}
}

View File

@ -115,8 +115,9 @@ C_OUTLINE::C_OUTLINE(
if ((dirdiff == 64 || dirdiff == -64) && stepindex > 0) {
stepindex -= 2; // cancel there-and-back
prevdir = stepindex >= 0 ? step_dir(stepindex) : lastdir;
} else
} else {
prevdir = dir;
}
}
ASSERT_HOST(pos.x() == startpt.x() && pos.y() == startpt.y());
do {
@ -124,8 +125,9 @@ C_OUTLINE::C_OUTLINE(
if (dirdiff == 64 || dirdiff == -64) {
start += step(0);
stepindex -= 2; // cancel there-and-back
for (int i = 0; i < stepindex; ++i)
for (int i = 0; i < stepindex; ++i) {
set_step(i, step_dir(i + 1));
}
}
} while (stepindex > 1 && (dirdiff == 64 || dirdiff == -64));
stepcount = stepindex;
@ -221,11 +223,13 @@ C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(nullptr) {
}
start += step(0);
destindex -= 2;
for (int i = 0; i < destindex; ++i)
for (int i = 0; i < destindex; ++i) {
set_step(i, step_dir(i + 1));
}
}
if (destindex >= 4)
if (destindex >= 4) {
break;
}
}
ASSERT_HOST(destindex <= stepcount);
stepcount = destindex;
@ -269,14 +273,16 @@ int32_t C_OUTLINE::area() const {
for (stepindex = 0; stepindex < total_steps; stepindex++) {
// all intersected
next_step = step(stepindex);
if (next_step.x() < 0)
if (next_step.x() < 0) {
total += pos.y();
else if (next_step.x() > 0)
} else if (next_step.x() > 0) {
total -= pos.y();
}
pos += next_step;
}
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
total += it.data()->area(); // add areas of children
}
return total;
}
@ -294,8 +300,9 @@ int32_t C_OUTLINE::perimeter() const {
C_OUTLINE_IT it(const_cast<C_OUTLINE_LIST *>(&children));
total_steps = pathlength();
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
total_steps += it.data()->pathlength(); // Add perimeters of children.
}
return total_steps;
}
@ -315,16 +322,18 @@ int32_t C_OUTLINE::outer_area() const {
pos = start_pos();
total_steps = pathlength();
if (total_steps == 0)
if (total_steps == 0) {
return box.area();
}
total = 0;
for (stepindex = 0; stepindex < total_steps; stepindex++) {
// all intersected
next_step = step(stepindex);
if (next_step.x() < 0)
if (next_step.x() < 0) {
total += pos.y();
else if (next_step.x() > 0)
} else if (next_step.x() > 0) {
total -= pos.y();
}
pos += next_step;
}
@ -372,8 +381,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
next_step = step(stepindex);
pos += next_step;
if (next_step.x() < 0) {
if (looking_for_max_x && pos.x() < min_x)
if (looking_for_max_x && pos.x() < min_x) {
min_x = pos.x();
}
if (looking_for_min_x && max_x - pos.x() > threshold) {
if (looking_for_max_x) {
initial_x = max_x;
@ -385,8 +395,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
min_x = pos.x(); // reset min
}
} else if (next_step.x() > 0) {
if (looking_for_min_x && pos.x() > max_x)
if (looking_for_min_x && pos.x() > max_x) {
max_x = pos.x();
}
if (looking_for_max_x && pos.x() - min_x > threshold) {
if (looking_for_min_x) {
initial_x = min_x; // remember first min
@ -398,8 +409,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
max_x = pos.x();
}
} else if (next_step.y() < 0) {
if (looking_for_max_y && pos.y() < min_y)
if (looking_for_max_y && pos.y() < min_y) {
min_y = pos.y();
}
if (looking_for_min_y && max_y - pos.y() > threshold) {
if (looking_for_max_y) {
initial_y = max_y; // remember first max
@ -411,8 +423,9 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
min_y = pos.y(); // reset min
}
} else {
if (looking_for_min_y && pos.y() > max_y)
if (looking_for_min_y && pos.y() > max_y) {
max_y = pos.y();
}
if (looking_for_max_y && pos.y() - min_y > threshold) {
if (looking_for_min_y) {
initial_y = min_y; // remember first min
@ -426,26 +439,30 @@ int32_t C_OUTLINE::count_transitions(int32_t threshold) {
}
}
if (first_was_max_x && looking_for_min_x) {
if (max_x - initial_x > threshold)
if (max_x - initial_x > threshold) {
total++;
else
} else {
total--;
}
} else if (!first_was_max_x && looking_for_max_x) {
if (initial_x - min_x > threshold)
if (initial_x - min_x > threshold) {
total++;
else
} else {
total--;
}
}
if (first_was_max_y && looking_for_min_y) {
if (max_y - initial_y > threshold)
if (max_y - initial_y > threshold) {
total++;
else
} else {
total--;
}
} else if (!first_was_max_y && looking_for_max_y) {
if (initial_y - min_y > threshold)
if (initial_y - min_y > threshold) {
total++;
else
} else {
total--;
}
}
return total;
@ -463,22 +480,27 @@ bool C_OUTLINE::operator<(const C_OUTLINE &other) const {
ICOORD pos; // position of point
int32_t stepindex; // index to cstep
if (!box.overlap(other.box))
if (!box.overlap(other.box)) {
return false; // can't be contained
if (stepcount == 0)
}
if (stepcount == 0) {
return other.box.contains(this->box);
}
pos = start;
for (stepindex = 0; stepindex < stepcount && (count = other.winding_number(pos)) == INTERSECTING;
stepindex++)
stepindex++) {
pos += step(stepindex); // try all points
}
if (count == INTERSECTING) {
// all intersected
pos = other.start;
for (stepindex = 0;
stepindex < other.stepcount && (count = winding_number(pos)) == INTERSECTING; stepindex++)
stepindex < other.stepcount && (count = winding_number(pos)) == INTERSECTING;
stepindex++) {
// try other way round
pos += other.step(stepindex);
}
return count == INTERSECTING || count == 0;
}
return count != 0;
@ -505,16 +527,18 @@ int16_t C_OUTLINE::winding_number(ICOORD point) const {
// crossing the line
if (vec.y() <= 0 && vec.y() + stepvec.y() > 0) {
cross = vec * stepvec; // cross product
if (cross > 0)
if (cross > 0) {
count++; // crossing right half
else if (cross == 0)
} else if (cross == 0) {
return INTERSECTING; // going through point
}
} else if (vec.y() > 0 && vec.y() + stepvec.y() <= 0) {
cross = vec * stepvec;
if (cross < 0)
if (cross < 0) {
count--; // crossing back
else if (cross == 0)
} else if (cross == 0) {
return INTERSECTING; // illegal
}
}
vec += stepvec; // sum vectors
}
@ -534,8 +558,9 @@ int16_t C_OUTLINE::turn_direction() const { // winding number
int8_t dirdiff; // direction difference
int16_t count; // winding count
if (stepcount == 0)
if (stepcount == 0) {
return 128;
}
count = 0;
prevdir = step_dir(stepcount - 1);
for (stepindex = 0; stepindex < stepcount; stepindex++) {
@ -584,8 +609,9 @@ void C_OUTLINE::move(const ICOORD vec) {
box.move(vec);
start += vec;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->move(vec); // move child outlines
}
}
/**
@ -595,16 +621,18 @@ void C_OUTLINE::move(const ICOORD vec) {
* (probably due to excessive length).
*/
// Recursively checks the nesting of this outline and its children.
// Returns true when stepcount is 0. Otherwise returns false as soon as a
// child's outer_area() has the same sign as this outline's outer_area()
// (their product is > 0), or a child is itself not legally nested. Returns
// true when every child passes.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; each unbraced `if` is the old form and the braced `if` that
// follows it is its replacement.
bool C_OUTLINE::IsLegallyNested() const {
if (stepcount == 0)
if (stepcount == 0) {
return true;
}
// Held as int64_t so the product with a child's area below is computed in
// 64 bits.
int64_t parent_area = outer_area();
// We aren't going to modify the list, or its contents, but there is
// no const iterator.
C_OUTLINE_IT child_it(const_cast<C_OUTLINE_LIST *>(&children));
for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
const C_OUTLINE *child = child_it.data();
if (child->outer_area() * parent_area > 0 || !child->IsLegallyNested())
if (child->outer_area() * parent_area > 0 || !child->IsLegallyNested()) {
return false;
}
}
return true;
}
@ -659,8 +687,9 @@ static void ComputeGradient(const l_uint32 *data, int wpl, int x, int y, int wid
*/
static bool EvaluateVerticalDiff(const l_uint32 *data, int wpl, int diff_sign, int x, int y,
int height, int *best_diff, int *best_sum, int *best_y) {
if (y <= 0 || y >= height)
if (y <= 0 || y >= height) {
return false;
}
const l_uint32 *line = data + y * wpl;
int pixel1 = GET_DATA_BYTE(line - wpl, x);
int pixel2 = GET_DATA_BYTE(line, x);
@ -680,8 +709,9 @@ static bool EvaluateVerticalDiff(const l_uint32 *data, int wpl, int diff_sign, i
*/
static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, int width,
int *best_diff, int *best_sum, int *best_x) {
if (x <= 0 || x >= width)
if (x <= 0 || x >= width) {
return false;
}
int pixel1 = GET_DATA_BYTE(line, x - 1);
int pixel2 = GET_DATA_BYTE(line, x);
int diff = (pixel2 - pixel1) * diff_sign;
@ -709,8 +739,9 @@ static bool EvaluateHorizontalDiff(const l_uint32 *line, int diff_sign, int x, i
* direction can be used to ignore the vertical steps.
*/
void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
if (pixGetDepth(pix) != 8)
if (pixGetDepth(pix) != 8) {
return;
}
const l_uint32 *data = pixGetData(pix);
int wpl = pixGetWpl(pix);
int width = pixGetWidth(pix);
@ -780,8 +811,9 @@ void C_OUTLINE::ComputeEdgeOffsets(int threshold, Pix *pix) {
}
offsets[s].offset_numerator = ClipToRange<int>(offset, -INT8_MAX, INT8_MAX);
offsets[s].pixel_diff = ClipToRange<int>(best_diff, 0, UINT8_MAX);
if (negative)
if (negative) {
gradient = -gradient;
}
// Compute gradient angle quantized to 256 directions, rotated by 64 (pi/2)
// to convert from gradient direction to edge direction.
offsets[s].direction = Modulo(FCOORD::binary_angle_plus_pi(gradient.angle()) + 64, 256);
@ -1020,10 +1052,11 @@ void C_OUTLINE::increment_step(int s, int increment, ICOORD *pos, int *dir_count
int dir_index = chain_code(step_index);
dir_counts[dir_index] += increment;
ICOORD step_vec = step(step_index);
if (step_vec.x() == 0)
if (step_vec.x() == 0) {
pos_totals[dir_index] += pos->x() * increment;
else
} else {
pos_totals[dir_index] += pos->y() * increment;
}
*pos += step_vec;
}

View File

@ -154,8 +154,9 @@ public:
// NOT to be used lightly, as it has to iterate the outline to find out.
// Returns the position reached from `start` after adding step(i) for every
// i in [0, index). An index of 0 or less returns `start` unchanged. The cost
// grows linearly with index.
// NOTE(review): this text appears to be a rendered patch with the +/- markers
// stripped; the unbraced `for` is the old form and the braced `for` directly
// below it is its replacement.
ICOORD position_at_index(int index) const {
ICOORD pos = start;
for (int i = 0; i < index; ++i)
for (int i = 0; i < index; ++i) {
pos += step(i);
}
return pos;
}
// Returns the sub-pixel accurate position given the integer position pos
@ -168,17 +169,19 @@ public:
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
float offset = offsets[index].offset_numerator;
offset /= offsets[index].pixel_diff;
if (step_to_next.x() != 0)
if (step_to_next.x() != 0) {
f_pos.set_y(f_pos.y() + offset);
else
} else {
f_pos.set_x(f_pos.x() + offset);
}
}
return f_pos;
}
// Returns the step direction for the given index or -1 if there is none.
int direction_at_index(int index) const {
if (offsets != nullptr && offsets[index].pixel_diff > 0)
if (offsets != nullptr && offsets[index].pixel_diff > 0) {
return offsets[index].direction;
}
return -1;
}
// Returns the edge strength for the given index.
@ -186,8 +189,9 @@ public:
// is binary). Returns 0 if the gradient direction conflicts with the
// step direction, indicating that this position could be skipped.
int edge_strength_at_index(int index) const {
if (offsets != nullptr)
if (offsets != nullptr) {
return offsets[index].pixel_diff;
}
return 1;
}
// Return the step as a chain code (0-3) related to the standard feature

View File

@ -72,16 +72,18 @@ double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2)
// Count the points and find the first and last kNumEndPoints.
int pt_count = pts_.size();
ICOORD *starts[kNumEndPoints];
if (skip_first >= pt_count)
if (skip_first >= pt_count) {
skip_first = pt_count - 1;
}
int start_count = 0;
int end_i = std::min(skip_first + kNumEndPoints, pt_count);
for (int i = skip_first; i < end_i; ++i) {
starts[start_count++] = &pts_[i].pt;
}
ICOORD *ends[kNumEndPoints];
if (skip_last >= pt_count)
if (skip_last >= pt_count) {
skip_last = pt_count - 1;
}
int end_count = 0;
end_i = std::max(0, pt_count - kNumEndPoints - skip_last);
for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
@ -90,10 +92,11 @@ double DetLineFit::Fit(int skip_first, int skip_last, ICOORD *pt1, ICOORD *pt2)
// 1 or 2 points need special treatment.
if (pt_count <= 2) {
*pt1 = *starts[0];
if (pt_count > 1)
if (pt_count > 1) {
*pt2 = *ends[0];
else
} else {
*pt2 = *pt1;
}
return 0.0;
}
// Although with between 2 and 2*kNumEndPoints-1 points, there will be
@ -213,12 +216,14 @@ double DetLineFit::EvaluateLineFit() {
// and returns the squared upper-quartile error distance.
double DetLineFit::ComputeUpperQuartileError() {
int num_errors = distances_.size();
if (num_errors == 0)
if (num_errors == 0) {
return 0.0;
}
// Get the absolute values of the errors.
for (int i = 0; i < num_errors; ++i) {
if (distances_[i].key() < 0)
if (distances_[i].key() < 0) {
distances_[i].key() = -distances_[i].key();
}
}
// Now get the upper quartile distance.
auto index = 3 * num_errors / 4;
@ -235,8 +240,9 @@ int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
int num_dists = distances_.size();
// Get the absolute values of the errors.
for (int i = 0; i < num_dists; ++i) {
if (distances_[i].key() > threshold)
if (distances_[i].key() > threshold) {
++num_misfits;
}
}
return num_misfits;
}
@ -265,8 +271,9 @@ void DetLineFit::ComputeDistances(const ICOORD &start, const ICOORD &end) {
// Ignore this point if it overlaps the previous one.
int separation = abs(dot - prev_dot);
if (separation < line_length * pts_[i].halfwidth ||
separation < line_length * pts_[i - 1].halfwidth)
separation < line_length * pts_[i - 1].halfwidth) {
continue;
}
}
distances_.emplace_back(dist, pts_[i].pt);
prev_abs_dist = abs_dist;
@ -286,8 +293,9 @@ void DetLineFit::ComputeConstrainedDistances(const FCOORD &direction, double min
FCOORD pt_vector = pt.pt;
// Compute |line_vector||pt_vector|sin(angle between)
double dist = direction * pt_vector;
if (min_dist <= dist && dist <= max_dist)
if (min_dist <= dist && dist <= max_dist) {
distances_.emplace_back(dist, pt.pt);
}
}
}

View File

@ -30,19 +30,22 @@ namespace tesseract {
// The return value is the tail of the best path.
DPPoint *DPPoint::Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size,
DPPoint *points) {
if (size <= 0 || max_step < min_step || min_step >= size)
return nullptr; // Degenerate, but not necessarily an error.
if (size <= 0 || max_step < min_step || min_step >= size) {
return nullptr; // Degenerate, but not necessarily an error.
}
ASSERT_HOST(min_step > 0); // Infinite loop possible if this is not true.
if (debug)
if (debug) {
tprintf("min = %d, max=%d\n", min_step, max_step);
}
// Evaluate the total cost at each point.
for (int i = 0; i < size; ++i) {
for (int offset = min_step; offset <= max_step; ++offset) {
DPPoint *prev = offset <= i ? points + i - offset : nullptr;
int64_t new_cost = (points[i].*cost_func)(prev);
if (points[i].best_prev_ != nullptr && offset > min_step * 2 &&
new_cost > points[i].total_cost_)
new_cost > points[i].total_cost_) {
break; // Find only the first minimum if going over twice the min.
}
}
points[i].total_cost_ += points[i].local_cost_;
if (debug) {

View File

@ -24,19 +24,23 @@ namespace tesseract {
// Writes to the given file. Returns false in case of error.
bool FontInfo::Serialize(FILE *fp) const {
if (!write_info(fp, *this))
if (!write_info(fp, *this)) {
return false;
if (!write_spacing_info(fp, *this))
}
if (!write_spacing_info(fp, *this)) {
return false;
}
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool FontInfo::DeSerialize(TFile *fp) {
if (!read_info(fp, this))
if (!read_info(fp, this)) {
return false;
if (!read_spacing_info(fp, this))
}
if (!read_spacing_info(fp, this)) {
return false;
}
return true;
}
@ -64,8 +68,9 @@ bool FontInfoTable::SetContainsFontProperties(int font_id,
const std::vector<ScoredFont> &font_set) const {
uint32_t properties = at(font_id).properties;
for (auto f : font_set) {
if (at(f.fontinfo_id).properties == properties)
if (at(f.fontinfo_id).properties == properties) {
return true;
}
}
return false;
}
@ -73,13 +78,15 @@ bool FontInfoTable::SetContainsFontProperties(int font_id,
// Returns true if the given set of fonts includes multiple properties.
bool FontInfoTable::SetContainsMultipleFontProperties(
const std::vector<ScoredFont> &font_set) const {
if (font_set.empty())
if (font_set.empty()) {
return false;
}
int first_font = font_set[0].fontinfo_id;
uint32_t properties = at(first_font).properties;
for (int f = 1; f < font_set.size(); ++f) {
if (at(font_set[f].fontinfo_id).properties != properties)
if (at(font_set[f].fontinfo_id).properties != properties) {
return true;
}
}
return false;
}
@ -138,12 +145,14 @@ void FontSetDeleteCallback(FontSet fs) {
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(TFile *f, FontInfo *fi) {
uint32_t size;
if (!f->DeSerialize(&size))
if (!f->DeSerialize(&size)) {
return false;
}
char *font_name = new char[size + 1];
fi->name = font_name;
if (!f->DeSerialize(font_name, size))
if (!f->DeSerialize(font_name, size)) {
return false;
}
font_name[size] = '\0';
return f->DeSerialize(&fi->properties);
}
@ -156,11 +165,13 @@ bool write_info(FILE *f, const FontInfo &fi) {
bool read_spacing_info(TFile *f, FontInfo *fi) {
int32_t vec_size, kern_size;
if (!f->DeSerialize(&vec_size))
if (!f->DeSerialize(&vec_size)) {
return false;
}
ASSERT_HOST(vec_size >= 0);
if (vec_size == 0)
if (vec_size == 0) {
return true;
}
fi->init_spacing(vec_size);
for (int i = 0; i < vec_size; ++i) {
auto *fs = new FontSpacingInfo();
@ -185,8 +196,9 @@ bool read_spacing_info(TFile *f, FontInfo *fi) {
bool write_spacing_info(FILE *f, const FontInfo &fi) {
int32_t vec_size = (fi.spacing_vec == nullptr) ? 0 : fi.spacing_vec->size();
if (!tesseract::Serialize(f, &vec_size))
if (!tesseract::Serialize(f, &vec_size)) {
return false;
}
int16_t x_gap_invalid = -1;
for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = fi.spacing_vec->at(i);
@ -211,8 +223,9 @@ bool write_spacing_info(FILE *f, const FontInfo &fi) {
}
bool read_set(TFile *f, FontSet *fs) {
if (!f->DeSerialize(&fs->size))
if (!f->DeSerialize(&fs->size)) {
return false;
}
fs->configs = new int[fs->size];
return f->DeSerialize(&fs->configs[0], fs->size);
}

View File

@ -98,12 +98,14 @@ struct FontInfo {
bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const {
const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
if (prev_fsi == nullptr || fsi == nullptr)
if (prev_fsi == nullptr || fsi == nullptr) {
return false;
}
size_t i = 0;
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
if (prev_fsi->kerned_unichar_ids[i] == uch_id)
if (prev_fsi->kerned_unichar_ids[i] == uch_id) {
break;
}
}
if (i < prev_fsi->kerned_unichar_ids.size()) {
*spacing = prev_fsi->kerned_x_gaps[i];
@ -154,11 +156,13 @@ struct FontSet {
int *configs; // FontInfo ids
bool operator==(const FontSet &rhs) const {
if (size != rhs.size)
if (size != rhs.size) {
return false;
}
for (int i = 0; i < size; ++i) {
if (configs[i] != rhs.configs[i])
if (configs[i] != rhs.configs[i]) {
return false;
}
}
return true;
}

View File

@ -86,65 +86,87 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
// Writes to the given file. Returns false in case of error.
bool ImageData::Serialize(TFile *fp) const {
if (!fp->Serialize(imagefilename_))
if (!fp->Serialize(imagefilename_)) {
return false;
if (!fp->Serialize(&page_number_))
}
if (!fp->Serialize(&page_number_)) {
return false;
if (!fp->Serialize(image_data_))
}
if (!fp->Serialize(image_data_)) {
return false;
if (!fp->Serialize(language_))
}
if (!fp->Serialize(language_)) {
return false;
if (!fp->Serialize(transcription_))
}
if (!fp->Serialize(transcription_)) {
return false;
if (!fp->Serialize(boxes_))
}
if (!fp->Serialize(boxes_)) {
return false;
if (!fp->Serialize(box_texts_))
}
if (!fp->Serialize(box_texts_)) {
return false;
}
int8_t vertical = vertical_text_;
return fp->Serialize(&vertical);
}
// Reads from the given file. Returns false in case of error.
bool ImageData::DeSerialize(TFile *fp) {
if (!fp->DeSerialize(imagefilename_))
if (!fp->DeSerialize(imagefilename_)) {
return false;
if (!fp->DeSerialize(&page_number_))
}
if (!fp->DeSerialize(&page_number_)) {
return false;
if (!fp->DeSerialize(image_data_))
}
if (!fp->DeSerialize(image_data_)) {
return false;
if (!fp->DeSerialize(language_))
}
if (!fp->DeSerialize(language_)) {
return false;
if (!fp->DeSerialize(transcription_))
}
if (!fp->DeSerialize(transcription_)) {
return false;
if (!fp->DeSerialize(boxes_))
}
if (!fp->DeSerialize(boxes_)) {
return false;
if (!fp->DeSerialize(box_texts_))
}
if (!fp->DeSerialize(box_texts_)) {
return false;
}
int8_t vertical = 0;
if (!fp->DeSerialize(&vertical))
if (!fp->DeSerialize(&vertical)) {
return false;
}
vertical_text_ = vertical != 0;
return true;
}
// As DeSerialize, but only seeks past the data - hence a static method.
bool ImageData::SkipDeSerialize(TFile *fp) {
if (!fp->DeSerializeSkip())
if (!fp->DeSerializeSkip()) {
return false;
}
int32_t page_number;
if (!fp->DeSerialize(&page_number))
if (!fp->DeSerialize(&page_number)) {
return false;
if (!fp->DeSerializeSkip())
}
if (!fp->DeSerializeSkip()) {
return false;
if (!fp->DeSerializeSkip())
}
if (!fp->DeSerializeSkip()) {
return false;
if (!fp->DeSerializeSkip())
}
if (!fp->DeSerializeSkip()) {
return false;
if (!fp->DeSerializeSkip(sizeof(TBOX)))
}
if (!fp->DeSerializeSkip(sizeof(TBOX))) {
return false;
}
int32_t number;
if (!fp->DeSerialize(&number))
if (!fp->DeSerialize(&number)) {
return false;
}
for (int i = 0; i < number; i++) {
if (!fp->DeSerializeSkip()) {
return false;
@ -200,10 +222,12 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
target_height = std::min(input_height, max_height);
}
float im_factor = static_cast<float>(target_height) / input_height;
if (scaled_width != nullptr)
if (scaled_width != nullptr) {
*scaled_width = IntCastRounded(im_factor * input_width);
if (scaled_height != nullptr)
}
if (scaled_height != nullptr) {
*scaled_height = target_height;
}
// Get the scaled image.
Pix *pix = pixScale(src_pix, im_factor, im_factor);
if (pix == nullptr) {
@ -212,10 +236,12 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
pixDestroy(&src_pix);
return nullptr;
}
if (scaled_width != nullptr)
if (scaled_width != nullptr) {
*scaled_width = pixGetWidth(pix);
if (scaled_height != nullptr)
}
if (scaled_height != nullptr) {
*scaled_height = pixGetHeight(pix);
}
pixDestroy(&src_pix);
if (boxes != nullptr) {
// Get the boxes.
@ -230,8 +256,9 @@ Pix *ImageData::PreScale(int target_height, int max_height, float *scale_factor,
boxes->push_back(box);
}
}
if (scale_factor != nullptr)
if (scale_factor != nullptr) {
*scale_factor = im_factor;
}
return pix;
}
@ -246,8 +273,9 @@ void ImageData::Display() const {
const int kTextSize = 64;
// Draw the image.
Pix *pix = GetPix();
if (pix == nullptr)
if (pix == nullptr) {
return;
}
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
auto *win =
@ -259,8 +287,9 @@ void ImageData::Display() const {
win->Pen(ScrollView::RED);
win->Brush(ScrollView::NONE);
int text_size = kTextSize;
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size)
if (!boxes_.empty() && boxes_[0].height() * 2 < text_size) {
text_size = boxes_[0].height() * 2;
}
win->TextAttributes("Arial", text_size, false, false, false);
if (!boxes_.empty()) {
for (int b = 0; b < boxes_.size(); ++b) {
@ -284,8 +313,9 @@ void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::
const std::vector<int> &box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_)
if (page_number_ >= 0 && box_pages[i] != page_number_) {
continue;
}
transcription_ += texts[i];
boxes_.push_back(boxes[i]);
box_texts_.push_back(texts[i]);
@ -402,11 +432,13 @@ void DocumentData::AddPageToDocument(ImageData *page) {
// thread.
void DocumentData::LoadPageInBackground(int index) {
ImageData *page = nullptr;
if (IsPageAvailable(index, &page))
if (IsPageAvailable(index, &page)) {
return;
}
std::lock_guard<std::mutex> lock(pages_mutex_);
if (pages_offset_ == index)
if (pages_offset_ == index) {
return;
}
pages_offset_ = index;
for (auto page : pages_) {
delete page;
@ -427,8 +459,9 @@ const ImageData *DocumentData::GetPage(int index) {
pages_mutex_.lock();
bool needs_loading = pages_offset_ != index;
pages_mutex_.unlock();
if (needs_loading)
if (needs_loading) {
LoadPageInBackground(index);
}
// We can't directly load the page, or the background load will delete it
// while the caller is using it, so give it a chance to work.
std::this_thread::yield();
@ -569,8 +602,9 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
// In the round-robin case, each DocumentData handles restricting its content
// to its fair share of memory. In the sequential case, DocumentCache
// determines which DocumentDatas are held entirely in memory.
if (cache_strategy_ == CS_ROUND_ROBIN)
if (cache_strategy_ == CS_ROUND_ROBIN) {
fair_share_memory = max_memory_ / filenames.size();
}
for (auto filename : filenames) {
auto *document = new DocumentData(filename);
document->SetDocument(filename.c_str(), fair_share_memory, reader);
@ -578,8 +612,9 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
}
if (!documents_.empty()) {
// Try to get the first page now to verify the list of filenames.
if (GetPageBySerial(0) != nullptr)
if (GetPageBySerial(0) != nullptr) {
return true;
}
tprintf("Load of page 0 failed!\n");
}
return false;
@ -607,8 +642,9 @@ int DocumentCache::TotalPages() {
if (cache_strategy_ == CS_SEQUENTIAL) {
// In sequential mode, we assume each doc has the same number of pages
// whether it is true or not.
if (num_pages_per_doc_ == 0)
if (num_pages_per_doc_ == 0) {
GetPageSequential(0);
}
return num_pages_per_doc_ * documents_.size();
}
int total_pages = 0;
@ -650,8 +686,9 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
ASSERT_HOST(num_pages_per_doc_ > 0);
}
// Get rid of zero now if we don't need it.
if (serial / num_pages_per_doc_ % num_docs > 0)
if (serial / num_pages_per_doc_ % num_docs > 0) {
documents_[0]->UnCache();
}
}
int doc_index = serial / num_pages_per_doc_ % num_docs;
const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
@ -694,8 +731,9 @@ int DocumentCache::CountNeighbourDocs(int index, int dir) {
int num_docs = documents_.size();
for (int offset = dir; abs(offset) < num_docs; offset += dir) {
int offset_index = (index + offset + num_docs) % num_docs;
if (!documents_[offset_index]->IsCached())
if (!documents_[offset_index]->IsCached()) {
return offset - dir;
}
}
return num_docs;
}

View File

@ -312,10 +312,11 @@ public:
// Returns a page by serial number using the current cache_strategy_ to
// determine the mapping from serial number to page.
const ImageData *GetPageBySerial(int serial) {
if (cache_strategy_ == CS_SEQUENTIAL)
if (cache_strategy_ == CS_SEQUENTIAL) {
return GetPageSequential(serial);
else
} else {
return GetPageRoundRobin(serial);
}
}
const std::vector<DocumentData *> &documents() const {

View File

@ -80,8 +80,9 @@ void LLSQ::add(const LLSQ &other) {
**********************************************************************/
void LLSQ::remove(double x, double y) { // delete an element
if (total_weight <= 0.0) // illegal
if (total_weight <= 0.0) { // illegal
EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
}
total_weight--; // count elements
sigx -= x; // update accumulators
sigy -= y;
@ -99,10 +100,11 @@ void LLSQ::remove(double x, double y) { // delete an element
double LLSQ::m() const { // get gradient
double covar = covariance();
double x_var = x_variance();
if (x_var != 0.0)
if (x_var != 0.0) {
return covar / x_var;
else
} else {
return 0.0; // too little
}
}
/**********************************************************************
@ -112,10 +114,11 @@ double LLSQ::m() const { // get gradient
**********************************************************************/
double LLSQ::c(double m) const { // get constant
if (total_weight > 0.0)
if (total_weight > 0.0) {
return (sigy - m * sigx) / total_weight;
else
} else {
return 0; // too little
}
}
/**********************************************************************
@ -129,10 +132,11 @@ double LLSQ::rms(double m, double c) const { // get error
if (total_weight > 0) {
error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c * (total_weight * c - 2 * sigy);
if (error >= 0)
if (error >= 0) {
error = std::sqrt(error / total_weight); // sqrt of mean
else
} else {
error = 0;
}
} else {
error = 0; // too little
}
@ -151,8 +155,9 @@ double LLSQ::pearson() const { // get correlation
double covar = covariance();
if (covar != 0.0) {
double var_product = x_variance() * y_variance();
if (var_product > 0.0)
if (var_product > 0.0) {
r = covar / std::sqrt(var_product);
}
}
return r;
}

View File

@ -74,22 +74,25 @@ public:
// Returns the covariance.
double covariance() const {
if (total_weight > 0.0)
if (total_weight > 0.0) {
return (sigxy - sigx * sigy / total_weight) / total_weight;
else
} else {
return 0.0;
}
}
double x_variance() const {
if (total_weight > 0.0)
if (total_weight > 0.0) {
return (sigxx - sigx * sigx / total_weight) / total_weight;
else
} else {
return 0.0;
}
}
double y_variance() const {
if (total_weight > 0.0)
if (total_weight > 0.0) {
return (sigyy - sigy * sigy / total_weight) / total_weight;
else
} else {
return 0.0;
}
}
private:

View File

@ -34,13 +34,15 @@ MATRIX::~MATRIX() = default;
// Returns true if there are any real classification results.
bool MATRIX::Classified(int col, int row, int wildcard_id) const {
if (get(col, row) == NOT_CLASSIFIED)
if (get(col, row) == NOT_CLASSIFIED) {
return false;
}
BLOB_CHOICE_IT b_it(get(col, row));
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
BLOB_CHOICE *choice = b_it.data();
if (choice->IsClassified())
if (choice->IsClassified()) {
return true;
}
}
return false;
}
@ -118,8 +120,9 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
for (col = 0; col < dim; ++col) {
for (row = col; row < dim && row < col + band_width; ++row) {
BLOB_CHOICE_LIST *rating = this->get(col, row);
if (rating == NOT_CLASSIFIED)
if (rating == NOT_CLASSIFIED) {
continue;
}
BLOB_CHOICE_IT b_it(rating);
tprintf("col=%d row=%d ", col, row);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
@ -131,13 +134,15 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
tprintf("\n");
}
tprintf("\n");
for (col = 0; col < dim; ++col)
for (col = 0; col < dim; ++col) {
tprintf("\t%d", col);
}
tprintf("\n");
for (row = 0; row < dim; ++row) {
for (col = 0; col <= row; ++col) {
if (col == 0)
if (col == 0) {
tprintf("%d\t", row);
}
if (row >= col + band_width) {
tprintf(" \t");
continue;
@ -149,8 +154,9 @@ void MATRIX::print(const UNICHARSET &unicharset) const {
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id()));
++counter;
if (counter == 3)
if (counter == 3) {
break;
}
}
tprintf("\t");
} else {

View File

@ -64,8 +64,9 @@ public:
int new_size = dim1 * dim2;
array_ = new T[new_size];
size_allocated_ = new_size;
for (int i = 0; i < size_allocated_; ++i)
for (int i = 0; i < size_allocated_; ++i) {
array_[i] = empty_;
}
}
// Default constructor for array allocation. Use Resize to set the size.
GENERIC_2D_ARRAY()
@ -100,8 +101,9 @@ public:
dim1_ = size1;
dim2_ = size2;
// Fill the padding data so it isn't uninitialized.
for (int i = size1 * size2; i < new_size; ++i)
for (int i = size1 * size2; i < new_size; ++i) {
array_[i] = empty_;
}
}
// Reallocate the array to the given size. Does not keep old data.
@ -138,26 +140,31 @@ public:
// Sets all the elements of the array to the empty value.
void Clear() {
int total_size = num_elements();
for (int i = 0; i < total_size; ++i)
for (int i = 0; i < total_size; ++i) {
array_[i] = empty_;
}
}
// Writes to the given file. Returns false in case of error.
// Only works with bitwise-serializeable types!
bool Serialize(FILE *fp) const {
if (!SerializeSize(fp))
if (!SerializeSize(fp)) {
return false;
if (!tesseract::Serialize(fp, &empty_))
}
if (!tesseract::Serialize(fp, &empty_)) {
return false;
}
int size = num_elements();
return tesseract::Serialize(fp, &array_[0], size);
}
bool Serialize(TFile *fp) const {
if (!SerializeSize(fp))
if (!SerializeSize(fp)) {
return false;
if (!fp->Serialize(&empty_))
}
if (!fp->Serialize(&empty_)) {
return false;
}
int size = num_elements();
return fp->Serialize(&array_[0], size);
}
@ -166,18 +173,23 @@ public:
// Only works with bitwise-serializeable types!
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, FILE *fp) {
if (!DeSerializeSize(swap, fp))
if (!DeSerializeSize(swap, fp)) {
return false;
if (!tesseract::DeSerialize(fp, &empty_))
return false;
if (swap)
ReverseN(&empty_, sizeof(empty_));
int size = num_elements();
if (!tesseract::DeSerialize(fp, &array_[0], size))
}
if (!tesseract::DeSerialize(fp, &empty_)) {
return false;
}
if (swap) {
for (int i = 0; i < size; ++i)
ReverseN(&empty_, sizeof(empty_));
}
int size = num_elements();
if (!tesseract::DeSerialize(fp, &array_[0], size)) {
return false;
}
if (swap) {
for (int i = 0; i < size; ++i) {
ReverseN(&array_[i], sizeof(array_[i]));
}
}
return true;
}
@ -190,14 +202,17 @@ public:
// Writes to the given file. Returns false in case of error.
// Assumes a T::Serialize(FILE*) const function.
bool SerializeClasses(FILE *fp) const {
if (!SerializeSize(fp))
if (!SerializeSize(fp)) {
return false;
if (!empty_.Serialize(fp))
}
if (!empty_.Serialize(fp)) {
return false;
}
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].Serialize(fp))
if (!array_[i].Serialize(fp)) {
return false;
}
}
return true;
}
@ -206,14 +221,17 @@ public:
// Assumes a T::DeSerialize(bool swap, FILE*) function.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE *fp) {
if (!DeSerializeSize(swap, fp))
if (!DeSerializeSize(swap, fp)) {
return false;
if (!empty_.DeSerialize(swap, fp))
}
if (!empty_.DeSerialize(swap, fp)) {
return false;
}
int size = num_elements();
for (int i = 0; i < size; ++i) {
if (!array_[i].DeSerialize(swap, fp))
if (!array_[i].DeSerialize(swap, fp)) {
return false;
}
}
return true;
}
@ -328,16 +346,18 @@ public:
int size = num_elements();
for (int i = 0; i < size; ++i) {
const T &value = array_[i];
if (value < rangemin || rangemax < value)
if (value < rangemin || rangemax < value) {
return false;
}
}
return true;
}
// Normalize the whole array.
double Normalize() {
int size = num_elements();
if (size <= 0)
if (size <= 0) {
return 0.0;
}
// Compute the mean.
double mean = 0.0;
for (int i = 0; i < size; ++i) {
@ -364,14 +384,16 @@ public:
// Returns the maximum value of the array.
T Max() const {
int size = num_elements();
if (size <= 0)
if (size <= 0) {
return empty_;
}
// Compute the max.
T max_value = array_[0];
for (int i = 1; i < size; ++i) {
const T &value = array_[i];
if (value > max_value)
if (value > max_value) {
max_value = value;
}
}
return max_value;
}
@ -379,14 +401,16 @@ public:
// Returns the maximum absolute value of the array.
T MaxAbs() const {
int size = num_elements();
if (size <= 0)
if (size <= 0) {
return empty_;
}
// Compute the max.
T max_abs = static_cast<T>(0);
for (int i = 0; i < size; ++i) {
T value = static_cast<T>(fabs(array_[i]));
if (value > max_abs)
if (value > max_abs) {
max_abs = value;
}
}
return max_abs;
}
@ -454,19 +478,24 @@ public:
// src_step represents the stride in the src between each adjacent group
// in the destination.
int num_replicas = 1, move_size = 1, src_step = 1;
for (int d = 0; d < min_d; ++d)
for (int d = 0; d < min_d; ++d) {
num_replicas *= dims[d];
for (int d = max_d + 1; d < num_dims; ++d)
}
for (int d = max_d + 1; d < num_dims; ++d) {
move_size *= dims[d];
for (int d = src_dim + 1; d < num_dims; ++d)
}
for (int d = src_dim + 1; d < num_dims; ++d) {
src_step *= dims[d];
if (src_dim > dest_dim)
}
if (src_dim > dest_dim) {
src_step *= dims[src_dim];
}
// wrap_size is the size of a single replica, being the amount that is
// handled num_replicas times.
int wrap_size = move_size;
for (int d = min_d; d <= max_d; ++d)
for (int d = min_d; d <= max_d; ++d) {
wrap_size *= dims[d];
}
result->ResizeNoInit(dim1_, dim2_);
result->empty_ = empty_;
const T *src = array_;
@ -487,8 +516,9 @@ public:
int size = num_elements();
for (int i = 0; i < size; ++i) {
T matrix_cell = array_[i];
if (matrix_cell != empty_)
if (matrix_cell != empty_) {
delete matrix_cell;
}
}
}
@ -496,15 +526,17 @@ protected:
// Factored helper to serialize the size.
bool SerializeSize(FILE *fp) const {
uint32_t size = dim1_;
if (!tesseract::Serialize(fp, &size))
if (!tesseract::Serialize(fp, &size)) {
return false;
}
size = dim2_;
return tesseract::Serialize(fp, &size);
}
bool SerializeSize(TFile *fp) const {
uint32_t size = dim1_;
if (!fp->Serialize(&size))
if (!fp->Serialize(&size)) {
return false;
}
size = dim2_;
return fp->Serialize(&size);
}
@ -512,33 +544,41 @@ protected:
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeSize(bool swap, FILE *fp) {
uint32_t size1, size2;
if (!tesseract::DeSerialize(fp, &size1))
if (!tesseract::DeSerialize(fp, &size1)) {
return false;
if (!tesseract::DeSerialize(fp, &size2))
}
if (!tesseract::DeSerialize(fp, &size2)) {
return false;
}
if (swap) {
ReverseN(&size1, sizeof(size1));
ReverseN(&size2, sizeof(size2));
}
// Arbitrarily limit the number of elements to protect against bad data.
if (size1 > UINT16_MAX)
if (size1 > UINT16_MAX) {
return false;
if (size2 > UINT16_MAX)
}
if (size2 > UINT16_MAX) {
return false;
}
Resize(size1, size2, empty_);
return true;
}
bool DeSerializeSize(TFile *fp) {
int32_t size1, size2;
if (!fp->DeSerialize(&size1))
if (!fp->DeSerialize(&size1)) {
return false;
if (!fp->DeSerialize(&size2))
}
if (!fp->DeSerialize(&size2)) {
return false;
}
// Arbitrarily limit the number of elements to protect against bad data.
if (size1 > UINT16_MAX)
if (size1 > UINT16_MAX) {
return false;
if (size2 > UINT16_MAX)
}
if (size2 > UINT16_MAX) {
return false;
}
Resize(size1, size2, empty_);
return true;
}
@ -667,10 +707,12 @@ struct MATRIX_COORD {
// making a new column at ind+1.
void MapForSplit(int ind) {
ASSERT_HOST(row >= col);
if (col > ind)
if (col > ind) {
++col;
if (row >= ind)
}
if (row >= ind) {
++row;
}
ASSERT_HOST(row >= col);
}

View File

@ -55,19 +55,21 @@ DIR128::DIR128( // from fcoord
low = 0;
if (fc.y() == 0) {
if (fc.x() >= 0)
if (fc.x() >= 0) {
dir = 0;
else
} else {
dir = MODULUS / 2;
}
return;
}
high = MODULUS;
do {
current = (high + low) / 2;
if (dirtab[current] * fc >= 0)
if (dirtab[current] * fc >= 0) {
low = current;
else
} else {
high = current;
}
} while (high - low > 1);
dir = low;
}

View File

@ -35,8 +35,9 @@ public:
DIR128( // constructor
int16_t value) { // value to assign
value %= MODULUS; // modulo arithmetic
if (value < 0)
if (value < 0) {
value += MODULUS; // done properly
}
dir = static_cast<int8_t>(value);
}
DIR128(const FCOORD fc); // quantize vector
@ -44,8 +45,9 @@ public:
DIR128 &operator=( // assign of int16_t
int16_t value) { // value to assign
value %= MODULUS; // modulo arithmetic
if (value < 0)
if (value < 0) {
value += MODULUS; // done properly
}
dir = static_cast<int8_t>(value);
return *this;
}
@ -55,10 +57,11 @@ public:
// result
int16_t result = dir - minus.dir;
if (result > MODULUS / 2)
if (result > MODULUS / 2) {
result -= MODULUS; // get in range
else if (result < -MODULUS / 2)
} else if (result < -MODULUS / 2) {
result += MODULUS;
}
return static_cast<int8_t>(result);
}
DIR128 operator+( // addition

View File

@ -51,10 +51,11 @@ DENORM &DENORM::operator=(const DENORM &src) {
predecessor_ = src.predecessor_;
pix_ = src.pix_;
block_ = src.block_;
if (src.rotation_ == nullptr)
if (src.rotation_ == nullptr) {
rotation_ = nullptr;
else
} else {
rotation_ = new FCOORD(*src.rotation_);
}
x_origin_ = src.x_origin_;
y_origin_ = src.y_origin_;
x_scale_ = src.x_scale_;
@ -99,10 +100,11 @@ void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
float final_yshift) {
Clear();
block_ = block;
if (rotation == nullptr)
if (rotation == nullptr) {
rotation_ = nullptr;
else
} else {
rotation_ = new FCOORD(*rotation);
}
predecessor_ = predecessor;
x_origin_ = x_origin;
y_origin_ = y_origin;
@ -185,15 +187,17 @@ static void ComputeRunlengthImage(const TBOX &box,
int x_edge = ClipToRange(x_coord, 0, width);
int gap = x_edge - x;
while (x < x_edge) {
if (gap < (*minruns)(x, iy))
if (gap < (*minruns)(x, iy)) {
(*minruns)(x, iy) = gap;
}
++x;
}
}
int gap = width - x;
while (x < width) {
if (gap < (*minruns)(x, iy))
if (gap < (*minruns)(x, iy)) {
(*minruns)(x, iy) = gap;
}
++x;
}
}
@ -228,8 +232,9 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
for (int iy = 0; iy < height; ++iy) {
for (int ix = 0; ix < width; ++ix) {
int run = minruns(ix, iy);
if (run == 0)
if (run == 0) {
run = 1;
}
float density = 1.0f / run;
hx[ix] += density;
hy[iy] += density;
@ -315,8 +320,9 @@ void DENORM::LocalNormTransform(const FCOORD &pt, FCOORD *transformed) const {
} else {
translated.set_x(translated.x() * x_scale_);
translated.set_y(translated.y() * y_scale_);
if (rotation_ != nullptr)
if (rotation_ != nullptr) {
translated.rotate(*rotation_);
}
}
transformed->set_x(translated.x() + final_xshift_);
transformed->set_y(translated.y() + final_yshift_);
@ -411,10 +417,12 @@ void DENORM::DenormTransform(const DENORM *last_denorm, const FCOORD &pt, FCOORD
void DENORM::LocalNormBlob(TBLOB *blob) const {
ICOORD translation(-IntCastRounded(x_origin_), -IntCastRounded(y_origin_));
blob->Move(translation);
if (y_scale_ != 1.0f)
if (y_scale_ != 1.0f) {
blob->Scale(y_scale_);
if (rotation_ != nullptr)
}
if (rotation_ != nullptr) {
blob->Rotate(*rotation_);
}
translation.set_x(IntCastRounded(final_xshift_));
translation.set_y(IntCastRounded(final_yshift_));
blob->Move(translation);
@ -431,8 +439,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
*min_xht = 0.0f;
*max_xht = FLT_MAX;
if (!unicharset.top_bottom_useful())
if (!unicharset.top_bottom_useful()) {
return;
}
// Clip the top and bottom to the limit of normalized feature space.
int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
@ -441,8 +450,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
double tolerance = y_scale();
// If the script doesn't have upper and lower-case characters, widen the
// tolerance to allow sloppy baseline/x-height estimates.
if (!unicharset.script_has_upper_lower())
if (!unicharset.script_has_upper_lower()) {
tolerance = y_scale() * kSloppyTolerance;
}
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom, &min_top, &max_top);
@ -479,8 +489,9 @@ void DENORM::XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TB
// and to allow the large caps in small caps to accept the xheight of the
// small caps, add kBlnBaselineOffset to chars with a maximum max, and have
// a top already at a significantly high position.
if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2)
if (max_top == kBlnCellHeight - 1 && top > kBlnCellHeight - kBlnBaselineOffset / 2) {
max_top += kBlnBaselineOffset;
}
top -= bln_yshift;
int height = top - kBlnBaselineOffset;
double min_height = min_top - kBlnBaselineOffset - tolerance;
@ -502,8 +513,9 @@ void DENORM::Print() const {
tprintf("Pix dimensions %d x %d x %d\n", pixGetWidth(pix_), pixGetHeight(pix_),
pixGetDepth(pix_));
}
if (inverse_)
if (inverse_) {
tprintf("Inverse\n");
}
if (block_ && block_->re_rotation().x() != 1.0f) {
tprintf("Block rotation %g, %g\n", block_->re_rotation().x(), block_->re_rotation().y());
}
@ -520,8 +532,9 @@ void DENORM::Print() const {
tprintf("\n");
} else {
tprintf("Scale = (%g, %g)\n", x_scale_, y_scale_);
if (rotation_ != nullptr)
if (rotation_ != nullptr) {
tprintf("Rotation = (%g, %g)\n", rotation_->x(), rotation_->y());
}
}
// Report both components of the final shift: x first, then y. (The second
// argument previously repeated final_xshift_, so the y shift was never shown.)
tprintf("Final Origin = (%g, %g)\n", final_xshift_, final_yshift_);
if (predecessor_ != nullptr) {

View File

@ -245,8 +245,9 @@ public:
inverse_ = value;
}
// Follows the predecessor_ chain recursively and returns the DENORM whose
// predecessor_ is null. Returns this when there is no predecessor.
const DENORM *RootDenorm() const {
if (predecessor_ != nullptr)
if (predecessor_ != nullptr) {
return predecessor_->RootDenorm();
}
// No predecessor: this object is the root of the chain.
return this;
}
const DENORM *predecessor() const {

View File

@ -200,13 +200,15 @@ void BLOCK::print( // print list of sides
if (dump) {
tprintf("Left side coords are:\n");
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
}
tprintf("\n");
tprintf("Right side coords are:\n");
it.set_to_list(&pdblk.rightside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
tprintf("(%d,%d) ", it.data()->x(), it.data()->y());
}
tprintf("\n");
}
}
@ -226,8 +228,9 @@ BLOCK &BLOCK::operator=( // assignment
kerning = source.kerning;
spacing = source.spacing;
filename = source.filename; // STRINGs assign ok
if (!rows.empty())
if (!rows.empty()) {
rows.clear();
}
re_rotation_ = source.re_rotation_;
classify_rotation_ = source.classify_rotation_;
skew_ = source.skew_;
@ -247,8 +250,9 @@ BLOCK &BLOCK::operator=( // assignment
static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
bool found = false;
*margin = 0;
if (segments->empty())
if (segments->empty()) {
return found;
}
ICOORDELT_IT seg_it(segments);
for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
int cur_margin = x - seg_it.data()->x();
@ -277,8 +281,9 @@ static bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) {
static bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) {
bool found = false;
*margin = 0;
if (segments->empty())
if (segments->empty()) {
return found;
}
ICOORDELT_IT seg_it(segments);
for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x;
@ -352,10 +357,12 @@ void BLOCK::compute_row_margins() {
if (bbox.bottom() <= mid_second_line) {
// we found a real drop cap
first_row->set_has_drop_cap(true);
if (drop_cap_bottom > bbox.bottom())
if (drop_cap_bottom > bbox.bottom()) {
drop_cap_bottom = bbox.bottom();
if (drop_cap_right < bbox.right())
}
if (drop_cap_right < bbox.right()) {
drop_cap_right = bbox.right();
}
}
}
}
@ -374,10 +381,12 @@ void BLOCK::compute_row_margins() {
if (row_box.top() >= drop_cap_bottom) {
int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
if (drop_cap_distance < 0)
if (drop_cap_distance < 0) {
drop_cap_distance = 0;
if (drop_cap_distance < left_margin)
}
if (drop_cap_distance < left_margin) {
left_margin = drop_cap_distance;
}
}
int right_y = row->base_line(row_box.right()) + row->x_height();
@ -469,8 +478,9 @@ void RefreshWordBlobsFromNewBlobs(BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs
BLOCK_IT block_it(block_list);
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
BLOCK *block = block_it.data();
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText())
if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
continue; // Don't touch non-text blocks.
}
// Iterate over all rows in the block.
ROW_IT row_it(block->row_list());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {

View File

@ -73,10 +73,12 @@ bool ParagraphModel::ValidBodyLine(int lmargin, int lindent, int rindent, int rm
}
bool ParagraphModel::Comparable(const ParagraphModel &other) const {
if (justification_ != other.justification_)
if (justification_ != other.justification_) {
return false;
if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN)
}
if (justification_ == JUSTIFICATION_CENTER || justification_ == JUSTIFICATION_UNKNOWN) {
return true;
}
int tolerance = (tolerance_ + other.tolerance_) / 4;
return NearlyEqual(margin_ + first_indent_, other.margin_ + other.first_indent_, tolerance) &&
NearlyEqual(margin_ + body_indent_, other.margin_ + other.body_indent_, tolerance);

View File

@ -126,8 +126,9 @@ public:
, tolerance_(tolerance) {
// Make sure that at least one of {first_indent, body_indent} is 0 by
// moving the smaller of the two into the margin.
int added_margin = first_indent;
if (body_indent < added_margin)
if (body_indent < added_margin) {
added_margin = body_indent;
}
margin_ += added_margin;
first_indent_ -= added_margin;
body_indent_ -= added_margin;

View File

@ -123,16 +123,18 @@ void ROW::recalc_bounding_box() { // recalculate BB
}
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
word = it.data();
if (it.at_first())
if (it.at_first()) {
word->set_flag(W_BOL, true);
else
} else {
// not start of line
word->set_flag(W_BOL, false);
if (it.at_last())
}
if (it.at_last()) {
word->set_flag(W_EOL, true);
else
} else {
// not end of line
word->set_flag(W_EOL, false);
}
// extend BB as reqd
bound_box += word->bounding_box();
}
@ -149,8 +151,9 @@ void ROW::move( // reposition row
) {
WERD_IT it(&words); // word iterator
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->move(vec);
}
bound_box.move(vec);
baseline.move(vec);
@ -228,8 +231,9 @@ ROW &ROW::operator=(const ROW &source) {
bodysize = source.bodysize;
ascrise = source.ascrise;
descdrop = source.descdrop;
if (!words.empty())
if (!words.empty()) {
words.clear();
}
baseline = source.baseline; // QSPLINES must do =
bound_box = source.bound_box;
has_drop_cap_ = source.has_drop_cap_;

View File

@ -186,11 +186,13 @@ int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
for (int t = 0; t < kHistogramSize - 1; ++t) {
omega_0 += histogram[t];
mu_t += t * static_cast<double>(histogram[t]);
if (omega_0 == 0)
if (omega_0 == 0) {
continue;
}
omega_1 = H - omega_0;
if (omega_1 == 0)
if (omega_1 == 0) {
break;
}
mu_0 = mu_t / omega_0;
mu_1 = (mu_T - mu_t) / omega_1;
double sig_sq_B = mu_1 - mu_0;
@ -201,10 +203,12 @@ int OtsuStats(const int *histogram, int *H_out, int *omega0_out) {
best_omega_0 = omega_0;
}
}
if (H_out != nullptr)
if (H_out != nullptr) {
*H_out = H;
if (omega0_out != nullptr)
}
if (omega0_out != nullptr) {
*omega0_out = best_omega_0;
}
return best_t;
}

View File

@ -194,17 +194,21 @@ WERD_RES &WERD_RES::operator=(const WERD_RES &source) {
} else {
word = source.word; // pt to same word
}
if (source.bln_boxes != nullptr)
if (source.bln_boxes != nullptr) {
bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
if (source.chopped_word != nullptr)
}
if (source.chopped_word != nullptr) {
chopped_word = new TWERD(*source.chopped_word);
if (source.rebuild_word != nullptr)
}
if (source.rebuild_word != nullptr) {
rebuild_word = new TWERD(*source.rebuild_word);
}
// TODO(rays) Do we ever need to copy the seam_array?
blob_row = source.blob_row;
denorm = source.denorm;
if (source.box_word != nullptr)
if (source.box_word != nullptr) {
box_word = new tesseract::BoxWord(*source.box_word);
}
best_state = source.best_state;
correct_text = source.correct_text;
blob_widths = source.blob_widths;
@ -415,8 +419,9 @@ void WERD_RES::InsertSeam(int blob_number, SEAM *seam) {
// Expand the ratings matrix.
ratings = ratings->ConsumeAndMakeBigger(blob_number);
// Fix all the segmentation states.
if (raw_choice != nullptr)
if (raw_choice != nullptr) {
raw_choice->UpdateStateForSplit(blob_number);
}
WERD_CHOICE_IT wc_it(&best_choices);
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
WERD_CHOICE *choice = wc_it.data();
@ -433,8 +438,9 @@ bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan(float threshold) const {
WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST *>(&best_choices));
for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
WERD_CHOICE *choice = wc_it.data();
if (choice->adjust_factor() <= threshold)
if (choice->adjust_factor() <= threshold) {
return false;
}
}
return true;
}
@ -472,8 +478,9 @@ bool WERD_RES::StatesAllValid() {
void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
best_choice->unichar_string() == std::string(word_to_debug))) {
if (raw_choice != nullptr)
if (raw_choice != nullptr) {
raw_choice->print("\nBest Raw Choice");
}
WERD_CHOICE_IT it(&best_choices);
int index = 0;
@ -490,10 +497,11 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
// Debug helper: prints the tess_accepted, tess_would_adapt and done flags on
// one line, then either passes msg to best_choice->print() or, when there is
// no best_choice, prints a "<Null choice>" marker instead.
void WERD_RES::DebugTopChoice(const char *msg) const {
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, tess_would_adapt,
done);
if (best_choice == nullptr)
if (best_choice == nullptr) {
tprintf("<Null choice>\n");
else
} else {
best_choice->print(msg);
}
}
// Removes from best_choices all choices which are not within a reasonable
@ -502,11 +510,13 @@ void WERD_RES::DebugTopChoice(const char *msg) const {
// re-ranker, in place of this heuristic that is based on the previous
// adjustment factor.
void WERD_RES::FilterWordChoices(int debug_level) {
if (best_choice == nullptr || best_choices.singleton())
if (best_choice == nullptr || best_choices.singleton()) {
return;
}
if (debug_level >= 2)
if (debug_level >= 2) {
best_choice->print("\nFiltering against best choice");
}
WERD_CHOICE_IT it(&best_choices);
int index = 0;
for (it.forward(); !it.at_first(); it.forward(), ++index) {
@ -539,11 +549,13 @@ void WERD_RES::FilterWordChoices(int debug_level) {
}
++chunk;
// If needed, advance choice_chunk to keep up with chunk.
while (choice_chunk < chunk && ++i < choice->length())
while (choice_chunk < chunk && ++i < choice->length()) {
choice_chunk += choice->state(i);
}
// If needed, advance best_chunk to keep up with chunk.
while (best_chunk < chunk && ++j < best_choice->length())
while (best_chunk < chunk && ++j < best_choice->length()) {
best_chunk += best_choice->state(j);
}
}
}
}
@ -578,10 +590,12 @@ void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, float min_rating
*thresholds = max_rating;
}
if (*thresholds > max_rating)
if (*thresholds > max_rating) {
*thresholds = max_rating;
if (*thresholds < min_rating)
}
if (*thresholds < min_rating) {
*thresholds = min_rating;
}
}
}
@ -612,8 +626,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
// prune them later when more information is available.
float max_certainty_delta =
StopperAmbigThreshold(best_choice->adjust_factor(), word_choice->adjust_factor());
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) {
max_certainty_delta = -kStopperAmbiguityThresholdOffset;
}
if (word_choice->certainty() - best_choice->certainty() < max_certainty_delta) {
if (debug) {
std::string bad_string;
@ -642,8 +657,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
// Time to insert.
it.add_before_stay_put(word_choice);
inserted = true;
if (num_choices == 0)
if (num_choices == 0) {
best_choice = word_choice; // This is the new best.
}
++num_choices;
}
if (choice->unichar_string() == new_str) {
@ -661,8 +677,9 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
}
} else {
++num_choices;
if (num_choices > max_num_choices)
if (num_choices > max_num_choices) {
delete it.extract();
}
}
it.forward();
} while (!it.at_first());
@ -670,14 +687,16 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
if (!inserted && num_choices < max_num_choices) {
it.add_to_end(word_choice);
inserted = true;
if (num_choices == 0)
if (num_choices == 0) {
best_choice = word_choice; // This is the new best.
}
}
if (debug) {
if (inserted)
if (inserted) {
tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
else
} else {
tprintf("Poor");
}
word_choice->print(" Word Choice");
}
if (!inserted) {
@ -701,8 +720,9 @@ void WERD_RES::PrintBestChoices() const {
std::string alternates_str;
WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST *>(&best_choices));
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
if (!it.at_first())
if (!it.at_first()) {
alternates_str += "\", \"";
}
alternates_str += it.data()->unichar_string();
}
tprintf("Alternates for \"%s\": {\"%s\"}\n", best_choice->unichar_string().c_str(),
@ -715,15 +735,17 @@ int WERD_RES::GetBlobsWidth(int start_blob, int last_blob) {
int result = 0;
for (int b = start_blob; b <= last_blob; ++b) {
result += blob_widths[b];
if (b < last_blob)
if (b < last_blob) {
result += blob_gaps[b];
}
}
return result;
}
// Returns the width of a gap between the specified blob and the next one.
// A blob_index that is negative or not below blob_gaps.size() yields 0
// rather than indexing outside blob_gaps.
int WERD_RES::GetBlobsGap(int blob_index) {
if (blob_index < 0 || blob_index >= blob_gaps.size())
if (blob_index < 0 || blob_index >= blob_gaps.size()) {
return 0;
}
return blob_gaps[blob_index];
}
@ -732,8 +754,9 @@ int WERD_RES::GetBlobsGap(int blob_index) {
// Borrowed pointer, so do not delete. May return nullptr if there is no
// BLOB_CHOICE matching the unichar_id at the given index.
// An index that is negative or not below best_choice->length() yields
// nullptr. Otherwise the result is FindMatchingChoice() applied to the
// best_choice unichar_id at index and the list from GetBlobChoices(index).
// NOTE(review): best_choice is dereferenced here without a null check --
// confirm that callers guarantee it is set.
BLOB_CHOICE *WERD_RES::GetBlobChoice(int index) const {
if (index < 0 || index >= best_choice->length())
if (index < 0 || index >= best_choice->length()) {
return nullptr;
}
BLOB_CHOICE_LIST *choices = GetBlobChoices(index);
return FindMatchingChoice(best_choice->unichar_id(index), choices);
}
@ -767,8 +790,9 @@ void WERD_RES::ConsumeWordResults(WERD_RES *word) {
word->blob_widths.clear();
blob_gaps = word->blob_gaps;
word->blob_gaps.clear();
if (ratings != nullptr)
if (ratings != nullptr) {
ratings->delete_matrix_pointers();
}
MovePointerData(&ratings, &word->ratings);
best_choice = word->best_choice;
MovePointerData(&raw_choice, &word->raw_choice);
@ -802,8 +826,9 @@ void WERD_RES::RebuildBestState() {
ASSERT_HOST(best_choice != nullptr);
delete rebuild_word;
rebuild_word = new TWERD;
if (seam_array.empty())
if (seam_array.empty()) {
start_seam_list(chopped_word, &seam_array);
}
best_state.clear();
int start = 0;
for (int i = 0; i < best_choice->length(); ++i) {
@ -856,8 +881,9 @@ void WERD_RES::SetScriptPositions() {
// Applies position to raw_choice and then to every WERD_CHOICE held in
// best_choices.
// NOTE(review): raw_choice is dereferenced unconditionally -- confirm it is
// always non-null when this is called.
void WERD_RES::SetAllScriptPositions(tesseract::ScriptPos position) {
raw_choice->SetAllScriptPositions(position);
WERD_CHOICE_IT wc_it(&best_choices);
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
wc_it.data()->SetAllScriptPositions(position);
}
}
// Classifies the word with some already-calculated BLOB_CHOICEs.
@ -958,8 +984,9 @@ bool WERD_RES::ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR
// Merges 2 adjacent blobs in the result (index and index+1) and corrects
// all the data to account for the change.
void WERD_RES::MergeAdjacentBlobs(int index) {
if (reject_map.length() == best_choice->length())
if (reject_map.length() == best_choice->length()) {
reject_map.remove_pos(index);
}
best_choice->remove_unichar_id(index + 1);
rebuild_word->MergeBlobs(index, index + 2);
box_word->MergeBoxes(index, index + 2);
@ -989,15 +1016,17 @@ static int is_simple_quote(const char *signed_str, int length) {
// Returns the unichar id of the double-quote character when is_simple_quote()
// accepts the strings of both id1 and id2; otherwise returns
// INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
// Look up the strings for the two ids so they can be tested as quotes.
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch)))
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
}
// Change pairs of quotes to double quotes.
void WERD_RES::fix_quotes() {
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\"")))
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
return; // Don't create it if it is disallowed.
}
using namespace std::placeholders; // for _1, _2
ConditionalBlobMerge(std::bind(&WERD_RES::BothQuotes, this, _1, _2), nullptr);
@ -1009,8 +1038,9 @@ UNICHAR_ID WERD_RES::BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
if (strlen(ch) == 1 && strlen(next_ch) == 1 && (*ch == '-' || *ch == '~') &&
(*next_ch == '-' || *next_ch == '~'))
(*next_ch == '-' || *next_ch == '~')) {
return uch_set->unichar_to_id("-");
}
return INVALID_UNICHAR_ID;
}
@ -1023,8 +1053,9 @@ bool WERD_RES::HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2) {
// Change pairs of hyphens to a single hyphen if the bounding boxes touch
// Typically a long dash which has been segmented.
void WERD_RES::fix_hyphens() {
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-")))
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
return; // Don't create it if it is disallowed.
}
using namespace std::placeholders; // for _1, _2
ConditionalBlobMerge(std::bind(&WERD_RES::BothHyphens, this, _1, _2),
@ -1034,10 +1065,11 @@ void WERD_RES::fix_hyphens() {
// Callback helper for merge_tess_fails: returns the space id (id1) if both
// arguments are the id of " " in uch_set, otherwise INVALID_UNICHAR_ID.
UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) {
// Equal ids mean only id1 has to be compared against the space id.
if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
if (id1 == id2 && id1 == uch_set->unichar_to_id(" ")) {
return id1;
else
} else {
return INVALID_UNICHAR_ID;
}
}
// Change pairs of tess failures to a single one
@ -1057,8 +1089,9 @@ bool WERD_RES::PiecesAllNatural(int start, int count) const {
for (int index = start; index < start + count - 1; ++index) {
if (index >= 0 && index < seam_array.size()) {
SEAM *seam = seam_array[index];
if (seam != nullptr && seam->HasAnySplits())
if (seam != nullptr && seam->HasAnySplits()) {
return false;
}
}
}
return true;
@ -1103,8 +1136,9 @@ void WERD_RES::ClearResults() {
blob_gaps.clear();
ClearRatings();
ClearWordChoices();
if (blamer_bundle != nullptr)
if (blamer_bundle != nullptr) {
blamer_bundle->ClearResults();
}
}
void WERD_RES::ClearWordChoices() {
best_choice = nullptr;
@ -1126,8 +1160,9 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
ASSERT_HOST(page_res == other.page_res);
if (other.block_res == nullptr) {
// other points to the end of the page.
if (block_res == nullptr)
if (block_res == nullptr) {
return 0;
}
return -1;
}
if (block_res == nullptr) {
@ -1196,8 +1231,9 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *ne
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
WERD_RES *word = wr_it.data();
if (word == word_res)
if (word == word_res) {
break;
}
}
ASSERT_HOST(!wr_it.cycled_list());
wr_it.add_before_then_move(new_res);
@ -1227,8 +1263,9 @@ static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
// boundaries between them.
int blob_end = INT32_MAX;
if (!blob_it.at_first() || next_word_blobs != nullptr) {
if (blob_it.at_first())
if (blob_it.at_first()) {
blob_it.set_to_list(next_word_blobs);
}
blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2;
}
blob_end = ClipToRange<int>(blob_end, clip_box.left(), clip_box.right());
@ -1246,11 +1283,13 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
TBOX current_box = words[w_index]->word->bounding_box();
TBOX next_box;
if (w_index + 1 < words.size() && words[w_index + 1] != nullptr &&
words[w_index + 1]->word != nullptr)
words[w_index + 1]->word != nullptr) {
next_box = words[w_index + 1]->word->bounding_box();
}
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; w_it.forward()) {
if (w_it.data() == nullptr || w_it.data()->word == nullptr)
if (w_it.data() == nullptr || w_it.data()->word == nullptr) {
continue;
}
TBOX w_box = w_it.data()->word->bounding_box();
int height_limit = std::min<int>(w_box.height(), w_box.width() / 2);
int width_limit = w_box.width() / kSignificantOverlapFraction;
@ -1274,8 +1313,9 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
clipped_box.set_top(current_box.top());
clipped_box.set_bottom(current_box.bottom());
}
if (clipped_box.width() <= 0)
if (clipped_box.width() <= 0) {
clipped_box = current_box;
}
return clipped_box;
}
@ -1324,8 +1364,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
if (!input_word->combination) {
for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
WERD *word = w_it.data();
if (word == input_word->word)
if (word == input_word->word) {
break;
}
}
// w_it is now set to the input_word's word.
ASSERT_HOST(!w_it.cycled_list());
@ -1334,8 +1375,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
WERD_RES_IT wr_it(&row()->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
WERD_RES *word = wr_it.data();
if (word == input_word)
if (word == input_word) {
break;
}
}
ASSERT_HOST(!wr_it.cycled_list());
// Since we only have an estimate of the bounds between blobs, use the blob
@ -1397,8 +1439,9 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
// Delete the current word, which has been replaced. We could just call
// DeleteCurrentWord, but that would iterate both lists again, and we know
// we are already in the right place.
if (!input_word->combination)
if (!input_word->combination) {
delete w_it.extract();
}
delete wr_it.extract();
ResetWordIterator();
}
@ -1494,8 +1537,9 @@ void PAGE_RES_IT::ResetWordIterator() {
for (word_res_it.mark_cycle_pt();
!word_res_it.cycled_list() && word_res_it.data() != next_word_res; word_res_it.forward()) {
if (!word_res_it.data()->part_of_combo) {
if (prev_row_res == row_res)
if (prev_row_res == row_res) {
prev_word_res = word_res;
}
word_res = word_res_it.data();
}
}
@ -1507,8 +1551,9 @@ void PAGE_RES_IT::ResetWordIterator() {
WERD_RES_IT wr_it(&row_res->word_res_list);
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
if (!wr_it.data()->part_of_combo) {
if (prev_row_res == row_res)
if (prev_row_res == row_res) {
prev_word_res = word_res;
}
word_res = wr_it.data();
}
}
@ -1561,8 +1606,9 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) {
word_res_it.mark_cycle_pt();
}
// Skip any part_of_combo words.
while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo)
while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo) {
word_res_it.forward();
}
if (!word_res_it.cycled_list()) {
next_block_res = block_res_it.data();
next_row_res = row_res_it.data();
@ -1595,8 +1641,9 @@ foundword:
*************************************************************************/
WERD_RES *PAGE_RES_IT::restart_row() {
ROW_RES *row = this->row();
if (!row)
if (!row) {
return nullptr;
}
for (restart_page(); this->row() != row; forward()) {
// pass
}
@ -1644,8 +1691,9 @@ void PAGE_RES_IT::rej_stat_word() {
page_res->rej_count += rejects_in_word;
block_res->rej_count += rejects_in_word;
row_res->rej_count += rejects_in_word;
if (chars_in_word == rejects_in_word)
if (chars_in_word == rejects_in_word) {
row_res->whole_word_rej_count += rejects_in_word;
}
}
} // namespace tesseract

View File

@ -356,56 +356,68 @@ public:
// characters purely based on their shape on the page, and by default produce
// the corresponding unicode for a left-to-right context.
// Returns nullptr when blob_index is negative, when there is no best_choice,
// when blob_index is not below best_choice->length(), or when the unichar id
// at that position is outside [0, uch_set->size()).
// NOTE(review): uch_set itself is not null-checked here -- confirm it is
// always set when best_choice is.
const char *BestUTF8(int blob_index, bool in_rtl_context) const {
if (blob_index < 0 || best_choice == nullptr || blob_index >= best_choice->length())
if (blob_index < 0 || best_choice == nullptr || blob_index >= best_choice->length()) {
return nullptr;
}
UNICHAR_ID id = best_choice->unichar_id(blob_index);
if (id < 0 || id >= uch_set->size())
if (id < 0 || id >= uch_set->size()) {
return nullptr;
}
// In an RTL context, substitute the mirrored id, but only when get_mirror()
// returned a value greater than 0.
UNICHAR_ID mirrored = uch_set->get_mirror(id);
if (in_rtl_context && mirrored > 0)
if (in_rtl_context && mirrored > 0) {
id = mirrored;
}
return uch_set->id_to_unichar_ext(id);
}
// Returns the UTF-8 string for the given blob index in the raw_choice word.
// Returns nullptr when blob_index is negative or not below
// raw_choice->length(), or when the unichar id at that position is outside
// [0, uch_set->size()).
// NOTE(review): unlike BestUTF8, which checks best_choice for nullptr,
// raw_choice is dereferenced here without a null check -- confirm callers
// guarantee it is set.
const char *RawUTF8(int blob_index) const {
if (blob_index < 0 || blob_index >= raw_choice->length())
if (blob_index < 0 || blob_index >= raw_choice->length()) {
return nullptr;
}
UNICHAR_ID id = raw_choice->unichar_id(blob_index);
if (id < 0 || id >= uch_set->size())
if (id < 0 || id >= uch_set->size()) {
return nullptr;
}
return uch_set->id_to_unichar(id);
}
// Returns the uch_set direction of the best_choice unichar at blob_index.
// Falls back to UNICHARSET::U_OTHER_NEUTRAL when there is no best_choice or
// blob_index is negative or not below best_choice->length().
UNICHARSET::Direction SymbolDirection(int blob_index) const {
if (best_choice == nullptr || blob_index >= best_choice->length() || blob_index < 0)
if (best_choice == nullptr || blob_index >= best_choice->length() || blob_index < 0) {
return UNICHARSET::U_OTHER_NEUTRAL;
}
return uch_set->get_direction(best_choice->unichar_id(blob_index));
}
// Returns true if any unichar in best_choice has direction
// U_RIGHT_TO_LEFT or U_RIGHT_TO_LEFT_ARABIC according to uch_set.
// Returns false when uch_set or best_choice is null or best_choice is empty.
bool AnyRtlCharsInWord() const {
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
return false;
}
// Note: the loop variable id is a position within best_choice, not a
// unichar id; the unichar id is fetched into unichar_id below.
for (int id = 0; id < best_choice->length(); id++) {
int unichar_id = best_choice->unichar_id(id);
if (unichar_id < 0 || unichar_id >= uch_set->size())
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
continue; // Ignore illegal chars.
}
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC)
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
return true;
}
}
return false;
}
// Returns true if any unichar in best_choice has direction
// U_LEFT_TO_RIGHT or U_ARABIC_NUMBER according to uch_set.
// Returns false when uch_set or best_choice is null or best_choice is empty.
bool AnyLtrCharsInWord() const {
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1)
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
return false;
}
// Note: the loop variable id is a position within best_choice, not a
// unichar id; the unichar id is fetched into unichar_id below.
for (int id = 0; id < best_choice->length(); id++) {
int unichar_id = best_choice->unichar_id(id);
if (unichar_id < 0 || unichar_id >= uch_set->size())
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
continue; // Ignore illegal chars.
}
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER)
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) {
return true;
}
}
return false;
}
@ -632,8 +644,9 @@ public:
auto *result = new WERD_RES(*src);
// That didn't copy the ratings, but we want a copy if there is one to
// begin with.
if (src->ratings != nullptr)
if (src->ratings != nullptr) {
result->ratings = src->ratings->DeepCopy();
}
return result;
}

View File

@ -24,15 +24,18 @@
namespace tesseract {
// Returns the index of the first entry of kParamsTrainingFeatureTypeName
// that compares equal to name with strcmp(), or -1 when name is nullptr or
// no entry matches. Null entries in the table are skipped.
int ParamsTrainingFeatureByName(const char *name) {
if (name == nullptr)
if (name == nullptr) {
return -1;
}
// Element count of the table, derived from its size at compile time.
int array_size =
sizeof(kParamsTrainingFeatureTypeName) / sizeof(kParamsTrainingFeatureTypeName[0]);
for (int i = 0; i < array_size; i++) {
if (kParamsTrainingFeatureTypeName[i] == nullptr)
if (kParamsTrainingFeatureTypeName[i] == nullptr) {
continue;
if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0)
}
if (strcmp(name, kParamsTrainingFeatureTypeName[i]) == 0) {
return i;
}
}
return -1;
}

View File

@ -139,8 +139,9 @@ public:
// Adds a new ParamsTrainingHypothesis to the current hypothesis list
// and returns the reference to the newly added entry.
// The entry is a copy of other, appended to the last list in hyp_list_vec;
// the returned reference refers to that stored copy, not to other.
ParamsTrainingHypothesis &AddHypothesis(const ParamsTrainingHypothesis &other) {
// With no list started yet, StartHypothesisList() is called first --
// presumably it appends an empty list so that back() below is valid.
if (hyp_list_vec.empty())
if (hyp_list_vec.empty()) {
StartHypothesisList();
}
hyp_list_vec.back().push_back(ParamsTrainingHypothesis(other));
return hyp_list_vec.back().back();
}

View File

@ -100,8 +100,10 @@ bool PDBLK::contains( // test containment
// get rectangle
it.bounding_box(bleft, tright);
// inside rect
if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() && pt.y() <= tright.y())
if (pt.x() >= bleft.x() && pt.x() <= tright.x() && pt.y() >= bleft.y() &&
pt.y() <= tright.y()) {
return true; // is inside
}
}
return false; // not inside
}
@ -117,13 +119,15 @@ void PDBLK::move( // reposition block
) {
ICOORDELT_IT it(&leftside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
*(it.data()) += vec;
}
it.set_to_list(&rightside);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
*(it.data()) += vec;
}
box.move(vec);
}
@ -165,8 +169,9 @@ Pix *PDBLK::render_mask(const FCOORD &rerotation, TBOX *mask_box) {
// Just fill the whole block as there is only a bounding box.
pixRasterop(pix, 0, 0, rotated_box.width(), rotated_box.height(), PIX_SET, nullptr, 0, 0);
}
if (mask_box != nullptr)
if (mask_box != nullptr) {
*mask_box = rotated_box;
}
return pix;
}
@ -241,10 +246,12 @@ PDBLK &PDBLK::operator=( // assignment
const PDBLK &source // from this
) {
// this->ELIST_LINK::operator=(source);
if (!leftside.empty())
if (!leftside.empty()) {
leftside.clear();
if (!rightside.empty())
}
if (!rightside.empty()) {
rightside.clear();
}
leftside.deep_copy(&source.leftside, &ICOORDELT::deep_copy);
rightside.deep_copy(&source.rightside, &ICOORDELT::deep_copy);
box = source.box;
@ -281,8 +288,9 @@ void BLOCK_RECT_IT::set_to_block( // start (new) block
// set iterators
left_it.set_to_list(&blkptr->leftside);
right_it.set_to_list(&blkptr->rightside);
if (!blkptr->leftside.empty())
if (!blkptr->leftside.empty()) {
start_block(); // ready for iteration
}
}
/**********************************************************************
@ -298,9 +306,10 @@ void BLOCK_RECT_IT::start_block() { // start (new) block
right_it.mark_cycle_pt();
ymin = left_it.data()->y(); // bottom of first box
ymax = left_it.data_relative(1)->y();
if (right_it.data_relative(1)->y() < ymax)
if (right_it.data_relative(1)->y() < ymax) {
// smallest step
ymax = right_it.data_relative(1)->y();
}
}
/**********************************************************************
@ -311,10 +320,12 @@ void BLOCK_RECT_IT::start_block() { // start (new) block
void BLOCK_RECT_IT::forward() { // next rectangle
if (!left_it.empty()) { // non-empty list
if (left_it.data_relative(1)->y() == ymax)
if (left_it.data_relative(1)->y() == ymax) {
left_it.forward(); // move to meet top
if (right_it.data_relative(1)->y() == ymax)
}
if (right_it.data_relative(1)->y() == ymax) {
right_it.forward();
}
// last is special
if (left_it.at_last() || right_it.at_last()) {
left_it.move_to_first(); // restart
@ -326,9 +337,10 @@ void BLOCK_RECT_IT::forward() { // next rectangle
}
// next point
ymax = left_it.data_relative(1)->y();
if (right_it.data_relative(1)->y() < ymax)
if (right_it.data_relative(1)->y() < ymax) {
// least step forward
ymax = right_it.data_relative(1)->y();
}
}
}

View File

@ -54,8 +54,9 @@ void ICOORD::set_with_shrink(int x, int y) {
// Fit the vector into an ICOORD, which is 16 bit.
int factor = 1;
int max_extent = std::max(abs(x), abs(y));
if (max_extent > INT16_MAX)
if (max_extent > INT16_MAX) {
factor = max_extent / INT16_MAX + 1;
}
xcoord = x / factor;
ycoord = y / factor;
}
@ -63,10 +64,11 @@ void ICOORD::set_with_shrink(int x, int y) {
// Signum of x, like the Fortran/BASIC sgn function: returns -1 if x < 0,
// 0 if x == 0 and 1 if x > 0.
static int sign(int x) {
if (x < 0)
if (x < 0) {
return -1;
else
} else {
return x > 0 ? 1 : 0;
}
}
// Writes to the given file. Returns false in case of error.
@ -76,10 +78,12 @@ bool ICOORD::Serialize(FILE *fp) const {
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool ICOORD::DeSerialize(bool swap, FILE *fp) {
if (!tesseract::DeSerialize(fp, &xcoord))
if (!tesseract::DeSerialize(fp, &xcoord)) {
return false;
if (!tesseract::DeSerialize(fp, &ycoord))
}
if (!tesseract::DeSerialize(fp, &ycoord)) {
return false;
}
if (swap) {
ReverseN(&xcoord, sizeof(xcoord));
ReverseN(&ycoord, sizeof(ycoord));

View File

@ -66,13 +66,15 @@ TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
EDGEPT *edgepts = stack_edgepts;
// Use heap memory if the stack buffer is not big enough.
if (c_outline->pathlength() > FASTEDGELENGTH)
if (c_outline->pathlength() > FASTEDGELENGTH) {
edgepts = new EDGEPT[c_outline->pathlength()];
}
loop_box = c_outline->bounding_box();
area = loop_box.height();
if (!poly_wide_objects_better && loop_box.width() > area)
if (!poly_wide_objects_better && loop_box.width() > area) {
area = loop_box.width();
}
area *= area;
edgesteps_to_edgepts(c_outline, edgepts);
fix2(edgepts, area);
@ -100,8 +102,9 @@ TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
} while (edgept != startpt);
prev_result->next = result;
result->prev = prev_result;
if (edgepts != stack_edgepts)
if (edgepts != stack_edgepts) {
delete[] edgepts;
}
return TESSLINE::BuildFromOutlineList(result);
}
@ -141,8 +144,9 @@ EDGEPT *edgesteps_to_edgepts( // convert outline
dir += 128 - 16;
vec += c_outline->step(stepindex + 1);
stepinc = 2;
} else
} else {
stepinc = 1;
}
if (count == 0) {
prevdir = dir;
prev_vec = vec;
@ -171,8 +175,9 @@ EDGEPT *edgesteps_to_edgepts( // convert outline
prev_vec = vec;
count = 1;
prev_stepindex = stepindex;
} else
} else {
count++;
}
stepindex += stepinc;
} while (stepindex < length);
edgepts[epindex].pos.x = pos.x();
@ -222,8 +227,9 @@ void fix2( // polygonal approx
edgept = start; /*start of loop */
while (((edgept->flags[DIR] - edgept->prev->flags[DIR] + 1) & 7) < 3 &&
(dir1 = (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 && dir1 != 6)
(dir1 = (edgept->prev->flags[DIR] - edgept->next->flags[DIR]) & 7) != 2 && dir1 != 6) {
edgept = edgept->next; /*find suitable start */
}
loopstart = edgept; /*remember start */
stopped = 0; /*not finished yet */
@ -240,15 +246,17 @@ void fix2( // polygonal approx
if (((dir1 - dir2 + 1) & 7) < 3) {
while (edgept->prev->flags[DIR] == edgept->next->flags[DIR]) {
edgept = edgept->next; /*look at next */
if (edgept->flags[DIR] == dir1)
if (edgept->flags[DIR] == dir1) {
/*sum lengths */
sum1 += edgept->flags[RUNLENGTH];
else
} else {
sum2 += edgept->flags[RUNLENGTH];
}
}
if (edgept == loopstart)
if (edgept == loopstart) {
stopped = 1; /*finished */
}
if (sum2 + sum1 > 2 && linestart->prev->flags[DIR] == dir2 &&
(linestart->prev->flags[RUNLENGTH] > linestart->flags[RUNLENGTH] || sum2 > sum1)) {
/*start is back one */
@ -260,8 +268,9 @@ void fix2( // polygonal approx
(edgept->flags[DIR] == dir1 && sum1 >= sum2) ||
((edgept->prev->flags[RUNLENGTH] < edgept->flags[RUNLENGTH] ||
(edgept->flags[DIR] == dir2 && sum2 >= sum1)) &&
linestart->next != edgept))
linestart->next != edgept)) {
edgept = edgept->next;
}
}
/*sharp bend */
edgept->flags[FLAGS] |= FIXED;
@ -303,42 +312,49 @@ void fix2( // polygonal approx
} while (edgept != start); /*until finished */
stopped = 0;
if (area < 450)
if (area < 450) {
area = 450;
}
gapmin = area * fixed_dist * fixed_dist / 44000;
edgept = start;
fixed_count = 0;
do {
if (edgept->flags[FLAGS] & FIXED)
if (edgept->flags[FLAGS] & FIXED) {
fixed_count++;
}
edgept = edgept->next;
} while (edgept != start);
while ((edgept->flags[FLAGS] & FIXED) == 0)
while ((edgept->flags[FLAGS] & FIXED) == 0) {
edgept = edgept->next;
}
edgefix0 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
while ((edgept->flags[FLAGS] & FIXED) == 0) {
edgept = edgept->next;
}
edgefix1 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
while ((edgept->flags[FLAGS] & FIXED) == 0) {
edgept = edgept->next;
}
edgefix2 = edgept;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0)
while ((edgept->flags[FLAGS] & FIXED) == 0) {
edgept = edgept->next;
}
edgefix3 = edgept;
startfix = edgefix2;
do {
if (fixed_count <= 3)
if (fixed_count <= 3) {
break; // already too few
}
d12vec.diff(edgefix1->pos, edgefix2->pos);
d12 = d12vec.length();
// TODO(rays) investigate this change:
@ -366,8 +382,9 @@ void fix2( // polygonal approx
edgefix2 = edgefix3;
edgept = edgept->next;
while ((edgept->flags[FLAGS] & FIXED) == 0) {
if (edgept == startfix)
if (edgept == startfix) {
stopped = 1;
}
edgept = edgept->next;
}
edgefix3 = edgept;
@ -391,8 +408,9 @@ EDGEPT *poly2( // second poly
EDGEPT *linestart; /*start of line */
int edgesum; /*correction count */
if (area < 1200)
if (area < 1200) {
area = 1200; /*minimum value */
}
loopstart = nullptr; /*not found it yet */
edgept = startpt; /*start of loop */
@ -424,28 +442,32 @@ EDGEPT *poly2( // second poly
edgesum += edgept->flags[RUNLENGTH];
edgept = edgept->next; /*move on */
} while ((edgept->flags[FLAGS] & FIXED) == 0 && edgept != loopstart && edgesum < 126);
if (poly_debug)
if (poly_debug) {
tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n", linestart->pos.x,
linestart->pos.y, linestart->flags[DIR], linestart->vec.x, linestart->vec.y,
edgesum, edgept->pos.x, edgept->pos.y);
}
/*reapproximate */
cutline(linestart, edgept, area);
while ((edgept->next->flags[FLAGS] & FIXED) && edgept != loopstart)
while ((edgept->next->flags[FLAGS] & FIXED) && edgept != loopstart) {
edgept = edgept->next; /*look for next non-fixed */
}
}
/*do all the loop */
while (edgept != loopstart);
edgesum = 0;
do {
if (edgept->flags[FLAGS] & FIXED)
if (edgept->flags[FLAGS] & FIXED) {
edgesum++;
}
edgept = edgept->next;
}
// count fixed pts
while (edgept != loopstart);
if (edgesum < 3)
if (edgesum < 3) {
area /= 2; // must have 3 pts
}
} while (edgesum < 3);
do {
linestart = edgept;
@ -457,8 +479,9 @@ EDGEPT *poly2( // second poly
linestart->vec.x = edgept->pos.x - linestart->pos.x;
linestart->vec.y = edgept->pos.y - linestart->pos.y;
} while (edgept != loopstart);
} else
} else {
edgept = startpt; /*start of loop */
}
loopstart = edgept; /*new start */
return loopstart; /*correct exit */
@ -484,8 +507,9 @@ void cutline( // recursive refine
int squaresum; /*sum of perps */
edge = first; /*start of line */
if (edge->next == last)
if (edge->next == last) {
return; /*simple line */
}
/*vector sum */
vecsum.x = last->pos.x - edge->pos.x;
@ -497,10 +521,11 @@ void cutline( // recursive refine
}
/*absolute value */
vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
if (vecsum.y > vlen)
if (vecsum.y > vlen) {
vlen = vecsum.y; /*maximum */
else if (-vecsum.y > vlen)
} else if (-vecsum.y > vlen) {
vlen = -vecsum.y; /*absolute value */
}
vec.x = edge->vec.x; /*accumulated vector */
vec.y = edge->vec.y;
@ -515,8 +540,9 @@ void cutline( // recursive refine
}
squaresum += perp; /*sum squares */
ptcount++; /*count points */
if (poly_debug)
if (poly_debug) {
tprintf("Cutline:Final perp=%d\n", perp);
}
if (perp > maxperp) {
maxperp = perp;
maxpoint = edge; /*find greatest deviation */
@ -536,16 +562,18 @@ void cutline( // recursive refine
maxperp /= perp;
maxperp <<= 8; /*avoid overflow */
}
if (squaresum < 256 * INT16_MAX)
if (squaresum < 256 * INT16_MAX) {
/*mean squared perp */
perp = (squaresum << 8) / (perp * ptcount);
else
} else {
/*avoid overflow */
perp = (squaresum / perp << 8) / ptcount;
}
if (poly_debug)
if (poly_debug) {
tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area, maxperp / 256.0,
maxperp * 200.0 / area, perp / 256.0, perp * 300.0 / area);
}
if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
maxpoint->flags[FLAGS] |= FIXED;
/*partitions */

View File

@ -76,15 +76,19 @@ void POLY_BLOCK::compute_bb() { // constructor
topright = botleft;
do {
pos = *pts.data();
if (pos.x() < botleft.x())
if (pos.x() < botleft.x()) {
// get bounding box
botleft = ICOORD(pos.x(), botleft.y());
if (pos.y() < botleft.y())
}
if (pos.y() < botleft.y()) {
botleft = ICOORD(botleft.x(), pos.y());
if (pos.x() > topright.x())
}
if (pos.x() > topright.x()) {
topright = ICOORD(pos.x(), topright.y());
if (pos.y() > topright.y())
}
if (pos.y() > topright.y()) {
topright = ICOORD(topright.x(), pos.y());
}
pts.forward();
} while (!pts.at_first());
ibl = ICOORD(botleft.x(), botleft.y());
@ -115,18 +119,21 @@ int16_t POLY_BLOCK::winding_number(const ICOORD &point) {
// crossing the line
if (vec.y() <= 0 && vec.y() + vvec.y() > 0) {
cross = vec * vvec; // cross product
if (cross > 0)
if (cross > 0) {
count++; // crossing right half
else if (cross == 0)
} else if (cross == 0) {
return INTERSECTING; // going through point
}
} else if (vec.y() > 0 && vec.y() + vvec.y() <= 0) {
cross = vec * vvec;
if (cross < 0)
if (cross < 0) {
count--; // crossing back
else if (cross == 0)
} else if (cross == 0) {
return INTERSECTING; // illegal
} else if (vec.y() == 0 && vec.x() == 0)
}
} else if (vec.y() == 0 && vec.x() == 0) {
return INTERSECTING;
}
it.forward();
} while (!it.at_first());
return count; // winding number
@ -138,8 +145,9 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
ICOORDELT_IT it = &vertices; // iterator
ICOORD vertex;
if (!box.overlap(*(other->bounding_box())))
if (!box.overlap(*(other->bounding_box()))) {
return false; // can't be contained
}
/* check that no vertex of this is inside other */
@ -147,9 +155,11 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
vertex = *it.data();
// get winding number
count = other->winding_number(vertex);
if (count != INTERSECTING)
if (count != 0)
if (count != INTERSECTING) {
if (count != 0) {
return false;
}
}
it.forward();
} while (!it.at_first());
@ -161,9 +171,11 @@ bool POLY_BLOCK::contains(POLY_BLOCK *other) {
vertex = *it.data();
// try other way round
count = winding_number(vertex);
if (count != INTERSECTING)
if (count == 0)
if (count != INTERSECTING) {
if (count == 0) {
return false;
}
}
it.forward();
} while (!it.at_first());
return true;
@ -291,8 +303,9 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
ICOORDELT_IT it = &vertices; // iterator
ICOORD vertex;
if (!box.overlap(*(other->bounding_box())))
if (!box.overlap(*(other->bounding_box()))) {
return false; // can't be any overlap.
}
/* see if a vertex of this is inside other */
@ -300,9 +313,11 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
vertex = *it.data();
// get winding number
count = other->winding_number(vertex);
if (count != INTERSECTING)
if (count != 0)
if (count != INTERSECTING) {
if (count != 0) {
return true;
}
}
it.forward();
} while (!it.at_first());
@ -314,9 +329,11 @@ bool POLY_BLOCK::overlap(POLY_BLOCK *other) {
vertex = *it.data();
// try other way round
count = winding_number(vertex);
if (count != INTERSECTING)
if (count != 0)
if (count != INTERSECTING) {
if (count != 0) {
return true;
}
}
it.forward();
} while (!it.at_first());
return false;
@ -346,8 +363,9 @@ ICOORDELT_LIST *PB_LINE_IT::get_line(int16_t y) {
if (!r.empty()) {
r.sort(lessthan);
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward())
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
x = r.data();
}
for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) {
r.data()->set_y(r.data_relative(1)->x() - r.data()->x());
r.forward();
@ -362,12 +380,13 @@ int lessthan(const void *first, const void *second) {
const ICOORDELT *p1 = *reinterpret_cast<const ICOORDELT *const *>(first);
const ICOORDELT *p2 = *reinterpret_cast<const ICOORDELT *const *>(second);
if (p1->x() < p2->x())
if (p1->x() < p2->x()) {
return (-1);
else if (p1->x() > p2->x())
} else if (p1->x() > p2->x()) {
return (1);
else
} else {
return (0);
}
}
#ifndef GRAPHICS_DISABLED

View File

@ -104,19 +104,21 @@ QSPLINE::QSPLINE( // constructor
/*first blob */
pointindex = ptcounts[segment];
if (pointindex > 0 && xpts[pointindex] != xpts[pointindex - 1] &&
xpts[pointindex] != xstarts[segment])
xpts[pointindex] != xstarts[segment]) {
qlsq.add(xstarts[segment],
ypts[pointindex - 1] + (ypts[pointindex] - ypts[pointindex - 1]) *
(xstarts[segment] - xpts[pointindex - 1]) /
(xpts[pointindex] - xpts[pointindex - 1]));
}
for (; pointindex < ptcounts[segment + 1]; pointindex++) {
qlsq.add(xpts[pointindex], ypts[pointindex]);
}
if (pointindex > 0 && pointindex < pointcount && xpts[pointindex] != xstarts[segment + 1])
if (pointindex > 0 && pointindex < pointcount && xpts[pointindex] != xstarts[segment + 1]) {
qlsq.add(xstarts[segment + 1],
ypts[pointindex - 1] + (ypts[pointindex] - ypts[pointindex - 1]) *
(xstarts[segment + 1] - xpts[pointindex - 1]) /
(xpts[pointindex] - xpts[pointindex - 1]));
}
qlsq.fit(degree);
quadratics[segment].a = qlsq.get_a();
quadratics[segment].b = qlsq.get_b();
@ -224,10 +226,11 @@ int32_t QSPLINE::spline_index( // evaluate
top = segments;
while (top - bottom > 1) {
index = (top + bottom) / 2; // centre of range
if (x >= xcoords[index])
if (x >= xcoords[index]) {
bottom = index; // new min
else
} else {
top = index; // new max
}
}
return bottom;
}
@ -290,10 +293,12 @@ void QSPLINE::extrapolate( // linear extrapolation
int increment; // in size
increment = xmin < xcoords[0] ? 1 : 0;
if (xmax > xcoords[segments])
if (xmax > xcoords[segments]) {
increment++;
if (increment == 0)
}
if (increment == 0) {
return;
}
xstarts = new int32_t[segments + 1 + increment];
quads = new QUAD_COEFFS[segments + increment];
if (xmin < xcoords[0]) {
@ -302,8 +307,9 @@ void QSPLINE::extrapolate( // linear extrapolation
quads[0].b = gradient;
quads[0].c = y(xcoords[0]) - quads[0].b * xcoords[0];
dest_segment = 1;
} else
} else {
dest_segment = 0;
}
for (segment = 0; segment < segments; segment++) {
xstarts[dest_segment] = xcoords[segment];
quads[dest_segment] = quadratics[segment];
@ -345,10 +351,11 @@ void QSPLINE::plot( // draw it
increment = static_cast<double>(xcoords[segment + 1] - xcoords[segment]) / QSPLINE_PRECISION;
x = xcoords[segment];
for (step = 0; step <= QSPLINE_PRECISION; step++) {
if (segment == 0 && step == 0)
if (segment == 0 && step == 0) {
window->SetCursor(x, quadratics[segment].y(x));
else
} else {
window->DrawTo(x, quadratics[segment].y(x));
}
x += increment;
}
}

View File

@ -287,8 +287,9 @@ BLOB_CHOICE_LIST *WERD_CHOICE::blob_choices(int index, MATRIX *ratings) const {
// MATRIX for the given index into the word.
MATRIX_COORD WERD_CHOICE::MatrixCoord(int index) const {
// col accumulates state_[0] .. state_[index - 1].
int col = 0;
for (int i = 0; i < index; ++i)
for (int i = 0; i < index; ++i) {
col += state_[i];
}
// row is offset from col by state_[index] - 1.
int row = col + state_[index] - 1;
return MATRIX_COORD(col, row);
}
@ -327,10 +328,11 @@ void WERD_CHOICE::remove_unichar_ids(int start, int num) {
ASSERT_HOST(start >= 0 && start + num <= length_);
// Accumulate the states to account for the merged blobs.
for (int i = 0; i < num; ++i) {
if (start > 0)
if (start > 0) {
state_[start - 1] += state_[start + i];
else if (start + num < length_)
} else if (start + num < length_) {
state_[start + num] += state_[start + i];
}
}
for (int i = start; i + num < length_; ++i) {
unichar_ids_[i] = unichar_ids_[i + num];
@ -428,8 +430,9 @@ bool WERD_CHOICE::has_rtl_unichar_id() const {
*/
void WERD_CHOICE::string_and_lengths(std::string *word_str, std::string *word_lengths_str) const {
*word_str = "";
if (word_lengths_str != nullptr)
if (word_lengths_str != nullptr) {
*word_lengths_str = "";
}
for (int i = 0; i < length_; ++i) {
const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
*word_str += ch;
@ -473,13 +476,16 @@ WERD_CHOICE &WERD_CHOICE::operator+=(const WERD_CHOICE &second) {
script_pos_[length_ + i] = second.BlobPosition(i);
}
length_ += second.length();
if (second.adjust_factor_ > adjust_factor_)
if (second.adjust_factor_ > adjust_factor_) {
adjust_factor_ = second.adjust_factor_;
}
rating_ += second.rating(); // add ratings
if (second.certainty() < certainty_) // take min
if (second.certainty() < certainty_) { // take min
certainty_ = second.certainty();
if (second.dangerous_ambig_found_)
}
if (second.dangerous_ambig_found_) {
dangerous_ambig_found_ = true;
}
if (permuter_ == NO_PERM) {
permuter_ = second.permuter();
} else if (second.permuter() != NO_PERM && second.permuter() != permuter_) {
@ -525,8 +531,9 @@ WERD_CHOICE &WERD_CHOICE::operator=(const WERD_CHOICE &source) {
// NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
// Initialize to normal.
for (int i = 0; i < length_; ++i)
for (int i = 0; i < length_; ++i) {
script_pos_[i] = tesseract::SP_NORMAL;
}
if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
return;
}
@ -586,8 +593,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD *word, int debug) {
// Sets every one of the first length_ entries of script_pos_ to the given
// position.
void WERD_CHOICE::SetAllScriptPositions(tesseract::ScriptPos position) {
for (int i = 0; i < length_; ++i)
for (int i = 0; i < length_; ++i) {
script_pos_[i] = position;
}
}
/* static */
@ -627,8 +635,9 @@ int WERD_CHOICE::GetTopScriptID() const {
int max_script = unicharset_->get_script_table_size();
int *sid = new int[max_script];
int x;
for (x = 0; x < max_script; x++)
for (x = 0; x < max_script; x++) {
sid[x] = 0;
}
for (x = 0; x < length_; ++x) {
int script_id = unicharset_->get_script(unichar_id(x));
sid[script_id]++;
@ -647,11 +656,14 @@ int WERD_CHOICE::GetTopScriptID() const {
// Note that high script ID overrides lower one on a tie, thus biasing
// towards non-Common script (if sorted that way in unicharset file).
int max_sid = 0;
for (x = 1; x < max_script; x++)
if (sid[x] >= sid[max_sid])
for (x = 1; x < max_script; x++) {
if (sid[x] >= sid[max_sid]) {
max_sid = x;
if (sid[max_sid] < length_ / 2)
}
}
if (sid[max_sid] < length_ / 2) {
max_sid = unicharset_->null_sid();
}
delete[] sid;
return max_sid;
}
@ -711,8 +723,9 @@ void WERD_CHOICE::print(const char *msg) const {
// Prints the segmentation state with an introductory message: msg first,
// then each of the length_ entries of state_ preceded by a space, then a
// newline. All output goes through tprintf.
void WERD_CHOICE::print_state(const char *msg) const {
tprintf("%s", msg);
for (int i = 0; i < length_; ++i)
for (int i = 0; i < length_; ++i) {
tprintf(" %d", state_[i]);
}
tprintf("\n");
}
@ -727,16 +740,18 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
// Check the state against the static prev_drawn_state.
static std::vector<int> prev_drawn_state;
bool already_done = prev_drawn_state.size() == length_;
if (!already_done)
if (!already_done) {
prev_drawn_state.resize(length_);
}
for (int i = 0; i < length_; ++i) {
if (prev_drawn_state[i] != state_[i]) {
already_done = false;
}
prev_drawn_state[i] = state_[i];
}
if (already_done || word->blobs.empty())
if (already_done || word->blobs.empty()) {
return;
}
// Create the window if needed.
if (segm_window == nullptr) {
@ -764,14 +779,16 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2) {
const UNICHARSET *uchset = word1.unicharset();
if (word2.unicharset() != uchset)
if (word2.unicharset() != uchset) {
return false;
}
int w1start, w1end;
word1.punct_stripped(&w1start, &w1end);
int w2start, w2end;
word2.punct_stripped(&w2start, &w2end);
if (w1end - w1start != w2end - w2start)
if (w1end - w1start != w2end - w2start) {
return false;
}
for (int i = 0; i < w1end - w1start; i++) {
if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
uchset->to_lower(word2.unichar_id(w2start + i))) {
@ -804,8 +821,9 @@ void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings,
c_it.set_to_list(ratings);
for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
c_it.data()->print(&current_unicharset);
if (!c_it.at_last())
if (!c_it.at_last()) {
tprintf("\n");
}
}
tprintf("\n");
fflush(stdout);

View File

@ -297,8 +297,9 @@ public:
return state_[index];
}
// Returns script_pos_[index], or SP_NORMAL when index is outside
// [0, length_).
ScriptPos BlobPosition(int index) const {
if (index < 0 || index >= length_)
if (index < 0 || index >= length_) {
return SP_NORMAL;
}
return script_pos_[index];
}
inline float rating() const {
@ -479,16 +480,18 @@ public:
// Returns true if any unichar_id in the word is a non-space-delimited char,
// as reported by unicharset_->IsSpaceDelimited(). Returns false when
// length_ is 0.
bool ContainsAnyNonSpaceDelimited() const {
for (int i = 0; i < length_; ++i) {
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i]))
if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) {
return true;
}
}
return false;
}
// Returns true if the word is all spaces, i.e. every unichar id equals
// UNICHAR_SPACE. Note that this is also true when length_ is 0.
bool IsAllSpaces() const {
for (int i = 0; i < length_; ++i) {
if (unichar_ids_[i] != UNICHAR_SPACE)
if (unichar_ids_[i] != UNICHAR_SPACE) {
return false;
}
}
return true;
}

View File

@ -88,25 +88,29 @@ TBOX TBOX::intersection( // shared area box
int16_t right;
int16_t top;
if (overlap(box)) {
if (box.bot_left.x() > bot_left.x())
if (box.bot_left.x() > bot_left.x()) {
left = box.bot_left.x();
else
} else {
left = bot_left.x();
}
if (box.top_right.x() < top_right.x())
if (box.top_right.x() < top_right.x()) {
right = box.top_right.x();
else
} else {
right = top_right.x();
}
if (box.bot_left.y() > bot_left.y())
if (box.bot_left.y() > bot_left.y()) {
bottom = box.bot_left.y();
else
} else {
bottom = bot_left.y();
}
if (box.top_right.y() < top_right.y())
if (box.top_right.y() < top_right.y()) {
top = box.top_right.y();
else
} else {
top = top_right.y();
}
} else {
left = INT16_MAX;
bottom = INT16_MAX;
@ -126,25 +130,29 @@ TBOX TBOX::bounding_union( // box enclosing both
ICOORD bl; // bottom left
ICOORD tr; // top right
if (box.bot_left.x() < bot_left.x())
if (box.bot_left.x() < bot_left.x()) {
bl.set_x(box.bot_left.x());
else
} else {
bl.set_x(bot_left.x());
}
if (box.top_right.x() > top_right.x())
if (box.top_right.x() > top_right.x()) {
tr.set_x(box.top_right.x());
else
} else {
tr.set_x(top_right.x());
}
if (box.bot_left.y() < bot_left.y())
if (box.bot_left.y() < bot_left.y()) {
bl.set_y(box.bot_left.y());
else
} else {
bl.set_y(bot_left.y());
}
if (box.top_right.y() > top_right.y())
if (box.top_right.y() > top_right.y()) {
tr.set_y(box.top_right.y());
else
} else {
tr.set_y(top_right.y());
}
return TBOX(bl, tr);
}
@ -177,19 +185,23 @@ void TBOX::print_to_str(std::string &str) const {
// Writes to the given file. Returns false in case of error.
// bot_left is written first, then top_right; writing stops at the first
// corner that fails to serialize.
bool TBOX::Serialize(FILE *fp) const {
if (!bot_left.Serialize(fp))
if (!bot_left.Serialize(fp)) {
return false;
if (!top_right.Serialize(fp))
}
if (!top_right.Serialize(fp)) {
return false;
}
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// bot_left is read first, then top_right; the swap flag is passed through
// to each corner's own DeSerialize.
bool TBOX::DeSerialize(bool swap, FILE *fp) {
if (!bot_left.DeSerialize(swap, fp))
if (!bot_left.DeSerialize(swap, fp)) {
return false;
if (!top_right.DeSerialize(swap, fp))
}
if (!top_right.DeSerialize(swap, fp)) {
return false;
}
return true;
}
@ -202,17 +214,21 @@ bool TBOX::DeSerialize(bool swap, FILE *fp) {
// Grows op1 in place, edge by edge, so that it also encloses op2, and
// returns op1.
TBOX &operator+=( // bounding box of both
TBOX &op1, // operands
const TBOX &op2) {
// Extend the left edge.
if (op2.bot_left.x() < op1.bot_left.x())
if (op2.bot_left.x() < op1.bot_left.x()) {
op1.bot_left.set_x(op2.bot_left.x());
}
// Extend the right edge.
if (op2.top_right.x() > op1.top_right.x())
if (op2.top_right.x() > op1.top_right.x()) {
op1.top_right.set_x(op2.top_right.x());
}
// Extend the bottom edge.
if (op2.bot_left.y() < op1.bot_left.y())
if (op2.bot_left.y() < op1.bot_left.y()) {
op1.bot_left.set_y(op2.bot_left.y());
}
// Extend the top edge.
if (op2.top_right.y() > op1.top_right.y())
if (op2.top_right.y() > op1.top_right.y()) {
op1.top_right.set_y(op2.top_right.y());
}
return op1;
}
@ -225,17 +241,21 @@ TBOX &operator+=( // bounding bounding bx
TBOX &operator&=(TBOX &op1, const TBOX &op2) {
if (op1.overlap(op2)) {
if (op2.bot_left.x() > op1.bot_left.x())
if (op2.bot_left.x() > op1.bot_left.x()) {
op1.bot_left.set_x(op2.bot_left.x());
}
if (op2.top_right.x() < op1.top_right.x())
if (op2.top_right.x() < op1.top_right.x()) {
op1.top_right.set_x(op2.top_right.x());
}
if (op2.bot_left.y() > op1.bot_left.y())
if (op2.bot_left.y() > op1.bot_left.y()) {
op1.bot_left.set_y(op2.bot_left.y());
}
if (op2.top_right.y() < op1.top_right.y())
if (op2.top_right.y() < op1.top_right.y()) {
op1.top_right.set_y(op2.top_right.y());
}
} else {
op1.bot_left.set_x(INT16_MAX);
op1.bot_left.set_y(INT16_MAX);

View File

@ -115,24 +115,27 @@ public:
}
// Returns top_right.y() - bot_left.y(), or 0 when null_box() is true.
int16_t height() const { // how high is it?
if (!null_box())
if (!null_box()) {
return top_right.y() - bot_left.y();
else
} else {
return 0;
}
}
// Returns top_right.x() - bot_left.x(), or 0 when null_box() is true.
int16_t width() const { // how wide is it?
if (!null_box())
if (!null_box()) {
return top_right.x() - bot_left.x();
else
} else {
return 0;
}
}
// Returns width() * height(), or 0 when null_box() is true.
int32_t area() const { // what is the area?
if (!null_box())
if (!null_box()) {
return width() * height();
else
} else {
return 0;
}
}
// Pads the box on either side by the supplied x,y pad amounts.
@ -372,13 +375,15 @@ inline bool TBOX::major_overlap( // Do boxes overlap more that half.
int overlap = std::min(box.top_right.x(), top_right.x());
overlap -= std::max(box.bot_left.x(), bot_left.x());
overlap += overlap;
if (overlap < std::min(box.width(), width()))
if (overlap < std::min(box.width(), width())) {
return false;
}
overlap = std::min(box.top_right.y(), top_right.y());
overlap -= std::max(box.bot_left.y(), bot_left.y());
overlap += overlap;
if (overlap < std::min(box.height(), height()))
if (overlap < std::min(box.height(), height())) {
return false;
}
return true;
}
@ -460,10 +465,11 @@ inline double TBOX::x_overlap_fraction(const TBOX &other) const {
int width = right() - left();
if (width == 0) {
int x = left();
if (other.left() <= x && x <= other.right())
if (other.left() <= x && x <= other.right()) {
return 1.0;
else
} else {
return 0.0;
}
} else {
return std::max(0.0, static_cast<double>(high - low) / width);
}
@ -482,10 +488,11 @@ inline double TBOX::y_overlap_fraction(const TBOX &other) const {
int height = top() - bottom();
if (height == 0) {
int y = bottom();
if (other.bottom() <= y && y <= other.top())
if (other.bottom() <= y && y <= other.top()) {
return 1.0;
else
} else {
return 0.0;
}
} else {
return std::max(0.0, static_cast<double>(high - low) / height);
}

View File

@ -57,11 +57,12 @@ bool REJ::rej_before_quality_accept() {
}
// Returns false whenever R_MINIMAL_REJ_ACCEPT is set. Otherwise the char is
// rejected if it is perm_rejected(), or
// rej_between_quality_and_minimal_rej_accept(), or if R_QUALITY_ACCEPT is
// not set and rej_before_quality_accept() holds.
bool REJ::rejected() { // Is char rejected?
if (flag(R_MINIMAL_REJ_ACCEPT))
if (flag(R_MINIMAL_REJ_ACCEPT)) {
return false;
else
} else {
return (perm_rejected() || rej_between_quality_and_minimal_rej_accept() ||
(!flag(R_QUALITY_ACCEPT) && rej_before_quality_accept()));
}
}
bool REJ::accept_if_good_quality() { // potential rej?
@ -230,24 +231,27 @@ int16_t REJMAP::accept_count() { // How many accepted?
int16_t count = 0;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
count++;
}
}
return count;
}
// Returns true as soon as one of the len entries of ptr reports
// recoverable(); false if none does (including when len is 0).
bool REJMAP::recoverable_rejects() { // Any non perm rejs?
for (int i = 0; i < len; i++) {
if (ptr[i].recoverable())
if (ptr[i].recoverable()) {
return true;
}
}
return false;
}
// Returns true as soon as one of the len entries of ptr reports
// accept_if_good_quality(); false if none does (including when len is 0).
bool REJMAP::quality_recoverable_rejects() { // Any potential rejs?
for (int i = 0; i < len; i++) {
if (ptr[i].accept_if_good_quality())
if (ptr[i].accept_if_good_quality()) {
return true;
}
}
return false;
}
@ -260,8 +264,9 @@ void REJMAP::remove_pos( // Cut out an element
ASSERT_HOST(len > 0);
len--;
for (; pos < len; pos++)
for (; pos < len; pos++) {
ptr[pos] = ptr[pos + 1];
}
}
void REJMAP::print(FILE *fp) {
@ -304,8 +309,9 @@ void REJMAP::rej_word_not_tess_accepted() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_not_tess_accepted();
}
}
}
@ -313,8 +319,9 @@ void REJMAP::rej_word_contains_blanks() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_contains_blanks();
}
}
}
@ -322,8 +329,9 @@ void REJMAP::rej_word_bad_permuter() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_bad_permuter();
}
}
}
@ -331,8 +339,9 @@ void REJMAP::rej_word_xht_fixup() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_xht_fixup();
}
}
}
@ -340,8 +349,9 @@ void REJMAP::rej_word_no_alphanums() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_no_alphanums();
}
}
}
@ -349,8 +359,9 @@ void REJMAP::rej_word_mostly_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_mostly_rej();
}
}
}
@ -358,8 +369,9 @@ void REJMAP::rej_word_bad_quality() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_bad_quality();
}
}
}
@ -367,8 +379,9 @@ void REJMAP::rej_word_doc_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_doc_rej();
}
}
}
@ -376,8 +389,9 @@ void REJMAP::rej_word_block_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_block_rej();
}
}
}
@ -385,8 +399,9 @@ void REJMAP::rej_word_row_rej() { // Reject whole word
int i;
for (i = 0; i < len; i++) {
if (ptr[i].accepted())
if (ptr[i].accepted()) {
ptr[i].setrej_row_rej();
}
}
}

View File

@ -102,10 +102,11 @@ class REJ {
BITS16 flags2;
// Sets the given flag. The flags are split across two 16-bit sets: values
// below 16 are stored in flags1, the rest in flags2 at position
// rej_flag - 16.
void set_flag(REJ_FLAGS rej_flag) {
if (rej_flag < 16)
if (rej_flag < 16) {
flags1.set(rej_flag);
else
} else {
flags2.set(rej_flag - 16);
}
}
bool rej_before_nn_accept();
@ -128,21 +129,23 @@ public:
const REJ &source) = default;
// Returns the state of the given flag. Values below 16 are read from
// flags1, the rest from flags2 at position rej_flag - 16.
bool flag(REJ_FLAGS rej_flag) {
if (rej_flag < 16)
if (rej_flag < 16) {
return flags1[rej_flag];
else
} else {
return flags2[rej_flag - 16];
}
}
// Returns the map character for this REJ. The checks are ordered, so the
// first one that holds wins: MAP_REJECT_PERM if perm_rejected(),
// MAP_REJECT_POTENTIAL if accept_if_good_quality(), MAP_REJECT_TEMP if
// rejected(), and MAP_ACCEPT otherwise.
char display_char() {
if (perm_rejected())
if (perm_rejected()) {
return MAP_REJECT_PERM;
else if (accept_if_good_quality())
} else if (accept_if_good_quality()) {
return MAP_REJECT_POTENTIAL;
else if (rejected())
} else if (rejected()) {
return MAP_REJECT_TEMP;
else
} else {
return MAP_ACCEPT;
}
}
bool perm_rejected(); // Is char perm reject?

View File

@ -54,14 +54,17 @@ bool SEAM::IsHealthy(const TBLOB &blob, int min_points, int min_area) const {
// Runs FindBlobWidth for every seam involved in inserting this seam at
// insert_index, forwarding blobs and modify to each call. Returns false at
// the first FindBlobWidth failure and true if all succeed.
bool SEAM::PrepareToInsertSeam(const std::vector<SEAM *> &seams,
const std::vector<TBLOB *> &blobs, int insert_index, bool modify) {
// Seams before the insertion point are checked at their own index.
for (int s = 0; s < insert_index; ++s) {
if (!seams[s]->FindBlobWidth(blobs, s, modify))
if (!seams[s]->FindBlobWidth(blobs, s, modify)) {
return false;
}
}
// This seam is checked at the insertion index itself.
if (!FindBlobWidth(blobs, insert_index, modify))
if (!FindBlobWidth(blobs, insert_index, modify)) {
return false;
for (int s = insert_index; s < seams.size(); ++s) {
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify))
}
// Seams from insert_index onwards are checked one blob further right
// (index s + 1).
for (unsigned s = insert_index; s < seams.size(); ++s) {
if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) {
return false;
}
}
return true;
}
@ -78,19 +81,22 @@ bool SEAM::FindBlobWidth(const std::vector<TBLOB *> &blobs, int index, bool modi
const SPLIT &split = splits_[s];
bool found_split = split.ContainedByBlob(*blobs[index]);
// Look right.
for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
for (unsigned b = index + 1; !found_split && b < blobs.size(); ++b) {
found_split = split.ContainedByBlob(*blobs[b]);
if (found_split && b - index > widthp_ && modify)
if (found_split && b - index > widthp_ && modify) {
widthp_ = b - index;
}
}
// Look left.
for (int b = index - 1; !found_split && b >= 0; --b) {
found_split = split.ContainedByBlob(*blobs[b]);
if (found_split && index - b > widthn_ && modify)
if (found_split && index - b > widthn_ && modify) {
widthn_ = index - b;
}
}
if (found_split)
if (found_split) {
++num_found;
}
}
return num_found == num_splits_;
}
@ -120,8 +126,9 @@ void SEAM::UndoSeam(TBLOB *blob, TBLOB *other_blob) const {
}
TESSLINE *outline = blob->outlines;
while (outline->next)
while (outline->next) {
outline = outline->next;
}
outline->next = other_blob->outlines;
other_blob->outlines = nullptr;
delete other_blob;
@ -139,8 +146,9 @@ void SEAM::Print(const char *label) const {
tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y, widthp_, widthn_);
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Print();
if (s + 1 < num_splits_)
if (s + 1 < num_splits_) {
tprintf(", ");
}
}
tprintf("\n");
}
@ -150,8 +158,8 @@ void SEAM::Print(const char *label) const {
void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
if (!seams.empty()) {
tprintf("%s\n", label);
for (int x = 0; x < seams.size(); ++x) {
tprintf("%2d: ", x);
for (unsigned x = 0; x < seams.size(); ++x) {
tprintf("%2u: ", x);
seams[x]->Print("");
}
tprintf("\n");
@ -161,8 +169,9 @@ void SEAM::PrintSeams(const char *label, const std::vector<SEAM *> &seams) {
#ifndef GRAPHICS_DISABLED
// Draws the seam in the given window.
void SEAM::Mark(ScrollView *window) const {
for (int s = 0; s < num_splits_; ++s)
for (int s = 0; s < num_splits_; ++s) {
splits_[s].Mark(window);
}
}
#endif
@ -171,8 +180,9 @@ void SEAM::Mark(ScrollView *window) const {
/* static */
void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
int first, int last) {
for (int x = first; x < last; ++x)
for (int x = first; x < last; ++x) {
seams[x]->Reveal();
}
TESSLINE *outline = blobs[first]->outlines;
int next_blob = first + 1;
@ -194,15 +204,18 @@ void SEAM::BreakPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB
void SEAM::JoinPieces(const std::vector<SEAM *> &seams, const std::vector<TBLOB *> &blobs,
int first, int last) {
TESSLINE *outline = blobs[first]->outlines;
if (!outline)
if (!outline) {
return;
}
for (int x = first; x < last; ++x) {
SEAM *seam = seams[x];
if (x - seam->widthn_ >= first && x + seam->widthp_ < last)
if (x - seam->widthn_ >= first && x + seam->widthp_ < last) {
seam->Hide();
while (outline->next)
}
while (outline->next) {
outline = outline->next;
}
outline->next = blobs[x + 1]->outlines;
}
}
@ -224,8 +237,9 @@ void SEAM::Reveal() const {
// Computes and returns, but does not set, the full priority of *this SEAM.
float SEAM::FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth,
double center_knob, double width_change_knob) const {
if (num_splits_ == 0)
if (num_splits_ == 0) {
return 0.0f;
}
for (int s = 1; s < num_splits_; ++s) {
splits_[s].SplitOutline();
}

View File

@ -71,15 +71,17 @@ public:
location_ += other.location_;
location_ /= 2;
for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) {
splits_[num_splits_++] = other.splits_[s];
}
}
// Returns true if the given blob contains all splits of *this SEAM.
bool ContainedByBlob(const TBLOB &blob) const {
for (int s = 0; s < num_splits_; ++s) {
if (!splits_[s].ContainedByBlob(blob))
if (!splits_[s].ContainedByBlob(blob)) {
return false;
}
}
return true;
}
@ -88,17 +90,20 @@ public:
// the EDGEPT pointer, not the coordinates.
bool UsesPoint(const EDGEPT *point) const {
for (int s = 0; s < num_splits_; ++s) {
if (splits_[s].UsesPoint(point))
if (splits_[s].UsesPoint(point)) {
return true;
}
}
return false;
}
// Returns true if *this and other share any common point, by coordinates.
bool SharesPosition(const SEAM &other) const {
for (int s = 0; s < num_splits_; ++s) {
for (int t = 0; t < other.num_splits_; ++t)
if (splits_[s].SharesPosition(other.splits_[t]))
for (int t = 0; t < other.num_splits_; ++t) {
if (splits_[s].SharesPosition(other.splits_[t])) {
return true;
}
}
}
return false;
}
@ -108,8 +113,9 @@ public:
TBOX split1_box = splits_[s].bounding_box();
for (int t = 0; t < other.num_splits_; ++t) {
TBOX split2_box = other.splits_[t].bounding_box();
if (split1_box.y_overlap(split2_box))
if (split1_box.y_overlap(split2_box)) {
return true;
}
}
}
return false;

View File

@ -77,8 +77,9 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
TBOX box2 = Box21();
int min_left = std::min(box1.left(), box2.left());
int max_right = std::max(box1.right(), box2.right());
if (xmin < min_left && xmax > max_right)
if (xmin < min_left && xmax > max_right) {
return kBadPriority;
}
float grade = 0.0f;
// grade_overlap.
@ -89,10 +90,12 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
if (overlap == min_width) {
grade += 100.0f; // Total overlap.
} else {
if (2 * overlap > min_width)
if (2 * overlap > min_width) {
overlap += 2 * overlap - min_width;
if (overlap > 0)
}
if (overlap > 0) {
grade += overlap_knob * overlap;
}
}
// grade_center_of_blob.
if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
@ -100,8 +103,9 @@ float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob, int centered_
}
// grade_width_change.
float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
if (width_change_grade > 0.0f)
if (width_change_grade > 0.0f) {
grade += width_change_grade * width_change_knob;
}
return grade;
}
@ -228,8 +232,9 @@ void SPLIT::Mark(ScrollView *window) const {
// Inserts the resulting outlines into the given list.
void SPLIT::SplitOutlineList(TESSLINE *outlines) const {
SplitOutline();
while (outlines->next != nullptr)
while (outlines->next != nullptr) {
outlines = outlines->next;
}
outlines->next = new TESSLINE;
outlines->next->loop = point1;

View File

@ -77,8 +77,9 @@ bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
**********************************************************************/
void STATS::clear() { // clear out buckets
total_count_ = 0;
if (buckets_ != nullptr)
if (buckets_ != nullptr) {
memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
}
}
/**********************************************************************
@ -157,8 +158,9 @@ double STATS::sd() const { // standard deviation
}
double variance = static_cast<double>(sum) / total_count_;
variance = sqsum / total_count_ - variance * variance;
if (variance > 0.0)
if (variance > 0.0) {
return sqrt(variance);
}
return 0.0;
}
@ -184,8 +186,9 @@ double STATS::ile(double frac) const {
#endif
int sum = 0;
int index = 0;
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++])
for (index = 0; index < rangemax_ - rangemin_ && sum < target; sum += buckets_[index++]) {
;
}
if (index > 0) {
ASSERT_HOST(buckets_[index - 1] > 0);
return rangemin_ + index - static_cast<double>(sum - target) / buckets_[index - 1];
@ -204,8 +207,9 @@ int32_t STATS::min_bucket() const { // Find min
return rangemin_;
}
int32_t min = 0;
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++)
for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++) {
;
}
return rangemin_ + min;
}
@ -220,8 +224,9 @@ int32_t STATS::max_bucket() const { // Find max
return rangemin_;
}
int32_t max;
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--)
for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--) {
;
}
return rangemin_ + max;
}
@ -244,11 +249,13 @@ double STATS::median() const { // get median
int32_t min_pile;
int32_t max_pile;
/* Find preceding non zero pile */
for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--)
for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--) {
;
}
/* Find following non zero pile */
for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++)
for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++) {
;
}
median = (min_pile + max_pile) / 2.0;
}
return median;
@ -264,19 +271,24 @@ bool STATS::local_min(int32_t x) const {
return false;
}
x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
if (buckets_[x] == 0)
if (buckets_[x] == 0) {
return true;
}
int32_t index; // table index
for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index)
for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index) {
;
if (index >= 0 && buckets_[index] < buckets_[x])
}
if (index >= 0 && buckets_[index] < buckets_[x]) {
return false;
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index)
}
for (index = x + 1; index < rangemax_ - rangemin_ && buckets_[index] == buckets_[x]; ++index) {
;
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
}
if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x]) {
return false;
else
} else {
return true;
}
}
/**********************************************************************
@ -297,10 +309,12 @@ void STATS::smooth(int32_t factor) {
// centre weight
int count = buckets_[entry] * factor;
for (int offset = 1; offset < factor; offset++) {
if (entry - offset >= 0)
if (entry - offset >= 0) {
count += buckets_[entry - offset] * (factor - offset);
if (entry + offset < entrycount)
}
if (entry + offset < entrycount) {
count += buckets_[entry + offset] * (factor - offset);
}
}
result.add(entry + rangemin_, count);
}
@ -335,8 +349,9 @@ int32_t STATS::cluster(float lower, // thresholds
float min_dist; // from best_cluster
int32_t cluster_count; // no of clusters
if (buckets_ == nullptr || max_clusters < 1)
if (buckets_ == nullptr || max_clusters < 1) {
return 0;
}
centres = new float[max_clusters + 1];
for (cluster_count = 1;
cluster_count <= max_clusters && clusters[cluster_count].buckets_ != nullptr &&
@ -380,8 +395,9 @@ int32_t STATS::cluster(float lower, // thresholds
for (cluster = 1; cluster <= cluster_count; cluster++) {
dist = entry + rangemin_ - centres[cluster];
// find distance
if (dist < 0)
if (dist < 0) {
dist = -dist;
}
if (dist < min_dist) {
min_dist = dist; // find least
best_cluster = cluster;
@ -463,8 +479,9 @@ static bool GatherPeak(int index, const int *src_buckets, int *used_buckets, int
// more useful than decreasing total count.
// Returns the actual number of modes found.
int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes) const {
if (max_modes <= 0)
if (max_modes <= 0) {
return 0;
}
int src_count = rangemax_ - rangemin_;
// Used copies the counts in buckets_ as they get used.
STATS used(rangemin_, rangemax_);
@ -493,23 +510,27 @@ int STATS::top_n_modes(int max_modes, std::vector<KDPairInc<float, int>> &modes)
int prev_pile = max_count;
for (int offset = 1; max_index + offset < src_count; ++offset) {
if (!GatherPeak(max_index + offset, buckets_, used.buckets_, &prev_pile, &total_count,
&total_value))
&total_value)) {
break;
}
}
prev_pile = buckets_[max_index];
for (int offset = 1; max_index - offset >= 0; ++offset) {
if (!GatherPeak(max_index - offset, buckets_, used.buckets_, &prev_pile, &total_count,
&total_value))
&total_value)) {
break;
}
}
if (total_count > least_count || modes.size() < max_modes) {
// We definitely want this mode, so if we have enough discard the least.
if (modes.size() == max_modes)
if (modes.size() == max_modes) {
modes.resize(max_modes - 1);
}
int target_index = 0;
// Linear search for the target insertion point.
while (target_index < modes.size() && modes[target_index].data() >= total_count)
while (target_index < modes.size() && modes[target_index].data() >= total_count) {
++target_index;
}
auto peak_mean = static_cast<float>(total_value / total_count + rangemin_);
modes.insert(modes.begin() + target_index, KDPairInc<float, int>(peak_mean, total_count));
least_count = modes.back().data();
@ -535,8 +556,9 @@ void STATS::print() const {
for (int index = min; index <= max; index++) {
if (buckets_[index] != 0) {
tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
if (++num_printed % 8 == 0)
if (++num_printed % 8 == 0) {
tprintf("\n");
}
}
}
tprintf("\n");

View File

@ -73,10 +73,12 @@ public:
double median() const; // get median of samples
// Returns the count of the given value.
int32_t pile_count(int32_t value) const {
if (value <= rangemin_)
if (value <= rangemin_) {
return buckets_[0];
if (value >= rangemax_ - 1)
}
if (value >= rangemax_ - 1) {
return buckets_[rangemax_ - rangemin_ - 1];
}
return buckets_[value - rangemin_];
}
// Returns the total count of all buckets.

View File

@ -71,8 +71,9 @@ static void position_outline( // put in place
dest_outline = it.extract();
child_it.add_to_end(dest_outline);
// make it a child
if (it.empty())
if (it.empty()) {
break;
}
}
}
return; // finished
@ -110,8 +111,9 @@ static void plot_outline_list( // draw outlines
outline = it.data();
// draw it
outline->plot(window, colour);
if (!outline->child()->empty())
if (!outline->child()->empty()) {
plot_outline_list(outline->child(), window, child_colour, child_colour);
}
}
}
// Draws the outlines in the given colour, and child_colour, normalized
@ -124,8 +126,9 @@ static void plot_normed_outline_list(const DENORM &denorm, C_OUTLINE_LIST *list,
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
outline->plot_normed(denorm, colour, window);
if (!outline->child()->empty())
if (!outline->child()->empty()) {
plot_normed_outline_list(denorm, outline->child(), child_colour, child_colour, window);
}
}
}
#endif
@ -143,8 +146,9 @@ static void reverse_outline_list(C_OUTLINE_LIST *list) {
C_OUTLINE *outline = it.data();
outline->reverse(); // reverse it
outline->set_flag(COUT_INVERSE, true);
if (!outline->child()->empty())
if (!outline->child()->empty()) {
reverse_outline_list(outline->child());
}
}
}
@ -205,10 +209,11 @@ void C_BLOB::ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_
// Set inverse flag and reverse if needed.
blob->CheckInverseFlagAndDirection();
// Put on appropriate list.
if (!blob_is_good && bad_blobs_it != nullptr)
if (!blob_is_good && bad_blobs_it != nullptr) {
bad_blobs_it->add_after_then_move(blob);
else
} else {
good_blobs_it->add_after_then_move(blob);
}
}
}
@ -346,8 +351,9 @@ void C_BLOB::move( // reposition blob
) {
C_OUTLINE_IT it(&outlines); // iterator
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
it.data()->move(vec); // move each outline
}
}
// Static helper for C_BLOB::rotate to allow recursion of child outlines.
@ -386,12 +392,14 @@ static void ComputeEdgeOffsetsOutlineList(int threshold, Pix *pix, C_OUTLINE_LIS
C_OUTLINE_IT it(list);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
if (pix != nullptr && pixGetDepth(pix) == 8)
if (pix != nullptr && pixGetDepth(pix) == 8) {
outline->ComputeEdgeOffsets(threshold, pix);
else
} else {
outline->ComputeBinaryOffsets();
if (!outline->child()->empty())
}
if (!outline->child()->empty()) {
ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
}
}
}
@ -420,8 +428,9 @@ int16_t C_BLOB::EstimateBaselinePosition() {
int left = box.left();
int width = box.width();
int bottom = box.bottom();
if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio)
if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio) {
return bottom; // This is only for non-CJK blobs.
}
// Get the minimum y coordinate at each x-coordinate.
std::vector<int> y_mins;
y_mins.resize(width + 1, box.top());
@ -430,16 +439,18 @@ int16_t C_BLOB::EstimateBaselinePosition() {
C_OUTLINE *outline = it.data();
ICOORD pos = outline->start_pos();
for (int s = 0; s < outline->pathlength(); ++s) {
if (pos.y() < y_mins[pos.x() - left])
if (pos.y() < y_mins[pos.x() - left]) {
y_mins[pos.x() - left] = pos.y();
}
pos += outline->step(s);
}
}
// Find the total extent of the bottom or bottom + 1.
int bottom_extent = 0;
for (int x = 0; x <= width; ++x) {
if (y_mins[x] == bottom || y_mins[x] == bottom + 1)
if (y_mins[x] == bottom || y_mins[x] == bottom + 1) {
++bottom_extent;
}
}
// Find the lowest run longer than the bottom extent that is not the bottom.
int best_min = box.top();
@ -450,21 +461,24 @@ int16_t C_BLOB::EstimateBaselinePosition() {
// Find the length of the current run.
int y_at_x = y_mins[x];
int run = 1;
while (x + run <= width && y_mins[x + run] == y_at_x)
while (x + run <= width && y_mins[x + run] == y_at_x) {
++run;
}
if (y_at_x > bottom + 1) {
// Possible contender.
int total_run = run;
// Find extent of current value or +1 to the right of x.
while (x + total_run <= width &&
(y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1))
(y_mins[x + total_run] == y_at_x || y_mins[x + total_run] == y_at_x + 1)) {
++total_run;
}
// At least one end has to be higher so it is not a local max.
if (prev_prev_y > y_at_x + 1 || x + total_run > width || y_mins[x + total_run] > y_at_x + 1) {
// If the prev_run is at y + 1, then we can add that too. There cannot
// be a suitable run at y before that or we would have found it already.
if (prev_run > 0 && prev_y == y_at_x + 1)
if (prev_run > 0 && prev_y == y_at_x + 1) {
total_run += prev_run;
}
if (total_run > bottom_extent && y_at_x < best_min) {
best_min = y_at_x;
}
@ -482,8 +496,9 @@ static void render_outline_list(C_OUTLINE_LIST *list, int left, int top, Pix *pi
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
outline->render(left, top, pix);
if (!outline->child()->empty())
if (!outline->child()->empty()) {
render_outline_list(outline->child(), left, top, pix);
}
}
}

View File

@ -108,8 +108,9 @@ public:
#endif // !GRAPHICS_DISABLED
C_BLOB &operator=(const C_BLOB &source) {
if (!outlines.empty())
if (!outlines.empty()) {
outlines.clear();
}
outlines.deep_copy(&source.outlines, &C_OUTLINE::deep_copy);
return *this;
}

View File

@ -48,8 +48,9 @@ T& uniqueInstance(std::unique_ptr<T> new_instance = nullptr)
{
static std::unique_ptr<T> _instance = std::make_unique<T>();
if(new_instance)
if (new_instance) {
_instance = std::move(new_instance);
}
return *_instance.get();
}

View File

@ -69,8 +69,9 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
with the concencus onto the reject list.
*/
start_it.set_to_list(&cblobs);
if (start_it.empty())
if (start_it.empty()) {
return;
}
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
bool reject_blob = false;
bool blob_inverted;
@ -84,22 +85,25 @@ WERD::WERD(C_BLOB_LIST *blob_list, uint8_t blank_count, const char *text)
if (reject_blob) {
rej_cblob_it.add_after_then_move(start_it.extract());
} else {
if (blob_inverted)
if (blob_inverted) {
inverted_vote++;
else
} else {
non_inverted_vote++;
}
}
}
flags.set(W_INVERSE, (inverted_vote > non_inverted_vote));
start_it.set_to_list(&cblobs);
if (start_it.empty())
if (start_it.empty()) {
return;
}
for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
c_outline_it.set_to_list(start_it.data()->out_list());
if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE])
if (c_outline_it.data()->flag(COUT_INVERSE) != flags[W_INVERSE]) {
rej_cblob_it.add_after_then_move(start_it.extract());
}
}
}
@ -116,8 +120,9 @@ WERD::WERD(C_BLOB_LIST *blob_list, ///< In word order
C_BLOB_IT start_it = blob_list; // iterator
C_BLOB_IT end_it = blob_list; // another
while (!end_it.at_last())
while (!end_it.at_last()) {
end_it.forward(); // move to last
}
(reinterpret_cast<C_BLOB_LIST *>(&cblobs))->assign_to_sublist(&start_it, &end_it);
// move to our list
blanks = clone->blanks;
@ -191,8 +196,9 @@ TBOX WERD::true_bounding_box() const {
void WERD::move(const ICOORD vec) {
C_BLOB_IT cblob_it(&cblobs); // cblob iterator
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward())
for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
cblob_it.data()->move(vec);
}
}
/**
@ -293,8 +299,9 @@ void WERD::plot(ScrollView *window, ScrollView::Color colour) {
// Get the next color in the (looping) rainbow.
ScrollView::Color WERD::NextColor(ScrollView::Color colour) {
auto next = static_cast<ScrollView::Color>(colour + 1);
if (next >= LAST_COLOUR || next < FIRST_COLOUR)
if (next >= LAST_COLOUR || next < FIRST_COLOUR) {
next = FIRST_COLOUR;
}
return next;
}
@ -355,12 +362,14 @@ WERD &WERD::operator=(const WERD &source) {
flags = source.flags;
script_id_ = source.script_id_;
correct = source.correct;
if (!cblobs.empty())
if (!cblobs.empty()) {
cblobs.clear();
}
cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
if (!rej_cblobs.empty())
if (!rej_cblobs.empty()) {
rej_cblobs.clear();
}
rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
return *this;
}
@ -495,8 +504,9 @@ void WERD::CleanNoise(float size_threshold) {
rej_it.add_after_then_move(rej_blob);
}
}
if (blob->out_list()->empty())
if (blob->out_list()->empty()) {
delete blob_it.extract();
}
}
}
@ -525,13 +535,15 @@ bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
const std::vector<C_OUTLINE *> &outlines,
bool *make_next_word_fuzzy) {
bool outline_added_to_start = false;
if (make_next_word_fuzzy != nullptr)
if (make_next_word_fuzzy != nullptr) {
*make_next_word_fuzzy = false;
}
C_BLOB_IT rej_it(&rej_cblobs);
for (int i = 0; i < outlines.size(); ++i) {
C_OUTLINE *outline = outlines[i];
if (outline == nullptr)
if (outline == nullptr) {
continue; // Already used it.
}
if (wanted[i]) {
C_BLOB *target_blob = target_blobs[i];
TBOX noise_box = outline->bounding_box();
@ -553,8 +565,9 @@ bool WERD::AddSelectedOutlines(const std::vector<bool> &wanted,
}
if (blob_it.cycled_list()) {
blob_it.add_to_end(target_blob);
if (make_next_word_fuzzy != nullptr)
if (make_next_word_fuzzy != nullptr) {
*make_next_word_fuzzy = true;
}
}
// Add all consecutive wanted, but null-blob outlines to same blob.
C_OUTLINE_IT ol_it(target_blob->out_list());

View File

@ -65,8 +65,9 @@ void UnicharAmbigs::InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_amb
// Loads the universal ambigs that are useful for any language.
void UnicharAmbigs::LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *unicharset) {
TFile file;
if (!file.Open(kUniversalAmbigsFile, ksizeofUniversalAmbigsFile))
if (!file.Open(kUniversalAmbigsFile, ksizeofUniversalAmbigsFile)) {
return;
}
LoadUnicharAmbigs(encoder_set, &file, 0, false, unicharset);
}
@ -75,8 +76,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
UNICHARSET *unicharset) {
int i, j;
UnicharIdVector *adaption_ambigs_entry;
if (debug_level)
if (debug_level) {
tprintf("Reading ambiguities\n");
}
int test_ambig_part_size;
int replacement_ambig_part_size;
@ -100,19 +102,22 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
}
while (ambig_file->FGets(buffer, kBufferSize) != nullptr) {
chomp_string(buffer);
if (debug_level > 2)
if (debug_level > 2) {
tprintf("read line %s\n", buffer);
}
++line_num;
if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, buffer,
&test_ambig_part_size, test_unichar_ids, &replacement_ambig_part_size,
replacement_string, &type))
replacement_string, &type)) {
continue;
}
// Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.
auto *ambig_spec = new AmbigSpec();
if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_ : dang_ambigs_,
test_ambig_part_size, test_unichar_ids, replacement_ambig_part_size,
replacement_string, type, ambig_spec, unicharset))
replacement_string, type, ambig_spec, unicharset)) {
continue;
}
// Update one_to_one_definite_ambigs_.
if (test_ambig_part_size == 1 && replacement_ambig_part_size == 1 && type == DEFINITE_AMBIG) {
@ -138,8 +143,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
// vector does not already contain it) keeping it in sorted order.
for (j = 0;
j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
++j)
++j) {
;
}
if (j < adaption_ambigs_entry->size()) {
if ((*adaption_ambigs_entry)[j] != id_to_insert) {
adaption_ambigs_entry->insert(adaption_ambigs_entry->begin() + j, id_to_insert);
@ -158,8 +164,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
if (use_ambigs_for_adaption) {
for (i = 0; i < ambigs_for_adaption_.size(); ++i) {
adaption_ambigs_entry = ambigs_for_adaption_[i];
if (adaption_ambigs_entry == nullptr)
if (adaption_ambigs_entry == nullptr) {
continue;
}
for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];
if (reverse_ambigs_for_adaption_[ambig_id] == nullptr) {
@ -176,8 +183,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
const UnicharAmbigsVector &print_table = (tbl == 0) ? replace_ambigs_ : dang_ambigs_;
for (i = 0; i < print_table.size(); ++i) {
AmbigSpec_LIST *lst = print_table[i];
if (lst == nullptr)
if (lst == nullptr) {
continue;
}
if (!lst->empty()) {
tprintf("%s Ambiguities for %s:\n", (tbl == 0) ? "Replaceable" : "Dangerous",
unicharset->debug_str(i).c_str());
@ -222,8 +230,9 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
std::string input(buffer);
std::vector<std::string> fields = split(input, ' ');
if (fields.size() != 3) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
// Encode wrong-string.
@ -233,13 +242,15 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
}
*test_ambig_part_size = unichars.size();
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
return false;
}
// Copy encoded string to output.
for (int i = 0; i < unichars.size(); ++i)
for (int i = 0; i < unichars.size(); ++i) {
test_unichar_ids[i] = unichars[i];
}
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
// Encode replacement-string to check validity.
if (!unicharset.encode_string(fields[1].c_str(), true, &unichars, nullptr, nullptr)) {
@ -247,13 +258,15 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
}
*replacement_ambig_part_size = unichars.size();
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
return false;
}
if (sscanf(fields[2].c_str(), "%d", type) != 1) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
snprintf(replacement_string, kMaxAmbigStringSize, "%s", fields[1].c_str());
@ -264,21 +277,25 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
char *next_token;
if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", test_ambig_part_size) || *test_ambig_part_size <= 0) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
return false;
}
for (i = 0; i < *test_ambig_part_size; ++i) {
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)))
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) {
break;
}
if (!unicharset.contains_unichar(token)) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalUnicharMsg, token);
}
break;
}
test_unichar_ids[i] = unicharset.unichar_to_id(token);
@ -287,29 +304,34 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
if (i != *test_ambig_part_size || !(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) ||
!sscanf(token, "%d", replacement_ambig_part_size) || *replacement_ambig_part_size <= 0) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
if (debug_level)
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
return false;
}
replacement_string[0] = '\0';
for (i = 0; i < *replacement_ambig_part_size; ++i) {
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)))
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token))) {
break;
}
strcat(replacement_string, token);
if (!unicharset.contains_unichar(token)) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalUnicharMsg, token);
}
break;
}
}
if (i != *replacement_ambig_part_size) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
if (version > 0) {
@ -323,8 +345,9 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
// modified word, not the individual unigrams. Tesseract
// has limited support for ngram unichar (e.g. dawg permuter).
if (!(token = strtok_r(nullptr, kAmbigDelimiters, &next_token)) || !sscanf(token, "%d", type)) {
if (debug_level)
if (debug_level) {
tprintf(kIllegalMsg, line_num);
}
return false;
}
}
@ -382,8 +405,9 @@ bool UnicharAmbigs::InsertIntoTable(UnicharAmbigsVector &table, int test_ambig_p
if (table[test_unichar_ids[0]] == nullptr) {
table[test_unichar_ids[0]] = new AmbigSpec_LIST();
}
if (table[test_unichar_ids[0]]->add_sorted(AmbigSpec::compare_ambig_specs, true, ambig_spec))
if (table[test_unichar_ids[0]]->add_sorted(AmbigSpec::compare_ambig_specs, true, ambig_spec)) {
return true;
}
delete ambig_spec;
return false;
}

View File

@ -60,16 +60,20 @@ public:
const UNICHAR_ID val1 = *ptr1++;
const UNICHAR_ID val2 = *ptr2++;
if (val1 != val2) {
if (val1 == INVALID_UNICHAR_ID)
if (val1 == INVALID_UNICHAR_ID) {
return -1;
if (val2 == INVALID_UNICHAR_ID)
}
if (val2 == INVALID_UNICHAR_ID) {
return 1;
if (val1 < val2)
}
if (val1 < val2) {
return -1;
}
return 1;
}
if (val1 == INVALID_UNICHAR_ID)
if (val1 == INVALID_UNICHAR_ID) {
return 0;
}
}
}
@ -88,15 +92,17 @@ public:
// The function assumes that array is terminated by INVALID_UNICHAR_ID.
static inline void print(const UNICHAR_ID array[], const UNICHARSET &unicharset) {
const UNICHAR_ID *ptr = array;
if (*ptr == INVALID_UNICHAR_ID)
if (*ptr == INVALID_UNICHAR_ID) {
tprintf("[Empty]");
}
while (*ptr != INVALID_UNICHAR_ID) {
tprintf("%s ", unicharset.id_to_unichar(*ptr++));
}
tprintf("( ");
ptr = array;
while (*ptr != INVALID_UNICHAR_ID)
while (*ptr != INVALID_UNICHAR_ID) {
tprintf("%d ", *ptr++);
}
tprintf(")\n");
}
};
@ -115,8 +121,9 @@ public:
const AmbigSpec *s1 = *static_cast<const AmbigSpec *const *>(spec1);
const AmbigSpec *s2 = *static_cast<const AmbigSpec *const *>(spec2);
int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram);
if (result != 0)
if (result != 0) {
return result;
}
return UnicharIdArrayUtils::compare(s1->correct_fragments, s2->correct_fragments);
}
@ -177,8 +184,9 @@ public:
// Returns definite 1-1 ambigs for the given unichar id.
inline const UnicharIdVector *OneToOneDefiniteAmbigs(UNICHAR_ID unichar_id) const {
if (one_to_one_definite_ambigs_.empty())
if (one_to_one_definite_ambigs_.empty()) {
return nullptr;
}
return one_to_one_definite_ambigs_[unichar_id];
}
@ -188,8 +196,9 @@ public:
// m->rn,rn->m,m->iii, UnicharAmbigsForAdaption() called with unichar id of
// m will return a pointer to a vector with unichar ids of r,n,i.
inline const UnicharIdVector *AmbigsForAdaption(UNICHAR_ID unichar_id) const {
if (ambigs_for_adaption_.empty())
if (ambigs_for_adaption_.empty()) {
return nullptr;
}
return ambigs_for_adaption_[unichar_id];
}
@ -197,8 +206,9 @@ public:
// the given unichar_id is an ambiguity (appears in the 'wrong' part of
// some ambiguity pair).
inline const UnicharIdVector *ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const {
if (reverse_ambigs_for_adaption_.empty())
if (reverse_ambigs_for_adaption_.empty()) {
return nullptr;
}
return reverse_ambigs_for_adaption_[unichar_id];
}

View File

@ -108,8 +108,9 @@ void BitVector::Init(int length) {
// Writes to the given file. Returns false in case of error.
bool BitVector::Serialize(FILE *fp) const {
if (!tesseract::Serialize(fp, &bit_size_))
if (!tesseract::Serialize(fp, &bit_size_)) {
return false;
}
int wordlen = WordLength();
return tesseract::Serialize(fp, &array_[0], wordlen);
}
@ -118,18 +119,21 @@ bool BitVector::Serialize(FILE *fp) const {
// If swap is true, assumes a big/little-endian swap is needed.
bool BitVector::DeSerialize(bool swap, FILE *fp) {
uint32_t new_bit_size;
if (!tesseract::DeSerialize(fp, &new_bit_size))
if (!tesseract::DeSerialize(fp, &new_bit_size)) {
return false;
}
if (swap) {
ReverseN(&new_bit_size, sizeof(new_bit_size));
}
Alloc(new_bit_size);
int wordlen = WordLength();
if (!tesseract::DeSerialize(fp, &array_[0], wordlen))
if (!tesseract::DeSerialize(fp, &array_[0], wordlen)) {
return false;
}
if (swap) {
for (int i = 0; i < wordlen; ++i)
for (int i = 0; i < wordlen; ++i) {
ReverseN(&array_[i], sizeof(array_[i]));
}
}
return true;
}
@ -146,8 +150,9 @@ void BitVector::SetAllTrue() {
int BitVector::NextSetBit(int prev_bit) const {
// Move on to the next bit.
int next_bit = prev_bit + 1;
if (next_bit >= bit_size_)
if (next_bit >= bit_size_) {
return -1;
}
// Check the remains of the word containing the next_bit first.
int next_word = WordIndex(next_bit);
int bit_index = next_word * kBitFactor;
@ -156,10 +161,12 @@ int BitVector::NextSetBit(int prev_bit) const {
uint8_t byte = word & 0xff;
while (bit_index < word_end) {
if (bit_index + 8 > next_bit && byte != 0) {
while (bit_index + lsb_index_[byte] < next_bit && byte != 0)
while (bit_index + lsb_index_[byte] < next_bit && byte != 0) {
byte = lsb_eroded_[byte];
if (byte != 0)
}
if (byte != 0) {
return bit_index + lsb_index_[byte];
}
}
word >>= 8;
bit_index += 8;
@ -172,8 +179,9 @@ int BitVector::NextSetBit(int prev_bit) const {
++next_word;
bit_index += kBitFactor;
}
if (bit_index >= bit_size_)
if (bit_index >= bit_size_) {
return -1;
}
// Find the first non-zero byte within the word.
while ((word & 0xff) == 0) {
word >>= 8;
@ -200,29 +208,35 @@ int BitVector::NumSetBits() const {
// sensible if they aren't the same size, but they should be really.
void BitVector::operator|=(const BitVector &other) {
int length = std::min(WordLength(), other.WordLength());
for (int w = 0; w < length; ++w)
for (int w = 0; w < length; ++w) {
array_[w] |= other.array_[w];
}
}
void BitVector::operator&=(const BitVector &other) {
int length = std::min(WordLength(), other.WordLength());
for (int w = 0; w < length; ++w)
for (int w = 0; w < length; ++w) {
array_[w] &= other.array_[w];
for (int w = WordLength() - 1; w >= length; --w)
}
for (int w = WordLength() - 1; w >= length; --w) {
array_[w] = 0;
}
}
void BitVector::operator^=(const BitVector &other) {
int length = std::min(WordLength(), other.WordLength());
for (int w = 0; w < length; ++w)
for (int w = 0; w < length; ++w) {
array_[w] ^= other.array_[w];
}
}
// Set subtraction *this = v1 - v2.
void BitVector::SetSubtract(const BitVector &v1, const BitVector &v2) {
Alloc(v1.size());
int length = std::min(v1.WordLength(), v2.WordLength());
for (int w = 0; w < length; ++w)
for (int w = 0; w < length; ++w) {
array_[w] = v1.array_[w] ^ (v1.array_[w] & v2.array_[w]);
for (int w = WordLength() - 1; w >= length; --w)
}
for (int w = WordLength() - 1; w >= length; --w) {
array_[w] = v1.array_[w];
}
}
// Allocates memory for a vector of the given length.

View File

@ -75,10 +75,11 @@ public:
array_[WordIndex(index)] &= ~BitMask(index);
}
void SetValue(int index, bool value) {
if (value)
if (value) {
SetBit(index);
else
} else {
ResetBit(index);
}
}
bool At(int index) const {
return (array_[WordIndex(index)] & BitMask(index)) != 0;

View File

@ -94,8 +94,9 @@ void CLIST::assign_to_sublist( // to this list
CLIST_ITERATOR *end_it) { // from list end
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
if (!empty())
if (!empty()) {
LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT, nullptr);
}
last = start_it->extract_sublist(end_it);
}
@ -110,8 +111,9 @@ int32_t CLIST::length() const { // count elements
CLIST_ITERATOR it(const_cast<CLIST *>(this));
int32_t count = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
count++;
}
return count;
}
@ -178,15 +180,18 @@ bool CLIST::add_sorted(int comparator(const void *, const void *), bool unique,
CLIST_ITERATOR it(this);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
void *data = it.data();
if (data == new_data && unique)
if (data == new_data && unique) {
return false;
if (comparator(&data, &new_data) > 0)
}
if (comparator(&data, &new_data) > 0) {
break;
}
}
if (it.cycled_list())
if (it.cycled_list()) {
it.add_to_end(new_data);
else
} else {
it.add_before_then_move(new_data);
}
return true;
}
return false;
@ -214,8 +219,9 @@ void CLIST::set_subtract(int comparator(const void *, const void *), bool unique
subtra = s_it.data();
}
}
if (subtra == nullptr || comparator(&subtra, &minu) != 0)
if (subtra == nullptr || comparator(&subtra, &minu) != 0) {
add_sorted(comparator, unique, minu);
}
}
}
@ -236,8 +242,9 @@ void *CLIST_ITERATOR::forward() {
if (!list)
NO_LIST.error("CLIST_ITERATOR::forward", ABORT, nullptr);
#endif
if (list->empty())
if (list->empty()) {
return nullptr;
}
if (current) { // not removed so
// set previous
@ -246,8 +253,9 @@ void *CLIST_ITERATOR::forward() {
// In case next is deleted by another iterator, get next from current.
current = current->next;
} else {
if (ex_current_was_cycle_pt)
if (ex_current_was_cycle_pt) {
cycle_pt = next;
}
current = next;
}
@ -283,11 +291,13 @@ void *CLIST_ITERATOR::data_relative( // get data + or - ...
BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l");
#endif
if (offset == -1)
if (offset == -1) {
ptr = prev;
else
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next)
} else {
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) {
;
}
}
#ifndef NDEBUG
if (!ptr)
@ -311,13 +321,15 @@ void *CLIST_ITERATOR::move_to_last() {
NO_LIST.error("CLIST_ITERATOR::move_to_last", ABORT, nullptr);
#endif
while (current != list->last)
while (current != list->last) {
forward();
}
if (current == nullptr)
if (current == nullptr) {
return nullptr;
else
} else {
return current->data;
}
}
/***********************************************************************
@ -348,13 +360,15 @@ void CLIST_ITERATOR::exchange( // positions of 2 links
/* Do nothing if either list is empty or if both iterators reference the same
link */
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current))
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) {
return;
}
/* Error if either current element is deleted */
if (!current || !other_it->current)
if (!current || !other_it->current) {
DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT, nullptr);
}
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
(other before this); non-doubleton adjacent elements (this before other);
@ -393,15 +407,19 @@ non-adjacent elements. */
/* update end of list pointer when necessary (remember that the 2 iterators
may iterate over different lists!) */
if (list->last == current)
if (list->last == current) {
list->last = other_it->current;
if (other_it->list->last == other_it->current)
}
if (other_it->list->last == other_it->current) {
other_it->list->last = current;
}
if (current == cycle_pt)
if (current == cycle_pt) {
cycle_pt = other_it->cycle_pt;
if (other_it->current == other_it->cycle_pt)
}
if (other_it->current == other_it->cycle_pt) {
other_it->cycle_pt = cycle_pt;
}
/* The actual exchange - in all cases*/
@ -449,19 +467,22 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current
temp_it.mark_cycle_pt();
do { // walk sublist
if (temp_it.cycled_list()) // can't find end pt
if (temp_it.cycled_list()) { // can't find end pt
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT, nullptr);
}
if (temp_it.at_last()) {
list->last = prev;
ex_current_was_last = other_it->ex_current_was_last = true;
}
if (temp_it.current == cycle_pt)
if (temp_it.current == cycle_pt) {
ex_current_was_cycle_pt = true;
}
if (temp_it.current == other_it->cycle_pt)
if (temp_it.current == other_it->cycle_pt) {
other_it->ex_current_was_cycle_pt = true;
}
temp_it.forward();
} while (temp_it.prev != other_it->current);

View File

@ -186,10 +186,12 @@ public:
void *data() { // get current data
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::data", ABORT, nullptr);
if (!current)
}
if (!current) {
NULL_DATA.error("CLIST_ITERATOR::data", ABORT, nullptr);
}
#endif
return current->data;
}
@ -209,8 +211,9 @@ public:
bool empty() { // is list empty?
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::empty", ABORT, nullptr);
}
#endif
return list->empty();
}
@ -248,8 +251,9 @@ public:
inline void CLIST_ITERATOR::set_to_list( // change list
CLIST *list_to_iterate) {
#ifndef NDEBUG
if (!list_to_iterate)
if (!list_to_iterate) {
BAD_PARAMETER.error("CLIST_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr");
}
#endif
list = list_to_iterate;
@ -284,10 +288,12 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add
CLIST_LINK *new_element;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_after_then_move", ABORT, nullptr);
if (!new_data)
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_after_then_move", ABORT, "new_data is nullptr");
}
#endif
new_element = new CLIST_LINK;
@ -303,14 +309,17 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add
if (current) { // not extracted
current->next = new_element;
prev = current;
if (current == list->last)
if (current == list->last) {
list->last = new_element;
}
} else { // current extracted
prev->next = new_element;
if (ex_current_was_last)
if (ex_current_was_last) {
list->last = new_element;
if (ex_current_was_cycle_pt)
}
if (ex_current_was_cycle_pt) {
cycle_pt = new_element;
}
}
}
current = new_element;
@ -328,10 +337,12 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add
CLIST_LINK *new_element;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_after_stay_put", ABORT, nullptr);
if (!new_data)
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_after_stay_put", ABORT, "new_data is nullptr");
}
#endif
new_element = new CLIST_LINK;
@ -348,10 +359,12 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add
if (current) { // not extracted
current->next = new_element;
if (prev == current)
if (prev == current) {
prev = new_element;
if (current == list->last)
}
if (current == list->last) {
list->last = new_element;
}
} else { // current extracted
prev->next = new_element;
if (ex_current_was_last) {
@ -375,10 +388,12 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add
CLIST_LINK *new_element;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_before_then_move", ABORT, nullptr);
if (!new_data)
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_before_then_move", ABORT, "new_data is nullptr");
}
#endif
new_element = new CLIST_LINK;
@ -395,10 +410,12 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add
next = current;
} else { // current extracted
new_element->next = next;
if (ex_current_was_last)
if (ex_current_was_last) {
list->last = new_element;
if (ex_current_was_cycle_pt)
}
if (ex_current_was_cycle_pt) {
cycle_pt = new_element;
}
}
}
current = new_element;
@ -416,10 +433,12 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
CLIST_LINK *new_element;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_before_stay_put", ABORT, nullptr);
if (!new_data)
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_before_stay_put", ABORT, "new_data is nullptr");
}
#endif
new_element = new CLIST_LINK;
@ -435,12 +454,14 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
prev->next = new_element;
if (current) { // not extracted
new_element->next = current;
if (next == current)
if (next == current) {
next = new_element;
}
} else { // current extracted
new_element->next = next;
if (ex_current_was_last)
if (ex_current_was_last) {
list->last = new_element;
}
}
prev = new_element;
}
@ -456,10 +477,12 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add
inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_list_after", ABORT, nullptr);
if (!list_to_add)
}
if (!list_to_add) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr");
}
#endif
if (!list_to_add->empty()) {
@ -472,8 +495,9 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
} else {
if (current) { // not extracted
current->next = list_to_add->First();
if (current == list->last)
if (current == list->last) {
list->last = list_to_add->last;
}
list_to_add->last->next = next;
next = current->next;
} else { // current extracted
@ -500,10 +524,12 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) {
inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_list_before", ABORT, nullptr);
if (!list_to_add)
}
if (!list_to_add) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr");
}
#endif
if (!list_to_add->empty()) {
@ -519,10 +545,12 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) {
list_to_add->last->next = current;
} else { // current extracted
list_to_add->last->next = next;
if (ex_current_was_last)
if (ex_current_was_last) {
list->last = list_to_add->last;
if (ex_current_was_cycle_pt)
}
if (ex_current_was_cycle_pt) {
cycle_pt = prev->next;
}
}
current = prev->next;
next = current->next;
@ -544,11 +572,13 @@ inline void *CLIST_ITERATOR::extract() {
void *extracted_data;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::extract", ABORT, nullptr);
if (!current) // list empty or
// element extracted
}
if (!current) { // list empty or
// element extracted
NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT, nullptr);
}
#endif
if (list->singleton()) {
@ -581,8 +611,9 @@ inline void *CLIST_ITERATOR::extract() {
inline void *CLIST_ITERATOR::move_to_first() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::move_to_first", ABORT, nullptr);
}
#endif
current = list->First();
@ -604,14 +635,16 @@ inline void *CLIST_ITERATOR::move_to_first() {
inline void CLIST_ITERATOR::mark_cycle_pt() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT, nullptr);
}
#endif
if (current)
if (current) {
cycle_pt = current;
else
} else {
ex_current_was_cycle_pt = true;
}
started_cycling = false;
}
@ -624,8 +657,9 @@ inline void CLIST_ITERATOR::mark_cycle_pt() {
inline bool CLIST_ITERATOR::at_first() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::at_first", ABORT, nullptr);
}
#endif
// we're at a deleted
@ -643,8 +677,9 @@ inline bool CLIST_ITERATOR::at_first() {
inline bool CLIST_ITERATOR::at_last() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::at_last", ABORT, nullptr);
}
#endif
// we're at a deleted
@ -662,8 +697,9 @@ inline bool CLIST_ITERATOR::at_last() {
inline bool CLIST_ITERATOR::cycled_list() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::cycled_list", ABORT, nullptr);
}
#endif
return ((list->empty()) || ((current == cycle_pt) && started_cycling));
@ -678,8 +714,9 @@ inline bool CLIST_ITERATOR::cycled_list() {
inline int32_t CLIST_ITERATOR::length() {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::length", ABORT, nullptr);
}
#endif
return list->length();
@ -696,8 +733,9 @@ inline void CLIST_ITERATOR::sort( // sort elements
int comparator( // comparison routine
const void *, const void *)) {
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::sort", ABORT, nullptr);
}
#endif
list->sort(comparator);
@ -719,10 +757,12 @@ inline void CLIST_ITERATOR::add_to_end( // element to add
CLIST_LINK *new_element;
#ifndef NDEBUG
if (!list)
if (!list) {
NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT, nullptr);
if (!new_data)
}
if (!new_data) {
BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr");
}
#endif
if (this->at_last()) {

View File

@ -69,8 +69,9 @@ void ELIST::assign_to_sublist( // to this list
ELIST_ITERATOR *end_it) { // from list end
constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist");
if (!empty())
if (!empty()) {
LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT, nullptr);
}
last = start_it->extract_sublist(end_it);
}
@ -85,8 +86,9 @@ int32_t ELIST::length() const { // count elements
ELIST_ITERATOR it(const_cast<ELIST *>(this));
int32_t count = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
count++;
}
return count;
}
@ -163,10 +165,11 @@ ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *
return link;
}
}
if (it.cycled_list())
if (it.cycled_list()) {
it.add_to_end(new_link);
else
} else {
it.add_before_then_move(new_link);
}
}
return new_link;
}
@ -188,8 +191,9 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
if (!list)
NO_LIST.error("ELIST_ITERATOR::forward", ABORT, nullptr);
#endif
if (list->empty())
if (list->empty()) {
return nullptr;
}
if (current) { // not removed so
// set previous
@ -198,8 +202,9 @@ ELIST_LINK *ELIST_ITERATOR::forward() {
// In case next is deleted by another iterator, get next from current.
current = current->next;
} else {
if (ex_current_was_cycle_pt)
if (ex_current_was_cycle_pt) {
cycle_pt = next;
}
current = next;
}
#ifndef NDEBUG
@ -236,11 +241,13 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ...
BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l");
#endif
if (offset == -1)
if (offset == -1) {
ptr = prev;
else
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next)
} else {
for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) {
;
}
}
#ifndef NDEBUG
if (!ptr)
@ -264,8 +271,9 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() {
NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT, nullptr);
#endif
while (current != list->last)
while (current != list->last) {
forward();
}
return current;
}
@ -298,13 +306,15 @@ void ELIST_ITERATOR::exchange( // positions of 2 links
/* Do nothing if either list is empty or if both iterators reference the same
link */
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current))
if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) {
return;
}
/* Error if either current element is deleted */
if (!current || !other_it->current)
if (!current || !other_it->current) {
DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT, nullptr);
}
/* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements
(other before this); non-doubleton adjacent elements (this before other);
@ -343,15 +353,19 @@ non-adjacent elements. */
/* update end of list pointer when necessary (remember that the 2 iterators
may iterate over different lists!) */
if (list->last == current)
if (list->last == current) {
list->last = other_it->current;
if (other_it->list->last == other_it->current)
}
if (other_it->list->last == other_it->current) {
other_it->list->last = current;
}
if (current == cycle_pt)
if (current == cycle_pt) {
cycle_pt = other_it->cycle_pt;
if (other_it->current == other_it->cycle_pt)
}
if (other_it->current == other_it->cycle_pt) {
other_it->cycle_pt = cycle_pt;
}
/* The actual exchange - in all cases*/
@ -401,19 +415,22 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current
temp_it.mark_cycle_pt();
do { // walk sublist
if (temp_it.cycled_list()) // can't find end pt
if (temp_it.cycled_list()) { // can't find end pt
BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT, nullptr);
}
if (temp_it.at_last()) {
list->last = prev;
ex_current_was_last = other_it->ex_current_was_last = true;
}
if (temp_it.current == cycle_pt)
if (temp_it.current == cycle_pt) {
ex_current_was_cycle_pt = true;
}
if (temp_it.current == other_it->cycle_pt)
if (temp_it.current == other_it->cycle_pt) {
other_it->ex_current_was_cycle_pt = true;
}
temp_it.forward();
} while (temp_it.prev != other_it->current);

Some files were not shown because too many files have changed in this diff Show More