2007-03-08 04:03:40 +08:00
|
|
|
/* -*-C-*-
 ********************************************************************************
 *
 * File:         matrix.c  (Formerly matrix.c)
 * Description:  Ratings matrix code. (Used by associator)
 * Author:       Mark Seaman, OCR Technology
 * Created:      Wed May 16 13:18:47 1990
 * Modified:     Wed Mar 20 09:44:47 1991 (Mark Seaman) marks@hpgrlt
 * Language:     C
 * Package:      N/A
 * Status:       Experimental (Do Not Distribute)
 *
 * (c) Copyright 1990, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 *********************************************************************************/
|
|
|
|
/*----------------------------------------------------------------------
              I n c l u d e s
----------------------------------------------------------------------*/
|
|
|
|
#include "matrix.h"
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
#include "callcpp.h"
|
2009-07-11 10:46:01 +08:00
|
|
|
#include "ratngs.h"
|
2010-11-24 02:34:14 +08:00
|
|
|
#include "tprintf.h"
|
2009-07-11 10:46:01 +08:00
|
|
|
#include "unicharset.h"
|
2007-03-08 04:03:40 +08:00
|
|
|
|
2013-09-23 23:26:50 +08:00
|
|
|
// Returns true if there are any real classification results.
|
|
|
|
bool MATRIX::Classified(int col, int row, int wildcard_id) const {
|
|
|
|
if (get(col, row) == NOT_CLASSIFIED) return false;
|
|
|
|
BLOB_CHOICE_IT b_it(get(col, row));
|
|
|
|
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
|
|
|
BLOB_CHOICE* choice = b_it.data();
|
|
|
|
if (choice->IsClassified())
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Expands the existing matrix in-place to make the band wider, without
|
|
|
|
// losing any existing data.
|
|
|
|
void MATRIX::IncreaseBandSize(int bandwidth) {
|
|
|
|
ResizeWithCopy(dimension(), bandwidth);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns a bigger MATRIX with a new column and row in the matrix in order
|
|
|
|
// to split the blob at the given (ind,ind) diagonal location.
|
|
|
|
// Entries are relocated to the new MATRIX using the transformation defined
|
|
|
|
// by MATRIX_COORD::MapForSplit.
|
|
|
|
// Transfers the pointer data to the new MATRIX and deletes *this.
|
|
|
|
MATRIX* MATRIX::ConsumeAndMakeBigger(int ind) {
|
|
|
|
int dim = dimension();
|
|
|
|
int band_width = bandwidth();
|
|
|
|
// Check to see if bandwidth needs expanding.
|
|
|
|
for (int col = ind; col >= 0 && col > ind - band_width; --col) {
|
|
|
|
if (array_[col * band_width + band_width - 1] != empty_) {
|
|
|
|
++band_width;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
MATRIX* result = new MATRIX(dim + 1, band_width);
|
|
|
|
|
|
|
|
for (int col = 0; col < dim; ++col) {
|
|
|
|
for (int row = col; row < dim && row < col + bandwidth(); ++row) {
|
|
|
|
MATRIX_COORD coord(col, row);
|
|
|
|
coord.MapForSplit(ind);
|
|
|
|
BLOB_CHOICE_LIST* choices = get(col, row);
|
|
|
|
if (choices != NULL) {
|
|
|
|
// Correct matrix location on each choice.
|
|
|
|
BLOB_CHOICE_IT bc_it(choices);
|
|
|
|
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
|
|
|
|
BLOB_CHOICE* choice = bc_it.data();
|
|
|
|
choice->set_matrix_cell(coord.col, coord.row);
|
|
|
|
}
|
|
|
|
ASSERT_HOST(coord.Valid(*result));
|
|
|
|
result->put(coord.col, coord.row, choices);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
delete this;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
|
|
|
|
// on the lists, but not any LanguageModelState that may be attached to the
|
|
|
|
// BLOB_CHOICEs.
|
|
|
|
MATRIX* MATRIX::DeepCopy() const {
|
|
|
|
int dim = dimension();
|
|
|
|
int band_width = bandwidth();
|
|
|
|
MATRIX* result = new MATRIX(dim, band_width);
|
|
|
|
for (int col = 0; col < dim; ++col) {
|
2015-07-10 05:28:20 +08:00
|
|
|
for (int row = col; row < dim && row < col + band_width; ++row) {
|
2013-09-23 23:26:50 +08:00
|
|
|
BLOB_CHOICE_LIST* choices = get(col, row);
|
|
|
|
if (choices != NULL) {
|
|
|
|
BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
|
2015-07-10 05:28:20 +08:00
|
|
|
copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
|
2013-09-23 23:26:50 +08:00
|
|
|
result->put(col, row, copy_choices);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2009-07-11 10:46:01 +08:00
|
|
|
// Print the best guesses out of the match rating matrix.
|
2012-02-02 11:06:39 +08:00
|
|
|
void MATRIX::print(const UNICHARSET &unicharset) const {
|
2013-09-23 23:26:50 +08:00
|
|
|
tprintf("Ratings Matrix (top 3 choices)\n");
|
|
|
|
int dim = dimension();
|
|
|
|
int band_width = bandwidth();
|
2010-11-24 02:34:14 +08:00
|
|
|
int row, col;
|
2013-09-23 23:26:50 +08:00
|
|
|
for (col = 0; col < dim; ++col) {
|
|
|
|
for (row = col; row < dim && row < col + band_width; ++row) {
|
|
|
|
BLOB_CHOICE_LIST *rating = this->get(col, row);
|
|
|
|
if (rating == NOT_CLASSIFIED) continue;
|
|
|
|
BLOB_CHOICE_IT b_it(rating);
|
|
|
|
tprintf("col=%d row=%d ", col, row);
|
|
|
|
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
|
|
|
tprintf("%s rat=%g cert=%g " ,
|
|
|
|
unicharset.id_to_unichar(b_it.data()->unichar_id()),
|
|
|
|
b_it.data()->rating(), b_it.data()->certainty());
|
|
|
|
}
|
|
|
|
tprintf("\n");
|
|
|
|
}
|
|
|
|
tprintf("\n");
|
|
|
|
}
|
2010-11-24 02:34:14 +08:00
|
|
|
tprintf("\n");
|
2013-09-23 23:26:50 +08:00
|
|
|
for (col = 0; col < dim; ++col) tprintf("\t%d", col);
|
|
|
|
tprintf("\n");
|
|
|
|
for (row = 0; row < dim; ++row) {
|
2010-11-24 02:34:14 +08:00
|
|
|
for (col = 0; col <= row; ++col) {
|
|
|
|
if (col == 0) tprintf("%d\t", row);
|
2013-09-23 23:26:50 +08:00
|
|
|
if (row >= col + band_width) {
|
|
|
|
tprintf(" \t");
|
|
|
|
continue;
|
|
|
|
}
|
2010-11-24 02:34:14 +08:00
|
|
|
BLOB_CHOICE_LIST *rating = this->get(col, row);
|
2007-03-08 04:03:40 +08:00
|
|
|
if (rating != NOT_CLASSIFIED) {
|
2010-11-24 02:34:14 +08:00
|
|
|
BLOB_CHOICE_IT b_it(rating);
|
|
|
|
int counter = 0;
|
|
|
|
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
2013-09-23 23:26:50 +08:00
|
|
|
tprintf("%s ",
|
|
|
|
unicharset.id_to_unichar(b_it.data()->unichar_id()));
|
2010-11-24 02:34:14 +08:00
|
|
|
++counter;
|
|
|
|
if (counter == 3) break;
|
2009-07-11 10:46:01 +08:00
|
|
|
}
|
2010-11-24 02:34:14 +08:00
|
|
|
tprintf("\t");
|
|
|
|
} else {
|
|
|
|
tprintf(" \t");
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
}
|
2010-11-24 02:34:14 +08:00
|
|
|
tprintf("\n");
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
}
|