mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-17 02:38:13 +08:00
72ac460f96
Signed-off-by: Stefan Weil <sw@weilnetz.de>
160 lines
5.2 KiB
C++
160 lines
5.2 KiB
C++
// Copyright 2011 Google Inc. All Rights Reserved.
|
|
// Author: rays@google.com (Ray Smith)
|
|
///////////////////////////////////////////////////////////////////////
|
|
// File: intfeaturedist.cpp
|
|
// Description: Fast set-difference-based feature distance calculator.
|
|
// Created: Thu Sep 01 13:07:30 PDT 2011
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#include "intfeaturedist.h"
|
|
#include "intfeaturemap.h"
|
|
|
|
namespace tesseract {
|
|
|
|
// Builds an empty calculator: zero size, zero accumulated weight, no feature
// map and no lookup tables. The tables stay null until Init() allocates them,
// so Init() has to run before anything indexes them.
IntFeatureDist::IntFeatureDist()
    : size_(0),
      total_feature_weight_(0.0),
      feature_map_(nullptr),
      features_(nullptr),
      features_delta_one_(nullptr),
      features_delta_two_(nullptr) {}
|
|
|
|
// Frees the three lookup tables by delegating to Clear().
// NOTE(review): the tables are owned through raw pointers; confirm that the
// class declaration prevents (or correctly implements) copying.
IntFeatureDist::~IntFeatureDist() { Clear(); }
|
|
|
|
// Initialize the table to the given size of feature space.
|
|
void IntFeatureDist::Init(const IntFeatureMap* feature_map) {
|
|
size_ = feature_map->sparse_size();
|
|
Clear();
|
|
feature_map_ = feature_map;
|
|
features_ = new bool[size_];
|
|
features_delta_one_ = new bool[size_];
|
|
features_delta_two_ = new bool[size_];
|
|
memset(features_, false, size_ * sizeof(features_[0]));
|
|
memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0]));
|
|
memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0]));
|
|
total_feature_weight_ = 0.0;
|
|
}
|
|
|
|
// Setup the map for the given indexed_features that have been indexed by
|
|
// feature_map.
|
|
void IntFeatureDist::Set(const GenericVector<int>& indexed_features,
|
|
int canonical_count, bool value) {
|
|
total_feature_weight_ = canonical_count;
|
|
for (int i = 0; i < indexed_features.size(); ++i) {
|
|
int f = indexed_features[i];
|
|
features_[f] = value;
|
|
for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) {
|
|
if (dir == 0) continue;
|
|
int mapped_f = feature_map_->OffsetFeature(f, dir);
|
|
if (mapped_f >= 0) {
|
|
features_delta_one_[mapped_f] = value;
|
|
for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) {
|
|
if (dir2 == 0) continue;
|
|
int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2);
|
|
if (mapped_f2 >= 0)
|
|
features_delta_two_[mapped_f2] = value;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compute the distance between the given feature vector and the last
|
|
// Set feature vector.
|
|
double IntFeatureDist::FeatureDistance(
|
|
const GenericVector<int>& features) const {
|
|
int num_test_features = features.size();
|
|
double denominator = total_feature_weight_ + num_test_features;
|
|
double misses = denominator;
|
|
for (int i = 0; i < num_test_features; ++i) {
|
|
int index = features[i];
|
|
double weight = 1.0;
|
|
if (features_[index]) {
|
|
// A perfect match.
|
|
misses -= 2.0 * weight;
|
|
} else if (features_delta_one_[index]) {
|
|
misses -= 1.5 * weight;
|
|
} else if (features_delta_two_[index]) {
|
|
// A near miss.
|
|
misses -= 1.0 * weight;
|
|
}
|
|
}
|
|
return misses / denominator;
|
|
}
|
|
|
|
// Compute the distance between the given feature vector and the last
|
|
// Set feature vector.
|
|
double IntFeatureDist::DebugFeatureDistance(
|
|
const GenericVector<int>& features) const {
|
|
int num_test_features = features.size();
|
|
double denominator = total_feature_weight_ + num_test_features;
|
|
double misses = denominator;
|
|
for (int i = 0; i < num_test_features; ++i) {
|
|
int index = features[i];
|
|
double weight = 1.0;
|
|
INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]);
|
|
tprintf("Testing feature weight %g:", weight);
|
|
f.print();
|
|
if (features_[index]) {
|
|
// A perfect match.
|
|
misses -= 2.0 * weight;
|
|
tprintf("Perfect hit\n");
|
|
} else if (features_delta_one_[index]) {
|
|
misses -= 1.5 * weight;
|
|
tprintf("-1 hit\n");
|
|
} else if (features_delta_two_[index]) {
|
|
// A near miss.
|
|
misses -= 1.0 * weight;
|
|
tprintf("-2 hit\n");
|
|
} else {
|
|
tprintf("Total miss\n");
|
|
}
|
|
}
|
|
tprintf("Features present:");
|
|
for (int i = 0; i < size_; ++i) {
|
|
if (features_[i]) {
|
|
INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
|
|
f.print();
|
|
}
|
|
}
|
|
tprintf("\nMinus one features:");
|
|
for (int i = 0; i < size_; ++i) {
|
|
if (features_delta_one_[i]) {
|
|
INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
|
|
f.print();
|
|
}
|
|
}
|
|
tprintf("\nMinus two features:");
|
|
for (int i = 0; i < size_; ++i) {
|
|
if (features_delta_two_[i]) {
|
|
INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
|
|
f.print();
|
|
}
|
|
}
|
|
tprintf("\n");
|
|
return misses / denominator;
|
|
}
|
|
|
|
// Clear all data.
|
|
void IntFeatureDist::Clear() {
|
|
delete [] features_;
|
|
features_ = nullptr;
|
|
delete [] features_delta_one_;
|
|
features_delta_one_ = nullptr;
|
|
delete [] features_delta_two_;
|
|
features_delta_two_ = nullptr;
|
|
}
|
|
|
|
} // namespace tesseract
|