diff options
Diffstat (limited to 'tesseract/src/training/common/intfeaturemap.cpp')
-rw-r--r-- | tesseract/src/training/common/intfeaturemap.cpp | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/tesseract/src/training/common/intfeaturemap.cpp b/tesseract/src/training/common/intfeaturemap.cpp new file mode 100644 index 00000000..53829c87 --- /dev/null +++ b/tesseract/src/training/common/intfeaturemap.cpp @@ -0,0 +1,244 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: intfeaturemap.cpp +// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi +// to provide a subspace mapping and fast feature lookup. +// Created: Tue Oct 26 08:58:30 PDT 2010 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "intfeaturemap.h" + +#include "intfeaturespace.h" +#include "intfx.h" +// These includes do not exist yet, but will be coming soon. +//#include "sampleiterator.h" +//#include "trainingsample.h" +//#include "trainingsampleset.h" + +namespace tesseract { + +const int kMaxOffsetDist = 32; + +IntFeatureMap::IntFeatureMap() + : mapping_changed_(true), compact_size_(0) { + for (int dir = 0; dir < kNumOffsetMaps; ++dir) { + offset_plus_[dir] = nullptr; + offset_minus_[dir] = nullptr; + } +} + +IntFeatureMap::~IntFeatureMap() { + Clear(); +} + +// Pseudo-accessors. +int IntFeatureMap::IndexFeature(const INT_FEATURE_STRUCT& f) const { + return feature_space_.Index(f); +} +int IntFeatureMap::MapFeature(const INT_FEATURE_STRUCT& f) const { + return feature_map_.SparseToCompact(feature_space_.Index(f)); +} +int IntFeatureMap::MapIndexFeature(int index_feature) const { + return feature_map_.SparseToCompact(index_feature); +} +INT_FEATURE_STRUCT IntFeatureMap::InverseIndexFeature(int index_feature) const { + return feature_space_.PositionFromIndex(index_feature); +} +INT_FEATURE_STRUCT IntFeatureMap::InverseMapFeature(int map_feature) const { + int index = feature_map_.CompactToSparse(map_feature); + return feature_space_.PositionFromIndex(index); +} +void IntFeatureMap::DeleteMapFeature(int map_feature) { + feature_map_.Merge(-1, map_feature); + mapping_changed_ = true; +} +bool IntFeatureMap::IsMapFeatureDeleted(int map_feature) const { + return feature_map_.IsCompactDeleted(map_feature); +} + +// Copies the given feature_space and uses it as the index feature map +// from INT_FEATURE_STRUCT. +void IntFeatureMap::Init(const IntFeatureSpace& feature_space) { + feature_space_ = feature_space; + mapping_changed_ = false; + int sparse_size = feature_space_.Size(); + feature_map_.Init(sparse_size, true); + feature_map_.Setup(); + compact_size_ = feature_map_.CompactSize(); + // Initialize look-up tables if needed. + FCOORD dir = FeatureDirection(0); + if (dir.x() == 0.0f && dir.y() == 0.0f) + InitIntegerFX(); + // Compute look-up tables to generate offset features. + for (int dir = 0; dir < kNumOffsetMaps; ++dir) { + delete [] offset_plus_[dir]; + delete [] offset_minus_[dir]; + offset_plus_[dir] = new int[sparse_size]; + offset_minus_[dir] = new int[sparse_size]; + } + for (int dir = 1; dir <= kNumOffsetMaps; ++dir) { + for (int i = 0; i < sparse_size; ++i) { + int offset_index = ComputeOffsetFeature(i, dir); + offset_plus_[dir - 1][i] = offset_index; + offset_index = ComputeOffsetFeature(i, -dir); + offset_minus_[dir - 1][i] = offset_index; + } + } +} + +// Helper to return an offset index feature. In this context an offset +// feature with a dir of +/-1 is a feature of a similar direction, +// but shifted perpendicular to the direction of the feature. An offset +// feature with a dir of +/-2 is feature at the same position, but rotated +// by +/- one [compact] quantum. Returns the index of the generated offset +// feature, or -1 if it doesn't exist. Dir should be in +// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. +// A dir of 0 is an identity transformation. +// Both input and output are from the index(sparse) feature space, not +// the mapped/compact feature space, but the offset feature is the minimum +// distance moved from the input to guarantee that it maps to the next +// available quantum in the mapped/compact space. +int IntFeatureMap::OffsetFeature(int index_feature, int dir) const { + if (dir > 0 && dir <= kNumOffsetMaps) + return offset_plus_[dir - 1][index_feature]; + else if (dir < 0 && -dir <= kNumOffsetMaps) + return offset_minus_[-dir - 1][index_feature]; + else if (dir == 0) + return index_feature; + else + return -1; +} + + +//#define EXPERIMENT_ON +#ifdef EXPERIMENT_ON // This code is commented out as SampleIterator and +// TrainingSample are not reviewed/checked in yet, but these functions are a +// useful indicator of how an IntFeatureMap is setup. + +// Computes the features used by the subset of samples defined by +// the iterator and sets up the feature mapping. +// Returns the size of the compacted feature space. +int IntFeatureMap::FindNZFeatureMapping(SampleIterator* it) { + feature_map_.Init(feature_space_.Size(), false); + int total_samples = 0; + for (it->Begin(); !it->AtEnd(); it->Next()) { + const TrainingSample& sample = it->GetSample(); + GenericVector<int> features; + feature_space_.IndexAndSortFeatures(sample.features(), + sample.num_features(), + &features); + int num_features = features.size(); + for (int f = 0; f < num_features; ++f) + feature_map_.SetMap(features[f], true); + ++total_samples; + } + feature_map_.Setup(); + compact_size_ = feature_map_.CompactSize(); + mapping_changed_ = true; + FinalizeMapping(it); + tprintf("%d non-zero features found in %d samples\n", + compact_size_, total_samples); + return compact_size_; +} +#endif + +// After deleting some features, finish setting up the mapping, and map +// all the samples. Returns the size of the compacted feature space. +int IntFeatureMap::FinalizeMapping(SampleIterator* it) { + if (mapping_changed_) { + feature_map_.CompleteMerges(); + compact_size_ = feature_map_.CompactSize(); +#ifdef EXPERIMENT_ON + it->MapSampleFeatures(*this); +#endif + mapping_changed_ = false; + } + return compact_size_; +} + +// Prints the map features from the set in human-readable form. +void IntFeatureMap::DebugMapFeatures( + const GenericVector<int>& map_features) const { + for (int i = 0; i < map_features.size(); ++i) { + INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]); + f.print(); + } +} + +void IntFeatureMap::Clear() { + for (int dir = 0; dir < kNumOffsetMaps; ++dir) { + delete [] offset_plus_[dir]; + delete [] offset_minus_[dir]; + offset_plus_[dir] = nullptr; + offset_minus_[dir] = nullptr; + } +} + +// Helper to compute an offset index feature. In this context an offset +// feature with a dir of +/-1 is a feature of a similar direction, +// but shifted perpendicular to the direction of the feature. An offset +// feature with a dir of +/-2 is feature at the same position, but rotated +// by +/- one [compact] quantum. Returns the index of the generated offset +// feature, or -1 if it doesn't exist. Dir should be in +// [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction. +// A dir of 0 is an identity transformation. +// Both input and output are from the index(sparse) feature space, not +// the mapped/compact feature space, but the offset feature is the minimum +// distance moved from the input to guarantee that it maps to the next +// available quantum in the mapped/compact space. +int IntFeatureMap::ComputeOffsetFeature(int index_feature, int dir) const { + INT_FEATURE_STRUCT f = InverseIndexFeature(index_feature); + ASSERT_HOST(IndexFeature(f) == index_feature); + if (dir == 0) { + return index_feature; + } else if (dir == 1 || dir == -1) { + FCOORD feature_dir = FeatureDirection(f.Theta); + FCOORD rotation90(0.0f, 1.0f); + feature_dir.rotate(rotation90); + // Find the nearest existing feature. + for (int m = 1; m < kMaxOffsetDist; ++m) { + double x_pos = f.X + feature_dir.x() * (m * dir); + double y_pos = f.Y + feature_dir.y() * (m * dir); + int x = IntCastRounded(x_pos); + int y = IntCastRounded(y_pos); + if (x >= 0 && x <= UINT8_MAX && y >= 0 && y <= UINT8_MAX) { + INT_FEATURE_STRUCT offset_f; + offset_f.X = x; + offset_f.Y = y; + offset_f.Theta = f.Theta; + int offset_index = IndexFeature(offset_f); + if (offset_index != index_feature && offset_index >= 0) + return offset_index; // Found one. + } else { + return -1; // Hit the edge of feature space. + } + } + } else if (dir == 2 || dir == -2) { + // Find the nearest existing index_feature. + for (int m = 1; m < kMaxOffsetDist; ++m) { + int theta = f.Theta + m * dir / 2; + INT_FEATURE_STRUCT offset_f; + offset_f.X = f.X; + offset_f.Y = f.Y; + offset_f.Theta = Modulo(theta, 256); + int offset_index = IndexFeature(offset_f); + if (offset_index != index_feature && offset_index >= 0) + return offset_index; // Found one. + } + } + return -1; // Nothing within the max distance. +} + +} // namespace tesseract. |